opener-polarity-tagger 3.4.0 → 3.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e7d99cc90032fe712bac6fa6952d1dc482d7ea5a86d9db9544f17e52d75d68f6
4
- data.tar.gz: 8c8a5126f8a63a55bea7062c06d078bc87b0ff9286201956524755fb163e1172
3
+ metadata.gz: fe952e44b35deff6d30de2994ffff1555c6fb256915dd6931605f2141e29cb9c
4
+ data.tar.gz: 30cd612208d71ec9d2feea2fea625c445e6c5fdef1bd5f359bd160d4a8acb465
5
5
  SHA512:
6
- metadata.gz: 185160b82b01008ed03ce4a26e0d93ca18b1aab91e20c57acfe7425fa103c557606e0151e161ad7d36dc7380b2f26c9daae9e9649e4b1f7b65ce29c98f270ccc
7
- data.tar.gz: 73f6b42eba61db2004fff08f54a1636767e8e77e93ec817717530accfa9fcd3001097e630c043fea0d3afd462c42b755a03f274c4d74e427c1753cf70fb30648
6
+ metadata.gz: 94afe1918ac45c3fda4007588ee8a1c4f50ee6568a9bcf83c2b9de50090caf9bcc72494d4d731a2a3c6bbd143cc1a025ca071c18bc5379059316d293d973d1c7
7
+ data.tar.gz: 20fe782270ce4c1d1815acc9acfc9efd9870bd582435f86625bc5c9d92c09c2563fcd62cfc6a4c735a6696745f42bb253b6d5249c3222e9ad82fa196ffe19499
@@ -14,6 +14,15 @@
14
14
  <Domain/>
15
15
  </Sense>
16
16
  </LexicalEntry>
17
+ <LexicalEntry id="" partOfSpeech="noun">
18
+ <Lemma writtenForm="very comfortable"/>
19
+ <Sense>
20
+ <Confidence level="manual"/>
21
+ <MonolingualExternalRef/>
22
+ <Sentiment polarity="negative" strength="strong"/>
23
+ <Domain/>
24
+ </Sense>
25
+ </LexicalEntry>
17
26
  <LexicalEntry id="" partOfSpeech="noun">
18
27
  <Lemma writtenForm="abandonment"/>
19
28
  <Sense>
@@ -9,15 +9,16 @@ module Opener
9
9
  DESC = 'VUA polarity tagger multilanguage'
10
10
  LAST_EDITED = '21may2014'
11
11
  VERSION = '1.2'
12
+ N_WORDS = 5
12
13
 
13
- CACHE = LexiconsCache.new
14
+ CACHE = LexiconsCache.new
14
15
 
15
16
  def initialize ignore_pos: false, **params
16
17
  @ignore_pos = ignore_pos
17
18
  end
18
19
 
19
20
  def run input, params = {}
20
- kaf = KAF::Document.from_xml input
21
+ kaf = KAF::Document.from_xml input
21
22
 
22
23
  @cache_keys = params[:cache_keys] ||= {}
23
24
  @cache_keys.merge! lang: kaf.language
@@ -25,31 +26,52 @@ module Opener
25
26
 
26
27
  raise Opener::Core::UnsupportedLanguageError, kaf.language if @map.blank?
27
28
 
28
- kaf.terms.each do |t|
29
+ next_index = 0
30
+ kaf.terms.each_with_index do |t, index|
31
+ # skip terms when a multi_word_expression is found
32
+ next if next_index > index
29
33
  lemma = t.lemma&.downcase
30
34
  text = t.text.to_s.downcase
31
35
  pos = if @ignore_pos then nil else t.pos end
32
36
  attrs = Hashie::Mash.new
33
37
 
34
- # text matching have priority as sometimes
35
- # the lemma provided by Stanza is a different word
36
- lexicon, polarity_pos = @map.by_polarity text, pos
37
- lexicon, polarity_pos = @map.by_polarity lemma, pos if lexicon.polarity == 'unknown'
38
-
39
- if l = @map.by_negator(text) || @map.by_negator(lemma)
40
- lexicon, polarity_pos = l, nil
41
- attrs.sentiment_modifier = 'shifter'
42
- elsif l = @map.by_intensifier(text) || @map.by_intensifier(lemma)
43
- lexicon, polarity_pos = l, nil
44
- attrs.sentiment_modifier = 'intensifier'
45
- elsif lexicon.polarity != 'unknown'
46
- attrs.polarity = lexicon.polarity
38
+
39
+ polarity_pos = nil
40
+
41
+ if opts = @map.by_negator(text) || @map.by_negator(lemma)
42
+ lexicon, next_index = get_lexicon(opts, kaf, index)
43
+ attrs.sentiment_modifier = 'shifter' if lexicon
44
+ elsif opts = @map.by_intensifier(text) || @map.by_intensifier(lemma)
45
+ lexicon, next_index = get_lexicon(opts, kaf, index)
46
+ attrs.sentiment_modifier = 'intensifier' if lexicon
47
+ end
48
+
49
+ unless lexicon
50
+ # text matching have priority as sometimes
51
+ # the lemma provided by Stanza is a different word
52
+ [text, lemma].each do |word|
53
+ opts, polarity_pos = @map.by_polarity word, pos
54
+
55
+ if opts[:multi].size > 0 or opts[:single]
56
+ lexicon, next_index = get_lexicon opts, kaf, index
57
+ if lexicon
58
+ attrs.polarity = lexicon.polarity
59
+ break
60
+ end
61
+ end
62
+ end
47
63
  end
48
64
 
49
65
  if attrs.size > 0
50
- attrs['lexicon-id'] = lexicon.id.to_s if lexicon.id
51
- attrs.resource = lexicon.resource if lexicon.resource
66
+ attrs['lexicon-id'] = lexicon.id.to_s if lexicon&.id
67
+ attrs.resource = lexicon.resource if lexicon&.resource
52
68
  t.setPolarity attrs, polarity_pos
69
+ i = index
70
+ while i < next_index do
71
+ term = kaf.terms[i]
72
+ term.setPolarity attrs, polarity_pos
73
+ i += 1
74
+ end
53
75
  end
54
76
  end
55
77
 
@@ -58,6 +80,29 @@ module Opener
58
80
  kaf.to_xml
59
81
  end
60
82
 
83
+ def get_lexicon opts, kaf, index
84
+ if lexicon = identify_lexicon(kaf.terms[index, N_WORDS], opts.multi)
85
+ index = index + lexicon.lemma.strip.split(' ').size
86
+ else
87
+ lexicon = opts.single
88
+ end
89
+
90
+ [lexicon, index]
91
+ end
92
+
93
+ def identify_lexicon terms, lexicons
94
+ return unless lexicons.size > 0
95
+
96
+ lemma = terms.map{|t| t.lemma&.downcase }.join(' ')
97
+ text = terms.map{|t| t.text&.downcase }.join(' ')
98
+
99
+ lexicons.each do |lexicon|
100
+ return lexicon if lemma =~ /^#{lexicon.lemma}($|\s)+/
101
+ return lexicon if text =~ /^#{lexicon.lemma}($|\s)+/
102
+ end
103
+ nil
104
+ end
105
+
61
106
  end
62
107
  end
63
108
  end
@@ -7,9 +7,7 @@ module Opener
7
7
  attr_reader :intensifiers
8
8
  attr_reader :with_polarity
9
9
 
10
- UNKNOWN = Hashie::Mash.new polarity: 'unknown'
11
-
12
- POS_ORDER = 'NRVGAO'
10
+ POS_ORDER = 'ONRVGA'
13
11
  DEFAULT_POS = 'O'
14
12
  POS_SHORT_MAP = {
15
13
  adj: 'G',
@@ -45,43 +43,56 @@ module Opener
45
43
  @intensifiers[lemma]
46
44
  end
47
45
 
48
- def by_polarity lemma, short_pos
49
- l = @with_polarity[lemma+short_pos] if short_pos
50
- return [l, short_pos] if l
46
+ def by_polarity lemma, identified_short_pos
47
+ hash = Hashie::Mash.new multi: (@with_polarity[lemma] || [])
48
+
49
+ if identified_short_pos and lexicon = @with_polarity[lemma+identified_short_pos]
50
+ hash[:single] = lexicon
51
+ return [hash, identified_short_pos]
52
+ end
51
53
 
52
54
  POS_ORDER.chars.each do |short_pos|
53
- l = @with_polarity[lemma+short_pos]
54
- return [l, short_pos] if l
55
+ if lexicon = @with_polarity[lemma+short_pos]
56
+ hash[:single] = lexicon
57
+ return [hash, identified_short_pos]
58
+ end
55
59
  end
56
60
 
57
- [UNKNOWN, 'unknown']
61
+ [hash, 'unknown']
58
62
  end
59
63
 
60
64
  protected
61
65
 
62
- def map_one_polarity l
63
- poses = if l.poses.present? then l.poses else [l.pos] end
64
- poses.each do |pos|
65
- short_pos = POS_SHORT_MAP[pos&.to_sym] || DEFAULT_POS
66
- @with_polarity[l.lemma+short_pos] = l
67
- end
68
- end
69
-
70
66
  def map lexicons
71
67
  return if blank?
72
68
 
73
- lexicons.each do |l|
74
- next if l.lemma.nil?
69
+ lexicons.each do |lexicon|
70
+ next if lexicon.lemma.nil?
75
71
 
76
- case l.type
77
- when 'polarityShifter' then @negators[l.lemma] = l
78
- when 'intensifier' then @intensifiers[l.lemma] = l
79
- else
80
- if l.polarity
81
- map_one_polarity l
82
- l.variants&.each do |v|
83
- map_one_polarity v
72
+ sub_lexicons = [lexicon]
73
+ sub_lexicons += lexicon.variants if lexicon.variants
74
+
75
+ sub_lexicons.each do |variant|
76
+ if variant.lemma.strip.include? ' '
77
+ lemma = variant.lemma.strip.split(' ').first
78
+ type = :multi
79
+ else
80
+ lemma = variant.lemma
81
+ type = :single
82
+ end
83
+
84
+ if ['polarityShifter', 'intensifier'].include? lexicon.type
85
+ var = @negators if lexicon.type == 'polarityShifter'
86
+ var = @intensifiers if lexicon.type == 'intensifier'
87
+
88
+ var[lemma] ||= Hashie::Mash.new multi: []
89
+ if type == :multi
90
+ var[lemma][:multi] << lexicon
91
+ else
92
+ var[lemma][:single] = lexicon
84
93
  end
94
+ else
95
+ map_one_polarity lemma, variant, lexicon if lexicon.polarity
85
96
  end
86
97
  end
87
98
  end
@@ -91,6 +102,19 @@ module Opener
91
102
  puts "#{@lang}: loaded #{@with_polarity.size} elements with polarity"
92
103
  end
93
104
 
105
+ def map_one_polarity lemma, hash, lexicon
106
+ poses = if hash.poses.present? then hash.poses else [hash.pos] end
107
+ poses.each do |pos|
108
+ short_pos = POS_SHORT_MAP[pos&.to_sym] || DEFAULT_POS
109
+ @with_polarity[lemma] ||= []
110
+ if hash.lemma.strip.include? ' '
111
+ @with_polarity[lemma] << lexicon
112
+ else
113
+ @with_polarity[lemma+short_pos] = lexicon
114
+ end
115
+ end
116
+ end
117
+
94
118
  end
95
119
  end
96
120
  end
@@ -1,7 +1,7 @@
1
1
  module Opener
2
2
  class PolarityTagger
3
3
 
4
- VERSION = '3.4.0'
4
+ VERSION = '3.5.0'
5
5
 
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-polarity-tagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.4.0
4
+ version: 3.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-25 00:00:00.000000000 Z
11
+ date: 2021-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: opener-daemons
@@ -251,7 +251,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
251
251
  version: '0'
252
252
  requirements: []
253
253
  rubyforge_project:
254
- rubygems_version: 2.7.8
254
+ rubygems_version: 2.7.6
255
255
  signing_key:
256
256
  specification_version: 4
257
257
  summary: Polarity tagger for various languages.