opener-polarity-tagger 3.3.0 → 3.5.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dcce63ff8ce9916a2387c5d0f48209b203eacbc3834eedcf3b5b4990084ac1ad
4
- data.tar.gz: 495e79bfdec921d459ff78e50bdae28a2c6a0a8bc2aed150de27f49a7f99c423
3
+ metadata.gz: befa570d0fdb2774fee58f8f7f3154c4d944cee2865015411534d2c2c313837a
4
+ data.tar.gz: 4d95524761db77af92db9ece51dc5ff5bf66744445459a1c3c0b7c570ddb3267
5
5
  SHA512:
6
- metadata.gz: e51288c630084ce9be780b022ae23012337622b6df30de91962f5c73fd2ef32054435247bf850924a40ca723c3fe1a71fc9f688455b1ea5141acf80959fbe37a
7
- data.tar.gz: e8f00bd36a27aab5242d91e3f350d4da94d2661ef5565590e1793e02eb65075cf7dd789682b2cd0e06cf4b84bb21a5d5671d51f7e9d94ec1748cd1c8012f33ac
6
+ metadata.gz: be4cd1daf7cb8f906f2e91d1bba18e64bc713e5d13c6ec090877b4e079153b8fa818915fcc789f6df1bcfe4735b24af2e7a15dbab5517a7037bd6983eba612c8
7
+ data.tar.gz: dfd2058e9b98ea482d65e86d4a82d33865a1ef9cd5350deac4853c5e4b3c9cec39b37766b862a0f459c92644fd4e5f916f35ea4d1568ffdddde5d823b6b7b43b
@@ -14,6 +14,15 @@
14
14
  <Domain/>
15
15
  </Sense>
16
16
  </LexicalEntry>
17
+ <LexicalEntry id="" partOfSpeech="noun">
18
+ <Lemma writtenForm="very comfortable"/>
19
+ <Sense>
20
+ <Confidence level="manual"/>
21
+ <MonolingualExternalRef/>
22
+ <Sentiment polarity="negative" strength="strong"/>
23
+ <Domain/>
24
+ </Sense>
25
+ </LexicalEntry>
17
26
  <LexicalEntry id="" partOfSpeech="noun">
18
27
  <Lemma writtenForm="abandonment"/>
19
28
  <Sense>
@@ -9,46 +9,70 @@ module Opener
9
9
  DESC = 'VUA polarity tagger multilanguage'
10
10
  LAST_EDITED = '21may2014'
11
11
  VERSION = '1.2'
12
+ N_WORDS = 5
12
13
 
13
- CACHE = LexiconsCache.new
14
+ CACHE = LexiconsCache.new
14
15
 
15
16
  def initialize ignore_pos: false, **params
16
17
  @ignore_pos = ignore_pos
17
18
  end
18
19
 
19
20
  def run input, params = {}
20
- kaf = KAF::Document.from_xml input
21
+ kaf = KAF::Document.from_xml input
21
22
 
22
23
  @cache_keys = params[:cache_keys] ||= {}
23
24
  @cache_keys.merge! lang: kaf.language
25
+ @cache_keys = @cache_keys.except :property_type
24
26
  @map = kaf.map = CACHE[**@cache_keys].lexicons
25
27
 
26
28
  raise Opener::Core::UnsupportedLanguageError, kaf.language if @map.blank?
27
29
 
28
- kaf.terms.each do |t|
30
+ next_index = 0
31
+ kaf.terms.each_with_index do |t, index|
32
+ # skip terms when a multi_word_expression is found
33
+ next if next_index > index
29
34
  lemma = t.lemma&.downcase
30
35
  text = t.text.to_s.downcase
31
36
  pos = if @ignore_pos then nil else t.pos end
32
37
  attrs = Hashie::Mash.new
33
38
 
34
- # text matching have priority as sometimes
35
- # the lemma provided by Stanza is a different word
36
- lexicon, polarity_pos = @map.by_polarity text, pos
37
- lexicon, polarity_pos = @map.by_polarity lemma, pos if lexicon.polarity == 'unknown'
38
-
39
- if l = @map.by_negator(text) || @map.by_negator(lemma)
40
- lexicon, polarity_pos = l, nil
41
- attrs.sentiment_modifier = 'shifter'
42
- elsif l = @map.by_intensifier(text) || @map.by_intensifier(lemma)
43
- lexicon, polarity_pos = l, nil
44
- attrs.sentiment_modifier = 'intensifier'
45
- elsif lexicon.polarity != 'unknown'
46
- attrs.polarity = lexicon.polarity
39
+
40
+ polarity_pos = nil
41
+
42
+ if opts = @map.by_negator(text) || @map.by_negator(lemma)
43
+ lexicon, next_index = get_lexicon(opts, kaf, index)
44
+ attrs.sentiment_modifier = 'shifter' if lexicon
45
+ elsif opts = @map.by_intensifier(text) || @map.by_intensifier(lemma)
46
+ lexicon, next_index = get_lexicon(opts, kaf, index)
47
+ attrs.sentiment_modifier = 'intensifier' if lexicon
48
+ end
49
+
50
+ unless lexicon
51
+ # text matching have priority as sometimes
52
+ # the lemma provided by Stanza is a different word
53
+ [text, lemma].each do |word|
54
+ opts, polarity_pos = @map.by_polarity word, pos
55
+
56
+ if opts[:multi].size > 0 or opts[:single]
57
+ lexicon, next_index = get_lexicon opts, kaf, index
58
+ if lexicon
59
+ attrs.polarity = lexicon.polarity
60
+ break
61
+ end
62
+ end
63
+ end
47
64
  end
48
65
 
49
66
  if attrs.size > 0
50
- attrs.resource = lexicon.resource if lexicon.resource
67
+ attrs['lexicon-id'] = lexicon.id.to_s if lexicon&.id
68
+ attrs.resource = lexicon.resource if lexicon&.resource
51
69
  t.setPolarity attrs, polarity_pos
70
+ i = index
71
+ while i < next_index do
72
+ term = kaf.terms[i]
73
+ term.setPolarity attrs, polarity_pos
74
+ i += 1
75
+ end
52
76
  end
53
77
  end
54
78
 
@@ -57,6 +81,29 @@ module Opener
57
81
  kaf.to_xml
58
82
  end
59
83
 
84
+ def get_lexicon opts, kaf, index
85
+ if lexicon = identify_lexicon(kaf.terms[index, N_WORDS], opts.multi)
86
+ index = index + lexicon.lemma.strip.split(' ').size
87
+ else
88
+ lexicon = opts.single
89
+ end
90
+
91
+ [lexicon, index]
92
+ end
93
+
94
+ def identify_lexicon terms, lexicons
95
+ return unless lexicons.size > 0
96
+
97
+ lemma = terms.map{|t| t.lemma&.downcase }.join(' ')
98
+ text = terms.map{|t| t.text&.downcase }.join(' ')
99
+
100
+ lexicons.each do |lexicon|
101
+ return lexicon if lemma =~ /^#{Regexp.escape(lexicon.lemma)}($|\s)+/
102
+ return lexicon if text =~ /^#{Regexp.escape(lexicon.lemma)}($|\s)+/
103
+ end
104
+ nil
105
+ end
106
+
60
107
  end
61
108
  end
62
109
  end
@@ -7,9 +7,7 @@ module Opener
7
7
  attr_reader :intensifiers
8
8
  attr_reader :with_polarity
9
9
 
10
- UNKNOWN = Hashie::Mash.new polarity: 'unknown'
11
-
12
- POS_ORDER = 'NRVGAO'
10
+ POS_ORDER = 'ONRVGA'
13
11
  DEFAULT_POS = 'O'
14
12
  POS_SHORT_MAP = {
15
13
  adj: 'G',
@@ -45,16 +43,22 @@ module Opener
45
43
  @intensifiers[lemma]
46
44
  end
47
45
 
48
- def by_polarity lemma, short_pos
49
- l = @with_polarity[lemma+short_pos] if short_pos
50
- return [l, short_pos] if l
46
+ def by_polarity lemma, identified_short_pos
47
+ hash = Hashie::Mash.new multi: (@with_polarity[lemma] || [])
48
+
49
+ if identified_short_pos and lexicon = @with_polarity[lemma+identified_short_pos]
50
+ hash[:single] = lexicon
51
+ return [hash, identified_short_pos]
52
+ end
51
53
 
52
54
  POS_ORDER.chars.each do |short_pos|
53
- l = @with_polarity[lemma+short_pos]
54
- return [l, short_pos] if l
55
+ if lexicon = @with_polarity[lemma+short_pos]
56
+ hash[:single] = lexicon
57
+ return [hash, identified_short_pos]
58
+ end
55
59
  end
56
60
 
57
- [UNKNOWN, 'unknown']
61
+ [hash, 'unknown']
58
62
  end
59
63
 
60
64
  protected
@@ -62,16 +66,33 @@ module Opener
62
66
  def map lexicons
63
67
  return if blank?
64
68
 
65
- lexicons.each do |l|
66
- next if l.lemma.nil?
69
+ lexicons.each do |lexicon|
70
+ next if lexicon.lemma.nil?
67
71
 
68
- case l.type
69
- when 'polarityShifter' then @negators[l.lemma] = l
70
- when 'intensifier' then @intensifiers[l.lemma] = l
71
- else
72
- if l.polarity
73
- short_pos = POS_SHORT_MAP[l.pos&.to_sym] || DEFAULT_POS
74
- @with_polarity[l.lemma+short_pos] = l
72
+ sub_lexicons = [lexicon]
73
+ sub_lexicons += lexicon.variants if lexicon.variants
74
+
75
+ sub_lexicons.each do |variant|
76
+ if variant.lemma.strip.include? ' '
77
+ lemma = variant.lemma.strip.split(' ').first
78
+ type = :multi
79
+ else
80
+ lemma = variant.lemma
81
+ type = :single
82
+ end
83
+
84
+ if ['polarityShifter', 'intensifier'].include? lexicon.type
85
+ var = @negators if lexicon.type == 'polarityShifter'
86
+ var = @intensifiers if lexicon.type == 'intensifier'
87
+
88
+ var[lemma] ||= Hashie::Mash.new multi: []
89
+ if type == :multi
90
+ var[lemma][:multi] << lexicon
91
+ else
92
+ var[lemma][:single] = lexicon
93
+ end
94
+ else
95
+ map_one_polarity lemma, variant, lexicon if lexicon.polarity
75
96
  end
76
97
  end
77
98
  end
@@ -81,6 +102,19 @@ module Opener
81
102
  puts "#{@lang}: loaded #{@with_polarity.size} elements with polarity"
82
103
  end
83
104
 
105
+ def map_one_polarity lemma, hash, lexicon
106
+ poses = if hash.poses.present? then hash.poses else [hash.pos] end
107
+ poses.each do |pos|
108
+ short_pos = POS_SHORT_MAP[pos&.to_sym] || DEFAULT_POS
109
+ @with_polarity[lemma] ||= []
110
+ if hash.lemma.strip.include? ' '
111
+ @with_polarity[lemma] << lexicon
112
+ else
113
+ @with_polarity[lemma+short_pos] = lexicon
114
+ end
115
+ end
116
+ end
117
+
84
118
  end
85
119
  end
86
120
  end
@@ -15,10 +15,11 @@ module Opener
15
15
  end
16
16
 
17
17
  def [] **params
18
+ existing = @cache[params]
19
+ return existing if existing and existing.from > UPDATE_INTERVAL.ago
20
+
18
21
  synchronize do
19
- existing = @cache[params]
20
- break existing if existing and existing.from > UPDATE_INTERVAL.ago
21
- @cache[params] = cache_update existing, **params
22
+ @cache[params] = cache_update @cache[params], **params
22
23
  end
23
24
  end
24
25
  alias_method :get, :[]
@@ -1,7 +1,7 @@
1
1
  module Opener
2
2
  class PolarityTagger
3
3
 
4
- VERSION = '3.3.0'
4
+ VERSION = '3.5.3'
5
5
 
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-polarity-tagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.0
4
+ version: 3.5.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-10 00:00:00.000000000 Z
11
+ date: 2021-03-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: opener-daemons
@@ -251,7 +251,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
251
251
  version: '0'
252
252
  requirements: []
253
253
  rubyforge_project:
254
- rubygems_version: 2.7.8
254
+ rubygems_version: 2.7.6
255
255
  signing_key:
256
256
  specification_version: 4
257
257
  summary: Polarity tagger for various languages.