opener-polarity-tagger 3.3.0 → 3.5.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dcce63ff8ce9916a2387c5d0f48209b203eacbc3834eedcf3b5b4990084ac1ad
4
- data.tar.gz: 495e79bfdec921d459ff78e50bdae28a2c6a0a8bc2aed150de27f49a7f99c423
3
+ metadata.gz: befa570d0fdb2774fee58f8f7f3154c4d944cee2865015411534d2c2c313837a
4
+ data.tar.gz: 4d95524761db77af92db9ece51dc5ff5bf66744445459a1c3c0b7c570ddb3267
5
5
  SHA512:
6
- metadata.gz: e51288c630084ce9be780b022ae23012337622b6df30de91962f5c73fd2ef32054435247bf850924a40ca723c3fe1a71fc9f688455b1ea5141acf80959fbe37a
7
- data.tar.gz: e8f00bd36a27aab5242d91e3f350d4da94d2661ef5565590e1793e02eb65075cf7dd789682b2cd0e06cf4b84bb21a5d5671d51f7e9d94ec1748cd1c8012f33ac
6
+ metadata.gz: be4cd1daf7cb8f906f2e91d1bba18e64bc713e5d13c6ec090877b4e079153b8fa818915fcc789f6df1bcfe4735b24af2e7a15dbab5517a7037bd6983eba612c8
7
+ data.tar.gz: dfd2058e9b98ea482d65e86d4a82d33865a1ef9cd5350deac4853c5e4b3c9cec39b37766b862a0f459c92644fd4e5f916f35ea4d1568ffdddde5d823b6b7b43b
@@ -14,6 +14,15 @@
14
14
  <Domain/>
15
15
  </Sense>
16
16
  </LexicalEntry>
17
+ <LexicalEntry id="" partOfSpeech="noun">
18
+ <Lemma writtenForm="very comfortable"/>
19
+ <Sense>
20
+ <Confidence level="manual"/>
21
+ <MonolingualExternalRef/>
22
+ <Sentiment polarity="negative" strength="strong"/>
23
+ <Domain/>
24
+ </Sense>
25
+ </LexicalEntry>
17
26
  <LexicalEntry id="" partOfSpeech="noun">
18
27
  <Lemma writtenForm="abandonment"/>
19
28
  <Sense>
@@ -9,46 +9,70 @@ module Opener
9
9
  DESC = 'VUA polarity tagger multilanguage'
10
10
  LAST_EDITED = '21may2014'
11
11
  VERSION = '1.2'
12
+ N_WORDS = 5
12
13
 
13
- CACHE = LexiconsCache.new
14
+ CACHE = LexiconsCache.new
14
15
 
15
16
  def initialize ignore_pos: false, **params
16
17
  @ignore_pos = ignore_pos
17
18
  end
18
19
 
19
20
  def run input, params = {}
20
- kaf = KAF::Document.from_xml input
21
+ kaf = KAF::Document.from_xml input
21
22
 
22
23
  @cache_keys = params[:cache_keys] ||= {}
23
24
  @cache_keys.merge! lang: kaf.language
25
+ @cache_keys = @cache_keys.except :property_type
24
26
  @map = kaf.map = CACHE[**@cache_keys].lexicons
25
27
 
26
28
  raise Opener::Core::UnsupportedLanguageError, kaf.language if @map.blank?
27
29
 
28
- kaf.terms.each do |t|
30
+ next_index = 0
31
+ kaf.terms.each_with_index do |t, index|
32
+ # skip terms when a multi_word_expression is found
33
+ next if next_index > index
29
34
  lemma = t.lemma&.downcase
30
35
  text = t.text.to_s.downcase
31
36
  pos = if @ignore_pos then nil else t.pos end
32
37
  attrs = Hashie::Mash.new
33
38
 
34
- # text matching have priority as sometimes
35
- # the lemma provided by Stanza is a different word
36
- lexicon, polarity_pos = @map.by_polarity text, pos
37
- lexicon, polarity_pos = @map.by_polarity lemma, pos if lexicon.polarity == 'unknown'
38
-
39
- if l = @map.by_negator(text) || @map.by_negator(lemma)
40
- lexicon, polarity_pos = l, nil
41
- attrs.sentiment_modifier = 'shifter'
42
- elsif l = @map.by_intensifier(text) || @map.by_intensifier(lemma)
43
- lexicon, polarity_pos = l, nil
44
- attrs.sentiment_modifier = 'intensifier'
45
- elsif lexicon.polarity != 'unknown'
46
- attrs.polarity = lexicon.polarity
39
+
40
+ polarity_pos = nil
41
+
42
+ if opts = @map.by_negator(text) || @map.by_negator(lemma)
43
+ lexicon, next_index = get_lexicon(opts, kaf, index)
44
+ attrs.sentiment_modifier = 'shifter' if lexicon
45
+ elsif opts = @map.by_intensifier(text) || @map.by_intensifier(lemma)
46
+ lexicon, next_index = get_lexicon(opts, kaf, index)
47
+ attrs.sentiment_modifier = 'intensifier' if lexicon
48
+ end
49
+
50
+ unless lexicon
51
+ # text matching have priority as sometimes
52
+ # the lemma provided by Stanza is a different word
53
+ [text, lemma].each do |word|
54
+ opts, polarity_pos = @map.by_polarity word, pos
55
+
56
+ if opts[:multi].size > 0 or opts[:single]
57
+ lexicon, next_index = get_lexicon opts, kaf, index
58
+ if lexicon
59
+ attrs.polarity = lexicon.polarity
60
+ break
61
+ end
62
+ end
63
+ end
47
64
  end
48
65
 
49
66
  if attrs.size > 0
50
- attrs.resource = lexicon.resource if lexicon.resource
67
+ attrs['lexicon-id'] = lexicon.id.to_s if lexicon&.id
68
+ attrs.resource = lexicon.resource if lexicon&.resource
51
69
  t.setPolarity attrs, polarity_pos
70
+ i = index
71
+ while i < next_index do
72
+ term = kaf.terms[i]
73
+ term.setPolarity attrs, polarity_pos
74
+ i += 1
75
+ end
52
76
  end
53
77
  end
54
78
 
@@ -57,6 +81,29 @@ module Opener
57
81
  kaf.to_xml
58
82
  end
59
83
 
84
+ def get_lexicon opts, kaf, index
85
+ if lexicon = identify_lexicon(kaf.terms[index, N_WORDS], opts.multi)
86
+ index = index + lexicon.lemma.strip.split(' ').size
87
+ else
88
+ lexicon = opts.single
89
+ end
90
+
91
+ [lexicon, index]
92
+ end
93
+
94
+ def identify_lexicon terms, lexicons
95
+ return unless lexicons.size > 0
96
+
97
+ lemma = terms.map{|t| t.lemma&.downcase }.join(' ')
98
+ text = terms.map{|t| t.text&.downcase }.join(' ')
99
+
100
+ lexicons.each do |lexicon|
101
+ return lexicon if lemma =~ /^#{Regexp.escape(lexicon.lemma)}($|\s)+/
102
+ return lexicon if text =~ /^#{Regexp.escape(lexicon.lemma)}($|\s)+/
103
+ end
104
+ nil
105
+ end
106
+
60
107
  end
61
108
  end
62
109
  end
@@ -7,9 +7,7 @@ module Opener
7
7
  attr_reader :intensifiers
8
8
  attr_reader :with_polarity
9
9
 
10
- UNKNOWN = Hashie::Mash.new polarity: 'unknown'
11
-
12
- POS_ORDER = 'NRVGAO'
10
+ POS_ORDER = 'ONRVGA'
13
11
  DEFAULT_POS = 'O'
14
12
  POS_SHORT_MAP = {
15
13
  adj: 'G',
@@ -45,16 +43,22 @@ module Opener
45
43
  @intensifiers[lemma]
46
44
  end
47
45
 
48
- def by_polarity lemma, short_pos
49
- l = @with_polarity[lemma+short_pos] if short_pos
50
- return [l, short_pos] if l
46
+ def by_polarity lemma, identified_short_pos
47
+ hash = Hashie::Mash.new multi: (@with_polarity[lemma] || [])
48
+
49
+ if identified_short_pos and lexicon = @with_polarity[lemma+identified_short_pos]
50
+ hash[:single] = lexicon
51
+ return [hash, identified_short_pos]
52
+ end
51
53
 
52
54
  POS_ORDER.chars.each do |short_pos|
53
- l = @with_polarity[lemma+short_pos]
54
- return [l, short_pos] if l
55
+ if lexicon = @with_polarity[lemma+short_pos]
56
+ hash[:single] = lexicon
57
+ return [hash, identified_short_pos]
58
+ end
55
59
  end
56
60
 
57
- [UNKNOWN, 'unknown']
61
+ [hash, 'unknown']
58
62
  end
59
63
 
60
64
  protected
@@ -62,16 +66,33 @@ module Opener
62
66
  def map lexicons
63
67
  return if blank?
64
68
 
65
- lexicons.each do |l|
66
- next if l.lemma.nil?
69
+ lexicons.each do |lexicon|
70
+ next if lexicon.lemma.nil?
67
71
 
68
- case l.type
69
- when 'polarityShifter' then @negators[l.lemma] = l
70
- when 'intensifier' then @intensifiers[l.lemma] = l
71
- else
72
- if l.polarity
73
- short_pos = POS_SHORT_MAP[l.pos&.to_sym] || DEFAULT_POS
74
- @with_polarity[l.lemma+short_pos] = l
72
+ sub_lexicons = [lexicon]
73
+ sub_lexicons += lexicon.variants if lexicon.variants
74
+
75
+ sub_lexicons.each do |variant|
76
+ if variant.lemma.strip.include? ' '
77
+ lemma = variant.lemma.strip.split(' ').first
78
+ type = :multi
79
+ else
80
+ lemma = variant.lemma
81
+ type = :single
82
+ end
83
+
84
+ if ['polarityShifter', 'intensifier'].include? lexicon.type
85
+ var = @negators if lexicon.type == 'polarityShifter'
86
+ var = @intensifiers if lexicon.type == 'intensifier'
87
+
88
+ var[lemma] ||= Hashie::Mash.new multi: []
89
+ if type == :multi
90
+ var[lemma][:multi] << lexicon
91
+ else
92
+ var[lemma][:single] = lexicon
93
+ end
94
+ else
95
+ map_one_polarity lemma, variant, lexicon if lexicon.polarity
75
96
  end
76
97
  end
77
98
  end
@@ -81,6 +102,19 @@ module Opener
81
102
  puts "#{@lang}: loaded #{@with_polarity.size} elements with polarity"
82
103
  end
83
104
 
105
+ def map_one_polarity lemma, hash, lexicon
106
+ poses = if hash.poses.present? then hash.poses else [hash.pos] end
107
+ poses.each do |pos|
108
+ short_pos = POS_SHORT_MAP[pos&.to_sym] || DEFAULT_POS
109
+ @with_polarity[lemma] ||= []
110
+ if hash.lemma.strip.include? ' '
111
+ @with_polarity[lemma] << lexicon
112
+ else
113
+ @with_polarity[lemma+short_pos] = lexicon
114
+ end
115
+ end
116
+ end
117
+
84
118
  end
85
119
  end
86
120
  end
@@ -15,10 +15,11 @@ module Opener
15
15
  end
16
16
 
17
17
  def [] **params
18
+ existing = @cache[params]
19
+ return existing if existing and existing.from > UPDATE_INTERVAL.ago
20
+
18
21
  synchronize do
19
- existing = @cache[params]
20
- break existing if existing and existing.from > UPDATE_INTERVAL.ago
21
- @cache[params] = cache_update existing, **params
22
+ @cache[params] = cache_update @cache[params], **params
22
23
  end
23
24
  end
24
25
  alias_method :get, :[]
@@ -1,7 +1,7 @@
1
1
  module Opener
2
2
  class PolarityTagger
3
3
 
4
- VERSION = '3.3.0'
4
+ VERSION = '3.5.3'
5
5
 
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-polarity-tagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.0
4
+ version: 3.5.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-10 00:00:00.000000000 Z
11
+ date: 2021-03-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: opener-daemons
@@ -251,7 +251,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
251
251
  version: '0'
252
252
  requirements: []
253
253
  rubyforge_project:
254
- rubygems_version: 2.7.8
254
+ rubygems_version: 2.7.6
255
255
  signing_key:
256
256
  specification_version: 4
257
257
  summary: Polarity tagger for various languages.