opener-polarity-tagger 3.2.6 → 3.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a550043e18b17f1e12fd9466f5eee1dd2f806f97f38fe4b0e51e6ac4c5dd6ff1
4
- data.tar.gz: ec57806e4fa7559082aeccf9bb0e1492ef77d93f86d411549f5513282e10ce64
3
+ metadata.gz: 5bc937332f3126f240a402051ede1ec3d50c753c65a09e147d4d99446be67bd7
4
+ data.tar.gz: deb5934edffe4496b83d57d12d0ad2c1dc0ebff77bfbaaa1509602f2a80ae7db
5
5
  SHA512:
6
- metadata.gz: 1b53a75fccce4af956ab6b334406896ce6489967067e9abc1b529eb2ece9a1dbe167e8a567e6f938292d945a0e174d1c0b79305ce301e477549fd2161a03361c
7
- data.tar.gz: 5b8bd573afe2eb9fb41002e8d62949c890893395d711ad98220df2e159220d54caac8d70f5c5134898e821213ea6c05e1d1b226a4c50336361fc33f528b6cdb7
6
+ metadata.gz: 6430b221c2b87e2e934f442535d4bfae2dae80b520e0703864b8ff3b3a407fd36954b0538fa6a1f4d0ae6bc52461379c9aa45e98c6021d6634b9759f1a1f9495
7
+ data.tar.gz: 2626fbe6a5d959eda1ff4c06a9b22408d81520ff0c1fbb26be573051df12fd391cee1049488b25bd47a7778b186914730902d9be8765121bc787bff7f4faf5b3
data/core/LexiconMod.py CHANGED
@@ -45,9 +45,7 @@ def load_lexicons(language, path=None):
45
45
  def show_lexicons(language, path=None):
46
46
  if path is None:
47
47
  path = __module_dir
48
- #lexicons, default_id, this_folder, folder_per_lang = load_lexicons(language, path)
49
- LexiconSent(language,None,path)
50
-
48
+ lexicons, default_id, this_folder, folder_per_lang = load_lexicons(language, path)
51
49
  print
52
50
  print '#'*30
53
51
  print 'Available lexicons for',language
@@ -14,6 +14,15 @@
14
14
  <Domain/>
15
15
  </Sense>
16
16
  </LexicalEntry>
17
+ <LexicalEntry id="" partOfSpeech="noun">
18
+ <Lemma writtenForm="very comfortable"/>
19
+ <Sense>
20
+ <Confidence level="manual"/>
21
+ <MonolingualExternalRef/>
22
+ <Sentiment polarity="negative" strength="strong"/>
23
+ <Domain/>
24
+ </Sense>
25
+ </LexicalEntry>
17
26
  <LexicalEntry id="" partOfSpeech="noun">
18
27
  <Lemma writtenForm="abandonment"/>
19
28
  <Sense>
File without changes
@@ -18,6 +18,10 @@ module Opener
18
18
  @node.attr :lemma
19
19
  end
20
20
 
21
+ def text
22
+ @node.attr :text
23
+ end
24
+
21
25
  def pos
22
26
  @node.attr :pos
23
27
  end
@@ -1,6 +1,6 @@
1
1
  require_relative 'lexicons_cache'
2
2
  require_relative 'lexicon_map'
3
- require_relative 'kaf/document'
3
+ require_relative '../kaf/document'
4
4
 
5
5
  module Opener
6
6
  class PolarityTagger
@@ -9,53 +9,98 @@ module Opener
9
9
  DESC = 'VUA polarity tagger multilanguage'
10
10
  LAST_EDITED = '21may2014'
11
11
  VERSION = '1.2'
12
+ N_WORDS = 5
12
13
 
13
- CACHE = LexiconsCache.new
14
+ CACHE = LexiconsCache.new
14
15
 
15
16
  def initialize ignore_pos: false, **params
16
17
  @ignore_pos = ignore_pos
17
18
  end
18
19
 
19
- def clear_cache lang: nil, environment:
20
- end
21
-
22
20
  def run input, params = {}
23
- @kaf = KAF::Document.from_xml input
21
+ kaf = KAF::Document.from_xml input
24
22
 
25
23
  @cache_keys = params[:cache_keys] ||= {}
26
- @cache_keys.merge! lang: @kaf.language
27
- @map = @kaf.map = CACHE[**@cache_keys].lexicons
24
+ @cache_keys.merge! lang: kaf.language
25
+ @map = kaf.map = CACHE[**@cache_keys].lexicons
28
26
 
29
- raise Opener::Core::UnsupportedLanguageError, @kaf.language if @map.blank?
27
+ raise Opener::Core::UnsupportedLanguageError, kaf.language if @map.blank?
30
28
 
31
- @kaf.terms.each do |t|
29
+ next_index = 0
30
+ kaf.terms.each_with_index do |t, index|
31
+ # skip terms when a multi_word_expression is found
32
+ next if next_index > index
32
33
  lemma = t.lemma&.downcase
34
+ text = t.text.to_s.downcase
33
35
  pos = if @ignore_pos then nil else t.pos end
34
36
  attrs = Hashie::Mash.new
35
37
 
36
- lexicon, polarity_pos = @map.by_polarity lemma, pos
37
38
 
38
- if lexicon.polarity != 'unknown'
39
- attrs.polarity = lexicon.polarity
40
- end
41
- if l = @map.by_negator(lemma)
42
- lexicon, polarity_pos = l, nil
43
- attrs.sentiment_modifier = 'shifter'
39
+ polarity_pos = nil
40
+
41
+ if opts = @map.by_negator(text) || @map.by_negator(lemma)
42
+ lexicon, next_index = get_lexicon(opts, kaf, index)
43
+ attrs.sentiment_modifier = 'shifter' if lexicon
44
+ elsif opts = @map.by_intensifier(text) || @map.by_intensifier(lemma)
45
+ lexicon, next_index = get_lexicon(opts, kaf, index)
46
+ attrs.sentiment_modifier = 'intensifier' if lexicon
44
47
  end
45
- if l = @map.by_intensifier(lemma)
46
- lexicon, polarity_pos = l, nil
47
- attrs.sentiment_modifier = 'intensifier'
48
+
49
+ unless lexicon
50
+ # text matching have priority as sometimes
51
+ # the lemma provided by Stanza is a different word
52
+ [text, lemma].each do |word|
53
+ opts, polarity_pos = @map.by_polarity word, pos
54
+
55
+ if opts[:multi].size > 0 or opts[:single]
56
+ lexicon, next_index = get_lexicon opts, kaf, index
57
+ if lexicon
58
+ attrs.polarity = lexicon.polarity
59
+ break
60
+ end
61
+ end
62
+ end
48
63
  end
49
64
 
50
65
  if attrs.size > 0
51
- attrs.resource = lexicon.resource if lexicon.resource
66
+ attrs['lexicon-id'] = lexicon.id.to_s if lexicon&.id
67
+ attrs.resource = lexicon.resource if lexicon&.resource
52
68
  t.setPolarity attrs, polarity_pos
69
+ i = index
70
+ while i < next_index do
71
+ term = kaf.terms[i]
72
+ term.setPolarity attrs, polarity_pos
73
+ i += 1
74
+ end
53
75
  end
54
76
  end
55
77
 
56
- @kaf.add_linguistic_processor DESC, "#{LAST_EDITED}_#{VERSION}", 'terms'
78
+ kaf.add_linguistic_processor DESC, "#{LAST_EDITED}_#{VERSION}", 'terms'
79
+
80
+ kaf.to_xml
81
+ end
57
82
 
58
- @kaf.to_xml
83
+ def get_lexicon opts, kaf, index
84
+ if lexicon = identify_lexicon(kaf.terms[index, N_WORDS], opts.multi)
85
+ index = index + lexicon.lemma.strip.split(' ').size
86
+ else
87
+ lexicon = opts.single
88
+ end
89
+
90
+ [lexicon, index]
91
+ end
92
+
93
+ def identify_lexicon terms, lexicons
94
+ return unless lexicons.size > 0
95
+
96
+ lemma = terms.map{|t| t.lemma&.downcase }.join(' ')
97
+ text = terms.map{|t| t.text&.downcase }.join(' ')
98
+
99
+ lexicons.each do |lexicon|
100
+ return lexicon if lemma =~ /^#{lexicon.lemma}($|\s)+/
101
+ return lexicon if text =~ /^#{lexicon.lemma}($|\s)+/
102
+ end
103
+ nil
59
104
  end
60
105
 
61
106
  end
@@ -7,9 +7,7 @@ module Opener
7
7
  attr_reader :intensifiers
8
8
  attr_reader :with_polarity
9
9
 
10
- UNKNOWN = Hashie::Mash.new polarity: 'unknown'
11
-
12
- POS_ORDER = 'NRVGAO'
10
+ POS_ORDER = 'ONRVGA'
13
11
  DEFAULT_POS = 'O'
14
12
  POS_SHORT_MAP = {
15
13
  adj: 'G',
@@ -45,17 +43,22 @@ module Opener
45
43
  @intensifiers[lemma]
46
44
  end
47
45
 
48
- def by_polarity lemma, short_pos
49
- return [@with_polarity[lemma+short_pos] || UNKNOWN, short_pos] if short_pos
46
+ def by_polarity lemma, identified_short_pos
47
+ hash = Hashie::Mash.new multi: (@with_polarity[lemma] || [])
48
+
49
+ if identified_short_pos and lexicon = @with_polarity[lemma+identified_short_pos]
50
+ hash[:single] = lexicon
51
+ return [hash, identified_short_pos]
52
+ end
50
53
 
51
54
  POS_ORDER.chars.each do |short_pos|
52
- if l = @with_polarity[lemma+short_pos]
53
- puts "Found polarify #{l.polarity} for #{lemma} with PoS #{short_pos}"
54
- return [l, short_pos]
55
+ if lexicon = @with_polarity[lemma+short_pos]
56
+ hash[:single] = lexicon
57
+ return [hash, identified_short_pos]
55
58
  end
56
59
  end
57
60
 
58
- [UNKNOWN, 'unknown']
61
+ [hash, 'unknown']
59
62
  end
60
63
 
61
64
  protected
@@ -63,16 +66,33 @@ module Opener
63
66
  def map lexicons
64
67
  return if blank?
65
68
 
66
- lexicons.each do |l|
67
- next if l.lemma.nil?
69
+ lexicons.each do |lexicon|
70
+ next if lexicon.lemma.nil?
68
71
 
69
- case l.type
70
- when 'polarityShifter' then @negators[l.lemma] = l
71
- when 'intensifier' then @intensifiers[l.lemma] = l
72
- else
73
- if l.polarity
74
- short_pos = POS_SHORT_MAP[l.pos&.to_sym] || DEFAULT_POS
75
- @with_polarity[l.lemma+short_pos] = l
72
+ sub_lexicons = [lexicon]
73
+ sub_lexicons += lexicon.variants if lexicon.variants
74
+
75
+ sub_lexicons.each do |variant|
76
+ if variant.lemma.strip.include? ' '
77
+ lemma = variant.lemma.strip.split(' ').first
78
+ type = :multi
79
+ else
80
+ lemma = variant.lemma
81
+ type = :single
82
+ end
83
+
84
+ if ['polarityShifter', 'intensifier'].include? lexicon.type
85
+ var = @negators if lexicon.type == 'polarityShifter'
86
+ var = @intensifiers if lexicon.type == 'intensifier'
87
+
88
+ var[lemma] ||= Hashie::Mash.new multi: []
89
+ if type == :multi
90
+ var[lemma][:multi] << lexicon
91
+ else
92
+ var[lemma][:single] = lexicon
93
+ end
94
+ else
95
+ map_one_polarity lemma, variant, lexicon if lexicon.polarity
76
96
  end
77
97
  end
78
98
  end
@@ -82,6 +102,19 @@ module Opener
82
102
  puts "#{@lang}: loaded #{@with_polarity.size} elements with polarity"
83
103
  end
84
104
 
105
+ def map_one_polarity lemma, hash, lexicon
106
+ poses = if hash.poses.present? then hash.poses else [hash.pos] end
107
+ poses.each do |pos|
108
+ short_pos = POS_SHORT_MAP[pos&.to_sym] || DEFAULT_POS
109
+ @with_polarity[lemma] ||= []
110
+ if hash.lemma.strip.include? ' '
111
+ @with_polarity[lemma] << lexicon
112
+ else
113
+ @with_polarity[lemma+short_pos] = lexicon
114
+ end
115
+ end
116
+ end
117
+
85
118
  end
86
119
  end
87
120
  end
@@ -15,10 +15,11 @@ module Opener
15
15
  end
16
16
 
17
17
  def [] **params
18
+ existing = @cache[params]
19
+ return existing if existing and existing.from > UPDATE_INTERVAL.ago
20
+
18
21
  synchronize do
19
- existing = @cache[params]
20
- break existing if existing and existing.from > UPDATE_INTERVAL.ago
21
- @cache[params] = cache_update existing, **params
22
+ @cache[params] = cache_update @cache[params], **params
22
23
  end
23
24
  end
24
25
  alias_method :get, :[]
@@ -1,7 +1,7 @@
1
1
  module Opener
2
2
  class PolarityTagger
3
3
 
4
- VERSION = '3.2.6'
4
+ VERSION = '3.5.1'
5
5
 
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-polarity-tagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.6
4
+ version: 3.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-11-04 00:00:00.000000000 Z
11
+ date: 2021-02-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: opener-daemons
@@ -212,12 +212,12 @@ files:
212
212
  - core/poltagger-basic-multi.py
213
213
  - exec/polarity-tagger.rb
214
214
  - ext/hack/Rakefile
215
+ - lib/opener/kaf/document.rb
216
+ - lib/opener/kaf/term.rb
215
217
  - lib/opener/polarity_tagger.rb
216
218
  - lib/opener/polarity_tagger/cli.rb
217
219
  - lib/opener/polarity_tagger/external.rb
218
220
  - lib/opener/polarity_tagger/internal.rb
219
- - lib/opener/polarity_tagger/kaf/document.rb
220
- - lib/opener/polarity_tagger/kaf/term.rb
221
221
  - lib/opener/polarity_tagger/lexicon_map.rb
222
222
  - lib/opener/polarity_tagger/lexicons_cache.rb
223
223
  - lib/opener/polarity_tagger/public/markdown.css