opener-polarity-tagger 3.2.6 → 3.5.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a550043e18b17f1e12fd9466f5eee1dd2f806f97f38fe4b0e51e6ac4c5dd6ff1
4
- data.tar.gz: ec57806e4fa7559082aeccf9bb0e1492ef77d93f86d411549f5513282e10ce64
3
+ metadata.gz: 5bc937332f3126f240a402051ede1ec3d50c753c65a09e147d4d99446be67bd7
4
+ data.tar.gz: deb5934edffe4496b83d57d12d0ad2c1dc0ebff77bfbaaa1509602f2a80ae7db
5
5
  SHA512:
6
- metadata.gz: 1b53a75fccce4af956ab6b334406896ce6489967067e9abc1b529eb2ece9a1dbe167e8a567e6f938292d945a0e174d1c0b79305ce301e477549fd2161a03361c
7
- data.tar.gz: 5b8bd573afe2eb9fb41002e8d62949c890893395d711ad98220df2e159220d54caac8d70f5c5134898e821213ea6c05e1d1b226a4c50336361fc33f528b6cdb7
6
+ metadata.gz: 6430b221c2b87e2e934f442535d4bfae2dae80b520e0703864b8ff3b3a407fd36954b0538fa6a1f4d0ae6bc52461379c9aa45e98c6021d6634b9759f1a1f9495
7
+ data.tar.gz: 2626fbe6a5d959eda1ff4c06a9b22408d81520ff0c1fbb26be573051df12fd391cee1049488b25bd47a7778b186914730902d9be8765121bc787bff7f4faf5b3
data/core/LexiconMod.py CHANGED
@@ -45,9 +45,7 @@ def load_lexicons(language, path=None):
45
45
  def show_lexicons(language, path=None):
46
46
  if path is None:
47
47
  path = __module_dir
48
- #lexicons, default_id, this_folder, folder_per_lang = load_lexicons(language, path)
49
- LexiconSent(language,None,path)
50
-
48
+ lexicons, default_id, this_folder, folder_per_lang = load_lexicons(language, path)
51
49
  print
52
50
  print '#'*30
53
51
  print 'Available lexicons for',language
@@ -14,6 +14,15 @@
14
14
  <Domain/>
15
15
  </Sense>
16
16
  </LexicalEntry>
17
+ <LexicalEntry id="" partOfSpeech="noun">
18
+ <Lemma writtenForm="very comfortable"/>
19
+ <Sense>
20
+ <Confidence level="manual"/>
21
+ <MonolingualExternalRef/>
22
+ <Sentiment polarity="negative" strength="strong"/>
23
+ <Domain/>
24
+ </Sense>
25
+ </LexicalEntry>
17
26
  <LexicalEntry id="" partOfSpeech="noun">
18
27
  <Lemma writtenForm="abandonment"/>
19
28
  <Sense>
File without changes
@@ -18,6 +18,10 @@ module Opener
18
18
  @node.attr :lemma
19
19
  end
20
20
 
21
+ def text
22
+ @node.attr :text
23
+ end
24
+
21
25
  def pos
22
26
  @node.attr :pos
23
27
  end
@@ -1,6 +1,6 @@
1
1
  require_relative 'lexicons_cache'
2
2
  require_relative 'lexicon_map'
3
- require_relative 'kaf/document'
3
+ require_relative '../kaf/document'
4
4
 
5
5
  module Opener
6
6
  class PolarityTagger
@@ -9,53 +9,98 @@ module Opener
9
9
  DESC = 'VUA polarity tagger multilanguage'
10
10
  LAST_EDITED = '21may2014'
11
11
  VERSION = '1.2'
12
+ N_WORDS = 5
12
13
 
13
- CACHE = LexiconsCache.new
14
+ CACHE = LexiconsCache.new
14
15
 
15
16
  def initialize ignore_pos: false, **params
16
17
  @ignore_pos = ignore_pos
17
18
  end
18
19
 
19
- def clear_cache lang: nil, environment:
20
- end
21
-
22
20
  def run input, params = {}
23
- @kaf = KAF::Document.from_xml input
21
+ kaf = KAF::Document.from_xml input
24
22
 
25
23
  @cache_keys = params[:cache_keys] ||= {}
26
- @cache_keys.merge! lang: @kaf.language
27
- @map = @kaf.map = CACHE[**@cache_keys].lexicons
24
+ @cache_keys.merge! lang: kaf.language
25
+ @map = kaf.map = CACHE[**@cache_keys].lexicons
28
26
 
29
- raise Opener::Core::UnsupportedLanguageError, @kaf.language if @map.blank?
27
+ raise Opener::Core::UnsupportedLanguageError, kaf.language if @map.blank?
30
28
 
31
- @kaf.terms.each do |t|
29
+ next_index = 0
30
+ kaf.terms.each_with_index do |t, index|
31
+ # skip terms when a multi_word_expression is found
32
+ next if next_index > index
32
33
  lemma = t.lemma&.downcase
34
+ text = t.text.to_s.downcase
33
35
  pos = if @ignore_pos then nil else t.pos end
34
36
  attrs = Hashie::Mash.new
35
37
 
36
- lexicon, polarity_pos = @map.by_polarity lemma, pos
37
38
 
38
- if lexicon.polarity != 'unknown'
39
- attrs.polarity = lexicon.polarity
40
- end
41
- if l = @map.by_negator(lemma)
42
- lexicon, polarity_pos = l, nil
43
- attrs.sentiment_modifier = 'shifter'
39
+ polarity_pos = nil
40
+
41
+ if opts = @map.by_negator(text) || @map.by_negator(lemma)
42
+ lexicon, next_index = get_lexicon(opts, kaf, index)
43
+ attrs.sentiment_modifier = 'shifter' if lexicon
44
+ elsif opts = @map.by_intensifier(text) || @map.by_intensifier(lemma)
45
+ lexicon, next_index = get_lexicon(opts, kaf, index)
46
+ attrs.sentiment_modifier = 'intensifier' if lexicon
44
47
  end
45
- if l = @map.by_intensifier(lemma)
46
- lexicon, polarity_pos = l, nil
47
- attrs.sentiment_modifier = 'intensifier'
48
+
49
+ unless lexicon
50
+ # text matching have priority as sometimes
51
+ # the lemma provided by Stanza is a different word
52
+ [text, lemma].each do |word|
53
+ opts, polarity_pos = @map.by_polarity word, pos
54
+
55
+ if opts[:multi].size > 0 or opts[:single]
56
+ lexicon, next_index = get_lexicon opts, kaf, index
57
+ if lexicon
58
+ attrs.polarity = lexicon.polarity
59
+ break
60
+ end
61
+ end
62
+ end
48
63
  end
49
64
 
50
65
  if attrs.size > 0
51
- attrs.resource = lexicon.resource if lexicon.resource
66
+ attrs['lexicon-id'] = lexicon.id.to_s if lexicon&.id
67
+ attrs.resource = lexicon.resource if lexicon&.resource
52
68
  t.setPolarity attrs, polarity_pos
69
+ i = index
70
+ while i < next_index do
71
+ term = kaf.terms[i]
72
+ term.setPolarity attrs, polarity_pos
73
+ i += 1
74
+ end
53
75
  end
54
76
  end
55
77
 
56
- @kaf.add_linguistic_processor DESC, "#{LAST_EDITED}_#{VERSION}", 'terms'
78
+ kaf.add_linguistic_processor DESC, "#{LAST_EDITED}_#{VERSION}", 'terms'
79
+
80
+ kaf.to_xml
81
+ end
57
82
 
58
- @kaf.to_xml
83
+ def get_lexicon opts, kaf, index
84
+ if lexicon = identify_lexicon(kaf.terms[index, N_WORDS], opts.multi)
85
+ index = index + lexicon.lemma.strip.split(' ').size
86
+ else
87
+ lexicon = opts.single
88
+ end
89
+
90
+ [lexicon, index]
91
+ end
92
+
93
+ def identify_lexicon terms, lexicons
94
+ return unless lexicons.size > 0
95
+
96
+ lemma = terms.map{|t| t.lemma&.downcase }.join(' ')
97
+ text = terms.map{|t| t.text&.downcase }.join(' ')
98
+
99
+ lexicons.each do |lexicon|
100
+ return lexicon if lemma =~ /^#{lexicon.lemma}($|\s)+/
101
+ return lexicon if text =~ /^#{lexicon.lemma}($|\s)+/
102
+ end
103
+ nil
59
104
  end
60
105
 
61
106
  end
@@ -7,9 +7,7 @@ module Opener
7
7
  attr_reader :intensifiers
8
8
  attr_reader :with_polarity
9
9
 
10
- UNKNOWN = Hashie::Mash.new polarity: 'unknown'
11
-
12
- POS_ORDER = 'NRVGAO'
10
+ POS_ORDER = 'ONRVGA'
13
11
  DEFAULT_POS = 'O'
14
12
  POS_SHORT_MAP = {
15
13
  adj: 'G',
@@ -45,17 +43,22 @@ module Opener
45
43
  @intensifiers[lemma]
46
44
  end
47
45
 
48
- def by_polarity lemma, short_pos
49
- return [@with_polarity[lemma+short_pos] || UNKNOWN, short_pos] if short_pos
46
+ def by_polarity lemma, identified_short_pos
47
+ hash = Hashie::Mash.new multi: (@with_polarity[lemma] || [])
48
+
49
+ if identified_short_pos and lexicon = @with_polarity[lemma+identified_short_pos]
50
+ hash[:single] = lexicon
51
+ return [hash, identified_short_pos]
52
+ end
50
53
 
51
54
  POS_ORDER.chars.each do |short_pos|
52
- if l = @with_polarity[lemma+short_pos]
53
- puts "Found polarify #{l.polarity} for #{lemma} with PoS #{short_pos}"
54
- return [l, short_pos]
55
+ if lexicon = @with_polarity[lemma+short_pos]
56
+ hash[:single] = lexicon
57
+ return [hash, identified_short_pos]
55
58
  end
56
59
  end
57
60
 
58
- [UNKNOWN, 'unknown']
61
+ [hash, 'unknown']
59
62
  end
60
63
 
61
64
  protected
@@ -63,16 +66,33 @@ module Opener
63
66
  def map lexicons
64
67
  return if blank?
65
68
 
66
- lexicons.each do |l|
67
- next if l.lemma.nil?
69
+ lexicons.each do |lexicon|
70
+ next if lexicon.lemma.nil?
68
71
 
69
- case l.type
70
- when 'polarityShifter' then @negators[l.lemma] = l
71
- when 'intensifier' then @intensifiers[l.lemma] = l
72
- else
73
- if l.polarity
74
- short_pos = POS_SHORT_MAP[l.pos&.to_sym] || DEFAULT_POS
75
- @with_polarity[l.lemma+short_pos] = l
72
+ sub_lexicons = [lexicon]
73
+ sub_lexicons += lexicon.variants if lexicon.variants
74
+
75
+ sub_lexicons.each do |variant|
76
+ if variant.lemma.strip.include? ' '
77
+ lemma = variant.lemma.strip.split(' ').first
78
+ type = :multi
79
+ else
80
+ lemma = variant.lemma
81
+ type = :single
82
+ end
83
+
84
+ if ['polarityShifter', 'intensifier'].include? lexicon.type
85
+ var = @negators if lexicon.type == 'polarityShifter'
86
+ var = @intensifiers if lexicon.type == 'intensifier'
87
+
88
+ var[lemma] ||= Hashie::Mash.new multi: []
89
+ if type == :multi
90
+ var[lemma][:multi] << lexicon
91
+ else
92
+ var[lemma][:single] = lexicon
93
+ end
94
+ else
95
+ map_one_polarity lemma, variant, lexicon if lexicon.polarity
76
96
  end
77
97
  end
78
98
  end
@@ -82,6 +102,19 @@ module Opener
82
102
  puts "#{@lang}: loaded #{@with_polarity.size} elements with polarity"
83
103
  end
84
104
 
105
+ def map_one_polarity lemma, hash, lexicon
106
+ poses = if hash.poses.present? then hash.poses else [hash.pos] end
107
+ poses.each do |pos|
108
+ short_pos = POS_SHORT_MAP[pos&.to_sym] || DEFAULT_POS
109
+ @with_polarity[lemma] ||= []
110
+ if hash.lemma.strip.include? ' '
111
+ @with_polarity[lemma] << lexicon
112
+ else
113
+ @with_polarity[lemma+short_pos] = lexicon
114
+ end
115
+ end
116
+ end
117
+
85
118
  end
86
119
  end
87
120
  end
@@ -15,10 +15,11 @@ module Opener
15
15
  end
16
16
 
17
17
  def [] **params
18
+ existing = @cache[params]
19
+ return existing if existing and existing.from > UPDATE_INTERVAL.ago
20
+
18
21
  synchronize do
19
- existing = @cache[params]
20
- break existing if existing and existing.from > UPDATE_INTERVAL.ago
21
- @cache[params] = cache_update existing, **params
22
+ @cache[params] = cache_update @cache[params], **params
22
23
  end
23
24
  end
24
25
  alias_method :get, :[]
@@ -1,7 +1,7 @@
1
1
  module Opener
2
2
  class PolarityTagger
3
3
 
4
- VERSION = '3.2.6'
4
+ VERSION = '3.5.1'
5
5
 
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-polarity-tagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.6
4
+ version: 3.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-11-04 00:00:00.000000000 Z
11
+ date: 2021-02-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: opener-daemons
@@ -212,12 +212,12 @@ files:
212
212
  - core/poltagger-basic-multi.py
213
213
  - exec/polarity-tagger.rb
214
214
  - ext/hack/Rakefile
215
+ - lib/opener/kaf/document.rb
216
+ - lib/opener/kaf/term.rb
215
217
  - lib/opener/polarity_tagger.rb
216
218
  - lib/opener/polarity_tagger/cli.rb
217
219
  - lib/opener/polarity_tagger/external.rb
218
220
  - lib/opener/polarity_tagger/internal.rb
219
- - lib/opener/polarity_tagger/kaf/document.rb
220
- - lib/opener/polarity_tagger/kaf/term.rb
221
221
  - lib/opener/polarity_tagger/lexicon_map.rb
222
222
  - lib/opener/polarity_tagger/lexicons_cache.rb
223
223
  - lib/opener/polarity_tagger/public/markdown.css