opener-polarity-tagger 3.2.7 → 3.5.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e46a3cf60d76757018619e73dca7629c175c2bf091c3291b2935ef0826e7a08a
4
- data.tar.gz: 0bb5b5ac8d605478791314260d6bd1a7d709722e1598a354236a62dd5741b0b0
3
+ metadata.gz: afcf1b19a55cbd0c37e6f6adfba85ad3f376cea5bcabd2c3d5d780d45eb5bd4e
4
+ data.tar.gz: 86f7473b8ae1cafa47bf78aa5a685280f6a0844065d264c0bb27f944ec3a1c92
5
5
  SHA512:
6
- metadata.gz: f6759038cb51cfef7aa1dd14aef0d8dda9260dc93f0a532fb9dad03bb3d26bfc1c3f60b163f1e77358b9a80862a14aeb4ad8d01a3f1a6b40118d9896a6ef520d
7
- data.tar.gz: 4bdb95248999e753e29b85ea172fdbea1489925bab48227de3018c46736068c0cd91eefa2867ee6f181bdc09a59a8250df4e445ec80d41b5b3cd216398b80b0d
6
+ metadata.gz: b2eadf5b3e97ff01a8934f03c48e01d0af98209bd37c0884d5dac6e714430eb23bbd878faf036369dca11eadad6f59acff45b9a2271559f95a7f1d1399e78adc
7
+ data.tar.gz: d7b70f216f1e10b5085a391c91ede0cb88aec903c892592ee5ab3269e821bf333d3a24ec1b474505688b532e64ece201967cbd6537dc554ef885e9971854c6d8
data/core/LexiconMod.py CHANGED
@@ -45,9 +45,7 @@ def load_lexicons(language, path=None):
45
45
  def show_lexicons(language, path=None):
46
46
  if path is None:
47
47
  path = __module_dir
48
- #lexicons, default_id, this_folder, folder_per_lang = load_lexicons(language, path)
49
- LexiconSent(language,None,path)
50
-
48
+ lexicons, default_id, this_folder, folder_per_lang = load_lexicons(language, path)
51
49
  print
52
50
  print '#'*30
53
51
  print 'Available lexicons for',language
@@ -14,6 +14,15 @@
14
14
  <Domain/>
15
15
  </Sense>
16
16
  </LexicalEntry>
17
+ <LexicalEntry id="" partOfSpeech="noun">
18
+ <Lemma writtenForm="very comfortable"/>
19
+ <Sense>
20
+ <Confidence level="manual"/>
21
+ <MonolingualExternalRef/>
22
+ <Sentiment polarity="negative" strength="strong"/>
23
+ <Domain/>
24
+ </Sense>
25
+ </LexicalEntry>
17
26
  <LexicalEntry id="" partOfSpeech="noun">
18
27
  <Lemma writtenForm="abandonment"/>
19
28
  <Sense>
@@ -9,46 +9,70 @@ module Opener
9
9
  DESC = 'VUA polarity tagger multilanguage'
10
10
  LAST_EDITED = '21may2014'
11
11
  VERSION = '1.2'
12
+ N_WORDS = 5
12
13
 
13
- CACHE = LexiconsCache.new
14
+ CACHE = LexiconsCache.new
14
15
 
15
16
  def initialize ignore_pos: false, **params
16
17
  @ignore_pos = ignore_pos
17
18
  end
18
19
 
19
20
  def run input, params = {}
20
- kaf = KAF::Document.from_xml input
21
+ kaf = KAF::Document.from_xml input
21
22
 
22
23
  @cache_keys = params[:cache_keys] ||= {}
23
24
  @cache_keys.merge! lang: kaf.language
25
+ @cache_keys = @cache_keys.except :property_type
24
26
  @map = kaf.map = CACHE[**@cache_keys].lexicons
25
27
 
26
28
  raise Opener::Core::UnsupportedLanguageError, kaf.language if @map.blank?
27
29
 
28
- kaf.terms.each do |t|
30
+ next_index = 0
31
+ kaf.terms.each_with_index do |t, index|
32
+ # skip terms when a multi_word_expression is found
33
+ next if next_index > index
29
34
  lemma = t.lemma&.downcase
30
35
  text = t.text.to_s.downcase
31
36
  pos = if @ignore_pos then nil else t.pos end
32
37
  attrs = Hashie::Mash.new
33
38
 
34
- lexicon, polarity_pos = @map.by_polarity lemma, pos
35
- lexicon, polarity_pos = @map.by_polarity text, pos if lexicon.polarity == 'unknown'
36
39
 
37
- if lexicon.polarity != 'unknown'
38
- attrs.polarity = lexicon.polarity
39
- end
40
- if l = @map.by_negator(lemma) || @map.by_negator(text)
41
- lexicon, polarity_pos = l, nil
42
- attrs.sentiment_modifier = 'shifter'
40
+ polarity_pos = nil
41
+
42
+ if opts = @map.by_negator(text) || @map.by_negator(lemma)
43
+ lexicon, next_index = get_lexicon(opts, kaf, index)
44
+ attrs.sentiment_modifier = 'shifter' if lexicon
45
+ elsif opts = @map.by_intensifier(text) || @map.by_intensifier(lemma)
46
+ lexicon, next_index = get_lexicon(opts, kaf, index)
47
+ attrs.sentiment_modifier = 'intensifier' if lexicon
43
48
  end
44
- if l = @map.by_intensifier(lemma) || @map.by_intensifier(text)
45
- lexicon, polarity_pos = l, nil
46
- attrs.sentiment_modifier = 'intensifier'
49
+
50
+ unless lexicon
51
+ # text matching have priority as sometimes
52
+ # the lemma provided by Stanza is a different word
53
+ [text, lemma].each do |word|
54
+ opts, polarity_pos = @map.by_polarity word, pos
55
+
56
+ if opts[:multi].size > 0 or opts[:single]
57
+ lexicon, next_index = get_lexicon opts, kaf, index
58
+ if lexicon
59
+ attrs.polarity = lexicon.polarity
60
+ break
61
+ end
62
+ end
63
+ end
47
64
  end
48
65
 
49
66
  if attrs.size > 0
50
- attrs.resource = lexicon.resource if lexicon.resource
67
+ attrs['lexicon-id'] = lexicon.id.to_s if lexicon&.id
68
+ attrs.resource = lexicon.resource if lexicon&.resource
51
69
  t.setPolarity attrs, polarity_pos
70
+ i = index
71
+ while i < next_index do
72
+ term = kaf.terms[i]
73
+ term.setPolarity attrs, polarity_pos
74
+ i += 1
75
+ end
52
76
  end
53
77
  end
54
78
 
@@ -57,6 +81,29 @@ module Opener
57
81
  kaf.to_xml
58
82
  end
59
83
 
84
+ def get_lexicon opts, kaf, index
85
+ if lexicon = identify_lexicon(kaf.terms[index, N_WORDS], opts.multi)
86
+ index = index + lexicon.lemma.strip.split(' ').size
87
+ else
88
+ lexicon = opts.single
89
+ end
90
+
91
+ [lexicon, index]
92
+ end
93
+
94
+ def identify_lexicon terms, lexicons
95
+ return unless lexicons.size > 0
96
+
97
+ lemma = terms.map{|t| t.lemma&.downcase }.join(' ')
98
+ text = terms.map{|t| t.text&.downcase }.join(' ')
99
+
100
+ lexicons.each do |lexicon|
101
+ return lexicon if lemma =~ /^#{lexicon.lemma}($|\s)+/
102
+ return lexicon if text =~ /^#{lexicon.lemma}($|\s)+/
103
+ end
104
+ nil
105
+ end
106
+
60
107
  end
61
108
  end
62
109
  end
@@ -7,9 +7,7 @@ module Opener
7
7
  attr_reader :intensifiers
8
8
  attr_reader :with_polarity
9
9
 
10
- UNKNOWN = Hashie::Mash.new polarity: 'unknown'
11
-
12
- POS_ORDER = 'NRVGAO'
10
+ POS_ORDER = 'ONRVGA'
13
11
  DEFAULT_POS = 'O'
14
12
  POS_SHORT_MAP = {
15
13
  adj: 'G',
@@ -45,17 +43,22 @@ module Opener
45
43
  @intensifiers[lemma]
46
44
  end
47
45
 
48
- def by_polarity lemma, short_pos
49
- return [@with_polarity[lemma+short_pos] || UNKNOWN, short_pos] if short_pos
46
+ def by_polarity lemma, identified_short_pos
47
+ hash = Hashie::Mash.new multi: (@with_polarity[lemma] || [])
48
+
49
+ if identified_short_pos and lexicon = @with_polarity[lemma+identified_short_pos]
50
+ hash[:single] = lexicon
51
+ return [hash, identified_short_pos]
52
+ end
50
53
 
51
54
  POS_ORDER.chars.each do |short_pos|
52
- if l = @with_polarity[lemma+short_pos]
53
- puts "Found polarify #{l.polarity} for #{lemma} with PoS #{short_pos}"
54
- return [l, short_pos]
55
+ if lexicon = @with_polarity[lemma+short_pos]
56
+ hash[:single] = lexicon
57
+ return [hash, identified_short_pos]
55
58
  end
56
59
  end
57
60
 
58
- [UNKNOWN, 'unknown']
61
+ [hash, 'unknown']
59
62
  end
60
63
 
61
64
  protected
@@ -63,16 +66,33 @@ module Opener
63
66
  def map lexicons
64
67
  return if blank?
65
68
 
66
- lexicons.each do |l|
67
- next if l.lemma.nil?
69
+ lexicons.each do |lexicon|
70
+ next if lexicon.lemma.nil?
68
71
 
69
- case l.type
70
- when 'polarityShifter' then @negators[l.lemma] = l
71
- when 'intensifier' then @intensifiers[l.lemma] = l
72
- else
73
- if l.polarity
74
- short_pos = POS_SHORT_MAP[l.pos&.to_sym] || DEFAULT_POS
75
- @with_polarity[l.lemma+short_pos] = l
72
+ sub_lexicons = [lexicon]
73
+ sub_lexicons += lexicon.variants if lexicon.variants
74
+
75
+ sub_lexicons.each do |variant|
76
+ if variant.lemma.strip.include? ' '
77
+ lemma = variant.lemma.strip.split(' ').first
78
+ type = :multi
79
+ else
80
+ lemma = variant.lemma
81
+ type = :single
82
+ end
83
+
84
+ if ['polarityShifter', 'intensifier'].include? lexicon.type
85
+ var = @negators if lexicon.type == 'polarityShifter'
86
+ var = @intensifiers if lexicon.type == 'intensifier'
87
+
88
+ var[lemma] ||= Hashie::Mash.new multi: []
89
+ if type == :multi
90
+ var[lemma][:multi] << lexicon
91
+ else
92
+ var[lemma][:single] = lexicon
93
+ end
94
+ else
95
+ map_one_polarity lemma, variant, lexicon if lexicon.polarity
76
96
  end
77
97
  end
78
98
  end
@@ -82,6 +102,19 @@ module Opener
82
102
  puts "#{@lang}: loaded #{@with_polarity.size} elements with polarity"
83
103
  end
84
104
 
105
+ def map_one_polarity lemma, hash, lexicon
106
+ poses = if hash.poses.present? then hash.poses else [hash.pos] end
107
+ poses.each do |pos|
108
+ short_pos = POS_SHORT_MAP[pos&.to_sym] || DEFAULT_POS
109
+ @with_polarity[lemma] ||= []
110
+ if hash.lemma.strip.include? ' '
111
+ @with_polarity[lemma] << lexicon
112
+ else
113
+ @with_polarity[lemma+short_pos] = lexicon
114
+ end
115
+ end
116
+ end
117
+
85
118
  end
86
119
  end
87
120
  end
@@ -15,10 +15,11 @@ module Opener
15
15
  end
16
16
 
17
17
  def [] **params
18
+ existing = @cache[params]
19
+ return existing if existing and existing.from > UPDATE_INTERVAL.ago
20
+
18
21
  synchronize do
19
- existing = @cache[params]
20
- break existing if existing and existing.from > UPDATE_INTERVAL.ago
21
- @cache[params] = cache_update existing, **params
22
+ @cache[params] = cache_update @cache[params], **params
22
23
  end
23
24
  end
24
25
  alias_method :get, :[]
@@ -1,7 +1,7 @@
1
1
  module Opener
2
2
  class PolarityTagger
3
3
 
4
- VERSION = '3.2.7'
4
+ VERSION = '3.5.2'
5
5
 
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-polarity-tagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.7
4
+ version: 3.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-11-09 00:00:00.000000000 Z
11
+ date: 2021-03-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: opener-daemons