opener-polarity-tagger 3.3.0 → 3.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/core/general-lexicons/EN-lexicon/Sentiment-English-general.xml +9 -0
- data/lib/opener/polarity_tagger/internal.rb +64 -17
- data/lib/opener/polarity_tagger/lexicon_map.rb +52 -18
- data/lib/opener/polarity_tagger/lexicons_cache.rb +4 -3
- data/lib/opener/polarity_tagger/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: befa570d0fdb2774fee58f8f7f3154c4d944cee2865015411534d2c2c313837a
|
4
|
+
data.tar.gz: 4d95524761db77af92db9ece51dc5ff5bf66744445459a1c3c0b7c570ddb3267
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: be4cd1daf7cb8f906f2e91d1bba18e64bc713e5d13c6ec090877b4e079153b8fa818915fcc789f6df1bcfe4735b24af2e7a15dbab5517a7037bd6983eba612c8
|
7
|
+
data.tar.gz: dfd2058e9b98ea482d65e86d4a82d33865a1ef9cd5350deac4853c5e4b3c9cec39b37766b862a0f459c92644fd4e5f916f35ea4d1568ffdddde5d823b6b7b43b
|
@@ -14,6 +14,15 @@
|
|
14
14
|
<Domain/>
|
15
15
|
</Sense>
|
16
16
|
</LexicalEntry>
|
17
|
+
<LexicalEntry id="" partOfSpeech="noun">
|
18
|
+
<Lemma writtenForm="very comfortable"/>
|
19
|
+
<Sense>
|
20
|
+
<Confidence level="manual"/>
|
21
|
+
<MonolingualExternalRef/>
|
22
|
+
<Sentiment polarity="negative" strength="strong"/>
|
23
|
+
<Domain/>
|
24
|
+
</Sense>
|
25
|
+
</LexicalEntry>
|
17
26
|
<LexicalEntry id="" partOfSpeech="noun">
|
18
27
|
<Lemma writtenForm="abandonment"/>
|
19
28
|
<Sense>
|
@@ -9,46 +9,70 @@ module Opener
|
|
9
9
|
DESC = 'VUA polarity tagger multilanguage'
|
10
10
|
LAST_EDITED = '21may2014'
|
11
11
|
VERSION = '1.2'
|
12
|
+
N_WORDS = 5
|
12
13
|
|
13
|
-
CACHE
|
14
|
+
CACHE = LexiconsCache.new
|
14
15
|
|
15
16
|
def initialize ignore_pos: false, **params
|
16
17
|
@ignore_pos = ignore_pos
|
17
18
|
end
|
18
19
|
|
19
20
|
def run input, params = {}
|
20
|
-
kaf
|
21
|
+
kaf = KAF::Document.from_xml input
|
21
22
|
|
22
23
|
@cache_keys = params[:cache_keys] ||= {}
|
23
24
|
@cache_keys.merge! lang: kaf.language
|
25
|
+
@cache_keys = @cache_keys.except :property_type
|
24
26
|
@map = kaf.map = CACHE[**@cache_keys].lexicons
|
25
27
|
|
26
28
|
raise Opener::Core::UnsupportedLanguageError, kaf.language if @map.blank?
|
27
29
|
|
28
|
-
|
30
|
+
next_index = 0
|
31
|
+
kaf.terms.each_with_index do |t, index|
|
32
|
+
# skip terms when a multi_word_expression is found
|
33
|
+
next if next_index > index
|
29
34
|
lemma = t.lemma&.downcase
|
30
35
|
text = t.text.to_s.downcase
|
31
36
|
pos = if @ignore_pos then nil else t.pos end
|
32
37
|
attrs = Hashie::Mash.new
|
33
38
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
39
|
+
|
40
|
+
polarity_pos = nil
|
41
|
+
|
42
|
+
if opts = @map.by_negator(text) || @map.by_negator(lemma)
|
43
|
+
lexicon, next_index = get_lexicon(opts, kaf, index)
|
44
|
+
attrs.sentiment_modifier = 'shifter' if lexicon
|
45
|
+
elsif opts = @map.by_intensifier(text) || @map.by_intensifier(lemma)
|
46
|
+
lexicon, next_index = get_lexicon(opts, kaf, index)
|
47
|
+
attrs.sentiment_modifier = 'intensifier' if lexicon
|
48
|
+
end
|
49
|
+
|
50
|
+
unless lexicon
|
51
|
+
# text matching have priority as sometimes
|
52
|
+
# the lemma provided by Stanza is a different word
|
53
|
+
[text, lemma].each do |word|
|
54
|
+
opts, polarity_pos = @map.by_polarity word, pos
|
55
|
+
|
56
|
+
if opts[:multi].size > 0 or opts[:single]
|
57
|
+
lexicon, next_index = get_lexicon opts, kaf, index
|
58
|
+
if lexicon
|
59
|
+
attrs.polarity = lexicon.polarity
|
60
|
+
break
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
47
64
|
end
|
48
65
|
|
49
66
|
if attrs.size > 0
|
50
|
-
attrs
|
67
|
+
attrs['lexicon-id'] = lexicon.id.to_s if lexicon&.id
|
68
|
+
attrs.resource = lexicon.resource if lexicon&.resource
|
51
69
|
t.setPolarity attrs, polarity_pos
|
70
|
+
i = index
|
71
|
+
while i < next_index do
|
72
|
+
term = kaf.terms[i]
|
73
|
+
term.setPolarity attrs, polarity_pos
|
74
|
+
i += 1
|
75
|
+
end
|
52
76
|
end
|
53
77
|
end
|
54
78
|
|
@@ -57,6 +81,29 @@ module Opener
|
|
57
81
|
kaf.to_xml
|
58
82
|
end
|
59
83
|
|
84
|
+
def get_lexicon opts, kaf, index
|
85
|
+
if lexicon = identify_lexicon(kaf.terms[index, N_WORDS], opts.multi)
|
86
|
+
index = index + lexicon.lemma.strip.split(' ').size
|
87
|
+
else
|
88
|
+
lexicon = opts.single
|
89
|
+
end
|
90
|
+
|
91
|
+
[lexicon, index]
|
92
|
+
end
|
93
|
+
|
94
|
+
def identify_lexicon terms, lexicons
|
95
|
+
return unless lexicons.size > 0
|
96
|
+
|
97
|
+
lemma = terms.map{|t| t.lemma&.downcase }.join(' ')
|
98
|
+
text = terms.map{|t| t.text&.downcase }.join(' ')
|
99
|
+
|
100
|
+
lexicons.each do |lexicon|
|
101
|
+
return lexicon if lemma =~ /^#{Regexp.escape(lexicon.lemma)}($|\s)+/
|
102
|
+
return lexicon if text =~ /^#{Regexp.escape(lexicon.lemma)}($|\s)+/
|
103
|
+
end
|
104
|
+
nil
|
105
|
+
end
|
106
|
+
|
60
107
|
end
|
61
108
|
end
|
62
109
|
end
|
@@ -7,9 +7,7 @@ module Opener
|
|
7
7
|
attr_reader :intensifiers
|
8
8
|
attr_reader :with_polarity
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
POS_ORDER = 'NRVGAO'
|
10
|
+
POS_ORDER = 'ONRVGA'
|
13
11
|
DEFAULT_POS = 'O'
|
14
12
|
POS_SHORT_MAP = {
|
15
13
|
adj: 'G',
|
@@ -45,16 +43,22 @@ module Opener
|
|
45
43
|
@intensifiers[lemma]
|
46
44
|
end
|
47
45
|
|
48
|
-
def by_polarity lemma,
|
49
|
-
|
50
|
-
|
46
|
+
def by_polarity lemma, identified_short_pos
|
47
|
+
hash = Hashie::Mash.new multi: (@with_polarity[lemma] || [])
|
48
|
+
|
49
|
+
if identified_short_pos and lexicon = @with_polarity[lemma+identified_short_pos]
|
50
|
+
hash[:single] = lexicon
|
51
|
+
return [hash, identified_short_pos]
|
52
|
+
end
|
51
53
|
|
52
54
|
POS_ORDER.chars.each do |short_pos|
|
53
|
-
|
54
|
-
|
55
|
+
if lexicon = @with_polarity[lemma+short_pos]
|
56
|
+
hash[:single] = lexicon
|
57
|
+
return [hash, identified_short_pos]
|
58
|
+
end
|
55
59
|
end
|
56
60
|
|
57
|
-
[
|
61
|
+
[hash, 'unknown']
|
58
62
|
end
|
59
63
|
|
60
64
|
protected
|
@@ -62,16 +66,33 @@ module Opener
|
|
62
66
|
def map lexicons
|
63
67
|
return if blank?
|
64
68
|
|
65
|
-
lexicons.each do |
|
66
|
-
next if
|
69
|
+
lexicons.each do |lexicon|
|
70
|
+
next if lexicon.lemma.nil?
|
67
71
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
if
|
73
|
-
|
74
|
-
|
72
|
+
sub_lexicons = [lexicon]
|
73
|
+
sub_lexicons += lexicon.variants if lexicon.variants
|
74
|
+
|
75
|
+
sub_lexicons.each do |variant|
|
76
|
+
if variant.lemma.strip.include? ' '
|
77
|
+
lemma = variant.lemma.strip.split(' ').first
|
78
|
+
type = :multi
|
79
|
+
else
|
80
|
+
lemma = variant.lemma
|
81
|
+
type = :single
|
82
|
+
end
|
83
|
+
|
84
|
+
if ['polarityShifter', 'intensifier'].include? lexicon.type
|
85
|
+
var = @negators if lexicon.type == 'polarityShifter'
|
86
|
+
var = @intensifiers if lexicon.type == 'intensifier'
|
87
|
+
|
88
|
+
var[lemma] ||= Hashie::Mash.new multi: []
|
89
|
+
if type == :multi
|
90
|
+
var[lemma][:multi] << lexicon
|
91
|
+
else
|
92
|
+
var[lemma][:single] = lexicon
|
93
|
+
end
|
94
|
+
else
|
95
|
+
map_one_polarity lemma, variant, lexicon if lexicon.polarity
|
75
96
|
end
|
76
97
|
end
|
77
98
|
end
|
@@ -81,6 +102,19 @@ module Opener
|
|
81
102
|
puts "#{@lang}: loaded #{@with_polarity.size} elements with polarity"
|
82
103
|
end
|
83
104
|
|
105
|
+
def map_one_polarity lemma, hash, lexicon
|
106
|
+
poses = if hash.poses.present? then hash.poses else [hash.pos] end
|
107
|
+
poses.each do |pos|
|
108
|
+
short_pos = POS_SHORT_MAP[pos&.to_sym] || DEFAULT_POS
|
109
|
+
@with_polarity[lemma] ||= []
|
110
|
+
if hash.lemma.strip.include? ' '
|
111
|
+
@with_polarity[lemma] << lexicon
|
112
|
+
else
|
113
|
+
@with_polarity[lemma+short_pos] = lexicon
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
84
118
|
end
|
85
119
|
end
|
86
120
|
end
|
@@ -15,10 +15,11 @@ module Opener
|
|
15
15
|
end
|
16
16
|
|
17
17
|
def [] **params
|
18
|
+
existing = @cache[params]
|
19
|
+
return existing if existing and existing.from > UPDATE_INTERVAL.ago
|
20
|
+
|
18
21
|
synchronize do
|
19
|
-
|
20
|
-
break existing if existing and existing.from > UPDATE_INTERVAL.ago
|
21
|
-
@cache[params] = cache_update existing, **params
|
22
|
+
@cache[params] = cache_update @cache[params], **params
|
22
23
|
end
|
23
24
|
end
|
24
25
|
alias_method :get, :[]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-polarity-tagger
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3
|
4
|
+
version: 3.5.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: opener-daemons
|
@@ -251,7 +251,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
251
251
|
version: '0'
|
252
252
|
requirements: []
|
253
253
|
rubyforge_project:
|
254
|
-
rubygems_version: 2.7.
|
254
|
+
rubygems_version: 2.7.6
|
255
255
|
signing_key:
|
256
256
|
specification_version: 4
|
257
257
|
summary: Polarity tagger for various languages.
|