opener-polarity-tagger 3.2.6 → 3.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/core/LexiconMod.py +1 -3
- data/core/general-lexicons/EN-lexicon/Sentiment-English-general.xml +9 -0
- data/lib/opener/{polarity_tagger/kaf → kaf}/document.rb +0 -0
- data/lib/opener/{polarity_tagger/kaf → kaf}/term.rb +4 -0
- data/lib/opener/polarity_tagger/internal.rb +68 -23
- data/lib/opener/polarity_tagger/lexicon_map.rb +51 -18
- data/lib/opener/polarity_tagger/lexicons_cache.rb +4 -3
- data/lib/opener/polarity_tagger/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5bc937332f3126f240a402051ede1ec3d50c753c65a09e147d4d99446be67bd7
|
4
|
+
data.tar.gz: deb5934edffe4496b83d57d12d0ad2c1dc0ebff77bfbaaa1509602f2a80ae7db
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6430b221c2b87e2e934f442535d4bfae2dae80b520e0703864b8ff3b3a407fd36954b0538fa6a1f4d0ae6bc52461379c9aa45e98c6021d6634b9759f1a1f9495
|
7
|
+
data.tar.gz: 2626fbe6a5d959eda1ff4c06a9b22408d81520ff0c1fbb26be573051df12fd391cee1049488b25bd47a7778b186914730902d9be8765121bc787bff7f4faf5b3
|
data/core/LexiconMod.py
CHANGED
@@ -45,9 +45,7 @@ def load_lexicons(language, path=None):
|
|
45
45
|
def show_lexicons(language, path=None):
|
46
46
|
if path is None:
|
47
47
|
path = __module_dir
|
48
|
-
|
49
|
-
LexiconSent(language,None,path)
|
50
|
-
|
48
|
+
lexicons, default_id, this_folder, folder_per_lang = load_lexicons(language, path)
|
51
49
|
print
|
52
50
|
print '#'*30
|
53
51
|
print 'Available lexicons for',language
|
@@ -14,6 +14,15 @@
|
|
14
14
|
<Domain/>
|
15
15
|
</Sense>
|
16
16
|
</LexicalEntry>
|
17
|
+
<LexicalEntry id="" partOfSpeech="noun">
|
18
|
+
<Lemma writtenForm="very comfortable"/>
|
19
|
+
<Sense>
|
20
|
+
<Confidence level="manual"/>
|
21
|
+
<MonolingualExternalRef/>
|
22
|
+
<Sentiment polarity="negative" strength="strong"/>
|
23
|
+
<Domain/>
|
24
|
+
</Sense>
|
25
|
+
</LexicalEntry>
|
17
26
|
<LexicalEntry id="" partOfSpeech="noun">
|
18
27
|
<Lemma writtenForm="abandonment"/>
|
19
28
|
<Sense>
|
File without changes
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require_relative 'lexicons_cache'
|
2
2
|
require_relative 'lexicon_map'
|
3
|
-
require_relative 'kaf/document'
|
3
|
+
require_relative '../kaf/document'
|
4
4
|
|
5
5
|
module Opener
|
6
6
|
class PolarityTagger
|
@@ -9,53 +9,98 @@ module Opener
|
|
9
9
|
DESC = 'VUA polarity tagger multilanguage'
|
10
10
|
LAST_EDITED = '21may2014'
|
11
11
|
VERSION = '1.2'
|
12
|
+
N_WORDS = 5
|
12
13
|
|
13
|
-
CACHE
|
14
|
+
CACHE = LexiconsCache.new
|
14
15
|
|
15
16
|
def initialize ignore_pos: false, **params
|
16
17
|
@ignore_pos = ignore_pos
|
17
18
|
end
|
18
19
|
|
19
|
-
def clear_cache lang: nil, environment:
|
20
|
-
end
|
21
|
-
|
22
20
|
def run input, params = {}
|
23
|
-
|
21
|
+
kaf = KAF::Document.from_xml input
|
24
22
|
|
25
23
|
@cache_keys = params[:cache_keys] ||= {}
|
26
|
-
@cache_keys.merge! lang:
|
27
|
-
@map =
|
24
|
+
@cache_keys.merge! lang: kaf.language
|
25
|
+
@map = kaf.map = CACHE[**@cache_keys].lexicons
|
28
26
|
|
29
|
-
raise Opener::Core::UnsupportedLanguageError,
|
27
|
+
raise Opener::Core::UnsupportedLanguageError, kaf.language if @map.blank?
|
30
28
|
|
31
|
-
|
29
|
+
next_index = 0
|
30
|
+
kaf.terms.each_with_index do |t, index|
|
31
|
+
# skip terms when a multi_word_expression is found
|
32
|
+
next if next_index > index
|
32
33
|
lemma = t.lemma&.downcase
|
34
|
+
text = t.text.to_s.downcase
|
33
35
|
pos = if @ignore_pos then nil else t.pos end
|
34
36
|
attrs = Hashie::Mash.new
|
35
37
|
|
36
|
-
lexicon, polarity_pos = @map.by_polarity lemma, pos
|
37
38
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
39
|
+
polarity_pos = nil
|
40
|
+
|
41
|
+
if opts = @map.by_negator(text) || @map.by_negator(lemma)
|
42
|
+
lexicon, next_index = get_lexicon(opts, kaf, index)
|
43
|
+
attrs.sentiment_modifier = 'shifter' if lexicon
|
44
|
+
elsif opts = @map.by_intensifier(text) || @map.by_intensifier(lemma)
|
45
|
+
lexicon, next_index = get_lexicon(opts, kaf, index)
|
46
|
+
attrs.sentiment_modifier = 'intensifier' if lexicon
|
44
47
|
end
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
+
|
49
|
+
unless lexicon
|
50
|
+
# text matching have priority as sometimes
|
51
|
+
# the lemma provided by Stanza is a different word
|
52
|
+
[text, lemma].each do |word|
|
53
|
+
opts, polarity_pos = @map.by_polarity word, pos
|
54
|
+
|
55
|
+
if opts[:multi].size > 0 or opts[:single]
|
56
|
+
lexicon, next_index = get_lexicon opts, kaf, index
|
57
|
+
if lexicon
|
58
|
+
attrs.polarity = lexicon.polarity
|
59
|
+
break
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
48
63
|
end
|
49
64
|
|
50
65
|
if attrs.size > 0
|
51
|
-
attrs
|
66
|
+
attrs['lexicon-id'] = lexicon.id.to_s if lexicon&.id
|
67
|
+
attrs.resource = lexicon.resource if lexicon&.resource
|
52
68
|
t.setPolarity attrs, polarity_pos
|
69
|
+
i = index
|
70
|
+
while i < next_index do
|
71
|
+
term = kaf.terms[i]
|
72
|
+
term.setPolarity attrs, polarity_pos
|
73
|
+
i += 1
|
74
|
+
end
|
53
75
|
end
|
54
76
|
end
|
55
77
|
|
56
|
-
|
78
|
+
kaf.add_linguistic_processor DESC, "#{LAST_EDITED}_#{VERSION}", 'terms'
|
79
|
+
|
80
|
+
kaf.to_xml
|
81
|
+
end
|
57
82
|
|
58
|
-
|
83
|
+
def get_lexicon opts, kaf, index
|
84
|
+
if lexicon = identify_lexicon(kaf.terms[index, N_WORDS], opts.multi)
|
85
|
+
index = index + lexicon.lemma.strip.split(' ').size
|
86
|
+
else
|
87
|
+
lexicon = opts.single
|
88
|
+
end
|
89
|
+
|
90
|
+
[lexicon, index]
|
91
|
+
end
|
92
|
+
|
93
|
+
def identify_lexicon terms, lexicons
|
94
|
+
return unless lexicons.size > 0
|
95
|
+
|
96
|
+
lemma = terms.map{|t| t.lemma&.downcase }.join(' ')
|
97
|
+
text = terms.map{|t| t.text&.downcase }.join(' ')
|
98
|
+
|
99
|
+
lexicons.each do |lexicon|
|
100
|
+
return lexicon if lemma =~ /^#{lexicon.lemma}($|\s)+/
|
101
|
+
return lexicon if text =~ /^#{lexicon.lemma}($|\s)+/
|
102
|
+
end
|
103
|
+
nil
|
59
104
|
end
|
60
105
|
|
61
106
|
end
|
@@ -7,9 +7,7 @@ module Opener
|
|
7
7
|
attr_reader :intensifiers
|
8
8
|
attr_reader :with_polarity
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
POS_ORDER = 'NRVGAO'
|
10
|
+
POS_ORDER = 'ONRVGA'
|
13
11
|
DEFAULT_POS = 'O'
|
14
12
|
POS_SHORT_MAP = {
|
15
13
|
adj: 'G',
|
@@ -45,17 +43,22 @@ module Opener
|
|
45
43
|
@intensifiers[lemma]
|
46
44
|
end
|
47
45
|
|
48
|
-
def by_polarity lemma,
|
49
|
-
|
46
|
+
def by_polarity lemma, identified_short_pos
|
47
|
+
hash = Hashie::Mash.new multi: (@with_polarity[lemma] || [])
|
48
|
+
|
49
|
+
if identified_short_pos and lexicon = @with_polarity[lemma+identified_short_pos]
|
50
|
+
hash[:single] = lexicon
|
51
|
+
return [hash, identified_short_pos]
|
52
|
+
end
|
50
53
|
|
51
54
|
POS_ORDER.chars.each do |short_pos|
|
52
|
-
if
|
53
|
-
|
54
|
-
return [
|
55
|
+
if lexicon = @with_polarity[lemma+short_pos]
|
56
|
+
hash[:single] = lexicon
|
57
|
+
return [hash, identified_short_pos]
|
55
58
|
end
|
56
59
|
end
|
57
60
|
|
58
|
-
[
|
61
|
+
[hash, 'unknown']
|
59
62
|
end
|
60
63
|
|
61
64
|
protected
|
@@ -63,16 +66,33 @@ module Opener
|
|
63
66
|
def map lexicons
|
64
67
|
return if blank?
|
65
68
|
|
66
|
-
lexicons.each do |
|
67
|
-
next if
|
69
|
+
lexicons.each do |lexicon|
|
70
|
+
next if lexicon.lemma.nil?
|
68
71
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
if
|
74
|
-
|
75
|
-
|
72
|
+
sub_lexicons = [lexicon]
|
73
|
+
sub_lexicons += lexicon.variants if lexicon.variants
|
74
|
+
|
75
|
+
sub_lexicons.each do |variant|
|
76
|
+
if variant.lemma.strip.include? ' '
|
77
|
+
lemma = variant.lemma.strip.split(' ').first
|
78
|
+
type = :multi
|
79
|
+
else
|
80
|
+
lemma = variant.lemma
|
81
|
+
type = :single
|
82
|
+
end
|
83
|
+
|
84
|
+
if ['polarityShifter', 'intensifier'].include? lexicon.type
|
85
|
+
var = @negators if lexicon.type == 'polarityShifter'
|
86
|
+
var = @intensifiers if lexicon.type == 'intensifier'
|
87
|
+
|
88
|
+
var[lemma] ||= Hashie::Mash.new multi: []
|
89
|
+
if type == :multi
|
90
|
+
var[lemma][:multi] << lexicon
|
91
|
+
else
|
92
|
+
var[lemma][:single] = lexicon
|
93
|
+
end
|
94
|
+
else
|
95
|
+
map_one_polarity lemma, variant, lexicon if lexicon.polarity
|
76
96
|
end
|
77
97
|
end
|
78
98
|
end
|
@@ -82,6 +102,19 @@ module Opener
|
|
82
102
|
puts "#{@lang}: loaded #{@with_polarity.size} elements with polarity"
|
83
103
|
end
|
84
104
|
|
105
|
+
def map_one_polarity lemma, hash, lexicon
|
106
|
+
poses = if hash.poses.present? then hash.poses else [hash.pos] end
|
107
|
+
poses.each do |pos|
|
108
|
+
short_pos = POS_SHORT_MAP[pos&.to_sym] || DEFAULT_POS
|
109
|
+
@with_polarity[lemma] ||= []
|
110
|
+
if hash.lemma.strip.include? ' '
|
111
|
+
@with_polarity[lemma] << lexicon
|
112
|
+
else
|
113
|
+
@with_polarity[lemma+short_pos] = lexicon
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
85
118
|
end
|
86
119
|
end
|
87
120
|
end
|
@@ -15,10 +15,11 @@ module Opener
|
|
15
15
|
end
|
16
16
|
|
17
17
|
def [] **params
|
18
|
+
existing = @cache[params]
|
19
|
+
return existing if existing and existing.from > UPDATE_INTERVAL.ago
|
20
|
+
|
18
21
|
synchronize do
|
19
|
-
|
20
|
-
break existing if existing and existing.from > UPDATE_INTERVAL.ago
|
21
|
-
@cache[params] = cache_update existing, **params
|
22
|
+
@cache[params] = cache_update @cache[params], **params
|
22
23
|
end
|
23
24
|
end
|
24
25
|
alias_method :get, :[]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-polarity-tagger
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-02-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: opener-daemons
|
@@ -212,12 +212,12 @@ files:
|
|
212
212
|
- core/poltagger-basic-multi.py
|
213
213
|
- exec/polarity-tagger.rb
|
214
214
|
- ext/hack/Rakefile
|
215
|
+
- lib/opener/kaf/document.rb
|
216
|
+
- lib/opener/kaf/term.rb
|
215
217
|
- lib/opener/polarity_tagger.rb
|
216
218
|
- lib/opener/polarity_tagger/cli.rb
|
217
219
|
- lib/opener/polarity_tagger/external.rb
|
218
220
|
- lib/opener/polarity_tagger/internal.rb
|
219
|
-
- lib/opener/polarity_tagger/kaf/document.rb
|
220
|
-
- lib/opener/polarity_tagger/kaf/term.rb
|
221
221
|
- lib/opener/polarity_tagger/lexicon_map.rb
|
222
222
|
- lib/opener/polarity_tagger/lexicons_cache.rb
|
223
223
|
- lib/opener/polarity_tagger/public/markdown.css
|