opener-polarity-tagger 3.2.6 → 3.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/core/LexiconMod.py +1 -3
- data/core/general-lexicons/EN-lexicon/Sentiment-English-general.xml +9 -0
- data/lib/opener/{polarity_tagger/kaf → kaf}/document.rb +0 -0
- data/lib/opener/{polarity_tagger/kaf → kaf}/term.rb +4 -0
- data/lib/opener/polarity_tagger/internal.rb +68 -23
- data/lib/opener/polarity_tagger/lexicon_map.rb +51 -18
- data/lib/opener/polarity_tagger/lexicons_cache.rb +4 -3
- data/lib/opener/polarity_tagger/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5bc937332f3126f240a402051ede1ec3d50c753c65a09e147d4d99446be67bd7
|
4
|
+
data.tar.gz: deb5934edffe4496b83d57d12d0ad2c1dc0ebff77bfbaaa1509602f2a80ae7db
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6430b221c2b87e2e934f442535d4bfae2dae80b520e0703864b8ff3b3a407fd36954b0538fa6a1f4d0ae6bc52461379c9aa45e98c6021d6634b9759f1a1f9495
|
7
|
+
data.tar.gz: 2626fbe6a5d959eda1ff4c06a9b22408d81520ff0c1fbb26be573051df12fd391cee1049488b25bd47a7778b186914730902d9be8765121bc787bff7f4faf5b3
|
data/core/LexiconMod.py
CHANGED
@@ -45,9 +45,7 @@ def load_lexicons(language, path=None):
|
|
45
45
|
def show_lexicons(language, path=None):
|
46
46
|
if path is None:
|
47
47
|
path = __module_dir
|
48
|
-
|
49
|
-
LexiconSent(language,None,path)
|
50
|
-
|
48
|
+
lexicons, default_id, this_folder, folder_per_lang = load_lexicons(language, path)
|
51
49
|
print
|
52
50
|
print '#'*30
|
53
51
|
print 'Available lexicons for',language
|
@@ -14,6 +14,15 @@
|
|
14
14
|
<Domain/>
|
15
15
|
</Sense>
|
16
16
|
</LexicalEntry>
|
17
|
+
<LexicalEntry id="" partOfSpeech="noun">
|
18
|
+
<Lemma writtenForm="very comfortable"/>
|
19
|
+
<Sense>
|
20
|
+
<Confidence level="manual"/>
|
21
|
+
<MonolingualExternalRef/>
|
22
|
+
<Sentiment polarity="negative" strength="strong"/>
|
23
|
+
<Domain/>
|
24
|
+
</Sense>
|
25
|
+
</LexicalEntry>
|
17
26
|
<LexicalEntry id="" partOfSpeech="noun">
|
18
27
|
<Lemma writtenForm="abandonment"/>
|
19
28
|
<Sense>
|
File without changes
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require_relative 'lexicons_cache'
|
2
2
|
require_relative 'lexicon_map'
|
3
|
-
require_relative 'kaf/document'
|
3
|
+
require_relative '../kaf/document'
|
4
4
|
|
5
5
|
module Opener
|
6
6
|
class PolarityTagger
|
@@ -9,53 +9,98 @@ module Opener
|
|
9
9
|
DESC = 'VUA polarity tagger multilanguage'
|
10
10
|
LAST_EDITED = '21may2014'
|
11
11
|
VERSION = '1.2'
|
12
|
+
N_WORDS = 5
|
12
13
|
|
13
|
-
CACHE
|
14
|
+
CACHE = LexiconsCache.new
|
14
15
|
|
15
16
|
def initialize ignore_pos: false, **params
|
16
17
|
@ignore_pos = ignore_pos
|
17
18
|
end
|
18
19
|
|
19
|
-
def clear_cache lang: nil, environment:
|
20
|
-
end
|
21
|
-
|
22
20
|
def run input, params = {}
|
23
|
-
|
21
|
+
kaf = KAF::Document.from_xml input
|
24
22
|
|
25
23
|
@cache_keys = params[:cache_keys] ||= {}
|
26
|
-
@cache_keys.merge! lang:
|
27
|
-
@map =
|
24
|
+
@cache_keys.merge! lang: kaf.language
|
25
|
+
@map = kaf.map = CACHE[**@cache_keys].lexicons
|
28
26
|
|
29
|
-
raise Opener::Core::UnsupportedLanguageError,
|
27
|
+
raise Opener::Core::UnsupportedLanguageError, kaf.language if @map.blank?
|
30
28
|
|
31
|
-
|
29
|
+
next_index = 0
|
30
|
+
kaf.terms.each_with_index do |t, index|
|
31
|
+
# skip terms when a multi_word_expression is found
|
32
|
+
next if next_index > index
|
32
33
|
lemma = t.lemma&.downcase
|
34
|
+
text = t.text.to_s.downcase
|
33
35
|
pos = if @ignore_pos then nil else t.pos end
|
34
36
|
attrs = Hashie::Mash.new
|
35
37
|
|
36
|
-
lexicon, polarity_pos = @map.by_polarity lemma, pos
|
37
38
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
39
|
+
polarity_pos = nil
|
40
|
+
|
41
|
+
if opts = @map.by_negator(text) || @map.by_negator(lemma)
|
42
|
+
lexicon, next_index = get_lexicon(opts, kaf, index)
|
43
|
+
attrs.sentiment_modifier = 'shifter' if lexicon
|
44
|
+
elsif opts = @map.by_intensifier(text) || @map.by_intensifier(lemma)
|
45
|
+
lexicon, next_index = get_lexicon(opts, kaf, index)
|
46
|
+
attrs.sentiment_modifier = 'intensifier' if lexicon
|
44
47
|
end
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
+
|
49
|
+
unless lexicon
|
50
|
+
# text matching have priority as sometimes
|
51
|
+
# the lemma provided by Stanza is a different word
|
52
|
+
[text, lemma].each do |word|
|
53
|
+
opts, polarity_pos = @map.by_polarity word, pos
|
54
|
+
|
55
|
+
if opts[:multi].size > 0 or opts[:single]
|
56
|
+
lexicon, next_index = get_lexicon opts, kaf, index
|
57
|
+
if lexicon
|
58
|
+
attrs.polarity = lexicon.polarity
|
59
|
+
break
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
48
63
|
end
|
49
64
|
|
50
65
|
if attrs.size > 0
|
51
|
-
attrs
|
66
|
+
attrs['lexicon-id'] = lexicon.id.to_s if lexicon&.id
|
67
|
+
attrs.resource = lexicon.resource if lexicon&.resource
|
52
68
|
t.setPolarity attrs, polarity_pos
|
69
|
+
i = index
|
70
|
+
while i < next_index do
|
71
|
+
term = kaf.terms[i]
|
72
|
+
term.setPolarity attrs, polarity_pos
|
73
|
+
i += 1
|
74
|
+
end
|
53
75
|
end
|
54
76
|
end
|
55
77
|
|
56
|
-
|
78
|
+
kaf.add_linguistic_processor DESC, "#{LAST_EDITED}_#{VERSION}", 'terms'
|
79
|
+
|
80
|
+
kaf.to_xml
|
81
|
+
end
|
57
82
|
|
58
|
-
|
83
|
+
def get_lexicon opts, kaf, index
|
84
|
+
if lexicon = identify_lexicon(kaf.terms[index, N_WORDS], opts.multi)
|
85
|
+
index = index + lexicon.lemma.strip.split(' ').size
|
86
|
+
else
|
87
|
+
lexicon = opts.single
|
88
|
+
end
|
89
|
+
|
90
|
+
[lexicon, index]
|
91
|
+
end
|
92
|
+
|
93
|
+
def identify_lexicon terms, lexicons
|
94
|
+
return unless lexicons.size > 0
|
95
|
+
|
96
|
+
lemma = terms.map{|t| t.lemma&.downcase }.join(' ')
|
97
|
+
text = terms.map{|t| t.text&.downcase }.join(' ')
|
98
|
+
|
99
|
+
lexicons.each do |lexicon|
|
100
|
+
return lexicon if lemma =~ /^#{lexicon.lemma}($|\s)+/
|
101
|
+
return lexicon if text =~ /^#{lexicon.lemma}($|\s)+/
|
102
|
+
end
|
103
|
+
nil
|
59
104
|
end
|
60
105
|
|
61
106
|
end
|
@@ -7,9 +7,7 @@ module Opener
|
|
7
7
|
attr_reader :intensifiers
|
8
8
|
attr_reader :with_polarity
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
POS_ORDER = 'NRVGAO'
|
10
|
+
POS_ORDER = 'ONRVGA'
|
13
11
|
DEFAULT_POS = 'O'
|
14
12
|
POS_SHORT_MAP = {
|
15
13
|
adj: 'G',
|
@@ -45,17 +43,22 @@ module Opener
|
|
45
43
|
@intensifiers[lemma]
|
46
44
|
end
|
47
45
|
|
48
|
-
def by_polarity lemma,
|
49
|
-
|
46
|
+
def by_polarity lemma, identified_short_pos
|
47
|
+
hash = Hashie::Mash.new multi: (@with_polarity[lemma] || [])
|
48
|
+
|
49
|
+
if identified_short_pos and lexicon = @with_polarity[lemma+identified_short_pos]
|
50
|
+
hash[:single] = lexicon
|
51
|
+
return [hash, identified_short_pos]
|
52
|
+
end
|
50
53
|
|
51
54
|
POS_ORDER.chars.each do |short_pos|
|
52
|
-
if
|
53
|
-
|
54
|
-
return [
|
55
|
+
if lexicon = @with_polarity[lemma+short_pos]
|
56
|
+
hash[:single] = lexicon
|
57
|
+
return [hash, identified_short_pos]
|
55
58
|
end
|
56
59
|
end
|
57
60
|
|
58
|
-
[
|
61
|
+
[hash, 'unknown']
|
59
62
|
end
|
60
63
|
|
61
64
|
protected
|
@@ -63,16 +66,33 @@ module Opener
|
|
63
66
|
def map lexicons
|
64
67
|
return if blank?
|
65
68
|
|
66
|
-
lexicons.each do |
|
67
|
-
next if
|
69
|
+
lexicons.each do |lexicon|
|
70
|
+
next if lexicon.lemma.nil?
|
68
71
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
if
|
74
|
-
|
75
|
-
|
72
|
+
sub_lexicons = [lexicon]
|
73
|
+
sub_lexicons += lexicon.variants if lexicon.variants
|
74
|
+
|
75
|
+
sub_lexicons.each do |variant|
|
76
|
+
if variant.lemma.strip.include? ' '
|
77
|
+
lemma = variant.lemma.strip.split(' ').first
|
78
|
+
type = :multi
|
79
|
+
else
|
80
|
+
lemma = variant.lemma
|
81
|
+
type = :single
|
82
|
+
end
|
83
|
+
|
84
|
+
if ['polarityShifter', 'intensifier'].include? lexicon.type
|
85
|
+
var = @negators if lexicon.type == 'polarityShifter'
|
86
|
+
var = @intensifiers if lexicon.type == 'intensifier'
|
87
|
+
|
88
|
+
var[lemma] ||= Hashie::Mash.new multi: []
|
89
|
+
if type == :multi
|
90
|
+
var[lemma][:multi] << lexicon
|
91
|
+
else
|
92
|
+
var[lemma][:single] = lexicon
|
93
|
+
end
|
94
|
+
else
|
95
|
+
map_one_polarity lemma, variant, lexicon if lexicon.polarity
|
76
96
|
end
|
77
97
|
end
|
78
98
|
end
|
@@ -82,6 +102,19 @@ module Opener
|
|
82
102
|
puts "#{@lang}: loaded #{@with_polarity.size} elements with polarity"
|
83
103
|
end
|
84
104
|
|
105
|
+
def map_one_polarity lemma, hash, lexicon
|
106
|
+
poses = if hash.poses.present? then hash.poses else [hash.pos] end
|
107
|
+
poses.each do |pos|
|
108
|
+
short_pos = POS_SHORT_MAP[pos&.to_sym] || DEFAULT_POS
|
109
|
+
@with_polarity[lemma] ||= []
|
110
|
+
if hash.lemma.strip.include? ' '
|
111
|
+
@with_polarity[lemma] << lexicon
|
112
|
+
else
|
113
|
+
@with_polarity[lemma+short_pos] = lexicon
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
85
118
|
end
|
86
119
|
end
|
87
120
|
end
|
@@ -15,10 +15,11 @@ module Opener
|
|
15
15
|
end
|
16
16
|
|
17
17
|
def [] **params
|
18
|
+
existing = @cache[params]
|
19
|
+
return existing if existing and existing.from > UPDATE_INTERVAL.ago
|
20
|
+
|
18
21
|
synchronize do
|
19
|
-
|
20
|
-
break existing if existing and existing.from > UPDATE_INTERVAL.ago
|
21
|
-
@cache[params] = cache_update existing, **params
|
22
|
+
@cache[params] = cache_update @cache[params], **params
|
22
23
|
end
|
23
24
|
end
|
24
25
|
alias_method :get, :[]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-polarity-tagger
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-02-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: opener-daemons
|
@@ -212,12 +212,12 @@ files:
|
|
212
212
|
- core/poltagger-basic-multi.py
|
213
213
|
- exec/polarity-tagger.rb
|
214
214
|
- ext/hack/Rakefile
|
215
|
+
- lib/opener/kaf/document.rb
|
216
|
+
- lib/opener/kaf/term.rb
|
215
217
|
- lib/opener/polarity_tagger.rb
|
216
218
|
- lib/opener/polarity_tagger/cli.rb
|
217
219
|
- lib/opener/polarity_tagger/external.rb
|
218
220
|
- lib/opener/polarity_tagger/internal.rb
|
219
|
-
- lib/opener/polarity_tagger/kaf/document.rb
|
220
|
-
- lib/opener/polarity_tagger/kaf/term.rb
|
221
221
|
- lib/opener/polarity_tagger/lexicon_map.rb
|
222
222
|
- lib/opener/polarity_tagger/lexicons_cache.rb
|
223
223
|
- lib/opener/polarity_tagger/public/markdown.css
|