opener-polarity-tagger 3.4.0 → 3.5.0
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fe952e44b35deff6d30de2994ffff1555c6fb256915dd6931605f2141e29cb9c
|
4
|
+
data.tar.gz: 30cd612208d71ec9d2feea2fea625c445e6c5fdef1bd5f359bd160d4a8acb465
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 94afe1918ac45c3fda4007588ee8a1c4f50ee6568a9bcf83c2b9de50090caf9bcc72494d4d731a2a3c6bbd143cc1a025ca071c18bc5379059316d293d973d1c7
|
7
|
+
data.tar.gz: 20fe782270ce4c1d1815acc9acfc9efd9870bd582435f86625bc5c9d92c09c2563fcd62cfc6a4c735a6696745f42bb253b6d5249c3222e9ad82fa196ffe19499
|
@@ -14,6 +14,15 @@
|
|
14
14
|
<Domain/>
|
15
15
|
</Sense>
|
16
16
|
</LexicalEntry>
|
17
|
+
<LexicalEntry id="" partOfSpeech="noun">
|
18
|
+
<Lemma writtenForm="very comfortable"/>
|
19
|
+
<Sense>
|
20
|
+
<Confidence level="manual"/>
|
21
|
+
<MonolingualExternalRef/>
|
22
|
+
<Sentiment polarity="negative" strength="strong"/>
|
23
|
+
<Domain/>
|
24
|
+
</Sense>
|
25
|
+
</LexicalEntry>
|
17
26
|
<LexicalEntry id="" partOfSpeech="noun">
|
18
27
|
<Lemma writtenForm="abandonment"/>
|
19
28
|
<Sense>
|
@@ -9,15 +9,16 @@ module Opener
|
|
9
9
|
DESC = 'VUA polarity tagger multilanguage'
|
10
10
|
LAST_EDITED = '21may2014'
|
11
11
|
VERSION = '1.2'
|
12
|
+
N_WORDS = 5
|
12
13
|
|
13
|
-
CACHE
|
14
|
+
CACHE = LexiconsCache.new
|
14
15
|
|
15
16
|
def initialize ignore_pos: false, **params
|
16
17
|
@ignore_pos = ignore_pos
|
17
18
|
end
|
18
19
|
|
19
20
|
def run input, params = {}
|
20
|
-
kaf
|
21
|
+
kaf = KAF::Document.from_xml input
|
21
22
|
|
22
23
|
@cache_keys = params[:cache_keys] ||= {}
|
23
24
|
@cache_keys.merge! lang: kaf.language
|
@@ -25,31 +26,52 @@ module Opener
|
|
25
26
|
|
26
27
|
raise Opener::Core::UnsupportedLanguageError, kaf.language if @map.blank?
|
27
28
|
|
28
|
-
|
29
|
+
next_index = 0
|
30
|
+
kaf.terms.each_with_index do |t, index|
|
31
|
+
# skip terms when a multi_word_expression is found
|
32
|
+
next if next_index > index
|
29
33
|
lemma = t.lemma&.downcase
|
30
34
|
text = t.text.to_s.downcase
|
31
35
|
pos = if @ignore_pos then nil else t.pos end
|
32
36
|
attrs = Hashie::Mash.new
|
33
37
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
38
|
+
|
39
|
+
polarity_pos = nil
|
40
|
+
|
41
|
+
if opts = @map.by_negator(text) || @map.by_negator(lemma)
|
42
|
+
lexicon, next_index = get_lexicon(opts, kaf, index)
|
43
|
+
attrs.sentiment_modifier = 'shifter' if lexicon
|
44
|
+
elsif opts = @map.by_intensifier(text) || @map.by_intensifier(lemma)
|
45
|
+
lexicon, next_index = get_lexicon(opts, kaf, index)
|
46
|
+
attrs.sentiment_modifier = 'intensifier' if lexicon
|
47
|
+
end
|
48
|
+
|
49
|
+
unless lexicon
|
50
|
+
# text matching has priority, as sometimes
|
51
|
+
# the lemma provided by Stanza is a different word
|
52
|
+
[text, lemma].each do |word|
|
53
|
+
opts, polarity_pos = @map.by_polarity word, pos
|
54
|
+
|
55
|
+
if opts[:multi].size > 0 or opts[:single]
|
56
|
+
lexicon, next_index = get_lexicon opts, kaf, index
|
57
|
+
if lexicon
|
58
|
+
attrs.polarity = lexicon.polarity
|
59
|
+
break
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
47
63
|
end
|
48
64
|
|
49
65
|
if attrs.size > 0
|
50
|
-
attrs['lexicon-id'] = lexicon.id.to_s if lexicon
|
51
|
-
attrs.resource = lexicon.resource if lexicon
|
66
|
+
attrs['lexicon-id'] = lexicon.id.to_s if lexicon&.id
|
67
|
+
attrs.resource = lexicon.resource if lexicon&.resource
|
52
68
|
t.setPolarity attrs, polarity_pos
|
69
|
+
i = index
|
70
|
+
while i < next_index do
|
71
|
+
term = kaf.terms[i]
|
72
|
+
term.setPolarity attrs, polarity_pos
|
73
|
+
i += 1
|
74
|
+
end
|
53
75
|
end
|
54
76
|
end
|
55
77
|
|
@@ -58,6 +80,29 @@ module Opener
|
|
58
80
|
kaf.to_xml
|
59
81
|
end
|
60
82
|
|
83
|
+
def get_lexicon opts, kaf, index
|
84
|
+
if lexicon = identify_lexicon(kaf.terms[index, N_WORDS], opts.multi)
|
85
|
+
index = index + lexicon.lemma.strip.split(' ').size
|
86
|
+
else
|
87
|
+
lexicon = opts.single
|
88
|
+
end
|
89
|
+
|
90
|
+
[lexicon, index]
|
91
|
+
end
|
92
|
+
|
93
|
+
def identify_lexicon terms, lexicons
|
94
|
+
return unless lexicons.size > 0
|
95
|
+
|
96
|
+
lemma = terms.map{|t| t.lemma&.downcase }.join(' ')
|
97
|
+
text = terms.map{|t| t.text&.downcase }.join(' ')
|
98
|
+
|
99
|
+
lexicons.each do |lexicon|
|
100
|
+
return lexicon if lemma =~ /^#{lexicon.lemma}($|\s)+/
|
101
|
+
return lexicon if text =~ /^#{lexicon.lemma}($|\s)+/
|
102
|
+
end
|
103
|
+
nil
|
104
|
+
end
|
105
|
+
|
61
106
|
end
|
62
107
|
end
|
63
108
|
end
|
@@ -7,9 +7,7 @@ module Opener
|
|
7
7
|
attr_reader :intensifiers
|
8
8
|
attr_reader :with_polarity
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
POS_ORDER = 'NRVGAO'
|
10
|
+
POS_ORDER = 'ONRVGA'
|
13
11
|
DEFAULT_POS = 'O'
|
14
12
|
POS_SHORT_MAP = {
|
15
13
|
adj: 'G',
|
@@ -45,43 +43,56 @@ module Opener
|
|
45
43
|
@intensifiers[lemma]
|
46
44
|
end
|
47
45
|
|
48
|
-
def by_polarity lemma,
|
49
|
-
|
50
|
-
|
46
|
+
def by_polarity lemma, identified_short_pos
|
47
|
+
hash = Hashie::Mash.new multi: (@with_polarity[lemma] || [])
|
48
|
+
|
49
|
+
if identified_short_pos and lexicon = @with_polarity[lemma+identified_short_pos]
|
50
|
+
hash[:single] = lexicon
|
51
|
+
return [hash, identified_short_pos]
|
52
|
+
end
|
51
53
|
|
52
54
|
POS_ORDER.chars.each do |short_pos|
|
53
|
-
|
54
|
-
|
55
|
+
if lexicon = @with_polarity[lemma+short_pos]
|
56
|
+
hash[:single] = lexicon
|
57
|
+
return [hash, identified_short_pos]
|
58
|
+
end
|
55
59
|
end
|
56
60
|
|
57
|
-
[
|
61
|
+
[hash, 'unknown']
|
58
62
|
end
|
59
63
|
|
60
64
|
protected
|
61
65
|
|
62
|
-
def map_one_polarity l
|
63
|
-
poses = if l.poses.present? then l.poses else [l.pos] end
|
64
|
-
poses.each do |pos|
|
65
|
-
short_pos = POS_SHORT_MAP[pos&.to_sym] || DEFAULT_POS
|
66
|
-
@with_polarity[l.lemma+short_pos] = l
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
66
|
def map lexicons
|
71
67
|
return if blank?
|
72
68
|
|
73
|
-
lexicons.each do |
|
74
|
-
next if
|
69
|
+
lexicons.each do |lexicon|
|
70
|
+
next if lexicon.lemma.nil?
|
75
71
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
if
|
81
|
-
|
82
|
-
|
83
|
-
|
72
|
+
sub_lexicons = [lexicon]
|
73
|
+
sub_lexicons += lexicon.variants if lexicon.variants
|
74
|
+
|
75
|
+
sub_lexicons.each do |variant|
|
76
|
+
if variant.lemma.strip.include? ' '
|
77
|
+
lemma = variant.lemma.strip.split(' ').first
|
78
|
+
type = :multi
|
79
|
+
else
|
80
|
+
lemma = variant.lemma
|
81
|
+
type = :single
|
82
|
+
end
|
83
|
+
|
84
|
+
if ['polarityShifter', 'intensifier'].include? lexicon.type
|
85
|
+
var = @negators if lexicon.type == 'polarityShifter'
|
86
|
+
var = @intensifiers if lexicon.type == 'intensifier'
|
87
|
+
|
88
|
+
var[lemma] ||= Hashie::Mash.new multi: []
|
89
|
+
if type == :multi
|
90
|
+
var[lemma][:multi] << lexicon
|
91
|
+
else
|
92
|
+
var[lemma][:single] = lexicon
|
84
93
|
end
|
94
|
+
else
|
95
|
+
map_one_polarity lemma, variant, lexicon if lexicon.polarity
|
85
96
|
end
|
86
97
|
end
|
87
98
|
end
|
@@ -91,6 +102,19 @@ module Opener
|
|
91
102
|
puts "#{@lang}: loaded #{@with_polarity.size} elements with polarity"
|
92
103
|
end
|
93
104
|
|
105
|
+
def map_one_polarity lemma, hash, lexicon
|
106
|
+
poses = if hash.poses.present? then hash.poses else [hash.pos] end
|
107
|
+
poses.each do |pos|
|
108
|
+
short_pos = POS_SHORT_MAP[pos&.to_sym] || DEFAULT_POS
|
109
|
+
@with_polarity[lemma] ||= []
|
110
|
+
if hash.lemma.strip.include? ' '
|
111
|
+
@with_polarity[lemma] << lexicon
|
112
|
+
else
|
113
|
+
@with_polarity[lemma+short_pos] = lexicon
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
94
118
|
end
|
95
119
|
end
|
96
120
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-polarity-tagger
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-02-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: opener-daemons
|
@@ -251,7 +251,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
251
251
|
version: '0'
|
252
252
|
requirements: []
|
253
253
|
rubyforge_project:
|
254
|
-
rubygems_version: 2.7.
|
254
|
+
rubygems_version: 2.7.6
|
255
255
|
signing_key:
|
256
256
|
specification_version: 4
|
257
257
|
summary: Polarity tagger for various languages.
|