opener-polarity-tagger 3.1.1 → 3.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/core/poltagger-basic-multi.py +1 -1
- data/lib/opener/polarity_tagger.rb +7 -2
- data/lib/opener/polarity_tagger/external.rb +2 -2
- data/lib/opener/polarity_tagger/internal.rb +12 -8
- data/lib/opener/polarity_tagger/kaf/document.rb +2 -2
- data/lib/opener/polarity_tagger/lexicon_map.rb +19 -14
- data/lib/opener/polarity_tagger/lexicons_cache.rb +41 -9
- data/lib/opener/polarity_tagger/version.rb +1 -1
- data/opener-polarity-tagger.gemspec +1 -0
- data/task/requirements.rake +2 -2
- metadata +18 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a62617dcfcc50787c56ac1243ecdacdd6c85d39621662e3c585a944453153469
|
4
|
+
data.tar.gz: 818c317c7b7ce7ddc4668bcef407ceb41e9a96ed781c0dcf47e6bedfd016cfaf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: babc5afc6e4ab7b7845386e2493c88aa975429397aa695eac4b8705f3aa92417ce240b367844b7cdeec1f3f446a792e5d501a38f7e303c1587196bb4020ebf14
|
7
|
+
data.tar.gz: 221d76e8fe87f56861e7a48ac5dd1ce3bae9dd85397ce0d165f67c13cfaf9241fc7cd31cd5f630b0fdb15b5d373ff3b307e73f7e993e6b576faf2ddcbaf2938b
|
@@ -2,6 +2,7 @@ require 'open3'
|
|
2
2
|
require 'opener/core'
|
3
3
|
require 'nokogiri'
|
4
4
|
require 'hashie'
|
5
|
+
require 'active_support/all'
|
5
6
|
|
6
7
|
require_relative 'polarity_tagger/version'
|
7
8
|
require_relative 'polarity_tagger/cli'
|
@@ -19,8 +20,12 @@ module Opener
|
|
19
20
|
@proc = @klass.new args: @args
|
20
21
|
end
|
21
22
|
|
22
|
-
def
|
23
|
-
@proc.
|
23
|
+
def clear_cache params = {}
|
24
|
+
@proc.clear_cache(**params)
|
25
|
+
end
|
26
|
+
|
27
|
+
def run input, params = {}
|
28
|
+
@proc.run input, params
|
24
29
|
end
|
25
30
|
|
26
31
|
end
|
@@ -32,7 +32,7 @@ module Opener
|
|
32
32
|
# @return [String]
|
33
33
|
#
|
34
34
|
def command
|
35
|
-
|
35
|
+
"#{adjust_python_path} python2 -E #{kernel} #{lexicon_path} #{args.join(" ")}"
|
36
36
|
end
|
37
37
|
|
38
38
|
##
|
@@ -52,7 +52,7 @@ module Opener
|
|
52
52
|
# @param [String] input The text of which to detect the language.
|
53
53
|
# @return [Array]
|
54
54
|
#
|
55
|
-
def run
|
55
|
+
def run input, params
|
56
56
|
stdout, stderr, process = capture(input)
|
57
57
|
|
58
58
|
raise stderr unless process.success?
|
@@ -10,17 +10,22 @@ module Opener
|
|
10
10
|
LAST_EDITED = '21may2014'
|
11
11
|
VERSION = '1.2'
|
12
12
|
|
13
|
-
|
14
|
-
@cache = LexiconsCache.new
|
13
|
+
CACHE = LexiconsCache.new
|
15
14
|
|
15
|
+
def initialize ignore_pos: false, **params
|
16
16
|
@ignore_pos = ignore_pos
|
17
17
|
end
|
18
18
|
|
19
|
-
def
|
19
|
+
def clear_cache lang: nil, environment:
|
20
|
+
end
|
21
|
+
|
22
|
+
def run input, params = {}
|
20
23
|
@kaf = KAF::Document.from_xml input
|
21
|
-
@map = @kaf.map = @cache[@kaf.language]
|
22
24
|
|
23
|
-
|
25
|
+
@cache_keys = params[:cache_keys] ||= {}
|
26
|
+
@cache_keys.merge! lang: @kaf.language
|
27
|
+
@map = @kaf.map = CACHE[**@cache_keys].lexicons
|
28
|
+
|
24
29
|
@kaf.terms.each do |t|
|
25
30
|
lemma = t.lemma&.downcase
|
26
31
|
pos = if @ignore_pos then nil else t.pos end
|
@@ -32,12 +37,11 @@ module Opener
|
|
32
37
|
attrs.polarity = lexicon.polarity
|
33
38
|
end
|
34
39
|
if l = @map.by_negator(lemma)
|
35
|
-
|
36
|
-
lexicon, polarity = l, nil
|
40
|
+
lexicon, polarity_pos = l, nil
|
37
41
|
attrs.sentiment_modifier = 'shifter'
|
38
42
|
end
|
39
43
|
if l = @map.by_intensifier(lemma)
|
40
|
-
lexicon,
|
44
|
+
lexicon, polarity_pos = l, nil
|
41
45
|
attrs.sentiment_modifier = 'intensifier'
|
42
46
|
end
|
43
47
|
|
@@ -26,9 +26,9 @@ module Opener
|
|
26
26
|
end
|
27
27
|
|
28
28
|
def add_linguistic_processor name, version, layer, timestamp: false
|
29
|
-
header = @document.at('kafHeader') || @document.root.add_child('<kafHeader/>')
|
29
|
+
header = @document.at('kafHeader') || @document.root.add_child('<kafHeader/>').first
|
30
30
|
procs = header.css('linguisticProcessors').find{ |l| l.attr(:layer) == layer }
|
31
|
-
procs ||= header.add_child("<linguisticProcessors layer='#{layer}'/>")
|
31
|
+
procs ||= header.add_child("<linguisticProcessors layer='#{layer}'/>").first
|
32
32
|
lp = procs.add_child('<lp/>')
|
33
33
|
lp.attr(
|
34
34
|
timestamp: if timestamp then Time.now.iso8601 else '*' end,
|
@@ -7,21 +7,10 @@ module Opener
|
|
7
7
|
attr_reader :intensifiers
|
8
8
|
attr_reader :with_polarity
|
9
9
|
|
10
|
-
|
11
|
-
UNKNOWN = Hashie::Mash.new polarity: 'unknown'
|
12
|
-
|
13
|
-
def initialize lang:, lexicons:
|
14
|
-
@lang = lang
|
15
|
-
@lexicons = lexicons
|
16
|
-
|
17
|
-
@negators = {}
|
18
|
-
@intensifiers = {}
|
19
|
-
@with_polarity = {}
|
20
|
-
map lexicons
|
21
|
-
end
|
22
|
-
|
23
|
-
DEFAULT_POS = 'O'
|
10
|
+
UNKNOWN = Hashie::Mash.new polarity: 'unknown'
|
24
11
|
|
12
|
+
POS_ORDER = 'NRVGAO'
|
13
|
+
DEFAULT_POS = 'O'
|
25
14
|
POS_SHORT_MAP = {
|
26
15
|
adj: 'G',
|
27
16
|
adv: 'A',
|
@@ -34,6 +23,20 @@ module Opener
|
|
34
23
|
multi_word_expression: 'O',
|
35
24
|
}
|
36
25
|
|
26
|
+
def initialize lang:, lexicons:
|
27
|
+
@lang = lang
|
28
|
+
@lexicons = lexicons
|
29
|
+
|
30
|
+
@negators = {}
|
31
|
+
@intensifiers = {}
|
32
|
+
@with_polarity = {}
|
33
|
+
map lexicons
|
34
|
+
end
|
35
|
+
|
36
|
+
def blank?
|
37
|
+
@lexicons.blank?
|
38
|
+
end
|
39
|
+
|
37
40
|
def by_negator lemma
|
38
41
|
@negators[lemma]
|
39
42
|
end
|
@@ -58,6 +61,8 @@ module Opener
|
|
58
61
|
protected
|
59
62
|
|
60
63
|
def map lexicons
|
64
|
+
return if blank?
|
65
|
+
|
61
66
|
lexicons.each do |l|
|
62
67
|
next if l.lemma.nil?
|
63
68
|
|
@@ -2,8 +2,11 @@ module Opener
|
|
2
2
|
class PolarityTagger
|
3
3
|
class LexiconsCache
|
4
4
|
|
5
|
+
include MonitorMixin
|
6
|
+
|
7
|
+
UPDATE_INTERVAL = (ENV['CACHE_EXPIRE_MINS']&.to_i || 5).minutes
|
8
|
+
|
5
9
|
def initialize
|
6
|
-
extend MonitorMixin
|
7
10
|
super #MonitorMixin
|
8
11
|
|
9
12
|
@url = ENV['POLARITY_LEXICON_URL']
|
@@ -11,28 +14,47 @@ module Opener
|
|
11
14
|
@cache = {}
|
12
15
|
end
|
13
16
|
|
14
|
-
def []
|
17
|
+
def [] **params
|
15
18
|
synchronize do
|
16
|
-
@cache[
|
19
|
+
existing = @cache[params]
|
20
|
+
break existing if existing and existing.from > UPDATE_INTERVAL.ago
|
21
|
+
@cache[params] = cache_update existing, **params
|
17
22
|
end
|
18
23
|
end
|
19
24
|
alias_method :get, :[]
|
20
25
|
|
21
|
-
def
|
22
|
-
|
26
|
+
def cache_update existing = nil, **params
|
27
|
+
from = Time.now
|
28
|
+
lexicons = load_lexicons cache: existing, **params
|
29
|
+
|
30
|
+
if existing and lexicons.blank?
|
31
|
+
existing.from = from
|
32
|
+
return existing
|
33
|
+
end
|
34
|
+
|
35
|
+
Hashie::Mash.new(
|
36
|
+
lexicons: lexicons,
|
37
|
+
from: from,
|
38
|
+
)
|
39
|
+
end
|
40
|
+
|
41
|
+
def load_lexicons lang:, **params
|
42
|
+
lexicons = if @url then load_from_url lang: lang, **params else load_from_path lang: lang, **params end
|
23
43
|
|
24
44
|
LexiconMap.new lang: lang, lexicons: lexicons
|
25
45
|
end
|
26
46
|
|
27
|
-
def load_from_url lang
|
28
|
-
url
|
47
|
+
def load_from_url lang:, cache:, **params
|
48
|
+
url = "#{@url}&language_code=#{lang}&#{params.to_query}"
|
49
|
+
url += "&if_updated_since=#{cache.from.iso8601}" if cache
|
29
50
|
puts "#{lang}: loading lexicons from url #{url}"
|
30
|
-
|
51
|
+
|
52
|
+
lexicons = JSON.parse http.get(url).body
|
31
53
|
lexicons = lexicons['data'].map{ |l| Hashie::Mash.new l }
|
32
54
|
lexicons
|
33
55
|
end
|
34
56
|
|
35
|
-
def load_from_path lang
|
57
|
+
def load_from_path lang:, **params
|
36
58
|
@path ||= 'core/general-lexicons'
|
37
59
|
dir = "#{@path}/#{lang.upcase}-lexicon"
|
38
60
|
config = Nokogiri::XML File.read "#{dir}/config.xml"
|
@@ -63,6 +85,16 @@ module Opener
|
|
63
85
|
lexicons
|
64
86
|
end
|
65
87
|
|
88
|
+
def http
|
89
|
+
return @http if @http
|
90
|
+
|
91
|
+
@http = HTTPClient.new
|
92
|
+
@http.send_timeout = 120
|
93
|
+
@http.receive_timeout = 120
|
94
|
+
@http.connect_timeout = 120
|
95
|
+
@http
|
96
|
+
end
|
97
|
+
|
66
98
|
end
|
67
99
|
end
|
68
100
|
end
|
@@ -33,6 +33,7 @@ Gem::Specification.new do |gem|
|
|
33
33
|
gem.add_dependency 'opener-webservice', '~> 2.1'
|
34
34
|
gem.add_dependency 'opener-core', '~> 2.2'
|
35
35
|
|
36
|
+
gem.add_dependency 'activesupport'
|
36
37
|
gem.add_dependency 'hashie'
|
37
38
|
gem.add_dependency 'rake'
|
38
39
|
gem.add_dependency 'nokogiri'
|
data/task/requirements.rake
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-polarity-tagger
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-11-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: opener-daemons
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '2.2'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: activesupport
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: hashie
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -167,10 +181,10 @@ dependencies:
|
|
167
181
|
description: Polarity tagger for various languages.
|
168
182
|
email:
|
169
183
|
executables:
|
170
|
-
- polarity-tagger
|
171
|
-
- polarity-tagger-daemon
|
172
184
|
- polarity-tagger-server
|
173
185
|
- console
|
186
|
+
- polarity-tagger
|
187
|
+
- polarity-tagger-daemon
|
174
188
|
extensions:
|
175
189
|
- ext/hack/Rakefile
|
176
190
|
extra_rdoc_files: []
|