opener-chained-daemon 3.3.0 → 3.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/opener/chained_daemon.rb +1 -3
- data/lib/opener/chained_daemon/chained_daemon.rb +3 -4
- data/lib/opener/chained_daemon/cli.rb +10 -2
- data/lib/opener/chained_daemon/languages_cache.rb +3 -3
- data/lib/opener/chained_daemon/version.rb +1 -1
- data/lib/opener/kaf/document.rb +15 -13
- data/lib/opener/kaf/text.rb +2 -0
- data/lib/opener/stanza/{tokenizer_pos.rb → processor.rb} +31 -24
- data/opener-chained-daemon.gemspec +1 -1
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 44922bb7219f85282643092261f4aa995e3ecbe1bbfc202b1da8f1141ff27771
|
4
|
+
data.tar.gz: 582a30c7ce359cbbf74179fb8611c315e39ee709008c7b41a0214c7df966af1b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b4b339b252dcbd4439e2a084899afbb3a37fe65e00a442ba25774c6daafcaf59860fc075f71314ea06bb8a2dca9b25315cc5b93d457a3fef6bb08585446e4578
|
7
|
+
data.tar.gz: 9fffc136aba038a4499f6f6895cd55848e6173c2be413304d51e0161f0579ae9038d6ac5f84c9d8114268db4b14dcf7d08dfe7a643950879b0f6246c2a579606
|
@@ -12,12 +12,10 @@ require_relative 'sym_mash'
|
|
12
12
|
|
13
13
|
require_relative 'chained_daemon/languages_cache'
|
14
14
|
require 'opener/language_identifier'
|
15
|
-
require 'opener/tokenizer'
|
16
|
-
require 'opener/pos_tagger' if RUBY_ENGINE == 'jruby'
|
17
15
|
require 'opener/polarity_tagger'
|
18
16
|
require 'opener/property_tagger'
|
19
17
|
require 'opener/opinion_detector_basic'
|
20
|
-
require 'opener/stanza/
|
18
|
+
require 'opener/stanza/processor'
|
21
19
|
|
22
20
|
require_relative 'chained_daemon/chained_daemon'
|
23
21
|
require_relative 'chained_daemon/cli'
|
@@ -8,9 +8,7 @@ module Opener
|
|
8
8
|
@options = DEFAULT_OPTIONS.merge options
|
9
9
|
@queue_map = {
|
10
10
|
'opener-language-identifier': Opener::LanguageIdentifier.new,
|
11
|
-
'stanza-
|
12
|
-
#'opener-tokenizer': Opener::Tokenizer.new,
|
13
|
-
#'opener-pos-tagger': Opener::POSTagger.new,
|
11
|
+
'stanza-processor': Stanza::Processor.new,
|
14
12
|
'opener-property-tagger': Opener::PropertyTagger.new,
|
15
13
|
'opener-polarity-tagger': Opener::PolarityTagger.new,
|
16
14
|
'opener-opinion-detector-basic': Opener::OpinionDetectorBasic.new,
|
@@ -24,6 +22,7 @@ module Opener
|
|
24
22
|
if params.filter_vertical and params.property_type.present?
|
25
23
|
params.cache_keys.property_type = params.property_type
|
26
24
|
end
|
25
|
+
params.cache_keys.environment ||= 'production'
|
27
26
|
|
28
27
|
lang = nil
|
29
28
|
output = nil
|
@@ -49,7 +48,7 @@ module Opener
|
|
49
48
|
output = xml.to_s
|
50
49
|
end
|
51
50
|
|
52
|
-
output = pretty_print output if params.cache_keys
|
51
|
+
output = pretty_print output if params.cache_keys.environment == 'staging'
|
53
52
|
output
|
54
53
|
|
55
54
|
rescue Core::UnsupportedLanguageError
|
@@ -48,10 +48,18 @@ Example:
|
|
48
48
|
daemon = ChainedDaemon.new args: args
|
49
49
|
input = STDIN.tty? ? nil : STDIN.read
|
50
50
|
params = if ENV['PARAMS'] then JSON.parse ENV['PARAMS'] else {} end
|
51
|
+
|
51
52
|
# Set environment as staging from console for testing purposes
|
52
|
-
|
53
|
+
env = ENV['LEXICONS_ENV'] || 'staging'
|
54
|
+
pt = ENV['LEXICONS_PROPERTY_TYPE']
|
55
|
+
params[:cache_keys] = {
|
56
|
+
environment: env,
|
57
|
+
property_type: pt,
|
58
|
+
merged: (true if env == 'staging'),
|
59
|
+
}
|
53
60
|
|
54
|
-
|
61
|
+
output = daemon.run input, params
|
62
|
+
puts output
|
55
63
|
end
|
56
64
|
end
|
57
65
|
end
|
@@ -19,15 +19,15 @@ module Opener
|
|
19
19
|
break @cache if @last_updated and @last_updated > UPDATE_INTERVAL.ago
|
20
20
|
cache_update
|
21
21
|
end
|
22
|
-
@cache
|
23
22
|
end
|
24
23
|
|
25
24
|
def cache_update
|
26
25
|
puts "loading supported languages from url #{@url}" if ENV['DEBUG']
|
27
26
|
|
28
|
-
languages = JSON.parse http.get(@url).body
|
29
|
-
@cache = languages['data'].map { |l| l['code'] }
|
27
|
+
languages = SymMash.new JSON.parse http.get(@url).body
|
30
28
|
@last_updated = Time.now
|
29
|
+
@cache = languages.data.each.with_object({}){ |l,h| h[l.code] = l }
|
30
|
+
@cache
|
31
31
|
end
|
32
32
|
|
33
33
|
def http
|
data/lib/opener/kaf/document.rb
CHANGED
@@ -48,13 +48,14 @@ module Opener
|
|
48
48
|
|
49
49
|
def add_word_form params
|
50
50
|
text = @document.at('text') || @document.root.add_child('<text/>').first
|
51
|
-
wf = text.add_child("<wf>#{params
|
51
|
+
wf = text.add_child("<wf>#{params.text}</wf>")
|
52
52
|
attrs = {
|
53
|
-
wid: "w#{params
|
54
|
-
sent: params
|
55
|
-
para: params
|
56
|
-
offset: params
|
57
|
-
length: params
|
53
|
+
wid: "w#{params.wid}",
|
54
|
+
sent: params.sid,
|
55
|
+
para: params.para,
|
56
|
+
offset: params.offset,
|
57
|
+
length: params.length,
|
58
|
+
head: params.head,
|
58
59
|
}
|
59
60
|
wf.attr attrs
|
60
61
|
end
|
@@ -63,15 +64,16 @@ module Opener
|
|
63
64
|
text = @document.at('terms') || @document.root.add_child('<terms/>').first
|
64
65
|
term = text.add_child("<term/>")
|
65
66
|
attrs = {
|
66
|
-
tid: "t#{params
|
67
|
-
type: params
|
68
|
-
lemma: params
|
69
|
-
text: params
|
70
|
-
pos: params
|
71
|
-
morphofeat: params
|
67
|
+
tid: "t#{params.tid}",
|
68
|
+
type: params.type,
|
69
|
+
lemma: params.lemma,
|
70
|
+
text: params.text,
|
71
|
+
pos: params.pos,
|
72
|
+
morphofeat: params.morphofeat,
|
73
|
+
head: params.head,
|
72
74
|
}
|
73
75
|
term.attr attrs
|
74
|
-
term.first.add_child("<span><target id='w#{params
|
76
|
+
term.first.add_child("<span><target id='w#{params.wid}'/></span>")
|
75
77
|
end
|
76
78
|
|
77
79
|
def to_xml
|
data/lib/opener/kaf/text.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
module Opener
|
2
2
|
module KAF
|
3
3
|
class WordForm
|
4
|
+
|
4
5
|
def initialize(document, xml_node)
|
5
6
|
@document = document
|
6
7
|
@xml_node = xml_node
|
@@ -25,6 +26,7 @@ module Opener
|
|
25
26
|
def paragraph
|
26
27
|
return @paragraph ||= @xml_node.attr('para').to_i
|
27
28
|
end
|
29
|
+
|
28
30
|
end
|
29
31
|
end
|
30
32
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Opener
|
2
2
|
module Stanza
|
3
|
-
class
|
3
|
+
class Processor
|
4
4
|
|
5
5
|
DESC = 'Tokenizer / POS by Stanza'
|
6
6
|
VERSION = '1.0'
|
@@ -8,8 +8,10 @@ module Opener
|
|
8
8
|
BASE_URL = ENV['STANZA_SERVER']
|
9
9
|
LANGUAGES_CACHE = Opener::ChainedDaemon::LanguagesCache.new
|
10
10
|
|
11
|
-
RTL_LANGUAGES = [
|
12
|
-
|
11
|
+
RTL_LANGUAGES = %w[
|
12
|
+
ar ara arc ae ave egy he heb nqo pal phn sam
|
13
|
+
syc syr fa per fas ku kur ur urd
|
14
|
+
]
|
13
15
|
|
14
16
|
POS = {
|
15
17
|
'DET' => 'D',
|
@@ -37,63 +39,68 @@ module Opener
|
|
37
39
|
raise 'missing Stanza server' if ENV['STANZA_SERVER'].blank?
|
38
40
|
|
39
41
|
kaf = KAF::Document.from_xml input
|
40
|
-
|
41
|
-
|
42
|
-
|
42
|
+
lang = LANGUAGES_CACHE.get[kaf.language]
|
43
|
+
env = params.cache_keys.environment
|
44
|
+
unless lang&.environments&.include? env or (params.cache_keys.merged and lang&.environments&.include? 'production')
|
45
|
+
raise Core::UnsupportedLanguageError.new kaf.language
|
46
|
+
end
|
47
|
+
if env == 'production' and !lang.supported_by_opener
|
43
48
|
raise Core::UnsupportedLanguageError.new kaf.language
|
44
49
|
end
|
45
50
|
|
46
|
-
input
|
47
|
-
input
|
48
|
-
response
|
51
|
+
input = kaf.raw
|
52
|
+
input = input.gsub(/\,[^\ ]/, ', ')
|
53
|
+
response = Faraday.post BASE_URL, {lang: kaf.language, input: input}.to_query
|
49
54
|
raise Core::UnsupportedLanguageError, kaf.language if response.status == 406
|
50
55
|
raise response.body if response.status >= 400
|
51
|
-
|
56
|
+
sentences = JSON.parse response.body
|
57
|
+
sentences.each{ |s| s.map!{ |t| Hashie::Mash.new t } }
|
52
58
|
|
53
59
|
w_index = 0
|
54
60
|
|
55
61
|
miscs = {}
|
56
|
-
|
62
|
+
sentences.each.with_index do |s, i|
|
57
63
|
miscs[i] = {}
|
58
|
-
|
59
|
-
word
|
64
|
+
s.each do |word|
|
65
|
+
word.id.is_a?(Array) && word.id.each{ |id| miscs[i][id] = word.misc }
|
60
66
|
end
|
61
67
|
end
|
62
68
|
|
63
|
-
|
64
|
-
|
65
|
-
|
69
|
+
sentences.map{ |s| s.reverse! } if RTL_LANGUAGES.include? kaf.language
|
70
|
+
sentences.each.with_index do |s, s_index|
|
71
|
+
s.each do |word|
|
66
72
|
w_index += 1
|
67
73
|
# save misc for later usase in a MWT case
|
68
|
-
next if word
|
74
|
+
next if word.id.is_a? Array
|
69
75
|
|
70
|
-
misc = word
|
76
|
+
misc = word.misc || miscs[s_index][word.id]
|
71
77
|
|
72
|
-
Rollbar.scoped({ input: input, params: params,
|
78
|
+
Rollbar.scoped({ input: input, params: params, sentences: sentences, word: word }) do
|
73
79
|
raise 'Missing misc'
|
74
80
|
end if misc.nil?
|
75
81
|
|
76
82
|
offset = misc.match(/start_char=(\d+)|/)[1].to_i
|
77
83
|
length = misc.match(/end_char=(\d+)/)[1].to_i - offset
|
78
84
|
|
79
|
-
u_pos = word
|
85
|
+
u_pos = word.upos
|
80
86
|
pos = POS[u_pos]
|
81
87
|
raise "Didn't find a map for #{u_pos}" if pos.nil?
|
82
88
|
type = if POS_OPEN.include? pos then 'open' else 'close' end
|
83
89
|
|
84
|
-
params =
|
90
|
+
params = Hashie::Mash.new(
|
85
91
|
wid: w_index,
|
86
92
|
sid: s_index + 1,
|
87
93
|
tid: w_index,
|
88
94
|
para: 1,
|
89
95
|
offset: offset,
|
90
96
|
length: length,
|
91
|
-
text: word
|
92
|
-
lemma: word
|
97
|
+
text: word.text,
|
98
|
+
lemma: word.lemma,
|
93
99
|
morphofeat: u_pos,
|
94
100
|
pos: pos,
|
95
101
|
type: type,
|
96
|
-
|
102
|
+
head: word.head,
|
103
|
+
)
|
97
104
|
|
98
105
|
kaf.add_word_form params
|
99
106
|
kaf.add_term params
|
@@ -36,7 +36,7 @@ Gem::Specification.new do |spec|
|
|
36
36
|
spec.add_dependency 'opener-tokenizer', '>= 2.2.0'
|
37
37
|
spec.add_dependency 'opener-pos-tagger', '>= 3.2.0'
|
38
38
|
spec.add_dependency 'opener-property-tagger', '>= 3.4.0'
|
39
|
-
spec.add_dependency 'opener-polarity-tagger', '>= 3.
|
39
|
+
spec.add_dependency 'opener-polarity-tagger', '>= 3.5.0'
|
40
40
|
spec.add_dependency 'opener-opinion-detector-basic', '>= 3.2.3'
|
41
41
|
|
42
42
|
spec.add_development_dependency 'bundler', '~> 1.3'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-chained-daemon
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.
|
4
|
+
version: 3.3.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-02-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -198,14 +198,14 @@ dependencies:
|
|
198
198
|
requirements:
|
199
199
|
- - ">="
|
200
200
|
- !ruby/object:Gem::Version
|
201
|
-
version: 3.
|
201
|
+
version: 3.5.0
|
202
202
|
type: :runtime
|
203
203
|
prerelease: false
|
204
204
|
version_requirements: !ruby/object:Gem::Requirement
|
205
205
|
requirements:
|
206
206
|
- - ">="
|
207
207
|
- !ruby/object:Gem::Version
|
208
|
-
version: 3.
|
208
|
+
version: 3.5.0
|
209
209
|
- !ruby/object:Gem::Dependency
|
210
210
|
name: opener-opinion-detector-basic
|
211
211
|
requirement: !ruby/object:Gem::Requirement
|
@@ -307,10 +307,10 @@ dependencies:
|
|
307
307
|
description: OpeNER daemon for processing multiple queues at once
|
308
308
|
email:
|
309
309
|
executables:
|
310
|
-
- chained-daemon
|
310
|
+
- chained-daemon-daemon
|
311
311
|
- chained-daemon-csv
|
312
312
|
- console
|
313
|
-
- chained-daemon
|
313
|
+
- chained-daemon
|
314
314
|
extensions: []
|
315
315
|
extra_rdoc_files: []
|
316
316
|
files:
|
@@ -330,7 +330,7 @@ files:
|
|
330
330
|
- lib/opener/kaf/document.rb
|
331
331
|
- lib/opener/kaf/term.rb
|
332
332
|
- lib/opener/kaf/text.rb
|
333
|
-
- lib/opener/stanza/
|
333
|
+
- lib/opener/stanza/processor.rb
|
334
334
|
- lib/opener/sym_mash.rb
|
335
335
|
- opener-chained-daemon.gemspec
|
336
336
|
homepage:
|
@@ -353,7 +353,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
353
353
|
version: '0'
|
354
354
|
requirements: []
|
355
355
|
rubyforge_project:
|
356
|
-
rubygems_version: 2.7.
|
356
|
+
rubygems_version: 2.7.6.2
|
357
357
|
signing_key:
|
358
358
|
specification_version: 4
|
359
359
|
summary: OpeNER daemon for processing multiple queues at once
|