opener-chained-daemon 3.1.5 → 3.3.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1a3d261875bffe0427ec49e37eb23a13dc8ddddb6a0677681f7a21a550961da5
4
- data.tar.gz: eaf306aa0f178899dde39b6995efa08985cb00020636a3bcaa81061b2d5bc1c2
3
+ metadata.gz: 5bdd841299458126e66df2dff1857ea3c2d2386efab9f092def36357935e2f9d
4
+ data.tar.gz: 140ff88d7ddd90a90bc21106bc9a0d3370802969bca20820ee04b45398e975c3
5
5
  SHA512:
6
- metadata.gz: 98a9a18b0130a61f41697c38deb88fe48769be4c5263a05aad4bd91b472db3d47b3e1cd36e69a6f6c6a5860a7413a4322cf8853837e7155d0e0adf0087c492cf
7
- data.tar.gz: 2783a9005454540407af1a2fd07f748db06ee02716720abc9695f532448e367322fe09e6c28df59e8b690435aa1958c7ae10cc773a23418a1ff6993480984fc2
6
+ metadata.gz: 7026baaf0c0541725cb3459575bec129354bada59be1aa30174425051d26ff973f4a0359985cb58b1ad70035ba6c6c9db4409b29631cf9b1d8fbe16152a59268
7
+ data.tar.gz: d947f1bc75e048b56d18b5406ed455a365c406fc1b01bf6e4b6ee0ae4584f2513e45589f93d5e1486c47fe34e1305d4a0105612ec7b17dc5d75e67c0658e0d0a
@@ -8,14 +8,14 @@ require 'rexml/formatters/pretty'
8
8
 
9
9
  require 'opener/daemons'
10
10
 
11
+ require_relative 'sym_mash'
12
+
11
13
  require_relative 'chained_daemon/languages_cache'
12
14
  require 'opener/language_identifier'
13
- require 'opener/tokenizer'
14
- require 'opener/pos_tagger' if RUBY_ENGINE == 'jruby'
15
15
  require 'opener/polarity_tagger'
16
16
  require 'opener/property_tagger'
17
17
  require 'opener/opinion_detector_basic'
18
- require 'opener/stanza/tokenizer_pos'
18
+ require 'opener/stanza/processor'
19
19
 
20
20
  require_relative 'chained_daemon/chained_daemon'
21
21
  require_relative 'chained_daemon/cli'
@@ -8,20 +8,21 @@ module Opener
8
8
  @options = DEFAULT_OPTIONS.merge options
9
9
  @queue_map = {
10
10
  'opener-language-identifier': Opener::LanguageIdentifier.new,
11
- 'stanza-tokenizer-pos': Stanza::TokenizerPos.new, # replace this tokenizer-pos with both below when you don't have a stanza server
12
- #'opener-tokenizer': Opener::Tokenizer.new,
13
- #'opener-pos-tagger': Opener::POSTagger.new,
11
+ 'stanza-processor': Stanza::Processor.new,
14
12
  'opener-property-tagger': Opener::PropertyTagger.new,
15
13
  'opener-polarity-tagger': Opener::PolarityTagger.new,
16
14
  'opener-opinion-detector-basic': Opener::OpinionDetectorBasic.new,
17
15
  }
18
16
  end
19
17
 
20
- def run input, params = {}
21
- params ||= {}
22
- params.deep_symbolize_keys!
23
- params[:translate_languages] ||= []
24
- params[:cache_keys] = params[:cache_keys]&.sort&.to_h || {}
18
+ def run input, _params = {}
19
+ params = SymMash.new _params
20
+ params.translate_languages ||= []
21
+ params.cache_keys = SymMash.new params.cache_keys&.to_h&.sort&.to_h || {}
22
+ if params.filter_vertical and params.property_type.present?
23
+ params.cache_keys.property_type = params.property_type
24
+ end
25
+ params.cache_keys.environment ||= 'production'
25
26
 
26
27
  lang = nil
27
28
  output = nil
@@ -34,7 +35,7 @@ module Opener
34
35
  rescue Core::UnsupportedLanguageError
35
36
  xml = Nokogiri.parse input
36
37
  lang = xml.root.attr('xml:lang')
37
- raise unless lang.in? params[:translate_languages]
38
+ raise unless lang.in? params.translate_languages
38
39
 
39
40
  input = translate xml, params
40
41
  retry
@@ -47,7 +48,7 @@ module Opener
47
48
  output = xml.to_s
48
49
  end
49
50
 
50
- output = pretty_print output if params[:cache_keys][:environment] == 'staging'
51
+ output = pretty_print output if params.cache_keys.environment == 'staging'
51
52
  output
52
53
 
53
54
  rescue Core::UnsupportedLanguageError
@@ -84,7 +85,7 @@ module Opener
84
85
  protected
85
86
 
86
87
  def translate_service params
87
- params[:translate_service]&.to_sym || :google
88
+ params.translate_service&.to_sym || :google
88
89
  end
89
90
 
90
91
  def google_translator
@@ -47,16 +47,19 @@ Example:
47
47
  run do |opts, args|
48
48
  daemon = ChainedDaemon.new args: args
49
49
  input = STDIN.tty? ? nil : STDIN.read
50
+ params = if ENV['PARAMS'] then JSON.parse ENV['PARAMS'] else {} end
50
51
 
51
- params = if ENV['PARAMS']
52
- JSON.parse ENV['PARAMS']
53
- else
54
- {}
55
- end
56
52
  # Set environment as staging from console for testing purposes
57
- params[:cache_keys] = { environment: 'staging', merged: true }
53
+ env = ENV['LEXICONS_ENV'] || 'staging'
54
+ pt = ENV['LEXICONS_PROPERTY_TYPE']
55
+ params[:cache_keys] = {
56
+ environment: env,
57
+ property_type: pt,
58
+ merged: (true if env == 'staging'),
59
+ }
58
60
 
59
- puts daemon.run input, params || {}
61
+ output = daemon.run input, params
62
+ puts output
60
63
  end
61
64
  end
62
65
  end
@@ -19,15 +19,15 @@ module Opener
19
19
  break @cache if @last_updated and @last_updated > UPDATE_INTERVAL.ago
20
20
  cache_update
21
21
  end
22
- @cache
23
22
  end
24
23
 
25
24
  def cache_update
26
25
  puts "loading supported languages from url #{@url}" if ENV['DEBUG']
27
26
 
28
- languages = JSON.parse http.get(@url).body
29
- @cache = languages['data'].map { |l| l['code'] }
27
+ languages = SymMash.new JSON.parse http.get(@url).body
30
28
  @last_updated = Time.now
29
+ @cache = languages.data.each.with_object({}){ |l,h| h[l.code] = l }
30
+ @cache
31
31
  end
32
32
 
33
33
  def http
@@ -1,7 +1,7 @@
1
1
  module Opener
2
2
  class ChainedDaemon
3
3
 
4
- VERSION = '3.1.5'
4
+ VERSION = '3.3.3'
5
5
 
6
6
  end
7
7
  end
@@ -48,13 +48,14 @@ module Opener
48
48
 
49
49
  def add_word_form params
50
50
  text = @document.at('text') || @document.root.add_child('<text/>').first
51
- wf = text.add_child("<wf>#{params[:text]}</wf>")
51
+ wf = text.add_child("<wf>#{params.text}</wf>")
52
52
  attrs = {
53
- wid: "w#{params[:wid]}",
54
- sent: params[:sid],
55
- para: params[:para],
56
- offset: params[:offset],
57
- length: params[:length],
53
+ wid: "w#{params.wid}",
54
+ sent: params.sid,
55
+ para: params.para,
56
+ offset: params.offset,
57
+ length: params.length,
58
+ head: params.head,
58
59
  }
59
60
  wf.attr attrs
60
61
  end
@@ -63,15 +64,16 @@ module Opener
63
64
  text = @document.at('terms') || @document.root.add_child('<terms/>').first
64
65
  term = text.add_child("<term/>")
65
66
  attrs = {
66
- tid: "t#{params[:tid]}",
67
- type: params[:type],
68
- lemma: params[:lemma],
69
- text: params[:text],
70
- pos: params[:pos],
71
- morphofeat: params[:morphofeat],
67
+ tid: "t#{params.tid}",
68
+ type: params.type,
69
+ lemma: params.lemma,
70
+ text: params.text,
71
+ pos: params.pos,
72
+ morphofeat: params.morphofeat,
73
+ head: params.head,
72
74
  }
73
75
  term.attr attrs
74
- term.first.add_child("<span><target id='w#{params[:wid]}' /></span>")
76
+ term.first.add_child("<span><target id='w#{params.wid}'/></span>")
75
77
  end
76
78
 
77
79
  def to_xml
@@ -1,6 +1,7 @@
1
1
  module Opener
2
2
  module KAF
3
3
  class WordForm
4
+
4
5
  def initialize(document, xml_node)
5
6
  @document = document
6
7
  @xml_node = xml_node
@@ -25,6 +26,7 @@ module Opener
25
26
  def paragraph
26
27
  return @paragraph ||= @xml_node.attr('para').to_i
27
28
  end
29
+
28
30
  end
29
31
  end
30
32
  end
@@ -1,6 +1,6 @@
1
1
  module Opener
2
2
  module Stanza
3
- class TokenizerPos
3
+ class Processor
4
4
 
5
5
  DESC = 'Tokenizer / POS by Stanza'
6
6
  VERSION = '1.0'
@@ -8,8 +8,10 @@ module Opener
8
8
  BASE_URL = ENV['STANZA_SERVER']
9
9
  LANGUAGES_CACHE = Opener::ChainedDaemon::LanguagesCache.new
10
10
 
11
- RTL_LANGUAGES = [ "ar", "ara", "arc", "ae", "ave", "egy", "he", "heb", "nqo", "pal", "phn", "sam",
12
- "syc", "syr", "fa", "per", "fas", "ku", "kur", "ur", "urd" ]
11
+ RTL_LANGUAGES = %w[
12
+ ar ara arc ae ave egy he heb nqo pal phn sam
13
+ syc syr fa per fas ku kur ur urd
14
+ ]
13
15
 
14
16
  POS = {
15
17
  'DET' => 'D',
@@ -37,66 +39,65 @@ module Opener
37
39
  raise 'missing Stanza server' if ENV['STANZA_SERVER'].blank?
38
40
 
39
41
  kaf = KAF::Document.from_xml input
40
-
41
- prod = params[:cache_keys][:environment] != 'staging'
42
- if prod and !LANGUAGES_CACHE.get.include?(kaf.language)
42
+ lang = LANGUAGES_CACHE.get[kaf.language]
43
+ env = params.cache_keys.environment
44
+ unless lang&.environments&.include? env or (env == 'staging' and lang&.environments&.include? 'production')
43
45
  raise Core::UnsupportedLanguageError.new kaf.language
44
46
  end
45
47
 
46
- input = kaf.raw
47
- input = input.gsub(/\,[^\ ]/, ', ')
48
- response = Faraday.post BASE_URL, {lang: kaf.language, input: input}.to_query
48
+ input = kaf.raw
49
+ input = input.gsub(/\,[^\ ]/, ', ')
50
+ response = Faraday.post BASE_URL, {lang: kaf.language, input: input}.to_query
49
51
  raise Core::UnsupportedLanguageError, kaf.language if response.status == 406
50
52
  raise response.body if response.status >= 400
51
- tokens = JSON.parse response.body
53
+ sentences = JSON.parse response.body
54
+ sentences.each{ |s| s.map!{ |t| Hashie::Mash.new t } }
52
55
 
53
56
  w_index = 0
54
57
 
55
58
  miscs = {}
56
- tokens.each_with_index do |t, i|
57
- miscs[i] ||= {}
58
- t.each do |word|
59
- word['id'].is_a?(Array) && word['id'].each do |id|
60
- puts id
61
- miscs[i][id] = word['misc']
62
- end
59
+ sentences.each.with_index do |s, i|
60
+ miscs[i] = {}
61
+ s.each do |word|
62
+ word.id.is_a?(Array) && word.id.each{ |id| miscs[i][id] = word.misc }
63
63
  end
64
64
  end
65
65
 
66
- tokens.map{ |t| t.reverse! } if RTL_LANGUAGES.include? kaf.language
67
- tokens.each_with_index do |sentence, s_index|
68
- sentence.each_with_index do |word|
66
+ sentences.map{ |s| s.reverse! } if RTL_LANGUAGES.include? kaf.language
67
+ sentences.each.with_index do |s, s_index|
68
+ s.each do |word|
69
69
  w_index += 1
70
70
  # save misc for later usage in a MWT case
71
- next if word['id'].is_a? Array
71
+ next if word.id.is_a? Array
72
72
 
73
- misc = word['misc'] || miscs[s_index][word['id']]
73
+ misc = word.misc || miscs[s_index][word.id]
74
74
 
75
- Rollbar.scoped({ input: input, params: params, tokens: tokens, word: word }) do
75
+ Rollbar.scoped({ input: input, params: params, sentences: sentences, word: word }) do
76
76
  raise 'Missing misc'
77
77
  end if misc.nil?
78
78
 
79
79
  offset = misc.match(/start_char=(\d+)|/)[1].to_i
80
80
  length = misc.match(/end_char=(\d+)/)[1].to_i - offset
81
81
 
82
- u_pos = word['upos']
82
+ u_pos = word.upos
83
83
  pos = POS[u_pos]
84
84
  raise "Didn't find a map for #{u_pos}" if pos.nil?
85
85
  type = if POS_OPEN.include? pos then 'open' else 'close' end
86
86
 
87
- params = {
87
+ params = Hashie::Mash.new(
88
88
  wid: w_index,
89
89
  sid: s_index + 1,
90
90
  tid: w_index,
91
91
  para: 1,
92
92
  offset: offset,
93
93
  length: length,
94
- text: word['text'],
95
- lemma: word['lemma'],
94
+ text: word.text,
95
+ lemma: word.lemma,
96
96
  morphofeat: u_pos,
97
97
  pos: pos,
98
98
  type: type,
99
- }
99
+ head: word.head,
100
+ )
100
101
 
101
102
  kaf.add_word_form params
102
103
  kaf.add_term params
@@ -0,0 +1,14 @@
1
+ class SymMash < ::Hashie::Mash
2
+
3
+ disable_warnings
4
+
5
+ include Hashie::Extensions::Mash::SymbolizeKeys
6
+
7
+ def inspect
8
+ map do |k,v|
9
+ v = "{#{v.inspect}}" if v.is_a? SymMash
10
+ "#{k}=#{v}"
11
+ end.join ' '
12
+ end
13
+
14
+ end
@@ -35,8 +35,8 @@ Gem::Specification.new do |spec|
35
35
  spec.add_dependency 'opener-language-identifier', '>= 4.4.0'
36
36
  spec.add_dependency 'opener-tokenizer', '>= 2.2.0'
37
37
  spec.add_dependency 'opener-pos-tagger', '>= 3.2.0'
38
- spec.add_dependency 'opener-property-tagger', '>= 3.3.6'
39
- spec.add_dependency 'opener-polarity-tagger', '>= 3.3.0'
38
+ spec.add_dependency 'opener-property-tagger', '>= 3.4.0'
39
+ spec.add_dependency 'opener-polarity-tagger', '>= 3.4.0'
40
40
  spec.add_dependency 'opener-opinion-detector-basic', '>= 3.2.3'
41
41
 
42
42
  spec.add_development_dependency 'bundler', '~> 1.3'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-chained-daemon
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.5
4
+ version: 3.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-11 00:00:00.000000000 Z
11
+ date: 2021-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -184,28 +184,28 @@ dependencies:
184
184
  requirements:
185
185
  - - ">="
186
186
  - !ruby/object:Gem::Version
187
- version: 3.3.6
187
+ version: 3.4.0
188
188
  type: :runtime
189
189
  prerelease: false
190
190
  version_requirements: !ruby/object:Gem::Requirement
191
191
  requirements:
192
192
  - - ">="
193
193
  - !ruby/object:Gem::Version
194
- version: 3.3.6
194
+ version: 3.4.0
195
195
  - !ruby/object:Gem::Dependency
196
196
  name: opener-polarity-tagger
197
197
  requirement: !ruby/object:Gem::Requirement
198
198
  requirements:
199
199
  - - ">="
200
200
  - !ruby/object:Gem::Version
201
- version: 3.3.0
201
+ version: 3.4.0
202
202
  type: :runtime
203
203
  prerelease: false
204
204
  version_requirements: !ruby/object:Gem::Requirement
205
205
  requirements:
206
206
  - - ">="
207
207
  - !ruby/object:Gem::Version
208
- version: 3.3.0
208
+ version: 3.4.0
209
209
  - !ruby/object:Gem::Dependency
210
210
  name: opener-opinion-detector-basic
211
211
  requirement: !ruby/object:Gem::Requirement
@@ -307,10 +307,10 @@ dependencies:
307
307
  description: OpeNER daemon for processing multiple queues at once
308
308
  email:
309
309
  executables:
310
- - chained-daemon
311
310
  - chained-daemon-daemon
312
311
  - chained-daemon-csv
313
312
  - console
313
+ - chained-daemon
314
314
  extensions: []
315
315
  extra_rdoc_files: []
316
316
  files:
@@ -330,7 +330,8 @@ files:
330
330
  - lib/opener/kaf/document.rb
331
331
  - lib/opener/kaf/term.rb
332
332
  - lib/opener/kaf/text.rb
333
- - lib/opener/stanza/tokenizer_pos.rb
333
+ - lib/opener/stanza/processor.rb
334
+ - lib/opener/sym_mash.rb
334
335
  - opener-chained-daemon.gemspec
335
336
  homepage:
336
337
  licenses: