opener-chained-daemon 3.3.1 → 3.3.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 79cb9c8054d212f2c89a21080c690e644b40a22a3e05faa51db78dfdae187fd8
4
- data.tar.gz: b21914d6cbee14b48f15f94c623014bdfee4ddf22a35eb96059e72c1a8cf6b9f
3
+ metadata.gz: adaeee08b8374c1047c08bed61dc5159ff18178d6f12ccac8715aea05a27019b
4
+ data.tar.gz: 78e762ec93e9aa6fd19a617690daa19676753d0249727e56b88f299ca60ae2e9
5
5
  SHA512:
6
- metadata.gz: ba7d21f0ec57b3207da58a7c2b153df79f3d797012fb5e934d82a90ba9cca8ea67a254a009d112572b810ef454bfbc8f5bb07376d2ac3941037c4c97c7bbcd2f
7
- data.tar.gz: a17df2a6081b365381d099a75f70e0ac432236082245324922aa822e47d63f5eb1b53845b688bfef4fcec263da8901e9f14e23f0b39bea1af48183d4d2955574
6
+ metadata.gz: fc2f6223c49f7b4aa78bb4dc1a82f569443c579e3888339067b9faef568c45cfe43854829c0e6256b091fa4b2c6ca2509f8679659f09e66bb5f2614cd521bb91
7
+ data.tar.gz: cc60b5c3d37a621ebd41608751c2f569aa26af3e36fc1f5a5134168b66bcaf7efb6c0868110e5d69b1a9ae8a50a4b53fb08a3015806fcbf4452eefb19f1f68bb
@@ -12,12 +12,10 @@ require_relative 'sym_mash'
12
12
 
13
13
  require_relative 'chained_daemon/languages_cache'
14
14
  require 'opener/language_identifier'
15
- require 'opener/tokenizer'
16
- require 'opener/pos_tagger' if RUBY_ENGINE == 'jruby'
17
15
  require 'opener/polarity_tagger'
18
16
  require 'opener/property_tagger'
19
17
  require 'opener/opinion_detector_basic'
20
- require 'opener/stanza/tokenizer_pos'
18
+ require 'opener/stanza/processor'
21
19
 
22
20
  require_relative 'chained_daemon/chained_daemon'
23
21
  require_relative 'chained_daemon/cli'
@@ -1,6 +1,12 @@
1
1
  module Opener
2
2
  class ChainedDaemon
3
3
 
4
+ class_attribute :http
5
+ self.http = HTTPClient.new
6
+ self.http.send_timeout = 600
7
+ self.http.receive_timeout = 600
8
+ self.http.connect_timeout = 600
9
+
4
10
  DEFAULT_OPTIONS = {
5
11
  }
6
12
 
@@ -8,9 +14,7 @@ module Opener
8
14
  @options = DEFAULT_OPTIONS.merge options
9
15
  @queue_map = {
10
16
  'opener-language-identifier': Opener::LanguageIdentifier.new,
11
- 'stanza-tokenizer-pos': Stanza::TokenizerPos.new, # replace this tokenizer-pos with both below with you dont have a stanza server
12
- #'opener-tokenizer': Opener::Tokenizer.new,
13
- #'opener-pos-tagger': Opener::POSTagger.new,
17
+ 'stanza-processor': Stanza::Processor.new,
14
18
  'opener-property-tagger': Opener::PropertyTagger.new,
15
19
  'opener-polarity-tagger': Opener::PolarityTagger.new,
16
20
  'opener-opinion-detector-basic': Opener::OpinionDetectorBasic.new,
@@ -24,6 +28,7 @@ module Opener
24
28
  if params.filter_vertical and params.property_type.present?
25
29
  params.cache_keys.property_type = params.property_type
26
30
  end
31
+ params.cache_keys.environment ||= 'production'
27
32
 
28
33
  lang = nil
29
34
  output = nil
@@ -49,7 +54,7 @@ module Opener
49
54
  output = xml.to_s
50
55
  end
51
56
 
52
- output = pretty_print output if params.cache_keys&.environment == 'staging'
57
+ output = pretty_print output if params.cache_keys.environment == 'staging'
53
58
  output
54
59
 
55
60
  rescue Core::UnsupportedLanguageError
@@ -48,10 +48,18 @@ Example:
48
48
  daemon = ChainedDaemon.new args: args
49
49
  input = STDIN.tty? ? nil : STDIN.read
50
50
  params = if ENV['PARAMS'] then JSON.parse ENV['PARAMS'] else {} end
51
+
51
52
  # Set environment as staging from console for testing purposes
52
- params[:cache_keys] = { environment: 'staging', merged: true }
53
+ env = ENV['LEXICONS_ENV'] || 'staging'
54
+ pt = ENV['LEXICONS_PROPERTY_TYPE']
55
+ params[:cache_keys] = {
56
+ environment: env,
57
+ property_type: pt,
58
+ merged: (true if env == 'staging'),
59
+ }
53
60
 
54
- puts daemon.run input, params || {}
61
+ output = daemon.run input, params
62
+ puts output
55
63
  end
56
64
  end
57
65
  end
@@ -19,25 +19,15 @@ module Opener
19
19
  break @cache if @last_updated and @last_updated > UPDATE_INTERVAL.ago
20
20
  cache_update
21
21
  end
22
- @cache
23
22
  end
24
23
 
25
24
  def cache_update
26
25
  puts "loading supported languages from url #{@url}" if ENV['DEBUG']
27
26
 
28
- languages = JSON.parse http.get(@url).body
29
- @cache = languages['data'].map { |l| l['code'] }
27
+ languages = SymMash.new JSON.parse ChainedDaemon.http.get(@url).body
30
28
  @last_updated = Time.now
31
- end
32
-
33
- def http
34
- return @http if @http
35
-
36
- @http = HTTPClient.new
37
- @http.send_timeout = 120
38
- @http.receive_timeout = 120
39
- @http.connect_timeout = 120
40
- @http
29
+ @cache = languages.data.each.with_object({}){ |l,h| h[l.code] = l }
30
+ @cache
41
31
  end
42
32
 
43
33
  end
@@ -1,7 +1,7 @@
1
1
  module Opener
2
2
  class ChainedDaemon
3
3
 
4
- VERSION = '3.3.1'
4
+ VERSION = '3.3.6'
5
5
 
6
6
  end
7
7
  end
@@ -48,13 +48,14 @@ module Opener
48
48
 
49
49
  def add_word_form params
50
50
  text = @document.at('text') || @document.root.add_child('<text/>').first
51
- wf = text.add_child("<wf>#{params[:text]}</wf>")
51
+ wf = text.add_child("<wf>#{params.text}</wf>")
52
52
  attrs = {
53
- wid: "w#{params[:wid]}",
54
- sent: params[:sid],
55
- para: params[:para],
56
- offset: params[:offset],
57
- length: params[:length],
53
+ wid: "w#{params.wid}",
54
+ sent: params.sid,
55
+ para: params.para,
56
+ offset: params.offset,
57
+ length: params.length,
58
+ head: params.head,
58
59
  }
59
60
  wf.attr attrs
60
61
  end
@@ -63,15 +64,16 @@ module Opener
63
64
  text = @document.at('terms') || @document.root.add_child('<terms/>').first
64
65
  term = text.add_child("<term/>")
65
66
  attrs = {
66
- tid: "t#{params[:tid]}",
67
- type: params[:type],
68
- lemma: params[:lemma],
69
- text: params[:text],
70
- pos: params[:pos],
71
- morphofeat: params[:morphofeat],
67
+ tid: "t#{params.tid}",
68
+ type: params.type,
69
+ lemma: params.lemma,
70
+ text: params.text,
71
+ pos: params.pos,
72
+ morphofeat: params.morphofeat,
73
+ head: params.head,
72
74
  }
73
75
  term.attr attrs
74
- term.first.add_child("<span><target id='w#{params[:wid]}' /></span>")
76
+ term.first.add_child("<span><target id='w#{params.wid}'/></span>")
75
77
  end
76
78
 
77
79
  def to_xml
@@ -1,6 +1,7 @@
1
1
  module Opener
2
2
  module KAF
3
3
  class WordForm
4
+
4
5
  def initialize(document, xml_node)
5
6
  @document = document
6
7
  @xml_node = xml_node
@@ -25,6 +26,7 @@ module Opener
25
26
  def paragraph
26
27
  return @paragraph ||= @xml_node.attr('para').to_i
27
28
  end
29
+
28
30
  end
29
31
  end
30
32
  end
@@ -1,6 +1,6 @@
1
1
  module Opener
2
2
  module Stanza
3
- class TokenizerPos
3
+ class Processor
4
4
 
5
5
  DESC = 'Tokenizer / POS by Stanza'
6
6
  VERSION = '1.0'
@@ -8,8 +8,10 @@ module Opener
8
8
  BASE_URL = ENV['STANZA_SERVER']
9
9
  LANGUAGES_CACHE = Opener::ChainedDaemon::LanguagesCache.new
10
10
 
11
- RTL_LANGUAGES = [ "ar", "ara", "arc", "ae", "ave", "egy", "he", "heb", "nqo", "pal", "phn", "sam",
12
- "syc", "syr", "fa", "per", "fas", "ku", "kur", "ur", "urd" ]
11
+ RTL_LANGUAGES = %w[
12
+ ar ara arc ae ave egy he heb nqo pal phn sam
13
+ syc syr fa per fas ku kur ur urd
14
+ ]
13
15
 
14
16
  POS = {
15
17
  'DET' => 'D',
@@ -37,63 +39,68 @@ module Opener
37
39
  raise 'missing Stanza server' if ENV['STANZA_SERVER'].blank?
38
40
 
39
41
  kaf = KAF::Document.from_xml input
40
-
41
- prod = params[:cache_keys][:environment] == 'production'
42
- if prod and !LANGUAGES_CACHE.get.include?(kaf.language)
42
+ lang = LANGUAGES_CACHE.get[kaf.language]
43
+ env = params.cache_keys.environment
44
+ unless lang&.environments&.include? env or (params.cache_keys.merged and lang&.environments&.include? 'production')
45
+ raise Core::UnsupportedLanguageError.new kaf.language
46
+ end
47
+ if env == 'production' and !lang.supported_by_opener
43
48
  raise Core::UnsupportedLanguageError.new kaf.language
44
49
  end
45
50
 
46
- input = kaf.raw
47
- input = input.gsub(/\,[^\ ]/, ', ')
48
- response = Faraday.post BASE_URL, {lang: kaf.language, input: input}.to_query
51
+ input = kaf.raw
52
+ input = input.gsub(/\,[^\ ]/, ', ')
53
+ response = ChainedDaemon.http.post BASE_URL, {lang: kaf.language, input: input}.to_query
49
54
  raise Core::UnsupportedLanguageError, kaf.language if response.status == 406
50
55
  raise response.body if response.status >= 400
51
- tokens = JSON.parse response.body
56
+ sentences = JSON.parse response.body
57
+ sentences.each{ |s| s.map!{ |t| Hashie::Mash.new t } }
52
58
 
53
59
  w_index = 0
54
60
 
55
61
  miscs = {}
56
- tokens.each_with_index do |t, i|
62
+ sentences.each.with_index do |s, i|
57
63
  miscs[i] = {}
58
- t.each do |word|
59
- word['id'].is_a?(Array) && word['id'].each { |id| miscs[i][id] = word['misc'] }
64
+ s.each do |word|
65
+ word.id.is_a?(Array) && word.id.each{ |id| miscs[i][id] = word.misc }
60
66
  end
61
67
  end
62
68
 
63
- tokens.map{ |t| t.reverse! } if RTL_LANGUAGES.include? kaf.language
64
- tokens.each_with_index do |sentence, s_index|
65
- sentence.each_with_index do |word|
69
+ sentences.map{ |s| s.reverse! } if RTL_LANGUAGES.include? kaf.language
70
+ sentences.each.with_index do |s, s_index|
71
+ s.each do |word|
66
72
  w_index += 1
67
73
  # save misc for later usage in an MWT case
68
- next if word['id'].is_a? Array
74
+ next if word.id.is_a? Array
69
75
 
70
- misc = word['misc'] || miscs[s_index][word['id']]
76
+ misc = word.misc || miscs[s_index][word.id]
71
77
 
72
- Rollbar.scoped({ input: input, params: params, tokens: tokens, word: word }) do
78
+ Rollbar.scoped({ input: input, params: params, sentences: sentences, word: word }) do
73
79
  raise 'Missing misc'
74
80
  end if misc.nil?
75
81
 
76
82
  offset = misc.match(/start_char=(\d+)|/)[1].to_i
77
83
  length = misc.match(/end_char=(\d+)/)[1].to_i - offset
78
84
 
79
- u_pos = word['upos']
85
+ u_pos = word.upos
80
86
  pos = POS[u_pos]
81
87
  raise "Didn't find a map for #{u_pos}" if pos.nil?
82
88
  type = if POS_OPEN.include? pos then 'open' else 'close' end
83
89
 
84
- params = {
90
+ params = Hashie::Mash.new(
85
91
  wid: w_index,
86
92
  sid: s_index + 1,
87
93
  tid: w_index,
88
94
  para: 1,
89
95
  offset: offset,
90
96
  length: length,
91
- text: word['text'],
92
- lemma: word['lemma'],
97
+ text: word.text,
98
+ lemma: word.lemma,
93
99
  morphofeat: u_pos,
94
100
  pos: pos,
95
101
  type: type,
96
- }
102
+ head: word.head,
103
+ )
97
104
 
98
105
  kaf.add_word_form params
99
106
  kaf.add_term params
@@ -28,7 +28,6 @@ Gem::Specification.new do |spec|
28
28
 
29
29
  spec.add_dependency 'roda'
30
30
  spec.add_dependency 'rack-timeout'
31
- spec.add_dependency 'faraday'
32
31
  spec.add_dependency 'opener-daemons', '~> 2.7.2'
33
32
  spec.add_dependency 'opener-callback-handler', '~> 1.0'
34
33
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-chained-daemon
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.1
4
+ version: 3.3.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-02-04 00:00:00.000000000 Z
11
+ date: 2021-02-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -94,20 +94,6 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
- - !ruby/object:Gem::Dependency
98
- name: faraday
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - ">="
102
- - !ruby/object:Gem::Version
103
- version: '0'
104
- type: :runtime
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - ">="
109
- - !ruby/object:Gem::Version
110
- version: '0'
111
97
  - !ruby/object:Gem::Dependency
112
98
  name: opener-daemons
113
99
  requirement: !ruby/object:Gem::Requirement
@@ -330,7 +316,7 @@ files:
330
316
  - lib/opener/kaf/document.rb
331
317
  - lib/opener/kaf/term.rb
332
318
  - lib/opener/kaf/text.rb
333
- - lib/opener/stanza/tokenizer_pos.rb
319
+ - lib/opener/stanza/processor.rb
334
320
  - lib/opener/sym_mash.rb
335
321
  - opener-chained-daemon.gemspec
336
322
  homepage: