opener-chained-daemon 3.2.0 → 3.3.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ccbd3019511ea9dd574ac013aeadb58f5c480e5a71a641320b047f6220c5288a
4
- data.tar.gz: 5dfa19e0fc0d176bd9bfe5cdbc04740ebf1ee2c15165f99454552d6686f751fe
3
+ metadata.gz: 8d0427acbe8fd6a291ba1545675f971e47a430dd00c9192fc7fb7fa8eb5e3c12
4
+ data.tar.gz: 88705c0187a9862c5dd86301517182cf961811a69579d2f60fec693a8cc39a64
5
5
  SHA512:
6
- metadata.gz: 3d2b23910cdb755ac043ea9143716a18fa305f7dfcb381914dc8387fd94ce72de7bf4a6bca163280c5b2ef14298191aa21604ae0fca01a7e5f77153c0cf63fa6
7
- data.tar.gz: 8c56eb7b23ad95e7d6a50f059154f870fd3b6cd16a3f4ffa2532ea5898e2ff52f903b71138eba3e06e679ce77a03c03972bb1df1cb5238305ddff764d4918274
6
+ metadata.gz: 6edb64c0e9a808a171bf3f9754238b0cbad52cf3e2713bcd86fae2d7932b851e40a1afcc68aa54a896f5c1104dda765026944b5bad81a9b4af9d6a5e6c10a6a6
7
+ data.tar.gz: d16b3460dc985b885f63f19dfe29804a50523c3c900f99a7214578342f82d2d7742cbfb67f5b5d66c96cf0a173832467dbc8057b844355d55a5315c65ddb460c
@@ -12,12 +12,10 @@ require_relative 'sym_mash'
12
12
 
13
13
  require_relative 'chained_daemon/languages_cache'
14
14
  require 'opener/language_identifier'
15
- require 'opener/tokenizer'
16
- require 'opener/pos_tagger' if RUBY_ENGINE == 'jruby'
17
15
  require 'opener/polarity_tagger'
18
16
  require 'opener/property_tagger'
19
17
  require 'opener/opinion_detector_basic'
20
- require 'opener/stanza/tokenizer_pos'
18
+ require 'opener/stanza/processor'
21
19
 
22
20
  require_relative 'chained_daemon/chained_daemon'
23
21
  require_relative 'chained_daemon/cli'
@@ -8,9 +8,7 @@ module Opener
8
8
  @options = DEFAULT_OPTIONS.merge options
9
9
  @queue_map = {
10
10
  'opener-language-identifier': Opener::LanguageIdentifier.new,
11
- 'stanza-tokenizer-pos': Stanza::TokenizerPos.new, # replace this tokenizer-pos with both below with you dont have a stanza server
12
- #'opener-tokenizer': Opener::Tokenizer.new,
13
- #'opener-pos-tagger': Opener::POSTagger.new,
11
+ 'stanza-processor': Stanza::Processor.new,
14
12
  'opener-property-tagger': Opener::PropertyTagger.new,
15
13
  'opener-polarity-tagger': Opener::PolarityTagger.new,
16
14
  'opener-opinion-detector-basic': Opener::OpinionDetectorBasic.new,
@@ -24,6 +22,7 @@ module Opener
24
22
  if params.filter_vertical and params.property_type.present?
25
23
  params.cache_keys.property_type = params.property_type
26
24
  end
25
+ params.cache_keys.environment ||= 'production'
27
26
 
28
27
  lang = nil
29
28
  output = nil
@@ -49,7 +48,7 @@ module Opener
49
48
  output = xml.to_s
50
49
  end
51
50
 
52
- output = pretty_print output if params.cache_keys&.environment == 'staging'
51
+ output = pretty_print output if params.cache_keys.environment == 'staging'
53
52
  output
54
53
 
55
54
  rescue Core::UnsupportedLanguageError
@@ -48,10 +48,18 @@ Example:
48
48
  daemon = ChainedDaemon.new args: args
49
49
  input = STDIN.tty? ? nil : STDIN.read
50
50
  params = if ENV['PARAMS'] then JSON.parse ENV['PARAMS'] else {} end
51
+
51
52
  # Set environment as staging from console for testing purposes
52
- params[:cache_keys] = { environment: 'staging', merged: true }
53
+ env = ENV['LEXICONS_ENV'] || 'staging'
54
+ pt = ENV['LEXICONS_PROPERTY_TYPE']
55
+ params[:cache_keys] = {
56
+ environment: env,
57
+ property_type: pt,
58
+ merged: (true if env == 'staging'),
59
+ }
53
60
 
54
- puts daemon.run input, params || {}
61
+ output = daemon.run input, params
62
+ puts output
55
63
  end
56
64
  end
57
65
  end
@@ -19,15 +19,15 @@ module Opener
19
19
  break @cache if @last_updated and @last_updated > UPDATE_INTERVAL.ago
20
20
  cache_update
21
21
  end
22
- @cache
23
22
  end
24
23
 
25
24
  def cache_update
26
25
  puts "loading supported languages from url #{@url}" if ENV['DEBUG']
27
26
 
28
- languages = JSON.parse http.get(@url).body
29
- @cache = languages['data'].map { |l| l['code'] }
27
+ languages = SymMash.new JSON.parse http.get(@url).body
30
28
  @last_updated = Time.now
29
+ @cache = languages.data.each.with_object({}){ |l,h| h[l.code] = l }
30
+ @cache
31
31
  end
32
32
 
33
33
  def http
@@ -1,7 +1,7 @@
1
1
  module Opener
2
2
  class ChainedDaemon
3
3
 
4
- VERSION = '3.2.0'
4
+ VERSION = '3.3.4'
5
5
 
6
6
  end
7
7
  end
@@ -48,13 +48,14 @@ module Opener
48
48
 
49
49
  def add_word_form params
50
50
  text = @document.at('text') || @document.root.add_child('<text/>').first
51
- wf = text.add_child("<wf>#{params[:text]}</wf>")
51
+ wf = text.add_child("<wf>#{params.text}</wf>")
52
52
  attrs = {
53
- wid: "w#{params[:wid]}",
54
- sent: params[:sid],
55
- para: params[:para],
56
- offset: params[:offset],
57
- length: params[:length],
53
+ wid: "w#{params.wid}",
54
+ sent: params.sid,
55
+ para: params.para,
56
+ offset: params.offset,
57
+ length: params.length,
58
+ head: params.head,
58
59
  }
59
60
  wf.attr attrs
60
61
  end
@@ -63,15 +64,16 @@ module Opener
63
64
  text = @document.at('terms') || @document.root.add_child('<terms/>').first
64
65
  term = text.add_child("<term/>")
65
66
  attrs = {
66
- tid: "t#{params[:tid]}",
67
- type: params[:type],
68
- lemma: params[:lemma],
69
- text: params[:text],
70
- pos: params[:pos],
71
- morphofeat: params[:morphofeat],
67
+ tid: "t#{params.tid}",
68
+ type: params.type,
69
+ lemma: params.lemma,
70
+ text: params.text,
71
+ pos: params.pos,
72
+ morphofeat: params.morphofeat,
73
+ head: params.head,
72
74
  }
73
75
  term.attr attrs
74
- term.first.add_child("<span><target id='w#{params[:wid]}' /></span>")
76
+ term.first.add_child("<span><target id='w#{params.wid}'/></span>")
75
77
  end
76
78
 
77
79
  def to_xml
@@ -1,6 +1,7 @@
1
1
  module Opener
2
2
  module KAF
3
3
  class WordForm
4
+
4
5
  def initialize(document, xml_node)
5
6
  @document = document
6
7
  @xml_node = xml_node
@@ -25,6 +26,7 @@ module Opener
25
26
  def paragraph
26
27
  return @paragraph ||= @xml_node.attr('para').to_i
27
28
  end
29
+
28
30
  end
29
31
  end
30
32
  end
@@ -1,6 +1,6 @@
1
1
  module Opener
2
2
  module Stanza
3
- class TokenizerPos
3
+ class Processor
4
4
 
5
5
  DESC = 'Tokenizer / POS by Stanza'
6
6
  VERSION = '1.0'
@@ -8,8 +8,10 @@ module Opener
8
8
  BASE_URL = ENV['STANZA_SERVER']
9
9
  LANGUAGES_CACHE = Opener::ChainedDaemon::LanguagesCache.new
10
10
 
11
- RTL_LANGUAGES = [ "ar", "ara", "arc", "ae", "ave", "egy", "he", "heb", "nqo", "pal", "phn", "sam",
12
- "syc", "syr", "fa", "per", "fas", "ku", "kur", "ur", "urd" ]
11
+ RTL_LANGUAGES = %w[
12
+ ar ara arc ae ave egy he heb nqo pal phn sam
13
+ syc syr fa per fas ku kur ur urd
14
+ ]
13
15
 
14
16
  POS = {
15
17
  'DET' => 'D',
@@ -37,63 +39,68 @@ module Opener
37
39
  raise 'missing Stanza server' if ENV['STANZA_SERVER'].blank?
38
40
 
39
41
  kaf = KAF::Document.from_xml input
40
-
41
- prod = params[:cache_keys][:environment] != 'staging'
42
- if prod and !LANGUAGES_CACHE.get.include?(kaf.language)
42
+ lang = LANGUAGES_CACHE.get[kaf.language]
43
+ env = params.cache_keys.environment
44
+ unless lang&.environments&.include? env or (params.cache_keys.merged and lang&.environments&.include? 'production')
45
+ raise Core::UnsupportedLanguageError.new kaf.language
46
+ end
47
+ if env == 'production' and !lang.supported_by_opener
43
48
  raise Core::UnsupportedLanguageError.new kaf.language
44
49
  end
45
50
 
46
- input = kaf.raw
47
- input = input.gsub(/\,[^\ ]/, ', ')
48
- response = Faraday.post BASE_URL, {lang: kaf.language, input: input}.to_query
51
+ input = kaf.raw
52
+ input = input.gsub(/\,[^\ ]/, ', ')
53
+ response = Faraday.post BASE_URL, {lang: kaf.language, input: input}.to_query
49
54
  raise Core::UnsupportedLanguageError, kaf.language if response.status == 406
50
55
  raise response.body if response.status >= 400
51
- tokens = JSON.parse response.body
56
+ sentences = JSON.parse response.body
57
+ sentences.each{ |s| s.map!{ |t| Hashie::Mash.new t } }
52
58
 
53
59
  w_index = 0
54
60
 
55
61
  miscs = {}
56
- tokens.each_with_index do |t, i|
62
+ sentences.each.with_index do |s, i|
57
63
  miscs[i] = {}
58
- t.each do |word|
59
- word['id'].is_a?(Array) && word['id'].each { |id| miscs[i][id] = word['misc'] }
64
+ s.each do |word|
65
+ word.id.is_a?(Array) && word.id.each{ |id| miscs[i][id] = word.misc }
60
66
  end
61
67
  end
62
68
 
63
- tokens.map{ |t| t.reverse! } if RTL_LANGUAGES.include? kaf.language
64
- tokens.each_with_index do |sentence, s_index|
65
- sentence.each_with_index do |word|
69
+ sentences.map{ |s| s.reverse! } if RTL_LANGUAGES.include? kaf.language
70
+ sentences.each.with_index do |s, s_index|
71
+ s.each do |word|
66
72
  w_index += 1
67
73
  # save misc for later usage in an MWT case
68
- next if word['id'].is_a? Array
74
+ next if word.id.is_a? Array
69
75
 
70
- misc = word['misc'] || miscs[s_index][word['id']]
76
+ misc = word.misc || miscs[s_index][word.id]
71
77
 
72
- Rollbar.scoped({ input: input, params: params, tokens: tokens, word: word }) do
78
+ Rollbar.scoped({ input: input, params: params, sentences: sentences, word: word }) do
73
79
  raise 'Missing misc'
74
80
  end if misc.nil?
75
81
 
76
82
  offset = misc.match(/start_char=(\d+)|/)[1].to_i
77
83
  length = misc.match(/end_char=(\d+)/)[1].to_i - offset
78
84
 
79
- u_pos = word['upos']
85
+ u_pos = word.upos
80
86
  pos = POS[u_pos]
81
87
  raise "Didn't find a map for #{u_pos}" if pos.nil?
82
88
  type = if POS_OPEN.include? pos then 'open' else 'close' end
83
89
 
84
- params = {
90
+ params = Hashie::Mash.new(
85
91
  wid: w_index,
86
92
  sid: s_index + 1,
87
93
  tid: w_index,
88
94
  para: 1,
89
95
  offset: offset,
90
96
  length: length,
91
- text: word['text'],
92
- lemma: word['lemma'],
97
+ text: word.text,
98
+ lemma: word.lemma,
93
99
  morphofeat: u_pos,
94
100
  pos: pos,
95
101
  type: type,
96
- }
102
+ head: word.head,
103
+ )
97
104
 
98
105
  kaf.add_word_form params
99
106
  kaf.add_term params
@@ -35,8 +35,8 @@ Gem::Specification.new do |spec|
35
35
  spec.add_dependency 'opener-language-identifier', '>= 4.4.0'
36
36
  spec.add_dependency 'opener-tokenizer', '>= 2.2.0'
37
37
  spec.add_dependency 'opener-pos-tagger', '>= 3.2.0'
38
- spec.add_dependency 'opener-property-tagger', '>= 3.3.6'
39
- spec.add_dependency 'opener-polarity-tagger', '>= 3.3.0'
38
+ spec.add_dependency 'opener-property-tagger', '>= 3.4.0'
39
+ spec.add_dependency 'opener-polarity-tagger', '>= 3.4.0'
40
40
  spec.add_dependency 'opener-opinion-detector-basic', '>= 3.2.3'
41
41
 
42
42
  spec.add_development_dependency 'bundler', '~> 1.3'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-chained-daemon
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.0
4
+ version: 3.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-20 00:00:00.000000000 Z
11
+ date: 2021-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -184,28 +184,28 @@ dependencies:
184
184
  requirements:
185
185
  - - ">="
186
186
  - !ruby/object:Gem::Version
187
- version: 3.3.6
187
+ version: 3.4.0
188
188
  type: :runtime
189
189
  prerelease: false
190
190
  version_requirements: !ruby/object:Gem::Requirement
191
191
  requirements:
192
192
  - - ">="
193
193
  - !ruby/object:Gem::Version
194
- version: 3.3.6
194
+ version: 3.4.0
195
195
  - !ruby/object:Gem::Dependency
196
196
  name: opener-polarity-tagger
197
197
  requirement: !ruby/object:Gem::Requirement
198
198
  requirements:
199
199
  - - ">="
200
200
  - !ruby/object:Gem::Version
201
- version: 3.3.0
201
+ version: 3.4.0
202
202
  type: :runtime
203
203
  prerelease: false
204
204
  version_requirements: !ruby/object:Gem::Requirement
205
205
  requirements:
206
206
  - - ">="
207
207
  - !ruby/object:Gem::Version
208
- version: 3.3.0
208
+ version: 3.4.0
209
209
  - !ruby/object:Gem::Dependency
210
210
  name: opener-opinion-detector-basic
211
211
  requirement: !ruby/object:Gem::Requirement
@@ -307,10 +307,10 @@ dependencies:
307
307
  description: OpeNER daemon for processing multiple queues at once
308
308
  email:
309
309
  executables:
310
- - chained-daemon
310
+ - chained-daemon-daemon
311
311
  - chained-daemon-csv
312
312
  - console
313
- - chained-daemon-daemon
313
+ - chained-daemon
314
314
  extensions: []
315
315
  extra_rdoc_files: []
316
316
  files:
@@ -330,7 +330,7 @@ files:
330
330
  - lib/opener/kaf/document.rb
331
331
  - lib/opener/kaf/term.rb
332
332
  - lib/opener/kaf/text.rb
333
- - lib/opener/stanza/tokenizer_pos.rb
333
+ - lib/opener/stanza/processor.rb
334
334
  - lib/opener/sym_mash.rb
335
335
  - opener-chained-daemon.gemspec
336
336
  homepage:
@@ -353,7 +353,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
353
353
  version: '0'
354
354
  requirements: []
355
355
  rubyforge_project:
356
- rubygems_version: 2.7.8
356
+ rubygems_version: 2.7.6.2
357
357
  signing_key:
358
358
  specification_version: 4
359
359
  summary: OpeNER daemon for processing multiple queues at once