opener-chained-daemon 3.3.0 → 3.3.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 71df3cf3717d569743e19e86677e9c23d8520e424681815209bf3ae22fa6fd41
4
- data.tar.gz: a2090c0b7f42aebba6d75e8ad82df34b5e75ea0b38f0aa883b865dfbdb440540
3
+ metadata.gz: 44922bb7219f85282643092261f4aa995e3ecbe1bbfc202b1da8f1141ff27771
4
+ data.tar.gz: 582a30c7ce359cbbf74179fb8611c315e39ee709008c7b41a0214c7df966af1b
5
5
  SHA512:
6
- metadata.gz: 382efe055a88866b6ca83331f8787a24284f0cfbe31dd92321e3cf9f5c71c3175e470ee8152cee24b17694295345e414c20d2e3f14af869fa5ddbbff24da0d46
7
- data.tar.gz: 76d553e08a5ec465a6cf1e3a5a6e0cc65b53fce661cff527ab5a33cfb5ad94ac09bc0f1fc0fcefa1d9c2c59fee665f9acca5ba9202cfe2acabbfd39b8e7790bb
6
+ metadata.gz: b4b339b252dcbd4439e2a084899afbb3a37fe65e00a442ba25774c6daafcaf59860fc075f71314ea06bb8a2dca9b25315cc5b93d457a3fef6bb08585446e4578
7
+ data.tar.gz: 9fffc136aba038a4499f6f6895cd55848e6173c2be413304d51e0161f0579ae9038d6ac5f84c9d8114268db4b14dcf7d08dfe7a643950879b0f6246c2a579606
@@ -12,12 +12,10 @@ require_relative 'sym_mash'
12
12
 
13
13
  require_relative 'chained_daemon/languages_cache'
14
14
  require 'opener/language_identifier'
15
- require 'opener/tokenizer'
16
- require 'opener/pos_tagger' if RUBY_ENGINE == 'jruby'
17
15
  require 'opener/polarity_tagger'
18
16
  require 'opener/property_tagger'
19
17
  require 'opener/opinion_detector_basic'
20
- require 'opener/stanza/tokenizer_pos'
18
+ require 'opener/stanza/processor'
21
19
 
22
20
  require_relative 'chained_daemon/chained_daemon'
23
21
  require_relative 'chained_daemon/cli'
@@ -8,9 +8,7 @@ module Opener
8
8
  @options = DEFAULT_OPTIONS.merge options
9
9
  @queue_map = {
10
10
  'opener-language-identifier': Opener::LanguageIdentifier.new,
11
- 'stanza-tokenizer-pos': Stanza::TokenizerPos.new, # replace this tokenizer-pos with both below with you dont have a stanza server
12
- #'opener-tokenizer': Opener::Tokenizer.new,
13
- #'opener-pos-tagger': Opener::POSTagger.new,
11
+ 'stanza-processor': Stanza::Processor.new,
14
12
  'opener-property-tagger': Opener::PropertyTagger.new,
15
13
  'opener-polarity-tagger': Opener::PolarityTagger.new,
16
14
  'opener-opinion-detector-basic': Opener::OpinionDetectorBasic.new,
@@ -24,6 +22,7 @@ module Opener
24
22
  if params.filter_vertical and params.property_type.present?
25
23
  params.cache_keys.property_type = params.property_type
26
24
  end
25
+ params.cache_keys.environment ||= 'production'
27
26
 
28
27
  lang = nil
29
28
  output = nil
@@ -49,7 +48,7 @@ module Opener
49
48
  output = xml.to_s
50
49
  end
51
50
 
52
- output = pretty_print output if params.cache_keys&.environment == 'staging'
51
+ output = pretty_print output if params.cache_keys.environment == 'staging'
53
52
  output
54
53
 
55
54
  rescue Core::UnsupportedLanguageError
@@ -48,10 +48,18 @@ Example:
48
48
  daemon = ChainedDaemon.new args: args
49
49
  input = STDIN.tty? ? nil : STDIN.read
50
50
  params = if ENV['PARAMS'] then JSON.parse ENV['PARAMS'] else {} end
51
+
51
52
  # Set environment as staging from console for testing purposes
52
- params[:cache_keys] = { environment: 'staging', merged: true }
53
+ env = ENV['LEXICONS_ENV'] || 'staging'
54
+ pt = ENV['LEXICONS_PROPERTY_TYPE']
55
+ params[:cache_keys] = {
56
+ environment: env,
57
+ property_type: pt,
58
+ merged: (true if env == 'staging'),
59
+ }
53
60
 
54
- puts daemon.run input, params || {}
61
+ output = daemon.run input, params
62
+ puts output
55
63
  end
56
64
  end
57
65
  end
@@ -19,15 +19,15 @@ module Opener
19
19
  break @cache if @last_updated and @last_updated > UPDATE_INTERVAL.ago
20
20
  cache_update
21
21
  end
22
- @cache
23
22
  end
24
23
 
25
24
  def cache_update
26
25
  puts "loading supported languages from url #{@url}" if ENV['DEBUG']
27
26
 
28
- languages = JSON.parse http.get(@url).body
29
- @cache = languages['data'].map { |l| l['code'] }
27
+ languages = SymMash.new JSON.parse http.get(@url).body
30
28
  @last_updated = Time.now
29
+ @cache = languages.data.each.with_object({}){ |l,h| h[l.code] = l }
30
+ @cache
31
31
  end
32
32
 
33
33
  def http
@@ -1,7 +1,7 @@
1
1
  module Opener
2
2
  class ChainedDaemon
3
3
 
4
- VERSION = '3.3.0'
4
+ VERSION = '3.3.5'
5
5
 
6
6
  end
7
7
  end
@@ -48,13 +48,14 @@ module Opener
48
48
 
49
49
  def add_word_form params
50
50
  text = @document.at('text') || @document.root.add_child('<text/>').first
51
- wf = text.add_child("<wf>#{params[:text]}</wf>")
51
+ wf = text.add_child("<wf>#{params.text}</wf>")
52
52
  attrs = {
53
- wid: "w#{params[:wid]}",
54
- sent: params[:sid],
55
- para: params[:para],
56
- offset: params[:offset],
57
- length: params[:length],
53
+ wid: "w#{params.wid}",
54
+ sent: params.sid,
55
+ para: params.para,
56
+ offset: params.offset,
57
+ length: params.length,
58
+ head: params.head,
58
59
  }
59
60
  wf.attr attrs
60
61
  end
@@ -63,15 +64,16 @@ module Opener
63
64
  text = @document.at('terms') || @document.root.add_child('<terms/>').first
64
65
  term = text.add_child("<term/>")
65
66
  attrs = {
66
- tid: "t#{params[:tid]}",
67
- type: params[:type],
68
- lemma: params[:lemma],
69
- text: params[:text],
70
- pos: params[:pos],
71
- morphofeat: params[:morphofeat],
67
+ tid: "t#{params.tid}",
68
+ type: params.type,
69
+ lemma: params.lemma,
70
+ text: params.text,
71
+ pos: params.pos,
72
+ morphofeat: params.morphofeat,
73
+ head: params.head,
72
74
  }
73
75
  term.attr attrs
74
- term.first.add_child("<span><target id='w#{params[:wid]}' /></span>")
76
+ term.first.add_child("<span><target id='w#{params.wid}'/></span>")
75
77
  end
76
78
 
77
79
  def to_xml
@@ -1,6 +1,7 @@
1
1
  module Opener
2
2
  module KAF
3
3
  class WordForm
4
+
4
5
  def initialize(document, xml_node)
5
6
  @document = document
6
7
  @xml_node = xml_node
@@ -25,6 +26,7 @@ module Opener
25
26
  def paragraph
26
27
  return @paragraph ||= @xml_node.attr('para').to_i
27
28
  end
29
+
28
30
  end
29
31
  end
30
32
  end
@@ -1,6 +1,6 @@
1
1
  module Opener
2
2
  module Stanza
3
- class TokenizerPos
3
+ class Processor
4
4
 
5
5
  DESC = 'Tokenizer / POS by Stanza'
6
6
  VERSION = '1.0'
@@ -8,8 +8,10 @@ module Opener
8
8
  BASE_URL = ENV['STANZA_SERVER']
9
9
  LANGUAGES_CACHE = Opener::ChainedDaemon::LanguagesCache.new
10
10
 
11
- RTL_LANGUAGES = [ "ar", "ara", "arc", "ae", "ave", "egy", "he", "heb", "nqo", "pal", "phn", "sam",
12
- "syc", "syr", "fa", "per", "fas", "ku", "kur", "ur", "urd" ]
11
+ RTL_LANGUAGES = %w[
12
+ ar ara arc ae ave egy he heb nqo pal phn sam
13
+ syc syr fa per fas ku kur ur urd
14
+ ]
13
15
 
14
16
  POS = {
15
17
  'DET' => 'D',
@@ -37,63 +39,68 @@ module Opener
37
39
  raise 'missing Stanza server' if ENV['STANZA_SERVER'].blank?
38
40
 
39
41
  kaf = KAF::Document.from_xml input
40
-
41
- prod = params[:cache_keys][:environment] != 'staging'
42
- if prod and !LANGUAGES_CACHE.get.include?(kaf.language)
42
+ lang = LANGUAGES_CACHE.get[kaf.language]
43
+ env = params.cache_keys.environment
44
+ unless lang&.environments&.include? env or (params.cache_keys.merged and lang&.environments&.include? 'production')
45
+ raise Core::UnsupportedLanguageError.new kaf.language
46
+ end
47
+ if env == 'production' and !lang.supported_by_opener
43
48
  raise Core::UnsupportedLanguageError.new kaf.language
44
49
  end
45
50
 
46
- input = kaf.raw
47
- input = input.gsub(/\,[^\ ]/, ', ')
48
- response = Faraday.post BASE_URL, {lang: kaf.language, input: input}.to_query
51
+ input = kaf.raw
52
+ input = input.gsub(/\,[^\ ]/, ', ')
53
+ response = Faraday.post BASE_URL, {lang: kaf.language, input: input}.to_query
49
54
  raise Core::UnsupportedLanguageError, kaf.language if response.status == 406
50
55
  raise response.body if response.status >= 400
51
- tokens = JSON.parse response.body
56
+ sentences = JSON.parse response.body
57
+ sentences.each{ |s| s.map!{ |t| Hashie::Mash.new t } }
52
58
 
53
59
  w_index = 0
54
60
 
55
61
  miscs = {}
56
- tokens.each_with_index do |t, i|
62
+ sentences.each.with_index do |s, i|
57
63
  miscs[i] = {}
58
- t.each do |word|
59
- word['id'].is_a?(Array) && word['id'].each { |id| miscs[i][id] = word['misc'] }
64
+ s.each do |word|
65
+ word.id.is_a?(Array) && word.id.each{ |id| miscs[i][id] = word.misc }
60
66
  end
61
67
  end
62
68
 
63
- tokens.map{ |t| t.reverse! } if RTL_LANGUAGES.include? kaf.language
64
- tokens.each_with_index do |sentence, s_index|
65
- sentence.each_with_index do |word|
69
+ sentences.map{ |s| s.reverse! } if RTL_LANGUAGES.include? kaf.language
70
+ sentences.each.with_index do |s, s_index|
71
+ s.each do |word|
66
72
  w_index += 1
67
73
  # save misc for later usase in a MWT case
68
- next if word['id'].is_a? Array
74
+ next if word.id.is_a? Array
69
75
 
70
- misc = word['misc'] || miscs[s_index][word['id']]
76
+ misc = word.misc || miscs[s_index][word.id]
71
77
 
72
- Rollbar.scoped({ input: input, params: params, tokens: tokens, word: word }) do
78
+ Rollbar.scoped({ input: input, params: params, sentences: sentences, word: word }) do
73
79
  raise 'Missing misc'
74
80
  end if misc.nil?
75
81
 
76
82
  offset = misc.match(/start_char=(\d+)|/)[1].to_i
77
83
  length = misc.match(/end_char=(\d+)/)[1].to_i - offset
78
84
 
79
- u_pos = word['upos']
85
+ u_pos = word.upos
80
86
  pos = POS[u_pos]
81
87
  raise "Didn't find a map for #{u_pos}" if pos.nil?
82
88
  type = if POS_OPEN.include? pos then 'open' else 'close' end
83
89
 
84
- params = {
90
+ params = Hashie::Mash.new(
85
91
  wid: w_index,
86
92
  sid: s_index + 1,
87
93
  tid: w_index,
88
94
  para: 1,
89
95
  offset: offset,
90
96
  length: length,
91
- text: word['text'],
92
- lemma: word['lemma'],
97
+ text: word.text,
98
+ lemma: word.lemma,
93
99
  morphofeat: u_pos,
94
100
  pos: pos,
95
101
  type: type,
96
- }
102
+ head: word.head,
103
+ )
97
104
 
98
105
  kaf.add_word_form params
99
106
  kaf.add_term params
@@ -36,7 +36,7 @@ Gem::Specification.new do |spec|
36
36
  spec.add_dependency 'opener-tokenizer', '>= 2.2.0'
37
37
  spec.add_dependency 'opener-pos-tagger', '>= 3.2.0'
38
38
  spec.add_dependency 'opener-property-tagger', '>= 3.4.0'
39
- spec.add_dependency 'opener-polarity-tagger', '>= 3.4.0'
39
+ spec.add_dependency 'opener-polarity-tagger', '>= 3.5.0'
40
40
  spec.add_dependency 'opener-opinion-detector-basic', '>= 3.2.3'
41
41
 
42
42
  spec.add_development_dependency 'bundler', '~> 1.3'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-chained-daemon
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.0
4
+ version: 3.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-27 00:00:00.000000000 Z
11
+ date: 2021-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -198,14 +198,14 @@ dependencies:
198
198
  requirements:
199
199
  - - ">="
200
200
  - !ruby/object:Gem::Version
201
- version: 3.4.0
201
+ version: 3.5.0
202
202
  type: :runtime
203
203
  prerelease: false
204
204
  version_requirements: !ruby/object:Gem::Requirement
205
205
  requirements:
206
206
  - - ">="
207
207
  - !ruby/object:Gem::Version
208
- version: 3.4.0
208
+ version: 3.5.0
209
209
  - !ruby/object:Gem::Dependency
210
210
  name: opener-opinion-detector-basic
211
211
  requirement: !ruby/object:Gem::Requirement
@@ -307,10 +307,10 @@ dependencies:
307
307
  description: OpeNER daemon for processing multiple queues at once
308
308
  email:
309
309
  executables:
310
- - chained-daemon
310
+ - chained-daemon-daemon
311
311
  - chained-daemon-csv
312
312
  - console
313
- - chained-daemon-daemon
313
+ - chained-daemon
314
314
  extensions: []
315
315
  extra_rdoc_files: []
316
316
  files:
@@ -330,7 +330,7 @@ files:
330
330
  - lib/opener/kaf/document.rb
331
331
  - lib/opener/kaf/term.rb
332
332
  - lib/opener/kaf/text.rb
333
- - lib/opener/stanza/tokenizer_pos.rb
333
+ - lib/opener/stanza/processor.rb
334
334
  - lib/opener/sym_mash.rb
335
335
  - opener-chained-daemon.gemspec
336
336
  homepage:
@@ -353,7 +353,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
353
353
  version: '0'
354
354
  requirements: []
355
355
  rubyforge_project:
356
- rubygems_version: 2.7.8
356
+ rubygems_version: 2.7.6.2
357
357
  signing_key:
358
358
  specification_version: 4
359
359
  summary: OpeNER daemon for processing multiple queues at once