opener-property-tagger 3.2.0 → 3.3.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: adec67a48fac849987e8347eeb0bc5a304e930ab6fdc604ae7029012c6505868
4
- data.tar.gz: 9e1dda3b0fecc300a8ef8d4574563cf51e1eb3590601637a0a4d8160bbdccc40
3
+ metadata.gz: 436af27476deb0372f325ed255151bf5128608405c740289931ecb630b111ab4
4
+ data.tar.gz: 5ff2d6bf101e606a786f0b4a9da8021a231ea07fac29fb51c452a1aefa737954
5
5
  SHA512:
6
- metadata.gz: e15b757610a466b6bc1b15e7afcb2c207874c9f696a375a2f56630c88a686f9990a362422a76835685b06a6a6fce32130565fe37e27ca1f1c818ae8a263fe86e
7
- data.tar.gz: a4ce0b27abba305fe0b440a6cb26fce829ceea37e775325be22aad5cc6efb6b45fe161ceb38e407040fb014881683ce0981f56b4defab07862fb81e539102588
6
+ metadata.gz: 4918a0be797e2776329863a1f9f43609b9b30390e6165e73c4e089f533ce769a077e82fa928091285dbf866039dad589b7a650b7c6ffd9cc403c355862a9bd5b
7
+ data.tar.gz: bcac64dc7d1f229b264976070c05f64e5ff42c54ba2f809f905beb4d7597acea6c71923c271c4bc5dd77c979aa78ffdcb74757c5d27f6819bb6e62186d41fed8
@@ -1,18 +1,19 @@
1
1
  require 'open3'
2
2
  require 'slop'
3
- require 'oga'
3
+ require 'nokogiri'
4
4
  require 'monitor'
5
5
  require 'httpclient'
6
6
  require 'hashie'
7
7
  require 'json'
8
+ require 'active_support/all'
8
9
 
9
10
  require 'rexml/document'
10
11
  require 'rexml/formatters/pretty'
11
12
 
12
13
  require_relative 'property_tagger/version'
13
14
  require_relative 'property_tagger/cli'
14
- require_relative 'property_tagger/aspects_cache'
15
15
  require_relative 'property_tagger/remote_aspects_cache'
16
+ require_relative 'property_tagger/file_aspects_cache'
16
17
  require_relative 'property_tagger/processor'
17
18
 
18
19
  module Opener
@@ -51,10 +52,7 @@ module Opener
51
52
 
52
53
  @path = options[:resource_path] || ENV['RESOURCE_PATH'] ||
53
54
  ENV['PROPERTY_TAGGER_LEXICONS_PATH']
54
-
55
- unless @path
56
- raise ArgumentError, 'No lexicon path provided'
57
- end
55
+ return unless @path
58
56
 
59
57
  @path = File.expand_path @path
60
58
  end
@@ -69,10 +67,11 @@ module Opener
69
67
  # @param [String] input
70
68
  # @return [String]
71
69
  #
72
- def run input
70
+ def run input, params = {}
73
71
  timestamp = !options[:no_time]
74
72
 
75
73
  Processor.new(input,
74
+ params: params,
76
75
  url: remote_url,
77
76
  path: path,
78
77
  timestamp: timestamp,
@@ -3,7 +3,8 @@ module Opener
3
3
  ##
4
4
  # Thread-safe cache for storing the contents of aspect files.
5
5
  #
6
- class AspectsCache
6
+ class FileAspectsCache
7
+
7
8
  include MonitorMixin
8
9
 
9
10
  def initialize
@@ -42,6 +43,7 @@ module Opener
42
43
 
43
44
  return mapping
44
45
  end
45
- end # AspectsCache
46
- end # PropertyTagger
47
- end # Opener
46
+
47
+ end
48
+ end
49
+ end
@@ -12,9 +12,9 @@ module Opener
12
12
  ##
13
13
  # Global cache used for storing loaded aspects.
14
14
  #
15
- # @return [Opener::PropertyTagger::AspectsCache.new]
15
+ # @return [Opener::PropertyTagger::FileAspectsCache.new]
16
16
  #
17
- ASPECTS_CACHE = AspectsCache.new
17
+ FILE_ASPECTS_CACHE = FileAspectsCache.new
18
18
  REMOTE_ASPECTS_CACHE = RemoteAspectsCache.new
19
19
 
20
20
  ##
@@ -24,17 +24,19 @@ module Opener
24
24
  # @param [TrueClass|FalseClass] pretty Enable pretty formatting, disabled
25
25
  # by default due to the performance overhead.
26
26
  #
27
- def initialize file, url: nil, path: nil, timestamp: true, pretty: false
28
- @document = Oga.parse_xml file
27
+ def initialize file, params: {}, url: nil, path: nil, timestamp: true, pretty: false
28
+ @document = Nokogiri.XML file
29
29
  raise 'Error parsing input. Input is required to be KAF' unless is_kaf?
30
30
  @timestamp = timestamp
31
31
  @pretty = pretty
32
32
 
33
+ @params = params
34
+ @cache_keys = params[:cache_keys] || {lang: language}
33
35
  @remote = !url.nil?
34
36
  @aspects_path = path
35
37
  @aspects_url = url
36
38
 
37
- @aspects = if @remote then REMOTE_ASPECTS_CACHE[language] else ASPECTS_CACHE[aspects_file] end
39
+ @aspects = if @remote then REMOTE_ASPECTS_CACHE[**@cache_keys].aspects else FILE_ASPECTS_CACHE[aspects_file] end
38
40
  end
39
41
 
40
42
  ##
@@ -58,25 +60,16 @@ module Opener
58
60
  return pretty ? pretty_print(document) : document.to_xml
59
61
  end
60
62
 
61
- ##
62
- # Get the language of the input file.
63
- #
64
- # @return [String]
65
- #
66
63
  def language
67
- return @language ||= document.at_xpath('KAF').get('xml:lang')
64
+ return @language ||= document.at_xpath('KAF').attr('xml:lang')
68
65
  end
69
66
 
70
- ##
71
- # Get the terms from the input file
72
- # @return [Hash]
73
- #
74
67
  def terms
75
68
  unless @terms
76
69
  @terms = {}
77
70
 
78
71
  document.xpath('KAF/terms/term').each do |term|
79
- @terms[term.get('tid').to_sym] = term.get('lemma')
72
+ @terms[term.attr('tid').to_sym] = term.attr('lemma')
80
73
  end
81
74
  end
82
75
 
@@ -141,22 +134,21 @@ module Opener
141
134
  def add_property(key, value, index)
142
135
  property_node = new_node("property", "KAF/features/properties")
143
136
 
144
- property_node.set('lemma', key.to_s)
145
- property_node.set('pid', "p#{index.to_s}")
137
+ property_node['lemma'] = key.to_s
138
+ property_node['pid'] = "p#{index.to_s}"
146
139
 
147
140
  references_node = new_node("references", property_node)
148
141
 
149
142
  value.uniq.each do |v|
150
- comment = Oga::XML::Comment.new(:text => " #{v.last} ")
151
-
152
- references_node.children << comment
143
+ comm_node = Nokogiri::XML::Comment.new(references_node, " #{v.last} ")
144
+ references_node.add_child comm_node
153
145
 
154
146
  span_node = new_node("span", references_node)
155
147
 
156
148
  v.first.each do |val|
157
- target_node = new_node("target", span_node)
149
+ target_node = new_node("target", span_node)
158
150
 
159
- target_node.set('id', val.to_s)
151
+ target_node['id'] = val.to_s
160
152
  end
161
153
  end
162
154
  end
@@ -167,19 +159,19 @@ module Opener
167
159
  version = '2.0'
168
160
 
169
161
  node = new_node('linguisticProcessors', 'KAF/kafHeader')
170
- node.set('layer', 'features')
162
+ node['layer'] = 'features'
171
163
 
172
164
  lp_node = new_node('lp', node)
173
165
 
174
- lp_node.set('version', "#{last_edited}-#{version}")
175
- lp_node.set('name', description)
166
+ lp_node['version'] = "#{last_edited}-#{version}"
167
+ lp_node['name'] = description
176
168
 
177
169
  if timestamp
178
170
  format = '%Y-%m-%dT%H:%M:%S%Z'
179
171
 
180
- lp_node.set('timestamp', Time.now.strftime(format))
172
+ lp_node['timestamp'] = Time.now.strftime(format)
181
173
  else
182
- lp_node.set('timestamp', '*')
174
+ lp_node['timestamp'] = '*'
183
175
  end
184
176
  end
185
177
 
@@ -210,9 +202,9 @@ module Opener
210
202
  parent_node = parent
211
203
  end
212
204
 
213
- node = Oga::XML::Element.new(:name => tag)
205
+ node = Nokogiri::XML::Element.new(tag, document)
214
206
 
215
- parent_node.children << node
207
+ parent_node.add_child node
216
208
 
217
209
  return node
218
210
  end
@@ -7,6 +7,8 @@ module Opener
7
7
 
8
8
  include MonitorMixin
9
9
 
10
+ UPDATE_INTERVAL = (ENV['CACHE_EXPIRE_MINS']&.to_i || 5).minutes
11
+
10
12
  def initialize
11
13
  super
12
14
 
@@ -14,24 +16,43 @@ module Opener
14
16
  @cache = {}
15
17
  end
16
18
 
17
- def [] lang
19
+ def [] **params
18
20
  synchronize do
19
- @cache[lang] ||= load_aspects lang
21
+ existing = @cache[params]
22
+ break existing if existing and existing.from > UPDATE_INTERVAL.ago
23
+ @cache[params] = cache_update existing, **params
20
24
  end
21
25
  end
22
26
  alias_method :get, :[]
23
27
 
24
- def load_aspects lang
25
- mapping = Hash.new{ |hash, key| hash[key] = [] }
26
- url = "#{@url}&language_code=#{lang}"
28
+ def cache_update existing = nil, **params
29
+ from = Time.now
30
+ lexicons = load_aspects cache: existing, **params
31
+
32
+ if existing and lexicons.blank?
33
+ existing.from = from
34
+ return existing
35
+ end
36
+
37
+ Hashie::Mash.new(
38
+ aspects: lexicons,
39
+ from: from,
40
+ )
41
+ end
42
+
43
+ def load_aspects lang:, cache:, **params
44
+ url = "#{@url}&language_code=#{lang}&#{params.to_query}"
45
+ url += "&if_updated_since=#{cache.from.utc.iso8601}" if cache
46
+ puts "#{lang}: loading aspects from #{url}"
47
+
27
48
  lexicons = JSON.parse HTTPClient.new.get(url).body
28
49
  lexicons = lexicons['data'].map{ |l| Hashie::Mash.new l }
29
-
50
+ mapping = Hash.new{ |hash, key| hash[key] = [] }
30
51
  lexicons.each do |l|
31
52
  mapping[l.lemma.to_sym] << l.aspect
32
53
  end
33
54
 
34
- return mapping
55
+ mapping
35
56
  end
36
57
 
37
58
  end
@@ -1,7 +1,7 @@
1
1
  module Opener
2
2
  class PropertyTagger
3
3
 
4
- VERSION = '3.2.0'
4
+ VERSION = '3.3.4'
5
5
 
6
6
  end
7
7
  end
@@ -28,9 +28,10 @@ Gem::Specification.new do |gem|
28
28
  gem.add_dependency 'opener-webservice', '~> 2.1'
29
29
  gem.add_dependency 'opener-core', '~> 2.2'
30
30
 
31
- gem.add_dependency 'oga', ['~> 1.0', '>= 1.3.1']
31
+ gem.add_dependency 'nokogiri'
32
32
  gem.add_dependency 'httpclient'
33
33
  gem.add_dependency 'hashie'
34
+ gem.add_dependency 'activesupport'
34
35
 
35
36
  gem.add_development_dependency 'rspec', '~> 3.0'
36
37
  gem.add_development_dependency 'cucumber'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-property-tagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.0
4
+ version: 3.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-22 00:00:00.000000000 Z
11
+ date: 2020-11-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: opener-daemons
@@ -53,25 +53,19 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: '2.2'
55
55
  - !ruby/object:Gem::Dependency
56
- name: oga
56
+ name: nokogiri
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - "~>"
60
- - !ruby/object:Gem::Version
61
- version: '1.0'
62
59
  - - ">="
63
60
  - !ruby/object:Gem::Version
64
- version: 1.3.1
61
+ version: '0'
65
62
  type: :runtime
66
63
  prerelease: false
67
64
  version_requirements: !ruby/object:Gem::Requirement
68
65
  requirements:
69
- - - "~>"
70
- - !ruby/object:Gem::Version
71
- version: '1.0'
72
66
  - - ">="
73
67
  - !ruby/object:Gem::Version
74
- version: 1.3.1
68
+ version: '0'
75
69
  - !ruby/object:Gem::Dependency
76
70
  name: httpclient
77
71
  requirement: !ruby/object:Gem::Requirement
@@ -100,6 +94,20 @@ dependencies:
100
94
  - - ">="
101
95
  - !ruby/object:Gem::Version
102
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: activesupport
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
103
111
  - !ruby/object:Gem::Dependency
104
112
  name: rspec
105
113
  requirement: !ruby/object:Gem::Requirement
@@ -173,8 +181,8 @@ files:
173
181
  - config.ru
174
182
  - exec/property-tagger.rb
175
183
  - lib/opener/property_tagger.rb
176
- - lib/opener/property_tagger/aspects_cache.rb
177
184
  - lib/opener/property_tagger/cli.rb
185
+ - lib/opener/property_tagger/file_aspects_cache.rb
178
186
  - lib/opener/property_tagger/processor.rb
179
187
  - lib/opener/property_tagger/public/markdown.css
180
188
  - lib/opener/property_tagger/remote_aspects_cache.rb