opener-property-tagger 3.2.0 → 3.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: adec67a48fac849987e8347eeb0bc5a304e930ab6fdc604ae7029012c6505868
4
- data.tar.gz: 9e1dda3b0fecc300a8ef8d4574563cf51e1eb3590601637a0a4d8160bbdccc40
3
+ metadata.gz: 436af27476deb0372f325ed255151bf5128608405c740289931ecb630b111ab4
4
+ data.tar.gz: 5ff2d6bf101e606a786f0b4a9da8021a231ea07fac29fb51c452a1aefa737954
5
5
  SHA512:
6
- metadata.gz: e15b757610a466b6bc1b15e7afcb2c207874c9f696a375a2f56630c88a686f9990a362422a76835685b06a6a6fce32130565fe37e27ca1f1c818ae8a263fe86e
7
- data.tar.gz: a4ce0b27abba305fe0b440a6cb26fce829ceea37e775325be22aad5cc6efb6b45fe161ceb38e407040fb014881683ce0981f56b4defab07862fb81e539102588
6
+ metadata.gz: 4918a0be797e2776329863a1f9f43609b9b30390e6165e73c4e089f533ce769a077e82fa928091285dbf866039dad589b7a650b7c6ffd9cc403c355862a9bd5b
7
+ data.tar.gz: bcac64dc7d1f229b264976070c05f64e5ff42c54ba2f809f905beb4d7597acea6c71923c271c4bc5dd77c979aa78ffdcb74757c5d27f6819bb6e62186d41fed8
@@ -1,18 +1,19 @@
1
1
  require 'open3'
2
2
  require 'slop'
3
- require 'oga'
3
+ require 'nokogiri'
4
4
  require 'monitor'
5
5
  require 'httpclient'
6
6
  require 'hashie'
7
7
  require 'json'
8
+ require 'active_support/all'
8
9
 
9
10
  require 'rexml/document'
10
11
  require 'rexml/formatters/pretty'
11
12
 
12
13
  require_relative 'property_tagger/version'
13
14
  require_relative 'property_tagger/cli'
14
- require_relative 'property_tagger/aspects_cache'
15
15
  require_relative 'property_tagger/remote_aspects_cache'
16
+ require_relative 'property_tagger/file_aspects_cache'
16
17
  require_relative 'property_tagger/processor'
17
18
 
18
19
  module Opener
@@ -51,10 +52,7 @@ module Opener
51
52
 
52
53
  @path = options[:resource_path] || ENV['RESOURCE_PATH'] ||
53
54
  ENV['PROPERTY_TAGGER_LEXICONS_PATH']
54
-
55
- unless @path
56
- raise ArgumentError, 'No lexicon path provided'
57
- end
55
+ return unless @path
58
56
 
59
57
  @path = File.expand_path @path
60
58
  end
@@ -69,10 +67,11 @@ module Opener
69
67
  # @param [String] input
70
68
  # @return [String]
71
69
  #
72
- def run input
70
+ def run input, params = {}
73
71
  timestamp = !options[:no_time]
74
72
 
75
73
  Processor.new(input,
74
+ params: params,
76
75
  url: remote_url,
77
76
  path: path,
78
77
  timestamp: timestamp,
@@ -3,7 +3,8 @@ module Opener
3
3
  ##
4
4
  # Thread-safe cache for storing the contents of aspect files.
5
5
  #
6
- class AspectsCache
6
+ class FileAspectsCache
7
+
7
8
  include MonitorMixin
8
9
 
9
10
  def initialize
@@ -42,6 +43,7 @@ module Opener
42
43
 
43
44
  return mapping
44
45
  end
45
- end # AspectsCache
46
- end # PropertyTagger
47
- end # Opener
46
+
47
+ end
48
+ end
49
+ end
@@ -12,9 +12,9 @@ module Opener
12
12
  ##
13
13
  # Global cache used for storing loaded aspects.
14
14
  #
15
- # @return [Opener::PropertyTagger::AspectsCache.new]
15
+ # @return [Opener::PropertyTagger::FileAspectsCache.new]
16
16
  #
17
- ASPECTS_CACHE = AspectsCache.new
17
+ FILE_ASPECTS_CACHE = FileAspectsCache.new
18
18
  REMOTE_ASPECTS_CACHE = RemoteAspectsCache.new
19
19
 
20
20
  ##
@@ -24,17 +24,19 @@ module Opener
24
24
  # @param [TrueClass|FalseClass] pretty Enable pretty formatting, disabled
25
25
  # by default due to the performance overhead.
26
26
  #
27
- def initialize file, url: nil, path: nil, timestamp: true, pretty: false
28
- @document = Oga.parse_xml file
27
+ def initialize file, params: {}, url: nil, path: nil, timestamp: true, pretty: false
28
+ @document = Nokogiri.XML file
29
29
  raise 'Error parsing input. Input is required to be KAF' unless is_kaf?
30
30
  @timestamp = timestamp
31
31
  @pretty = pretty
32
32
 
33
+ @params = params
34
+ @cache_keys = params[:cache_keys] || {lang: language}
33
35
  @remote = !url.nil?
34
36
  @aspects_path = path
35
37
  @aspects_url = url
36
38
 
37
- @aspects = if @remote then REMOTE_ASPECTS_CACHE[language] else ASPECTS_CACHE[aspects_file] end
39
+ @aspects = if @remote then REMOTE_ASPECTS_CACHE[**@cache_keys].aspects else FILE_ASPECTS_CACHE[aspects_file] end
38
40
  end
39
41
 
40
42
  ##
@@ -58,25 +60,16 @@ module Opener
58
60
  return pretty ? pretty_print(document) : document.to_xml
59
61
  end
60
62
 
61
- ##
62
- # Get the language of the input file.
63
- #
64
- # @return [String]
65
- #
66
63
  def language
67
- return @language ||= document.at_xpath('KAF').get('xml:lang')
64
+ return @language ||= document.at_xpath('KAF').attr('xml:lang')
68
65
  end
69
66
 
70
- ##
71
- # Get the terms from the input file
72
- # @return [Hash]
73
- #
74
67
  def terms
75
68
  unless @terms
76
69
  @terms = {}
77
70
 
78
71
  document.xpath('KAF/terms/term').each do |term|
79
- @terms[term.get('tid').to_sym] = term.get('lemma')
72
+ @terms[term.attr('tid').to_sym] = term.attr('lemma')
80
73
  end
81
74
  end
82
75
 
@@ -141,22 +134,21 @@ module Opener
141
134
  def add_property(key, value, index)
142
135
  property_node = new_node("property", "KAF/features/properties")
143
136
 
144
- property_node.set('lemma', key.to_s)
145
- property_node.set('pid', "p#{index.to_s}")
137
+ property_node['lemma'] = key.to_s
138
+ property_node['pid'] = "p#{index.to_s}"
146
139
 
147
140
  references_node = new_node("references", property_node)
148
141
 
149
142
  value.uniq.each do |v|
150
- comment = Oga::XML::Comment.new(:text => " #{v.last} ")
151
-
152
- references_node.children << comment
143
+ comm_node = Nokogiri::XML::Comment.new(references_node, " #{v.last} ")
144
+ references_node.add_child comm_node
153
145
 
154
146
  span_node = new_node("span", references_node)
155
147
 
156
148
  v.first.each do |val|
157
- target_node = new_node("target", span_node)
149
+ target_node = new_node("target", span_node)
158
150
 
159
- target_node.set('id', val.to_s)
151
+ target_node['id'] = val.to_s
160
152
  end
161
153
  end
162
154
  end
@@ -167,19 +159,19 @@ module Opener
167
159
  version = '2.0'
168
160
 
169
161
  node = new_node('linguisticProcessors', 'KAF/kafHeader')
170
- node.set('layer', 'features')
162
+ node['layer'] = 'features'
171
163
 
172
164
  lp_node = new_node('lp', node)
173
165
 
174
- lp_node.set('version', "#{last_edited}-#{version}")
175
- lp_node.set('name', description)
166
+ lp_node['version'] = "#{last_edited}-#{version}"
167
+ lp_node['name'] = description
176
168
 
177
169
  if timestamp
178
170
  format = '%Y-%m-%dT%H:%M:%S%Z'
179
171
 
180
- lp_node.set('timestamp', Time.now.strftime(format))
172
+ lp_node['timestamp'] = Time.now.strftime(format)
181
173
  else
182
- lp_node.set('timestamp', '*')
174
+ lp_node['timestamp'] = '*'
183
175
  end
184
176
  end
185
177
 
@@ -210,9 +202,9 @@ module Opener
210
202
  parent_node = parent
211
203
  end
212
204
 
213
- node = Oga::XML::Element.new(:name => tag)
205
+ node = Nokogiri::XML::Element.new(tag, document)
214
206
 
215
- parent_node.children << node
207
+ parent_node.add_child node
216
208
 
217
209
  return node
218
210
  end
@@ -7,6 +7,8 @@ module Opener
7
7
 
8
8
  include MonitorMixin
9
9
 
10
+ UPDATE_INTERVAL = (ENV['CACHE_EXPIRE_MINS']&.to_i || 5).minutes
11
+
10
12
  def initialize
11
13
  super
12
14
 
@@ -14,24 +16,43 @@ module Opener
14
16
  @cache = {}
15
17
  end
16
18
 
17
- def [] lang
19
+ def [] **params
18
20
  synchronize do
19
- @cache[lang] ||= load_aspects lang
21
+ existing = @cache[params]
22
+ break existing if existing and existing.from > UPDATE_INTERVAL.ago
23
+ @cache[params] = cache_update existing, **params
20
24
  end
21
25
  end
22
26
  alias_method :get, :[]
23
27
 
24
- def load_aspects lang
25
- mapping = Hash.new{ |hash, key| hash[key] = [] }
26
- url = "#{@url}&language_code=#{lang}"
28
+ def cache_update existing = nil, **params
29
+ from = Time.now
30
+ lexicons = load_aspects cache: existing, **params
31
+
32
+ if existing and lexicons.blank?
33
+ existing.from = from
34
+ return existing
35
+ end
36
+
37
+ Hashie::Mash.new(
38
+ aspects: lexicons,
39
+ from: from,
40
+ )
41
+ end
42
+
43
+ def load_aspects lang:, cache:, **params
44
+ url = "#{@url}&language_code=#{lang}&#{params.to_query}"
45
+ url += "&if_updated_since=#{cache.from.utc.iso8601}" if cache
46
+ puts "#{lang}: loading aspects from #{url}"
47
+
27
48
  lexicons = JSON.parse HTTPClient.new.get(url).body
28
49
  lexicons = lexicons['data'].map{ |l| Hashie::Mash.new l }
29
-
50
+ mapping = Hash.new{ |hash, key| hash[key] = [] }
30
51
  lexicons.each do |l|
31
52
  mapping[l.lemma.to_sym] << l.aspect
32
53
  end
33
54
 
34
- return mapping
55
+ mapping
35
56
  end
36
57
 
37
58
  end
@@ -1,7 +1,7 @@
1
1
  module Opener
2
2
  class PropertyTagger
3
3
 
4
- VERSION = '3.2.0'
4
+ VERSION = '3.3.4'
5
5
 
6
6
  end
7
7
  end
@@ -28,9 +28,10 @@ Gem::Specification.new do |gem|
28
28
  gem.add_dependency 'opener-webservice', '~> 2.1'
29
29
  gem.add_dependency 'opener-core', '~> 2.2'
30
30
 
31
- gem.add_dependency 'oga', ['~> 1.0', '>= 1.3.1']
31
+ gem.add_dependency 'nokogiri'
32
32
  gem.add_dependency 'httpclient'
33
33
  gem.add_dependency 'hashie'
34
+ gem.add_dependency 'activesupport'
34
35
 
35
36
  gem.add_development_dependency 'rspec', '~> 3.0'
36
37
  gem.add_development_dependency 'cucumber'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-property-tagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.0
4
+ version: 3.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-22 00:00:00.000000000 Z
11
+ date: 2020-11-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: opener-daemons
@@ -53,25 +53,19 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: '2.2'
55
55
  - !ruby/object:Gem::Dependency
56
- name: oga
56
+ name: nokogiri
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - "~>"
60
- - !ruby/object:Gem::Version
61
- version: '1.0'
62
59
  - - ">="
63
60
  - !ruby/object:Gem::Version
64
- version: 1.3.1
61
+ version: '0'
65
62
  type: :runtime
66
63
  prerelease: false
67
64
  version_requirements: !ruby/object:Gem::Requirement
68
65
  requirements:
69
- - - "~>"
70
- - !ruby/object:Gem::Version
71
- version: '1.0'
72
66
  - - ">="
73
67
  - !ruby/object:Gem::Version
74
- version: 1.3.1
68
+ version: '0'
75
69
  - !ruby/object:Gem::Dependency
76
70
  name: httpclient
77
71
  requirement: !ruby/object:Gem::Requirement
@@ -100,6 +94,20 @@ dependencies:
100
94
  - - ">="
101
95
  - !ruby/object:Gem::Version
102
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: activesupport
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
103
111
  - !ruby/object:Gem::Dependency
104
112
  name: rspec
105
113
  requirement: !ruby/object:Gem::Requirement
@@ -173,8 +181,8 @@ files:
173
181
  - config.ru
174
182
  - exec/property-tagger.rb
175
183
  - lib/opener/property_tagger.rb
176
- - lib/opener/property_tagger/aspects_cache.rb
177
184
  - lib/opener/property_tagger/cli.rb
185
+ - lib/opener/property_tagger/file_aspects_cache.rb
178
186
  - lib/opener/property_tagger/processor.rb
179
187
  - lib/opener/property_tagger/public/markdown.css
180
188
  - lib/opener/property_tagger/remote_aspects_cache.rb