opener-property-tagger 3.2.0 → 3.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/opener/property_tagger.rb +6 -7
- data/lib/opener/property_tagger/{aspects_cache.rb → file_aspects_cache.rb} +6 -4
- data/lib/opener/property_tagger/processor.rb +22 -30
- data/lib/opener/property_tagger/remote_aspects_cache.rb +28 -7
- data/lib/opener/property_tagger/version.rb +1 -1
- data/opener-property-tagger.gemspec +2 -1
- metadata +20 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 436af27476deb0372f325ed255151bf5128608405c740289931ecb630b111ab4
|
4
|
+
data.tar.gz: 5ff2d6bf101e606a786f0b4a9da8021a231ea07fac29fb51c452a1aefa737954
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4918a0be797e2776329863a1f9f43609b9b30390e6165e73c4e089f533ce769a077e82fa928091285dbf866039dad589b7a650b7c6ffd9cc403c355862a9bd5b
|
7
|
+
data.tar.gz: bcac64dc7d1f229b264976070c05f64e5ff42c54ba2f809f905beb4d7597acea6c71923c271c4bc5dd77c979aa78ffdcb74757c5d27f6819bb6e62186d41fed8
|
@@ -1,18 +1,19 @@
|
|
1
1
|
require 'open3'
|
2
2
|
require 'slop'
|
3
|
-
require '
|
3
|
+
require 'nokogiri'
|
4
4
|
require 'monitor'
|
5
5
|
require 'httpclient'
|
6
6
|
require 'hashie'
|
7
7
|
require 'json'
|
8
|
+
require 'active_support/all'
|
8
9
|
|
9
10
|
require 'rexml/document'
|
10
11
|
require 'rexml/formatters/pretty'
|
11
12
|
|
12
13
|
require_relative 'property_tagger/version'
|
13
14
|
require_relative 'property_tagger/cli'
|
14
|
-
require_relative 'property_tagger/aspects_cache'
|
15
15
|
require_relative 'property_tagger/remote_aspects_cache'
|
16
|
+
require_relative 'property_tagger/file_aspects_cache'
|
16
17
|
require_relative 'property_tagger/processor'
|
17
18
|
|
18
19
|
module Opener
|
@@ -51,10 +52,7 @@ module Opener
|
|
51
52
|
|
52
53
|
@path = options[:resource_path] || ENV['RESOURCE_PATH'] ||
|
53
54
|
ENV['PROPERTY_TAGGER_LEXICONS_PATH']
|
54
|
-
|
55
|
-
unless @path
|
56
|
-
raise ArgumentError, 'No lexicon path provided'
|
57
|
-
end
|
55
|
+
return unless @path
|
58
56
|
|
59
57
|
@path = File.expand_path @path
|
60
58
|
end
|
@@ -69,10 +67,11 @@ module Opener
|
|
69
67
|
# @param [String] input
|
70
68
|
# @return [String]
|
71
69
|
#
|
72
|
-
def run input
|
70
|
+
def run input, params = {}
|
73
71
|
timestamp = !options[:no_time]
|
74
72
|
|
75
73
|
Processor.new(input,
|
74
|
+
params: params,
|
76
75
|
url: remote_url,
|
77
76
|
path: path,
|
78
77
|
timestamp: timestamp,
|
@@ -3,7 +3,8 @@ module Opener
|
|
3
3
|
##
|
4
4
|
# Thread-safe cache for storing the contents of aspect files.
|
5
5
|
#
|
6
|
-
class
|
6
|
+
class FileAspectsCache
|
7
|
+
|
7
8
|
include MonitorMixin
|
8
9
|
|
9
10
|
def initialize
|
@@ -42,6 +43,7 @@ module Opener
|
|
42
43
|
|
43
44
|
return mapping
|
44
45
|
end
|
45
|
-
|
46
|
-
|
47
|
-
end
|
46
|
+
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
@@ -12,9 +12,9 @@ module Opener
|
|
12
12
|
##
|
13
13
|
# Global cache used for storing loaded aspects.
|
14
14
|
#
|
15
|
-
# @return [Opener::PropertyTagger::
|
15
|
+
# @return [Opener::PropertyTagger::FileAspectsCache.new]
|
16
16
|
#
|
17
|
-
|
17
|
+
FILE_ASPECTS_CACHE = FileAspectsCache.new
|
18
18
|
REMOTE_ASPECTS_CACHE = RemoteAspectsCache.new
|
19
19
|
|
20
20
|
##
|
@@ -24,17 +24,19 @@ module Opener
|
|
24
24
|
# @param [TrueClass|FalseClass] pretty Enable pretty formatting, disabled
|
25
25
|
# by default due to the performance overhead.
|
26
26
|
#
|
27
|
-
def initialize file, url: nil, path: nil, timestamp: true, pretty: false
|
28
|
-
@document =
|
27
|
+
def initialize file, params: {}, url: nil, path: nil, timestamp: true, pretty: false
|
28
|
+
@document = Nokogiri.XML file
|
29
29
|
raise 'Error parsing input. Input is required to be KAF' unless is_kaf?
|
30
30
|
@timestamp = timestamp
|
31
31
|
@pretty = pretty
|
32
32
|
|
33
|
+
@params = params
|
34
|
+
@cache_keys = params[:cache_keys] || {lang: language}
|
33
35
|
@remote = !url.nil?
|
34
36
|
@aspects_path = path
|
35
37
|
@aspects_url = url
|
36
38
|
|
37
|
-
@aspects = if @remote then REMOTE_ASPECTS_CACHE[
|
39
|
+
@aspects = if @remote then REMOTE_ASPECTS_CACHE[**@cache_keys].aspects else FILE_ASPECTS_CACHE[aspects_file] end
|
38
40
|
end
|
39
41
|
|
40
42
|
##
|
@@ -58,25 +60,16 @@ module Opener
|
|
58
60
|
return pretty ? pretty_print(document) : document.to_xml
|
59
61
|
end
|
60
62
|
|
61
|
-
##
|
62
|
-
# Get the language of the input file.
|
63
|
-
#
|
64
|
-
# @return [String]
|
65
|
-
#
|
66
63
|
def language
|
67
|
-
return @language ||= document.at_xpath('KAF').
|
64
|
+
return @language ||= document.at_xpath('KAF').attr('xml:lang')
|
68
65
|
end
|
69
66
|
|
70
|
-
##
|
71
|
-
# Get the terms from the input file
|
72
|
-
# @return [Hash]
|
73
|
-
#
|
74
67
|
def terms
|
75
68
|
unless @terms
|
76
69
|
@terms = {}
|
77
70
|
|
78
71
|
document.xpath('KAF/terms/term').each do |term|
|
79
|
-
@terms[term.
|
72
|
+
@terms[term.attr('tid').to_sym] = term.attr('lemma')
|
80
73
|
end
|
81
74
|
end
|
82
75
|
|
@@ -141,22 +134,21 @@ module Opener
|
|
141
134
|
def add_property(key, value, index)
|
142
135
|
property_node = new_node("property", "KAF/features/properties")
|
143
136
|
|
144
|
-
property_node
|
145
|
-
property_node
|
137
|
+
property_node['lemma'] = key.to_s
|
138
|
+
property_node['pid'] = "p#{index.to_s}"
|
146
139
|
|
147
140
|
references_node = new_node("references", property_node)
|
148
141
|
|
149
142
|
value.uniq.each do |v|
|
150
|
-
|
151
|
-
|
152
|
-
references_node.children << comment
|
143
|
+
comm_node = Nokogiri::XML::Comment.new(references_node, " #{v.last} ")
|
144
|
+
references_node.add_child comm_node
|
153
145
|
|
154
146
|
span_node = new_node("span", references_node)
|
155
147
|
|
156
148
|
v.first.each do |val|
|
157
|
-
target_node
|
149
|
+
target_node = new_node("target", span_node)
|
158
150
|
|
159
|
-
target_node
|
151
|
+
target_node['id'] = val.to_s
|
160
152
|
end
|
161
153
|
end
|
162
154
|
end
|
@@ -167,19 +159,19 @@ module Opener
|
|
167
159
|
version = '2.0'
|
168
160
|
|
169
161
|
node = new_node('linguisticProcessors', 'KAF/kafHeader')
|
170
|
-
node
|
162
|
+
node['layer'] = 'features'
|
171
163
|
|
172
164
|
lp_node = new_node('lp', node)
|
173
165
|
|
174
|
-
lp_node
|
175
|
-
lp_node
|
166
|
+
lp_node['version'] = "#{last_edited}-#{version}"
|
167
|
+
lp_node['name'] = description
|
176
168
|
|
177
169
|
if timestamp
|
178
170
|
format = '%Y-%m-%dT%H:%M:%S%Z'
|
179
171
|
|
180
|
-
lp_node
|
172
|
+
lp_node['timestamp'] = Time.now.strftime(format)
|
181
173
|
else
|
182
|
-
lp_node
|
174
|
+
lp_node['timestamp'] = '*'
|
183
175
|
end
|
184
176
|
end
|
185
177
|
|
@@ -210,9 +202,9 @@ module Opener
|
|
210
202
|
parent_node = parent
|
211
203
|
end
|
212
204
|
|
213
|
-
node =
|
205
|
+
node = Nokogiri::XML::Element.new(tag, document)
|
214
206
|
|
215
|
-
parent_node.
|
207
|
+
parent_node.add_child node
|
216
208
|
|
217
209
|
return node
|
218
210
|
end
|
@@ -7,6 +7,8 @@ module Opener
|
|
7
7
|
|
8
8
|
include MonitorMixin
|
9
9
|
|
10
|
+
UPDATE_INTERVAL = (ENV['CACHE_EXPIRE_MINS']&.to_i || 5).minutes
|
11
|
+
|
10
12
|
def initialize
|
11
13
|
super
|
12
14
|
|
@@ -14,24 +16,43 @@ module Opener
|
|
14
16
|
@cache = {}
|
15
17
|
end
|
16
18
|
|
17
|
-
def []
|
19
|
+
def [] **params
|
18
20
|
synchronize do
|
19
|
-
@cache[
|
21
|
+
existing = @cache[params]
|
22
|
+
break existing if existing and existing.from > UPDATE_INTERVAL.ago
|
23
|
+
@cache[params] = cache_update existing, **params
|
20
24
|
end
|
21
25
|
end
|
22
26
|
alias_method :get, :[]
|
23
27
|
|
24
|
-
def
|
25
|
-
|
26
|
-
|
28
|
+
def cache_update existing = nil, **params
|
29
|
+
from = Time.now
|
30
|
+
lexicons = load_aspects cache: existing, **params
|
31
|
+
|
32
|
+
if existing and lexicons.blank?
|
33
|
+
existing.from = from
|
34
|
+
return existing
|
35
|
+
end
|
36
|
+
|
37
|
+
Hashie::Mash.new(
|
38
|
+
aspects: lexicons,
|
39
|
+
from: from,
|
40
|
+
)
|
41
|
+
end
|
42
|
+
|
43
|
+
def load_aspects lang:, cache:, **params
|
44
|
+
url = "#{@url}&language_code=#{lang}&#{params.to_query}"
|
45
|
+
url += "&if_updated_since=#{cache.from.utc.iso8601}" if cache
|
46
|
+
puts "#{lang}: loading aspects from #{url}"
|
47
|
+
|
27
48
|
lexicons = JSON.parse HTTPClient.new.get(url).body
|
28
49
|
lexicons = lexicons['data'].map{ |l| Hashie::Mash.new l }
|
29
|
-
|
50
|
+
mapping = Hash.new{ |hash, key| hash[key] = [] }
|
30
51
|
lexicons.each do |l|
|
31
52
|
mapping[l.lemma.to_sym] << l.aspect
|
32
53
|
end
|
33
54
|
|
34
|
-
|
55
|
+
mapping
|
35
56
|
end
|
36
57
|
|
37
58
|
end
|
@@ -28,9 +28,10 @@ Gem::Specification.new do |gem|
|
|
28
28
|
gem.add_dependency 'opener-webservice', '~> 2.1'
|
29
29
|
gem.add_dependency 'opener-core', '~> 2.2'
|
30
30
|
|
31
|
-
gem.add_dependency '
|
31
|
+
gem.add_dependency 'nokogiri'
|
32
32
|
gem.add_dependency 'httpclient'
|
33
33
|
gem.add_dependency 'hashie'
|
34
|
+
gem.add_dependency 'activesupport'
|
34
35
|
|
35
36
|
gem.add_development_dependency 'rspec', '~> 3.0'
|
36
37
|
gem.add_development_dependency 'cucumber'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-property-tagger
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-09
|
11
|
+
date: 2020-11-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: opener-daemons
|
@@ -53,25 +53,19 @@ dependencies:
|
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '2.2'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: nokogiri
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - "~>"
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '1.0'
|
62
59
|
- - ">="
|
63
60
|
- !ruby/object:Gem::Version
|
64
|
-
version:
|
61
|
+
version: '0'
|
65
62
|
type: :runtime
|
66
63
|
prerelease: false
|
67
64
|
version_requirements: !ruby/object:Gem::Requirement
|
68
65
|
requirements:
|
69
|
-
- - "~>"
|
70
|
-
- !ruby/object:Gem::Version
|
71
|
-
version: '1.0'
|
72
66
|
- - ">="
|
73
67
|
- !ruby/object:Gem::Version
|
74
|
-
version:
|
68
|
+
version: '0'
|
75
69
|
- !ruby/object:Gem::Dependency
|
76
70
|
name: httpclient
|
77
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -100,6 +94,20 @@ dependencies:
|
|
100
94
|
- - ">="
|
101
95
|
- !ruby/object:Gem::Version
|
102
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: activesupport
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
103
111
|
- !ruby/object:Gem::Dependency
|
104
112
|
name: rspec
|
105
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -173,8 +181,8 @@ files:
|
|
173
181
|
- config.ru
|
174
182
|
- exec/property-tagger.rb
|
175
183
|
- lib/opener/property_tagger.rb
|
176
|
-
- lib/opener/property_tagger/aspects_cache.rb
|
177
184
|
- lib/opener/property_tagger/cli.rb
|
185
|
+
- lib/opener/property_tagger/file_aspects_cache.rb
|
178
186
|
- lib/opener/property_tagger/processor.rb
|
179
187
|
- lib/opener/property_tagger/public/markdown.css
|
180
188
|
- lib/opener/property_tagger/remote_aspects_cache.rb
|