opener-property-tagger 3.2.0 → 3.3.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/opener/property_tagger.rb +6 -7
- data/lib/opener/property_tagger/{aspects_cache.rb → file_aspects_cache.rb} +6 -4
- data/lib/opener/property_tagger/processor.rb +22 -30
- data/lib/opener/property_tagger/remote_aspects_cache.rb +28 -7
- data/lib/opener/property_tagger/version.rb +1 -1
- data/opener-property-tagger.gemspec +2 -1
- metadata +20 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 436af27476deb0372f325ed255151bf5128608405c740289931ecb630b111ab4
|
4
|
+
data.tar.gz: 5ff2d6bf101e606a786f0b4a9da8021a231ea07fac29fb51c452a1aefa737954
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4918a0be797e2776329863a1f9f43609b9b30390e6165e73c4e089f533ce769a077e82fa928091285dbf866039dad589b7a650b7c6ffd9cc403c355862a9bd5b
|
7
|
+
data.tar.gz: bcac64dc7d1f229b264976070c05f64e5ff42c54ba2f809f905beb4d7597acea6c71923c271c4bc5dd77c979aa78ffdcb74757c5d27f6819bb6e62186d41fed8
|
@@ -1,18 +1,19 @@
|
|
1
1
|
require 'open3'
|
2
2
|
require 'slop'
|
3
|
-
require '
|
3
|
+
require 'nokogiri'
|
4
4
|
require 'monitor'
|
5
5
|
require 'httpclient'
|
6
6
|
require 'hashie'
|
7
7
|
require 'json'
|
8
|
+
require 'active_support/all'
|
8
9
|
|
9
10
|
require 'rexml/document'
|
10
11
|
require 'rexml/formatters/pretty'
|
11
12
|
|
12
13
|
require_relative 'property_tagger/version'
|
13
14
|
require_relative 'property_tagger/cli'
|
14
|
-
require_relative 'property_tagger/aspects_cache'
|
15
15
|
require_relative 'property_tagger/remote_aspects_cache'
|
16
|
+
require_relative 'property_tagger/file_aspects_cache'
|
16
17
|
require_relative 'property_tagger/processor'
|
17
18
|
|
18
19
|
module Opener
|
@@ -51,10 +52,7 @@ module Opener
|
|
51
52
|
|
52
53
|
@path = options[:resource_path] || ENV['RESOURCE_PATH'] ||
|
53
54
|
ENV['PROPERTY_TAGGER_LEXICONS_PATH']
|
54
|
-
|
55
|
-
unless @path
|
56
|
-
raise ArgumentError, 'No lexicon path provided'
|
57
|
-
end
|
55
|
+
return unless @path
|
58
56
|
|
59
57
|
@path = File.expand_path @path
|
60
58
|
end
|
@@ -69,10 +67,11 @@ module Opener
|
|
69
67
|
# @param [String] input
|
70
68
|
# @return [String]
|
71
69
|
#
|
72
|
-
def run input
|
70
|
+
def run input, params = {}
|
73
71
|
timestamp = !options[:no_time]
|
74
72
|
|
75
73
|
Processor.new(input,
|
74
|
+
params: params,
|
76
75
|
url: remote_url,
|
77
76
|
path: path,
|
78
77
|
timestamp: timestamp,
|
@@ -3,7 +3,8 @@ module Opener
|
|
3
3
|
##
|
4
4
|
# Thread-safe cache for storing the contents of aspect files.
|
5
5
|
#
|
6
|
-
class
|
6
|
+
class FileAspectsCache
|
7
|
+
|
7
8
|
include MonitorMixin
|
8
9
|
|
9
10
|
def initialize
|
@@ -42,6 +43,7 @@ module Opener
|
|
42
43
|
|
43
44
|
return mapping
|
44
45
|
end
|
45
|
-
|
46
|
-
|
47
|
-
end
|
46
|
+
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
@@ -12,9 +12,9 @@ module Opener
|
|
12
12
|
##
|
13
13
|
# Global cache used for storing loaded aspects.
|
14
14
|
#
|
15
|
-
# @return [Opener::PropertyTagger::
|
15
|
+
# @return [Opener::PropertyTagger::FileAspectsCache.new]
|
16
16
|
#
|
17
|
-
|
17
|
+
FILE_ASPECTS_CACHE = FileAspectsCache.new
|
18
18
|
REMOTE_ASPECTS_CACHE = RemoteAspectsCache.new
|
19
19
|
|
20
20
|
##
|
@@ -24,17 +24,19 @@ module Opener
|
|
24
24
|
# @param [TrueClass|FalseClass] pretty Enable pretty formatting, disabled
|
25
25
|
# by default due to the performance overhead.
|
26
26
|
#
|
27
|
-
def initialize file, url: nil, path: nil, timestamp: true, pretty: false
|
28
|
-
@document =
|
27
|
+
def initialize file, params: {}, url: nil, path: nil, timestamp: true, pretty: false
|
28
|
+
@document = Nokogiri.XML file
|
29
29
|
raise 'Error parsing input. Input is required to be KAF' unless is_kaf?
|
30
30
|
@timestamp = timestamp
|
31
31
|
@pretty = pretty
|
32
32
|
|
33
|
+
@params = params
|
34
|
+
@cache_keys = params[:cache_keys] || {lang: language}
|
33
35
|
@remote = !url.nil?
|
34
36
|
@aspects_path = path
|
35
37
|
@aspects_url = url
|
36
38
|
|
37
|
-
@aspects = if @remote then REMOTE_ASPECTS_CACHE[
|
39
|
+
@aspects = if @remote then REMOTE_ASPECTS_CACHE[**@cache_keys].aspects else FILE_ASPECTS_CACHE[aspects_file] end
|
38
40
|
end
|
39
41
|
|
40
42
|
##
|
@@ -58,25 +60,16 @@ module Opener
|
|
58
60
|
return pretty ? pretty_print(document) : document.to_xml
|
59
61
|
end
|
60
62
|
|
61
|
-
##
|
62
|
-
# Get the language of the input file.
|
63
|
-
#
|
64
|
-
# @return [String]
|
65
|
-
#
|
66
63
|
def language
|
67
|
-
return @language ||= document.at_xpath('KAF').
|
64
|
+
return @language ||= document.at_xpath('KAF').attr('xml:lang')
|
68
65
|
end
|
69
66
|
|
70
|
-
##
|
71
|
-
# Get the terms from the input file
|
72
|
-
# @return [Hash]
|
73
|
-
#
|
74
67
|
def terms
|
75
68
|
unless @terms
|
76
69
|
@terms = {}
|
77
70
|
|
78
71
|
document.xpath('KAF/terms/term').each do |term|
|
79
|
-
@terms[term.
|
72
|
+
@terms[term.attr('tid').to_sym] = term.attr('lemma')
|
80
73
|
end
|
81
74
|
end
|
82
75
|
|
@@ -141,22 +134,21 @@ module Opener
|
|
141
134
|
def add_property(key, value, index)
|
142
135
|
property_node = new_node("property", "KAF/features/properties")
|
143
136
|
|
144
|
-
property_node
|
145
|
-
property_node
|
137
|
+
property_node['lemma'] = key.to_s
|
138
|
+
property_node['pid'] = "p#{index.to_s}"
|
146
139
|
|
147
140
|
references_node = new_node("references", property_node)
|
148
141
|
|
149
142
|
value.uniq.each do |v|
|
150
|
-
|
151
|
-
|
152
|
-
references_node.children << comment
|
143
|
+
comm_node = Nokogiri::XML::Comment.new(references_node, " #{v.last} ")
|
144
|
+
references_node.add_child comm_node
|
153
145
|
|
154
146
|
span_node = new_node("span", references_node)
|
155
147
|
|
156
148
|
v.first.each do |val|
|
157
|
-
target_node
|
149
|
+
target_node = new_node("target", span_node)
|
158
150
|
|
159
|
-
target_node
|
151
|
+
target_node['id'] = val.to_s
|
160
152
|
end
|
161
153
|
end
|
162
154
|
end
|
@@ -167,19 +159,19 @@ module Opener
|
|
167
159
|
version = '2.0'
|
168
160
|
|
169
161
|
node = new_node('linguisticProcessors', 'KAF/kafHeader')
|
170
|
-
node
|
162
|
+
node['layer'] = 'features'
|
171
163
|
|
172
164
|
lp_node = new_node('lp', node)
|
173
165
|
|
174
|
-
lp_node
|
175
|
-
lp_node
|
166
|
+
lp_node['version'] = "#{last_edited}-#{version}"
|
167
|
+
lp_node['name'] = description
|
176
168
|
|
177
169
|
if timestamp
|
178
170
|
format = '%Y-%m-%dT%H:%M:%S%Z'
|
179
171
|
|
180
|
-
lp_node
|
172
|
+
lp_node['timestamp'] = Time.now.strftime(format)
|
181
173
|
else
|
182
|
-
lp_node
|
174
|
+
lp_node['timestamp'] = '*'
|
183
175
|
end
|
184
176
|
end
|
185
177
|
|
@@ -210,9 +202,9 @@ module Opener
|
|
210
202
|
parent_node = parent
|
211
203
|
end
|
212
204
|
|
213
|
-
node =
|
205
|
+
node = Nokogiri::XML::Element.new(tag, document)
|
214
206
|
|
215
|
-
parent_node.
|
207
|
+
parent_node.add_child node
|
216
208
|
|
217
209
|
return node
|
218
210
|
end
|
@@ -7,6 +7,8 @@ module Opener
|
|
7
7
|
|
8
8
|
include MonitorMixin
|
9
9
|
|
10
|
+
UPDATE_INTERVAL = (ENV['CACHE_EXPIRE_MINS']&.to_i || 5).minutes
|
11
|
+
|
10
12
|
def initialize
|
11
13
|
super
|
12
14
|
|
@@ -14,24 +16,43 @@ module Opener
|
|
14
16
|
@cache = {}
|
15
17
|
end
|
16
18
|
|
17
|
-
def []
|
19
|
+
def [] **params
|
18
20
|
synchronize do
|
19
|
-
@cache[
|
21
|
+
existing = @cache[params]
|
22
|
+
break existing if existing and existing.from > UPDATE_INTERVAL.ago
|
23
|
+
@cache[params] = cache_update existing, **params
|
20
24
|
end
|
21
25
|
end
|
22
26
|
alias_method :get, :[]
|
23
27
|
|
24
|
-
def
|
25
|
-
|
26
|
-
|
28
|
+
def cache_update existing = nil, **params
|
29
|
+
from = Time.now
|
30
|
+
lexicons = load_aspects cache: existing, **params
|
31
|
+
|
32
|
+
if existing and lexicons.blank?
|
33
|
+
existing.from = from
|
34
|
+
return existing
|
35
|
+
end
|
36
|
+
|
37
|
+
Hashie::Mash.new(
|
38
|
+
aspects: lexicons,
|
39
|
+
from: from,
|
40
|
+
)
|
41
|
+
end
|
42
|
+
|
43
|
+
def load_aspects lang:, cache:, **params
|
44
|
+
url = "#{@url}&language_code=#{lang}&#{params.to_query}"
|
45
|
+
url += "&if_updated_since=#{cache.from.utc.iso8601}" if cache
|
46
|
+
puts "#{lang}: loading aspects from #{url}"
|
47
|
+
|
27
48
|
lexicons = JSON.parse HTTPClient.new.get(url).body
|
28
49
|
lexicons = lexicons['data'].map{ |l| Hashie::Mash.new l }
|
29
|
-
|
50
|
+
mapping = Hash.new{ |hash, key| hash[key] = [] }
|
30
51
|
lexicons.each do |l|
|
31
52
|
mapping[l.lemma.to_sym] << l.aspect
|
32
53
|
end
|
33
54
|
|
34
|
-
|
55
|
+
mapping
|
35
56
|
end
|
36
57
|
|
37
58
|
end
|
@@ -28,9 +28,10 @@ Gem::Specification.new do |gem|
|
|
28
28
|
gem.add_dependency 'opener-webservice', '~> 2.1'
|
29
29
|
gem.add_dependency 'opener-core', '~> 2.2'
|
30
30
|
|
31
|
-
gem.add_dependency '
|
31
|
+
gem.add_dependency 'nokogiri'
|
32
32
|
gem.add_dependency 'httpclient'
|
33
33
|
gem.add_dependency 'hashie'
|
34
|
+
gem.add_dependency 'activesupport'
|
34
35
|
|
35
36
|
gem.add_development_dependency 'rspec', '~> 3.0'
|
36
37
|
gem.add_development_dependency 'cucumber'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-property-tagger
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-09
|
11
|
+
date: 2020-11-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: opener-daemons
|
@@ -53,25 +53,19 @@ dependencies:
|
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '2.2'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: nokogiri
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - "~>"
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '1.0'
|
62
59
|
- - ">="
|
63
60
|
- !ruby/object:Gem::Version
|
64
|
-
version:
|
61
|
+
version: '0'
|
65
62
|
type: :runtime
|
66
63
|
prerelease: false
|
67
64
|
version_requirements: !ruby/object:Gem::Requirement
|
68
65
|
requirements:
|
69
|
-
- - "~>"
|
70
|
-
- !ruby/object:Gem::Version
|
71
|
-
version: '1.0'
|
72
66
|
- - ">="
|
73
67
|
- !ruby/object:Gem::Version
|
74
|
-
version:
|
68
|
+
version: '0'
|
75
69
|
- !ruby/object:Gem::Dependency
|
76
70
|
name: httpclient
|
77
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -100,6 +94,20 @@ dependencies:
|
|
100
94
|
- - ">="
|
101
95
|
- !ruby/object:Gem::Version
|
102
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: activesupport
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
103
111
|
- !ruby/object:Gem::Dependency
|
104
112
|
name: rspec
|
105
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -173,8 +181,8 @@ files:
|
|
173
181
|
- config.ru
|
174
182
|
- exec/property-tagger.rb
|
175
183
|
- lib/opener/property_tagger.rb
|
176
|
-
- lib/opener/property_tagger/aspects_cache.rb
|
177
184
|
- lib/opener/property_tagger/cli.rb
|
185
|
+
- lib/opener/property_tagger/file_aspects_cache.rb
|
178
186
|
- lib/opener/property_tagger/processor.rb
|
179
187
|
- lib/opener/property_tagger/public/markdown.css
|
180
188
|
- lib/opener/property_tagger/remote_aspects_cache.rb
|