opener-property-tagger 3.3.4 → 3.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4498897f273195326764dae62875d3901919f69014df57d0919d88b0755ee777
|
4
|
+
data.tar.gz: e0c3c1cfcc32cf260024c2c4b5fee4afb7e6a2b4e46ba690499e8b151e959e52
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0f0557b5cb1604b6edec19c6d5c9b2d3cc892792d7562209d619cfe0f1f09c04fa0a340f3197e8f5963d0dbefbfb890d18e9051f82705eaa346f20682b724eb1
|
7
|
+
data.tar.gz: 828fb63990b968d4511548b5ed6b05a44574cc9b0ed933745ddb7053cc642400ddf1ff7966f50a55b6dc84223a1fbbb8882c9cb28850dd3a8e98e550e9453183
|
@@ -57,11 +57,11 @@ module Opener
|
|
57
57
|
|
58
58
|
add_linguistic_processor
|
59
59
|
|
60
|
-
|
60
|
+
pretty ? pretty_print(document) : document.to_xml
|
61
61
|
end
|
62
62
|
|
63
63
|
def language
|
64
|
-
|
64
|
+
@language ||= document.at_xpath('KAF').attr('xml:lang')
|
65
65
|
end
|
66
66
|
|
67
67
|
def terms
|
@@ -69,11 +69,11 @@ module Opener
|
|
69
69
|
@terms = {}
|
70
70
|
|
71
71
|
document.xpath('KAF/terms/term').each do |term|
|
72
|
-
@terms[term.attr('tid').to_sym] = term.attr('lemma')
|
72
|
+
@terms[term.attr('tid').to_sym] = { lemma: term.attr('lemma'), text: term.attr('text')}
|
73
73
|
end
|
74
74
|
end
|
75
75
|
|
76
|
-
|
76
|
+
@terms
|
77
77
|
end
|
78
78
|
|
79
79
|
##
|
@@ -81,37 +81,39 @@ module Opener
|
|
81
81
|
# @return [Hash]
|
82
82
|
#
|
83
83
|
def extract_aspects
|
84
|
-
term_ids
|
85
|
-
lemmas
|
84
|
+
term_ids = terms.keys
|
85
|
+
lemmas = terms.values
|
86
|
+
uniq_aspects = Hash.new { |hash, key| hash[key] = [] }
|
86
87
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
88
|
+
[:lemma, :text].each do |k|
|
89
|
+
current_token = 0
|
90
|
+
# Use of n-grams to determine if a unigram (1 lemma) or bigram (2
|
91
|
+
# lemmas) belong to a property.
|
92
|
+
max_ngram = 2
|
91
93
|
|
92
|
-
uniq_aspects = Hash.new { |hash, key| hash[key] = [] }
|
93
94
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
95
|
+
while current_token < terms.count
|
96
|
+
(0..max_ngram).each do |tam_ngram|
|
97
|
+
if current_token + tam_ngram <= terms.count
|
98
|
+
ngram = lemmas[current_token..current_token+tam_ngram].map{|a| a[k] }.join(" ").downcase
|
98
99
|
|
99
|
-
|
100
|
-
|
101
|
-
|
100
|
+
if aspects[ngram.to_sym]
|
101
|
+
properties = aspects[ngram.to_sym]
|
102
|
+
ids = term_ids[current_token..current_token+tam_ngram]
|
102
103
|
|
103
|
-
|
104
|
-
|
104
|
+
properties.uniq.each do |property|
|
105
|
+
next if !property or property.strip.empty?
|
105
106
|
|
106
|
-
|
107
|
+
uniq_aspects[property.to_sym] << [ids,ngram] unless uniq_aspects[property.to_sym].include? [ids,ngram]
|
108
|
+
end
|
107
109
|
end
|
108
110
|
end
|
109
111
|
end
|
112
|
+
current_token += 1
|
110
113
|
end
|
111
|
-
current_token += 1
|
112
114
|
end
|
113
115
|
|
114
|
-
|
116
|
+
Hash[uniq_aspects.sort]
|
115
117
|
end
|
116
118
|
|
117
119
|
##
|
@@ -190,7 +192,7 @@ module Opener
|
|
190
192
|
formatter.compact = true
|
191
193
|
formatter.write(doc, out)
|
192
194
|
|
193
|
-
|
195
|
+
out.strip
|
194
196
|
end
|
195
197
|
|
196
198
|
protected
|
@@ -206,7 +208,7 @@ module Opener
|
|
206
208
|
|
207
209
|
parent_node.add_child node
|
208
210
|
|
209
|
-
|
211
|
+
node
|
210
212
|
end
|
211
213
|
|
212
214
|
##
|
@@ -214,7 +216,7 @@ module Opener
|
|
214
216
|
# @return [Boolean]
|
215
217
|
#
|
216
218
|
def is_kaf?
|
217
|
-
|
219
|
+
!!document.at_xpath('KAF')
|
218
220
|
end
|
219
221
|
|
220
222
|
##
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-property-tagger
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.
|
4
|
+
version: 3.3.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-11-
|
11
|
+
date: 2020-11-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: opener-daemons
|
@@ -167,9 +167,9 @@ dependencies:
|
|
167
167
|
description: Property tagger for hotels in Dutch and English.
|
168
168
|
email:
|
169
169
|
executables:
|
170
|
-
- property-tagger
|
171
170
|
- property-tagger-daemon
|
172
171
|
- property-tagger-server
|
172
|
+
- property-tagger
|
173
173
|
extensions: []
|
174
174
|
extra_rdoc_files: []
|
175
175
|
files:
|
@@ -213,7 +213,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
213
213
|
version: '0'
|
214
214
|
requirements: []
|
215
215
|
rubyforge_project:
|
216
|
-
rubygems_version: 2.7.
|
216
|
+
rubygems_version: 2.7.6
|
217
217
|
signing_key:
|
218
218
|
specification_version: 4
|
219
219
|
summary: Property tagger for hotels in Dutch and English.
|