opener-property-tagger 3.3.4 → 3.3.5
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4498897f273195326764dae62875d3901919f69014df57d0919d88b0755ee777
|
4
|
+
data.tar.gz: e0c3c1cfcc32cf260024c2c4b5fee4afb7e6a2b4e46ba690499e8b151e959e52
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0f0557b5cb1604b6edec19c6d5c9b2d3cc892792d7562209d619cfe0f1f09c04fa0a340f3197e8f5963d0dbefbfb890d18e9051f82705eaa346f20682b724eb1
|
7
|
+
data.tar.gz: 828fb63990b968d4511548b5ed6b05a44574cc9b0ed933745ddb7053cc642400ddf1ff7966f50a55b6dc84223a1fbbb8882c9cb28850dd3a8e98e550e9453183
|
@@ -57,11 +57,11 @@ module Opener
|
|
57
57
|
|
58
58
|
add_linguistic_processor
|
59
59
|
|
60
|
-
|
60
|
+
pretty ? pretty_print(document) : document.to_xml
|
61
61
|
end
|
62
62
|
|
63
63
|
def language
|
64
|
-
|
64
|
+
@language ||= document.at_xpath('KAF').attr('xml:lang')
|
65
65
|
end
|
66
66
|
|
67
67
|
def terms
|
@@ -69,11 +69,11 @@ module Opener
|
|
69
69
|
@terms = {}
|
70
70
|
|
71
71
|
document.xpath('KAF/terms/term').each do |term|
|
72
|
-
@terms[term.attr('tid').to_sym] = term.attr('lemma')
|
72
|
+
@terms[term.attr('tid').to_sym] = { lemma: term.attr('lemma'), text: term.attr('text')}
|
73
73
|
end
|
74
74
|
end
|
75
75
|
|
76
|
-
|
76
|
+
@terms
|
77
77
|
end
|
78
78
|
|
79
79
|
##
|
@@ -81,37 +81,39 @@ module Opener
|
|
81
81
|
# @return [Hash]
|
82
82
|
#
|
83
83
|
def extract_aspects
|
84
|
-
term_ids
|
85
|
-
lemmas
|
84
|
+
term_ids = terms.keys
|
85
|
+
lemmas = terms.values
|
86
|
+
uniq_aspects = Hash.new { |hash, key| hash[key] = [] }
|
86
87
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
88
|
+
[:lemma, :text].each do |k|
|
89
|
+
current_token = 0
|
90
|
+
# Use of n-grams to determine if a unigram (1 lemma) or bigram (2
|
91
|
+
# lemmas) belong to a property.
|
92
|
+
max_ngram = 2
|
91
93
|
|
92
|
-
uniq_aspects = Hash.new { |hash, key| hash[key] = [] }
|
93
94
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
95
|
+
while current_token < terms.count
|
96
|
+
(0..max_ngram).each do |tam_ngram|
|
97
|
+
if current_token + tam_ngram <= terms.count
|
98
|
+
ngram = lemmas[current_token..current_token+tam_ngram].map{|a| a[k] }.join(" ").downcase
|
98
99
|
|
99
|
-
|
100
|
-
|
101
|
-
|
100
|
+
if aspects[ngram.to_sym]
|
101
|
+
properties = aspects[ngram.to_sym]
|
102
|
+
ids = term_ids[current_token..current_token+tam_ngram]
|
102
103
|
|
103
|
-
|
104
|
-
|
104
|
+
properties.uniq.each do |property|
|
105
|
+
next if !property or property.strip.empty?
|
105
106
|
|
106
|
-
|
107
|
+
uniq_aspects[property.to_sym] << [ids,ngram] unless uniq_aspects[property.to_sym].include? [ids,ngram]
|
108
|
+
end
|
107
109
|
end
|
108
110
|
end
|
109
111
|
end
|
112
|
+
current_token += 1
|
110
113
|
end
|
111
|
-
current_token += 1
|
112
114
|
end
|
113
115
|
|
114
|
-
|
116
|
+
Hash[uniq_aspects.sort]
|
115
117
|
end
|
116
118
|
|
117
119
|
##
|
@@ -190,7 +192,7 @@ module Opener
|
|
190
192
|
formatter.compact = true
|
191
193
|
formatter.write(doc, out)
|
192
194
|
|
193
|
-
|
195
|
+
out.strip
|
194
196
|
end
|
195
197
|
|
196
198
|
protected
|
@@ -206,7 +208,7 @@ module Opener
|
|
206
208
|
|
207
209
|
parent_node.add_child node
|
208
210
|
|
209
|
-
|
211
|
+
node
|
210
212
|
end
|
211
213
|
|
212
214
|
##
|
@@ -214,7 +216,7 @@ module Opener
|
|
214
216
|
# @return [Boolean]
|
215
217
|
#
|
216
218
|
def is_kaf?
|
217
|
-
|
219
|
+
!!document.at_xpath('KAF')
|
218
220
|
end
|
219
221
|
|
220
222
|
##
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-property-tagger
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.
|
4
|
+
version: 3.3.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-11-
|
11
|
+
date: 2020-11-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: opener-daemons
|
@@ -167,9 +167,9 @@ dependencies:
|
|
167
167
|
description: Property tagger for hotels in Dutch and English.
|
168
168
|
email:
|
169
169
|
executables:
|
170
|
-
- property-tagger
|
171
170
|
- property-tagger-daemon
|
172
171
|
- property-tagger-server
|
172
|
+
- property-tagger
|
173
173
|
extensions: []
|
174
174
|
extra_rdoc_files: []
|
175
175
|
files:
|
@@ -213,7 +213,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
213
213
|
version: '0'
|
214
214
|
requirements: []
|
215
215
|
rubyforge_project:
|
216
|
-
rubygems_version: 2.7.
|
216
|
+
rubygems_version: 2.7.6
|
217
217
|
signing_key:
|
218
218
|
specification_version: 4
|
219
219
|
summary: Property tagger for hotels in Dutch and English.
|