opener-property-tagger 3.3.4 → 3.3.5

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 436af27476deb0372f325ed255151bf5128608405c740289931ecb630b111ab4
4
- data.tar.gz: 5ff2d6bf101e606a786f0b4a9da8021a231ea07fac29fb51c452a1aefa737954
3
+ metadata.gz: 4498897f273195326764dae62875d3901919f69014df57d0919d88b0755ee777
4
+ data.tar.gz: e0c3c1cfcc32cf260024c2c4b5fee4afb7e6a2b4e46ba690499e8b151e959e52
5
5
  SHA512:
6
- metadata.gz: 4918a0be797e2776329863a1f9f43609b9b30390e6165e73c4e089f533ce769a077e82fa928091285dbf866039dad589b7a650b7c6ffd9cc403c355862a9bd5b
7
- data.tar.gz: bcac64dc7d1f229b264976070c05f64e5ff42c54ba2f809f905beb4d7597acea6c71923c271c4bc5dd77c979aa78ffdcb74757c5d27f6819bb6e62186d41fed8
6
+ metadata.gz: 0f0557b5cb1604b6edec19c6d5c9b2d3cc892792d7562209d619cfe0f1f09c04fa0a340f3197e8f5963d0dbefbfb890d18e9051f82705eaa346f20682b724eb1
7
+ data.tar.gz: 828fb63990b968d4511548b5ed6b05a44574cc9b0ed933745ddb7053cc642400ddf1ff7966f50a55b6dc84223a1fbbb8882c9cb28850dd3a8e98e550e9453183
@@ -36,7 +36,7 @@ module Opener
36
36
  mapping = Hash.new { |hash, key| hash[key] = [] }
37
37
 
38
38
  File.foreach(path) do |line|
39
- lemma, pos, aspect = line.chomp.split("\t")
39
+ lemma, _pos, aspect = line.chomp.split("\t")
40
40
 
41
41
  mapping[lemma.to_sym] << aspect
42
42
  end
@@ -57,11 +57,11 @@ module Opener
57
57
 
58
58
  add_linguistic_processor
59
59
 
60
- return pretty ? pretty_print(document) : document.to_xml
60
+ pretty ? pretty_print(document) : document.to_xml
61
61
  end
62
62
 
63
63
  def language
64
- return @language ||= document.at_xpath('KAF').attr('xml:lang')
64
+ @language ||= document.at_xpath('KAF').attr('xml:lang')
65
65
  end
66
66
 
67
67
  def terms
@@ -69,11 +69,11 @@ module Opener
69
69
  @terms = {}
70
70
 
71
71
  document.xpath('KAF/terms/term').each do |term|
72
- @terms[term.attr('tid').to_sym] = term.attr('lemma')
72
+ @terms[term.attr('tid').to_sym] = { lemma: term.attr('lemma'), text: term.attr('text')}
73
73
  end
74
74
  end
75
75
 
76
- return @terms
76
+ @terms
77
77
  end
78
78
 
79
79
  ##
@@ -81,37 +81,39 @@ module Opener
81
81
  # @return [Hash]
82
82
  #
83
83
  def extract_aspects
84
- term_ids = terms.keys
85
- lemmas = terms.values
84
+ term_ids = terms.keys
85
+ lemmas = terms.values
86
+ uniq_aspects = Hash.new { |hash, key| hash[key] = [] }
86
87
 
87
- current_token = 0
88
- # Use of n-grams to determine if a unigram (1 lemma) or bigram (2
89
- # lemmas) belong to a property.
90
- max_ngram = 2
88
+ [:lemma, :text].each do |k|
89
+ current_token = 0
90
+ # Use of n-grams to determine if a unigram (1 lemma) or bigram (2
91
+ # lemmas) belong to a property.
92
+ max_ngram = 2
91
93
 
92
- uniq_aspects = Hash.new { |hash, key| hash[key] = [] }
93
94
 
94
- while current_token < terms.count
95
- (0..max_ngram).each do |tam_ngram|
96
- if current_token + tam_ngram <= terms.count
97
- ngram = lemmas[current_token..current_token+tam_ngram].join(" ").downcase
95
+ while current_token < terms.count
96
+ (0..max_ngram).each do |tam_ngram|
97
+ if current_token + tam_ngram <= terms.count
98
+ ngram = lemmas[current_token..current_token+tam_ngram].map{|a| a[k] }.join(" ").downcase
98
99
 
99
- if aspects[ngram.to_sym]
100
- properties = aspects[ngram.to_sym]
101
- ids = term_ids[current_token..current_token+tam_ngram]
100
+ if aspects[ngram.to_sym]
101
+ properties = aspects[ngram.to_sym]
102
+ ids = term_ids[current_token..current_token+tam_ngram]
102
103
 
103
- properties.uniq.each do |property|
104
- next if !property or property.strip.empty?
104
+ properties.uniq.each do |property|
105
+ next if !property or property.strip.empty?
105
106
 
106
- uniq_aspects[property.to_sym] << [ids,ngram]
107
+ uniq_aspects[property.to_sym] << [ids,ngram] unless uniq_aspects[property.to_sym].include? [ids,ngram]
108
+ end
107
109
  end
108
110
  end
109
111
  end
112
+ current_token += 1
110
113
  end
111
- current_token += 1
112
114
  end
113
115
 
114
- return Hash[uniq_aspects.sort]
116
+ Hash[uniq_aspects.sort]
115
117
  end
116
118
 
117
119
  ##
@@ -190,7 +192,7 @@ module Opener
190
192
  formatter.compact = true
191
193
  formatter.write(doc, out)
192
194
 
193
- return out.strip
195
+ out.strip
194
196
  end
195
197
 
196
198
  protected
@@ -206,7 +208,7 @@ module Opener
206
208
 
207
209
  parent_node.add_child node
208
210
 
209
- return node
211
+ node
210
212
  end
211
213
 
212
214
  ##
@@ -214,7 +216,7 @@ module Opener
214
216
  # @return [Boolean]
215
217
  #
216
218
  def is_kaf?
217
- return !!document.at_xpath('KAF')
219
+ !!document.at_xpath('KAF')
218
220
  end
219
221
 
220
222
  ##
@@ -1,7 +1,7 @@
1
1
  module Opener
2
2
  class PropertyTagger
3
3
 
4
- VERSION = '3.3.4'
4
+ VERSION = '3.3.5'
5
5
 
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-property-tagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.4
4
+ version: 3.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-11-09 00:00:00.000000000 Z
11
+ date: 2020-11-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: opener-daemons
@@ -167,9 +167,9 @@ dependencies:
167
167
  description: Property tagger for hotels in Dutch and English.
168
168
  email:
169
169
  executables:
170
- - property-tagger
171
170
  - property-tagger-daemon
172
171
  - property-tagger-server
172
+ - property-tagger
173
173
  extensions: []
174
174
  extra_rdoc_files: []
175
175
  files:
@@ -213,7 +213,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
213
213
  version: '0'
214
214
  requirements: []
215
215
  rubyforge_project:
216
- rubygems_version: 2.7.8
216
+ rubygems_version: 2.7.6
217
217
  signing_key:
218
218
  specification_version: 4
219
219
  summary: Property tagger for hotels in Dutch and English.