opener-property-tagger 3.3.4 → 3.3.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 436af27476deb0372f325ed255151bf5128608405c740289931ecb630b111ab4
4
- data.tar.gz: 5ff2d6bf101e606a786f0b4a9da8021a231ea07fac29fb51c452a1aefa737954
3
+ metadata.gz: 4498897f273195326764dae62875d3901919f69014df57d0919d88b0755ee777
4
+ data.tar.gz: e0c3c1cfcc32cf260024c2c4b5fee4afb7e6a2b4e46ba690499e8b151e959e52
5
5
  SHA512:
6
- metadata.gz: 4918a0be797e2776329863a1f9f43609b9b30390e6165e73c4e089f533ce769a077e82fa928091285dbf866039dad589b7a650b7c6ffd9cc403c355862a9bd5b
7
- data.tar.gz: bcac64dc7d1f229b264976070c05f64e5ff42c54ba2f809f905beb4d7597acea6c71923c271c4bc5dd77c979aa78ffdcb74757c5d27f6819bb6e62186d41fed8
6
+ metadata.gz: 0f0557b5cb1604b6edec19c6d5c9b2d3cc892792d7562209d619cfe0f1f09c04fa0a340f3197e8f5963d0dbefbfb890d18e9051f82705eaa346f20682b724eb1
7
+ data.tar.gz: 828fb63990b968d4511548b5ed6b05a44574cc9b0ed933745ddb7053cc642400ddf1ff7966f50a55b6dc84223a1fbbb8882c9cb28850dd3a8e98e550e9453183
@@ -36,7 +36,7 @@ module Opener
36
36
  mapping = Hash.new { |hash, key| hash[key] = [] }
37
37
 
38
38
  File.foreach(path) do |line|
39
- lemma, pos, aspect = line.chomp.split("\t")
39
+ lemma, _pos, aspect = line.chomp.split("\t")
40
40
 
41
41
  mapping[lemma.to_sym] << aspect
42
42
  end
@@ -57,11 +57,11 @@ module Opener
57
57
 
58
58
  add_linguistic_processor
59
59
 
60
- return pretty ? pretty_print(document) : document.to_xml
60
+ pretty ? pretty_print(document) : document.to_xml
61
61
  end
62
62
 
63
63
  def language
64
- return @language ||= document.at_xpath('KAF').attr('xml:lang')
64
+ @language ||= document.at_xpath('KAF').attr('xml:lang')
65
65
  end
66
66
 
67
67
  def terms
@@ -69,11 +69,11 @@ module Opener
69
69
  @terms = {}
70
70
 
71
71
  document.xpath('KAF/terms/term').each do |term|
72
- @terms[term.attr('tid').to_sym] = term.attr('lemma')
72
+ @terms[term.attr('tid').to_sym] = { lemma: term.attr('lemma'), text: term.attr('text')}
73
73
  end
74
74
  end
75
75
 
76
- return @terms
76
+ @terms
77
77
  end
78
78
 
79
79
  ##
@@ -81,37 +81,39 @@ module Opener
81
81
  # @return [Hash]
82
82
  #
83
83
  def extract_aspects
84
- term_ids = terms.keys
85
- lemmas = terms.values
84
+ term_ids = terms.keys
85
+ lemmas = terms.values
86
+ uniq_aspects = Hash.new { |hash, key| hash[key] = [] }
86
87
 
87
- current_token = 0
88
- # Use of n-grams to determine if a unigram (1 lemma) or bigram (2
89
- # lemmas) belong to a property.
90
- max_ngram = 2
88
+ [:lemma, :text].each do |k|
89
+ current_token = 0
90
+ # Use of n-grams to determine if a unigram (1 lemma) or bigram (2
91
+ # lemmas) belong to a property.
92
+ max_ngram = 2
91
93
 
92
- uniq_aspects = Hash.new { |hash, key| hash[key] = [] }
93
94
 
94
- while current_token < terms.count
95
- (0..max_ngram).each do |tam_ngram|
96
- if current_token + tam_ngram <= terms.count
97
- ngram = lemmas[current_token..current_token+tam_ngram].join(" ").downcase
95
+ while current_token < terms.count
96
+ (0..max_ngram).each do |tam_ngram|
97
+ if current_token + tam_ngram <= terms.count
98
+ ngram = lemmas[current_token..current_token+tam_ngram].map{|a| a[k] }.join(" ").downcase
98
99
 
99
- if aspects[ngram.to_sym]
100
- properties = aspects[ngram.to_sym]
101
- ids = term_ids[current_token..current_token+tam_ngram]
100
+ if aspects[ngram.to_sym]
101
+ properties = aspects[ngram.to_sym]
102
+ ids = term_ids[current_token..current_token+tam_ngram]
102
103
 
103
- properties.uniq.each do |property|
104
- next if !property or property.strip.empty?
104
+ properties.uniq.each do |property|
105
+ next if !property or property.strip.empty?
105
106
 
106
- uniq_aspects[property.to_sym] << [ids,ngram]
107
+ uniq_aspects[property.to_sym] << [ids,ngram] unless uniq_aspects[property.to_sym].include? [ids,ngram]
108
+ end
107
109
  end
108
110
  end
109
111
  end
112
+ current_token += 1
110
113
  end
111
- current_token += 1
112
114
  end
113
115
 
114
- return Hash[uniq_aspects.sort]
116
+ Hash[uniq_aspects.sort]
115
117
  end
116
118
 
117
119
  ##
@@ -190,7 +192,7 @@ module Opener
190
192
  formatter.compact = true
191
193
  formatter.write(doc, out)
192
194
 
193
- return out.strip
195
+ out.strip
194
196
  end
195
197
 
196
198
  protected
@@ -206,7 +208,7 @@ module Opener
206
208
 
207
209
  parent_node.add_child node
208
210
 
209
- return node
211
+ node
210
212
  end
211
213
 
212
214
  ##
@@ -214,7 +216,7 @@ module Opener
214
216
  # @return [Boolean]
215
217
  #
216
218
  def is_kaf?
217
- return !!document.at_xpath('KAF')
219
+ !!document.at_xpath('KAF')
218
220
  end
219
221
 
220
222
  ##
@@ -1,7 +1,7 @@
1
1
  module Opener
2
2
  class PropertyTagger
3
3
 
4
- VERSION = '3.3.4'
4
+ VERSION = '3.3.5'
5
5
 
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-property-tagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.4
4
+ version: 3.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-11-09 00:00:00.000000000 Z
11
+ date: 2020-11-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: opener-daemons
@@ -167,9 +167,9 @@ dependencies:
167
167
  description: Property tagger for hotels in Dutch and English.
168
168
  email:
169
169
  executables:
170
- - property-tagger
171
170
  - property-tagger-daemon
172
171
  - property-tagger-server
172
+ - property-tagger
173
173
  extensions: []
174
174
  extra_rdoc_files: []
175
175
  files:
@@ -213,7 +213,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
213
213
  version: '0'
214
214
  requirements: []
215
215
  rubyforge_project:
216
- rubygems_version: 2.7.8
216
+ rubygems_version: 2.7.6
217
217
  signing_key:
218
218
  specification_version: 4
219
219
  summary: Property tagger for hotels in Dutch and English.