opener-property-tagger 3.0.5 → 3.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: db4ce8acc0c1dd88b2042f9a936012ea9a3c1448
4
- data.tar.gz: 4ed777c8b215fb98dd3687689cb1c58d97787dd0
3
+ metadata.gz: c3d36e45bbff187579cbb8f2275d53261ed13030
4
+ data.tar.gz: 2850b27e9876336083dde1af17083658be60ef1c
5
5
  SHA512:
6
- metadata.gz: 5cd57abe32b4d88f8ed10618c9e8466c1628faebda89ad1ceeaa956c571fdc00a6069cea44a1a1570df2ebe0136fc6e34c142d1850972a1bc3e2f926c640d13d
7
- data.tar.gz: e64b80100aac3dbfc3229e75692ef9ab16cbd362ca878e36f2ef5b492c456843fcb535c5f26105f6e334594a433c7ff9b050758068779803373aab76acac5e96
6
+ metadata.gz: f95a22d030ff6dc1685ee117ddae2f0d104fce5b187e7bb2062b96dd98ba035dd55c6b9f21266a63e968ff42d9e0869ce762786c7a2d7f5b53b6cc63d5aff4a2
7
+ data.tar.gz: be9bfed3bb90e26a111e03973a3ec543780e886be08d7102dad057e81917e1323c599198f81d79da58257d85e5adafaf507454a0fa6e30105fc8210698c94e17
@@ -1,12 +1,14 @@
1
1
  require 'open3'
2
2
  require 'slop'
3
3
  require 'oga'
4
+ require 'monitor'
4
5
 
5
6
  require 'rexml/document'
6
7
  require 'rexml/formatters/pretty'
7
8
 
8
9
  require_relative 'property_tagger/version'
9
10
  require_relative 'property_tagger/cli'
11
+ require_relative 'property_tagger/aspects_cache'
10
12
  require_relative 'property_tagger/processor'
11
13
 
12
14
  module Opener
@@ -52,24 +54,15 @@ module Opener
52
54
  end
53
55
 
54
56
  ##
55
- # Processes the input and returns an Array containing the output of STDOUT,
56
- # STDERR and an object containing process information.
57
+ # Processes the input KAF document.
57
58
  #
58
- # @param [String] input The text of which to detect the language.
59
- # @return [Array]
59
+ # @param [String] input
60
+ # @return [String]
60
61
  #
61
62
  def run(input)
62
- output = process(input)
63
-
64
- return output
65
- end
66
-
67
- protected
68
-
69
- def process(input)
70
- processor = Processor.new(input, path, !options[:no_time])
63
+ timestamp = !options[:no_time]
71
64
 
72
- return processor.process
65
+ return Processor.new(input, path, timestamp, options[:pretty]).process
73
66
  end
74
67
  end # PolarityTagger
75
68
  end # Opener
@@ -0,0 +1,49 @@
1
+ module Opener
2
+ class PropertyTagger
3
+ ##
4
+ # Thread-safe cache for storing the contents of aspect files.
5
+ #
6
+ class AspectsCache
7
+ include MonitorMixin
8
+
9
+ def initialize
10
+ super
11
+
12
+ @cache = {}
13
+ end
14
+
15
+ ##
16
+ # Returns the aspects for the given file path. If the aspects don't exist
17
+ # they are first loaded into the cache.
18
+ #
19
+ # @param [String] path
20
+ #
21
+ def [](path)
22
+ synchronize do
23
+ @cache[path] = load_aspects(path) unless @cache.key?(path)
24
+ end
25
+
26
+ return @cache[path]
27
+ end
28
+
29
+ alias_method :get, :[]
30
+
31
+ ##
32
+ # Loads the aspects of the given path.
33
+ #
34
+ # @param [String] path
35
+ #
36
+ def load_aspects(path)
37
+ mapping = Hash.new { |hash, key| hash[key] = [] }
38
+
39
+ File.foreach(path) do |line|
40
+ lemma, pos, aspect = line.chomp.split("\t")
41
+
42
+ mapping[lemma.to_sym] << aspect
43
+ end
44
+
45
+ return mapping
46
+ end
47
+ end # AspectsCache
48
+ end # PropertyTagger
49
+ end # Opener
@@ -56,10 +56,13 @@ Examples:
56
56
 
57
57
  on :'no-time', 'Disables adding of timestamps'
58
58
 
59
+ on :ugly, 'Disables pretty formatting of XML (faster)'
60
+
59
61
  run do |opts, args|
60
62
  tagger = PropertyTagger.new(
61
63
  :args => args,
62
- :no_time => opts[:'no-time']
64
+ :no_time => opts[:'no-time'],
65
+ :pretty => !opts[:ugly]
63
66
  )
64
67
 
65
68
  input = STDIN.tty? ? nil : STDIN.read
@@ -4,13 +4,27 @@ module Opener
4
4
  # Class that applies property tagging to a given input KAF file.
5
5
  #
6
6
  class Processor
7
- attr_accessor :document, :aspects_path, :language, :aspects, :terms,
8
- :timestamp
7
+ attr_accessor :document, :aspects_path, :timestamp, :pretty
9
8
 
10
- def initialize(file, aspects_path, timestamp = true)
9
+ ##
10
+ # Global cache used for storing loaded aspects.
11
+ #
12
+ # @return [Opener::PropertyTagger::AspectsCache.new]
13
+ #
14
+ ASPECTS_CACHE = AspectsCache.new
15
+
16
+ ##
17
+ # @param [String|IO] file The KAF file/input to process.
18
+ # @param [String] aspects_path Path to the aspects.
19
+ # @param [TrueClass|FalseClass] timestamp Add timestamps to the KAF.
20
+ # @param [TrueClass|FalseClass] pretty Enable pretty formatting, disabled
21
+ # by default due to the performance overhead.
22
+ #
23
+ def initialize(file, aspects_path, timestamp = true, pretty = false)
11
24
  @document = Oga.parse_xml(file)
12
25
  @aspects_path = aspects_path
13
26
  @timestamp = timestamp
27
+ @pretty = pretty
14
28
 
15
29
  raise 'Error parsing input. Input is required to be KAF' unless is_kaf?
16
30
  end
@@ -20,64 +34,52 @@ module Opener
20
34
  # @return [String]
21
35
  #
22
36
  def process
23
- @language = get_language
24
- @aspects = load_aspects
25
- @terms = get_terms
26
-
27
37
  existing_aspects = extract_aspects
28
38
 
29
39
  add_features_layer
30
40
  add_properties_layer
31
41
 
32
- index = 1
42
+ existing_aspects.each_with_index do |(key, value), index|
43
+ index += 1
33
44
 
34
- existing_aspects.each_pair do |key,value|
35
45
  add_property(key, value, index)
36
- index += 1
37
46
  end
38
47
 
39
48
  add_linguistic_processor
40
49
 
41
- return pretty_print(document)
50
+ return pretty ? pretty_print(document) : document.to_xml
42
51
  end
43
52
 
44
53
  ##
45
- # Loads the aspects from the txt file
46
54
  # @return [Hash]
47
55
  #
48
- def load_aspects
49
- aspects_hash = {}
50
-
51
- File.foreach(aspects_file) do |line|
52
- lemma, pos, aspect = line.gsub("\n", "").split("\t")
53
-
54
- aspects_hash[lemma.to_sym] = [] unless aspects_hash[lemma.to_sym]
55
- aspects_hash[lemma.to_sym] << aspect
56
- end
57
-
58
- return aspects_hash
56
+ def aspects
57
+ return ASPECTS_CACHE[aspects_file]
59
58
  end
60
59
 
61
60
  ##
62
61
  # Get the language of the input file.
62
+ #
63
63
  # @return [String]
64
64
  #
65
- def get_language
66
- document.at_xpath('KAF').get('xml:lang')
65
+ def language
66
+ return @language ||= document.at_xpath('KAF').get('xml:lang')
67
67
  end
68
68
 
69
69
  ##
70
70
  # Get the terms from the input file
71
71
  # @return [Hash]
72
72
  #
73
- def get_terms
74
- terms_hash = {}
73
+ def terms
74
+ unless @terms
75
+ @terms = {}
75
76
 
76
- document.xpath('KAF/terms/term').each do |term|
77
- terms_hash[term.get('tid').to_sym] = term.get('lemma')
77
+ document.xpath('KAF/terms/term').each do |term|
78
+ @terms[term.get('tid').to_sym] = term.get('lemma')
79
+ end
78
80
  end
79
81
 
80
- return terms_hash
82
+ return @terms
81
83
  end
82
84
 
83
85
  ##
@@ -93,7 +95,7 @@ module Opener
93
95
  # lemmas) belong to a property.
94
96
  max_ngram = 2
95
97
 
96
- uniq_aspects = {}
98
+ uniq_aspects = Hash.new { |hash, key| hash[key] = [] }
97
99
 
98
100
  while current_token < terms.count
99
101
  (0..max_ngram).each do |tam_ngram|
@@ -107,7 +109,6 @@ module Opener
107
109
  properties.uniq.each do |property|
108
110
  next if !property or property.strip.empty?
109
111
 
110
- uniq_aspects[property.to_sym] = [] unless uniq_aspects[property.to_sym]
111
112
  uniq_aspects[property.to_sym] << [ids,ngram]
112
113
  end
113
114
  end
@@ -223,8 +224,12 @@ module Opener
223
224
  return !!document.at_xpath('KAF')
224
225
  end
225
226
 
227
+ ##
228
+ # @return [String]
229
+ #
226
230
  def aspects_file
227
- return File.expand_path("#{aspects_path}/#{language}.txt", __FILE__)
231
+ return @aspects_file ||=
232
+ File.expand_path("#{aspects_path}/#{language}.txt", __FILE__)
228
233
  end
229
234
  end # Processor
230
235
  end # PropertyTagger
@@ -1,5 +1,5 @@
1
1
  module Opener
2
2
  class PropertyTagger
3
- VERSION = '3.0.5'
3
+ VERSION = '3.0.6'
4
4
  end # PropertyTagger
5
5
  end # Opener
@@ -33,4 +33,5 @@ Gem::Specification.new do |gem|
33
33
  gem.add_development_dependency 'rspec', '~> 3.0'
34
34
  gem.add_development_dependency 'cucumber'
35
35
  gem.add_development_dependency 'rake'
36
+ gem.add_development_dependency 'benchmark-ips', '~> 2.0'
36
37
  end
data/task/test.rake CHANGED
@@ -2,5 +2,6 @@ desc 'Runs the tests'
2
2
  task :test => [:lexicons] do
3
3
  ENV['RESOURCE_PATH'] = File.expand_path('../../tmp/lexicons/hotel', __FILE__)
4
4
 
5
- sh('cucumber features')
5
+ sh 'cucumber features'
6
+ sh 'rspec spec --order random'
6
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-property-tagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.5
4
+ version: 3.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-01-26 00:00:00.000000000 Z
11
+ date: 2015-02-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: opener-daemons
@@ -108,6 +108,20 @@ dependencies:
108
108
  - - ">="
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: benchmark-ips
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '2.0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '2.0'
111
125
  description: Property tagger for hotels in Dutch and English.
112
126
  email:
113
127
  executables:
@@ -125,6 +139,7 @@ files:
125
139
  - config.ru
126
140
  - exec/property-tagger.rb
127
141
  - lib/opener/property_tagger.rb
142
+ - lib/opener/property_tagger/aspects_cache.rb
128
143
  - lib/opener/property_tagger/cli.rb
129
144
  - lib/opener/property_tagger/processor.rb
130
145
  - lib/opener/property_tagger/public/markdown.css