opener-property-tagger 3.0.5 → 3.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: db4ce8acc0c1dd88b2042f9a936012ea9a3c1448
4
- data.tar.gz: 4ed777c8b215fb98dd3687689cb1c58d97787dd0
3
+ metadata.gz: c3d36e45bbff187579cbb8f2275d53261ed13030
4
+ data.tar.gz: 2850b27e9876336083dde1af17083658be60ef1c
5
5
  SHA512:
6
- metadata.gz: 5cd57abe32b4d88f8ed10618c9e8466c1628faebda89ad1ceeaa956c571fdc00a6069cea44a1a1570df2ebe0136fc6e34c142d1850972a1bc3e2f926c640d13d
7
- data.tar.gz: e64b80100aac3dbfc3229e75692ef9ab16cbd362ca878e36f2ef5b492c456843fcb535c5f26105f6e334594a433c7ff9b050758068779803373aab76acac5e96
6
+ metadata.gz: f95a22d030ff6dc1685ee117ddae2f0d104fce5b187e7bb2062b96dd98ba035dd55c6b9f21266a63e968ff42d9e0869ce762786c7a2d7f5b53b6cc63d5aff4a2
7
+ data.tar.gz: be9bfed3bb90e26a111e03973a3ec543780e886be08d7102dad057e81917e1323c599198f81d79da58257d85e5adafaf507454a0fa6e30105fc8210698c94e17
@@ -1,12 +1,14 @@
1
1
  require 'open3'
2
2
  require 'slop'
3
3
  require 'oga'
4
+ require 'monitor'
4
5
 
5
6
  require 'rexml/document'
6
7
  require 'rexml/formatters/pretty'
7
8
 
8
9
  require_relative 'property_tagger/version'
9
10
  require_relative 'property_tagger/cli'
11
+ require_relative 'property_tagger/aspects_cache'
10
12
  require_relative 'property_tagger/processor'
11
13
 
12
14
  module Opener
@@ -52,24 +54,15 @@ module Opener
52
54
  end
53
55
 
54
56
  ##
55
- # Processes the input and returns an Array containing the output of STDOUT,
56
- # STDERR and an object containing process information.
57
+ # Processes the input KAF document.
57
58
  #
58
- # @param [String] input The text of which to detect the language.
59
- # @return [Array]
59
+ # @param [String] input
60
+ # @return [String]
60
61
  #
61
62
  def run(input)
62
- output = process(input)
63
-
64
- return output
65
- end
66
-
67
- protected
68
-
69
- def process(input)
70
- processor = Processor.new(input, path, !options[:no_time])
63
+ timestamp = !options[:no_time]
71
64
 
72
- return processor.process
65
+ return Processor.new(input, path, timestamp, options[:pretty]).process
73
66
  end
74
67
  end # PropertyTagger
75
68
  end # Opener
@@ -0,0 +1,49 @@
1
+ module Opener
2
+ class PropertyTagger
3
+ ##
4
+ # Thread-safe cache for storing the contents of aspect files.
5
+ #
6
+ class AspectsCache
7
+ include MonitorMixin
8
+
9
+ def initialize
10
+ super
11
+
12
+ @cache = {}
13
+ end
14
+
15
+ ##
16
+ # Returns the aspects for the given file path. If the aspects don't exist
17
+ # they are first loaded into the cache.
18
+ #
19
+ # @param [String] path
20
+ #
21
+ def [](path)
22
+ synchronize do
23
+ @cache[path] = load_aspects(path) unless @cache.key?(path)
24
+ end
25
+
26
+ return @cache[path]
27
+ end
28
+
29
+ alias_method :get, :[]
30
+
31
+ ##
32
+ # Loads the aspects of the given path.
33
+ #
34
+ # @param [String] path
35
+ #
36
+ def load_aspects(path)
37
+ mapping = Hash.new { |hash, key| hash[key] = [] }
38
+
39
+ File.foreach(path) do |line|
40
+ lemma, pos, aspect = line.chomp.split("\t")
41
+
42
+ mapping[lemma.to_sym] << aspect
43
+ end
44
+
45
+ return mapping
46
+ end
47
+ end # AspectsCache
48
+ end # PropertyTagger
49
+ end # Opener
@@ -56,10 +56,13 @@ Examples:
56
56
 
57
57
  on :'no-time', 'Disables adding of timestamps'
58
58
 
59
+ on :ugly, 'Disables pretty formatting of XML (faster)'
60
+
59
61
  run do |opts, args|
60
62
  tagger = PropertyTagger.new(
61
63
  :args => args,
62
- :no_time => opts[:'no-time']
64
+ :no_time => opts[:'no-time'],
65
+ :pretty => !opts[:ugly]
63
66
  )
64
67
 
65
68
  input = STDIN.tty? ? nil : STDIN.read
@@ -4,13 +4,27 @@ module Opener
4
4
  # Class that applies property tagging to a given input KAF file.
5
5
  #
6
6
  class Processor
7
- attr_accessor :document, :aspects_path, :language, :aspects, :terms,
8
- :timestamp
7
+ attr_accessor :document, :aspects_path, :timestamp, :pretty
9
8
 
10
- def initialize(file, aspects_path, timestamp = true)
9
+ ##
10
+ # Global cache used for storing loaded aspects.
11
+ #
12
+ # @return [Opener::PropertyTagger::AspectsCache]
13
+ #
14
+ ASPECTS_CACHE = AspectsCache.new
15
+
16
+ ##
17
+ # @param [String|IO] file The KAF file/input to process.
18
+ # @param [String] aspects_path Path to the aspects.
19
+ # @param [TrueClass|FalseClass] timestamp Add timestamps to the KAF.
20
+ # @param [TrueClass|FalseClass] pretty Enable pretty formatting, disabled
21
+ # by default due to the performance overhead.
22
+ #
23
+ def initialize(file, aspects_path, timestamp = true, pretty = false)
11
24
  @document = Oga.parse_xml(file)
12
25
  @aspects_path = aspects_path
13
26
  @timestamp = timestamp
27
+ @pretty = pretty
14
28
 
15
29
  raise 'Error parsing input. Input is required to be KAF' unless is_kaf?
16
30
  end
@@ -20,64 +34,52 @@ module Opener
20
34
  # @return [String]
21
35
  #
22
36
  def process
23
- @language = get_language
24
- @aspects = load_aspects
25
- @terms = get_terms
26
-
27
37
  existing_aspects = extract_aspects
28
38
 
29
39
  add_features_layer
30
40
  add_properties_layer
31
41
 
32
- index = 1
42
+ existing_aspects.each_with_index do |(key, value), index|
43
+ index += 1
33
44
 
34
- existing_aspects.each_pair do |key,value|
35
45
  add_property(key, value, index)
36
- index += 1
37
46
  end
38
47
 
39
48
  add_linguistic_processor
40
49
 
41
- return pretty_print(document)
50
+ return pretty ? pretty_print(document) : document.to_xml
42
51
  end
43
52
 
44
53
  ##
45
- # Loads the aspects from the txt file
46
54
  # @return [Hash]
47
55
  #
48
- def load_aspects
49
- aspects_hash = {}
50
-
51
- File.foreach(aspects_file) do |line|
52
- lemma, pos, aspect = line.gsub("\n", "").split("\t")
53
-
54
- aspects_hash[lemma.to_sym] = [] unless aspects_hash[lemma.to_sym]
55
- aspects_hash[lemma.to_sym] << aspect
56
- end
57
-
58
- return aspects_hash
56
+ def aspects
57
+ return ASPECTS_CACHE[aspects_file]
59
58
  end
60
59
 
61
60
  ##
62
61
  # Get the language of the input file.
62
+ #
63
63
  # @return [String]
64
64
  #
65
- def get_language
66
- document.at_xpath('KAF').get('xml:lang')
65
+ def language
66
+ return @language ||= document.at_xpath('KAF').get('xml:lang')
67
67
  end
68
68
 
69
69
  ##
70
70
  # Get the terms from the input file
71
71
  # @return [Hash]
72
72
  #
73
- def get_terms
74
- terms_hash = {}
73
+ def terms
74
+ unless @terms
75
+ @terms = {}
75
76
 
76
- document.xpath('KAF/terms/term').each do |term|
77
- terms_hash[term.get('tid').to_sym] = term.get('lemma')
77
+ document.xpath('KAF/terms/term').each do |term|
78
+ @terms[term.get('tid').to_sym] = term.get('lemma')
79
+ end
78
80
  end
79
81
 
80
- return terms_hash
82
+ return @terms
81
83
  end
82
84
 
83
85
  ##
@@ -93,7 +95,7 @@ module Opener
93
95
  # lemmas) belong to a property.
94
96
  max_ngram = 2
95
97
 
96
- uniq_aspects = {}
98
+ uniq_aspects = Hash.new { |hash, key| hash[key] = [] }
97
99
 
98
100
  while current_token < terms.count
99
101
  (0..max_ngram).each do |tam_ngram|
@@ -107,7 +109,6 @@ module Opener
107
109
  properties.uniq.each do |property|
108
110
  next if !property or property.strip.empty?
109
111
 
110
- uniq_aspects[property.to_sym] = [] unless uniq_aspects[property.to_sym]
111
112
  uniq_aspects[property.to_sym] << [ids,ngram]
112
113
  end
113
114
  end
@@ -223,8 +224,12 @@ module Opener
223
224
  return !!document.at_xpath('KAF')
224
225
  end
225
226
 
227
+ ##
228
+ # @return [String]
229
+ #
226
230
  def aspects_file
227
- return File.expand_path("#{aspects_path}/#{language}.txt", __FILE__)
231
+ return @aspects_file ||=
232
+ File.expand_path("#{aspects_path}/#{language}.txt", __FILE__)
228
233
  end
229
234
  end # Processor
230
235
  end # PropertyTagger
@@ -1,5 +1,5 @@
1
1
  module Opener
2
2
  class PropertyTagger
3
- VERSION = '3.0.5'
3
+ VERSION = '3.0.6'
4
4
  end # PropertyTagger
5
5
  end # Opener
@@ -33,4 +33,5 @@ Gem::Specification.new do |gem|
33
33
  gem.add_development_dependency 'rspec', '~> 3.0'
34
34
  gem.add_development_dependency 'cucumber'
35
35
  gem.add_development_dependency 'rake'
36
+ gem.add_development_dependency 'benchmark-ips', '~> 2.0'
36
37
  end
data/task/test.rake CHANGED
@@ -2,5 +2,6 @@ desc 'Runs the tests'
2
2
  task :test => [:lexicons] do
3
3
  ENV['RESOURCE_PATH'] = File.expand_path('../../tmp/lexicons/hotel', __FILE__)
4
4
 
5
- sh('cucumber features')
5
+ sh 'cucumber features'
6
+ sh 'rspec spec --order random'
6
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-property-tagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.5
4
+ version: 3.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-01-26 00:00:00.000000000 Z
11
+ date: 2015-02-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: opener-daemons
@@ -108,6 +108,20 @@ dependencies:
108
108
  - - ">="
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: benchmark-ips
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '2.0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '2.0'
111
125
  description: Property tagger for hotels in Dutch and English.
112
126
  email:
113
127
  executables:
@@ -125,6 +139,7 @@ files:
125
139
  - config.ru
126
140
  - exec/property-tagger.rb
127
141
  - lib/opener/property_tagger.rb
142
+ - lib/opener/property_tagger/aspects_cache.rb
128
143
  - lib/opener/property_tagger/cli.rb
129
144
  - lib/opener/property_tagger/processor.rb
130
145
  - lib/opener/property_tagger/public/markdown.css