opener-property-tagger 3.0.2 → 3.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bd892d35b2c63bf6186dbd770a60878f1ab393ba
4
- data.tar.gz: 8a45c4bc4bb53b01dd394d91ad8c7c5a99e320ef
3
+ metadata.gz: 6bd138d2aeb528bf87f83fde2af933ca3ebce6dd
4
+ data.tar.gz: e0e64e5f709effb0e2671b08790ee5c5e4afb5b2
5
5
  SHA512:
6
- metadata.gz: c26b7b04a7788ee0300148a5af16502e44c3141edc2187cb1c00ee6de5bd954c8bb670ea215359f65ebcb31b0ce440607bf6d0bf165cf6529897b53869aeb42e
7
- data.tar.gz: 637fee56021276a7f86a3cbb92bfc26c3c328ed8609488cf04bc0021a782a889901f769429ab208c54d745d5be37c0b9df4a91d517542e9dd94817e01d21cc35
6
+ metadata.gz: 7228f65dde150175167d2ac0f72cac888f8649467be2bd983ba28d1d9738222f20aa8bec7678813f4200756529558159764b4edfa420c2d43d2aee9593f9dccc
7
+ data.tar.gz: 0303f61b4f14aedc1c37c4378f3a030d602280b261f60013426ec3d9d986356dabfebbbf6f7a665f04fd9f7b65ac0901a67567416eabea5725db96699b1ee67c
data/README.md CHANGED
@@ -118,8 +118,6 @@ At least you need the following system setup:
118
118
  ### Dependencies for normal use:
119
119
 
120
120
  * Ruby 1.9.3 or newer
121
- * Python 2.6
122
- * lxml installed
123
121
  * libarchive (for running the tests and such), on Debian/Ubuntu based systems
124
122
  this can be installed using `sudo apt-get install libarchive-dev`
125
123
 
@@ -137,11 +135,6 @@ is the word or span of words (in this case use whitespaces), then the part of
137
135
  speech (which is not actually used, so you can include a dummy label) and
138
136
  finally the aspect class associated with the word.
139
137
 
140
- ## The Core
141
-
142
- The component is a fat wrapper around the actual language technology core. You
143
- can find the core technolies (python) in the `/core` directory.
144
-
145
138
  ## Where to go from here
146
139
 
147
140
  * [Check the project website](http://opener-project.github.io)
@@ -0,0 +1,227 @@
1
module Opener
  class PropertyTagger
    ##
    # Class that applies property tagging to a given input KAF file.
    #
    # The input document is parsed with Oga. The lemmas of its terms are
    # combined into n-grams and looked up in a tab separated aspects file
    # (`<aspects_path>/<language>.txt`). Every match is written to a
    # `features/properties` layer and the result is returned as a pretty
    # printed XML String.
    #
    class Processor
      attr_accessor :document, :aspects_path, :language, :aspects, :terms,
        :timestamp

      ##
      # @param file         KAF input, handed as-is to `Oga.parse_xml`.
      # @param aspects_path [String] Directory containing the
      #  `<language>.txt` aspect files.
      # @param timestamp    [Boolean] When false the linguistic processor
      #  entry gets a `*` placeholder instead of the current time.
      #
      # @raise [RuntimeError] When the parsed document has no `KAF` root.
      #
      def initialize(file, aspects_path, timestamp = true)
        @document     = Oga.parse_xml(file)
        @aspects_path = aspects_path
        @timestamp    = timestamp

        raise 'Error parsing input. Input is required to be KAF' unless is_kaf?
      end

      ##
      # Processes the input and returns the new KAF output.
      #
      # Populates {#language}, {#aspects} and {#terms} as a side effect and
      # modifies {#document} in place.
      #
      # @return [String]
      #
      def process
        @language = get_language
        @aspects  = load_aspects
        @terms    = get_terms

        existing_aspects = extract_aspects

        add_features_layer
        add_properties_layer

        # Property IDs are 1-based: p1, p2, ...
        existing_aspects.each_with_index do |(key, value), index|
          add_property(key, value, index + 1)
        end

        add_linguistic_processor

        return pretty_print(document)
      end

      ##
      # Loads the aspects from the txt file.
      #
      # Every line is expected to hold three tab separated fields: lemma,
      # part of speech and aspect. The part of speech is ignored. Blank lines
      # and lines without both a lemma and an aspect are skipped instead of
      # raising or storing `nil` aspects.
      #
      # @return [Hash] Lemma (Symbol) => Array of aspect Strings.
      #
      def load_aspects
        aspects_hash = {}

        File.foreach(aspects_file) do |line|
          # `chomp` also strips the "\r" of CRLF files, which would otherwise
          # end up at the end of the aspect name.
          lemma, _pos, aspect = line.chomp.split("\t")

          next if lemma.nil? || lemma.empty? || aspect.nil?

          (aspects_hash[lemma.to_sym] ||= []) << aspect
        end

        return aspects_hash
      end

      ##
      # Get the language of the input file.
      #
      # @return [String] Value of the `xml:lang` attribute of the KAF root.
      #
      def get_language
        document.at_xpath('KAF').get('xml:lang')
      end

      ##
      # Get the terms from the input file.
      #
      # @return [Hash] Term ID (Symbol) => lemma, in document order.
      #
      def get_terms
        terms_hash = {}

        document.xpath('KAF/terms/term').each do |term|
          terms_hash[term.get('tid').to_sym] = term.get('lemma')
        end

        return terms_hash
      end

      ##
      # Check which terms belong to an aspect (property).
      #
      # For every term the n-grams of 1, 2 and 3 consecutive lemmas starting
      # at that term are joined with a space, downcased and looked up in
      # {#aspects}.
      #
      # @return [Hash] Property (Symbol) => Array of `[term_ids, ngram]`
      #  pairs, sorted by property name.
      #
      def extract_aspects
        term_ids = terms.keys
        lemmas   = terms.values

        # Number of *extra* lemmas appended to the starting lemma, so the
        # range 0..max_ngram produces n-grams of 1 up to 3 lemmas.
        max_ngram = 2

        uniq_aspects = {}

        lemmas.each_index do |current_token|
          (0..max_ngram).each do |tam_ngram|
            last_token = current_token + tam_ngram

            # The slice below is inclusive. Going past the last term would
            # silently truncate it and match the previous, shorter n-gram a
            # second time.
            break if last_token >= lemmas.length

            ngram      = lemmas[current_token..last_token].join(" ").downcase
            properties = aspects[ngram.to_sym]

            next unless properties

            ids = term_ids[current_token..last_token]

            properties.uniq.each do |property|
              # Ignore aspects that are missing or consist of spaces only.
              next if property.to_s.gsub(" ", "").empty?

              (uniq_aspects[property.to_sym] ||= []) << [ids, ngram]
            end
          end
        end

        return Hash[uniq_aspects.sort]
      end

      ##
      # Remove the features layer from the KAF file if it exists and add a new
      # one.
      #
      def add_features_layer
        existing = document.at_xpath('KAF/features')

        existing.remove if existing

        new_node('features', 'KAF')
      end

      ##
      # Add the properties layer as a child to the features layer.
      #
      def add_properties_layer
        new_node("properties", "KAF/features")
      end

      ##
      # Adds a single `property` node, including its references, to the
      # properties layer.
      #
      # @param key   [Symbol|String] Name of the property, stored in the
      #  `lemma` attribute.
      # @param value [Array] `[term_ids, ngram]` pairs as produced by
      #  {#extract_aspects}. Duplicate pairs are written only once.
      # @param index [Fixnum] Number used for the `pid` attribute.
      #
      def add_property(key, value, index)
        property_node = new_node("property", "KAF/features/properties")

        property_node.set('lemma', key.to_s)
        property_node.set('pid', "p#{index}")

        references_node = new_node("references", property_node)

        value.uniq.each do |entry|
          # The matched n-gram is added as an XML comment before its span.
          comment = Oga::XML::Comment.new(:text => entry.last)

          references_node.children << comment

          span_node = new_node("span", references_node)

          entry.first.each do |term_id|
            target_node = new_node("target", span_node)

            target_node.set('id', term_id.to_s)
          end
        end
      end

      ##
      # Adds a `linguisticProcessors` entry for the features layer to the
      # `kafHeader` node of the document.
      #
      # @raise [RuntimeError] When the document has no `KAF/kafHeader` node.
      #
      def add_linguistic_processor
        description = 'VUA property tagger'
        last_edited = '16jan2015'
        version     = '2.0'

        node = new_node('linguisticProcessors', 'KAF/kafHeader')
        node.set('layer', 'features')

        lp_node = new_node('lp', node)

        lp_node.set('version', "#{last_edited}-#{version}")
        lp_node.set('name', description)

        if timestamp
          # NOTE: %Z emits the time zone name/abbreviation, not a numeric
          # offset.
          format = '%Y-%m-%dT%H:%M:%S%Z'

          lp_node.set('timestamp', Time.now.strftime(format))
        else
          lp_node.set('timestamp', '*')
        end
      end

      ##
      # Format the output document properly.
      #
      # The Oga document is serialized, parsed again using REXML and written
      # using REXML's compact pretty formatter.
      #
      # TODO: this should be handled by Oga in a nice way.
      #
      # @param document Object responding to `to_xml`.
      # @return [String]
      #
      def pretty_print(document)
        doc = REXML::Document.new document.to_xml
        doc.context[:attribute_quote] = :quote
        out = ""
        formatter = REXML::Formatters::Pretty.new
        formatter.compact = true
        formatter.write(doc, out)

        return out.strip
      end

      protected

      ##
      # Creates a new element and appends it to the given parent.
      #
      # @param tag    [String] Name of the new element.
      # @param parent [String|Object] Either an XPath expression evaluated
      #  against {#document} or an already resolved node.
      #
      # @return [Oga::XML::Element] The newly created node.
      # @raise [RuntimeError] When the parent node can not be found.
      #
      def new_node(tag, parent)
        parent_node = parent.is_a?(String) ? document.at_xpath(parent) : parent

        # Fail with a descriptive message instead of a NoMethodError on nil.
        unless parent_node
          raise "Unable to add <#{tag}>: parent node #{parent.inspect} not found"
        end

        node = Oga::XML::Element.new(:name => tag)

        parent_node.children << node

        return node
      end

      ##
      # Check if input is a KAF file.
      #
      # @return [Boolean]
      #
      def is_kaf?
        return !!document.at_xpath('KAF')
      end

      ##
      # Absolute path of the aspects file for the current language.
      #
      # A relative {#aspects_path} is resolved against the current working
      # directory.
      #
      # @return [String]
      #
      def aspects_file
        return File.expand_path(File.join(aspects_path.to_s, "#{language}.txt"))
      end
    end # Processor
  end # PropertyTagger
end # Opener
@@ -1,5 +1,5 @@
1
1
  module Opener
2
2
  class PropertyTagger
3
- VERSION = '3.0.2'
3
+ VERSION = '3.0.3'
4
4
  end # PropertyTagger
5
5
  end # Opener
@@ -1,8 +1,13 @@
1
1
  require 'open3'
2
2
  require 'slop'
3
+ require 'oga'
4
+
5
+ require 'rexml/document'
6
+ require 'rexml/formatters/pretty'
3
7
 
4
8
  require_relative 'property_tagger/version'
5
9
  require_relative 'property_tagger/cli'
10
+ require_relative 'property_tagger/processor'
6
11
 
7
12
  module Opener
8
13
  ##
@@ -28,15 +33,6 @@ module Opener
28
33
  @options = options
29
34
  end
30
35
 
31
- ##
32
- # Returns a String containing the command to use for executing the kernel.
33
- #
34
- # @return [String]
35
- #
36
- def command
37
- return "python -E #{kernel} #{args.join(' ')} --path #{path}"
38
- end
39
-
40
36
  ##
41
37
  # Get the resource path for the lexicon files, defaults to an ENV variable
42
38
  #
@@ -50,7 +46,7 @@ module Opener
50
46
  raise ArgumentError, 'No lexicon path provided'
51
47
  end
52
48
 
53
- return path
49
+ return File.expand_path(path)
54
50
  end
55
51
 
56
52
  ##
@@ -61,41 +57,15 @@ module Opener
61
57
  # @return [Array]
62
58
  #
63
59
  def run(input)
64
- stdout, stderr, process = capture(input)
65
-
66
- raise stderr unless process.success?
60
+ output = process(input)
67
61
 
68
- return stdout
62
+ return output
69
63
  end
70
64
 
71
65
  protected
72
-
73
- ##
74
- # capture3 method doesn't work properly with Jruby, so
75
- # this is a workaround
76
- #
77
- def capture(input)
78
- Open3.popen3(*command.split(" ")) {|i, o, e, t|
79
- out_reader = Thread.new { o.read }
80
- err_reader = Thread.new { e.read }
81
- i.write input
82
- i.close
83
- [out_reader.value, err_reader.value, t.value]
84
- }
85
- end
86
-
87
- ##
88
- # @return [String]
89
- #
90
- def core_dir
91
- return File.expand_path('../../../core', __FILE__)
92
- end
93
-
94
- ##
95
- # @return [String]
96
- #
97
- def kernel
98
- return File.join(core_dir, 'hotel_property_tagger_nl_en.py')
66
##
# Runs the property tagger on the given input.
#
# The timestamp is disabled when the arguments contain `--no-time`.
#
# @param input KAF input passed on to the Processor.
# @return The output of `Processor#process`.
#
def process(input)
  include_timestamp = !args.include?("--no-time")

  Opener::PropertyTagger::Processor.new(input, path, include_timestamp).process
end
100
70
  end # PolarityTagger
101
71
  end # Opener
@@ -7,20 +7,15 @@ Gem::Specification.new do |gem|
7
7
  gem.summary = 'Property tagger for hotels in Dutch and English.'
8
8
  gem.description = gem.summary
9
9
  gem.homepage = 'http://opener-project.github.com/'
10
- gem.extensions = ['ext/hack/Rakefile']
11
10
 
12
11
  gem.license = 'Apache 2.0'
13
12
 
14
13
  gem.required_ruby_version = '>= 1.9.2'
15
14
 
16
15
  gem.files = Dir.glob([
17
- 'core/data/**/*',
18
- 'core/*.py',
19
- 'ext/**/*',
20
16
  'lib/**/*',
21
17
  'config.ru',
22
18
  '*.gemspec',
23
- '*_requirements.txt',
24
19
  'README.md',
25
20
  'LICENSE.txt',
26
21
  'exec/**/*',
@@ -33,10 +28,9 @@ Gem::Specification.new do |gem|
33
28
  gem.add_dependency 'opener-webservice', '~> 2.1'
34
29
  gem.add_dependency 'opener-core', '~> 2.2'
35
30
 
36
- gem.add_dependency 'nokogiri'
37
- gem.add_dependency 'rake'
38
- gem.add_dependency 'cliver'
31
+ gem.add_dependency 'oga'
39
32
 
40
33
  gem.add_development_dependency 'rspec', '~> 3.0'
41
34
  gem.add_development_dependency 'cucumber'
35
+ gem.add_development_dependency 'rake'
42
36
  end
data/task/test.rake CHANGED
@@ -1,5 +1,5 @@
1
1
  desc 'Runs the tests'
2
- task :test => [:compile, :lexicons] do
2
+ task :test => [:lexicons] do
3
3
  ENV['RESOURCE_PATH'] = File.expand_path('../../tmp/lexicons/hotel', __FILE__)
4
4
 
5
5
  sh('cucumber features')
metadata CHANGED
@@ -1,184 +1,162 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-property-tagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.2
4
+ version: 3.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-01-12 00:00:00.000000000 Z
11
+ date: 2015-01-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: opener-daemons
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '2.2'
20
- type: :runtime
21
- prerelease: false
22
15
  version_requirements: !ruby/object:Gem::Requirement
23
16
  requirements:
24
- - - "~>"
17
+ - - ~>
25
18
  - !ruby/object:Gem::Version
26
19
  version: '2.2'
27
- - !ruby/object:Gem::Dependency
28
- name: opener-webservice
29
20
  requirement: !ruby/object:Gem::Requirement
30
21
  requirements:
31
- - - "~>"
22
+ - - ~>
32
23
  - !ruby/object:Gem::Version
33
- version: '2.1'
34
- type: :runtime
24
+ version: '2.2'
35
25
  prerelease: false
26
+ type: :runtime
27
+ - !ruby/object:Gem::Dependency
28
+ name: opener-webservice
36
29
  version_requirements: !ruby/object:Gem::Requirement
37
30
  requirements:
38
- - - "~>"
31
+ - - ~>
39
32
  - !ruby/object:Gem::Version
40
33
  version: '2.1'
41
- - !ruby/object:Gem::Dependency
42
- name: opener-core
43
34
  requirement: !ruby/object:Gem::Requirement
44
35
  requirements:
45
- - - "~>"
36
+ - - ~>
46
37
  - !ruby/object:Gem::Version
47
- version: '2.2'
48
- type: :runtime
38
+ version: '2.1'
49
39
  prerelease: false
40
+ type: :runtime
41
+ - !ruby/object:Gem::Dependency
42
+ name: opener-core
50
43
  version_requirements: !ruby/object:Gem::Requirement
51
44
  requirements:
52
- - - "~>"
45
+ - - ~>
53
46
  - !ruby/object:Gem::Version
54
47
  version: '2.2'
55
- - !ruby/object:Gem::Dependency
56
- name: nokogiri
57
48
  requirement: !ruby/object:Gem::Requirement
58
49
  requirements:
59
- - - ">="
50
+ - - ~>
60
51
  - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :runtime
52
+ version: '2.2'
63
53
  prerelease: false
54
+ type: :runtime
55
+ - !ruby/object:Gem::Dependency
56
+ name: oga
64
57
  version_requirements: !ruby/object:Gem::Requirement
65
58
  requirements:
66
- - - ">="
59
+ - - '>='
67
60
  - !ruby/object:Gem::Version
68
61
  version: '0'
69
- - !ruby/object:Gem::Dependency
70
- name: rake
71
62
  requirement: !ruby/object:Gem::Requirement
72
63
  requirements:
73
- - - ">="
64
+ - - '>='
74
65
  - !ruby/object:Gem::Version
75
66
  version: '0'
76
- type: :runtime
77
67
  prerelease: false
68
+ type: :runtime
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
78
71
  version_requirements: !ruby/object:Gem::Requirement
79
72
  requirements:
80
- - - ">="
73
+ - - ~>
81
74
  - !ruby/object:Gem::Version
82
- version: '0'
83
- - !ruby/object:Gem::Dependency
84
- name: cliver
75
+ version: '3.0'
85
76
  requirement: !ruby/object:Gem::Requirement
86
77
  requirements:
87
- - - ">="
78
+ - - ~>
88
79
  - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :runtime
80
+ version: '3.0'
91
81
  prerelease: false
82
+ type: :development
83
+ - !ruby/object:Gem::Dependency
84
+ name: cucumber
92
85
  version_requirements: !ruby/object:Gem::Requirement
93
86
  requirements:
94
- - - ">="
87
+ - - '>='
95
88
  - !ruby/object:Gem::Version
96
89
  version: '0'
97
- - !ruby/object:Gem::Dependency
98
- name: rspec
99
90
  requirement: !ruby/object:Gem::Requirement
100
91
  requirements:
101
- - - "~>"
92
+ - - '>='
102
93
  - !ruby/object:Gem::Version
103
- version: '3.0'
104
- type: :development
94
+ version: '0'
105
95
  prerelease: false
96
+ type: :development
97
+ - !ruby/object:Gem::Dependency
98
+ name: rake
106
99
  version_requirements: !ruby/object:Gem::Requirement
107
100
  requirements:
108
- - - "~>"
101
+ - - '>='
109
102
  - !ruby/object:Gem::Version
110
- version: '3.0'
111
- - !ruby/object:Gem::Dependency
112
- name: cucumber
103
+ version: '0'
113
104
  requirement: !ruby/object:Gem::Requirement
114
105
  requirements:
115
- - - ">="
106
+ - - '>='
116
107
  - !ruby/object:Gem::Version
117
108
  version: '0'
118
- type: :development
119
109
  prerelease: false
120
- version_requirements: !ruby/object:Gem::Requirement
121
- requirements:
122
- - - ">="
123
- - !ruby/object:Gem::Version
124
- version: '0'
110
+ type: :development
125
111
  description: Property tagger for hotels in Dutch and English.
126
- email:
112
+ email:
127
113
  executables:
128
114
  - property-tagger
129
115
  - property-tagger-daemon
130
116
  - property-tagger-server
131
- extensions:
132
- - ext/hack/Rakefile
117
+ extensions: []
133
118
  extra_rdoc_files: []
134
119
  files:
135
- - LICENSE.txt
136
- - README.md
137
- - bin/property-tagger
138
- - bin/property-tagger-daemon
139
- - bin/property-tagger-server
140
- - config.ru
141
- - core/extract_aspects.py
142
- - core/hotel_property_tagger_nl_en.py
143
- - exec/property-tagger.rb
144
- - ext/hack/Rakefile
145
120
  - lib/opener/property_tagger.rb
146
121
  - lib/opener/property_tagger/cli.rb
147
- - lib/opener/property_tagger/public/markdown.css
122
+ - lib/opener/property_tagger/processor.rb
148
123
  - lib/opener/property_tagger/server.rb
149
124
  - lib/opener/property_tagger/version.rb
125
+ - lib/opener/property_tagger/public/markdown.css
150
126
  - lib/opener/property_tagger/views/index.erb
151
127
  - lib/opener/property_tagger/views/result.erb
128
+ - config.ru
152
129
  - opener-property-tagger.gemspec
153
- - pre_install_requirements.txt
154
- - task/compile.rake
130
+ - README.md
131
+ - LICENSE.txt
132
+ - exec/property-tagger.rb
155
133
  - task/lexicons.rake
156
- - task/python.rake
157
- - task/requirements.rake
158
134
  - task/test.rake
135
+ - bin/property-tagger
136
+ - bin/property-tagger-daemon
137
+ - bin/property-tagger-server
159
138
  homepage: http://opener-project.github.com/
160
139
  licenses:
161
140
  - Apache 2.0
162
141
  metadata: {}
163
- post_install_message:
142
+ post_install_message:
164
143
  rdoc_options: []
165
144
  require_paths:
166
145
  - lib
167
146
  required_ruby_version: !ruby/object:Gem::Requirement
168
147
  requirements:
169
- - - ">="
148
+ - - '>='
170
149
  - !ruby/object:Gem::Version
171
150
  version: 1.9.2
172
151
  required_rubygems_version: !ruby/object:Gem::Requirement
173
152
  requirements:
174
- - - ">="
153
+ - - '>='
175
154
  - !ruby/object:Gem::Version
176
155
  version: '0'
177
156
  requirements: []
178
- rubyforge_project:
179
- rubygems_version: 2.2.2
180
- signing_key:
157
+ rubyforge_project:
158
+ rubygems_version: 2.1.9
159
+ signing_key:
181
160
  specification_version: 4
182
161
  summary: Property tagger for hotels in Dutch and English.
183
162
  test_files: []
184
- has_rdoc:
@@ -1,18 +0,0 @@
1
- #!/usr/bin/env python
2
-
3
- from lxml import etree
4
- import sys
5
- #filename='/Users/ruben/CODE/VU-sentiment-lexicon-xml/VUSentimentLexicon/EN-lexicon/Sentiment-English-HotelDomain.xml'
6
-
7
- root = etree.parse(sys.stdin).getroot()
8
-
9
- for element in root.findall('Lexicon/LexicalEntry'):
10
- ele_lemma = element.findall('Lemma')[0]
11
- ele_domain = element.findall('Sense/Domain')[0]
12
- pos = element.get('partOfSpeech','unknown_pos')
13
- if ele_lemma is not None and ele_domain is not None:
14
- lemma = ele_lemma.get('writtenForm','').lower()
15
- aspect = ele_domain.get('aspect','').lower()
16
- if lemma!='' and aspect!='':
17
- print lemma.encode('utf-8')+'\t'+pos.encode('utf-8')+'\t'+aspect.encode('utf-8')
18
-
@@ -1,138 +0,0 @@
1
- #!/usr/bin/env python
2
-
3
- import sys
4
- import argparse
5
-
6
- import codecs
7
- import os
8
-
9
- this_folder = os.path.dirname(os.path.realpath(__file__))
10
-
11
- # This updates the load path to ensure that the local site-packages directory
12
- # can be used to load packages (e.g. a locally installed copy of lxml).
13
- sys.path.append(os.path.join(this_folder, 'site-packages/pre_install'))
14
-
15
- from VUKafParserPy import KafParser
16
- from lxml import etree
17
- from collections import defaultdict
18
-
19
- __desc='VUA property tagger'
20
- __last_edited='20may2014'
21
- __version='1.0'
22
-
23
- ###
24
- __module_dir = os.path.dirname(__file__)
25
- max_ngram = 1
26
- verbose = False
27
- ##
28
-
29
-
30
- ########################################
31
- ## Format of the file:
32
- #lemma pos aspect
33
- #lemma pos aspect
34
- ########################################
35
- def loadAspects(my_lang,this_file=None):
36
- my_aspects = {}
37
- if this_file is not None:
38
- aspects_filename = this_file
39
- else:
40
- filename = "{0}.txt".format(my_lang)
41
- print>>sys.stderr, "filename thingy",filename
42
- print>>sys.stderr, "path thingy",arguments.path
43
- aspects_filename = os.path.join(arguments.path,filename)
44
-
45
- if not os.path.exists(aspects_filename):
46
- print>>sys.stderr,'ERROR: file with aspects for the language',my_lang,'not found in',aspects_filename
47
- else:
48
- fic = codecs.open(aspects_filename,'r','utf-8')
49
- for line in fic:
50
- fields = line.strip().split('\t')
51
- if len(fields) == 3:
52
- lemma,pos,aspect = fields
53
- my_aspects[lemma] = aspect
54
- fic.close()
55
- return aspects_filename, my_aspects
56
- ########################################
57
-
58
-
59
-
60
- ###### MAIN ########
61
-
62
- argument_parser = argparse.ArgumentParser(description='Tags a text with polarities at lemma level')
63
- argument_parser.add_argument("--no-time",action="store_false", default=True, dest="my_time_stamp",help="For not including timestamp in header")
64
- argument_parser.add_argument("--lexicon", action="store", default=None, dest="lexicon", help="Force to use this lexicon")
65
- argument_parser.add_argument("--path", action="store", default=None, dest="path", help="Set the path where the property aspects are found.")
66
-
67
- arguments = argument_parser.parse_args()
68
-
69
- if not sys.stdin.isatty():
70
- ## READING FROM A PIPE
71
- pass
72
- else:
73
- print>>sys.stderr,'Input stream required.'
74
- print>>sys.stderr,'Example usage: cat myUTF8file.kaf.xml |',sys.argv[0]
75
- print>>sys.stderr,sys.argv[0]+' -h for help'
76
- sys.exit(-1)
77
-
78
-
79
- ## Load the tree and the list of terms with the id
80
- my_data = []
81
- try:
82
- my_kaf_tree = KafParser(sys.stdin)
83
- except Exception as e:
84
- print>>sys.stdout,'Error parsing input. Input is required to be KAF'
85
- print>>sys.stdout,str(e)
86
- sys.exit(2)
87
-
88
-
89
- ## Get language from the KAF file
90
- my_lang = my_kaf_tree.getLanguage()
91
-
92
- my_aspects_filename = my_aspects = None
93
- if arguments.lexicon is None:
94
- if my_lang not in ['nl','en','de','fr','it','es']:
95
- print>>sys.stdout,'Error in the language specified in your KAF. The language is ',my_lang,' and possible values for this module '
96
- print>>sys.stdout,'are nl for Dutch ,en for English, es Spanish, fr French, it Italian or de German'
97
- sys.exit(1)
98
-
99
- my_aspects_filename, my_aspects = loadAspects(my_lang)
100
- else:
101
- my_aspects_filename, my_aspects = loadAspects(my_lang,this_file=arguments.lexicon)
102
-
103
- if verbose:
104
- print>>sys.stderr,'Loaded ',len(my_aspects),'aspects from',my_aspects_filename
105
-
106
-
107
- for term in my_kaf_tree.getTerms():
108
- my_data.append((term.getLemma(),term.getId()))
109
- if verbose: print>>sys.stderr,'Number of terms in the kaf file:',len(my_data)
110
-
111
-
112
- current_token = found = 0
113
- uniq_aspects = defaultdict(list)
114
- while current_token < len(my_data):
115
- for tam_ngram in range(1,max_ngram+1):
116
- # Build an n-gram of size tam_ngram and beginning in current_token
117
- if current_token + tam_ngram <= len(my_data):
118
- ngram = ' '.join(lemma for lemma,_ in my_data[current_token:current_token+tam_ngram])
119
- aspect = my_aspects.get(ngram.lower(),None)
120
- if aspect is not None:
121
- list_of_ids = [id for _,id in my_data[current_token:current_token+tam_ngram]]
122
- uniq_aspects[aspect].append((list_of_ids,ngram))
123
- current_token += 1
124
-
125
-
126
- ## Code for generating the propery layer included in the Parser
127
- for aspect, list_of_lists in uniq_aspects.items():
128
- for list_of_ids, str_text in list_of_lists:
129
- my_kaf_tree.add_property(aspect,list_of_ids,str_text)
130
-
131
- my_kaf_tree.addLinguisticProcessor(__desc,__last_edited+'_'+__version,'features', arguments.my_time_stamp)
132
- my_kaf_tree.saveToFile(sys.stdout)
133
-
134
-
135
-
136
-
137
-
138
-
data/ext/hack/Rakefile DELETED
@@ -1,8 +0,0 @@
1
- import File.expand_path('../../../task/requirements.rake', __FILE__)
2
- import File.expand_path('../../../task/python.rake', __FILE__)
3
-
4
- task :default => :requirements do
5
- Dir.chdir(File.expand_path('../../../', __FILE__)) do
6
- Rake::Task['core/site-packages/pre_install'].invoke
7
- end
8
- end
@@ -1 +0,0 @@
1
- https://github.com/opener-project/VU-kaf-parser/archive/master.zip#egg=VUKafParserPy
data/task/compile.rake DELETED
@@ -1,2 +0,0 @@
1
- desc 'Verifies requirements and compiles the core'
2
- task :compile => ['requirements', 'python:compile']
data/task/python.rake DELETED
@@ -1,11 +0,0 @@
1
- # NOTE: pre_build/pre_install directories are created by pip.
2
-
3
- directory 'core/site-packages/pre_install' do |task|
4
- sh "pip install --requirement=pre_install_requirements.txt " \
5
- "--target=#{task.name} --ignore-installed"
6
- end
7
-
8
- namespace :python do
9
- desc 'Installs Python packages in a local directory'
10
- task :compile => ['core/site-packages/pre_install']
11
- end
@@ -1,7 +0,0 @@
1
- desc 'Verifies the requirements'
2
- task :requirements do
3
- require 'cliver'
4
-
5
- Cliver.detect!('python', '~> 2.6')
6
- Cliver.detect!('pip', '>= 1.3')
7
- end