opener-property-tagger 3.0.2 → 3.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bd892d35b2c63bf6186dbd770a60878f1ab393ba
4
- data.tar.gz: 8a45c4bc4bb53b01dd394d91ad8c7c5a99e320ef
3
+ metadata.gz: 6bd138d2aeb528bf87f83fde2af933ca3ebce6dd
4
+ data.tar.gz: e0e64e5f709effb0e2671b08790ee5c5e4afb5b2
5
5
  SHA512:
6
- metadata.gz: c26b7b04a7788ee0300148a5af16502e44c3141edc2187cb1c00ee6de5bd954c8bb670ea215359f65ebcb31b0ce440607bf6d0bf165cf6529897b53869aeb42e
7
- data.tar.gz: 637fee56021276a7f86a3cbb92bfc26c3c328ed8609488cf04bc0021a782a889901f769429ab208c54d745d5be37c0b9df4a91d517542e9dd94817e01d21cc35
6
+ metadata.gz: 7228f65dde150175167d2ac0f72cac888f8649467be2bd983ba28d1d9738222f20aa8bec7678813f4200756529558159764b4edfa420c2d43d2aee9593f9dccc
7
+ data.tar.gz: 0303f61b4f14aedc1c37c4378f3a030d602280b261f60013426ec3d9d986356dabfebbbf6f7a665f04fd9f7b65ac0901a67567416eabea5725db96699b1ee67c
data/README.md CHANGED
@@ -118,8 +118,6 @@ At least you need the following system setup:
118
118
  ### Dependencies for normal use:
119
119
 
120
120
  * Ruby 1.9.3 or newer
121
- * Python 2.6
122
- * lxml installed
123
121
  * libarchive (for running the tests and such), on Debian/Ubuntu based systems
124
122
  this can be installed using `sudo apt-get install libarchive-dev`
125
123
 
@@ -137,11 +135,6 @@ is the word or span of words (in this case use whitespaces), then the part of
137
135
  speech (which is not actually used, so you can include a dummy label) and
138
136
  finally the aspect class associated with the word.
139
137
 
140
- ## The Core
141
-
142
- The component is a fat wrapper around the actual language technology core. You
143
- can find the core technolies (python) in the `/core` directory.
144
-
145
138
  ## Where to go from here
146
139
 
147
140
  * [Check the project website](http://opener-project.github.io)
@@ -0,0 +1,227 @@
1
module Opener
  class PropertyTagger
    ##
    # Class that applies property tagging to a given input KAF file.
    #
    # The lemmas of the document's terms are looked up in a tab separated
    # aspects lexicon (`<aspects_path>/<language>.txt`). Every match is written
    # to a freshly created `features/properties` layer of the document.
    #
    class Processor
      ##
      # Largest number of consecutive lemmas joined into one lexicon lookup.
      #
      # The lookup has always covered windows of one, two and three lemmas;
      # the size is kept so existing multi-word lexicon entries keep matching.
      #
      # @return [Fixnum]
      #
      MAX_NGRAM = 3

      attr_accessor :document, :aspects_path, :language, :aspects, :terms,
        :timestamp

      ##
      # @param [String] file The KAF XML input, handed to `Oga.parse_xml`.
      # @param [String] aspects_path Directory holding the `<language>.txt`
      #  aspect lexicons.
      # @param [TrueClass|FalseClass] timestamp When false the `lp` node gets
      #  `*` as its timestamp instead of the current time.
      #
      # @raise [RuntimeError] When the document has no `KAF` element.
      #
      def initialize(file, aspects_path, timestamp = true)
        @document     = Oga.parse_xml(file)
        @aspects_path = aspects_path
        @timestamp    = timestamp

        raise 'Error parsing input. Input is required to be KAF' unless is_kaf?
      end

      ##
      # Processes the input and returns the new KAF output.
      #
      # @return [String]
      #
      def process
        @language = get_language
        @aspects  = load_aspects
        @terms    = get_terms

        existing_aspects = extract_aspects

        add_features_layer
        add_properties_layer

        # Property ids are 1-based: p1, p2, ...
        existing_aspects.each_with_index do |(key, value), index|
          add_property(key, value, index + 1)
        end

        add_linguistic_processor

        pretty_print(document)
      end

      ##
      # Loads the aspects from the txt file.
      #
      # Every line is expected to hold `lemma<TAB>pos<TAB>aspect`. The part of
      # speech column is ignored. Lines that lack a lemma or an aspect (blank
      # lines, truncated lines) are skipped instead of producing `nil` entries.
      #
      # @return [Hash] Lemma Symbols mapped to Arrays of aspect Strings.
      #
      def load_aspects
        aspects_hash = {}

        File.foreach(aspects_file) do |line|
          # chomp also strips the "\r" of CRLF files, which would otherwise
          # end up inside the aspect name.
          lemma, _pos, aspect = line.chomp.split("\t")

          next if lemma.nil? || lemma.empty? || aspect.nil?

          (aspects_hash[lemma.to_sym] ||= []) << aspect
        end

        aspects_hash
      end

      ##
      # Get the language of the input file.
      #
      # @return [String] Value of the `xml:lang` attribute of the `KAF` node.
      #
      def get_language
        document.at_xpath('KAF').get('xml:lang')
      end

      ##
      # Get the terms from the input file.
      #
      # @return [Hash] Term ids (as Symbols) mapped to their lemmas, in
      #  document order.
      #
      def get_terms
        terms_hash = {}

        document.xpath('KAF/terms/term').each do |term|
          terms_hash[term.get('tid').to_sym] = term.get('lemma')
        end

        terms_hash
      end

      ##
      # Check which terms belong to an aspect (property).
      #
      # N-grams of 1 up to MAX_NGRAM consecutive lemmas are joined with a
      # space, lowercased and looked up in the loaded aspects.
      #
      # @return [Hash] Aspect names (as Symbols, sorted) mapped to Arrays of
      #  `[term_ids, ngram]` pairs.
      #
      def extract_aspects
        term_ids = terms.keys
        lemmas   = terms.values
        total    = lemmas.length

        uniq_aspects = Hash.new { |hash, key| hash[key] = [] }

        (0...total).each do |start|
          (1..MAX_NGRAM).each do |size|
            # A window running past the last term would only repeat a shorter
            # n-gram that has already been looked up, so stop growing it.
            break if start + size > total

            ngram      = lemmas[start, size].join(' ').downcase
            properties = aspects[ngram.to_sym]

            next unless properties

            ids = term_ids[start, size]

            properties.uniq.each do |property|
              # Aspect names consisting solely of spaces are ignored.
              next if property.gsub(' ', '').empty?

              uniq_aspects[property.to_sym] << [ids, ngram]
            end
          end
        end

        # Hash[] returns a plain Hash (no default block) ordered by aspect.
        Hash[uniq_aspects.sort]
      end

      ##
      # Remove the features layer from the KAF file if it exists and add a new
      # one.
      #
      def add_features_layer
        existing = document.at_xpath('KAF/features')

        existing.remove if existing

        new_node('features', 'KAF')
      end

      ##
      # Add the properties layer as a child to the features layer.
      #
      def add_properties_layer
        new_node('properties', 'KAF/features')
      end

      ##
      # Adds a single `property` node, including its references, to the
      # properties layer.
      #
      # @param [Symbol|String] key Name of the aspect, stored as `lemma`.
      # @param [Array] value `[term_ids, ngram]` pairs as produced by
      #  {#extract_aspects}; duplicates are written only once.
      # @param [Fixnum] index Number used to build the `pid` attribute.
      #
      def add_property(key, value, index)
        property_node = new_node('property', 'KAF/features/properties')

        property_node.set('lemma', key.to_s)
        property_node.set('pid', "p#{index}")

        references_node = new_node('references', property_node)

        value.uniq.each do |entry|
          # The matched n-gram is kept as an XML comment in front of its span.
          references_node.children << Oga::XML::Comment.new(:text => entry.last)

          span_node = new_node('span', references_node)

          entry.first.each do |term_id|
            new_node('target', span_node).set('id', term_id.to_s)
          end
        end
      end

      ##
      # Adds a `linguisticProcessors` node describing this tagger to the
      # `kafHeader` of the document.
      #
      def add_linguistic_processor
        description = 'VUA property tagger'
        last_edited = '16jan2015'
        version     = '2.0'

        node = new_node('linguisticProcessors', 'KAF/kafHeader')
        node.set('layer', 'features')

        lp_node = new_node('lp', node)

        lp_node.set('version', "#{last_edited}-#{version}")
        lp_node.set('name', description)

        if timestamp
          lp_node.set('timestamp', Time.now.strftime('%Y-%m-%dT%H:%M:%S%Z'))
        else
          lp_node.set('timestamp', '*')
        end
      end

      ##
      # Format the output document properly.
      #
      # TODO: this should be handled by Oga in a nice way.
      #
      # @param [Oga::XML::Document] document
      # @return [String]
      #
      def pretty_print(document)
        doc = REXML::Document.new document.to_xml
        doc.context[:attribute_quote] = :quote
        out = ""
        formatter = REXML::Formatters::Pretty.new
        formatter.compact = true
        formatter.write(doc, out)

        out.strip
      end

      protected

      ##
      # Creates an element and appends it to the given parent.
      #
      # @param [String] tag Name of the new element.
      # @param [String|Oga::XML::Element] parent Either the parent node or an
      #  XPath expression that is resolved against the document.
      # @return [Oga::XML::Element] The newly added node.
      #
      def new_node(tag, parent)
        parent_node = parent.is_a?(String) ? document.at_xpath(parent) : parent
        node        = Oga::XML::Element.new(:name => tag)

        parent_node.children << node

        node
      end

      ##
      # Check if input is a KAF file.
      #
      # @return [Boolean]
      #
      def is_kaf?
        !!document.at_xpath('KAF')
      end

      ##
      # Path of the lexicon for the current language.
      #
      # The aspects directory is used as the base directory, so a relative
      # `aspects_path` is resolved against the working directory.
      #
      # @return [String]
      #
      def aspects_file
        File.expand_path("#{language}.txt", aspects_path)
      end
    end # Processor
  end # PropertyTagger
end # Opener
@@ -1,5 +1,5 @@
1
1
  module Opener
2
2
  class PropertyTagger
3
- VERSION = '3.0.2'
3
+ VERSION = '3.0.3'
4
4
  end # PropertyTagger
5
5
  end # Opener
@@ -1,8 +1,13 @@
1
1
  require 'open3'
2
2
  require 'slop'
3
+ require 'oga'
4
+
5
+ require 'rexml/document'
6
+ require 'rexml/formatters/pretty'
3
7
 
4
8
  require_relative 'property_tagger/version'
5
9
  require_relative 'property_tagger/cli'
10
+ require_relative 'property_tagger/processor'
6
11
 
7
12
  module Opener
8
13
  ##
@@ -28,15 +33,6 @@ module Opener
28
33
  @options = options
29
34
  end
30
35
 
31
- ##
32
- # Returns a String containing the command to use for executing the kernel.
33
- #
34
- # @return [String]
35
- #
36
- def command
37
- return "python -E #{kernel} #{args.join(' ')} --path #{path}"
38
- end
39
-
40
36
  ##
41
37
  # Get the resource path for the lexicon files, defaults to an ENV variable
42
38
  #
@@ -50,7 +46,7 @@ module Opener
50
46
  raise ArgumentError, 'No lexicon path provided'
51
47
  end
52
48
 
53
- return path
49
+ return File.expand_path(path)
54
50
  end
55
51
 
56
52
  ##
@@ -61,41 +57,15 @@ module Opener
61
57
  # @return [Array]
62
58
  #
63
59
  def run(input)
64
- stdout, stderr, process = capture(input)
65
-
66
- raise stderr unless process.success?
60
+ output = process(input)
67
61
 
68
- return stdout
62
+ return output
69
63
  end
70
64
 
71
65
  protected
72
-
73
- ##
74
- # capture3 method doesn't work properly with Jruby, so
75
- # this is a workaround
76
- #
77
- def capture(input)
78
- Open3.popen3(*command.split(" ")) {|i, o, e, t|
79
- out_reader = Thread.new { o.read }
80
- err_reader = Thread.new { e.read }
81
- i.write input
82
- i.close
83
- [out_reader.value, err_reader.value, t.value]
84
- }
85
- end
86
-
87
- ##
88
- # @return [String]
89
- #
90
- def core_dir
91
- return File.expand_path('../../../core', __FILE__)
92
- end
93
-
94
- ##
95
- # @return [String]
96
- #
97
- def kernel
98
- return File.join(core_dir, 'hotel_property_tagger_nl_en.py')
66
+ def process(input)
67
+ processor = Opener::PropertyTagger::Processor.new(input, path, !args.include?("--no-time"))
68
+ return processor.process
99
69
  end
100
70
  end # PolarityTagger
101
71
  end # Opener
@@ -7,20 +7,15 @@ Gem::Specification.new do |gem|
7
7
  gem.summary = 'Property tagger for hotels in Dutch and English.'
8
8
  gem.description = gem.summary
9
9
  gem.homepage = 'http://opener-project.github.com/'
10
- gem.extensions = ['ext/hack/Rakefile']
11
10
 
12
11
  gem.license = 'Apache 2.0'
13
12
 
14
13
  gem.required_ruby_version = '>= 1.9.2'
15
14
 
16
15
  gem.files = Dir.glob([
17
- 'core/data/**/*',
18
- 'core/*.py',
19
- 'ext/**/*',
20
16
  'lib/**/*',
21
17
  'config.ru',
22
18
  '*.gemspec',
23
- '*_requirements.txt',
24
19
  'README.md',
25
20
  'LICENSE.txt',
26
21
  'exec/**/*',
@@ -33,10 +28,9 @@ Gem::Specification.new do |gem|
33
28
  gem.add_dependency 'opener-webservice', '~> 2.1'
34
29
  gem.add_dependency 'opener-core', '~> 2.2'
35
30
 
36
- gem.add_dependency 'nokogiri'
37
- gem.add_dependency 'rake'
38
- gem.add_dependency 'cliver'
31
+ gem.add_dependency 'oga'
39
32
 
40
33
  gem.add_development_dependency 'rspec', '~> 3.0'
41
34
  gem.add_development_dependency 'cucumber'
35
+ gem.add_development_dependency 'rake'
42
36
  end
data/task/test.rake CHANGED
@@ -1,5 +1,5 @@
1
1
  desc 'Runs the tests'
2
- task :test => [:compile, :lexicons] do
2
+ task :test => [:lexicons] do
3
3
  ENV['RESOURCE_PATH'] = File.expand_path('../../tmp/lexicons/hotel', __FILE__)
4
4
 
5
5
  sh('cucumber features')
metadata CHANGED
@@ -1,184 +1,162 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: opener-property-tagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.2
4
+ version: 3.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - development@olery.com
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-01-12 00:00:00.000000000 Z
11
+ date: 2015-01-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: opener-daemons
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '2.2'
20
- type: :runtime
21
- prerelease: false
22
15
  version_requirements: !ruby/object:Gem::Requirement
23
16
  requirements:
24
- - - "~>"
17
+ - - ~>
25
18
  - !ruby/object:Gem::Version
26
19
  version: '2.2'
27
- - !ruby/object:Gem::Dependency
28
- name: opener-webservice
29
20
  requirement: !ruby/object:Gem::Requirement
30
21
  requirements:
31
- - - "~>"
22
+ - - ~>
32
23
  - !ruby/object:Gem::Version
33
- version: '2.1'
34
- type: :runtime
24
+ version: '2.2'
35
25
  prerelease: false
26
+ type: :runtime
27
+ - !ruby/object:Gem::Dependency
28
+ name: opener-webservice
36
29
  version_requirements: !ruby/object:Gem::Requirement
37
30
  requirements:
38
- - - "~>"
31
+ - - ~>
39
32
  - !ruby/object:Gem::Version
40
33
  version: '2.1'
41
- - !ruby/object:Gem::Dependency
42
- name: opener-core
43
34
  requirement: !ruby/object:Gem::Requirement
44
35
  requirements:
45
- - - "~>"
36
+ - - ~>
46
37
  - !ruby/object:Gem::Version
47
- version: '2.2'
48
- type: :runtime
38
+ version: '2.1'
49
39
  prerelease: false
40
+ type: :runtime
41
+ - !ruby/object:Gem::Dependency
42
+ name: opener-core
50
43
  version_requirements: !ruby/object:Gem::Requirement
51
44
  requirements:
52
- - - "~>"
45
+ - - ~>
53
46
  - !ruby/object:Gem::Version
54
47
  version: '2.2'
55
- - !ruby/object:Gem::Dependency
56
- name: nokogiri
57
48
  requirement: !ruby/object:Gem::Requirement
58
49
  requirements:
59
- - - ">="
50
+ - - ~>
60
51
  - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :runtime
52
+ version: '2.2'
63
53
  prerelease: false
54
+ type: :runtime
55
+ - !ruby/object:Gem::Dependency
56
+ name: oga
64
57
  version_requirements: !ruby/object:Gem::Requirement
65
58
  requirements:
66
- - - ">="
59
+ - - '>='
67
60
  - !ruby/object:Gem::Version
68
61
  version: '0'
69
- - !ruby/object:Gem::Dependency
70
- name: rake
71
62
  requirement: !ruby/object:Gem::Requirement
72
63
  requirements:
73
- - - ">="
64
+ - - '>='
74
65
  - !ruby/object:Gem::Version
75
66
  version: '0'
76
- type: :runtime
77
67
  prerelease: false
68
+ type: :runtime
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
78
71
  version_requirements: !ruby/object:Gem::Requirement
79
72
  requirements:
80
- - - ">="
73
+ - - ~>
81
74
  - !ruby/object:Gem::Version
82
- version: '0'
83
- - !ruby/object:Gem::Dependency
84
- name: cliver
75
+ version: '3.0'
85
76
  requirement: !ruby/object:Gem::Requirement
86
77
  requirements:
87
- - - ">="
78
+ - - ~>
88
79
  - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :runtime
80
+ version: '3.0'
91
81
  prerelease: false
82
+ type: :development
83
+ - !ruby/object:Gem::Dependency
84
+ name: cucumber
92
85
  version_requirements: !ruby/object:Gem::Requirement
93
86
  requirements:
94
- - - ">="
87
+ - - '>='
95
88
  - !ruby/object:Gem::Version
96
89
  version: '0'
97
- - !ruby/object:Gem::Dependency
98
- name: rspec
99
90
  requirement: !ruby/object:Gem::Requirement
100
91
  requirements:
101
- - - "~>"
92
+ - - '>='
102
93
  - !ruby/object:Gem::Version
103
- version: '3.0'
104
- type: :development
94
+ version: '0'
105
95
  prerelease: false
96
+ type: :development
97
+ - !ruby/object:Gem::Dependency
98
+ name: rake
106
99
  version_requirements: !ruby/object:Gem::Requirement
107
100
  requirements:
108
- - - "~>"
101
+ - - '>='
109
102
  - !ruby/object:Gem::Version
110
- version: '3.0'
111
- - !ruby/object:Gem::Dependency
112
- name: cucumber
103
+ version: '0'
113
104
  requirement: !ruby/object:Gem::Requirement
114
105
  requirements:
115
- - - ">="
106
+ - - '>='
116
107
  - !ruby/object:Gem::Version
117
108
  version: '0'
118
- type: :development
119
109
  prerelease: false
120
- version_requirements: !ruby/object:Gem::Requirement
121
- requirements:
122
- - - ">="
123
- - !ruby/object:Gem::Version
124
- version: '0'
110
+ type: :development
125
111
  description: Property tagger for hotels in Dutch and English.
126
- email:
112
+ email:
127
113
  executables:
128
114
  - property-tagger
129
115
  - property-tagger-daemon
130
116
  - property-tagger-server
131
- extensions:
132
- - ext/hack/Rakefile
117
+ extensions: []
133
118
  extra_rdoc_files: []
134
119
  files:
135
- - LICENSE.txt
136
- - README.md
137
- - bin/property-tagger
138
- - bin/property-tagger-daemon
139
- - bin/property-tagger-server
140
- - config.ru
141
- - core/extract_aspects.py
142
- - core/hotel_property_tagger_nl_en.py
143
- - exec/property-tagger.rb
144
- - ext/hack/Rakefile
145
120
  - lib/opener/property_tagger.rb
146
121
  - lib/opener/property_tagger/cli.rb
147
- - lib/opener/property_tagger/public/markdown.css
122
+ - lib/opener/property_tagger/processor.rb
148
123
  - lib/opener/property_tagger/server.rb
149
124
  - lib/opener/property_tagger/version.rb
125
+ - lib/opener/property_tagger/public/markdown.css
150
126
  - lib/opener/property_tagger/views/index.erb
151
127
  - lib/opener/property_tagger/views/result.erb
128
+ - config.ru
152
129
  - opener-property-tagger.gemspec
153
- - pre_install_requirements.txt
154
- - task/compile.rake
130
+ - README.md
131
+ - LICENSE.txt
132
+ - exec/property-tagger.rb
155
133
  - task/lexicons.rake
156
- - task/python.rake
157
- - task/requirements.rake
158
134
  - task/test.rake
135
+ - bin/property-tagger
136
+ - bin/property-tagger-daemon
137
+ - bin/property-tagger-server
159
138
  homepage: http://opener-project.github.com/
160
139
  licenses:
161
140
  - Apache 2.0
162
141
  metadata: {}
163
- post_install_message:
142
+ post_install_message:
164
143
  rdoc_options: []
165
144
  require_paths:
166
145
  - lib
167
146
  required_ruby_version: !ruby/object:Gem::Requirement
168
147
  requirements:
169
- - - ">="
148
+ - - '>='
170
149
  - !ruby/object:Gem::Version
171
150
  version: 1.9.2
172
151
  required_rubygems_version: !ruby/object:Gem::Requirement
173
152
  requirements:
174
- - - ">="
153
+ - - '>='
175
154
  - !ruby/object:Gem::Version
176
155
  version: '0'
177
156
  requirements: []
178
- rubyforge_project:
179
- rubygems_version: 2.2.2
180
- signing_key:
157
+ rubyforge_project:
158
+ rubygems_version: 2.1.9
159
+ signing_key:
181
160
  specification_version: 4
182
161
  summary: Property tagger for hotels in Dutch and English.
183
162
  test_files: []
184
- has_rdoc:
@@ -1,18 +0,0 @@
1
- #!/usr/bin/env python
2
-
3
- from lxml import etree
4
- import sys
5
- #filename='/Users/ruben/CODE/VU-sentiment-lexicon-xml/VUSentimentLexicon/EN-lexicon/Sentiment-English-HotelDomain.xml'
6
-
7
- root = etree.parse(sys.stdin).getroot()
8
-
9
- for element in root.findall('Lexicon/LexicalEntry'):
10
- ele_lemma = element.findall('Lemma')[0]
11
- ele_domain = element.findall('Sense/Domain')[0]
12
- pos = element.get('partOfSpeech','unknown_pos')
13
- if ele_lemma is not None and ele_domain is not None:
14
- lemma = ele_lemma.get('writtenForm','').lower()
15
- aspect = ele_domain.get('aspect','').lower()
16
- if lemma!='' and aspect!='':
17
- print lemma.encode('utf-8')+'\t'+pos.encode('utf-8')+'\t'+aspect.encode('utf-8')
18
-
@@ -1,138 +0,0 @@
1
- #!/usr/bin/env python
2
-
3
- import sys
4
- import argparse
5
-
6
- import codecs
7
- import os
8
-
9
- this_folder = os.path.dirname(os.path.realpath(__file__))
10
-
11
- # This updates the load path to ensure that the local site-packages directory
12
- # can be used to load packages (e.g. a locally installed copy of lxml).
13
- sys.path.append(os.path.join(this_folder, 'site-packages/pre_install'))
14
-
15
- from VUKafParserPy import KafParser
16
- from lxml import etree
17
- from collections import defaultdict
18
-
19
- __desc='VUA property tagger'
20
- __last_edited='20may2014'
21
- __version='1.0'
22
-
23
- ###
24
- __module_dir = os.path.dirname(__file__)
25
- max_ngram = 1
26
- verbose = False
27
- ##
28
-
29
-
30
- ########################################
31
- ## Format of the file:
32
- #lemma pos aspect
33
- #lemma pos aspect
34
- ########################################
35
- def loadAspects(my_lang,this_file=None):
36
- my_aspects = {}
37
- if this_file is not None:
38
- aspects_filename = this_file
39
- else:
40
- filename = "{0}.txt".format(my_lang)
41
- print>>sys.stderr, "filename thingy",filename
42
- print>>sys.stderr, "path thingy",arguments.path
43
- aspects_filename = os.path.join(arguments.path,filename)
44
-
45
- if not os.path.exists(aspects_filename):
46
- print>>sys.stderr,'ERROR: file with aspects for the language',my_lang,'not found in',aspects_filename
47
- else:
48
- fic = codecs.open(aspects_filename,'r','utf-8')
49
- for line in fic:
50
- fields = line.strip().split('\t')
51
- if len(fields) == 3:
52
- lemma,pos,aspect = fields
53
- my_aspects[lemma] = aspect
54
- fic.close()
55
- return aspects_filename, my_aspects
56
- ########################################
57
-
58
-
59
-
60
- ###### MAIN ########
61
-
62
- argument_parser = argparse.ArgumentParser(description='Tags a text with polarities at lemma level')
63
- argument_parser.add_argument("--no-time",action="store_false", default=True, dest="my_time_stamp",help="For not including timestamp in header")
64
- argument_parser.add_argument("--lexicon", action="store", default=None, dest="lexicon", help="Force to use this lexicon")
65
- argument_parser.add_argument("--path", action="store", default=None, dest="path", help="Set the path where the property aspects are found.")
66
-
67
- arguments = argument_parser.parse_args()
68
-
69
- if not sys.stdin.isatty():
70
- ## READING FROM A PIPE
71
- pass
72
- else:
73
- print>>sys.stderr,'Input stream required.'
74
- print>>sys.stderr,'Example usage: cat myUTF8file.kaf.xml |',sys.argv[0]
75
- print>>sys.stderr,sys.argv[0]+' -h for help'
76
- sys.exit(-1)
77
-
78
-
79
- ## Load the tree and the list of terms with the id
80
- my_data = []
81
- try:
82
- my_kaf_tree = KafParser(sys.stdin)
83
- except Exception as e:
84
- print>>sys.stdout,'Error parsing input. Input is required to be KAF'
85
- print>>sys.stdout,str(e)
86
- sys.exit(2)
87
-
88
-
89
- ## Get language from the KAF file
90
- my_lang = my_kaf_tree.getLanguage()
91
-
92
- my_aspects_filename = my_aspects = None
93
- if arguments.lexicon is None:
94
- if my_lang not in ['nl','en','de','fr','it','es']:
95
- print>>sys.stdout,'Error in the language specified in your KAF. The language is ',my_lang,' and possible values for this module '
96
- print>>sys.stdout,'are nl for Dutch ,en for English, es Spanish, fr French, it Italian or de German'
97
- sys.exit(1)
98
-
99
- my_aspects_filename, my_aspects = loadAspects(my_lang)
100
- else:
101
- my_aspects_filename, my_aspects = loadAspects(my_lang,this_file=arguments.lexicon)
102
-
103
- if verbose:
104
- print>>sys.stderr,'Loaded ',len(my_aspects),'aspects from',my_aspects_filename
105
-
106
-
107
- for term in my_kaf_tree.getTerms():
108
- my_data.append((term.getLemma(),term.getId()))
109
- if verbose: print>>sys.stderr,'Number of terms in the kaf file:',len(my_data)
110
-
111
-
112
- current_token = found = 0
113
- uniq_aspects = defaultdict(list)
114
- while current_token < len(my_data):
115
- for tam_ngram in range(1,max_ngram+1):
116
- # Build an n-gram of size tam_ngram and beginning in current_token
117
- if current_token + tam_ngram <= len(my_data):
118
- ngram = ' '.join(lemma for lemma,_ in my_data[current_token:current_token+tam_ngram])
119
- aspect = my_aspects.get(ngram.lower(),None)
120
- if aspect is not None:
121
- list_of_ids = [id for _,id in my_data[current_token:current_token+tam_ngram]]
122
- uniq_aspects[aspect].append((list_of_ids,ngram))
123
- current_token += 1
124
-
125
-
126
- ## Code for generating the propery layer included in the Parser
127
- for aspect, list_of_lists in uniq_aspects.items():
128
- for list_of_ids, str_text in list_of_lists:
129
- my_kaf_tree.add_property(aspect,list_of_ids,str_text)
130
-
131
- my_kaf_tree.addLinguisticProcessor(__desc,__last_edited+'_'+__version,'features', arguments.my_time_stamp)
132
- my_kaf_tree.saveToFile(sys.stdout)
133
-
134
-
135
-
136
-
137
-
138
-
data/ext/hack/Rakefile DELETED
@@ -1,8 +0,0 @@
1
- import File.expand_path('../../../task/requirements.rake', __FILE__)
2
- import File.expand_path('../../../task/python.rake', __FILE__)
3
-
4
- task :default => :requirements do
5
- Dir.chdir(File.expand_path('../../../', __FILE__)) do
6
- Rake::Task['core/site-packages/pre_install'].invoke
7
- end
8
- end
@@ -1 +0,0 @@
1
- https://github.com/opener-project/VU-kaf-parser/archive/master.zip#egg=VUKafParserPy
data/task/compile.rake DELETED
@@ -1,2 +0,0 @@
1
- desc 'Verifies requirements and compiles the core'
2
- task :compile => ['requirements', 'python:compile']
data/task/python.rake DELETED
@@ -1,11 +0,0 @@
1
- # NOTE: pre_build/pre_install directories are created by pip.
2
-
3
- directory 'core/site-packages/pre_install' do |task|
4
- sh "pip install --requirement=pre_install_requirements.txt " \
5
- "--target=#{task.name} --ignore-installed"
6
- end
7
-
8
- namespace :python do
9
- desc 'Installs Python packages in a local directory'
10
- task :compile => ['core/site-packages/pre_install']
11
- end
@@ -1,7 +0,0 @@
1
- desc 'Verifies the requirements'
2
- task :requirements do
3
- require 'cliver'
4
-
5
- Cliver.detect!('python', '~> 2.6')
6
- Cliver.detect!('pip', '>= 1.3')
7
- end