opener-opinion-detector-basic 2.0.7 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -5
- data/lib/opener/opinion_detector_basic.rb +12 -65
- data/lib/opener/opinion_detector_basic/opinion.rb +171 -0
- data/lib/opener/opinion_detector_basic/processor.rb +329 -0
- data/lib/opener/opinion_detector_basic/term.rb +160 -0
- data/lib/opener/opinion_detector_basic/version.rb +1 -1
- data/opener-opinion-detector-basic.gemspec +5 -10
- metadata +24 -39
- data/core/opinion_detector_basic_multi.py +0 -512
- data/ext/hack/Rakefile +0 -8
- data/pre_install_requirements.txt +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d07d2a2eb88245eca143655a2fc8b5d301b632dd
|
4
|
+
data.tar.gz: 1c67e6b59421ef2ab4e33f5c3260699c202eab0e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cf26709cea362f73901df7184f2c562ac5b9d597c5386c1bb4845a843a667b8e59a301d6e36e3ed5759fd7a7b904b82a390665c0dd916f803e4dcfdefe3ca7f3
|
7
|
+
data.tar.gz: f978e9dc22837f78a758e28d4612e07732d5c12f7c429711d46716e1869c0fb640e7397b47b9c388c12273b440b7b282d49bcf87899070af80831373789294be
|
data/README.md
CHANGED
@@ -103,11 +103,7 @@ At least you need the following system setup:
|
|
103
103
|
|
104
104
|
### Dependencies for normal use:
|
105
105
|
|
106
|
-
* Ruby 1.
|
107
|
-
* Python 2.6
|
108
|
-
* lxml: library for processing xml in python
|
109
|
-
* libarchive, on Debian/Ubuntu based systems this can be installed using
|
110
|
-
`sudo apt-get install libarchive-dev`
|
106
|
+
* Tested on Ruby 2.1.5, 2.2.2, Rubinius 2.4.0, jruby-1.7.8
|
111
107
|
|
112
108
|
## Domain Adaption
|
113
109
|
|
@@ -1,8 +1,13 @@
|
|
1
|
-
require 'open3'
|
2
1
|
require 'slop'
|
2
|
+
require 'oga'
|
3
|
+
require 'monitor'
|
4
|
+
|
5
|
+
require 'rexml/document'
|
6
|
+
require 'rexml/formatters/pretty'
|
3
7
|
|
4
8
|
require_relative 'opinion_detector_basic/version'
|
5
9
|
require_relative 'opinion_detector_basic/cli'
|
10
|
+
require_relative 'opinion_detector_basic/processor'
|
6
11
|
|
7
12
|
module Opener
|
8
13
|
##
|
@@ -27,77 +32,19 @@ module Opener
|
|
27
32
|
@args = options.delete(:args) || []
|
28
33
|
@options = options
|
29
34
|
end
|
30
|
-
|
35
|
+
|
31
36
|
##
|
32
|
-
#
|
33
|
-
#
|
34
|
-
# @param [Array] args Commandline arguments passed to the command.
|
35
|
-
#
|
36
|
-
def command
|
37
|
-
return "#{adjust_python_path} python -E #{kernel} #{args.join(' ')}"
|
38
|
-
end
|
39
|
-
|
40
|
-
##
|
41
|
-
# Processes an input KAF document and returns the results as a new KAF
|
42
|
-
# document.
|
37
|
+
# Processes the input KAF document.
|
43
38
|
#
|
44
39
|
# @param [String] input
|
45
40
|
# @return [String]
|
46
41
|
#
|
47
42
|
def run(input)
|
48
|
-
|
49
|
-
|
50
|
-
raise stderr unless process.success?
|
51
|
-
|
52
|
-
return stdout
|
53
|
-
end
|
54
|
-
|
55
|
-
protected
|
56
|
-
|
57
|
-
##
|
58
|
-
# @return [String]
|
59
|
-
#
|
60
|
-
def adjust_python_path
|
61
|
-
site_packages = File.join(core_dir, 'site-packages')
|
62
|
-
|
63
|
-
return "env PYTHONPATH=#{site_packages}:$PYTHONPATH"
|
64
|
-
end
|
65
|
-
|
66
|
-
##
|
67
|
-
# capture3 method doesn't work properly with Jruby, so
|
68
|
-
# this is a workaround
|
69
|
-
#
|
70
|
-
def capture(input)
|
71
|
-
Open3.popen3(*command.split(" ")) {|i, o, e, t|
|
72
|
-
out_reader = Thread.new { o.read }
|
73
|
-
err_reader = Thread.new { e.read }
|
74
|
-
i.write input
|
75
|
-
i.close
|
76
|
-
[out_reader.value, err_reader.value, t.value]
|
77
|
-
}
|
78
|
-
end
|
79
|
-
|
80
|
-
##
|
81
|
-
# @return [String]
|
82
|
-
#
|
83
|
-
def core_dir
|
84
|
-
return File.expand_path('../../../core', __FILE__)
|
85
|
-
end
|
86
|
-
|
87
|
-
##
|
88
|
-
# @return [String]
|
89
|
-
#
|
90
|
-
def kernel
|
91
|
-
return File.join(core_dir, 'opinion_detector_basic_multi.py')
|
92
|
-
end
|
43
|
+
options[:timestamp] = !options.delete(:no_time)
|
93
44
|
|
94
|
-
|
95
|
-
# @return the language from the KAF
|
96
|
-
#
|
97
|
-
def language(input)
|
98
|
-
document = Nokogiri::XML(input)
|
99
|
-
|
100
|
-
return document.at('KAF').attr('xml:lang')
|
45
|
+
return Processor.new(input, options).process
|
101
46
|
end
|
102
47
|
end # OpinionDetectorBasic
|
103
48
|
end # Opener
|
49
|
+
|
50
|
+
|
@@ -0,0 +1,171 @@
|
|
1
|
+
module Opener
  class OpinionDetectorBasic
    ##
    # A single opinion built around one opinion expression term. It exposes
    # the expression's term ids, sentence, strength and polarity, and can look
    # up the opinion holder and opinion target among the terms of the same
    # sentence.
    #
    class Opinion
      ##
      # Lemmas that are treated as opinion holders, keyed by language code.
      #
      # @return [Hash<String, Array<String>>]
      #
      OPINION_HOLDERS = {
        'nl' => ['ik','we','wij','ze','zij','jullie','u','hij','het','jij','je','mij','me','hem','haar','ons','hen','hun'],
        'en' => ['i','we','he','she','they','it','you'],
        'es' => ['yo','tu','nosotros','vosotros','ellos','ellas','nosotras','vosotras'],
        'it' => ['io','tu','noi','voi','loro','lei','lui'],
        'de' => ['ich','du','wir','ihr','sie','er'],
        'fr' => ['je','tu','lui','elle','nous','vous','ils','elles']
      }.each_value(&:freeze).freeze

      ##
      # Offset used when building the candidate windows around the expression.
      #
      MAX_TARGET_DISTANCE = 3

      attr_reader :term
      attr_accessor :left_candidates, :right_candidates, :target_ids, :holders

      ##
      # @param [Object] term The opinion expression term. It must respond to
      #  `list_ids`, `sentence` and `accumulated_strength`.
      #
      def initialize(term)
        @term             = term
        @left_candidates  = []
        @right_candidates = []
        @holders          = []
        @target_ids       = []
      end

      ##
      # Returns the sorted term ids of the opinion expression.
      #
      # @return [Array]
      #
      def ids
        @ids ||= term.list_ids.sort
      end

      ##
      # Returns the sentence id of the opinion (taken from the term).
      #
      # @return [String]
      #
      def sentence
        @sentence ||= term.sentence
      end

      ##
      # Returns the strength of the opinion, i.e. the accumulated strength of
      # the expression term.
      #
      # @return [Integer]
      #
      def strength
        @strength ||= term.accumulated_strength
      end

      ##
      # Returns the polarity of the opinion: "positive" for a strength above
      # zero, "negative" for a strength below zero and "neutral" otherwise.
      #
      # @return [String]
      #
      def polarity
        @polarity ||=
          if strength > 0
            "positive"
          elsif strength < 0
            "negative"
          else
            "neutral"
          end
      end

      ##
      # Obtain the opinion holder from the terms that belong to the same
      # sentence: the id of the first term whose lemma is a known holder for
      # the given language is appended to `holders`.
      #
      # Languages without a holder list (including a nil language) simply
      # yield no holder instead of raising.
      #
      # @param [Hash] sentences Terms grouped by sentence id.
      # @param [String] language Language code such as "en".
      #
      def obtain_holders(sentences, language)
        known_holders  = opinion_holders.fetch(language, [])
        sentence_terms = sentences[sentence] || []

        holder = sentence_terms.find { |t| known_holders.include?(t.lemma) }

        @holders << holder.id if holder
      end

      ##
      # Get the potential right and left candidates of the sentence and
      # decide which one is the actual target of the opinion.
      #
      # The first right candidate wins. When there is none and no target has
      # been set yet, the first entry of the mixed right/left list is used.
      #
      # @param [Hash] sentences Terms grouped by sentence id.
      #
      def obtain_targets(sentences)
        sentence_terms = sentences[sentence] || []
        terms_count    = sentence_terms.count

        # Right window: starts right after the LAST term of the expression
        # (or at the start of the sentence when no expression term is found).
        # The range below is inclusive on both ends.
        last_index = sentence_terms.rindex { |t| ids.include?(t.id) } || -1

        if last_index + 1 < terms_count
          from = last_index + 1
          to   = [from + MAX_TARGET_DISTANCE, terms_count].min

          @right_candidates = filter_candidates(sentence_terms[from..to])
        end

        # Left window: ends at the FIRST term of the expression. Expression
        # terms inside the window are dropped by filter_candidates.
        first_index = sentence_terms.index { |t| ids.include?(t.id) } || 0

        unless first_index == 0
          from = [0, first_index - 1 - MAX_TARGET_DISTANCE].max

          @left_candidates = filter_candidates(sentence_terms[from..first_index])
        end

        @target_ids << right_candidates.first.id unless right_candidates.empty?

        if target_ids.empty?
          fallback = mix_lists(right_candidates, left_candidates).first

          @target_ids << fallback.id if fallback
        end
      end

      protected

      ##
      # Interleaves two lists (a1, b1, a2, b2, ...) and appends whatever is
      # left of the longer list. Nil entries are removed.
      #
      # Used when there are no opinion targets: right and left candidates are
      # mixed into one list and the first one is picked as the target.
      #
      # @param [Array] lista
      # @param [Array] listb
      # @return [Array]
      #
      def mix_lists(lista, listb)
        shared = [lista.count, listb.count].min
        mixed  = lista.first(shared).zip(listb.first(shared)).flatten(1)
        longer = lista.count > listb.count ? lista : listb

        (mixed + longer.drop(shared)).compact
      end

      ##
      # Filters candidate terms: keeps terms whose part of speech is "N" or
      # "R" and that are not already part of the expression.
      #
      # @param [Array] sentence_terms
      # @return [Array]
      #
      def filter_candidates(sentence_terms)
        sentence_terms.select do |t|
          %w[N R].include?(t.pos) && !ids.include?(t.id)
        end
      end

      ##
      # Opinion holders for each language code.
      #
      # @return [Hash]
      #
      def opinion_holders
        OPINION_HOLDERS
      end
    end # Opinion
  end # OpinionDetectorBasic
end # Opener
|
@@ -0,0 +1,329 @@
|
|
1
|
+
require_relative 'term'
|
2
|
+
require_relative 'opinion'
|
3
|
+
|
4
|
+
module Opener
  class OpinionDetectorBasic
    ##
    # Class that detects opinions in a given input KAF file and writes them
    # back into the document as an `opinions` layer.
    #
    class Processor
      attr_accessor :document, :timestamp, :opinion_strength, :pretty

      ##
      # @param [String|IO] file The KAF file/input to process.
      # @param [Hash] options
      #
      # @option options [TrueClass|FalseClass] :timestamp Write the current
      #  time in the linguistic processor entry ("*" is written otherwise).
      # @option options [TrueClass|FalseClass] :opinion_strength Stored on the
      #  processor; NOTE(review): nothing in this class reads it, the strength
      #  attribute is always written.
      # @option options [TrueClass|FalseClass] :pretty Enable pretty
      #  formatting, disabled by default due to the performance overhead.
      #
      # @raise [RuntimeError] When the input has no KAF root element.
      #
      def initialize(file, options = {})
        @document         = Oga.parse_xml(file)
        @timestamp        = !!options[:timestamp]
        @opinion_strength = !!options[:opinion_strength]
        @pretty           = options[:pretty] || false

        raise 'Error parsing input. Input is required to be KAF' unless is_kaf?
      end

      ##
      # Processes the input and returns the new KAF output.
      #
      # @return [String]
      #
      def process
        add_opinions_layer

        # Opinion ids are 1-based: o1, o2, ...
        opinions.each.with_index(1) do |opinion, index|
          add_opinion(opinion, index)
        end

        add_linguistic_processor

        pretty ? pretty_print(document) : document.to_xml
      end

      ##
      # Get the language of the input file (the `xml:lang` attribute of the
      # KAF root element).
      #
      # @return [String]
      #
      def language
        @language ||= document.at_xpath('KAF').get('xml:lang')
      end

      ##
      # Get the terms from the input file.
      #
      # @return [Array<Term>]
      #
      def terms
        unless @terms
          # Build into a local first so a failure half-way through does not
          # leave a partial list memoized.
          found = []

          document.xpath('KAF/terms/term').each do |node|
            found << Term.new(node, document, language)
          end

          @terms = found
        end

        @terms
      end

      ##
      # Get the opinions. The first call accumulates the term strengths,
      # creates one opinion per remaining expression term with a non-zero
      # strength and resolves targets and holders for each of them.
      #
      # @return [Array<Opinion>]
      #
      def opinions
        unless @opinions
          set_accumulated_strength
          apply_modifiers
          apply_conjunctions

          # Initialize opinions with their expressions.
          expressions = terms.select do |term|
            term.is_expression? && term.accumulated_strength != 0
          end

          found = expressions.map { |term| Opinion.new(term) }

          # Obtain targets, then holders, for each opinion.
          found.each { |opinion| opinion.obtain_targets(sentences) }
          found.each { |opinion| opinion.obtain_holders(sentences, language) }

          @opinions = found
        end

        @opinions
      end

      ##
      # Remove the opinions layer from the KAF file if it exists and add a new
      # one.
      #
      def add_opinions_layer
        existing = document.at_xpath('KAF/opinions')

        existing.remove if existing

        new_node('opinions', 'KAF')
      end

      ##
      # Adds the entire opinion in the KAF file: an `opinion` element with an
      # optional holder, a target (its span is only written when there are
      # target ids) and the expression with polarity and strength.
      #
      # @param [Opinion] opinion
      # @param [Integer] index Number used to build the `oid` attribute.
      #
      def add_opinion(opinion, index)
        opinion_node = new_node("opinion", "KAF/opinions")
        opinion_node.set('oid', "o#{index}")

        unless opinion.holders.empty?
          holder_node = new_node("opinion_holder", opinion_node)
          add_opinion_element(holder_node, opinion.holders)
        end

        target_node = new_node("opinion_target", opinion_node)
        add_opinion_element(target_node, opinion.target_ids) unless opinion.target_ids.empty?

        expression_node = new_node("opinion_expression", opinion_node)
        expression_node.set('polarity', opinion.polarity)
        expression_node.set('strength', opinion.strength.to_s)
        add_opinion_element(expression_node, opinion.ids)
      end

      ##
      # Method for adding opinion holders, targets and expressions: writes a
      # comment with the lemmas of the given terms followed by a `span` with
      # one `target` element per id.
      #
      # @param [Oga::XML::Element] node
      # @param [Array] ids
      #
      def add_opinion_element(node, ids)
        lemmas = terms.select { |t| ids.include?(t.id) }.map(&:lemma).join(" ")

        # Lemmas come straight from the input, so make them comment safe.
        node.children << Oga::XML::Comment.new(:text => comment_safe(lemmas))

        span_node = new_node("span", node)

        ids.each do |id|
          new_node("target", span_node).set('id', id.to_s)
        end
      end

      ##
      # Add linguistic processor layer with basic information
      # (version, timestamp, description etc) in the KAF file.
      #
      def add_linguistic_processor
        description = 'Basic opinion detector with Pos'
        last_edited = '13may2015'
        version     = '2.0'

        # Input without a header would otherwise fail on a nil parent node.
        new_node('kafHeader', 'KAF') unless document.at_xpath('KAF/kafHeader')

        node = new_node('linguisticProcessors', 'KAF/kafHeader')
        node.set('layer', 'opinions')

        lp_node = new_node('lp', node)

        lp_node.set('version', "#{last_edited}-#{version}")
        lp_node.set('name', description)

        if timestamp
          format = '%Y-%m-%dT%H:%M:%S%Z'

          lp_node.set('timestamp', Time.now.strftime(format))
        else
          lp_node.set('timestamp', '*')
        end
      end

      ##
      # Format the output document properly.
      #
      # TODO: this should be handled by Oga in a nice way.
      #
      # @return [String]
      #
      def pretty_print(document)
        doc = REXML::Document.new(document.to_xml)
        doc.context[:attribute_quote] = :quote

        # The formatter appends to this buffer, so it has to be mutable.
        out = String.new

        formatter = REXML::Formatters::Pretty.new
        formatter.compact = true
        formatter.write(doc, out)

        out.strip
      end

      ##
      # Get terms grouped by sentence.
      #
      # @return [Hash]
      #
      def sentences
        @sentences ||= terms.group_by(&:sentence)
      end

      protected

      ##
      # The strength of a term depends heavily on the type of the previous
      # one. For example if the previous one is a shifter, it needs
      # to be multiplied. If it's an intensifier, it needs to be
      # added (or subtracted depending on the strength of the previous
      # term) etc.
      #
      # Walks every pair of neighbouring terms and folds the strength and ids
      # of the first into the second when the second is a shifter or an
      # intensifier. `symbol` carries the operator used for the next
      # intensifier from one pair to the next.
      #
      def set_accumulated_strength
        symbol = :+

        terms.each_cons(2) do |term, following|
          if following.is_shifter?
            if term.accumulated_strength != 0
              following.accumulated_strength *= term.accumulated_strength
              following.list_ids += term.list_ids
              term.use = false
              symbol = following.accumulated_strength > 0 ? :+ : :-
            else
              symbol = :*
            end
          elsif following.is_intensifier?
            following.accumulated_strength =
              term.accumulated_strength.public_send(symbol, following.accumulated_strength)
            term.use = false
            symbol = following.accumulated_strength > 0 ? :+ : :-

            following.list_ids += term.list_ids if term.accumulated_strength != 0
          else
            symbol = following.accumulated_strength >= 0 ? :+ : :-
          end
        end
      end

      ##
      # Apply strength to the next term after a shifter or intensifier that
      # is still in use; the modifier itself is marked as used up.
      #
      def apply_modifiers
        terms.each_cons(2) do |term, following|
          next unless term.use && (term.is_shifter? || term.is_intensifier?)

          following.accumulated_strength *= term.accumulated_strength
          following.list_ids += term.list_ids
          term.use = false
        end
      end

      ##
      # Ignore conjunctions when applying strength: a run of used, non-zero
      # terms that are only separated by conjunctions is collapsed into the
      # last term of the run, which receives the summed strength and all ids.
      #
      def apply_conjunctions
        terms_count = terms.count
        i = 0

        while i < terms_count
          head = terms[i]

          if head.use && head.accumulated_strength != 0
            last_used = i
            list_ids  = head.list_ids
            strength  = head.accumulated_strength
            head.use  = false
            j = i + 1

            while j < terms_count
              candidate = terms[j]

              if candidate.is_conjunction
                candidate.use = false
              elsif candidate.use && candidate.accumulated_strength != 0
                list_ids     += candidate.list_ids
                strength     += candidate.accumulated_strength
                candidate.use = false
                last_used     = j
              else
                break
              end

              j += 1
            end

            tail = terms[last_used]
            tail.accumulated_strength = strength
            tail.list_ids = list_ids
            tail.use = true

            # The term that ended the run cannot start a new one, skip it.
            i = j
          end

          i += 1
        end
      end

      ##
      # Makes text safe for use inside an XML comment, which may not contain
      # "--" nor end with "-": a space is inserted between consecutive
      # hyphens and after a trailing hyphen.
      #
      # @param [String] text
      # @return [String]
      #
      def comment_safe(text)
        safe = text.to_s.gsub(/-(?=-)/, '- ')

        safe.end_with?('-') ? "#{safe} " : safe
      end

      ##
      # Creates a new node in the KAF file and appends it to the parent.
      #
      # @param [String] tag Name of the new element.
      # @param [String|Oga::XML::Element] parent XPath of the parent or the
      #  parent node itself.
      # @return [Oga::XML::Element]
      # @raise [ArgumentError] When the parent cannot be found.
      #
      def new_node(tag, parent)
        parent_node = parent.is_a?(String) ? document.at_xpath(parent) : parent

        unless parent_node
          raise ArgumentError, "Unable to add <#{tag}>: parent #{parent.inspect} not found"
        end

        node = Oga::XML::Element.new(:name => tag)

        parent_node.children << node

        node
      end

      ##
      # Check if input is a KAF file.
      #
      # @return [Boolean]
      #
      def is_kaf?
        !!document.at_xpath('KAF')
      end
    end # Processor
  end # OpinionDetectorBasic
end # Opener
|