frprep 0.0.1.prealpha
Sign up to get free protection for your applications and to get access to all the features.
- data/.yardopts +8 -0
- data/CHANGELOG.rdoc +0 -0
- data/LICENSE.rdoc +0 -0
- data/README.rdoc +0 -0
- data/lib/common/AbstractSynInterface.rb +1227 -0
- data/lib/common/BerkeleyInterface.rb +375 -0
- data/lib/common/CollinsInterface.rb +1165 -0
- data/lib/common/ConfigData.rb +694 -0
- data/lib/common/Counter.rb +18 -0
- data/lib/common/DBInterface.rb +48 -0
- data/lib/common/EnduserMode.rb +27 -0
- data/lib/common/Eval.rb +480 -0
- data/lib/common/FixSynSemMapping.rb +196 -0
- data/lib/common/FrPrepConfigData.rb +66 -0
- data/lib/common/FrprepHelper.rb +1324 -0
- data/lib/common/Graph.rb +345 -0
- data/lib/common/ISO-8859-1.rb +24 -0
- data/lib/common/ML.rb +186 -0
- data/lib/common/Maxent.rb +215 -0
- data/lib/common/MiniparInterface.rb +1388 -0
- data/lib/common/Optimise.rb +195 -0
- data/lib/common/Parser.rb +213 -0
- data/lib/common/RegXML.rb +269 -0
- data/lib/common/RosyConventions.rb +171 -0
- data/lib/common/SQLQuery.rb +243 -0
- data/lib/common/STXmlTerminalOrder.rb +194 -0
- data/lib/common/SalsaTigerRegXML.rb +2347 -0
- data/lib/common/SalsaTigerXMLHelper.rb +99 -0
- data/lib/common/SleepyInterface.rb +384 -0
- data/lib/common/SynInterfaces.rb +275 -0
- data/lib/common/TabFormat.rb +720 -0
- data/lib/common/Tiger.rb +1448 -0
- data/lib/common/TntInterface.rb +44 -0
- data/lib/common/Tree.rb +61 -0
- data/lib/common/TreetaggerInterface.rb +303 -0
- data/lib/common/headz.rb +338 -0
- data/lib/common/option_parser.rb +13 -0
- data/lib/common/ruby_class_extensions.rb +310 -0
- data/lib/fred/Baseline.rb +150 -0
- data/lib/fred/FileZipped.rb +31 -0
- data/lib/fred/FredBOWContext.rb +863 -0
- data/lib/fred/FredConfigData.rb +182 -0
- data/lib/fred/FredConventions.rb +232 -0
- data/lib/fred/FredDetermineTargets.rb +324 -0
- data/lib/fred/FredEval.rb +312 -0
- data/lib/fred/FredFeatureExtractors.rb +321 -0
- data/lib/fred/FredFeatures.rb +1061 -0
- data/lib/fred/FredFeaturize.rb +596 -0
- data/lib/fred/FredNumTrainingSenses.rb +27 -0
- data/lib/fred/FredParameters.rb +402 -0
- data/lib/fred/FredSplit.rb +84 -0
- data/lib/fred/FredSplitPkg.rb +180 -0
- data/lib/fred/FredTest.rb +607 -0
- data/lib/fred/FredTrain.rb +144 -0
- data/lib/fred/PlotAndREval.rb +480 -0
- data/lib/fred/fred.rb +45 -0
- data/lib/fred/md5.rb +23 -0
- data/lib/fred/opt_parser.rb +250 -0
- data/lib/frprep/AbstractSynInterface.rb +1227 -0
- data/lib/frprep/Ampersand.rb +37 -0
- data/lib/frprep/BerkeleyInterface.rb +375 -0
- data/lib/frprep/CollinsInterface.rb +1165 -0
- data/lib/frprep/ConfigData.rb +694 -0
- data/lib/frprep/Counter.rb +18 -0
- data/lib/frprep/FNCorpusXML.rb +643 -0
- data/lib/frprep/FNDatabase.rb +144 -0
- data/lib/frprep/FixSynSemMapping.rb +196 -0
- data/lib/frprep/FrPrepConfigData.rb +66 -0
- data/lib/frprep/FrameXML.rb +513 -0
- data/lib/frprep/FrprepHelper.rb +1324 -0
- data/lib/frprep/Graph.rb +345 -0
- data/lib/frprep/ISO-8859-1.rb +24 -0
- data/lib/frprep/MiniparInterface.rb +1388 -0
- data/lib/frprep/Parser.rb +213 -0
- data/lib/frprep/RegXML.rb +269 -0
- data/lib/frprep/STXmlTerminalOrder.rb +194 -0
- data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
- data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
- data/lib/frprep/SleepyInterface.rb +384 -0
- data/lib/frprep/SynInterfaces.rb +275 -0
- data/lib/frprep/TabFormat.rb +720 -0
- data/lib/frprep/Tiger.rb +1448 -0
- data/lib/frprep/TntInterface.rb +44 -0
- data/lib/frprep/Tree.rb +61 -0
- data/lib/frprep/TreetaggerInterface.rb +303 -0
- data/lib/frprep/do_parses.rb +142 -0
- data/lib/frprep/frprep.rb +686 -0
- data/lib/frprep/headz.rb +338 -0
- data/lib/frprep/one_parsed_file.rb +28 -0
- data/lib/frprep/opt_parser.rb +94 -0
- data/lib/frprep/ruby_class_extensions.rb +310 -0
- data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
- data/lib/rosy/DBMySQL.rb +146 -0
- data/lib/rosy/DBSQLite.rb +280 -0
- data/lib/rosy/DBTable.rb +239 -0
- data/lib/rosy/DBWrapper.rb +176 -0
- data/lib/rosy/ExternalConfigData.rb +58 -0
- data/lib/rosy/FailedParses.rb +130 -0
- data/lib/rosy/FeatureInfo.rb +242 -0
- data/lib/rosy/GfInduce.rb +1115 -0
- data/lib/rosy/GfInduceFeature.rb +148 -0
- data/lib/rosy/InputData.rb +294 -0
- data/lib/rosy/RosyConfigData.rb +115 -0
- data/lib/rosy/RosyConfusability.rb +338 -0
- data/lib/rosy/RosyEval.rb +465 -0
- data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
- data/lib/rosy/RosyFeaturize.rb +280 -0
- data/lib/rosy/RosyInspect.rb +336 -0
- data/lib/rosy/RosyIterator.rb +477 -0
- data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
- data/lib/rosy/RosyPruning.rb +165 -0
- data/lib/rosy/RosyServices.rb +744 -0
- data/lib/rosy/RosySplit.rb +232 -0
- data/lib/rosy/RosyTask.rb +19 -0
- data/lib/rosy/RosyTest.rb +826 -0
- data/lib/rosy/RosyTrain.rb +232 -0
- data/lib/rosy/RosyTrainingTestTable.rb +786 -0
- data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
- data/lib/rosy/View.rb +418 -0
- data/lib/rosy/opt_parser.rb +379 -0
- data/lib/rosy/rosy.rb +77 -0
- data/lib/shalmaneser/version.rb +3 -0
- data/test/frprep/test_opt_parser.rb +94 -0
- data/test/functional/functional_test_helper.rb +40 -0
- data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
- data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
- data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
- data/test/functional/test_fred.rb +47 -0
- data/test/functional/test_frprep.rb +52 -0
- data/test/functional/test_rosy.rb +20 -0
- metadata +270 -0
@@ -0,0 +1,1609 @@
|
|
1
|
+
####
|
2
|
+
# ke & sp
|
3
|
+
# adapted to new feature extractor class,
|
4
|
+
# Collins and Tiger features combined:
|
5
|
+
# KE November 2005
|
6
|
+
#
|
7
|
+
# Feature Extractors for Rosy
|
8
|
+
#
|
9
|
+
# Contract: each feature extractor inherits from the RosyFeatureExtractor class
|
10
|
+
#
|
11
|
+
# Feature extractors return nil if no feature value could be
|
12
|
+
# returned
|
13
|
+
|
14
|
+
|
15
|
+
# Salsa packages
|
16
|
+
require 'rosy/AbstractFeatureAndExternal'
|
17
|
+
require 'common/SalsaTigerRegXML'
|
18
|
+
|
19
|
+
# Fred and Rosy packages
|
20
|
+
require 'common/RosyConventions'
|
21
|
+
|
22
|
+
|
23
|
+
################################
|
24
|
+
# base class for all following feature extractors
|
25
|
+
###
# Base class for all Rosy feature extractors in this file.
#
# The state prepared by set(), set_sentence() and set_node() is kept in
# class variables, which Ruby shares across the whole subclass hierarchy,
# so every feature extractor sees the same sentence/node context.
class RosyFeatureExtractor < AbstractFeatureExtractor
  @@instance_ok = nil   # Boolean: set_node(), set_sent() successful?
  @@split_nones = nil   # Boolean: split NONE value for gold feature?

  @@target = nil            # SynNode: main target node
  @@target_pos = nil        # string: part of speech of main target
  @@target_voice = nil      # string: "active", "passive", or nil
  @@terminals_ordered = nil # Hash: sentence terminals, mapped onto their word indices (starting with 1)
  @@target_gfs = nil        # Array of pairs [rel, node]: grammatical functions of the target

  @@paths = nil                   # Hash: node ID -> path object, path from main target node to the node with that ID
  @@relpos = nil                  # string: position of instance relative to target
  @@node_leftmost_terminal = nil  # SynNode objects: first and last terminal
  @@node_rightmost_terminal = nil # in the yield of @@node

  @@governing_verb = nil # SynNode object: closest governing verb of @@target
  @@gv_paths = nil       # Hash: node ID -> path object, path from the governing verb to the node with that ID

  ###
  # returns a string: "phase 1" or "phase 2",
  # depending on whether the feature is computed
  # directly from the SalsaTigerSentence and the SynNode objects
  # or whether it is computed from the phase 1 features
  # computed for the training set
  #
  # Here: all features in this package are phase 1
  def RosyFeatureExtractor.phase()
    return "phase 1"
  end

  ###
  # returns an array of strings, providing information about
  # the feature extractor: the superclass info with "rosy" appended
  def RosyFeatureExtractor.info()
    return super().concat(["rosy"])
  end

  ###
  # set sentence, set node, set general settings: this is done prior to
  # feature computation using compute_features()
  # such that computations that stay the same for
  # several features can be done in advance
  #
  # var_hash: hash. possible entries: "split_nones" => true/false
  # returns: true
  def RosyFeatureExtractor.set(var_hash)
    @@split_nones = var_hash["split_nones"]

    return true
  end

  ###
  # Prepare all per-sentence state: terminal word indices, main target,
  # target POS / voice / grammatical functions, paths from the target,
  # and the governing verb of the target with the paths starting there.
  #
  # returns: true on success, false if no main target node could be found
  def RosyFeatureExtractor.set_sentence(sent,  # SalsaTigerSentence object
                                        frame) # FrameNode object
    super(sent, frame)

    # number the terminals below the first syntactic root, starting with 1
    # (@@sent is presumably set by the superclass call above)
    root = @@sent.syn_roots.first()
    word_index_counter = 1
    @@terminals_ordered = Hash.new
    root.yield_nodes_ordered.each { |yield_node|
      @@terminals_ordered[yield_node] = word_index_counter
      word_index_counter += 1
    }

    # @@target: main target node (SynNode)
    # WARNING: at this moment, we are
    # not considering true multiword targets.
    # Remove the "no_mwe" parameter in determine_main_target
    # to change this
    unless frame.target
      @@target = nil
      return false
    end
    @@target = @@interpreter_class.main_node_of_expr(frame.target.children(), "no_mwe")

    unless @@target
      return false
    end

    # @@target_pos: string, target POS
    @@target_pos = @@interpreter_class.category(@@target)

    # @@target_voice:
    # for verb targets, string, active or passive
    # else nil
    @@target_voice = @@interpreter_class.voice(@@target)
    @@target_gfs = @@interpreter_class.gfs(@@target, @@sent)

    # paths from target to all other nodes in the graph
    @@paths = RosyFeatureExtractor.all_paths_from(@@target)

    # governing verb of target.
    # If something goes wrong, this will remain unset.
    # Both variables are class-level state, so they must be cleared here:
    # otherwise a sentence without a governing verb would silently keep
    # the governing verb of the previously processed sentence.
    @@governing_verb = nil
    @@gv_paths = Hash.new
    if (targetlemma = RosyFeatureExtractor.headlemma(@@target))
      # determine governing verb: walk up from the target until a verb
      # node is found whose head lemma differs from the target's
      parent = @@target
      while (parent = parent.parent)
        parentlemma = RosyFeatureExtractor.headlemma(parent)

        if @@interpreter_class.category(parent) == "verb" and
            parentlemma != targetlemma
          # success: found the governing verb of the target

          @@governing_verb = @@interpreter_class.head_terminal(parent)
          # paths from governing verb of target to all other nodes in the graph
          if @@governing_verb
            @@gv_paths = RosyFeatureExtractor.all_paths_from(@@governing_verb)
          end

          break
        end
      end
    end

    # paths: when printing, leave off the phrase type of the end node
    @@paths.each_value { |path| path.set_cutoff_last_pt_on_printing(true) }
    @@gv_paths.each_value { |path| path.set_cutoff_last_pt_on_printing(true) }

    return true
  end

  ###
  # Prepare all per-node state for the instance node.
  #
  # node: SynNode of the sentence set in set_sentence
  # returns: true on success, false if there is no target
  #          (in which case compute_features() will return nil)
  def RosyFeatureExtractor.set_node(node)
    super(node)

    @@instance_ok = true

    unless @@target
      # no target, nothing I can compute here
      @@instance_ok = false
      return false
    end

    # position of instance node relative to main target node
    # (@@node is presumably set by the superclass call above)
    @@relpos = @@interpreter_class.relative_position(@@node, @@target)
    # leftmost, rightmost terminal in the yield of @@node
    @@node_leftmost_terminal = @@interpreter_class.leftmost_terminal(@@node)
    @@node_rightmost_terminal = @@interpreter_class.rightmost_terminal(@@node)

    return true
  end

  ###
  # compute_features: first check if instance is OK
  #
  # returns: list of features made safe for SQL,
  #          or nil if set_node() was not successful
  def compute_features()
    unless @@instance_ok
      return nil
    end

    return make_features_safe_for_sql(compute_features_instanceOK())
  end

  ############
  protected

  # To be overwritten by subclasses.
  # returns: list of features
  def compute_features_instanceOK()
    raise "Overwrite me"
  end

  ###
  # in computed features that are strings:
  # replace "," by COMMA and "\" by BACK in order not to confuse SQL.
  # Non-string features are passed through unchanged.
  def make_features_safe_for_sql(feature_list)
    return feature_list.map { |feature|
      if feature.kind_of? String
        feature.gsub(/,/, "COMMA").gsub(/\\/, "BACK")
      else
        feature
      end
    }
  end

  ###
  # lemma of the head terminal of SynNode n,
  # nil if n is nil or has no head terminal
  def RosyFeatureExtractor.headlemma(n) # SynNode
    unless n
      return nil
    end

    h = @@interpreter_class.head_terminal(n)
    if h
      return @@interpreter_class.lemma_backoff(h)
    else
      return nil
    end
  end

  ###
  # part of speech of the head terminal of SynNode n,
  # nil if n is nil or has no head terminal
  def RosyFeatureExtractor.headpos(n) # SynNode
    unless n
      return nil
    end

    h = @@interpreter_class.head_terminal(n)
    if h
      return h.part_of_speech()
    else
      return nil
    end
  end

  ###
  # Given a SynNode n, recursively determine
  # the paths from n to all other reachable nodes,
  # skipping nodes that already have a path
  # listed in the given hash mapping node IDs to paths.
  # Paths are given as Path objects (see AbstractSynInterface).
  # It is assumed that the graph of n is a tree, which
  # is searched depth-first, first the children, then the parent of n.
  #
  # returns: the hash, extended by all newly found paths
  def RosyFeatureExtractor.all_paths_from(n,          # SynNode
                                          hash = nil) # Hash: nodeID(string) => Path object
    # initial step of all: no hash existing yet
    if hash.nil?
      hash = Hash.new
      hash[n.id()] = Path.new(n)
    end

    # invariant at this point: n must be listed in hash
    unless hash[n.id()]
      raise "Shouldn't be here"
    end

    # for each child c of n: compute its path from the path of n,
    # and explore paths below c
    n.each_child_with_edgelabel { |label, c|
      if hash[c.id()].nil?
        hash[c.id()] = hash[n.id()].deep_clone().add_last_step("D",
                                                               label,
                                                               @@interpreter_class.simplified_pt(c),
                                                               c)
        RosyFeatureExtractor.all_paths_from(c, hash)
      end
    }

    # compute the path from n's parent from the path of n,
    # and explore paths beyond the parent
    if (parent_node = n.parent) and hash[parent_node.id()].nil?
      # node has a parent, and it is not listed in the path hash
      # make a new path for parent: n's path, plus one up-step
      hash[parent_node.id()] = hash[n.id()].deep_clone().add_last_step("U",
                                                                       n.parent_label,
                                                                       @@interpreter_class.simplified_pt(parent_node),
                                                                       parent_node)
      RosyFeatureExtractor.all_paths_from(parent_node, hash)
    end

    return hash
  end

end
|
288
|
+
|
289
|
+
###############################
|
290
|
+
# Rosy single feature extractor, duplicating stuff from
|
291
|
+
# AbstractSingleFeatureExtractor
|
292
|
+
###
# Base class for feature extractors that compute exactly one feature.
# Subclasses provide the class method feature_name() and the private
# instance method compute_feature_instanceOK().
class RosySingleFeatureExtractor < RosyFeatureExtractor

  ###
  # returns a string: the designator for this feature extractor
  # (an extractor may compute several features, but
  # in the experiment file it is chosen by a single designator)
  #
  # here: single feature, and the feature name is the designator
  def RosySingleFeatureExtractor.designator()
    # self is the class the method was called on, so feature_name()
    # can be called on it directly; no need to eval() the class name.
    return feature_name()
  end

  ###
  # returns: one-element array holding the feature name
  def RosySingleFeatureExtractor.feature_names()
    return [feature_name()]
  end

  ###
  # compute_features: first check if instance is OK
  #
  # returns: one-element list holding the feature made safe for SQL,
  #          or nil if set_node() was not successful
  def compute_features()
    unless @@instance_ok
      return nil
    end

    return make_features_safe_for_sql([compute_feature_instanceOK()])
  end

  ############
  private

  # To be overwritten by subclasses.
  # returns: the single feature value
  def compute_feature_instanceOK()
    raise "Overwrite me"
  end

end
|
329
|
+
|
330
|
+
##############################################
|
331
|
+
# Individual feature extractors
|
332
|
+
##############################################
|
333
|
+
|
334
|
+
####################
|
335
|
+
# gold role label
|
336
|
+
###
# Gold role label of the current node.
class GoldlabelFeature < RosySingleFeatureExtractor
  GoldlabelFeature.announce_me()

  def self.feature_name
    "gold"
  end

  def self.sql_type
    "VARCHAR(30)"
  end

  def self.feature_type
    "gold"
  end

  # Superclass info, extended by "index": this is an index feature.
  def self.info
    super().concat(["index"])
  end

  ################
  private

  # Name of the first frame element that has the current node among its
  # children; nil if the node carries no role label.
  # (Splitting the "no role" case by @@relpos via @@split_nones is
  # currently disabled.)
  def compute_feature_instanceOK
    @@frame.each_fe_by_name do |frame_element|
      return frame_element.name if frame_element.children.include?(@@node)
    end

    nil
  end
end
|
373
|
+
|
374
|
+
####################
|
375
|
+
# path features
|
376
|
+
###
# Common base of the features describing the path between the
# target and the current node. Subclasses supply my_path_computation().
class AbstractPathFeature < RosySingleFeatureExtractor
  def self.sql_type
    "VARCHAR(80)"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  # Result of my_path_computation(), or nil when no path is known
  # for the current node or the computed path is nil/empty.
  def compute_feature_instanceOK
    return nil if @@paths[@@node.id()].nil?

    printed = my_path_computation()
    return nil if printed.nil? || printed.empty?

    printed
  end

  # To be overwritten by subclasses.
  def my_path_computation
    raise "overwrite me"
  end
end
|
405
|
+
|
406
|
+
|
407
|
+
####################
|
408
|
+
# path consisting of nodelabels, dependencies and directions
|
409
|
+
###
# Path consisting of node labels, dependencies and directions.
class PathFeature < AbstractPathFeature
  PathFeature.announce_me()

  def self.sql_type
    "VARCHAR(120)"
  end

  def self.feature_name
    "path"
  end

  ################
  private

  # Printed path to the current node, nil if there is no such path.
  def my_path_computation
    node_path = @@paths[@@node.id()]
    node_path.nil? ? nil : node_path.print(true, true, true)
  end
end
|
430
|
+
|
431
|
+
|
432
|
+
|
433
|
+
####################
|
434
|
+
# path consisting of phrase type and directions
|
435
|
+
###
# Path consisting of phrase types and directions.
class NodelabelPathFeature < AbstractPathFeature
  NodelabelPathFeature.announce_me()

  def self.feature_name
    "pt_path"
  end

  ################
  private

  # Printed path to the current node, nil if there is no such path.
  def my_path_computation
    node_path = @@paths[@@node.id()]
    node_path.nil? ? nil : node_path.print(true, false, true)
  end
end
|
453
|
+
|
454
|
+
####################
|
455
|
+
# path consisting of dependencies and directions
|
456
|
+
###
# Path consisting of dependencies and directions.
class EdgelabelPathFeature < AbstractPathFeature
  EdgelabelPathFeature.announce_me()

  def self.feature_name
    "gf_path"
  end

  ################
  private

  # Printed path to the current node, nil if there is no such path.
  def my_path_computation
    node_path = @@paths[@@node.id()]
    node_path.nil? ? nil : node_path.print(true, true, false)
  end
end
|
474
|
+
|
475
|
+
####################
|
476
|
+
# features: path from governing verb
|
477
|
+
###
# Common base of the features describing the path from the governing
# verb to the current node. Subclasses supply my_path_computation().
class AbstractGVPathFeature < RosySingleFeatureExtractor
  def self.sql_type
    "VARCHAR(80)"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  # Result of my_path_computation(), or nil when no governing-verb path
  # is known for the current node or the computed path is nil/empty.
  def compute_feature_instanceOK
    return nil if @@gv_paths[@@node.id()].nil?

    printed = my_path_computation()
    return nil if printed.nil? || printed.empty?

    printed
  end

  # To be overwritten by subclasses.
  def my_path_computation
    raise "overwrite me"
  end
end
|
506
|
+
|
507
|
+
|
508
|
+
####################
|
509
|
+
# path from governing verb consisting of nodelabels, dependencies and directions
|
510
|
+
###
# Path from the governing verb, consisting of node labels,
# dependencies and directions.
class GVPathFeature < AbstractGVPathFeature
  GVPathFeature.announce_me()

  def self.sql_type
    "VARCHAR(120)"
  end

  def self.feature_name
    "gvpath"
  end

  ################
  private

  # Printed governing-verb path of the current node.
  def my_path_computation
    gv_path = @@gv_paths[@@node.id()]
    gv_path.print(true, true, true)
  end
end
|
527
|
+
|
528
|
+
|
529
|
+
####################
|
530
|
+
# gov. verb path consisting of phrase type and directions
|
531
|
+
###
# Path from the governing verb, consisting of phrase types and directions.
class GVNodelabelPathFeature < AbstractGVPathFeature
  GVNodelabelPathFeature.announce_me()

  def self.feature_name
    "pt_gvpath"
  end

  ################
  private

  # Printed governing-verb path of the current node.
  def my_path_computation
    gv_path = @@gv_paths[@@node.id()]
    gv_path.print(true, false, true)
  end
end
|
545
|
+
|
546
|
+
####################
|
547
|
+
# gov. verb path consisting of dependencies and directions
|
548
|
+
###
# Path from the governing verb, consisting of dependencies and directions.
class GVEdgelabelPathFeature < AbstractGVPathFeature
  GVEdgelabelPathFeature.announce_me()

  def self.feature_name
    "gf_gvpath"
  end

  ################
  private

  # Printed governing-verb path of the current node.
  def my_path_computation
    gv_path = @@gv_paths[@@node.id()]
    gv_path.print(true, true, false)
  end
end
|
562
|
+
|
563
|
+
####################
|
564
|
+
# path length
|
565
|
+
###
# Length of the path between the target and the current node.
class PathLengthFeature < RosySingleFeatureExtractor
  PathLengthFeature.announce_me()

  def self.feature_name
    "path_length"
  end

  def self.sql_type
    "TINYINT"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  # Length of the path to the current node, nil if there is no such path.
  def compute_feature_instanceOK
    node_path = @@paths[@@node.id()]
    return nil if node_path.nil?

    node_path.length()
  end
end
|
589
|
+
|
590
|
+
#########
|
591
|
+
# group of combined path features:
|
592
|
+
# path to target combined with target part of speech and
|
593
|
+
# info on whether the target is passive
|
594
|
+
###
# Common base of the combined path features:
# path to the target combined with the target part of speech and
# the target voice. Subclasses supply my_path_computation().
class AbstractCombinedPathFeature < RosySingleFeatureExtractor

  def self.sql_type
    "VARCHAR(90)"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  # "<path>--<target POS>--<target voice>"; the path part is empty
  # when no path is known for the current node.
  def compute_feature_instanceOK
    path_part =
      if @@paths[@@node.id()].nil?
        ""
      else
        my_path_computation()
      end

    pos_part = @@target_pos.to_s
    voice_part = @@target_voice.to_s
    path_part + "--" + pos_part + "--" + voice_part
  end

  ###
  # To be overwritten by subclasses.
  def my_path_computation
    raise "Overwrite me"
  end
end
|
620
|
+
|
621
|
+
|
622
|
+
####################
|
623
|
+
# combined path based on nodelabels
|
624
|
+
###
# Combined path based on node labels.
class NodelabelCombinedPathFeature < AbstractCombinedPathFeature
  NodelabelCombinedPathFeature.announce_me()

  def self.feature_name
    "pt_combined_path"
  end

  ################
  private

  # Printed path to the current node, nil if there is no such path.
  def my_path_computation
    node_path = @@paths[@@node.id()]
    node_path.nil? ? nil : node_path.print(false, false, true)
  end
end
|
642
|
+
|
643
|
+
####################
|
644
|
+
# combined path based on edgelabels
|
645
|
+
###
# Combined path based on edge labels.
class EdgelabelCombinedPathFeature < AbstractCombinedPathFeature
  EdgelabelCombinedPathFeature.announce_me()

  def self.feature_name
    "gf_combined_path"
  end

  ################
  private

  # Printed path to the current node, nil if there is no such path.
  def my_path_computation
    node_path = @@paths[@@node.id()]
    node_path.nil? ? nil : node_path.print(false, true, false)
  end
end
|
663
|
+
|
664
|
+
|
665
|
+
####################
|
666
|
+
# combined path based on nodelabels and edgelabels
|
667
|
+
###
# Combined path based on node labels and edge labels.
class CombinedPathFeature < AbstractCombinedPathFeature
  CombinedPathFeature.announce_me()

  def self.sql_type
    "VARCHAR(130)"
  end

  def self.feature_name
    "combined_path"
  end

  ################
  private

  # Printed path to the current node, nil if there is no such path.
  def my_path_computation
    node_path = @@paths[@@node.id()]
    node_path.nil? ? nil : node_path.print(false, true, true)
  end
end
|
688
|
+
|
689
|
+
|
690
|
+
##################
|
691
|
+
# group of features for computing
|
692
|
+
# partial path to target: only up to
|
693
|
+
# the lowest common ancestor of current node and target
|
694
|
+
###
# Common base of the partial path features:
# partial path to the target, only up to the lowest common ancestor
# of current node and target.
# Subclasses supply my_path_computation().
class AbstractPartialPathFeature < RosySingleFeatureExtractor

  def self.sql_type
    "VARCHAR(70)"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  # Result of my_path_computation(), or nil when no path is known
  # for the current node or the computed path is nil/empty.
  def compute_feature_instanceOK
    return nil if @@paths[@@node.id()].nil?

    printed = my_path_computation()
    return nil if printed.nil? || printed.empty?

    printed
  end
end
|
719
|
+
|
720
|
+
####
|
721
|
+
# partial path based on node labels
|
722
|
+
####
# Partial path based on node labels.
class NodelabelPartialPathFeature < AbstractPartialPathFeature
  NodelabelPartialPathFeature.announce_me()

  def self.feature_name
    "pt_partial_path"
  end

  ################
  private

  # Printed down-part of the path to the current node,
  # nil if there is no such path.
  def my_path_computation
    node_path = @@paths[@@node.id()]
    node_path.nil? ? nil : node_path.print_downpart(true, false, true)
  end
end
|
740
|
+
|
741
|
+
####
|
742
|
+
# partial path based on edge labels
|
743
|
+
####
# Partial path based on edge labels.
class EdgelabelPartialPathFeature < AbstractPartialPathFeature
  EdgelabelPartialPathFeature.announce_me()

  def self.feature_name
    "gf_partial_path"
  end

  ################
  private

  # Printed down-part of the path to the current node,
  # nil if there is no such path.
  def my_path_computation
    node_path = @@paths[@@node.id()]
    node_path.nil? ? nil : node_path.print_downpart(true, true, false)
  end
end
|
761
|
+
|
762
|
+
####
|
763
|
+
# partial path based on node and edge labels
|
764
|
+
####
# Partial path based on node and edge labels.
class PartialPathFeature < AbstractPartialPathFeature
  PartialPathFeature.announce_me()

  def self.sql_type
    "VARCHAR(110)"
  end

  def self.feature_name
    "partial_path"
  end

  ################
  private

  # Printed down-part of the path to the current node,
  # nil if there is no such path.
  def my_path_computation
    node_path = @@paths[@@node.id()]
    node_path.nil? ? nil : node_path.print_downpart(true, true, true)
  end
end
|
785
|
+
|
786
|
+
|
787
|
+
|
788
|
+
##################
|
789
|
+
# ancestor rule: grammar rule
|
790
|
+
# expanding lowest common ancestor of current node and target
|
791
|
+
##################
# Ancestor rule: grammar rule
# expanding the lowest common ancestor of current node and target.
class AncestorRuleFeature < RosySingleFeatureExtractor
  AncestorRuleFeature.announce_me()

  def self.feature_name
    "ancestor_rule"
  end

  def self.sql_type
    "VARCHAR(50)"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  # "<pt of ancestor> -> <pt of child 1> <pt of child 2> ...",
  # nil if there is no path to the current node or the path has no
  # lowest common ancestor.
  def compute_feature_instanceOK
    node_path = @@paths[@@node.id()]
    return nil if node_path.nil?

    ancestor = node_path.lca()
    return nil unless ancestor

    left_side = @@interpreter_class.simplified_pt(ancestor).to_s
    right_side = ancestor.children.map { |child|
      @@interpreter_class.simplified_pt(child).to_s
    }.join(" ")

    left_side + " -> " + right_side
  end
end
|
822
|
+
|
823
|
+
##################
|
824
|
+
# relative position to target: left, right, including target
|
825
|
+
##################
# Position of the current node relative to the target.
class RelativePositionFeature < RosySingleFeatureExtractor
  RelativePositionFeature.announce_me()

  def self.feature_name
    "relpos"
  end

  def self.sql_type
    "CHAR(5)"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  # The relative position stored in @@relpos by set_node().
  def compute_feature_instanceOK
    @@relpos
  end
end
|
845
|
+
|
846
|
+
|
847
|
+
################
|
848
|
+
# phrase type of the instance node
|
849
|
+
class PhraseTypeFeature < RosySingleFeatureExtractor
  # Feature "pt": simplified phrase type of the current node, as
  # computed by the interpreter class.
  announce_me()

  # Feature name string.
  def self.feature_name
    "pt"
  end

  # SQL type string for this feature.
  def self.sql_type
    "VARCHAR(15)"
  end

  # Feature type label.
  def self.feature_type
    "syn"
  end

  ################
  private

  # Returns simplified_pt of the current node.
  def compute_feature_instanceOK
    @@interpreter_class.simplified_pt(@@node)
  end
end
|
869
|
+
|
870
|
+
################
|
871
|
+
# grammatical function that this instance node fills for the target
|
872
|
+
class GFFeature < RosySingleFeatureExtractor
  # Feature "gf": the relation label paired with the current node in
  # @@target_gfs (a collection of [relation, node] pairs).
  announce_me()

  # Feature name string.
  def self.feature_name
    "gf"
  end

  # SQL type string for this feature.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature type label.
  def self.feature_type
    "syn"
  end

  ################
  private

  # Returns the relation of the first pair whose node equals the current
  # node; nil when @@target_gfs is unset or no pair matches.
  def compute_feature_instanceOK
    return nil unless @@target_gfs

    @@target_gfs.each do |relation, candidate|
      return relation if @@node == candidate
    end

    nil
  end
end
|
902
|
+
|
903
|
+
##################
|
904
|
+
# phrase type of parent of this node
|
905
|
+
class FatherPhraseTypeFeature < RosySingleFeatureExtractor
  # Feature "father_pt": simplified phrase type of the current node's
  # parent.
  announce_me()

  # Feature name string.
  def self.feature_name
    "father_pt"
  end

  # SQL type string for this feature.
  def self.sql_type
    "VARCHAR(15)"
  end

  # Feature type label.
  def self.feature_type
    "syn"
  end

  #####
  private

  # Returns simplified_pt of the parent, or nil when the current node
  # has no parent.
  def compute_feature_instanceOK
    return nil unless @@node.parent

    @@interpreter_class.simplified_pt(@@node.parent)
  end
end
|
929
|
+
|
930
|
+
################
|
931
|
+
# target lemma
|
932
|
+
class TargetLemmaFeature < RosySingleFeatureExtractor
  # Feature "target": lemma of the target, obtained through the
  # interpreter class's lemma_backoff.
  announce_me()

  # Feature name string.
  def self.feature_name
    "target"
  end

  # SQL type string for this feature.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature type label.
  def self.feature_type
    "ubiq"
  end

  # Inherited info list with "index" appended: marks this feature as an
  # index feature.
  def self.info
    inherited = super()
    inherited.concat(["index"])
  end

  #####
  private

  # Returns lemma_backoff of the target.
  def compute_feature_instanceOK
    @@interpreter_class.lemma_backoff(@@target)
  end
end
|
956
|
+
|
957
|
+
################
|
958
|
+
# part of speech of target lemma
|
959
|
+
class TargetPOSFeature < RosySingleFeatureExtractor
  # Feature "target_pos": exposes the value of the shared class variable
  # @@target_pos (part of speech of the target, per the file's comment).
  announce_me()

  # Feature name string.
  def self.feature_name
    "target_pos"
  end

  # SQL type string for this feature.
  def self.sql_type
    "VARCHAR(10)"
  end

  # Feature type label.
  def self.feature_type
    "ubiq"
  end

  # Inherited info list with "index" appended: marks this feature as an
  # index feature.
  def self.info
    inherited = super()
    inherited.concat(["index"])
  end

  #####
  private

  # Returns @@target_pos unchanged.
  def compute_feature_instanceOK
    @@target_pos
  end
end
|
984
|
+
|
985
|
+
################
|
986
|
+
# fine-grained part of speech of the target (the interpreter's pt of the target node)
|
987
|
+
class TargetFineGrainedPOSFeature < RosySingleFeatureExtractor
  # Feature "finegrained_target_pos": the interpreter class's pt() value
  # for the target node.
  announce_me()

  # Feature name string.
  def self.feature_name
    "finegrained_target_pos"
  end

  # SQL type string for this feature.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature type label.
  def self.feature_type
    "ubiq"
  end

  #####
  private

  # Returns pt of the target.
  def compute_feature_instanceOK
    @@interpreter_class.pt(@@target)
  end
end
|
1008
|
+
|
1009
|
+
################
|
1010
|
+
# voice of the target lemma
|
1011
|
+
class TargetVoiceFeature < RosySingleFeatureExtractor
  # Feature "target_voice": voice of the target as reported by the
  # interpreter class, cut to at most four characters (the SQL type is
  # CHAR(4)).
  announce_me()

  # Feature name string.
  def self.feature_name
    "target_voice"
  end

  # SQL type string for this feature.
  def self.sql_type
    "CHAR(4)"
  end

  # Feature type label.
  def self.feature_type
    "ubiq"
  end

  #####
  private

  # Returns the first four characters of the voice value, or nil when
  # the interpreter reports no voice.
  def compute_feature_instanceOK
    voice = @@interpreter_class.voice(@@target)
    voice ? voice.slice(0, 4) : nil
  end
end
|
1036
|
+
|
1037
|
+
################
|
1038
|
+
# the governing verb of the target
|
1039
|
+
class GoverningVerbOfTargetFeature < RosySingleFeatureExtractor
  # Feature "gov_verb": head lemma of the node held in the shared class
  # variable @@governing_verb (the governing verb of the target, per the
  # file's comment).
  GoverningVerbOfTargetFeature.announce_me()

  # Feature name string.
  def GoverningVerbOfTargetFeature.feature_name()
    return "gov_verb"
  end

  # SQL type string for this feature.
  # Spelled in upper case like every other extractor's type string
  # (was the typo "VArCHAR(20)").
  def GoverningVerbOfTargetFeature.sql_type()
    return "VARCHAR(20)"
  end

  # Feature type label.
  def GoverningVerbOfTargetFeature.feature_type()
    return "sem"
  end

  #####
  private

  # Returns RosyFeatureExtractor.headlemma of @@governing_verb, or nil
  # when no governing verb is set.
  def compute_feature_instanceOK()
    if @@governing_verb
      return RosyFeatureExtractor.headlemma(@@governing_verb)
    else
      return nil
    end
  end
end
|
1063
|
+
|
1064
|
+
################
|
1065
|
+
# preposition for this constituent
|
1066
|
+
class PrepFeature < RosySingleFeatureExtractor
  # Feature "prep": the interpreter class's preposition() value for the
  # current node.
  announce_me()

  # Feature name string.
  def self.feature_name
    "prep"
  end

  # SQL type string for this feature.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature type label.
  def self.feature_type
    "syn"
  end

  #####
  private

  # Returns preposition of the current node.
  def compute_feature_instanceOK
    @@interpreter_class.preposition(@@node)
  end
end
|
1086
|
+
|
1087
|
+
################
|
1088
|
+
# head lemma of this constituent
|
1089
|
+
class HeadFeature < RosySingleFeatureExtractor
  # Feature "const_head": head lemma of the current node, computed by
  # RosyFeatureExtractor.headlemma.
  announce_me()

  # Feature name string.
  def self.feature_name
    "const_head"
  end

  # SQL type string for this feature.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature type label.
  def self.feature_type
    "sem"
  end

  #####
  private

  # Returns headlemma of the current node.
  def compute_feature_instanceOK
    RosyFeatureExtractor.headlemma(@@node)
  end
end
|
1109
|
+
|
1110
|
+
################
|
1111
|
+
# part of speech of the head of this constituent
|
1112
|
+
class HeadPosFeature < RosySingleFeatureExtractor
  # Feature "const_head_pos": part of speech of the current node's head,
  # computed by RosyFeatureExtractor.headpos.
  announce_me()

  # Feature name string.
  def self.feature_name
    "const_head_pos"
  end

  # SQL type string for this feature.
  def self.sql_type
    "VARCHAR(10)"
  end

  # Feature type label.
  def self.feature_type
    "syn"
  end

  #####
  private

  # Returns headpos of the current node.
  def compute_feature_instanceOK
    RosyFeatureExtractor.headpos(@@node)
  end
end
|
1132
|
+
|
1133
|
+
################
|
1134
|
+
# informative content word (see AbstractSynFeature): lemma and POS
|
1135
|
+
class IcontLemmaFeature < RosyFeatureExtractor
  # Feature group "icont_word": head lemma and head part of speech of the
  # node returned by the interpreter's informative_content_node for the
  # current node.
  announce_me()

  # Name of the feature group.
  def self.designator
    "icont_word"
  end

  # Names of the two features, in the order they are computed.
  def self.feature_names
    ["icont_lemma", "icont_pos"]
  end

  # SQL type string for these features.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature type label.
  def self.feature_type
    "sem"
  end

  #####
  private

  # Returns [lemma, pos] of the informative content node, or [nil, nil]
  # when the interpreter finds no such node.
  def compute_features_instanceOK
    content_node = @@interpreter_class.informative_content_node(@@node)
    return [nil, nil] unless content_node

    [
      RosyFeatureExtractor.headlemma(content_node),
      RosyFeatureExtractor.headpos(content_node)
    ]
  end
end
|
1163
|
+
|
1164
|
+
|
1165
|
+
################
|
1166
|
+
# leftmost terminal of this constituent
|
1167
|
+
class FirstWordFeature < RosyFeatureExtractor
  # Feature group "firstword": head lemma and head part of speech of the
  # node held in @@node_leftmost_terminal.
  announce_me()

  # Name of the feature group.
  def self.designator
    "firstword"
  end

  # Names of the two features, in the order they are computed.
  def self.feature_names
    ["firstword", "firstword_pos"]
  end

  # SQL type string for these features.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature type label.
  def self.feature_type
    "sem"
  end

  #####
  private

  # Returns [lemma, pos] of the leftmost terminal, or [nil, nil] when
  # @@node_leftmost_terminal is unset.
  def compute_features_instanceOK
    return [nil, nil] unless @@node_leftmost_terminal

    [
      RosyFeatureExtractor.headlemma(@@node_leftmost_terminal),
      RosyFeatureExtractor.headpos(@@node_leftmost_terminal)
    ]
  end
end
|
1194
|
+
|
1195
|
+
|
1196
|
+
################
|
1197
|
+
# rightmost terminal of this constituent
|
1198
|
+
class LastWordFeature < RosyFeatureExtractor
  # Feature group "lastword": head lemma and head part of speech of the
  # node held in @@node_rightmost_terminal.
  announce_me()

  # Name of the feature group.
  def self.designator
    "lastword"
  end

  # Names of the two features, in the order they are computed.
  def self.feature_names
    ["lastword", "lastword_pos"]
  end

  # SQL type string for these features.
  def self.sql_type
    "VARCHAR(30)"
  end

  # Feature type label.
  def self.feature_type
    "sem"
  end

  #####
  private

  # Returns [lemma, pos] of the rightmost terminal, or [nil, nil] when
  # @@node_rightmost_terminal is unset.
  def compute_features_instanceOK
    return [nil, nil] unless @@node_rightmost_terminal

    [
      RosyFeatureExtractor.headlemma(@@node_rightmost_terminal),
      RosyFeatureExtractor.headpos(@@node_rightmost_terminal)
    ]
  end
end
|
1225
|
+
|
1226
|
+
################
|
1227
|
+
# left sibling of the current node
|
1228
|
+
class LeftSiblingFeature < RosyFeatureExtractor
  # Feature group "leftsib": simplified phrase type and lemma of the
  # sibling that ends closest to the left of the current node.
  announce_me()

  # Name of the feature group.
  def self.designator
    "leftsib"
  end

  # Names of the two features, in the order they are computed.
  def self.feature_names
    ["leftsib_pt", "leftsib_lemma"]
  end

  # SQL type string for these features.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature type label.
  def self.feature_type
    "sem"
  end

  #####
  private

  # Among the children of the current node's parent, picks the one whose
  # rightmost terminal has the largest word index that is still smaller
  # than the word index of the current node's leftmost terminal. On equal
  # indices the earlier child wins.
  #
  # Returns [simplified_pt, lemma_backoff] of that sibling, or [nil, nil]
  # when the node has no parent, its leftmost terminal has no index, or
  # no sibling qualifies.
  def compute_features_instanceOK
    return [nil, nil] if @@node.parent.nil?

    own_start = terminal_index(@@node_leftmost_terminal)
    return [nil, nil] unless own_start

    closest = nil
    closest_end = nil
    @@node.parent.children.each do |sibling|
      sibling_end = terminal_index(@@interpreter_class.rightmost_terminal(sibling))
      # siblings without a known terminal index are ignored
      next unless sibling_end
      # only siblings ending before this node starts are candidates
      next unless sibling_end < own_start

      if closest.nil? || closest_end < sibling_end
        closest = sibling
        closest_end = sibling_end
      end
    end

    return [nil, nil] unless closest

    [
      @@interpreter_class.simplified_pt(closest),
      @@interpreter_class.lemma_backoff(closest)
    ]
  end

  ###
  # Returns the entry of @@terminals_ordered for the given terminal node
  # (its word index), or nil when the node is nil or has no entry.
  def terminal_index(node) # SynNode, terminal
    return nil unless node

    @@terminals_ordered[node]
  end
end
|
1297
|
+
|
1298
|
+
################
|
1299
|
+
# distance between head word of constituent and target (in words)
|
1300
|
+
class WordDistanceFeature < RosySingleFeatureExtractor
  # Feature "worddistance": absolute difference between the word indices
  # (from @@terminals_ordered) of the head terminal of the current node
  # and the head terminal of the target.
  announce_me()

  # Feature name string.
  def self.feature_name
    "worddistance"
  end

  # SQL type string for this feature.
  def self.sql_type
    "TINYINT"
  end

  # Feature type label.
  def self.feature_type
    "syn"
  end

  #####
  private

  # Returns the distance, or nil when either head terminal is missing or
  # has no entry in @@terminals_ordered.
  def compute_feature_instanceOK
    node_head = @@interpreter_class.head_terminal(@@node)
    target_head = @@interpreter_class.head_terminal(@@target)
    return nil if node_head.nil? || target_head.nil?

    node_pos = @@terminals_ordered[node_head]
    target_pos = @@terminals_ordered[target_head]
    return nil if node_pos.nil? || target_pos.nil?

    (node_pos - target_pos).abs
  end
end
|
1332
|
+
|
1333
|
+
################
|
1334
|
+
# is the current node a maximal projection?
|
1335
|
+
# heuristic: is my category the same as my parent's?
|
1336
|
+
class IsMaxProj < RosySingleFeatureExtractor
  # Feature "ismaxproj": 1/0 flag from a heuristic that compares the
  # interpreter's category of the current node with its parent's.
  announce_me()

  # Feature name string.
  def self.feature_name
    "ismaxproj"
  end

  # SQL type string for this feature.
  def self.sql_type
    "TINYINT"
  end

  # Feature type label.
  def self.feature_type
    "syn"
  end

  #####
  private

  # Returns 1 when the node has no parent or its category differs from
  # the parent's category; returns 0 when the two categories are equal.
  def compute_feature_instanceOK
    return 1 unless @@node.parent

    own_category = @@interpreter_class.category(@@node)
    parent_category = @@interpreter_class.category(@@node.parent)
    own_category == parent_category ? 0 : 1
  end
end
|
1365
|
+
|
1366
|
+
################
|
1367
|
+
# right sibling of the current node
|
1368
|
+
class RightSiblingFeature < RosyFeatureExtractor
  # Feature group "rightsib": simplified phrase type and lemma of the
  # sibling that starts closest to the right of the current node.
  announce_me()

  # Name of the feature group.
  def self.designator
    "rightsib"
  end

  # Names of the two features, in the order they are computed.
  def self.feature_names
    ["rightsib_pt", "rightsib_lemma"]
  end

  # SQL type string for these features.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature type label.
  def self.feature_type
    "sem"
  end

  #####
  private

  # Among the children of the current node's parent, picks the one whose
  # leftmost terminal has the smallest word index that is still greater
  # than the word index of the current node's rightmost terminal. On
  # equal indices the earlier child wins.
  #
  # Returns [simplified_pt, lemma_backoff] of that sibling, or [nil, nil]
  # when the node has no parent, its rightmost terminal has no index, or
  # no sibling qualifies.
  def compute_features_instanceOK
    return [nil, nil] if @@node.parent.nil?

    own_end = terminal_index(@@node_rightmost_terminal)
    return [nil, nil] unless own_end

    closest = nil
    closest_start = nil
    @@node.parent.children.each do |sibling|
      sibling_start = terminal_index(@@interpreter_class.leftmost_terminal(sibling))
      # siblings without a known terminal index are ignored
      next unless sibling_start
      # only siblings starting after this node ends are candidates
      next unless sibling_start > own_end

      if closest.nil? || sibling_start < closest_start
        closest = sibling
        closest_start = sibling_start
      end
    end

    return [nil, nil] unless closest

    [
      @@interpreter_class.simplified_pt(closest),
      @@interpreter_class.lemma_backoff(closest)
    ]
  end

  ###
  # Returns the entry of @@terminals_ordered for the given terminal node
  # (its word index), or nil when the node is nil or has no entry.
  def terminal_index(node) # SynNode, terminal
    return nil unless node

    @@terminals_ordered[node]
  end
end
|
1437
|
+
|
1438
|
+
|
1439
|
+
# ################
|
1440
|
+
# # admin feature: word span of this constituent
|
1441
|
+
# class WordSpanFeature < RosySingleFeatureExtractor
|
1442
|
+
# WordSpanFeature.announce_me()
|
1443
|
+
|
1444
|
+
# def WordSpanFeature.feature_name()
|
1445
|
+
# return "wordspan"
|
1446
|
+
# end
|
1447
|
+
# def WordSpanFeature.sql_type()
|
1448
|
+
# return "VARCHAR(30)"
|
1449
|
+
# end
|
1450
|
+
# def WordSpanFeature.feature_type()
|
1451
|
+
# return "admin"
|
1452
|
+
# end
|
1453
|
+
|
1454
|
+
# #####
|
1455
|
+
# private
|
1456
|
+
|
1457
|
+
# def compute_feature_instanceOK()
|
1458
|
+
|
1459
|
+
# fwh = RosyFeatureExtractor.headlemma(@@node_leftmost_terminal)
|
1460
|
+
# lwh = RosyFeatureExtractor.headlemma(@@node_rightmost_terminal)
|
1461
|
+
|
1462
|
+
# if fwh.nil?
|
1463
|
+
# fwh = ""
|
1464
|
+
# end
|
1465
|
+
# if lwh.nil?
|
1466
|
+
# lwh = ""
|
1467
|
+
# end
|
1468
|
+
|
1469
|
+
# return fwh+ "-" +lwh
|
1470
|
+
# end
|
1471
|
+
# end
|
1472
|
+
|
1473
|
+
|
1474
|
+
################
|
1475
|
+
# admin feature: my node ID and my father's, separated by a space
|
1476
|
+
# the highest node (topnode) has ID 0, and no father ID.
|
1477
|
+
class NodeIDFeature < RosySingleFeatureExtractor
  # Feature "nodeID": ID of the current node, followed by a space and the
  # parent's ID when the node has a parent.
  announce_me()

  # Feature name string.
  def self.feature_name
    "nodeID"
  end

  # SQL type string for this feature.
  def self.sql_type
    "VARCHAR(100)"
  end

  # Feature type label.
  def self.feature_type
    "admin"
  end

  #####
  private

  # Returns "<id> <parent id>" for a node with a parent, otherwise just
  # "<id>".
  def compute_feature_instanceOK
    return @@node.id.to_s unless @@node.parent

    own_id = @@node.id.to_s
    parent_id = @@node.parent.id.to_s
    own_id + " " + parent_id
  end
end
|
1502
|
+
|
1503
|
+
################
|
1504
|
+
# admin feature: sentence ID
|
1505
|
+
class SentidFeature < RosySingleFeatureExtractor
  # Feature "sentid": instance ID built by construct_instance_id from the
  # IDs of the current sentence and the current frame.
  announce_me()

  # Feature name string.
  def self.feature_name
    "sentid"
  end

  # SQL type string for this feature.
  def self.sql_type
    "VARCHAR(100)"
  end

  # Feature type label.
  def self.feature_type
    "admin"
  end

  # Inherited info list with "index" appended: marks this feature as an
  # index feature.
  def self.info
    inherited = super()
    inherited.concat(["index"])
  end

  #####
  private

  # Returns construct_instance_id(sentence id, frame id).
  def compute_feature_instanceOK
    sentence_id = @@sent.id
    frame_id = @@frame.id
    construct_instance_id(sentence_id, frame_id)
  end
end
|
1529
|
+
|
1530
|
+
# ################
|
1531
|
+
# # admin feature: tokens spanned by this constituent
|
1532
|
+
# class TokensFeature < RosySingleFeatureExtractor
|
1533
|
+
# TokensFeature.announce_me()
|
1534
|
+
|
1535
|
+
# def TokensFeature.feature_name()
|
1536
|
+
# return "tokens"
|
1537
|
+
# end
|
1538
|
+
# def TokensFeature.sql_type()
|
1539
|
+
# return "VARCHAR(100)"
|
1540
|
+
# end
|
1541
|
+
# def TokensFeature.feature_type()
|
1542
|
+
# return "admin"
|
1543
|
+
# end
|
1544
|
+
|
1545
|
+
# #####
|
1546
|
+
# private
|
1547
|
+
|
1548
|
+
# def compute_feature_instanceOK()
|
1549
|
+
# return @@node.to_s
|
1550
|
+
# end
|
1551
|
+
# end
|
1552
|
+
|
1553
|
+
################
|
1554
|
+
# admin feature: frame assigned by FN
|
1555
|
+
class FrameFeature < RosySingleFeatureExtractor
  # Feature "frame": name of the object held in the shared class
  # variable @@frame.
  announce_me()

  # Feature name string.
  def self.feature_name
    "frame"
  end

  # SQL type string for this feature.
  def self.sql_type
    "VARCHAR(35)"
  end

  # Feature type label.
  def self.feature_type
    "ubiq"
  end

  # Inherited info list with "index" appended: marks this feature as an
  # index feature.
  def self.info
    inherited = super()
    inherited.concat(["index"])
  end

  #####
  private

  # Returns the frame's name, or nil when @@frame is unset.
  def compute_feature_instanceOK
    @@frame ? @@frame.name : nil
  end
end
|
1583
|
+
|
1584
|
+
################
|
1585
|
+
# admin feature: is this node a terminal?
|
1586
|
+
class TerminalFeature < RosySingleFeatureExtractor
  # Feature "term": 1/0 flag telling whether the current node is a
  # terminal.
  announce_me()

  # Feature name string.
  def self.feature_name
    "term"
  end

  # SQL type string for this feature.
  def self.sql_type
    "TINYINT"
  end

  # Feature type label.
  def self.feature_type
    "admin"
  end

  #####
  private

  # Returns 1 when @@node.is_terminal? is true, otherwise 0.
  def compute_feature_instanceOK
    @@node.is_terminal? ? 1 : 0
  end
end
|