shalmaneser 1.2.0.rc4 → 1.2.rc5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +47 -18
- data/bin/shalmaneser +8 -2
- data/doc/index.md +1 -0
- data/lib/shalmaneser/opt_parser.rb +68 -67
- metadata +49 -119
- data/bin/fred +0 -16
- data/bin/frprep +0 -34
- data/bin/rosy +0 -17
- data/lib/common/AbstractSynInterface.rb +0 -1229
- data/lib/common/Counter.rb +0 -18
- data/lib/common/EnduserMode.rb +0 -27
- data/lib/common/Eval.rb +0 -480
- data/lib/common/FixSynSemMapping.rb +0 -196
- data/lib/common/Graph.rb +0 -345
- data/lib/common/ISO-8859-1.rb +0 -24
- data/lib/common/ML.rb +0 -186
- data/lib/common/Mallet.rb +0 -236
- data/lib/common/Maxent.rb +0 -229
- data/lib/common/Optimise.rb +0 -195
- data/lib/common/Parser.rb +0 -213
- data/lib/common/RegXML.rb +0 -269
- data/lib/common/RosyConventions.rb +0 -171
- data/lib/common/STXmlTerminalOrder.rb +0 -194
- data/lib/common/SalsaTigerRegXML.rb +0 -2347
- data/lib/common/SalsaTigerXMLHelper.rb +0 -99
- data/lib/common/SynInterfaces.rb +0 -282
- data/lib/common/TabFormat.rb +0 -721
- data/lib/common/Tiger.rb +0 -1448
- data/lib/common/Timbl.rb +0 -144
- data/lib/common/Tree.rb +0 -61
- data/lib/common/config_data.rb +0 -470
- data/lib/common/config_format_element.rb +0 -220
- data/lib/common/headz.rb +0 -338
- data/lib/common/option_parser.rb +0 -13
- data/lib/common/prep_config_data.rb +0 -62
- data/lib/common/prep_helper.rb +0 -1330
- data/lib/common/ruby_class_extensions.rb +0 -310
- data/lib/db/db_interface.rb +0 -48
- data/lib/db/db_mysql.rb +0 -145
- data/lib/db/db_sqlite.rb +0 -280
- data/lib/db/db_table.rb +0 -239
- data/lib/db/db_wrapper.rb +0 -176
- data/lib/db/sql_query.rb +0 -243
- data/lib/ext/maxent/Classify.class +0 -0
- data/lib/ext/maxent/Train.class +0 -0
- data/lib/fred/Baseline.rb +0 -150
- data/lib/fred/FileZipped.rb +0 -31
- data/lib/fred/FredBOWContext.rb +0 -877
- data/lib/fred/FredConventions.rb +0 -232
- data/lib/fred/FredDetermineTargets.rb +0 -319
- data/lib/fred/FredEval.rb +0 -312
- data/lib/fred/FredFeatureExtractors.rb +0 -322
- data/lib/fred/FredFeatures.rb +0 -1061
- data/lib/fred/FredFeaturize.rb +0 -602
- data/lib/fred/FredNumTrainingSenses.rb +0 -27
- data/lib/fred/FredParameters.rb +0 -402
- data/lib/fred/FredSplit.rb +0 -84
- data/lib/fred/FredSplitPkg.rb +0 -180
- data/lib/fred/FredTest.rb +0 -606
- data/lib/fred/FredTrain.rb +0 -144
- data/lib/fred/PlotAndREval.rb +0 -480
- data/lib/fred/fred.rb +0 -47
- data/lib/fred/fred_config_data.rb +0 -185
- data/lib/fred/md5.rb +0 -23
- data/lib/fred/opt_parser.rb +0 -250
- data/lib/frprep/Ampersand.rb +0 -39
- data/lib/frprep/CollinsInterface.rb +0 -1165
- data/lib/frprep/Counter.rb +0 -18
- data/lib/frprep/FNCorpusXML.rb +0 -643
- data/lib/frprep/FNDatabase.rb +0 -144
- data/lib/frprep/FrameXML.rb +0 -513
- data/lib/frprep/Graph.rb +0 -345
- data/lib/frprep/MiniparInterface.rb +0 -1388
- data/lib/frprep/RegXML.rb +0 -269
- data/lib/frprep/STXmlTerminalOrder.rb +0 -194
- data/lib/frprep/SleepyInterface.rb +0 -384
- data/lib/frprep/TntInterface.rb +0 -44
- data/lib/frprep/TreetaggerInterface.rb +0 -327
- data/lib/frprep/do_parses.rb +0 -143
- data/lib/frprep/frprep.rb +0 -693
- data/lib/frprep/interfaces/berkeley_interface.rb +0 -372
- data/lib/frprep/interfaces/stanford_interface.rb +0 -353
- data/lib/frprep/interpreters/berkeley_interpreter.rb +0 -22
- data/lib/frprep/interpreters/stanford_interpreter.rb +0 -22
- data/lib/frprep/one_parsed_file.rb +0 -28
- data/lib/frprep/opt_parser.rb +0 -94
- data/lib/frprep/ruby_class_extensions.rb +0 -310
- data/lib/rosy/AbstractFeatureAndExternal.rb +0 -242
- data/lib/rosy/ExternalConfigData.rb +0 -58
- data/lib/rosy/FailedParses.rb +0 -130
- data/lib/rosy/FeatureInfo.rb +0 -242
- data/lib/rosy/GfInduce.rb +0 -1115
- data/lib/rosy/GfInduceFeature.rb +0 -148
- data/lib/rosy/InputData.rb +0 -294
- data/lib/rosy/RosyConfusability.rb +0 -338
- data/lib/rosy/RosyEval.rb +0 -465
- data/lib/rosy/RosyFeatureExtractors.rb +0 -1609
- data/lib/rosy/RosyFeaturize.rb +0 -281
- data/lib/rosy/RosyInspect.rb +0 -336
- data/lib/rosy/RosyIterator.rb +0 -478
- data/lib/rosy/RosyPhase2FeatureExtractors.rb +0 -230
- data/lib/rosy/RosyPruning.rb +0 -165
- data/lib/rosy/RosyServices.rb +0 -744
- data/lib/rosy/RosySplit.rb +0 -232
- data/lib/rosy/RosyTask.rb +0 -19
- data/lib/rosy/RosyTest.rb +0 -829
- data/lib/rosy/RosyTrain.rb +0 -234
- data/lib/rosy/RosyTrainingTestTable.rb +0 -787
- data/lib/rosy/TargetsMostFrequentFrame.rb +0 -60
- data/lib/rosy/View.rb +0 -418
- data/lib/rosy/opt_parser.rb +0 -379
- data/lib/rosy/rosy.rb +0 -78
- data/lib/rosy/rosy_config_data.rb +0 -121
- data/lib/shalmaneser/version.rb +0 -3
@@ -1,1609 +0,0 @@
|
|
1
|
-
####
|
2
|
-
# ke & sp
|
3
|
-
# adapted to new feature extractor class,
|
4
|
-
# Collins and Tiger features combined:
|
5
|
-
# KE November 2005
|
6
|
-
#
|
7
|
-
# Feature Extractors for Rosy
|
8
|
-
#
|
9
|
-
# Contract: each feature extractor inherits from the RosyFeatureExtractor class
|
10
|
-
#
|
11
|
-
# Feature extractors return nil if no feature value could be
|
12
|
-
# returned
|
13
|
-
|
14
|
-
|
15
|
-
# Salsa packages
|
16
|
-
require 'rosy/AbstractFeatureAndExternal'
|
17
|
-
require 'common/SalsaTigerRegXML'
|
18
|
-
|
19
|
-
# Fred and Rosy packages
|
20
|
-
require 'common/RosyConventions'
|
21
|
-
|
22
|
-
|
23
|
-
################################
|
24
|
-
# base class for all following feature extractors
|
25
|
-
# Base class for all Rosy feature extractors defined below.
#
# All cached state is held in class variables, so it is shared by this class
# and every subclass: set_sentence() and set_node() fill the cache once, and
# the individual extractors then read from it in compute_features().
class RosyFeatureExtractor < AbstractFeatureExtractor
  @@instance_ok = nil # Boolean: set by set_node(); false if there is no main target
  @@split_nones = nil # Boolean: split NONE value for gold feature?

  @@target = nil # SynNode: main target node
  @@target_pos = nil # string: part of speech of main target
  @@target_voice = nil # string: "active", "passive", or nil
  @@terminals_ordered = nil # Hash: sentence terminals, mapped onto their word indices (starting with 1)
  @@target_gfs = nil # Array of pairs [rel, node]: grammatical functions of the target

  @@paths = nil # Hash: node ID -> path object, path from main target node to the node with that ID
  @@relpos = nil # string: position of instance relative to target
  @@node_leftmost_terminal = nil # SynNode objects: first and last terminal
  @@node_rightmost_terminal = nil # in the yield of @@node

  @@governing_verb = nil # SynNode object: closest governing verb of @@target
  @@gv_paths = nil # Hash: node ID -> path object, path from governing verb to the node with that ID

  ###
  # Returns a string, "phase 1" or "phase 2", telling whether the feature is
  # computed directly from the SalsaTigerSentence and SynNode objects or
  # from the phase 1 features of the training set.
  #
  # Here: all features in this package are phase 1.
  def self.phase
    "phase 1"
  end

  ###
  # Returns an array of strings with information about the feature
  # extractor: whatever the superclass reports, plus "rosy".
  def self.info
    super().concat(["rosy"])
  end

  ###
  # General settings, to be made before feature computation.
  #
  # var_hash: Hash; the only entry read here is "split_nones" => true/false
  #
  # Always returns true.
  def self.set(var_hash)
    @@split_nones = var_hash["split_nones"]
    true
  end

  ###
  # Caches everything that stays the same for all nodes of one sentence:
  # terminal word indices, main target with its POS, voice and grammatical
  # functions, paths from the target, and the governing verb with its paths.
  #
  # sent:  SalsaTigerSentence object
  # frame: FrameNode object
  #
  # Returns false if no main target node could be determined, else true.
  def self.set_sentence(sent, frame)
    super(sent, frame)

    # number the terminals in the yield of the first syntactic root, from 1
    root = @@sent.syn_roots.first
    word_index_counter = 1
    @@terminals_ordered = {}
    root.yield_nodes_ordered.each do |yield_node|
      @@terminals_ordered[yield_node] = word_index_counter
      word_index_counter += 1
    end

    # @@target: main target node (SynNode)
    # WARNING: at this moment, we are not considering true multiword targets.
    # Remove the "no_mwe" parameter of main_node_of_expr to change this.
    unless frame.target
      @@target = nil
      return false
    end
    @@target = @@interpreter_class.main_node_of_expr(frame.target.children, "no_mwe")
    return false unless @@target

    @@target_pos = @@interpreter_class.category(@@target)
    # for verb targets "active" or "passive", else nil (see @@target_voice above)
    @@target_voice = @@interpreter_class.voice(@@target)
    @@target_gfs = @@interpreter_class.gfs(@@target, @@sent)

    # paths from target to all other nodes in the graph
    @@paths = RosyFeatureExtractor.all_paths_from(@@target)

    # Governing verb of the target. Both values are reset for every sentence
    # so that a sentence without a governing verb does not inherit the verb
    # found for the previous sentence.
    @@governing_verb = nil
    @@gv_paths = {}
    if (targetlemma = RosyFeatureExtractor.headlemma(@@target))
      # walk up the tree to the closest verb whose head lemma differs
      # from that of the target
      parent = @@target
      while (parent = parent.parent)
        parentlemma = RosyFeatureExtractor.headlemma(parent)
        next unless @@interpreter_class.category(parent) == "verb" &&
                    parentlemma != targetlemma

        # success: found the governing verb of the target
        @@governing_verb = @@interpreter_class.head_terminal(parent)
        if @@governing_verb
          # paths from the governing verb to all other nodes in the graph
          @@gv_paths = RosyFeatureExtractor.all_paths_from(@@governing_verb)
        end
        break
      end
    end

    # paths: when printing, leave off the phrase type of the end node
    @@paths.each_value { |path| path.set_cutoff_last_pt_on_printing(true) }
    @@gv_paths.each_value { |path| path.set_cutoff_last_pt_on_printing(true) }

    true
  end

  ###
  # Caches everything that depends on the current instance node.
  #
  # node: SynNode of the sentence set in set_sentence
  #
  # Returns false (and marks the instance as not OK) if there is no main
  # target, else true.
  def self.set_node(node)
    super(node)

    unless @@target
      # no target, nothing I can compute here
      @@instance_ok = false
      return false
    end
    @@instance_ok = true

    # position of instance node relative to main target node
    @@relpos = @@interpreter_class.relative_position(@@node, @@target)
    # leftmost, rightmost terminal in the yield of @@node
    @@node_leftmost_terminal = @@interpreter_class.leftmost_terminal(@@node)
    @@node_rightmost_terminal = @@interpreter_class.rightmost_terminal(@@node)

    true
  end

  ###
  # Computes the features of this extractor for the current instance.
  #
  # Returns nil if set_node() marked the instance as not OK, otherwise the
  # list returned by compute_features_instanceOK() made safe for SQL.
  def compute_features
    return nil unless @@instance_ok

    make_features_safe_for_sql(compute_features_instanceOK)
  end

  ############
  protected

  # To be overwritten by subclasses.
  #
  # returns: list of features
  def compute_features_instanceOK
    raise "Overwrite me"
  end

  ###
  # In computed features, replace "," by COMMA and backslash by BACK
  # in order not to confuse SQL. Non-string features are passed through.
  def make_features_safe_for_sql(feature_list)
    feature_list.map do |feature|
      if feature.kind_of? String
        feature.gsub(/,/, "COMMA").gsub(/\\/, "BACK")
      else
        feature
      end
    end
  end

  ###
  # Lemma of the head terminal of SynNode n,
  # or nil if n is nil or has no head terminal.
  def self.headlemma(n)
    return nil unless n

    head = @@interpreter_class.head_terminal(n)
    head ? @@interpreter_class.lemma_backoff(head) : nil
  end

  ###
  # Part of speech of the head terminal of SynNode n,
  # or nil if n is nil or has no head terminal.
  def self.headpos(n)
    return nil unless n

    head = @@interpreter_class.head_terminal(n)
    head ? head.part_of_speech : nil
  end

  ###
  # Given a SynNode n, recursively determine the paths from n to all other
  # reachable nodes, skipping nodes that already have a path listed in the
  # given hash mapping node IDs to paths.
  # Paths are given as Path objects (see AbstractSynInterface).
  # It is assumed that the graph of n is a tree, which is searched
  # depth-first, first the children, then the parent of n.
  #
  # n:    SynNode
  # hash: Hash: nodeID(string) => Path object; omit on the initial call
  #
  # Returns the hash, filled in place.
  def self.all_paths_from(n, hash = nil)
    # initial step of all: no hash existing yet
    if hash.nil?
      hash = {}
      hash[n.id] = Path.new(n)
    end

    # invariant at this point: n must be listed in hash
    raise "Shouldn't be here" unless hash[n.id]

    # for each child c of n: compute its path from the path of n,
    # and explore paths below c
    n.each_child_with_edgelabel do |label, c|
      next unless hash[c.id].nil?

      hash[c.id] = hash[n.id].deep_clone.add_last_step("D",
                                                       label,
                                                       @@interpreter_class.simplified_pt(c),
                                                       c)
      RosyFeatureExtractor.all_paths_from(c, hash)
    end

    # n has a parent that is not listed in the path hash yet:
    # its path is n's path plus one up-step; then explore beyond the parent
    parent = n.parent
    if parent && hash[parent.id].nil?
      hash[parent.id] = hash[n.id].deep_clone.add_last_step("U",
                                                            n.parent_label,
                                                            @@interpreter_class.simplified_pt(parent),
                                                            parent)
      RosyFeatureExtractor.all_paths_from(parent, hash)
    end

    hash
  end
end
|
288
|
-
|
289
|
-
###############################
|
290
|
-
# Rosy single feature extractor, duplicating stuff from
|
291
|
-
# AbstractSingleFeatureExtractor
|
292
|
-
# Base class for extractors that compute exactly one feature.
# Subclasses provide the class method feature_name() and the private
# instance method compute_feature_instanceOK().
class RosySingleFeatureExtractor < RosyFeatureExtractor
  ###
  # Returns a string: the designator for this feature extractor
  # (an extractor may compute several features, but
  # in the experiment file it is chosen by a single designator).
  #
  # Here: single feature, and the feature name is the designator.
  # feature_name is called on the receiving class directly; going through
  # eval(self.name()) only resolved the same class again.
  def self.designator
    feature_name
  end

  ###
  # Returns the one-element list of feature names of this extractor.
  def self.feature_names
    [feature_name]
  end

  ###
  # Computes the single feature for the current instance.
  #
  # Returns nil if set_node() marked the instance as not OK, otherwise a
  # one-element list holding the feature value, made safe for SQL.
  def compute_features
    return nil unless @@instance_ok

    make_features_safe_for_sql([compute_feature_instanceOK])
  end

  ############
  private

  # To be overwritten by subclasses: returns the single feature value.
  def compute_feature_instanceOK
    raise "Overwrite me"
  end
end
|
329
|
-
|
330
|
-
##############################################
|
331
|
-
# Individual feature extractors
|
332
|
-
##############################################
|
333
|
-
|
334
|
-
####################
|
335
|
-
# gold role label
|
336
|
-
# Gold role label: name of the frame element that contains the instance node.
class GoldlabelFeature < RosySingleFeatureExtractor
  announce_me

  def self.feature_name
    "gold"
  end

  def self.sql_type
    "VARCHAR(30)"
  end

  def self.feature_type
    "gold"
  end

  # additional info: I am an index feature
  def self.info
    super().concat(["index"])
  end

  ################
  private

  # Name of the first frame element of @@frame whose children include
  # @@node, or nil if the node carries no role label.
  #
  # Splitting the "no role" label by @@relpos (before/after/dominating the
  # target node) when @@split_nones is set is currently disabled.
  def compute_feature_instanceOK
    @@frame.each_fe_by_name do |fe|
      return fe.name if fe.children.include? @@node
    end

    # no role label for this node
    nil
  end
end
|
373
|
-
|
374
|
-
####################
|
375
|
-
# path features
|
376
|
-
# Common base of the path features: path from the main target to the
# instance node. Subclasses decide in my_path_computation() how the path
# is printed.
class AbstractPathFeature < RosySingleFeatureExtractor
  def self.sql_type
    "VARCHAR(80)"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  # Printed path for the current node; nil if no path from the target to
  # the node is known or the printed path is nil or empty.
  def compute_feature_instanceOK
    return nil if @@paths[@@node.id].nil?

    printed = my_path_computation
    return nil if printed.nil? || printed.empty?

    printed
  end

  # To be overwritten by subclasses: returns the printed path.
  def my_path_computation
    raise "overwrite me"
  end
end
|
405
|
-
|
406
|
-
|
407
|
-
####################
|
408
|
-
# path consisting of nodelabels, dependencies and directions
|
409
|
-
# Path consisting of nodelabels, dependencies and directions.
class PathFeature < AbstractPathFeature
  announce_me

  def self.sql_type
    "VARCHAR(120)"
  end

  def self.feature_name
    "path"
  end

  ################
  private

  # Prints the target-to-node path with all three print flags switched on.
  # NOTE(review): flag order is presumably (directions, edge labels,
  # node labels) -- confirm against Path#print.
  def my_path_computation
    path = @@paths[@@node.id]
    path.nil? ? nil : path.print(true, true, true)
  end
end
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
####################
|
434
|
-
# path consisting of phrase type and directions
|
435
|
-
# Path consisting of phrase type and directions.
class NodelabelPathFeature < AbstractPathFeature
  announce_me

  def self.feature_name
    "pt_path"
  end

  ################
  private

  # Prints the target-to-node path with the second print flag switched off;
  # nil if there is no path for the node.
  def my_path_computation
    path = @@paths[@@node.id]
    path.nil? ? nil : path.print(true, false, true)
  end
end
|
453
|
-
|
454
|
-
####################
|
455
|
-
# path consisting of dependencies and directions
|
456
|
-
# Path consisting of dependencies and directions.
class EdgelabelPathFeature < AbstractPathFeature
  announce_me

  def self.feature_name
    "gf_path"
  end

  ################
  private

  # Prints the target-to-node path with the third print flag switched off;
  # nil if there is no path for the node.
  def my_path_computation
    path = @@paths[@@node.id]
    path.nil? ? nil : path.print(true, true, false)
  end
end
|
474
|
-
|
475
|
-
####################
|
476
|
-
# features: path from governing verb
|
477
|
-
# Common base of the governing-verb path features: path from the governing
# verb of the target to the instance node. Subclasses decide in
# my_path_computation() how the path is printed.
class AbstractGVPathFeature < RosySingleFeatureExtractor
  def self.sql_type
    "VARCHAR(80)"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  # Printed path for the current node; nil if @@gv_paths has no entry for
  # the node or the printed path is nil or empty.
  def compute_feature_instanceOK
    return nil if @@gv_paths[@@node.id].nil?

    printed = my_path_computation
    return nil if printed.nil? || printed.empty?

    printed
  end

  # To be overwritten by subclasses: returns the printed path.
  def my_path_computation
    raise "overwrite me"
  end
end
|
506
|
-
|
507
|
-
|
508
|
-
####################
|
509
|
-
# path from governing verb consisting of nodelabels, dependencies and directions
|
510
|
-
# Path from governing verb consisting of nodelabels, dependencies and
# directions.
class GVPathFeature < AbstractGVPathFeature
  announce_me

  def self.sql_type
    "VARCHAR(120)"
  end

  def self.feature_name
    "gvpath"
  end

  ################
  private

  # Prints the governing-verb path of the current node with all three print
  # flags on. No nil check here: the caller in the superclass only gets
  # this far when @@gv_paths has an entry for the node.
  def my_path_computation
    gv_path = @@gv_paths[@@node.id]
    gv_path.print(true, true, true)
  end
end
|
527
|
-
|
528
|
-
|
529
|
-
####################
|
530
|
-
# gov. verb path consisting of phrase type and directions
|
531
|
-
# Governing verb path consisting of phrase type and directions.
class GVNodelabelPathFeature < AbstractGVPathFeature
  announce_me

  def self.feature_name
    "pt_gvpath"
  end

  ################
  private

  # Prints the governing-verb path of the current node with the second
  # print flag switched off.
  def my_path_computation
    gv_path = @@gv_paths[@@node.id]
    gv_path.print(true, false, true)
  end
end
|
545
|
-
|
546
|
-
####################
|
547
|
-
# gov. verb path consisting of dependencies and directions
|
548
|
-
# Governing verb path consisting of dependencies and directions.
class GVEdgelabelPathFeature < AbstractGVPathFeature
  announce_me

  def self.feature_name
    "gf_gvpath"
  end

  ################
  private

  # Prints the governing-verb path of the current node with the third
  # print flag switched off.
  def my_path_computation
    gv_path = @@gv_paths[@@node.id]
    gv_path.print(true, true, false)
  end
end
|
562
|
-
|
563
|
-
####################
|
564
|
-
# path length
|
565
|
-
# Length of the path from the main target to the instance node.
class PathLengthFeature < RosySingleFeatureExtractor
  announce_me

  def self.feature_name
    "path_length"
  end

  def self.sql_type
    "TINYINT"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  # length() of the target-to-node path, or nil if there is no such path.
  def compute_feature_instanceOK
    path = @@paths[@@node.id]
    path.nil? ? nil : path.length
  end
end
|
589
|
-
|
590
|
-
#########
|
591
|
-
# group of combined path features:
|
592
|
-
# path to target combined with target part of speech and
|
593
|
-
# info on whether the target is passive
|
594
|
-
# Common base of the combined path features: path to the target combined
# with the target part of speech and the target voice.
class AbstractCombinedPathFeature < RosySingleFeatureExtractor
  def self.sql_type
    "VARCHAR(90)"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  # Builds "<path>--<target POS>--<target voice>". The path part is the
  # empty string when no path from the target to the node is known.
  def compute_feature_instanceOK
    path =
      if @@paths[@@node.id].nil?
        ""
      else
        my_path_computation
      end

    path + "--" + @@target_pos.to_s + "--" + @@target_voice.to_s
  end

  ###
  # To be overwritten by subclasses: returns the printed path.
  def my_path_computation
    raise "Overwrite me"
  end
end
|
620
|
-
|
621
|
-
|
622
|
-
####################
|
623
|
-
# combined path based on nodelabels
|
624
|
-
# Combined path based on nodelabels.
class NodelabelCombinedPathFeature < AbstractCombinedPathFeature
  announce_me

  def self.feature_name
    "pt_combined_path"
  end

  ################
  private

  # Prints the target-to-node path with only the third print flag on;
  # nil if there is no path for the node.
  def my_path_computation
    path = @@paths[@@node.id]
    path.nil? ? nil : path.print(false, false, true)
  end
end
|
642
|
-
|
643
|
-
####################
|
644
|
-
# combined path based on edgelabels
|
645
|
-
# Combined path based on edgelabels.
class EdgelabelCombinedPathFeature < AbstractCombinedPathFeature
  announce_me

  def self.feature_name
    "gf_combined_path"
  end

  ################
  private

  # Prints the target-to-node path with only the second print flag on;
  # nil if there is no path for the node.
  def my_path_computation
    path = @@paths[@@node.id]
    path.nil? ? nil : path.print(false, true, false)
  end
end
|
663
|
-
|
664
|
-
|
665
|
-
####################
|
666
|
-
# combined path based on nodelabels and edgelabels
|
667
|
-
# Combined path based on nodelabels and edgelabels.
class CombinedPathFeature < AbstractCombinedPathFeature
  announce_me

  def self.sql_type
    "VARCHAR(130)"
  end

  def self.feature_name
    "combined_path"
  end

  ################
  private

  # Prints the target-to-node path with the first print flag off and the
  # other two on; nil if there is no path for the node.
  def my_path_computation
    path = @@paths[@@node.id]
    path.nil? ? nil : path.print(false, true, true)
  end
end
|
688
|
-
|
689
|
-
|
690
|
-
##################
|
691
|
-
# group of features for computing
|
692
|
-
# partial path to target: only up to
|
693
|
-
# the lowest common ancestor of current node and target
|
694
|
-
# Common base of the partial path features: partial path to the target,
# only up to the lowest common ancestor of current node and target.
# Subclasses supply my_path_computation().
class AbstractPartialPathFeature < RosySingleFeatureExtractor
  def self.sql_type
    "VARCHAR(70)"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  # Printed partial path for the current node; nil if no path from the
  # target to the node is known or the printed path is nil or empty.
  def compute_feature_instanceOK
    return nil if @@paths[@@node.id].nil?

    printed = my_path_computation
    return nil if printed.nil? || printed.empty?

    printed
  end
end
|
719
|
-
|
720
|
-
####
|
721
|
-
# partial path based on node labels
|
722
|
-
# Partial path based on node labels.
class NodelabelPartialPathFeature < AbstractPartialPathFeature
  announce_me

  def self.feature_name
    "pt_partial_path"
  end

  ################
  private

  # print_downpart of the target-to-node path with the second flag off;
  # nil if there is no path for the node.
  def my_path_computation
    path = @@paths[@@node.id]
    path.nil? ? nil : path.print_downpart(true, false, true)
  end
end
|
740
|
-
|
741
|
-
####
|
742
|
-
# partial path based on edge labels
|
743
|
-
# Partial path based on edge labels.
class EdgelabelPartialPathFeature < AbstractPartialPathFeature
  announce_me

  def self.feature_name
    "gf_partial_path"
  end

  ################
  private

  # print_downpart of the target-to-node path with the third flag off;
  # nil if there is no path for the node.
  def my_path_computation
    path = @@paths[@@node.id]
    path.nil? ? nil : path.print_downpart(true, true, false)
  end
end
|
761
|
-
|
762
|
-
####
|
763
|
-
# partial path based on node and edge labels
|
764
|
-
# Partial path based on node and edge labels.
class PartialPathFeature < AbstractPartialPathFeature
  announce_me

  def self.sql_type
    "VARCHAR(110)"
  end

  def self.feature_name
    "partial_path"
  end

  ################
  private

  # print_downpart of the target-to-node path with all three flags on;
  # nil if there is no path for the node.
  def my_path_computation
    path = @@paths[@@node.id]
    path.nil? ? nil : path.print_downpart(true, true, true)
  end
end
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
##################
|
789
|
-
# ancestor rule: grammar rule
|
790
|
-
# expanding lowest common ancestor of current node and target
|
791
|
-
# Ancestor rule: grammar rule expanding the lowest common ancestor
# of current node and target.
class AncestorRuleFeature < RosySingleFeatureExtractor
  announce_me

  def self.feature_name
    "ancestor_rule"
  end

  def self.sql_type
    "VARCHAR(50)"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  # Returns "<pt of lca> -> <pt of child 1> <pt of child 2> ...", using the
  # simplified phrase types; nil if there is no path for the node or the
  # path has no lowest common ancestor.
  def compute_feature_instanceOK
    path = @@paths[@@node.id]
    return nil if path.nil?

    lca = path.lca
    return nil unless lca

    left_side = @@interpreter_class.simplified_pt(lca).to_s
    right_side = lca.children.map { |child| @@interpreter_class.simplified_pt(child).to_s }.join(" ")
    left_side + " -> " + right_side
  end
end
|
822
|
-
|
823
|
-
##################
|
824
|
-
# relative position to target: left, right, including target
|
825
|
-
# Relative position to target: left, right, including target.
class RelativePositionFeature < RosySingleFeatureExtractor
  announce_me

  def self.feature_name
    "relpos"
  end

  def self.sql_type
    "CHAR(5)"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  # The relative position cached by set_node().
  def compute_feature_instanceOK
    @@relpos
  end
end
|
845
|
-
|
846
|
-
|
847
|
-
################
|
848
|
-
# phrase type of the instance node
|
849
|
-
# Phrase type of the instance node.
class PhraseTypeFeature < RosySingleFeatureExtractor
  announce_me

  def self.feature_name
    "pt"
  end

  def self.sql_type
    "VARCHAR(15)"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  # Simplified phrase type of the current node, as given by the interpreter.
  def compute_feature_instanceOK
    @@interpreter_class.simplified_pt(@@node)
  end
end
|
869
|
-
|
870
|
-
################
|
871
|
-
# grammatical function that this instance node fills for the target
|
872
|
-
# Grammatical function that this instance node fills for the target.
class GFFeature < RosySingleFeatureExtractor
  announce_me

  def self.feature_name
    "gf"
  end

  def self.sql_type
    "VARCHAR(20)"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  # Relation of the first [rel, node] pair in @@target_gfs whose node is
  # the current node; nil if @@target_gfs is unset or no pair matches.
  def compute_feature_instanceOK
    return nil unless @@target_gfs

    @@target_gfs.each do |rel, other_node|
      return rel if @@node == other_node
    end

    nil
  end
end
|
902
|
-
|
903
|
-
##################
|
904
|
-
# Feature "father_pt": phrase type of the parent of the instance node.
class FatherPhraseTypeFeature < RosySingleFeatureExtractor
  announce_me

  # Name under which this feature is registered.
  def self.feature_name
    "father_pt"
  end

  # SQL type string for this feature's value.
  def self.sql_type
    "VARCHAR(15)"
  end

  # Feature type label of this extractor.
  def self.feature_type
    "syn"
  end

  #####
  private

  # Simplified phrase type of @@node's parent, or nil when @@node has no
  # parent.
  def compute_feature_instanceOK
    return nil unless @@node.parent

    @@interpreter_class.simplified_pt(@@node.parent)
  end
end
|
929
|
-
|
930
|
-
################
|
931
|
-
# Feature "target": lemma of the target.
class TargetLemmaFeature < RosySingleFeatureExtractor
  announce_me

  # Name under which this feature is registered.
  def self.feature_name
    "target"
  end

  # SQL type string for this feature's value.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature type label of this extractor.
  def self.feature_type
    "ubiq"
  end

  # Inherited info list with "index" appended: this is an index feature.
  # Note that concat modifies the array returned by the superclass in place.
  def self.info
    super().concat(["index"])
  end

  #####
  private

  # Asks the interpreter class for the lemma (with backoff) of @@target.
  def compute_feature_instanceOK
    @@interpreter_class.lemma_backoff(@@target)
  end
end
|
956
|
-
|
957
|
-
################
|
958
|
-
# Feature "target_pos": part of speech of the target lemma.
class TargetPOSFeature < RosySingleFeatureExtractor
  announce_me

  # Name under which this feature is registered.
  def self.feature_name
    "target_pos"
  end

  # SQL type string for this feature's value.
  def self.sql_type
    "VARCHAR(10)"
  end

  # Feature type label of this extractor.
  def self.feature_type
    "ubiq"
  end

  # Inherited info list with "index" appended: this is an index feature.
  # Note that concat modifies the array returned by the superclass in place.
  def self.info
    super().concat(["index"])
  end

  #####
  private

  # The value is not computed here; it is read from the class variable
  # @@target_pos.
  def compute_feature_instanceOK
    @@target_pos
  end
end
|
984
|
-
|
985
|
-
################
|
986
|
-
# Feature "finegrained_target_pos": fine-grained part of speech of the
# target, taken from the interpreter's pt() of the target node (unlike
# "target_pos", which reads the precomputed @@target_pos).
class TargetFineGrainedPOSFeature < RosySingleFeatureExtractor
  announce_me

  # Name under which this feature is registered.
  def self.feature_name
    "finegrained_target_pos"
  end

  # SQL type string for this feature's value.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature type label of this extractor.
  def self.feature_type
    "ubiq"
  end

  #####
  private

  # Asks the interpreter class for pt() of @@target.
  def compute_feature_instanceOK
    @@interpreter_class.pt(@@target)
  end
end
|
1008
|
-
|
1009
|
-
################
|
1010
|
-
# Feature "target_voice": voice of the target lemma.
class TargetVoiceFeature < RosySingleFeatureExtractor
  announce_me

  # Name under which this feature is registered.
  def self.feature_name
    "target_voice"
  end

  # SQL type string for this feature's value.
  def self.sql_type
    "CHAR(4)"
  end

  # Feature type label of this extractor.
  def self.feature_type
    "ubiq"
  end

  #####
  private

  # First four characters of the voice reported by the interpreter class for
  # @@target (four matches the CHAR(4) declared in sql_type), or nil when no
  # voice is reported.
  def compute_feature_instanceOK
    target_voice = @@interpreter_class.voice(@@target)
    target_voice ? target_voice.slice(0, 4) : nil
  end
end
|
1036
|
-
|
1037
|
-
################
|
1038
|
-
# Feature "gov_verb": the governing verb of the target.
class GoverningVerbOfTargetFeature < RosySingleFeatureExtractor
  announce_me

  # Name under which this feature is registered.
  def self.feature_name
    "gov_verb"
  end

  # SQL type string for this feature's value.
  # Previously spelled "VArCHAR(20)"; normalised to the casing used by all
  # other feature extractors in this file.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature type label of this extractor.
  def self.feature_type
    "sem"
  end

  #####
  private

  # Head lemma of @@governing_verb, or nil when no governing verb is set.
  def compute_feature_instanceOK
    return nil unless @@governing_verb

    RosyFeatureExtractor.headlemma(@@governing_verb)
  end
end
|
1063
|
-
|
1064
|
-
################
|
1065
|
-
# Feature "prep": preposition for this constituent.
class PrepFeature < RosySingleFeatureExtractor
  announce_me

  # Name under which this feature is registered.
  def self.feature_name
    "prep"
  end

  # SQL type string for this feature's value.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature type label of this extractor.
  def self.feature_type
    "syn"
  end

  #####
  private

  # Asks the interpreter class for the preposition of @@node.
  def compute_feature_instanceOK
    @@interpreter_class.preposition(@@node)
  end
end
|
1086
|
-
|
1087
|
-
################
|
1088
|
-
# Feature "const_head": head lemma of this constituent.
class HeadFeature < RosySingleFeatureExtractor
  announce_me

  # Name under which this feature is registered.
  def self.feature_name
    "const_head"
  end

  # SQL type string for this feature's value.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature type label of this extractor.
  def self.feature_type
    "sem"
  end

  #####
  private

  # Delegates to RosyFeatureExtractor.headlemma for @@node.
  def compute_feature_instanceOK
    RosyFeatureExtractor.headlemma(@@node)
  end
end
|
1109
|
-
|
1110
|
-
################
|
1111
|
-
# Feature "const_head_pos": part of speech of the head of this constituent.
class HeadPosFeature < RosySingleFeatureExtractor
  announce_me

  # Name under which this feature is registered.
  def self.feature_name
    "const_head_pos"
  end

  # SQL type string for this feature's value.
  def self.sql_type
    "VARCHAR(10)"
  end

  # Feature type label of this extractor.
  def self.feature_type
    "syn"
  end

  #####
  private

  # Delegates to RosyFeatureExtractor.headpos for @@node.
  def compute_feature_instanceOK
    RosyFeatureExtractor.headpos(@@node)
  end
end
|
1132
|
-
|
1133
|
-
################
|
1134
|
-
# Feature group "icont_word": lemma and POS of the informative content word
# of this constituent (see AbstractSynFeature).
class IcontLemmaFeature < RosyFeatureExtractor
  announce_me

  # Designator of this feature group.
  def self.designator
    "icont_word"
  end

  # Names of the two features produced, in output order.
  def self.feature_names
    ["icont_lemma", "icont_pos"]
  end

  # SQL type string for the feature values.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature type label of this extractor.
  def self.feature_type
    "sem"
  end

  #####
  private

  # Returns [head lemma, head POS] of the informative content node of @@node,
  # or [nil, nil] when the interpreter class reports no such node.
  def compute_features_instanceOK
    content_node = @@interpreter_class.informative_content_node(@@node)
    return [nil, nil] unless content_node

    [
      RosyFeatureExtractor.headlemma(content_node),
      RosyFeatureExtractor.headpos(content_node)
    ]
  end
end
|
1163
|
-
|
1164
|
-
|
1165
|
-
################
|
1166
|
-
# Feature group "firstword": lemma and POS of the leftmost terminal of this
# constituent.
class FirstWordFeature < RosyFeatureExtractor
  announce_me

  # Designator of this feature group.
  def self.designator
    "firstword"
  end

  # Names of the two features produced, in output order.
  def self.feature_names
    ["firstword", "firstword_pos"]
  end

  # SQL type string for the feature values.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature type label of this extractor.
  def self.feature_type
    "sem"
  end

  #####
  private

  # Returns [head lemma, head POS] of @@node_leftmost_terminal, or
  # [nil, nil] when no leftmost terminal is set.
  def compute_features_instanceOK
    return [nil, nil] unless @@node_leftmost_terminal

    [
      RosyFeatureExtractor.headlemma(@@node_leftmost_terminal),
      RosyFeatureExtractor.headpos(@@node_leftmost_terminal)
    ]
  end
end
|
1194
|
-
|
1195
|
-
|
1196
|
-
################
|
1197
|
-
# Feature group "lastword": lemma and POS of the rightmost terminal of this
# constituent.
class LastWordFeature < RosyFeatureExtractor
  announce_me

  # Designator of this feature group.
  def self.designator
    "lastword"
  end

  # Names of the two features produced, in output order.
  def self.feature_names
    ["lastword", "lastword_pos"]
  end

  # SQL type string for the feature values.
  def self.sql_type
    "VARCHAR(30)"
  end

  # Feature type label of this extractor.
  def self.feature_type
    "sem"
  end

  #####
  private

  # Returns [head lemma, head POS] of @@node_rightmost_terminal, or
  # [nil, nil] when no rightmost terminal is set.
  def compute_features_instanceOK
    return [nil, nil] unless @@node_rightmost_terminal

    [
      RosyFeatureExtractor.headlemma(@@node_rightmost_terminal),
      RosyFeatureExtractor.headpos(@@node_rightmost_terminal)
    ]
  end
end
|
1225
|
-
|
1226
|
-
################
|
1227
|
-
# Feature group "leftsib": phrase type and lemma of the left sibling of the
# current node.
class LeftSiblingFeature < RosyFeatureExtractor
  announce_me

  # Designator of this feature group.
  def self.designator
    "leftsib"
  end

  # Names of the two features produced, in output order.
  def self.feature_names
    ["leftsib_pt", "leftsib_lemma"]
  end

  # SQL type string for the feature values.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature type label of this extractor.
  def self.feature_type
    "sem"
  end

  #####
  private

  # Picks the left sibling: among the children of @@node's parent whose
  # rightmost terminal lies before @@node's leftmost terminal, the one whose
  # rightmost terminal has the largest word index (i.e. the closest one).
  #
  # Returns [simplified phrase type, lemma with backoff] of that sibling, or
  # [nil, nil] when @@node has no parent, its leftmost terminal has no word
  # index, or no sibling qualifies.
  def compute_features_instanceOK
    return [nil, nil] if @@node.parent.nil?

    own_ix = terminal_index(@@node_leftmost_terminal)
    return [nil, nil] unless own_ix

    closest = nil
    closest_ix = nil
    @@node.parent.children.each do |candidate|
      cand_ix = terminal_index(@@interpreter_class.rightmost_terminal(candidate))
      # candidates without a word index cannot be positioned
      next unless cand_ix
      # only candidates that end before this node starts
      next unless cand_ix < own_ix
      # keep the candidate ending furthest to the right
      next unless closest.nil? || closest_ix < cand_ix

      closest = candidate
      closest_ix = cand_ix
    end

    return [nil, nil] unless closest

    [
      @@interpreter_class.simplified_pt(closest),
      @@interpreter_class.lemma_backoff(closest)
    ]
  end

  ###
  # returns: index (integer) of node in the list of terminals of this
  # sentence, looked up in @@terminals_ordered;
  # nil if node is nil or does not occur in the list
  def terminal_index(node) # SynNode, terminal
    node ? @@terminals_ordered[node] : nil
  end
end
|
1297
|
-
|
1298
|
-
################
|
1299
|
-
# Feature "worddistance": distance, in words, between the head word of the
# constituent and the target.
class WordDistanceFeature < RosySingleFeatureExtractor
  announce_me

  # Name under which this feature is registered.
  def self.feature_name
    "worddistance"
  end

  # SQL type string for this feature's value.
  def self.sql_type
    "TINYINT"
  end

  # Feature type label of this extractor.
  def self.feature_type
    "syn"
  end

  #####
  private

  # Absolute difference between the word indices (from @@terminals_ordered)
  # of the head terminals of @@node and @@target.
  # Returns nil when either head terminal or either word index is missing.
  def compute_feature_instanceOK
    node_head = @@interpreter_class.head_terminal(@@node)
    target_head = @@interpreter_class.head_terminal(@@target)
    return nil if node_head.nil? || target_head.nil?

    node_pos = @@terminals_ordered[node_head]
    target_pos = @@terminals_ordered[target_head]
    return nil if node_pos.nil? || target_pos.nil?

    (node_pos - target_pos).abs
  end
end
|
1332
|
-
|
1333
|
-
################
|
1334
|
-
# Feature "ismaxproj": is the current node a maximal projection?
# Heuristic: a node whose category equals its parent's category is not
# maximal (0); a node without parent, or with a different category, is (1).
class IsMaxProj < RosySingleFeatureExtractor
  announce_me

  # Name under which this feature is registered.
  def self.feature_name
    "ismaxproj"
  end

  # SQL type string for this feature's value.
  def self.sql_type
    "TINYINT"
  end

  # Feature type label of this extractor.
  def self.feature_type
    "syn"
  end

  #####
  private

  # Returns 1 when @@node has no parent or its category differs from the
  # parent's category, 0 when both categories are equal.
  def compute_feature_instanceOK
    return 1 unless @@node.parent

    own_category = @@interpreter_class.category(@@node)
    father_category = @@interpreter_class.category(@@node.parent)
    own_category == father_category ? 0 : 1
  end
end
|
1365
|
-
|
1366
|
-
################
|
1367
|
-
# Feature group "rightsib": phrase type and lemma of the right sibling of
# the current node.
class RightSiblingFeature < RosyFeatureExtractor
  announce_me

  # Designator of this feature group.
  def self.designator
    "rightsib"
  end

  # Names of the two features produced, in output order.
  def self.feature_names
    ["rightsib_pt", "rightsib_lemma"]
  end

  # SQL type string for the feature values.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature type label of this extractor.
  def self.feature_type
    "sem"
  end

  #####
  private

  # Picks the right sibling: among the children of @@node's parent whose
  # leftmost terminal lies after @@node's rightmost terminal, the one whose
  # leftmost terminal has the smallest word index (i.e. the closest one).
  #
  # Returns [simplified phrase type, lemma with backoff] of that sibling, or
  # [nil, nil] when @@node has no parent, its rightmost terminal has no word
  # index, or no sibling qualifies.
  def compute_features_instanceOK
    return [nil, nil] if @@node.parent.nil?

    own_ix = terminal_index(@@node_rightmost_terminal)
    return [nil, nil] unless own_ix

    closest = nil
    closest_ix = nil
    @@node.parent.children.each do |candidate|
      cand_ix = terminal_index(@@interpreter_class.leftmost_terminal(candidate))
      # candidates without a word index cannot be positioned
      next unless cand_ix
      # only candidates that start after this node ends
      next unless cand_ix > own_ix
      # keep the candidate starting furthest to the left
      next unless closest.nil? || cand_ix < closest_ix

      closest = candidate
      closest_ix = cand_ix
    end

    return [nil, nil] unless closest

    [
      @@interpreter_class.simplified_pt(closest),
      @@interpreter_class.lemma_backoff(closest)
    ]
  end

  ###
  # returns: index (integer) of node in the list of terminals of this
  # sentence, looked up in @@terminals_ordered;
  # nil if node is nil or does not occur in the list
  def terminal_index(node) # SynNode, terminal
    node ? @@terminals_ordered[node] : nil
  end
end
|
1437
|
-
|
1438
|
-
|
1439
|
-
# ################
|
1440
|
-
# # admin feature: word span of this constituent
|
1441
|
-
# class WordSpanFeature < RosySingleFeatureExtractor
|
1442
|
-
# WordSpanFeature.announce_me()
|
1443
|
-
|
1444
|
-
# def WordSpanFeature.feature_name()
|
1445
|
-
# return "wordspan"
|
1446
|
-
# end
|
1447
|
-
# def WordSpanFeature.sql_type()
|
1448
|
-
# return "VARCHAR(30)"
|
1449
|
-
# end
|
1450
|
-
# def WordSpanFeature.feature_type()
|
1451
|
-
# return "admin"
|
1452
|
-
# end
|
1453
|
-
|
1454
|
-
# #####
|
1455
|
-
# private
|
1456
|
-
|
1457
|
-
# def compute_feature_instanceOK()
|
1458
|
-
|
1459
|
-
# fwh = RosyFeatureExtractor.headlemma(@@node_leftmost_terminal)
|
1460
|
-
# lwh = RosyFeatureExtractor.headlemma(@@node_rightmost_terminal)
|
1461
|
-
|
1462
|
-
# if fwh.nil?
|
1463
|
-
# fwh = ""
|
1464
|
-
# end
|
1465
|
-
# if lwh.nil?
|
1466
|
-
# lwh = ""
|
1467
|
-
# end
|
1468
|
-
|
1469
|
-
# return fwh+ "-" +lwh
|
1470
|
-
# end
|
1471
|
-
# end
|
1472
|
-
|
1473
|
-
|
1474
|
-
################
|
1475
|
-
# Admin feature "nodeID": this node's ID followed by its parent's ID,
# separated by a single space. A node without parent yields only its own ID.
# NOTE(review): the original comment states that the top node has ID 0;
# that is not visible from this class — confirm against the node classes.
class NodeIDFeature < RosySingleFeatureExtractor
  announce_me

  # Name under which this feature is registered.
  def self.feature_name
    "nodeID"
  end

  # SQL type string for this feature's value.
  def self.sql_type
    "VARCHAR(100)"
  end

  # Feature type label of this extractor.
  def self.feature_type
    "admin"
  end

  #####
  private

  # "<own id> <parent id>" when @@node has a parent, otherwise "<own id>".
  def compute_feature_instanceOK
    return @@node.id.to_s unless @@node.parent

    "#{@@node.id} #{@@node.parent.id}"
  end
end
|
1502
|
-
|
1503
|
-
################
|
1504
|
-
# Admin feature "sentid": sentence ID.
class SentidFeature < RosySingleFeatureExtractor
  announce_me

  # Name under which this feature is registered.
  def self.feature_name
    "sentid"
  end

  # SQL type string for this feature's value.
  def self.sql_type
    "VARCHAR(100)"
  end

  # Feature type label of this extractor.
  def self.feature_type
    "admin"
  end

  # Inherited info list with "index" appended: this is an index feature.
  # Note that concat modifies the array returned by the superclass in place.
  def self.info
    super().concat(["index"])
  end

  #####
  private

  # Builds the value with construct_instance_id from the IDs of @@sent and
  # @@frame.
  def compute_feature_instanceOK
    construct_instance_id(@@sent.id, @@frame.id)
  end
end
|
1529
|
-
|
1530
|
-
# ################
|
1531
|
-
# # admin feature: tokens spanned by this constituent
|
1532
|
-
# class TokensFeature < RosySingleFeatureExtractor
|
1533
|
-
# TokensFeature.announce_me()
|
1534
|
-
|
1535
|
-
# def TokensFeature.feature_name()
|
1536
|
-
# return "tokens"
|
1537
|
-
# end
|
1538
|
-
# def TokensFeature.sql_type()
|
1539
|
-
# return "VARCHAR(100)"
|
1540
|
-
# end
|
1541
|
-
# def TokensFeature.feature_type()
|
1542
|
-
# return "admin"
|
1543
|
-
# end
|
1544
|
-
|
1545
|
-
# #####
|
1546
|
-
# private
|
1547
|
-
|
1548
|
-
# def compute_feature_instanceOK()
|
1549
|
-
# return @@node.to_s
|
1550
|
-
# end
|
1551
|
-
# end
|
1552
|
-
|
1553
|
-
################
|
1554
|
-
# Feature "frame": frame assigned by FN.
class FrameFeature < RosySingleFeatureExtractor
  announce_me

  # Name under which this feature is registered.
  def self.feature_name
    "frame"
  end

  # SQL type string for this feature's value.
  def self.sql_type
    "VARCHAR(35)"
  end

  # Feature type label of this extractor.
  def self.feature_type
    "ubiq"
  end

  # Inherited info list with "index" appended: this is an index feature.
  # Note that concat modifies the array returned by the superclass in place.
  def self.info
    super().concat(["index"])
  end

  #####
  private

  # Name of @@frame, or nil when no frame is set.
  def compute_feature_instanceOK
    @@frame ? @@frame.name : nil
  end
end
|
1583
|
-
|
1584
|
-
################
|
1585
|
-
# Admin feature "term": is this node a terminal?
class TerminalFeature < RosySingleFeatureExtractor
  announce_me

  # Name under which this feature is registered.
  def self.feature_name
    "term"
  end

  # SQL type string for this feature's value.
  def self.sql_type
    "TINYINT"
  end

  # Feature type label of this extractor.
  def self.feature_type
    "admin"
  end

  #####
  private

  # 1 when @@node is a terminal, 0 otherwise.
  def compute_feature_instanceOK
    @@node.is_terminal? ? 1 : 0
  end
end
|