frprep 0.0.1.prealpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.yardopts +8 -0
- data/CHANGELOG.rdoc +0 -0
- data/LICENSE.rdoc +0 -0
- data/README.rdoc +0 -0
- data/lib/common/AbstractSynInterface.rb +1227 -0
- data/lib/common/BerkeleyInterface.rb +375 -0
- data/lib/common/CollinsInterface.rb +1165 -0
- data/lib/common/ConfigData.rb +694 -0
- data/lib/common/Counter.rb +18 -0
- data/lib/common/DBInterface.rb +48 -0
- data/lib/common/EnduserMode.rb +27 -0
- data/lib/common/Eval.rb +480 -0
- data/lib/common/FixSynSemMapping.rb +196 -0
- data/lib/common/FrPrepConfigData.rb +66 -0
- data/lib/common/FrprepHelper.rb +1324 -0
- data/lib/common/Graph.rb +345 -0
- data/lib/common/ISO-8859-1.rb +24 -0
- data/lib/common/ML.rb +186 -0
- data/lib/common/Maxent.rb +215 -0
- data/lib/common/MiniparInterface.rb +1388 -0
- data/lib/common/Optimise.rb +195 -0
- data/lib/common/Parser.rb +213 -0
- data/lib/common/RegXML.rb +269 -0
- data/lib/common/RosyConventions.rb +171 -0
- data/lib/common/SQLQuery.rb +243 -0
- data/lib/common/STXmlTerminalOrder.rb +194 -0
- data/lib/common/SalsaTigerRegXML.rb +2347 -0
- data/lib/common/SalsaTigerXMLHelper.rb +99 -0
- data/lib/common/SleepyInterface.rb +384 -0
- data/lib/common/SynInterfaces.rb +275 -0
- data/lib/common/TabFormat.rb +720 -0
- data/lib/common/Tiger.rb +1448 -0
- data/lib/common/TntInterface.rb +44 -0
- data/lib/common/Tree.rb +61 -0
- data/lib/common/TreetaggerInterface.rb +303 -0
- data/lib/common/headz.rb +338 -0
- data/lib/common/option_parser.rb +13 -0
- data/lib/common/ruby_class_extensions.rb +310 -0
- data/lib/fred/Baseline.rb +150 -0
- data/lib/fred/FileZipped.rb +31 -0
- data/lib/fred/FredBOWContext.rb +863 -0
- data/lib/fred/FredConfigData.rb +182 -0
- data/lib/fred/FredConventions.rb +232 -0
- data/lib/fred/FredDetermineTargets.rb +324 -0
- data/lib/fred/FredEval.rb +312 -0
- data/lib/fred/FredFeatureExtractors.rb +321 -0
- data/lib/fred/FredFeatures.rb +1061 -0
- data/lib/fred/FredFeaturize.rb +596 -0
- data/lib/fred/FredNumTrainingSenses.rb +27 -0
- data/lib/fred/FredParameters.rb +402 -0
- data/lib/fred/FredSplit.rb +84 -0
- data/lib/fred/FredSplitPkg.rb +180 -0
- data/lib/fred/FredTest.rb +607 -0
- data/lib/fred/FredTrain.rb +144 -0
- data/lib/fred/PlotAndREval.rb +480 -0
- data/lib/fred/fred.rb +45 -0
- data/lib/fred/md5.rb +23 -0
- data/lib/fred/opt_parser.rb +250 -0
- data/lib/frprep/AbstractSynInterface.rb +1227 -0
- data/lib/frprep/Ampersand.rb +37 -0
- data/lib/frprep/BerkeleyInterface.rb +375 -0
- data/lib/frprep/CollinsInterface.rb +1165 -0
- data/lib/frprep/ConfigData.rb +694 -0
- data/lib/frprep/Counter.rb +18 -0
- data/lib/frprep/FNCorpusXML.rb +643 -0
- data/lib/frprep/FNDatabase.rb +144 -0
- data/lib/frprep/FixSynSemMapping.rb +196 -0
- data/lib/frprep/FrPrepConfigData.rb +66 -0
- data/lib/frprep/FrameXML.rb +513 -0
- data/lib/frprep/FrprepHelper.rb +1324 -0
- data/lib/frprep/Graph.rb +345 -0
- data/lib/frprep/ISO-8859-1.rb +24 -0
- data/lib/frprep/MiniparInterface.rb +1388 -0
- data/lib/frprep/Parser.rb +213 -0
- data/lib/frprep/RegXML.rb +269 -0
- data/lib/frprep/STXmlTerminalOrder.rb +194 -0
- data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
- data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
- data/lib/frprep/SleepyInterface.rb +384 -0
- data/lib/frprep/SynInterfaces.rb +275 -0
- data/lib/frprep/TabFormat.rb +720 -0
- data/lib/frprep/Tiger.rb +1448 -0
- data/lib/frprep/TntInterface.rb +44 -0
- data/lib/frprep/Tree.rb +61 -0
- data/lib/frprep/TreetaggerInterface.rb +303 -0
- data/lib/frprep/do_parses.rb +142 -0
- data/lib/frprep/frprep.rb +686 -0
- data/lib/frprep/headz.rb +338 -0
- data/lib/frprep/one_parsed_file.rb +28 -0
- data/lib/frprep/opt_parser.rb +94 -0
- data/lib/frprep/ruby_class_extensions.rb +310 -0
- data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
- data/lib/rosy/DBMySQL.rb +146 -0
- data/lib/rosy/DBSQLite.rb +280 -0
- data/lib/rosy/DBTable.rb +239 -0
- data/lib/rosy/DBWrapper.rb +176 -0
- data/lib/rosy/ExternalConfigData.rb +58 -0
- data/lib/rosy/FailedParses.rb +130 -0
- data/lib/rosy/FeatureInfo.rb +242 -0
- data/lib/rosy/GfInduce.rb +1115 -0
- data/lib/rosy/GfInduceFeature.rb +148 -0
- data/lib/rosy/InputData.rb +294 -0
- data/lib/rosy/RosyConfigData.rb +115 -0
- data/lib/rosy/RosyConfusability.rb +338 -0
- data/lib/rosy/RosyEval.rb +465 -0
- data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
- data/lib/rosy/RosyFeaturize.rb +280 -0
- data/lib/rosy/RosyInspect.rb +336 -0
- data/lib/rosy/RosyIterator.rb +477 -0
- data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
- data/lib/rosy/RosyPruning.rb +165 -0
- data/lib/rosy/RosyServices.rb +744 -0
- data/lib/rosy/RosySplit.rb +232 -0
- data/lib/rosy/RosyTask.rb +19 -0
- data/lib/rosy/RosyTest.rb +826 -0
- data/lib/rosy/RosyTrain.rb +232 -0
- data/lib/rosy/RosyTrainingTestTable.rb +786 -0
- data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
- data/lib/rosy/View.rb +418 -0
- data/lib/rosy/opt_parser.rb +379 -0
- data/lib/rosy/rosy.rb +77 -0
- data/lib/shalmaneser/version.rb +3 -0
- data/test/frprep/test_opt_parser.rb +94 -0
- data/test/functional/functional_test_helper.rb +40 -0
- data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
- data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
- data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
- data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
- data/test/functional/test_fred.rb +47 -0
- data/test/functional/test_frprep.rb +52 -0
- data/test/functional/test_rosy.rb +20 -0
- metadata +270 -0
@@ -0,0 +1,1609 @@
|
|
1
|
+
####
|
2
|
+
# ke & sp
|
3
|
+
# adapted to new feature extractor class,
|
4
|
+
# Collins and Tiger features combined:
|
5
|
+
# KE November 2005
|
6
|
+
#
|
7
|
+
# Feature Extractors for Rosy
|
8
|
+
#
|
9
|
+
# Contract: each feature extractor inherits from the RosyFeatureExtractor class
|
10
|
+
#
|
11
|
+
# Feature extractors return nil if no feature value could be
|
12
|
+
# returned
|
13
|
+
|
14
|
+
|
15
|
+
# Salsa packages
|
16
|
+
require 'rosy/AbstractFeatureAndExternal'
|
17
|
+
require 'common/SalsaTigerRegXML'
|
18
|
+
|
19
|
+
# Fred and Rosy packages
|
20
|
+
require 'common/RosyConventions'
|
21
|
+
|
22
|
+
|
23
|
+
################################
|
24
|
+
# base class for all following feature extractors
|
25
|
+
class RosyFeatureExtractor < AbstractFeatureExtractor
|
26
|
+
@@instance_ok = nil # Boolean: set_node(), set_sent() successful?
|
27
|
+
@@split_nones = nil # Boolean: split NONE value for gold feature?
|
28
|
+
|
29
|
+
@@target = nil # SynNode: main target node
|
30
|
+
@@target_pos = nil # string: part of speech of main target
|
31
|
+
@@target_voice = nil # string: "active", "passive", or nil
|
32
|
+
@@terminals_ordered = nil # Hash: sentence terminals, mapped onto their word indices (starting with 1)
|
33
|
+
@@target_gfs = nil # Array of pairs [rel, node]: grammatical functions of the target
|
34
|
+
|
35
|
+
@@paths = nil # Hash: node ID -> path object, path from main target node to the node with that ID
|
36
|
+
@@relpos = nil # string: position of instance relative to target
|
37
|
+
@@node_leftmost_terminal = nil # SynNode objects: first and last terminal
|
38
|
+
@@node_rightmost_terminal = nil # in the yield of @@node
|
39
|
+
|
40
|
+
@@governing_verb = nil # SynNode object: closest governing verb of @@target
|
41
|
+
@@gv_paths = nil # Hash: node ID -> path object, path from main target node to the node with that ID
|
42
|
+
|
43
|
+
###
|
44
|
+
# returns a string: "phase 1" or "phase 2",
|
45
|
+
# depending on whether the feature is computed
|
46
|
+
# directly from the SalsaTigerSentence and the SynNode objects
|
47
|
+
# or whether it is computed from the phase 1 features
|
48
|
+
# computed for the training set
|
49
|
+
#
|
50
|
+
# Here: all features in this package are phase 1
|
51
|
+
def RosyFeatureExtractor.phase()
  # Constant answer: the extractors built on this class are all phase 1.
  "phase 1"
end
|
54
|
+
|
55
|
+
###
|
56
|
+
# returns an array of strings, providing information about
|
57
|
+
# the feature extractor
|
58
|
+
def RosyFeatureExtractor.info()
  # Take the info array of the superclass and tag it with "rosy".
  # Note that the array handed back by super is extended in place.
  tags = super()
  tags.concat(["rosy"])
end
|
61
|
+
|
62
|
+
###
|
63
|
+
# set sentence, set node, set general settings: this is done prior to
|
64
|
+
# feature computation using compute_feature_value()
|
65
|
+
# such that computations that stay the same for
|
66
|
+
# several features can be done in advance
|
67
|
+
def RosyFeatureExtractor.set(var_hash) # hash. possible entries: split_nones=> true/false
  # Store the "split_nones" entry of the settings hash for later use;
  # whatever the hash yields for that key is stored as-is.
  @@split_nones = var_hash["split_nones"]

  # always reports success
  true
end
|
73
|
+
|
74
|
+
###
|
75
|
+
def RosyFeatureExtractor.set_sentence(sent,  # SalsaTigerSentence object
                                      frame) # FrameNode object
  # Prepares the per-sentence state that the individual feature extractors
  # share: terminal word indices, main target node with its part of speech,
  # voice and grammatical functions, paths from the target to all other
  # nodes, and the governing verb of the target with its own paths.
  #
  # Returns true on success, false if the frame has no target or no main
  # target node could be determined.
  super(sent, frame)

  # Forget the governing verb (and its paths) of any previously processed
  # sentence. Otherwise a sentence without target or without governing verb
  # would silently keep the verb found for an earlier sentence.
  @@governing_verb = nil
  @@gv_paths = Hash.new

  # map the terminals below the first syntactic root onto their
  # word indices, starting with 1
  # NOTE(review): @@sent is presumably set by the superclass -- confirm
  root = @@sent.syn_roots.first()
  word_index_counter = 1
  @@terminals_ordered = Hash.new
  root.yield_nodes_ordered.each { |yield_node|
    @@terminals_ordered[yield_node] = word_index_counter
    word_index_counter += 1
  }

  # @@target: main target node (SynNode)
  # WARNING: at this moment, we are
  # not considering true multiword targets.
  # Remove the "no_mwe" parameter in determine_main_target
  # to change this
  unless frame.target
    @@target = nil
    return false
  end
  @@target = @@interpreter_class.main_node_of_expr(frame.target.children(), "no_mwe")

  unless @@target
    return false
  end

  # @@target_pos: string, target POS
  @@target_pos = @@interpreter_class.category(@@target)

  # @@target_voice:
  # for verb targets, string, active or passive
  # else nil
  @@target_voice = @@interpreter_class.voice(@@target)
  @@target_gfs = @@interpreter_class.gfs(@@target, @@sent)

  # paths from target to all other nodes in the graph
  @@paths = RosyFeatureExtractor.all_paths_from(@@target)

  # governing verb of target: the closest ancestor of the target that is
  # a verb and whose head lemma differs from that of the target.
  # If none is found, @@governing_verb stays nil and @@gv_paths stays empty.
  if (targetlemma = RosyFeatureExtractor.headlemma(@@target))
    parent = @@target
    while (parent = parent.parent)
      parentlemma = RosyFeatureExtractor.headlemma(parent)

      if @@interpreter_class.category(parent) == "verb" and
          parentlemma != targetlemma
        # success: found the governing verb of the target
        @@governing_verb = @@interpreter_class.head_terminal(parent)

        # paths from governing verb of target to all other nodes in the graph
        if @@governing_verb
          @@gv_paths = RosyFeatureExtractor.all_paths_from(@@governing_verb)
        end

        break
      end
    end
  end

  # paths: when printing, leave off the phrase type of the end node
  @@paths.each_value { |p| p.set_cutoff_last_pt_on_printing(true) }
  @@gv_paths.each_value { |p| p.set_cutoff_last_pt_on_printing(true) }

  return true
end
|
145
|
+
|
146
|
+
###
|
147
|
+
# node: SynNode of the sentence set in set_sentence
|
148
|
+
def RosyFeatureExtractor.set_node(node)
  super(node)

  # Without a main target nothing can be computed for this node:
  # flag the instance as unusable and report failure.
  @@instance_ok = @@target ? true : false
  return false unless @@instance_ok

  #     # path between target and current instance node
  #     @@path = @@interpreter_class.path_between(@@target, @@node)
  #     @@path.set_cutoff_last_pt_on_printing(true) # when printing path, cut off last node label

  # where the instance node lies relative to the main target node
  @@relpos = @@interpreter_class.relative_position(@@node, @@target)

  # first and last terminal in the yield of the instance node
  @@node_leftmost_terminal = @@interpreter_class.leftmost_terminal(@@node)
  @@node_rightmost_terminal = @@interpreter_class.rightmost_terminal(@@node)

  true
end
|
172
|
+
|
173
|
+
###
|
174
|
+
# compute_features: first check if instance is OK
|
175
|
+
#
|
176
|
+
# returns: list of features
|
177
|
+
def compute_features()
  # nil tells the caller that this instance was flagged as not OK
  return nil unless @@instance_ok

  # compute the raw values, then replace characters that would confuse SQL
  raw_features = compute_features_instanceOK()
  make_features_safe_for_sql(raw_features)
end
|
184
|
+
|
185
|
+
############
|
186
|
+
protected
|
187
|
+
|
188
|
+
|
189
|
+
# returns: list of features
|
190
|
+
def compute_features_instanceOK()
  # Placeholder for the actual feature computation: this version only raises.
  raise RuntimeError, "Overwrite me"
end
|
193
|
+
|
194
|
+
###
|
195
|
+
# in computed features:
|
196
|
+
# replace "," by COMMA and "\" by BACK in order not to confuse SQL
|
197
|
+
def make_features_safe_for_sql(feature_list)
  # Returns a new list in which every String has "," replaced by "COMMA"
  # and "\" replaced by "BACK"; values of any other type are passed through.
  feature_list.map do |value|
    next value unless value.is_a?(String)

    value.gsub(",", "COMMA").gsub("\\", "BACK")
  end
end
|
206
|
+
|
207
|
+
|
208
|
+
###
|
209
|
+
# lemma of the head terminal of SynNode n
|
210
|
+
def RosyFeatureExtractor.headlemma(n) # SynNode
  # nil in, nil out
  return nil unless n

  # the lemma is read off the head terminal; without one there is no lemma
  head = @@interpreter_class.head_terminal(n)
  head ? @@interpreter_class.lemma_backoff(head) : nil
end
|
222
|
+
|
223
|
+
###
|
224
|
+
# part of speech of the head terminal of SynNode n
|
225
|
+
def RosyFeatureExtractor.headpos(n) # SynNode
  # nil in, nil out
  return nil unless n

  # the part of speech is read off the head terminal; nil if there is none
  head = @@interpreter_class.head_terminal(n)
  head ? head.part_of_speech() : nil
end
|
237
|
+
|
238
|
+
###
|
239
|
+
# Given a SynNode n, recursively determine
|
240
|
+
# the paths from n to all other reachable nodes,
|
241
|
+
# skipping nodes that already have a path
|
242
|
+
# listed in the given hash mapping node IDs to paths.
|
243
|
+
# Paths are given as Path objects (see AbstractSynInterface).
|
244
|
+
# It is assumed that the graph of n is a tree, which
|
245
|
+
# is searched depth-first, first the children, then the parent of n.
|
246
|
+
def RosyFeatureExtractor.all_paths_from(n,          # SynNode
                                        hash = nil) # Hash: nodeID(string) => Path object
  # outermost call: start the table with the path object for n itself
  if hash.nil?
    hash = { n.id() => Path.new(n) }
  end

  # every node we get here must already have its path recorded
  own_path = hash[n.id()]
  raise "Shouldn't be here" unless own_path

  # downwards: a child without a path gets a copy of n's path extended
  # by one "D" step, then the subtree below it is explored
  n.each_child_with_edgelabel do |label, child|
    next unless hash[child.id()].nil?

    hash[child.id()] = own_path.deep_clone().add_last_step("D",
                                                           label,
                                                           @@interpreter_class.simplified_pt(child),
                                                           child)
    RosyFeatureExtractor.all_paths_from(child, hash)
  end

  # upwards: a parent without a path gets a copy of n's path extended
  # by one "U" step, then everything reachable from it is explored
  mother = n.parent
  if mother and hash[mother.id()].nil?
    hash[mother.id()] = own_path.deep_clone().add_last_step("U",
                                                            n.parent_label,
                                                            @@interpreter_class.simplified_pt(mother),
                                                            mother)
    RosyFeatureExtractor.all_paths_from(mother, hash)
  end

  hash
end
|
286
|
+
|
287
|
+
end
|
288
|
+
|
289
|
+
###############################
|
290
|
+
# Rosy single feature extractor, duplicating stuff from
|
291
|
+
# AbstractSingleFeatureExtractor
|
292
|
+
class RosySingleFeatureExtractor < RosyFeatureExtractor
  # Base class for extractors that compute exactly one feature.
  # Subclasses provide the class method feature_name() and the private
  # instance method compute_feature_instanceOK().

  ###
  # returns a string: the designator for this feature extractor
  # (an extractor may compute several features, but
  # in the experiment file it is chosen by a single designator)
  #
  # here: single feature, and the feature name is the designator
  def RosySingleFeatureExtractor.designator()
    # self is the class the method was called on, so feature_name() can be
    # called on it directly instead of evaluating the class name
    return self.feature_name()
  end

  ###
  # returns an array with the single feature name of this extractor
  def RosySingleFeatureExtractor.feature_names()
    return [self.feature_name()]
  end

  ###
  # compute_features: first check if instance is OK
  #
  # returns: list with the single feature value,
  # or nil if the instance was flagged as not OK
  def compute_features()
    unless @@instance_ok
      return nil
    end

    return make_features_safe_for_sql([compute_feature_instanceOK()])
  end

  ############
  private

  # placeholder for the actual feature computation: this version only raises
  def compute_feature_instanceOK()
    raise "Overwrite me"
  end

end
|
329
|
+
|
330
|
+
##############################################
|
331
|
+
# Individual feature extractors
|
332
|
+
##############################################
|
333
|
+
|
334
|
+
####################
|
335
|
+
# gold role label
|
336
|
+
class GoldlabelFeature < RosySingleFeatureExtractor
  # Feature "gold": name of the frame element that has the current node
  # among its children, or nil if there is no such frame element.
  GoldlabelFeature.announce_me()

  def self.feature_name
    "gold"
  end

  def self.sql_type
    "VARCHAR(30)"
  end

  def self.feature_type
    "gold"
  end

  def self.info
    # on top of the inherited info: this is an index feature
    super().concat(["index"])
  end

  ################
  private

  def compute_feature_instanceOK
    @@frame.each_fe_by_name do |fe|
      return fe.name if fe.children.include?(@@node)
    end

    # no role label for this node
    #     if @@split_nones
    #       # split "no role" label into:
    #       # before/after/dominating the target node
    #       return @@relpos
    #     else
    nil
    #     end
  end
end
|
373
|
+
|
374
|
+
####################
|
375
|
+
# path features
|
376
|
+
class AbstractPathFeature < RosySingleFeatureExtractor
  # Common part of the path features: looks up the path recorded for the
  # current node and lets my_path_computation() render it.
  def self.sql_type
    "VARCHAR(80)"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  def compute_feature_instanceOK
    # no path recorded for this node: no feature value
    return nil if @@paths[@@node.id()].nil?

    rendered = my_path_computation()
    # a missing or empty rendering counts as "no value" as well
    return nil if rendered.nil? || rendered.empty?

    rendered
  end

  def my_path_computation
    raise RuntimeError, "overwrite me"
  end
end
|
405
|
+
|
406
|
+
|
407
|
+
####################
|
408
|
+
# path consisting of nodelabels, dependencies and directions
|
409
|
+
class PathFeature < AbstractPathFeature
  # Feature "path": the path recorded for the current node,
  # printed with all three print flags switched on.
  PathFeature.announce_me()

  def self.sql_type
    "VARCHAR(120)"
  end

  def self.feature_name
    "path"
  end

  ################
  private

  def my_path_computation
    node_path = @@paths[@@node.id()]
    node_path.nil? ? nil : node_path.print(true, true, true)
  end
end
|
430
|
+
|
431
|
+
|
432
|
+
|
433
|
+
####################
|
434
|
+
# path consisting of phrase type and directions
|
435
|
+
class NodelabelPathFeature < AbstractPathFeature
  # Feature "pt_path": the path recorded for the current node,
  # printed with the flags (true, false, true).
  NodelabelPathFeature.announce_me()

  def self.feature_name
    "pt_path"
  end

  ################
  private

  def my_path_computation
    node_path = @@paths[@@node.id()]
    node_path.nil? ? nil : node_path.print(true, false, true)
  end
end
|
453
|
+
|
454
|
+
####################
|
455
|
+
# path consisting of dependencies and directions
|
456
|
+
class EdgelabelPathFeature < AbstractPathFeature
  # Feature "gf_path": the path recorded for the current node,
  # printed with the flags (true, true, false).
  EdgelabelPathFeature.announce_me()

  def self.feature_name
    "gf_path"
  end

  ################
  private

  def my_path_computation
    node_path = @@paths[@@node.id()]
    node_path.nil? ? nil : node_path.print(true, true, false)
  end
end
|
474
|
+
|
475
|
+
####################
|
476
|
+
# features: path from governing verb
|
477
|
+
class AbstractGVPathFeature < RosySingleFeatureExtractor
  # Common part of the governing-verb path features: looks up the path
  # recorded in @@gv_paths for the current node and lets
  # my_path_computation() render it.
  def self.sql_type
    "VARCHAR(80)"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  def compute_feature_instanceOK
    # no governing-verb path recorded for this node: no feature value
    return nil if @@gv_paths[@@node.id()].nil?

    rendered = my_path_computation()
    # a missing or empty rendering counts as "no value" as well
    return nil if rendered.nil? || rendered.empty?

    rendered
  end

  def my_path_computation
    raise RuntimeError, "overwrite me"
  end
end
|
506
|
+
|
507
|
+
|
508
|
+
####################
|
509
|
+
# path from governing verb consisting of nodelabels, dependencies and directions
|
510
|
+
class GVPathFeature < AbstractGVPathFeature
  # Feature "gvpath": the governing-verb path recorded for the current
  # node, printed with all three print flags switched on.
  GVPathFeature.announce_me()

  def self.sql_type
    "VARCHAR(120)"
  end

  def self.feature_name
    "gvpath"
  end

  ################
  private

  def my_path_computation
    gv_path = @@gv_paths[@@node.id()]
    gv_path.print(true, true, true)
  end
end
|
527
|
+
|
528
|
+
|
529
|
+
####################
|
530
|
+
# gov. verb path consisting of phrase type and directions
|
531
|
+
class GVNodelabelPathFeature < AbstractGVPathFeature
  # Feature "pt_gvpath": the governing-verb path recorded for the current
  # node, printed with the flags (true, false, true).
  GVNodelabelPathFeature.announce_me()

  def self.feature_name
    "pt_gvpath"
  end

  ################
  private

  def my_path_computation
    gv_path = @@gv_paths[@@node.id()]
    gv_path.print(true, false, true)
  end
end
|
545
|
+
|
546
|
+
####################
|
547
|
+
# gov. verb path consisting of dependencies and directions
|
548
|
+
class GVEdgelabelPathFeature < AbstractGVPathFeature
  # Feature "gf_gvpath": the governing-verb path recorded for the current
  # node, printed with the flags (true, true, false).
  GVEdgelabelPathFeature.announce_me()

  def self.feature_name
    "gf_gvpath"
  end

  ################
  private

  def my_path_computation
    gv_path = @@gv_paths[@@node.id()]
    gv_path.print(true, true, false)
  end
end
|
562
|
+
|
563
|
+
####################
|
564
|
+
# path length
|
565
|
+
class PathLengthFeature < RosySingleFeatureExtractor
  # Feature "path_length": length() of the path recorded for the
  # current node, or nil if no path is recorded for it.
  PathLengthFeature.announce_me()

  def self.feature_name
    "path_length"
  end

  def self.sql_type
    "TINYINT"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  def compute_feature_instanceOK
    node_path = @@paths[@@node.id()]
    node_path.nil? ? nil : node_path.length()
  end
end
|
589
|
+
|
590
|
+
#########
|
591
|
+
# group of combined path features:
|
592
|
+
# path to target combined with target part of speech and
|
593
|
+
# info on whether the target is passive
|
594
|
+
class AbstractCombinedPathFeature < RosySingleFeatureExtractor
  # Common part of the combined path features. The value has the form
  #   <path>--<target part of speech>--<target voice>
  # where <path> is empty if no path is recorded for the current node.
  def self.sql_type
    "VARCHAR(90)"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  def compute_feature_instanceOK
    rendered = @@paths[@@node.id()].nil? ? "" : my_path_computation()

    # to_s turns a nil part of speech or voice into the empty string
    target_suffix = "--" + @@target_pos.to_s + "--" + @@target_voice.to_s
    rendered + target_suffix
  end

  ###
  def my_path_computation
    raise RuntimeError, "Overwrite me"
  end
end
|
620
|
+
|
621
|
+
|
622
|
+
####################
|
623
|
+
# combined path based on nodelabels
|
624
|
+
class NodelabelCombinedPathFeature < AbstractCombinedPathFeature
  # Feature "pt_combined_path": the path part is the path recorded for the
  # current node, printed with the flags (false, false, true).
  NodelabelCombinedPathFeature.announce_me()

  def self.feature_name
    "pt_combined_path"
  end

  ################
  private

  def my_path_computation
    node_path = @@paths[@@node.id()]
    node_path.nil? ? nil : node_path.print(false, false, true)
  end
end
|
642
|
+
|
643
|
+
####################
|
644
|
+
# combined path based on edgelabels
|
645
|
+
class EdgelabelCombinedPathFeature < AbstractCombinedPathFeature
  # Feature "gf_combined_path": the path part is the path recorded for the
  # current node, printed with the flags (false, true, false).
  EdgelabelCombinedPathFeature.announce_me()

  def self.feature_name
    "gf_combined_path"
  end

  ################
  private

  def my_path_computation
    node_path = @@paths[@@node.id()]
    node_path.nil? ? nil : node_path.print(false, true, false)
  end
end
|
663
|
+
|
664
|
+
|
665
|
+
####################
|
666
|
+
# combined path based on nodelabels and edgelabels
|
667
|
+
class CombinedPathFeature < AbstractCombinedPathFeature
  # Feature "combined_path": the path part is the path recorded for the
  # current node, printed with the flags (false, true, true).
  CombinedPathFeature.announce_me()

  def self.sql_type
    "VARCHAR(130)"
  end

  def self.feature_name
    "combined_path"
  end

  ################
  private

  def my_path_computation
    node_path = @@paths[@@node.id()]
    node_path.nil? ? nil : node_path.print(false, true, true)
  end
end
|
688
|
+
|
689
|
+
|
690
|
+
##################
|
691
|
+
# group of features for computing
|
692
|
+
# partial path to target: only up to
|
693
|
+
# the lowest common ancestor of current node and target
|
694
|
+
class AbstractPartialPathFeature < RosySingleFeatureExtractor
  # Common part of the partial path features: looks up the path recorded
  # for the current node and lets my_path_computation() render it.
  # Subclasses must provide my_path_computation().

  def AbstractPartialPathFeature.sql_type()
    return "VARCHAR(70)"
  end
  def AbstractPartialPathFeature.feature_type()
    return "syn"
  end

  ################
  private

  # returns the rendered path, or nil if no path is recorded for the
  # current node or the rendering is nil or empty
  def compute_feature_instanceOK()
    if @@paths[@@node.id()].nil?
      path = nil
    else
      path = my_path_computation()
    end
    if path.nil? or path.empty?
      return nil
    else
      return path
    end
  end

  ###
  # placeholder, as in the other abstract path feature classes:
  # fails with an explicit message if a subclass does not override it
  def my_path_computation()
    raise "Overwrite me"
  end
end
|
719
|
+
|
720
|
+
####
|
721
|
+
# partial path based on node labels
|
722
|
+
class NodelabelPartialPathFeature < AbstractPartialPathFeature
  # Feature "pt_partial_path": print_downpart() of the path recorded for
  # the current node, with the flags (true, false, true).
  NodelabelPartialPathFeature.announce_me()

  def self.feature_name
    "pt_partial_path"
  end

  ################
  private

  def my_path_computation
    node_path = @@paths[@@node.id()]
    node_path.nil? ? nil : node_path.print_downpart(true, false, true)
  end
end
|
740
|
+
|
741
|
+
####
|
742
|
+
# partial path based on edge labels
|
743
|
+
class EdgelabelPartialPathFeature < AbstractPartialPathFeature
  # Feature "gf_partial_path": print_downpart() of the path recorded for
  # the current node, with the flags (true, true, false).
  EdgelabelPartialPathFeature.announce_me()

  def self.feature_name
    "gf_partial_path"
  end

  ################
  private

  def my_path_computation
    node_path = @@paths[@@node.id()]
    node_path.nil? ? nil : node_path.print_downpart(true, true, false)
  end
end
|
761
|
+
|
762
|
+
####
|
763
|
+
# partial path based on node and edge labels
|
764
|
+
class PartialPathFeature < AbstractPartialPathFeature
  # Feature "partial_path": print_downpart() of the path recorded for
  # the current node, with all three print flags switched on.
  PartialPathFeature.announce_me()

  def self.sql_type
    "VARCHAR(110)"
  end

  def self.feature_name
    "partial_path"
  end

  ################
  private

  def my_path_computation
    node_path = @@paths[@@node.id()]
    node_path.nil? ? nil : node_path.print_downpart(true, true, true)
  end
end
|
785
|
+
|
786
|
+
|
787
|
+
|
788
|
+
##################
|
789
|
+
# ancestor rule: grammar rule
|
790
|
+
# expanding lowest common ancestor of current node and target
|
791
|
+
class AncestorRuleFeature < RosySingleFeatureExtractor
  # Feature "ancestor_rule": a string of the form
  #   <phrase type of lca> -> <phrase types of the children of lca>
  # where lca is what lca() returns for the path recorded for the current
  # node. nil if there is no such path or no lca.
  AncestorRuleFeature.announce_me()

  def self.feature_name
    "ancestor_rule"
  end

  def self.sql_type
    "VARCHAR(50)"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  def compute_feature_instanceOK
    node_path = @@paths[@@node.id()]
    return nil if node_path.nil?

    lca = node_path.lca()
    return nil unless lca

    # left-hand side: the ancestor; right-hand side: its children in order
    mother = @@interpreter_class.simplified_pt(lca).to_s
    daughters = lca.children.map { |c| @@interpreter_class.simplified_pt(c).to_s }
    mother + " -> " + daughters.join(" ")
  end
end
|
822
|
+
|
823
|
+
##################
|
824
|
+
# relative position to target: left, right, including target
|
825
|
+
class RelativePositionFeature < RosySingleFeatureExtractor
  # Feature "relpos": the relative position of the current node with
  # respect to the target, as stored in @@relpos by set_node().
  RelativePositionFeature.announce_me()

  def self.feature_name
    "relpos"
  end

  def self.sql_type
    "CHAR(5)"
  end

  def self.feature_type
    "syn"
  end

  ################
  private

  def compute_feature_instanceOK
    @@relpos
  end
end
|
845
|
+
|
846
|
+
|
847
|
+
################
|
848
|
+
# Phrase type of the instance node.
class PhraseTypeFeature < RosySingleFeatureExtractor
  announce_me()

  # returns: name of this feature
  def self.feature_name
    "pt"
  end

  # returns: SQL type string for this feature's value
  def self.sql_type
    "VARCHAR(15)"
  end

  # returns: feature type label
  def self.feature_type
    "syn"
  end

  ################
  private

  # returns: the interpreter's simplified phrase type for the current node
  def compute_feature_instanceOK
    @@interpreter_class.simplified_pt(@@node)
  end
end
|
869
|
+
|
870
|
+
################
|
871
|
+
# Grammatical function that this instance node fills for the target.
class GFFeature < RosySingleFeatureExtractor
  announce_me()

  # returns: name of this feature
  def self.feature_name
    "gf"
  end

  # returns: SQL type string for this feature's value
  def self.sql_type
    "VARCHAR(20)"
  end

  # returns: feature type label
  def self.feature_type
    "syn"
  end

  ################
  private

  # Walks through the (relation, node) pairs in @@target_gfs.
  # returns: the relation of the first pair whose node equals the current
  # node; nil if @@target_gfs is not set or no pair matches.
  def compute_feature_instanceOK
    return nil unless @@target_gfs

    @@target_gfs.each do |relation, filler|
      return relation if @@node == filler
    end

    nil
  end
end
|
902
|
+
|
903
|
+
##################
|
904
|
+
# Phrase type of the parent of this node.
class FatherPhraseTypeFeature < RosySingleFeatureExtractor
  announce_me()

  # returns: name of this feature
  def self.feature_name
    "father_pt"
  end

  # returns: SQL type string for this feature's value
  def self.sql_type
    "VARCHAR(15)"
  end

  # returns: feature type label
  def self.feature_type
    "syn"
  end

  #####
  private

  # returns: the interpreter's simplified phrase type of the parent node,
  # or nil if the current node has no parent
  def compute_feature_instanceOK
    return nil unless @@node.parent

    @@interpreter_class.simplified_pt(@@node.parent)
  end
end
|
929
|
+
|
930
|
+
################
|
931
|
+
# Target lemma.
class TargetLemmaFeature < RosySingleFeatureExtractor
  TargetLemmaFeature.announce_me()

  # returns: name of this feature
  def TargetLemmaFeature.feature_name()
    return "target"
  end

  # returns: SQL type string for this feature's value
  def TargetLemmaFeature.sql_type()
    return "VARCHAR(20)"
  end

  # returns: feature type label
  def TargetLemmaFeature.feature_type()
    return "ubiq"
  end

  # returns: the superclass's info list plus the entry "index"
  # (additional info: I am an index feature).
  def TargetLemmaFeature.info()
    # Build a new array instead of appending in place, so the list
    # returned by the superclass is never modified by this call.
    return super() + ["index"]
  end

  #####
  private

  # returns: result of the interpreter's lemma_backoff for the target node
  def compute_feature_instanceOK()
    return @@interpreter_class.lemma_backoff(@@target)
  end
end
|
956
|
+
|
957
|
+
################
|
958
|
+
# Part of speech of the target lemma.
class TargetPOSFeature < RosySingleFeatureExtractor
  TargetPOSFeature.announce_me()

  # returns: name of this feature
  def TargetPOSFeature.feature_name()
    return "target_pos"
  end

  # returns: SQL type string for this feature's value
  def TargetPOSFeature.sql_type()
    return "VARCHAR(10)"
  end

  # returns: feature type label
  def TargetPOSFeature.feature_type()
    return "ubiq"
  end

  # returns: the superclass's info list plus the entry "index"
  # (additional info: I am an index feature).
  def TargetPOSFeature.info()
    # Build a new array instead of appending in place, so the list
    # returned by the superclass is never modified by this call.
    return super() + ["index"]
  end

  #####
  private

  # returns: the value of @@target_pos, which is set outside this class
  def compute_feature_instanceOK()
    return @@target_pos
  end
end
|
984
|
+
|
985
|
+
################
|
986
|
+
# Fine-grained part of speech of the target: the interpreter's pt() value
# for the target node.
class TargetFineGrainedPOSFeature < RosySingleFeatureExtractor
  announce_me()

  # returns: name of this feature
  def self.feature_name
    "finegrained_target_pos"
  end

  # returns: SQL type string for this feature's value
  def self.sql_type
    "VARCHAR(20)"
  end

  # returns: feature type label
  def self.feature_type
    "ubiq"
  end

  #####
  private

  # returns: result of the interpreter's pt() for the target node
  def compute_feature_instanceOK
    @@interpreter_class.pt(@@target)
  end
end
|
1008
|
+
|
1009
|
+
################
|
1010
|
+
# Voice of the target lemma.
class TargetVoiceFeature < RosySingleFeatureExtractor
  announce_me()

  # returns: name of this feature
  def self.feature_name
    "target_voice"
  end

  # returns: SQL type string for this feature's value
  def self.sql_type
    "CHAR(4)"
  end

  # returns: feature type label
  def self.feature_type
    "ubiq"
  end

  #####
  private

  # returns: the first four characters of the voice the interpreter reports
  # for the target (matching the CHAR(4) column type), or nil if it reports none
  def compute_feature_instanceOK
    target_voice = @@interpreter_class.voice(@@target)
    target_voice ? target_voice.slice(0, 4) : nil
  end
end
|
1036
|
+
|
1037
|
+
################
|
1038
|
+
# The governing verb of the target.
class GoverningVerbOfTargetFeature < RosySingleFeatureExtractor
  GoverningVerbOfTargetFeature.announce_me()

  # returns: name of this feature
  def GoverningVerbOfTargetFeature.feature_name()
    return "gov_verb"
  end

  # returns: SQL type string for this feature's value
  def GoverningVerbOfTargetFeature.sql_type()
    # Spelled in upper case like the SQL types of all other extractors
    # (was "VArCHAR(20)").
    return "VARCHAR(20)"
  end

  # returns: feature type label
  def GoverningVerbOfTargetFeature.feature_type()
    return "sem"
  end

  #####
  private

  # returns: head lemma of @@governing_verb as computed by
  # RosyFeatureExtractor.headlemma, or nil if no governing verb is set
  def compute_feature_instanceOK()
    if @@governing_verb
      return RosyFeatureExtractor.headlemma(@@governing_verb)
    else
      return nil
    end
  end
end
|
1063
|
+
|
1064
|
+
################
|
1065
|
+
# Preposition for this constituent.
class PrepFeature < RosySingleFeatureExtractor
  announce_me()

  # returns: name of this feature
  def self.feature_name
    "prep"
  end

  # returns: SQL type string for this feature's value
  def self.sql_type
    "VARCHAR(20)"
  end

  # returns: feature type label
  def self.feature_type
    "syn"
  end

  #####
  private

  # returns: result of the interpreter's preposition() for the current node
  def compute_feature_instanceOK
    @@interpreter_class.preposition(@@node)
  end
end
|
1086
|
+
|
1087
|
+
################
|
1088
|
+
# Head lemma of this constituent.
class HeadFeature < RosySingleFeatureExtractor
  announce_me()

  # returns: name of this feature
  def self.feature_name
    "const_head"
  end

  # returns: SQL type string for this feature's value
  def self.sql_type
    "VARCHAR(20)"
  end

  # returns: feature type label
  def self.feature_type
    "sem"
  end

  #####
  private

  # returns: result of RosyFeatureExtractor.headlemma for the current node
  def compute_feature_instanceOK
    RosyFeatureExtractor.headlemma(@@node)
  end
end
|
1109
|
+
|
1110
|
+
################
|
1111
|
+
# Part of speech of the head of this constituent.
class HeadPosFeature < RosySingleFeatureExtractor
  announce_me()

  # returns: name of this feature
  def self.feature_name
    "const_head_pos"
  end

  # returns: SQL type string for this feature's value
  def self.sql_type
    "VARCHAR(10)"
  end

  # returns: feature type label
  def self.feature_type
    "syn"
  end

  #####
  private

  # returns: result of RosyFeatureExtractor.headpos for the current node
  def compute_feature_instanceOK
    RosyFeatureExtractor.headpos(@@node)
  end
end
|
1132
|
+
|
1133
|
+
################
|
1134
|
+
# Informative content word (see AbstractSynFeature): lemma and POS.
class IcontLemmaFeature < RosyFeatureExtractor
  announce_me()

  # returns: designator string of this extractor
  def self.designator
    "icont_word"
  end

  # returns: names of the two features computed by this extractor
  def self.feature_names
    ["icont_lemma", "icont_pos"]
  end

  # returns: SQL type string for this extractor's values
  def self.sql_type
    "VARCHAR(20)"
  end

  # returns: feature type label
  def self.feature_type
    "sem"
  end

  #####
  private

  # Asks the interpreter for the informative content node of the current node.
  # returns: [head lemma, head POS] of that node, or [nil, nil] if there is none
  def compute_features_instanceOK
    content_node = @@interpreter_class.informative_content_node(@@node)
    return [nil, nil] unless content_node

    [
      RosyFeatureExtractor.headlemma(content_node),
      RosyFeatureExtractor.headpos(content_node)
    ]
  end
end
|
1163
|
+
|
1164
|
+
|
1165
|
+
################
|
1166
|
+
# Leftmost terminal of this constituent.
class FirstWordFeature < RosyFeatureExtractor
  announce_me()

  # returns: designator string of this extractor
  def self.designator
    "firstword"
  end

  # returns: names of the two features computed by this extractor
  def self.feature_names
    ["firstword", "firstword_pos"]
  end

  # returns: SQL type string for this extractor's values
  def self.sql_type
    "VARCHAR(20)"
  end

  # returns: feature type label
  def self.feature_type
    "sem"
  end

  #####
  private

  # returns: [head lemma, head POS] of @@node_leftmost_terminal,
  # or [nil, nil] if that terminal is not set
  def compute_features_instanceOK
    first_terminal = @@node_leftmost_terminal
    return [nil, nil] unless first_terminal

    [
      RosyFeatureExtractor.headlemma(first_terminal),
      RosyFeatureExtractor.headpos(first_terminal)
    ]
  end
end
|
1194
|
+
|
1195
|
+
|
1196
|
+
################
|
1197
|
+
# Rightmost terminal of this constituent.
class LastWordFeature < RosyFeatureExtractor
  announce_me()

  # returns: designator string of this extractor
  def self.designator
    "lastword"
  end

  # returns: names of the two features computed by this extractor
  def self.feature_names
    ["lastword", "lastword_pos"]
  end

  # returns: SQL type string for this extractor's values
  def self.sql_type
    "VARCHAR(30)"
  end

  # returns: feature type label
  def self.feature_type
    "sem"
  end

  #####
  private

  # returns: [head lemma, head POS] of @@node_rightmost_terminal,
  # or [nil, nil] if that terminal is not set
  def compute_features_instanceOK
    last_terminal = @@node_rightmost_terminal
    return [nil, nil] unless last_terminal

    [
      RosyFeatureExtractor.headlemma(last_terminal),
      RosyFeatureExtractor.headpos(last_terminal)
    ]
  end
end
|
1225
|
+
|
1226
|
+
################
|
1227
|
+
# Left sibling of the current node.
class LeftSiblingFeature < RosyFeatureExtractor
  announce_me()

  # returns: designator string of this extractor
  def self.designator
    "leftsib"
  end

  # returns: names of the two features computed by this extractor
  def self.feature_names
    ["leftsib_pt", "leftsib_lemma"]
  end

  # returns: SQL type string for this extractor's values
  def self.sql_type
    "VARCHAR(20)"
  end

  # returns: feature type label
  def self.feature_type
    "sem"
  end

  #####
  private

  # The left sibling is the child of the current node's parent whose
  # rightmost terminal has the greatest word index among those lying
  # before the leftmost terminal of the current node.
  #
  # returns: [simplified phrase type, lemma (lemma_backoff)] of that sibling;
  # [nil, nil] if the node has no parent, its leftmost terminal has no
  # word index, or no sibling lies to its left.
  def compute_features_instanceOK
    parent = @@node.parent
    return [nil, nil] if parent.nil?

    own_start = terminal_index(@@node_leftmost_terminal)
    return [nil, nil] unless own_start

    best_sibling = nil
    best_end = nil
    parent.children.each do |candidate|
      candidate_end = terminal_index(@@interpreter_class.rightmost_terminal(candidate))
      # skip siblings without a word index and those not strictly to the left
      next unless candidate_end
      next unless candidate_end < own_start

      # keep the closest one seen so far; on ties the earlier child wins
      if best_sibling.nil? || best_end < candidate_end
        best_sibling = candidate
        best_end = candidate_end
      end
    end

    return [nil, nil] unless best_sibling

    [
      @@interpreter_class.simplified_pt(best_sibling),
      @@interpreter_class.lemma_backoff(best_sibling)
    ]
  end

  ###
  # returns: index (integer) of node in the list of terminals of this sentence;
  # nil if node is nil or does not occur in the list
  def terminal_index(node) # SynNode, terminal
    return nil unless node

    @@terminals_ordered[node] # word index (or nil)
  end
end
|
1297
|
+
|
1298
|
+
################
|
1299
|
+
# Distance between the head word of the constituent and the target (in words).
class WordDistanceFeature < RosySingleFeatureExtractor
  announce_me()

  # returns: name of this feature
  def self.feature_name
    "worddistance"
  end

  # returns: SQL type string for this feature's value
  def self.sql_type
    "TINYINT"
  end

  # returns: feature type label
  def self.feature_type
    "syn"
  end

  #####
  private

  # returns: absolute difference between the word indices (taken from
  # @@terminals_ordered) of the head terminals of the current node and of
  # the target; nil if either head terminal or either index is missing
  def compute_feature_instanceOK
    node_head = @@interpreter_class.head_terminal(@@node)
    target_head = @@interpreter_class.head_terminal(@@target)
    return nil if node_head.nil? || target_head.nil?

    node_pos = @@terminals_ordered[node_head]
    target_pos = @@terminals_ordered[target_head]
    return nil if node_pos.nil? || target_pos.nil?

    (node_pos - target_pos).abs
  end
end
|
1332
|
+
|
1333
|
+
################
|
1334
|
+
# Is the current node a maximal projection?
# Heuristic: a node whose category equals its parent's category is not.
class IsMaxProj < RosySingleFeatureExtractor
  announce_me()

  # returns: name of this feature
  def self.feature_name
    "ismaxproj"
  end

  # returns: SQL type string for this feature's value
  def self.sql_type
    "TINYINT"
  end

  # returns: feature type label
  def self.feature_type
    "syn"
  end

  #####
  private

  # returns: 1 if the node has no parent or its category differs from the
  # parent's category, 0 if both categories are equal
  def compute_feature_instanceOK
    return 1 unless @@node.parent()

    own_category = @@interpreter_class.category(@@node)
    parent_category = @@interpreter_class.category(@@node.parent)
    own_category == parent_category ? 0 : 1
  end
end
|
1365
|
+
|
1366
|
+
################
|
1367
|
+
# Right sibling of the current node.
class RightSiblingFeature < RosyFeatureExtractor
  announce_me()

  # returns: designator string of this extractor
  def self.designator
    "rightsib"
  end

  # returns: names of the two features computed by this extractor
  def self.feature_names
    ["rightsib_pt", "rightsib_lemma"]
  end

  # returns: SQL type string for this extractor's values
  def self.sql_type
    "VARCHAR(20)"
  end

  # returns: feature type label
  def self.feature_type
    "sem"
  end

  #####
  private

  # The right sibling is the child of the current node's parent whose
  # leftmost terminal has the smallest word index among those lying
  # after the rightmost terminal of the current node.
  #
  # returns: [simplified phrase type, lemma (lemma_backoff)] of that sibling;
  # [nil, nil] if the node has no parent, its rightmost terminal has no
  # word index, or no sibling lies to its right.
  def compute_features_instanceOK
    parent = @@node.parent
    return [nil, nil] if parent.nil?

    own_end = terminal_index(@@node_rightmost_terminal)
    return [nil, nil] unless own_end

    best_sibling = nil
    best_start = nil
    parent.children.each do |candidate|
      candidate_start = terminal_index(@@interpreter_class.leftmost_terminal(candidate))
      # skip siblings without a word index and those not strictly to the right
      next unless candidate_start
      next unless candidate_start > own_end

      # keep the closest one seen so far; on ties the earlier child wins
      if best_sibling.nil? || candidate_start < best_start
        best_sibling = candidate
        best_start = candidate_start
      end
    end

    return [nil, nil] unless best_sibling

    [
      @@interpreter_class.simplified_pt(best_sibling),
      @@interpreter_class.lemma_backoff(best_sibling)
    ]
  end

  ###
  # returns: index (integer) of node in the list of terminals of this sentence;
  # nil if node is nil or does not occur in the list
  def terminal_index(node) # SynNode, terminal
    return nil unless node

    @@terminals_ordered[node] # word index (or nil)
  end
end
|
1437
|
+
|
1438
|
+
|
1439
|
+
# ################
|
1440
|
+
# # admin feature: word span of this constituent
|
1441
|
+
# class WordSpanFeature < RosySingleFeatureExtractor
|
1442
|
+
# WordSpanFeature.announce_me()
|
1443
|
+
|
1444
|
+
# def WordSpanFeature.feature_name()
|
1445
|
+
# return "wordspan"
|
1446
|
+
# end
|
1447
|
+
# def WordSpanFeature.sql_type()
|
1448
|
+
# return "VARCHAR(30)"
|
1449
|
+
# end
|
1450
|
+
# def WordSpanFeature.feature_type()
|
1451
|
+
# return "admin"
|
1452
|
+
# end
|
1453
|
+
|
1454
|
+
# #####
|
1455
|
+
# private
|
1456
|
+
|
1457
|
+
# def compute_feature_instanceOK()
|
1458
|
+
|
1459
|
+
# fwh = RosyFeatureExtractor.headlemma(@@node_leftmost_terminal)
|
1460
|
+
# lwh = RosyFeatureExtractor.headlemma(@@node_rightmost_terminal)
|
1461
|
+
|
1462
|
+
# if fwh.nil?
|
1463
|
+
# fwh = ""
|
1464
|
+
# end
|
1465
|
+
# if lwh.nil?
|
1466
|
+
# lwh = ""
|
1467
|
+
# end
|
1468
|
+
|
1469
|
+
# return fwh+ "-" +lwh
|
1470
|
+
# end
|
1471
|
+
# end
|
1472
|
+
|
1473
|
+
|
1474
|
+
################
|
1475
|
+
# Admin feature: my node ID and my father's, separated by a space.
# A node without a parent yields only its own ID.
# NOTE(review): the original comment states that the highest node (topnode)
# has ID 0; that is not established by this class -- confirm where IDs are assigned.
class NodeIDFeature < RosySingleFeatureExtractor
  announce_me()

  # returns: name of this feature
  def self.feature_name
    "nodeID"
  end

  # returns: SQL type string for this feature's value
  def self.sql_type
    "VARCHAR(100)"
  end

  # returns: feature type label
  def self.feature_type
    "admin"
  end

  #####
  private

  # returns: "<node id> <parent id>" if the node has a parent,
  # otherwise just "<node id>"
  def compute_feature_instanceOK
    father = @@node.parent
    own_id = @@node.id.to_s
    father ? own_id + " " + father.id.to_s : own_id
  end
end
|
1502
|
+
|
1503
|
+
################
|
1504
|
+
# Admin feature: sentence ID.
class SentidFeature < RosySingleFeatureExtractor
  SentidFeature.announce_me()

  # returns: name of this feature
  def SentidFeature.feature_name()
    return "sentid"
  end

  # returns: SQL type string for this feature's value
  def SentidFeature.sql_type()
    return "VARCHAR(100)"
  end

  # returns: feature type label
  def SentidFeature.feature_type()
    return "admin"
  end

  # returns: the superclass's info list plus the entry "index"
  # (additional info: I am an index feature).
  def SentidFeature.info()
    # Build a new array instead of appending in place, so the list
    # returned by the superclass is never modified by this call.
    return super() + ["index"]
  end

  #####
  private

  # returns: the instance ID that construct_instance_id builds from the
  # IDs of the current sentence and the current frame
  def compute_feature_instanceOK()
    return construct_instance_id(@@sent.id(), @@frame.id())
  end
end
|
1529
|
+
|
1530
|
+
# ################
|
1531
|
+
# # admin feature: tokens spanned by this constituent
|
1532
|
+
# class TokensFeature < RosySingleFeatureExtractor
|
1533
|
+
# TokensFeature.announce_me()
|
1534
|
+
|
1535
|
+
# def TokensFeature.feature_name()
|
1536
|
+
# return "tokens"
|
1537
|
+
# end
|
1538
|
+
# def TokensFeature.sql_type()
|
1539
|
+
# return "VARCHAR(100)"
|
1540
|
+
# end
|
1541
|
+
# def TokensFeature.feature_type()
|
1542
|
+
# return "admin"
|
1543
|
+
# end
|
1544
|
+
|
1545
|
+
# #####
|
1546
|
+
# private
|
1547
|
+
|
1548
|
+
# def compute_feature_instanceOK()
|
1549
|
+
# return @@node.to_s
|
1550
|
+
# end
|
1551
|
+
# end
|
1552
|
+
|
1553
|
+
################
|
1554
|
+
# Admin feature: frame assigned by FN.
class FrameFeature < RosySingleFeatureExtractor
  FrameFeature.announce_me()

  # returns: name of this feature
  def FrameFeature.feature_name()
    return "frame"
  end

  # returns: SQL type string for this feature's value
  def FrameFeature.sql_type()
    return "VARCHAR(35)"
  end

  # returns: feature type label
  def FrameFeature.feature_type()
    return "ubiq"
  end

  # returns: the superclass's info list plus the entry "index"
  # (additional info: I am an index feature).
  def FrameFeature.info()
    # Build a new array instead of appending in place, so the list
    # returned by the superclass is never modified by this call.
    return super() + ["index"]
  end

  #####
  private

  # returns: name of the current frame, or nil if no frame is set
  def compute_feature_instanceOK()
    if @@frame
      return @@frame.name()
    else
      return nil
    end
  end
end
|
1583
|
+
|
1584
|
+
################
|
1585
|
+
# Admin feature: is this node a terminal?
class TerminalFeature < RosySingleFeatureExtractor
  announce_me()

  # returns: name of this feature
  def self.feature_name
    "term"
  end

  # returns: SQL type string for this feature's value
  def self.sql_type
    "TINYINT"
  end

  # returns: feature type label
  def self.feature_type
    "admin"
  end

  #####
  private

  # returns: 1 if the current node is a terminal, 0 otherwise
  def compute_feature_instanceOK
    @@node.is_terminal? ? 1 : 0
  end
end
|