frprep 0.0.1.prealpha

Sign up to get free protection for your applications and to get access to all the features.
Files changed (138) hide show
  1. data/.yardopts +8 -0
  2. data/CHANGELOG.rdoc +0 -0
  3. data/LICENSE.rdoc +0 -0
  4. data/README.rdoc +0 -0
  5. data/lib/common/AbstractSynInterface.rb +1227 -0
  6. data/lib/common/BerkeleyInterface.rb +375 -0
  7. data/lib/common/CollinsInterface.rb +1165 -0
  8. data/lib/common/ConfigData.rb +694 -0
  9. data/lib/common/Counter.rb +18 -0
  10. data/lib/common/DBInterface.rb +48 -0
  11. data/lib/common/EnduserMode.rb +27 -0
  12. data/lib/common/Eval.rb +480 -0
  13. data/lib/common/FixSynSemMapping.rb +196 -0
  14. data/lib/common/FrPrepConfigData.rb +66 -0
  15. data/lib/common/FrprepHelper.rb +1324 -0
  16. data/lib/common/Graph.rb +345 -0
  17. data/lib/common/ISO-8859-1.rb +24 -0
  18. data/lib/common/ML.rb +186 -0
  19. data/lib/common/Maxent.rb +215 -0
  20. data/lib/common/MiniparInterface.rb +1388 -0
  21. data/lib/common/Optimise.rb +195 -0
  22. data/lib/common/Parser.rb +213 -0
  23. data/lib/common/RegXML.rb +269 -0
  24. data/lib/common/RosyConventions.rb +171 -0
  25. data/lib/common/SQLQuery.rb +243 -0
  26. data/lib/common/STXmlTerminalOrder.rb +194 -0
  27. data/lib/common/SalsaTigerRegXML.rb +2347 -0
  28. data/lib/common/SalsaTigerXMLHelper.rb +99 -0
  29. data/lib/common/SleepyInterface.rb +384 -0
  30. data/lib/common/SynInterfaces.rb +275 -0
  31. data/lib/common/TabFormat.rb +720 -0
  32. data/lib/common/Tiger.rb +1448 -0
  33. data/lib/common/TntInterface.rb +44 -0
  34. data/lib/common/Tree.rb +61 -0
  35. data/lib/common/TreetaggerInterface.rb +303 -0
  36. data/lib/common/headz.rb +338 -0
  37. data/lib/common/option_parser.rb +13 -0
  38. data/lib/common/ruby_class_extensions.rb +310 -0
  39. data/lib/fred/Baseline.rb +150 -0
  40. data/lib/fred/FileZipped.rb +31 -0
  41. data/lib/fred/FredBOWContext.rb +863 -0
  42. data/lib/fred/FredConfigData.rb +182 -0
  43. data/lib/fred/FredConventions.rb +232 -0
  44. data/lib/fred/FredDetermineTargets.rb +324 -0
  45. data/lib/fred/FredEval.rb +312 -0
  46. data/lib/fred/FredFeatureExtractors.rb +321 -0
  47. data/lib/fred/FredFeatures.rb +1061 -0
  48. data/lib/fred/FredFeaturize.rb +596 -0
  49. data/lib/fred/FredNumTrainingSenses.rb +27 -0
  50. data/lib/fred/FredParameters.rb +402 -0
  51. data/lib/fred/FredSplit.rb +84 -0
  52. data/lib/fred/FredSplitPkg.rb +180 -0
  53. data/lib/fred/FredTest.rb +607 -0
  54. data/lib/fred/FredTrain.rb +144 -0
  55. data/lib/fred/PlotAndREval.rb +480 -0
  56. data/lib/fred/fred.rb +45 -0
  57. data/lib/fred/md5.rb +23 -0
  58. data/lib/fred/opt_parser.rb +250 -0
  59. data/lib/frprep/AbstractSynInterface.rb +1227 -0
  60. data/lib/frprep/Ampersand.rb +37 -0
  61. data/lib/frprep/BerkeleyInterface.rb +375 -0
  62. data/lib/frprep/CollinsInterface.rb +1165 -0
  63. data/lib/frprep/ConfigData.rb +694 -0
  64. data/lib/frprep/Counter.rb +18 -0
  65. data/lib/frprep/FNCorpusXML.rb +643 -0
  66. data/lib/frprep/FNDatabase.rb +144 -0
  67. data/lib/frprep/FixSynSemMapping.rb +196 -0
  68. data/lib/frprep/FrPrepConfigData.rb +66 -0
  69. data/lib/frprep/FrameXML.rb +513 -0
  70. data/lib/frprep/FrprepHelper.rb +1324 -0
  71. data/lib/frprep/Graph.rb +345 -0
  72. data/lib/frprep/ISO-8859-1.rb +24 -0
  73. data/lib/frprep/MiniparInterface.rb +1388 -0
  74. data/lib/frprep/Parser.rb +213 -0
  75. data/lib/frprep/RegXML.rb +269 -0
  76. data/lib/frprep/STXmlTerminalOrder.rb +194 -0
  77. data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
  78. data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
  79. data/lib/frprep/SleepyInterface.rb +384 -0
  80. data/lib/frprep/SynInterfaces.rb +275 -0
  81. data/lib/frprep/TabFormat.rb +720 -0
  82. data/lib/frprep/Tiger.rb +1448 -0
  83. data/lib/frprep/TntInterface.rb +44 -0
  84. data/lib/frprep/Tree.rb +61 -0
  85. data/lib/frprep/TreetaggerInterface.rb +303 -0
  86. data/lib/frprep/do_parses.rb +142 -0
  87. data/lib/frprep/frprep.rb +686 -0
  88. data/lib/frprep/headz.rb +338 -0
  89. data/lib/frprep/one_parsed_file.rb +28 -0
  90. data/lib/frprep/opt_parser.rb +94 -0
  91. data/lib/frprep/ruby_class_extensions.rb +310 -0
  92. data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
  93. data/lib/rosy/DBMySQL.rb +146 -0
  94. data/lib/rosy/DBSQLite.rb +280 -0
  95. data/lib/rosy/DBTable.rb +239 -0
  96. data/lib/rosy/DBWrapper.rb +176 -0
  97. data/lib/rosy/ExternalConfigData.rb +58 -0
  98. data/lib/rosy/FailedParses.rb +130 -0
  99. data/lib/rosy/FeatureInfo.rb +242 -0
  100. data/lib/rosy/GfInduce.rb +1115 -0
  101. data/lib/rosy/GfInduceFeature.rb +148 -0
  102. data/lib/rosy/InputData.rb +294 -0
  103. data/lib/rosy/RosyConfigData.rb +115 -0
  104. data/lib/rosy/RosyConfusability.rb +338 -0
  105. data/lib/rosy/RosyEval.rb +465 -0
  106. data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
  107. data/lib/rosy/RosyFeaturize.rb +280 -0
  108. data/lib/rosy/RosyInspect.rb +336 -0
  109. data/lib/rosy/RosyIterator.rb +477 -0
  110. data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
  111. data/lib/rosy/RosyPruning.rb +165 -0
  112. data/lib/rosy/RosyServices.rb +744 -0
  113. data/lib/rosy/RosySplit.rb +232 -0
  114. data/lib/rosy/RosyTask.rb +19 -0
  115. data/lib/rosy/RosyTest.rb +826 -0
  116. data/lib/rosy/RosyTrain.rb +232 -0
  117. data/lib/rosy/RosyTrainingTestTable.rb +786 -0
  118. data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
  119. data/lib/rosy/View.rb +418 -0
  120. data/lib/rosy/opt_parser.rb +379 -0
  121. data/lib/rosy/rosy.rb +77 -0
  122. data/lib/shalmaneser/version.rb +3 -0
  123. data/test/frprep/test_opt_parser.rb +94 -0
  124. data/test/functional/functional_test_helper.rb +40 -0
  125. data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
  126. data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
  127. data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
  128. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
  129. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
  130. data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
  131. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
  132. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
  133. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
  134. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
  135. data/test/functional/test_fred.rb +47 -0
  136. data/test/functional/test_frprep.rb +52 -0
  137. data/test/functional/test_rosy.rb +20 -0
  138. metadata +270 -0
@@ -0,0 +1,1609 @@
1
+ ####
2
+ # ke & sp
3
+ # adapted to new feature extractor class,
4
+ # Collins and Tiger features combined:
5
+ # KE November 2005
6
+ #
7
+ # Feature Extractors for Rosy
8
+ #
9
+ # Contract: each feature extractor inherits from the RosyFeatureExtractor class
10
+ #
11
+ # Feature extractors return nil if no feature value could be
12
+ # returned
13
+
14
+
15
+ # Salsa packages
16
+ require 'rosy/AbstractFeatureAndExternal'
17
+ require 'common/SalsaTigerRegXML'
18
+
19
+ # Fred and Rosy packages
20
+ require 'common/RosyConventions'
21
+
22
+
23
+ ################################
24
+ # base class for all following feature extractors
25
class RosyFeatureExtractor < AbstractFeatureExtractor
  # Base class for all Rosy feature extractors.
  #
  # All state is held in class variables that are filled by set(),
  # set_sentence() and set_node() and then read by the individual
  # extractors. @@sent, @@frame, @@node and @@interpreter_class are not
  # assigned here; presumably the superclass sets them in its own
  # set_sentence()/set_node() -- confirm against AbstractFeatureExtractor.

  @@instance_ok = nil # Boolean: set_node(), set_sent() successful?
  @@split_nones = nil # Boolean: split NONE value for gold feature?

  @@target = nil # SynNode: main target node
  @@target_pos = nil # string: part of speech of main target
  @@target_voice = nil # string: "active", "passive", or nil
  @@terminals_ordered = nil # Hash: sentence terminals, mapped onto their word indices (starting with 1)
  @@target_gfs = nil # Array of pairs [rel, node]: grammatical functions of the target

  @@paths = nil # Hash: node ID -> path object, path from main target node to the node with that ID
  @@relpos = nil # string: position of instance relative to target
  @@node_leftmost_terminal = nil # SynNode objects: first and last terminal
  @@node_rightmost_terminal = nil # in the yield of @@node

  @@governing_verb = nil # SynNode object: closest governing verb of @@target
  @@gv_paths = nil # Hash: node ID -> path object, path from the governing verb to the node with that ID

  ###
  # returns a string: "phase 1" or "phase 2",
  # depending on whether the feature is computed
  # directly from the SalsaTigerSentence and the SynNode objects
  # or whether it is computed from the phase 1 features
  # computed for the training set
  #
  # Here: all features in this package are phase 1
  def RosyFeatureExtractor.phase()
    return "phase 1"
  end

  ###
  # returns an array of strings, providing information about
  # the feature extractor: the superclass info plus "rosy"
  def RosyFeatureExtractor.info()
    return super().concat(["rosy"])
  end

  ###
  # set sentence, set node, set general settings: this is done prior to
  # feature computation using compute_features()
  # such that computations that stay the same for
  # several features can be done in advance
  #
  # var_hash: hash. possible entries: "split_nones" => true/false
  # returns: true
  def RosyFeatureExtractor.set(var_hash)
    @@split_nones = var_hash["split_nones"]

    return true
  end

  ###
  # Precompute everything that depends only on the sentence and the frame:
  # terminal word indices, the main target node with its part of speech,
  # voice and grammatical functions, the paths from the target to all
  # nodes, and the governing verb of the target with its paths.
  #
  # returns: false if no main target node could be determined, else true
  def RosyFeatureExtractor.set_sentence(sent, # SalsaTigerSentence object
                                        frame) # FrameNode object
    super(sent, frame)

    # map each terminal below the first syntactic root to its word index
    root = @@sent.syn_roots.first()
    word_index_counter = 1
    @@terminals_ordered = Hash.new
    root.yield_nodes_ordered.each { |yield_node|
      @@terminals_ordered[yield_node] = word_index_counter
      word_index_counter += 1
    }

    # @@target: main target node (SynNode)
    # WARNING: at this moment, we are
    # not considering true multiword targets.
    # Remove the "no_mwe" parameter in the main_node_of_expr call below
    # to change this
    unless frame.target
      @@target = nil
      return false
    end
    @@target = @@interpreter_class.main_node_of_expr(frame.target.children(), "no_mwe")

    unless @@target
      return false
    end

    # @@target_pos: string, target POS
    @@target_pos = @@interpreter_class.category(@@target)

    # @@target_voice:
    # for verb targets, string, active or passive
    # else nil
    @@target_voice = @@interpreter_class.voice(@@target)
    @@target_gfs = @@interpreter_class.gfs(@@target, @@sent)

    # paths from target to all other nodes in the graph
    @@paths = RosyFeatureExtractor.all_paths_from(@@target)

    # governing verb of target.
    # Both values are reset for every sentence, so that a sentence without
    # a governing verb does not inherit the verb found for an earlier
    # sentence: if something goes wrong, they remain unset/empty.
    @@governing_verb = nil
    @@gv_paths = Hash.new
    if (targetlemma = RosyFeatureExtractor.headlemma(@@target))
      # determine governing verb: climb from the target towards the root
      parent = @@target
      while (parent = parent.parent)
        parentlemma = RosyFeatureExtractor.headlemma(parent)

        if @@interpreter_class.category(parent) == "verb" and
           parentlemma != targetlemma
          # success: found the governing verb of the target

          @@governing_verb = @@interpreter_class.head_terminal(parent)
          # paths from governing verb of target to all other nodes in the graph
          if @@governing_verb
            @@gv_paths = RosyFeatureExtractor.all_paths_from(@@governing_verb)
          end

          break
        end
      end
    end

    # paths: when printing, leave off the phrase type of the end node
    @@paths.each_value { |p| p.set_cutoff_last_pt_on_printing(true) }
    @@gv_paths.each_value { |p| p.set_cutoff_last_pt_on_printing(true) }

    return true
  end

  ###
  # Precompute everything that depends on the current instance node.
  #
  # node: SynNode of the sentence set in set_sentence
  # returns: false (and marks the instance as not OK) if there is no
  #          target, else true
  def RosyFeatureExtractor.set_node(node)
    super(node)

    @@instance_ok = true

    unless @@target
      # no target, nothing I can compute here
      @@instance_ok = false
      return false
    end

    # position of instance node relative to main target node
    @@relpos = @@interpreter_class.relative_position(@@node, @@target)
    # leftmost, rightmost terminal in the yield of @@node
    @@node_leftmost_terminal = @@interpreter_class.leftmost_terminal(@@node)
    @@node_rightmost_terminal = @@interpreter_class.rightmost_terminal(@@node)

    return true
  end

  ###
  # compute_features: first check if instance is OK
  #
  # returns: list of features, or nil if the instance is not OK
  def compute_features()
    unless @@instance_ok
      return nil
    end

    return make_features_safe_for_sql(compute_features_instanceOK())
  end

  ############
  protected

  # to be overwritten by subclasses
  # returns: list of features
  def compute_features_instanceOK()
    raise "Overwrite me"
  end

  ###
  # in computed string features:
  # replace "," by COMMA and backslash by BACK in order not to confuse SQL;
  # non-string features are passed through unchanged
  def make_features_safe_for_sql(feature_list)
    return feature_list.map { |feature|
      if feature.kind_of? String
        feature.gsub(/,/, "COMMA").gsub(/\\/, "BACK")
      else
        feature
      end
    }
  end

  ###
  # lemma of the head terminal of SynNode n,
  # nil if n is nil or has no head terminal
  def RosyFeatureExtractor.headlemma(n) # SynNode
    unless n
      return nil
    end

    h = @@interpreter_class.head_terminal(n)
    if h
      return @@interpreter_class.lemma_backoff(h)
    else
      return nil
    end
  end

  ###
  # part of speech of the head terminal of SynNode n,
  # nil if n is nil or has no head terminal
  def RosyFeatureExtractor.headpos(n) # SynNode
    unless n
      return nil
    end

    h = @@interpreter_class.head_terminal(n)
    if h
      return h.part_of_speech()
    else
      return nil
    end
  end

  ###
  # Given a SynNode n, recursively determine
  # the paths from n to all other reachable nodes,
  # skipping nodes that already have a path
  # listed in the given hash mapping node IDs to paths.
  # Paths are given as Path objects (see AbstractSynInterface).
  # It is assumed that the graph of n is a tree, which
  # is searched depth-first, first the children, then the parent of n.
  #
  # returns: the hash, nodeID(string) => Path object
  def RosyFeatureExtractor.all_paths_from(n, # SynNode
                                          hash = nil) # Hash: nodeID(string) => Path object
    # initial step of all: no hash existing yet
    if hash.nil?
      hash = Hash.new
      hash[n.id()] = Path.new(n)
    end

    # invariant at this point: n must be listed in hash
    unless hash[n.id()]
      raise "Shouldn't be here"
    end

    # for each child c of n: compute its path from the path of n,
    # and explore paths below c
    n.each_child_with_edgelabel { |label, c|
      if hash[c.id()].nil?
        hash[c.id()] = hash[n.id()].deep_clone().add_last_step("D",
                                                               label,
                                                               @@interpreter_class.simplified_pt(c),
                                                               c)
        RosyFeatureExtractor.all_paths_from(c, hash)
      end
    }

    # compute the path from n's parent p from the path of n,
    # and explore paths beyond p
    if (p = n.parent) and hash[p.id()].nil?
      # node has a parent, and it is not listed in the path hash
      # make a new path for parent: n's path, plus one up-step
      hash[p.id()] = hash[n.id()].deep_clone().add_last_step("U",
                                                             n.parent_label,
                                                             @@interpreter_class.simplified_pt(p),
                                                             p)
      RosyFeatureExtractor.all_paths_from(p, hash)
    end

    return hash
  end
end
288
+
289
+ ###############################
290
+ # Rosy single feature extractor, duplicating stuff from
291
+ # AbstractSingleFeatureExtractor
292
class RosySingleFeatureExtractor < RosyFeatureExtractor
  # Base class for extractors that compute exactly one feature.
  # Subclasses provide the class method feature_name() and the private
  # instance method compute_feature_instanceOK().

  ###
  # returns a string: the designator for this feature extractor
  # (an extractor may compute several features, but
  # in the experiment file it is chosen by a single designator)
  #
  # here: single feature, and the feature name is the designator.
  # self is the class the method was called on, so no eval of the
  # class name is needed to reach the subclass's feature_name().
  def RosySingleFeatureExtractor.designator()
    return self.feature_name()
  end

  ###
  # returns an array with the single feature name of this extractor
  def RosySingleFeatureExtractor.feature_names()
    return [self.feature_name()]
  end

  ###
  # compute_features: first check if instance is OK
  #
  # returns: list with the single feature value (made safe for SQL),
  #          or nil if the instance is not OK
  def compute_features()
    unless @@instance_ok
      return nil
    end

    return make_features_safe_for_sql([compute_feature_instanceOK()])
  end

  ############
  private

  # to be overwritten by subclasses
  # returns: the feature value, or nil
  def compute_feature_instanceOK()
    raise "Overwrite me"
  end
end
329
+
330
+ ##############################################
331
+ # Individual feature extractors
332
+ ##############################################
333
+
334
+ ####################
335
+ # gold role label
336
class GoldlabelFeature < RosySingleFeatureExtractor
  # Gold role label of the instance node.
  announce_me

  def self.feature_name
    "gold"
  end

  def self.sql_type
    "VARCHAR(30)"
  end

  def self.feature_type
    "gold"
  end

  # Superclass info, extended by the marker "index":
  # this feature is an index feature.
  def self.info
    details = super()
    details.concat(["index"])
  end

  private

  # Name of the first frame element that has the instance node among
  # its children; nil if the node bears no role label.
  # (Splitting the "no role" label by @@relpos when @@split_nones is
  # set is currently disabled.)
  def compute_feature_instanceOK
    @@frame.each_fe_by_name do |fe|
      return fe.name if fe.children.include?(@@node)
    end

    nil
  end
end
373
+
374
+ ####################
375
+ # path features
376
class AbstractPathFeature < RosySingleFeatureExtractor
  # Common part of the features describing the path from the target to
  # the instance node. Subclasses implement my_path_computation().

  def self.sql_type
    "VARCHAR(80)"
  end

  def self.feature_type
    "syn"
  end

  private

  # The printed path, or nil if there is no path for the instance node
  # or the printed path is empty.
  def compute_feature_instanceOK
    printed = @@paths[@@node.id].nil? ? nil : my_path_computation()

    return nil if printed.nil? || printed.empty?

    printed
  end

  # Hook for subclasses: returns the printed path.
  def my_path_computation
    raise "overwrite me"
  end
end
405
+
406
+
407
+ ####################
408
+ # path consisting of nodelabels, dependencies and directions
409
class PathFeature < AbstractPathFeature
  # Path from the target, consisting of node labels, dependencies
  # and directions.
  announce_me

  def self.sql_type
    "VARCHAR(120)"
  end

  def self.feature_name
    "path"
  end

  private

  # Prints the path of the instance node with all three print flags set;
  # nil if there is no such path.
  def my_path_computation
    target_path = @@paths[@@node.id]
    return nil if target_path.nil?

    target_path.print(true, true, true)
  end
end
430
+
431
+
432
+
433
+ ####################
434
+ # path consisting of phrase type and directions
435
class NodelabelPathFeature < AbstractPathFeature
  # Path from the target, consisting of phrase types and directions.
  announce_me

  def self.feature_name
    "pt_path"
  end

  private

  # Prints the path of the instance node with the second print flag
  # switched off; nil if there is no such path.
  def my_path_computation
    target_path = @@paths[@@node.id]
    return nil if target_path.nil?

    target_path.print(true, false, true)
  end
end
453
+
454
+ ####################
455
+ # path consisting of dependencies and directions
456
class EdgelabelPathFeature < AbstractPathFeature
  # Path from the target, consisting of dependencies and directions.
  announce_me

  def self.feature_name
    "gf_path"
  end

  private

  # Prints the path of the instance node with the third print flag
  # switched off; nil if there is no such path.
  def my_path_computation
    target_path = @@paths[@@node.id]
    return nil if target_path.nil?

    target_path.print(true, true, false)
  end
end
474
+
475
+ ####################
476
+ # features: path from governing verb
477
class AbstractGVPathFeature < RosySingleFeatureExtractor
  # Common part of the features describing the path from the governing
  # verb of the target to the instance node.
  # Subclasses implement my_path_computation().

  def self.sql_type
    "VARCHAR(80)"
  end

  def self.feature_type
    "syn"
  end

  private

  # The printed path, or nil if there is no governing-verb path for the
  # instance node or the printed path is empty.
  def compute_feature_instanceOK
    printed = @@gv_paths[@@node.id].nil? ? nil : my_path_computation()

    return nil if printed.nil? || printed.empty?

    printed
  end

  # Hook for subclasses: returns the printed path.
  def my_path_computation
    raise "overwrite me"
  end
end
506
+
507
+
508
+ ####################
509
+ # path from governing verb consisting of nodelabels, dependencies and directions
510
class GVPathFeature < AbstractGVPathFeature
  # Path from the governing verb, consisting of node labels,
  # dependencies and directions.
  announce_me

  def self.sql_type
    "VARCHAR(120)"
  end

  def self.feature_name
    "gvpath"
  end

  private

  # Prints the governing-verb path of the instance node with all three
  # print flags set. No nil check here: the caller in the abstract class
  # has already made sure that the path exists.
  def my_path_computation
    gv_path = @@gv_paths[@@node.id]
    gv_path.print(true, true, true)
  end
end
527
+
528
+
529
+ ####################
530
+ # gov. verb path consisting of phrase type and directions
531
class GVNodelabelPathFeature < AbstractGVPathFeature
  # Path from the governing verb, consisting of phrase types
  # and directions.
  announce_me

  def self.feature_name
    "pt_gvpath"
  end

  private

  # Prints the governing-verb path of the instance node with the second
  # print flag switched off. No nil check here: the caller in the
  # abstract class has already made sure that the path exists.
  def my_path_computation
    gv_path = @@gv_paths[@@node.id]
    gv_path.print(true, false, true)
  end
end
545
+
546
+ ####################
547
+ # gov. verb path consisting of dependencies and directions
548
class GVEdgelabelPathFeature < AbstractGVPathFeature
  # Path from the governing verb, consisting of dependencies
  # and directions.
  announce_me

  def self.feature_name
    "gf_gvpath"
  end

  private

  # Prints the governing-verb path of the instance node with the third
  # print flag switched off. No nil check here: the caller in the
  # abstract class has already made sure that the path exists.
  def my_path_computation
    gv_path = @@gv_paths[@@node.id]
    gv_path.print(true, true, false)
  end
end
562
+
563
+ ####################
564
+ # path length
565
class PathLengthFeature < RosySingleFeatureExtractor
  # Length of the path from the target to the instance node.
  announce_me

  def self.feature_name
    "path_length"
  end

  def self.sql_type
    "TINYINT"
  end

  def self.feature_type
    "syn"
  end

  private

  # length() of the path of the instance node, nil if there is no path.
  def compute_feature_instanceOK
    target_path = @@paths[@@node.id]
    target_path.nil? ? nil : target_path.length
  end
end
589
+
590
+ #########
591
+ # group of combined path features:
592
+ # path to target combined with target part of speech and
593
+ # info on whether the target is passive
594
class AbstractCombinedPathFeature < RosySingleFeatureExtractor
  # Group of combined path features: path to the target, combined with
  # the target part of speech and the target voice.
  # Subclasses implement my_path_computation().

  def self.sql_type
    "VARCHAR(90)"
  end

  def self.feature_type
    "syn"
  end

  private

  # Builds "<path>--<target POS>--<target voice>"; the path part is the
  # empty string if there is no path for the instance node.
  def compute_feature_instanceOK
    path_part = @@paths[@@node.id].nil? ? "" : my_path_computation()
    pos_part = @@target_pos.to_s
    voice_part = @@target_voice.to_s

    path_part + "--" + pos_part + "--" + voice_part
  end

  # Hook for subclasses: returns the printed path.
  def my_path_computation
    raise "Overwrite me"
  end
end
620
+
621
+
622
+ ####################
623
+ # combined path based on nodelabels
624
class NodelabelCombinedPathFeature < AbstractCombinedPathFeature
  # Combined path based on node labels.
  announce_me

  def self.feature_name
    "pt_combined_path"
  end

  private

  # Prints the path of the instance node with only the third print flag
  # set; nil if there is no such path.
  def my_path_computation
    target_path = @@paths[@@node.id]
    return nil if target_path.nil?

    target_path.print(false, false, true)
  end
end
642
+
643
+ ####################
644
+ # combined path based on edgelabels
645
class EdgelabelCombinedPathFeature < AbstractCombinedPathFeature
  # Combined path based on edge labels.
  announce_me

  def self.feature_name
    "gf_combined_path"
  end

  private

  # Prints the path of the instance node with only the second print flag
  # set; nil if there is no such path.
  def my_path_computation
    target_path = @@paths[@@node.id]
    return nil if target_path.nil?

    target_path.print(false, true, false)
  end
end
663
+
664
+
665
+ ####################
666
+ # combined path based on nodelabels and edgelabels
667
class CombinedPathFeature < AbstractCombinedPathFeature
  # Combined path based on node labels and edge labels.
  announce_me

  def self.sql_type
    "VARCHAR(130)"
  end

  def self.feature_name
    "combined_path"
  end

  private

  # Prints the path of the instance node with the first print flag
  # switched off; nil if there is no such path.
  def my_path_computation
    target_path = @@paths[@@node.id]
    return nil if target_path.nil?

    target_path.print(false, true, true)
  end
end
688
+
689
+
690
+ ##################
691
+ # group of features for computing
692
+ # partial path to target: only up to
693
+ # the lowest common ancestor of current node and target
694
class AbstractPartialPathFeature < RosySingleFeatureExtractor
  # Group of features for computing the partial path to the target:
  # only up to the lowest common ancestor of current node and target.
  # Subclasses implement my_path_computation().

  def AbstractPartialPathFeature.sql_type()
    return "VARCHAR(70)"
  end
  def AbstractPartialPathFeature.feature_type()
    return "syn"
  end

  ################
  private

  # returns: the printed partial path, or nil if there is no path for
  # the instance node or the printed path is empty
  def compute_feature_instanceOK()
    if @@paths[@@node.id()].nil?
      path = nil
    else
      path = my_path_computation()
    end
    if path.nil? or path.empty?
      return nil
    else
      return path
    end
  end

  ###
  # Hook for subclasses: returns the printed partial path.
  # Stub raising "overwrite me", as in the other abstract path feature
  # classes, so that a subclass that forgets to implement the hook fails
  # with an explicit message.
  def my_path_computation()
    raise "overwrite me"
  end
end
719
+
720
+ ####
721
+ # partial path based on node labels
722
class NodelabelPartialPathFeature < AbstractPartialPathFeature
  # Partial path based on node labels.
  announce_me

  def self.feature_name
    "pt_partial_path"
  end

  private

  # print_downpart of the path of the instance node with the second flag
  # switched off; nil if there is no such path.
  def my_path_computation
    target_path = @@paths[@@node.id]
    return nil if target_path.nil?

    target_path.print_downpart(true, false, true)
  end
end
740
+
741
+ ####
742
+ # partial path based on edge labels
743
class EdgelabelPartialPathFeature < AbstractPartialPathFeature
  # Partial path based on edge labels.
  announce_me

  def self.feature_name
    "gf_partial_path"
  end

  private

  # print_downpart of the path of the instance node with the third flag
  # switched off; nil if there is no such path.
  def my_path_computation
    target_path = @@paths[@@node.id]
    return nil if target_path.nil?

    target_path.print_downpart(true, true, false)
  end
end
761
+
762
+ ####
763
+ # partial path based on node and edge labels
764
class PartialPathFeature < AbstractPartialPathFeature
  # Partial path based on node and edge labels.
  announce_me

  def self.sql_type
    "VARCHAR(110)"
  end

  def self.feature_name
    "partial_path"
  end

  private

  # print_downpart of the path of the instance node with all three
  # flags set; nil if there is no such path.
  def my_path_computation
    target_path = @@paths[@@node.id]
    return nil if target_path.nil?

    target_path.print_downpart(true, true, true)
  end
end
785
+
786
+
787
+
788
+ ##################
789
+ # ancestor rule: grammar rule
790
+ # expanding lowest common ancestor of current node and target
791
class AncestorRuleFeature < RosySingleFeatureExtractor
  # Ancestor rule: grammar rule expanding the lowest common ancestor
  # of current node and target.
  announce_me

  def self.feature_name
    "ancestor_rule"
  end

  def self.sql_type
    "VARCHAR(50)"
  end

  def self.feature_type
    "syn"
  end

  private

  # Builds "<pt of lca> -> <pt of child 1> <pt of child 2> ...",
  # using the simplified phrase types. nil if the instance node has no
  # path or the path yields no lowest common ancestor.
  def compute_feature_instanceOK
    target_path = @@paths[@@node.id]
    return nil if target_path.nil?

    ancestor = target_path.lca
    return nil unless ancestor

    left_side = @@interpreter_class.simplified_pt(ancestor).to_s
    right_side = ancestor.children.map do |child|
      @@interpreter_class.simplified_pt(child).to_s
    end

    left_side + " -> " + right_side.join(" ")
  end
end
822
+
823
+ ##################
824
+ # relative position to target: left, right, including target
825
class RelativePositionFeature < RosySingleFeatureExtractor
  # Relative position to target: left, right, including target.
  announce_me

  def self.feature_name
    "relpos"
  end

  def self.sql_type
    "CHAR(5)"
  end

  def self.feature_type
    "syn"
  end

  private

  # The position precomputed in set_node().
  def compute_feature_instanceOK
    @@relpos
  end
end
845
+
846
+
847
+ ################
848
+ # phrase type of the instance node
849
class PhraseTypeFeature < RosySingleFeatureExtractor
  # Phrase type of the instance node.
  announce_me

  def self.feature_name
    "pt"
  end

  def self.sql_type
    "VARCHAR(15)"
  end

  def self.feature_type
    "syn"
  end

  private

  # Simplified phrase type of the instance node, as determined by the
  # interpreter class.
  def compute_feature_instanceOK
    @@interpreter_class.simplified_pt(@@node)
  end
end
869
+
870
+ ################
871
+ # grammatical function that this instance node fills for the target
872
class GFFeature < RosySingleFeatureExtractor
  # Grammatical function that this instance node fills for the target.
  announce_me

  def self.feature_name
    "gf"
  end

  def self.sql_type
    "VARCHAR(20)"
  end

  def self.feature_type
    "syn"
  end

  private

  # Relation of the first [rel, node] pair in @@target_gfs whose node is
  # the instance node; nil if there are no target gfs or none matches.
  def compute_feature_instanceOK
    return nil unless @@target_gfs

    @@target_gfs.each do |rel, other_node|
      return rel if @@node == other_node
    end

    nil
  end
end
902
+
903
+ ##################
904
+ # phrase type of parent of this node
905
class FatherPhraseTypeFeature < RosySingleFeatureExtractor
  # Phrase type of the parent of the instance node.
  announce_me

  def self.feature_name
    "father_pt"
  end

  def self.sql_type
    "VARCHAR(15)"
  end

  def self.feature_type
    "syn"
  end

  private

  # Simplified phrase type of the parent node, nil for a node
  # without parent.
  def compute_feature_instanceOK
    return nil unless @@node.parent

    @@interpreter_class.simplified_pt(@@node.parent)
  end
end
929
+
930
+ ################
931
+ # target lemma
932
class TargetLemmaFeature < RosySingleFeatureExtractor
  # Lemma of the target.
  announce_me

  def self.feature_name
    "target"
  end

  def self.sql_type
    "VARCHAR(20)"
  end

  def self.feature_type
    "ubiq"
  end

  # Superclass info, extended by the marker "index":
  # this feature is an index feature.
  def self.info
    details = super()
    details.concat(["index"])
  end

  private

  # Lemma of the main target node, via the interpreter's lemma_backoff.
  def compute_feature_instanceOK
    @@interpreter_class.lemma_backoff(@@target)
  end
end
956
+
957
+ ################
958
# Feature "target_pos": part of speech of the target lemma.
class TargetPOSFeature < RosySingleFeatureExtractor
  announce_me()

  # Name of this feature.
  def self.feature_name
    "target_pos"
  end

  # SQL type declared for this feature's values.
  def self.sql_type
    "VARCHAR(10)"
  end

  # Feature group this extractor reports itself under.
  def self.feature_type
    "ubiq"
  end

  # Inherited info list with "index" appended: this is an index feature.
  def self.info
    super().concat(["index"])
  end

  #####
  private

  # Returns the target part of speech stored in @@target_pos.
  def compute_feature_instanceOK
    @@target_pos
  end
end
984
+
985
+ ################
986
# Feature "finegrained_target_pos": fine-grained part of speech of the
# target, i.e. the value the interpreter's pt() reports for the target node.
class TargetFineGrainedPOSFeature < RosySingleFeatureExtractor
  announce_me()

  # Name of this feature.
  def self.feature_name
    "finegrained_target_pos"
  end

  # SQL type declared for this feature's values.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature group this extractor reports itself under.
  def self.feature_type
    "ubiq"
  end

  #####
  private

  # Returns the interpreter's pt() value for the target node.
  def compute_feature_instanceOK
    @@interpreter_class.pt(@@target)
  end
end
1008
+
1009
+ ################
1010
# Feature "target_voice": voice of the target lemma, cut to four characters.
class TargetVoiceFeature < RosySingleFeatureExtractor
  announce_me()

  # Name of this feature.
  def self.feature_name
    "target_voice"
  end

  # SQL type declared for this feature's values.
  def self.sql_type
    "CHAR(4)"
  end

  # Feature group this extractor reports itself under.
  def self.feature_type
    "ubiq"
  end

  #####
  private

  # Asks the interpreter for the target's voice and keeps only its first
  # four characters, matching the CHAR(4) column type.
  # Returns nil when the interpreter reports no voice.
  def compute_feature_instanceOK
    target_voice = @@interpreter_class.voice(@@target)
    target_voice ? target_voice.slice(0, 4) : nil
  end
end
1036
+
1037
+ ################
1038
# Feature "gov_verb": head lemma of the verb governing the target.
class GoverningVerbOfTargetFeature < RosySingleFeatureExtractor
  GoverningVerbOfTargetFeature.announce_me()

  # Name of this feature.
  def GoverningVerbOfTargetFeature.feature_name()
    return "gov_verb"
  end

  # SQL type declared for this feature's values.
  # Spelled "VARCHAR" like every other extractor in this file
  # (it previously read "VArCHAR(20)").
  def GoverningVerbOfTargetFeature.sql_type()
    return "VARCHAR(20)"
  end

  # Feature group this extractor reports itself under.
  def GoverningVerbOfTargetFeature.feature_type()
    return "sem"
  end

  #####
  private

  # Returns the head lemma of @@governing_verb,
  # or nil when no governing verb is set.
  def compute_feature_instanceOK()
    if @@governing_verb
      return RosyFeatureExtractor.headlemma(@@governing_verb)
    else
      return nil
    end
  end
end
1063
+
1064
+ ################
1065
# Feature "prep": preposition for this constituent.
class PrepFeature < RosySingleFeatureExtractor
  announce_me()

  # Name of this feature.
  def self.feature_name
    "prep"
  end

  # SQL type declared for this feature's values.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature group this extractor reports itself under.
  def self.feature_type
    "syn"
  end

  #####
  private

  # Returns whatever the interpreter's preposition() yields for the
  # current node.
  def compute_feature_instanceOK
    @@interpreter_class.preposition(@@node)
  end
end
1086
+
1087
+ ################
1088
# Feature "const_head": head lemma of this constituent.
class HeadFeature < RosySingleFeatureExtractor
  announce_me()

  # Name of this feature.
  def self.feature_name
    "const_head"
  end

  # SQL type declared for this feature's values.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature group this extractor reports itself under.
  def self.feature_type
    "sem"
  end

  #####
  private

  # Returns RosyFeatureExtractor.headlemma of the current node.
  def compute_feature_instanceOK
    RosyFeatureExtractor.headlemma(@@node)
  end
end
1109
+
1110
+ ################
1111
# Feature "const_head_pos": part of speech of the head of this constituent.
class HeadPosFeature < RosySingleFeatureExtractor
  announce_me()

  # Name of this feature.
  def self.feature_name
    "const_head_pos"
  end

  # SQL type declared for this feature's values.
  def self.sql_type
    "VARCHAR(10)"
  end

  # Feature group this extractor reports itself under.
  def self.feature_type
    "syn"
  end

  #####
  private

  # Returns RosyFeatureExtractor.headpos of the current node.
  def compute_feature_instanceOK
    RosyFeatureExtractor.headpos(@@node)
  end
end
1132
+
1133
+ ################
1134
# Feature group "icont_word": lemma and part of speech of the informative
# content word of this constituent, as chosen by the interpreter's
# informative_content_node().
class IcontLemmaFeature < RosyFeatureExtractor
  announce_me()

  # Designator of this extractor.
  def self.designator
    "icont_word"
  end

  # Names of the two features computed, in output order.
  def self.feature_names
    ["icont_lemma", "icont_pos"]
  end

  # SQL type declared for this extractor's values.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature group this extractor reports itself under.
  def self.feature_type
    "sem"
  end

  #####
  private

  # Returns [head lemma, head POS] of the informative content node,
  # or [nil, nil] when the interpreter finds no such node.
  def compute_features_instanceOK
    content_node = @@interpreter_class.informative_content_node(@@node)
    return [nil, nil] unless content_node

    [
      RosyFeatureExtractor.headlemma(content_node),
      RosyFeatureExtractor.headpos(content_node)
    ]
  end
end
1163
+
1164
+
1165
+ ################
1166
# Feature group "firstword": lemma and part of speech of the leftmost
# terminal of this constituent.
class FirstWordFeature < RosyFeatureExtractor
  announce_me()

  # Designator of this extractor.
  def self.designator
    "firstword"
  end

  # Names of the two features computed, in output order.
  def self.feature_names
    ["firstword", "firstword_pos"]
  end

  # SQL type declared for this extractor's values.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature group this extractor reports itself under.
  def self.feature_type
    "sem"
  end

  #####
  private

  # Returns [head lemma, head POS] of @@node_leftmost_terminal,
  # or [nil, nil] when no leftmost terminal is set.
  def compute_features_instanceOK
    first_terminal = @@node_leftmost_terminal
    return [nil, nil] unless first_terminal

    [
      RosyFeatureExtractor.headlemma(first_terminal),
      RosyFeatureExtractor.headpos(first_terminal)
    ]
  end
end
1194
+
1195
+
1196
+ ################
1197
# Feature group "lastword": lemma and part of speech of the rightmost
# terminal of this constituent.
class LastWordFeature < RosyFeatureExtractor
  announce_me()

  # Designator of this extractor.
  def self.designator
    "lastword"
  end

  # Names of the two features computed, in output order.
  def self.feature_names
    ["lastword", "lastword_pos"]
  end

  # SQL type declared for this extractor's values.
  def self.sql_type
    "VARCHAR(30)"
  end

  # Feature group this extractor reports itself under.
  def self.feature_type
    "sem"
  end

  #####
  private

  # Returns [head lemma, head POS] of @@node_rightmost_terminal,
  # or [nil, nil] when no rightmost terminal is set.
  def compute_features_instanceOK
    last_terminal = @@node_rightmost_terminal
    return [nil, nil] unless last_terminal

    [
      RosyFeatureExtractor.headlemma(last_terminal),
      RosyFeatureExtractor.headpos(last_terminal)
    ]
  end
end
1225
+
1226
+ ################
1227
# Feature group "leftsib": simplified phrase type and lemma of the left
# sibling of the current node.
class LeftSiblingFeature < RosyFeatureExtractor
  announce_me()

  # Designator of this extractor.
  def self.designator
    "leftsib"
  end

  # Names of the two features computed, in output order.
  def self.feature_names
    ["leftsib_pt", "leftsib_lemma"]
  end

  # SQL type declared for this extractor's values.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature group this extractor reports itself under.
  def self.feature_type
    "sem"
  end

  #####
  private

  # The left sibling is the child of the parent whose rightmost terminal
  # has the largest word index that is still smaller than the word index
  # of this node's leftmost terminal.
  #
  # Returns [simplified phrase type, lemma] of that sibling, or [nil, nil]
  # when the node has no parent, its own leftmost terminal has no index,
  # or no sibling qualifies.
  def compute_features_instanceOK
    father = @@node.parent
    return [nil, nil] if father.nil?

    own_ix = terminal_index(@@node_leftmost_terminal)
    return [nil, nil] unless own_ix

    best_sibling = nil
    best_ix = nil
    father.children.each do |candidate|
      candidate_ix = terminal_index(@@interpreter_class.rightmost_terminal(candidate))
      # skip children without an index, and those not ending left of this node
      next unless candidate_ix
      next unless candidate_ix < own_ix

      # keep the candidate that ends closest to this node
      if best_sibling.nil? || best_ix < candidate_ix
        best_sibling = candidate
        best_ix = candidate_ix
      end
    end

    return [nil, nil] unless best_sibling

    [
      @@interpreter_class.simplified_pt(best_sibling),
      @@interpreter_class.lemma_backoff(best_sibling)
    ]
  end

  ###
  # returns: the entry of @@terminals_ordered for the given terminal node
  # (its word index); nil if the node is nil or has no entry
  def terminal_index(node) # SynNode, terminal
    node ? @@terminals_ordered[node] : nil
  end
end
1297
+
1298
+ ################
1299
# Feature "worddistance": distance in words between the head word of this
# constituent and the head word of the target.
class WordDistanceFeature < RosySingleFeatureExtractor
  announce_me()

  # Name of this feature.
  def self.feature_name
    "worddistance"
  end

  # SQL type declared for this feature's values.
  def self.sql_type
    "TINYINT"
  end

  # Feature group this extractor reports itself under.
  def self.feature_type
    "syn"
  end

  #####
  private

  # Looks up the head terminals of the current node and of the target in
  # @@terminals_ordered and returns the absolute difference of their
  # indices. Returns nil when either head terminal or either index is
  # missing.
  def compute_feature_instanceOK
    node_head = @@interpreter_class.head_terminal(@@node)
    target_head = @@interpreter_class.head_terminal(@@target)
    return nil if node_head.nil? || target_head.nil?

    node_pos = @@terminals_ordered[node_head]
    target_pos = @@terminals_ordered[target_head]
    return nil if node_pos.nil? || target_pos.nil?

    (node_pos - target_pos).abs
  end
end
1332
+
1333
+ ################
1334
# Feature "ismaxproj": is the current node a maximal projection?
# Heuristic: the node counts as a maximal projection (1) when it has no
# parent or when its category differs from its parent's category;
# otherwise the value is 0.
class IsMaxProj < RosySingleFeatureExtractor
  announce_me()

  # Name of this feature.
  def self.feature_name
    "ismaxproj"
  end

  # SQL type declared for this feature's values.
  def self.sql_type
    "TINYINT"
  end

  # Feature group this extractor reports itself under.
  def self.feature_type
    "syn"
  end

  #####
  private

  # Returns 1 for a parentless node or a node whose category differs from
  # its parent's; returns 0 when both categories are equal.
  def compute_feature_instanceOK
    return 1 unless @@node.parent()

    own_category = @@interpreter_class.category(@@node)
    father_category = @@interpreter_class.category(@@node.parent)
    own_category == father_category ? 0 : 1
  end
end
1365
+
1366
+ ################
1367
# Feature group "rightsib": simplified phrase type and lemma of the right
# sibling of the current node.
class RightSiblingFeature < RosyFeatureExtractor
  announce_me()

  # Designator of this extractor.
  def self.designator
    "rightsib"
  end

  # Names of the two features computed, in output order.
  def self.feature_names
    ["rightsib_pt", "rightsib_lemma"]
  end

  # SQL type declared for this extractor's values.
  def self.sql_type
    "VARCHAR(20)"
  end

  # Feature group this extractor reports itself under.
  def self.feature_type
    "sem"
  end

  #####
  private

  # The right sibling is the child of the parent whose leftmost terminal
  # has the smallest word index that is still greater than the word index
  # of this node's rightmost terminal.
  #
  # Returns [simplified phrase type, lemma] of that sibling, or [nil, nil]
  # when the node has no parent, its own rightmost terminal has no index,
  # or no sibling qualifies.
  def compute_features_instanceOK
    father = @@node.parent
    return [nil, nil] if father.nil?

    own_ix = terminal_index(@@node_rightmost_terminal)
    return [nil, nil] unless own_ix

    best_sibling = nil
    best_ix = nil
    father.children.each do |candidate|
      candidate_ix = terminal_index(@@interpreter_class.leftmost_terminal(candidate))
      # skip children without an index, and those not starting right of this node
      next unless candidate_ix
      next unless candidate_ix > own_ix

      # keep the candidate that starts closest to this node
      if best_sibling.nil? || candidate_ix < best_ix
        best_sibling = candidate
        best_ix = candidate_ix
      end
    end

    return [nil, nil] unless best_sibling

    [
      @@interpreter_class.simplified_pt(best_sibling),
      @@interpreter_class.lemma_backoff(best_sibling)
    ]
  end

  ###
  # returns: the entry of @@terminals_ordered for the given terminal node
  # (its word index); nil if the node is nil or has no entry
  def terminal_index(node) # SynNode, terminal
    node ? @@terminals_ordered[node] : nil
  end
end
1437
+
1438
+
1439
+ # ################
1440
+ # # admin feature: word span of this constituent
1441
+ # class WordSpanFeature < RosySingleFeatureExtractor
1442
+ # WordSpanFeature.announce_me()
1443
+
1444
+ # def WordSpanFeature.feature_name()
1445
+ # return "wordspan"
1446
+ # end
1447
+ # def WordSpanFeature.sql_type()
1448
+ # return "VARCHAR(30)"
1449
+ # end
1450
+ # def WordSpanFeature.feature_type()
1451
+ # return "admin"
1452
+ # end
1453
+
1454
+ # #####
1455
+ # private
1456
+
1457
+ # def compute_feature_instanceOK()
1458
+
1459
+ # fwh = RosyFeatureExtractor.headlemma(@@node_leftmost_terminal)
1460
+ # lwh = RosyFeatureExtractor.headlemma(@@node_rightmost_terminal)
1461
+
1462
+ # if fwh.nil?
1463
+ # fwh = ""
1464
+ # end
1465
+ # if lwh.nil?
1466
+ # lwh = ""
1467
+ # end
1468
+
1469
+ # return fwh+ "-" +lwh
1470
+ # end
1471
+ # end
1472
+
1473
+
1474
+ ################
1475
# Admin feature "nodeID": this node's ID followed by its parent's ID,
# separated by a single space. The highest node (topnode) has ID 0 and
# no father ID, so only its own ID is reported.
class NodeIDFeature < RosySingleFeatureExtractor
  announce_me()

  # Name of this feature.
  def self.feature_name
    "nodeID"
  end

  # SQL type declared for this feature's values.
  def self.sql_type
    "VARCHAR(100)"
  end

  # Feature group this extractor reports itself under.
  def self.feature_type
    "admin"
  end

  #####
  private

  # Returns "<own id> <parent id>" for a node with a parent,
  # and just "<own id>" for a parentless node.
  def compute_feature_instanceOK
    father = @@node.parent
    own_id = @@node.id.to_s
    return own_id unless father

    "#{own_id} #{father.id}"
  end
end
1502
+
1503
+ ################
1504
# Admin feature "sentid": instance ID built from the sentence ID and
# the frame ID.
class SentidFeature < RosySingleFeatureExtractor
  announce_me()

  # Name of this feature.
  def self.feature_name
    "sentid"
  end

  # SQL type declared for this feature's values.
  def self.sql_type
    "VARCHAR(100)"
  end

  # Feature group this extractor reports itself under.
  def self.feature_type
    "admin"
  end

  # Inherited info list with "index" appended: this is an index feature.
  def self.info
    super().concat(["index"])
  end

  #####
  private

  # Passes the current sentence's ID and the current frame's ID to
  # construct_instance_id and returns its result.
  def compute_feature_instanceOK
    sentence_id = @@sent.id()
    frame_id = @@frame.id()
    construct_instance_id(sentence_id, frame_id)
  end
end
1529
+
1530
+ # ################
1531
+ # # admin feature: tokens spanned by this constituent
1532
+ # class TokensFeature < RosySingleFeatureExtractor
1533
+ # TokensFeature.announce_me()
1534
+
1535
+ # def TokensFeature.feature_name()
1536
+ # return "tokens"
1537
+ # end
1538
+ # def TokensFeature.sql_type()
1539
+ # return "VARCHAR(100)"
1540
+ # end
1541
+ # def TokensFeature.feature_type()
1542
+ # return "admin"
1543
+ # end
1544
+
1545
+ # #####
1546
+ # private
1547
+
1548
+ # def compute_feature_instanceOK()
1549
+ # return @@node.to_s
1550
+ # end
1551
+ # end
1552
+
1553
+ ################
1554
# Feature "frame": name of the frame assigned by FN.
class FrameFeature < RosySingleFeatureExtractor
  announce_me()

  # Name of this feature.
  def self.feature_name
    "frame"
  end

  # SQL type declared for this feature's values.
  def self.sql_type
    "VARCHAR(35)"
  end

  # Feature group this extractor reports itself under.
  def self.feature_type
    "ubiq"
  end

  # Inherited info list with "index" appended: this is an index feature.
  def self.info
    super().concat(["index"])
  end

  #####
  private

  # Returns the name of the current frame, or nil when no frame is set.
  def compute_feature_instanceOK
    @@frame ? @@frame.name() : nil
  end
end
1583
+
1584
+ ################
1585
# Admin feature "term": is this node a terminal? (1 = yes, 0 = no)
class TerminalFeature < RosySingleFeatureExtractor
  announce_me()

  # Name of this feature.
  def self.feature_name
    "term"
  end

  # SQL type declared for this feature's values.
  def self.sql_type
    "TINYINT"
  end

  # Feature group this extractor reports itself under.
  def self.feature_type
    "admin"
  end

  #####
  private

  # Returns 1 when the current node reports itself as a terminal, else 0.
  def compute_feature_instanceOK
    @@node.is_terminal? ? 1 : 0
  end
end