shalmaneser 1.2.0.rc4 → 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +47 -18
  3. data/bin/shalmaneser +8 -2
  4. data/doc/index.md +1 -0
  5. data/lib/shalmaneser/opt_parser.rb +68 -67
  6. metadata +49 -119
  7. data/bin/fred +0 -16
  8. data/bin/frprep +0 -34
  9. data/bin/rosy +0 -17
  10. data/lib/common/AbstractSynInterface.rb +0 -1229
  11. data/lib/common/Counter.rb +0 -18
  12. data/lib/common/EnduserMode.rb +0 -27
  13. data/lib/common/Eval.rb +0 -480
  14. data/lib/common/FixSynSemMapping.rb +0 -196
  15. data/lib/common/Graph.rb +0 -345
  16. data/lib/common/ISO-8859-1.rb +0 -24
  17. data/lib/common/ML.rb +0 -186
  18. data/lib/common/Mallet.rb +0 -236
  19. data/lib/common/Maxent.rb +0 -229
  20. data/lib/common/Optimise.rb +0 -195
  21. data/lib/common/Parser.rb +0 -213
  22. data/lib/common/RegXML.rb +0 -269
  23. data/lib/common/RosyConventions.rb +0 -171
  24. data/lib/common/STXmlTerminalOrder.rb +0 -194
  25. data/lib/common/SalsaTigerRegXML.rb +0 -2347
  26. data/lib/common/SalsaTigerXMLHelper.rb +0 -99
  27. data/lib/common/SynInterfaces.rb +0 -282
  28. data/lib/common/TabFormat.rb +0 -721
  29. data/lib/common/Tiger.rb +0 -1448
  30. data/lib/common/Timbl.rb +0 -144
  31. data/lib/common/Tree.rb +0 -61
  32. data/lib/common/config_data.rb +0 -470
  33. data/lib/common/config_format_element.rb +0 -220
  34. data/lib/common/headz.rb +0 -338
  35. data/lib/common/option_parser.rb +0 -13
  36. data/lib/common/prep_config_data.rb +0 -62
  37. data/lib/common/prep_helper.rb +0 -1330
  38. data/lib/common/ruby_class_extensions.rb +0 -310
  39. data/lib/db/db_interface.rb +0 -48
  40. data/lib/db/db_mysql.rb +0 -145
  41. data/lib/db/db_sqlite.rb +0 -280
  42. data/lib/db/db_table.rb +0 -239
  43. data/lib/db/db_wrapper.rb +0 -176
  44. data/lib/db/sql_query.rb +0 -243
  45. data/lib/ext/maxent/Classify.class +0 -0
  46. data/lib/ext/maxent/Train.class +0 -0
  47. data/lib/fred/Baseline.rb +0 -150
  48. data/lib/fred/FileZipped.rb +0 -31
  49. data/lib/fred/FredBOWContext.rb +0 -877
  50. data/lib/fred/FredConventions.rb +0 -232
  51. data/lib/fred/FredDetermineTargets.rb +0 -319
  52. data/lib/fred/FredEval.rb +0 -312
  53. data/lib/fred/FredFeatureExtractors.rb +0 -322
  54. data/lib/fred/FredFeatures.rb +0 -1061
  55. data/lib/fred/FredFeaturize.rb +0 -602
  56. data/lib/fred/FredNumTrainingSenses.rb +0 -27
  57. data/lib/fred/FredParameters.rb +0 -402
  58. data/lib/fred/FredSplit.rb +0 -84
  59. data/lib/fred/FredSplitPkg.rb +0 -180
  60. data/lib/fred/FredTest.rb +0 -606
  61. data/lib/fred/FredTrain.rb +0 -144
  62. data/lib/fred/PlotAndREval.rb +0 -480
  63. data/lib/fred/fred.rb +0 -47
  64. data/lib/fred/fred_config_data.rb +0 -185
  65. data/lib/fred/md5.rb +0 -23
  66. data/lib/fred/opt_parser.rb +0 -250
  67. data/lib/frprep/Ampersand.rb +0 -39
  68. data/lib/frprep/CollinsInterface.rb +0 -1165
  69. data/lib/frprep/Counter.rb +0 -18
  70. data/lib/frprep/FNCorpusXML.rb +0 -643
  71. data/lib/frprep/FNDatabase.rb +0 -144
  72. data/lib/frprep/FrameXML.rb +0 -513
  73. data/lib/frprep/Graph.rb +0 -345
  74. data/lib/frprep/MiniparInterface.rb +0 -1388
  75. data/lib/frprep/RegXML.rb +0 -269
  76. data/lib/frprep/STXmlTerminalOrder.rb +0 -194
  77. data/lib/frprep/SleepyInterface.rb +0 -384
  78. data/lib/frprep/TntInterface.rb +0 -44
  79. data/lib/frprep/TreetaggerInterface.rb +0 -327
  80. data/lib/frprep/do_parses.rb +0 -143
  81. data/lib/frprep/frprep.rb +0 -693
  82. data/lib/frprep/interfaces/berkeley_interface.rb +0 -372
  83. data/lib/frprep/interfaces/stanford_interface.rb +0 -353
  84. data/lib/frprep/interpreters/berkeley_interpreter.rb +0 -22
  85. data/lib/frprep/interpreters/stanford_interpreter.rb +0 -22
  86. data/lib/frprep/one_parsed_file.rb +0 -28
  87. data/lib/frprep/opt_parser.rb +0 -94
  88. data/lib/frprep/ruby_class_extensions.rb +0 -310
  89. data/lib/rosy/AbstractFeatureAndExternal.rb +0 -242
  90. data/lib/rosy/ExternalConfigData.rb +0 -58
  91. data/lib/rosy/FailedParses.rb +0 -130
  92. data/lib/rosy/FeatureInfo.rb +0 -242
  93. data/lib/rosy/GfInduce.rb +0 -1115
  94. data/lib/rosy/GfInduceFeature.rb +0 -148
  95. data/lib/rosy/InputData.rb +0 -294
  96. data/lib/rosy/RosyConfusability.rb +0 -338
  97. data/lib/rosy/RosyEval.rb +0 -465
  98. data/lib/rosy/RosyFeatureExtractors.rb +0 -1609
  99. data/lib/rosy/RosyFeaturize.rb +0 -281
  100. data/lib/rosy/RosyInspect.rb +0 -336
  101. data/lib/rosy/RosyIterator.rb +0 -478
  102. data/lib/rosy/RosyPhase2FeatureExtractors.rb +0 -230
  103. data/lib/rosy/RosyPruning.rb +0 -165
  104. data/lib/rosy/RosyServices.rb +0 -744
  105. data/lib/rosy/RosySplit.rb +0 -232
  106. data/lib/rosy/RosyTask.rb +0 -19
  107. data/lib/rosy/RosyTest.rb +0 -829
  108. data/lib/rosy/RosyTrain.rb +0 -234
  109. data/lib/rosy/RosyTrainingTestTable.rb +0 -787
  110. data/lib/rosy/TargetsMostFrequentFrame.rb +0 -60
  111. data/lib/rosy/View.rb +0 -418
  112. data/lib/rosy/opt_parser.rb +0 -379
  113. data/lib/rosy/rosy.rb +0 -78
  114. data/lib/rosy/rosy_config_data.rb +0 -121
  115. data/lib/shalmaneser/version.rb +0 -3
@@ -1,1609 +0,0 @@
1
- ####
2
- # ke & sp
3
- # adapted to new feature extractor class,
4
- # Collins and Tiger features combined:
5
- # KE November 2005
6
- #
7
- # Feature Extractors for Rosy
8
- #
9
- # Contract: each feature extractor inherits from the RosyFeatureExtractor class
10
- #
11
- # Feature extractors return nil if no feature value could be
12
- # returned
13
-
14
-
15
- # Salsa packages
16
- require 'rosy/AbstractFeatureAndExternal'
17
- require 'common/SalsaTigerRegXML'
18
-
19
- # Fred and Rosy packages
20
- require 'common/RosyConventions'
21
-
22
-
23
- ################################
24
- # base class for all following feature extractors
25
- class RosyFeatureExtractor < AbstractFeatureExtractor
26
- @@instance_ok = nil # Boolean: set_node(), set_sent() successful?
27
- @@split_nones = nil # Boolean: split NONE value for gold feature?
28
-
29
- @@target = nil # SynNode: main target node
30
- @@target_pos = nil # string: part of speech of main target
31
- @@target_voice = nil # string: "active", "passive", or nil
32
- @@terminals_ordered = nil # Hash: sentence terminals, mapped onto their word indices (starting with 1)
33
- @@target_gfs = nil # Array of pairs [rel, node]: grammatical functions of the target
34
-
35
- @@paths = nil # Hash: node ID -> path object, path from main target node to the node with that ID
36
- @@relpos = nil # string: position of instance relative to target
37
- @@node_leftmost_terminal = nil # SynNode objects: first and last terminal
38
- @@node_rightmost_terminal = nil # in the yield of @@node
39
-
40
- @@governing_verb = nil # SynNode object: closest governing verb of @@target
41
- @@gv_paths = nil # Hash: node ID -> path object, path from governing verb of the target to the node with that ID
42
-
43
- ###
44
- # returns a string: "phase 1" or "phase 2",
45
- # depending on whether the feature is computed
46
- # directly from the SalsaTigerSentence and the SynNode objects
47
- # or whether it is computed from the phase 1 features
48
- # computed for the training set
49
- #
50
- # Here: all features in this package are phase 1
51
- def RosyFeatureExtractor.phase()
52
- return "phase 1"
53
- end
54
-
55
- ###
56
- # returns an array of strings, providing information about
57
- # the feature extractor
58
- def RosyFeatureExtractor.info()
59
- return super().concat(["rosy"])
60
- end
61
-
62
- ###
63
- # set sentence, set node, set general settings: this is done prior to
64
- # feature computation using compute_feature_value()
65
- # such that computations that stay the same for
66
- # several features can be done in advance
67
- def RosyFeatureExtractor.set(var_hash) # hash. possible entries: split_nones=> true/false
68
-
69
- @@split_nones = var_hash["split_nones"]
70
-
71
- return true
72
- end
73
-
74
- ###
75
- def RosyFeatureExtractor.set_sentence(sent, # SalsaTigerSentence object
76
- frame) # FrameNode object
77
- super(sent, frame)
78
-
79
- root = @@sent.syn_roots.first()
80
- word_index_counter = 1
81
- @@terminals_ordered = Hash.new
82
- root.yield_nodes_ordered.each {|yield_node|
83
- @@terminals_ordered[yield_node] = word_index_counter
84
- word_index_counter += 1
85
- }
86
-
87
- # @@target: main target node (SynNode)
88
- # WARNING: at this moment, we are
89
- # not considering true multiword targets.
90
- # Remove the "no_mwe" parameter in the main_node_of_expr call below
91
- # to change this
92
- unless frame.target
93
- @@target = nil
94
- return false
95
- end
96
- @@target = @@interpreter_class.main_node_of_expr(frame.target.children(), "no_mwe")
97
-
98
- unless @@target
99
- return false
100
- end
101
-
102
- # @@target_pos: string, target POS
103
- @@target_pos = @@interpreter_class.category(@@target)
104
-
105
- # @@target_voice:
106
- # for verb targets, string, active or passive
107
- # else nil
108
- @@target_voice = @@interpreter_class.voice(@@target)
109
- @@target_gfs = @@interpreter_class.gfs(@@target, @@sent)
110
-
111
- # paths from target to all other nodes in the graph
112
- @@paths = RosyFeatureExtractor.all_paths_from(@@target)
113
-
114
- # governing verb of target.
115
- # If something goes wrong, this will remain unset
116
- @@gv_paths = Hash.new
117
- if (targetlemma = RosyFeatureExtractor.headlemma(@@target))
118
- # determine governing verb
119
- parent = @@target
120
- while (parent = parent.parent)
121
- parentlemma = RosyFeatureExtractor.headlemma(parent)
122
-
123
- if @@interpreter_class.category(parent) == "verb" and
124
- parentlemma != targetlemma
125
- # success: found the governing verb of the target
126
-
127
- @@governing_verb = @@interpreter_class.head_terminal(parent)
128
- # paths from governing verb of target to all other nodes in the graph
129
- if @@governing_verb
130
- @@gv_paths = RosyFeatureExtractor.all_paths_from(@@governing_verb)
131
- end
132
-
133
- break
134
- end
135
- end
136
- end
137
-
138
-
139
- # paths: when printing, leave off the phrase type of the end node
140
- @@paths.each_value { |p| p.set_cutoff_last_pt_on_printing(true) }
141
- @@gv_paths.each_value { |p| p.set_cutoff_last_pt_on_printing(true) }
142
-
143
- return true
144
- end
145
-
146
- ###
147
- # node: SynNode of the sentence set in set_sentence
148
- def RosyFeatureExtractor.set_node(node)
149
- super(node)
150
-
151
- @@instance_ok = true
152
-
153
- unless @@target
154
- # no target, nothing I can compute here
155
- @@instance_ok = false
156
- return false
157
- end
158
-
159
- # # path between target and current instance node
160
- # @@path = @@interpreter_class.path_between(@@target, @@node)
161
- # @@path.set_cutoff_last_pt_on_printing(true) # when printing path, cut off last node label
162
-
163
-
164
- # position of instance node relative to main target node
165
- @@relpos = @@interpreter_class.relative_position(@@node, @@target)
166
- # leftmost, rightmost terminal in the yield of @@node
167
- @@node_leftmost_terminal = @@interpreter_class.leftmost_terminal(@@node)
168
- @@node_rightmost_terminal = @@interpreter_class.rightmost_terminal(@@node)
169
-
170
- return true
171
- end
172
-
173
- ###
174
- # compute_features: first check if instance is OK
175
- #
176
- # returns: list of features
177
- def compute_features()
178
- unless @@instance_ok
179
- return nil
180
- end
181
-
182
- return make_features_safe_for_sql(compute_features_instanceOK())
183
- end
184
-
185
- ############
186
- protected
187
-
188
-
189
- # returns: list of features
190
- def compute_features_instanceOK()
191
- raise "Overwrite me"
192
- end
193
-
194
- ###
195
- # in computed features:
196
- # replace "," by COMMA and "\" by BACK in order not to confuse SQL
197
- def make_features_safe_for_sql(feature_list)
198
- return feature_list.map { |feature|
199
- if feature.kind_of? String
200
- feature.gsub(/,/, "COMMA").gsub(/\\/, "BACK")
201
- else
202
- feature
203
- end
204
- }
205
- end
206
-
207
-
208
- ###
209
- # lemma of the head terminal of SynNode n
210
- def RosyFeatureExtractor.headlemma(n) # SynNode
211
- unless n
212
- return nil
213
- end
214
-
215
- h = @@interpreter_class.head_terminal(n)
216
- if h
217
- return @@interpreter_class.lemma_backoff(h)
218
- else
219
- return nil
220
- end
221
- end
222
-
223
- ###
224
- # part of speech of the head terminal of SynNode n
225
- def RosyFeatureExtractor.headpos(n) # SynNode
226
- unless n
227
- return nil
228
- end
229
-
230
- h = @@interpreter_class.head_terminal(n)
231
- if h
232
- return h.part_of_speech()
233
- else
234
- return nil
235
- end
236
- end
237
-
238
- ###
239
- # Given a SynNode n, recursively determine
240
- # the paths from n to all other reachable nodes,
241
- # skipping nodes that already have a path
242
- # listed in the given hash mapping node IDs to paths.
243
- # Paths are given as Path objects (see AbstractSynInterface).
244
- # It is assumed that the graph of n is a tree, which
245
- # is searched depth-first, first the children, then the parent of n.
246
- def RosyFeatureExtractor.all_paths_from(n, # SynNode
247
- hash = nil) # Hash: nodeID(string) => Path object
248
- # initial step of all: no hash existing yet
249
- if hash.nil?
250
- hash = Hash.new
251
- hash[n.id()] = Path.new(n)
252
- end
253
-
254
- # invariant at this point: n must be listed in hash
255
- unless hash[n.id()]
256
- raise "Shouldn't be here"
257
- end
258
-
259
- # for each child c of n: compute its path from the path of n,
260
- # and explore paths below c
261
- n.each_child_with_edgelabel { |label, c|
262
- if hash[c.id()].nil?
263
- hash[c.id()] = hash[n.id()].deep_clone().add_last_step("D",
264
- label,
265
- @@interpreter_class.simplified_pt(c),
266
- c)
267
- RosyFeatureExtractor.all_paths_from(c, hash)
268
- end
269
- }
270
-
271
- # compute the path from n's parent p from the path of n,
272
- # and explore paths beyond p
273
- if (p = n.parent) and hash[p.id()].nil?
274
- # node has a parent, and it is not listed in the path hash
275
- # make a new path for parent: n's path, plus one up-step
276
- hash[p.id()] = hash[n.id()].deep_clone().add_last_step("U",
277
- n.parent_label,
278
- @@interpreter_class.simplified_pt(p),
279
- p)
280
- RosyFeatureExtractor.all_paths_from(p, hash)
281
- end
282
-
283
- return hash
284
-
285
- end
286
-
287
- end
288
-
289
- ###############################
290
- # Rosy single feature extractor, duplicating stuff from
291
- # AbstractSingleFeatureExtractor
292
- class RosySingleFeatureExtractor < RosyFeatureExtractor
293
-
294
- ###
295
- # returns a string: the designator for this feature extractor
296
- # (an extractor may compute several features, but
297
- # in the experiment file it is chosen by a single designator)
298
- #
299
- # here: single feature, and the feature name is the designator
300
- def RosySingleFeatureExtractor.designator()
301
- return eval(self.name()).feature_name()
302
- end
303
-
304
- ###
305
- def RosySingleFeatureExtractor.feature_names()
306
- return [eval(self.name()).feature_name()]
307
- end
308
-
309
- ###
310
- # compute_features: first check if instance is OK
311
- #
312
- # returns: list of features
313
- def compute_features()
314
- unless @@instance_ok
315
- return nil
316
- end
317
-
318
- return make_features_safe_for_sql([compute_feature_instanceOK()])
319
- end
320
-
321
- ############
322
- private
323
-
324
- def compute_feature_instanceOK()
325
- raise "Overwrite me"
326
- end
327
-
328
- end
329
-
330
- ##############################################
331
- # Individual feature extractors
332
- ##############################################
333
-
334
- ####################
335
- # gold role label
336
- class GoldlabelFeature < RosySingleFeatureExtractor
337
- GoldlabelFeature.announce_me()
338
-
339
- def GoldlabelFeature.feature_name()
340
- return "gold"
341
- end
342
- def GoldlabelFeature.sql_type()
343
- return "VARCHAR(30)"
344
- end
345
- def GoldlabelFeature.feature_type()
346
- return "gold"
347
- end
348
- def GoldlabelFeature.info()
349
- # additional info: I am an index feature
350
- return super().concat(["index"])
351
- end
352
-
353
- ################
354
- private
355
-
356
- def compute_feature_instanceOK()
357
- @@frame.each_fe_by_name {|fe|
358
- if fe.children.include? @@node
359
- return fe.name
360
- end
361
- }
362
-
363
- # no role label for this node
364
- # if @@split_nones
365
- # split "no role" label into:
366
- # before/after/dominating the target node
367
- # return @@relpos
368
- # else
369
- return nil
370
- # end
371
- end
372
- end
373
-
374
- ####################
375
- # path features
376
- class AbstractPathFeature < RosySingleFeatureExtractor
377
- def AbstractPathFeature.sql_type()
378
- return "VARCHAR(80)"
379
- end
380
- def AbstractPathFeature.feature_type()
381
- return "syn"
382
- end
383
-
384
- ################
385
- private
386
-
387
- def compute_feature_instanceOK()
388
- if @@paths[@@node.id()].nil?
389
- path = nil
390
- else
391
- path = my_path_computation()
392
- end
393
-
394
- if path.nil? or path.empty?
395
- return nil
396
- else
397
- return path
398
- end
399
- end
400
-
401
- def my_path_computation()
402
- raise "overwrite me"
403
- end
404
- end
405
-
406
-
407
- ####################
408
- # path consisting of nodelabels, dependencies and directions
409
- class PathFeature < AbstractPathFeature
410
- PathFeature.announce_me()
411
-
412
- def PathFeature.sql_type()
413
- return "VARCHAR(120)"
414
- end
415
- def PathFeature.feature_name()
416
- return "path"
417
- end
418
-
419
- ################
420
- private
421
-
422
- def my_path_computation()
423
- if @@paths[@@node.id()].nil?
424
- return nil
425
- end
426
-
427
- return @@paths[@@node.id()].print(true, true, true)
428
- end
429
- end
430
-
431
-
432
-
433
- ####################
434
- # path consisting of phrase type and directions
435
- class NodelabelPathFeature < AbstractPathFeature
436
- NodelabelPathFeature.announce_me()
437
-
438
- def NodelabelPathFeature.feature_name()
439
- return "pt_path"
440
- end
441
-
442
- ################
443
- private
444
-
445
- def my_path_computation()
446
- if @@paths[@@node.id()].nil?
447
- return nil
448
- end
449
-
450
- return @@paths[@@node.id()].print(true, false, true)
451
- end
452
- end
453
-
454
- ####################
455
- # path consisting of dependencies and directions
456
- class EdgelabelPathFeature < AbstractPathFeature
457
- EdgelabelPathFeature.announce_me()
458
-
459
- def EdgelabelPathFeature.feature_name()
460
- return "gf_path"
461
- end
462
-
463
- ################
464
- private
465
-
466
- def my_path_computation()
467
- if @@paths[@@node.id()].nil?
468
- return nil
469
- end
470
-
471
- return @@paths[@@node.id()].print(true, true, false)
472
- end
473
- end
474
-
475
- ####################
476
- # features: path from governing verb
477
- class AbstractGVPathFeature < RosySingleFeatureExtractor
478
- def AbstractGVPathFeature.sql_type()
479
- return "VARCHAR(80)"
480
- end
481
- def AbstractGVPathFeature.feature_type()
482
- return "syn"
483
- end
484
-
485
- ################
486
- private
487
-
488
- def compute_feature_instanceOK()
489
- if @@gv_paths[@@node.id()].nil?
490
- path = nil
491
- else
492
- path = my_path_computation()
493
- end
494
-
495
- if path.nil? or path.empty?
496
- return nil
497
- else
498
- return path
499
- end
500
- end
501
-
502
- def my_path_computation()
503
- raise "overwrite me"
504
- end
505
- end
506
-
507
-
508
- ####################
509
- # path from governing verb consisting of nodelabels, dependencies and directions
510
- class GVPathFeature < AbstractGVPathFeature
511
- GVPathFeature.announce_me()
512
-
513
- def GVPathFeature.sql_type()
514
- return "VARCHAR(120)"
515
- end
516
- def GVPathFeature.feature_name()
517
- return "gvpath"
518
- end
519
-
520
- ################
521
- private
522
-
523
- def my_path_computation()
524
- return @@gv_paths[@@node.id()].print(true, true, true)
525
- end
526
- end
527
-
528
-
529
- ####################
530
- # gov. verb path consisting of phrase type and directions
531
- class GVNodelabelPathFeature < AbstractGVPathFeature
532
- GVNodelabelPathFeature.announce_me()
533
-
534
- def GVNodelabelPathFeature.feature_name()
535
- return "pt_gvpath"
536
- end
537
-
538
- ################
539
- private
540
-
541
- def my_path_computation()
542
- return @@gv_paths[@@node.id()].print(true, false, true)
543
- end
544
- end
545
-
546
- ####################
547
- # gov. verb path consisting of dependencies and directions
548
- class GVEdgelabelPathFeature < AbstractGVPathFeature
549
- GVEdgelabelPathFeature.announce_me()
550
-
551
- def GVEdgelabelPathFeature.feature_name()
552
- return "gf_gvpath"
553
- end
554
-
555
- ################
556
- private
557
-
558
- def my_path_computation()
559
- return @@gv_paths[@@node.id()].print(true, true, false)
560
- end
561
- end
562
-
563
- ####################
564
- # path length
565
- class PathLengthFeature < RosySingleFeatureExtractor
566
- PathLengthFeature.announce_me()
567
-
568
- def PathLengthFeature.feature_name()
569
- return "path_length"
570
- end
571
- def PathLengthFeature.sql_type()
572
- return "TINYINT"
573
- end
574
- def PathLengthFeature.feature_type()
575
- return "syn"
576
- end
577
-
578
- ################
579
- private
580
-
581
- def compute_feature_instanceOK()
582
- if @@paths[@@node.id()].nil?
583
- return nil
584
- else
585
- return @@paths[@@node.id()].length()
586
- end
587
- end
588
- end
589
-
590
- #########
591
- # group of combined path features:
592
- # path to target combined with target part of speech and
593
- # info on whether the target is passive
594
- class AbstractCombinedPathFeature < RosySingleFeatureExtractor
595
-
596
- def AbstractCombinedPathFeature.sql_type()
597
- return "VARCHAR(90)"
598
- end
599
- def AbstractCombinedPathFeature.feature_type()
600
- return "syn"
601
- end
602
-
603
- ################
604
- private
605
-
606
- def compute_feature_instanceOK()
607
- if @@paths[@@node.id()].nil?
608
- path = ""
609
- else
610
- path = my_path_computation()
611
- end
612
- return path + "--" + @@target_pos.to_s + "--" + @@target_voice.to_s
613
- end
614
-
615
- ###
616
- def my_path_computation()
617
- raise "Overwrite me"
618
- end
619
- end
620
-
621
-
622
- ####################
623
- # combined path based on nodelabels
624
- class NodelabelCombinedPathFeature < AbstractCombinedPathFeature
625
- NodelabelCombinedPathFeature.announce_me()
626
-
627
- def NodelabelCombinedPathFeature.feature_name()
628
- return "pt_combined_path"
629
- end
630
-
631
- ################
632
- private
633
-
634
- def my_path_computation()
635
- if @@paths[@@node.id()].nil?
636
- return nil
637
- end
638
-
639
- return @@paths[@@node.id()].print(false, false, true)
640
- end
641
- end
642
-
643
- ####################
644
- # combined path based on edgelabels
645
- class EdgelabelCombinedPathFeature < AbstractCombinedPathFeature
646
- EdgelabelCombinedPathFeature.announce_me()
647
-
648
- def EdgelabelCombinedPathFeature.feature_name()
649
- return "gf_combined_path"
650
- end
651
-
652
- ################
653
- private
654
-
655
- def my_path_computation()
656
- if @@paths[@@node.id()].nil?
657
- return nil
658
- end
659
-
660
- return @@paths[@@node.id()].print(false, true, false)
661
- end
662
- end
663
-
664
-
665
- ####################
666
- # combined path based on nodelabels and edgelabels
667
- class CombinedPathFeature < AbstractCombinedPathFeature
668
- CombinedPathFeature.announce_me()
669
-
670
- def CombinedPathFeature.sql_type()
671
- return "VARCHAR(130)"
672
- end
673
- def CombinedPathFeature.feature_name()
674
- return "combined_path"
675
- end
676
-
677
- ################
678
- private
679
-
680
- def my_path_computation()
681
- if @@paths[@@node.id()].nil?
682
- return nil
683
- end
684
-
685
- return @@paths[@@node.id()].print(false, true, true)
686
- end
687
- end
688
-
689
-
690
- ##################
691
- # group of features for computing
692
- # partial path to target: only up to
693
- # the lowest common ancestor of current node and target
694
- class AbstractPartialPathFeature < RosySingleFeatureExtractor
695
-
696
- def AbstractPartialPathFeature.sql_type()
697
- return "VARCHAR(70)"
698
- end
699
- def AbstractPartialPathFeature.feature_type()
700
- return "syn"
701
- end
702
-
703
- ################
704
- private
705
-
706
- def compute_feature_instanceOK()
707
- if @@paths[@@node.id()].nil?
708
- path = nil
709
- else
710
- path = my_path_computation()
711
- end
712
- if path.nil? or path.empty?
713
- return nil
714
- else
715
- return path
716
- end
717
- end
718
- end
719
-
720
- ####
721
- # partial path based on node labels
722
- class NodelabelPartialPathFeature < AbstractPartialPathFeature
723
- NodelabelPartialPathFeature.announce_me()
724
-
725
- def NodelabelPartialPathFeature.feature_name()
726
- return "pt_partial_path"
727
- end
728
-
729
- ################
730
- private
731
-
732
- def my_path_computation()
733
- if @@paths[@@node.id()].nil?
734
- return nil
735
- end
736
-
737
- return @@paths[@@node.id()].print_downpart(true, false, true)
738
- end
739
- end
740
-
741
- ####
742
- # partial path based on edge labels
743
- class EdgelabelPartialPathFeature < AbstractPartialPathFeature
744
- EdgelabelPartialPathFeature.announce_me()
745
-
746
- def EdgelabelPartialPathFeature.feature_name()
747
- return "gf_partial_path"
748
- end
749
-
750
- ################
751
- private
752
-
753
- def my_path_computation()
754
- if @@paths[@@node.id()].nil?
755
- return nil
756
- end
757
-
758
- return @@paths[@@node.id()].print_downpart(true, true, false)
759
- end
760
- end
761
-
762
- ####
763
- # partial path based on node and edge labels
764
- class PartialPathFeature < AbstractPartialPathFeature
765
- PartialPathFeature.announce_me()
766
-
767
- def PartialPathFeature.sql_type()
768
- return "VARCHAR(110)"
769
- end
770
- def PartialPathFeature.feature_name()
771
- return "partial_path"
772
- end
773
-
774
- ################
775
- private
776
-
777
- def my_path_computation()
778
- if @@paths[@@node.id()].nil?
779
- return nil
780
- end
781
-
782
- return @@paths[@@node.id()].print_downpart(true, true, true)
783
- end
784
- end
785
-
786
-
787
-
788
- ##################
789
- # ancestor rule: grammar rule
790
- # expanding lowest common ancestor of current node and target
791
- class AncestorRuleFeature < RosySingleFeatureExtractor
792
- AncestorRuleFeature.announce_me()
793
-
794
- def AncestorRuleFeature.feature_name()
795
- return "ancestor_rule"
796
- end
797
- def AncestorRuleFeature.sql_type()
798
- return "VARCHAR(50)"
799
- end
800
- def AncestorRuleFeature.feature_type()
801
- return "syn"
802
- end
803
-
804
- ################
805
- private
806
-
807
- def compute_feature_instanceOK()
808
- if @@paths[@@node.id()].nil?
809
- return nil
810
- end
811
-
812
- lca = @@paths[@@node.id()].lca()
813
- unless lca
814
- return nil
815
- end
816
-
817
- return @@interpreter_class.simplified_pt(lca).to_s +
818
- " -> "+
819
- lca.children.map {|c| @@interpreter_class.simplified_pt(c).to_s }.join(" ")
820
- end
821
- end
822
-
823
- ##################
824
- # relative position to target: left, right, including target
825
- class RelativePositionFeature < RosySingleFeatureExtractor
826
- RelativePositionFeature.announce_me()
827
-
828
- def RelativePositionFeature.feature_name()
829
- return "relpos"
830
- end
831
- def RelativePositionFeature.sql_type()
832
- return "CHAR(5)"
833
- end
834
- def RelativePositionFeature.feature_type()
835
- return "syn"
836
- end
837
-
838
- ################
839
- private
840
-
841
- def compute_feature_instanceOK()
842
- return @@relpos
843
- end
844
- end
845
-
846
-
847
- ################
848
- # phrase type of the instance node
849
- class PhraseTypeFeature < RosySingleFeatureExtractor
850
- PhraseTypeFeature.announce_me()
851
-
852
- def PhraseTypeFeature.feature_name()
853
- return "pt"
854
- end
855
- def PhraseTypeFeature.sql_type()
856
- return "VARCHAR(15)"
857
- end
858
- def PhraseTypeFeature.feature_type()
859
- return "syn"
860
- end
861
-
862
- ################
863
- private
864
-
865
- def compute_feature_instanceOK()
866
- return @@interpreter_class.simplified_pt(@@node)
867
- end
868
- end
869
-
870
- ################
871
- # grammatical function that this instance node fills for the target
872
- class GFFeature < RosySingleFeatureExtractor
873
- GFFeature.announce_me()
874
-
875
- def GFFeature.feature_name()
876
- return "gf"
877
- end
878
- def GFFeature.sql_type()
879
- return "VARCHAR(20)"
880
- end
881
- def GFFeature.feature_type()
882
- return "syn"
883
- end
884
-
885
- ################
886
- private
887
-
888
- def compute_feature_instanceOK()
889
- unless @@target_gfs
890
- return nil
891
- end
892
-
893
- @@target_gfs.each { |rel, other_node|
894
- if @@node == other_node
895
- return rel
896
- end
897
- }
898
-
899
- return nil
900
- end
901
- end
902
-
903
- ##################
904
- # phrase type of parent of this node
905
- class FatherPhraseTypeFeature < RosySingleFeatureExtractor
906
- FatherPhraseTypeFeature.announce_me()
907
-
908
- def FatherPhraseTypeFeature.feature_name()
909
- return "father_pt"
910
- end
911
- def FatherPhraseTypeFeature.sql_type()
912
- return "VARCHAR(15)"
913
- end
914
- def FatherPhraseTypeFeature.feature_type()
915
- return "syn"
916
- end
917
-
918
- #####
919
- private
920
-
921
- def compute_feature_instanceOK()
922
- if @@node.parent
923
- return @@interpreter_class.simplified_pt(@@node.parent)
924
- else
925
- return nil
926
- end
927
- end
928
- end
929
-
930
- ################
931
- # target lemma
932
- class TargetLemmaFeature < RosySingleFeatureExtractor
933
- TargetLemmaFeature.announce_me()
934
-
935
- def TargetLemmaFeature.feature_name()
936
- return "target"
937
- end
938
- def TargetLemmaFeature.sql_type()
939
- return "VARCHAR(20)"
940
- end
941
- def TargetLemmaFeature.feature_type()
942
- return "ubiq"
943
- end
944
- def TargetLemmaFeature.info()
945
- # additional info: I am an index feature
946
- return super().concat(["index"])
947
- end
948
-
949
- #####
950
- private
951
-
952
- def compute_feature_instanceOK()
953
- return @@interpreter_class.lemma_backoff(@@target)
954
- end
955
- end
956
-
957
- ################
958
- # part of speech of target lemma
959
- class TargetPOSFeature < RosySingleFeatureExtractor
960
- TargetPOSFeature.announce_me()
961
-
962
- def TargetPOSFeature.feature_name()
963
- return "target_pos"
964
- end
965
- def TargetPOSFeature.sql_type()
966
- return "VARCHAR(10)"
967
- end
968
- def TargetPOSFeature.feature_type()
969
- return "ubiq"
970
- end
971
- def TargetPOSFeature.info()
972
- # additional info: I am an index feature
973
- return super().concat(["index"])
974
- end
975
-
976
-
977
- #####
978
- private
979
-
980
- def compute_feature_instanceOK()
981
- return @@target_pos
982
- end
983
- end
984
-
985
- ################
986
- # fine-grained part of speech of target lemma
987
- class TargetFineGrainedPOSFeature < RosySingleFeatureExtractor
988
- TargetFineGrainedPOSFeature.announce_me()
989
-
990
- def TargetFineGrainedPOSFeature.feature_name()
991
- return "finegrained_target_pos"
992
- end
993
- def TargetFineGrainedPOSFeature.sql_type()
994
- return "VARCHAR(20)"
995
- end
996
- def TargetFineGrainedPOSFeature.feature_type()
997
- return "ubiq"
998
- end
999
-
1000
-
1001
- #####
1002
- private
1003
-
1004
- def compute_feature_instanceOK()
1005
- return @@interpreter_class.pt(@@target)
1006
- end
1007
- end
1008
-
1009
- ################
1010
- # voice of the target lemma
1011
- class TargetVoiceFeature < RosySingleFeatureExtractor
1012
- TargetVoiceFeature.announce_me()
1013
-
1014
- def TargetVoiceFeature.feature_name()
1015
- return "target_voice"
1016
- end
1017
- def TargetVoiceFeature.sql_type()
1018
- return "CHAR(4)"
1019
- end
1020
- def TargetVoiceFeature.feature_type()
1021
- return "ubiq"
1022
- end
1023
-
1024
- #####
1025
- private
1026
-
1027
- def compute_feature_instanceOK()
1028
- voice = @@interpreter_class.voice(@@target)
1029
- if voice
1030
- return voice.slice(0,4)
1031
- else
1032
- return nil
1033
- end
1034
- end
1035
- end
1036
-
1037
- ################
1038
- # the governing verb of the target
1039
- class GoverningVerbOfTargetFeature < RosySingleFeatureExtractor
1040
- GoverningVerbOfTargetFeature.announce_me()
1041
-
1042
- def GoverningVerbOfTargetFeature.feature_name()
1043
- return "gov_verb"
1044
- end
1045
- def GoverningVerbOfTargetFeature.sql_type()
1046
- return "VArCHAR(20)"
1047
- end
1048
- def GoverningVerbOfTargetFeature.feature_type()
1049
- return "sem"
1050
- end
1051
-
1052
- #####
1053
- private
1054
-
1055
- def compute_feature_instanceOK()
1056
- if @@governing_verb
1057
- return RosyFeatureExtractor.headlemma(@@governing_verb)
1058
- else
1059
- return nil
1060
- end
1061
- end
1062
- end
1063
-
1064
- ################c
1065
- # preposition for this constituent
1066
- class PrepFeature < RosySingleFeatureExtractor
1067
- PrepFeature.announce_me()
1068
-
1069
- def PrepFeature.feature_name()
1070
- return "prep"
1071
- end
1072
- def PrepFeature.sql_type()
1073
- return "VARCHAR(20)"
1074
- end
1075
- def PrepFeature.feature_type()
1076
- return "syn"
1077
- end
1078
-
1079
- #####
1080
- private
1081
-
1082
- def compute_feature_instanceOK()
1083
- return @@interpreter_class.preposition(@@node)
1084
- end
1085
- end
1086
-
1087
- ################
1088
- # head lemma of this constituent
1089
- class HeadFeature < RosySingleFeatureExtractor
1090
- HeadFeature.announce_me()
1091
-
1092
- def HeadFeature.feature_name()
1093
- return "const_head"
1094
- end
1095
- def HeadFeature.sql_type()
1096
- return "VARCHAR(20)"
1097
- end
1098
- def HeadFeature.feature_type()
1099
- return "sem"
1100
- end
1101
-
1102
- #####
1103
- private
1104
-
1105
- def compute_feature_instanceOK()
1106
- return RosyFeatureExtractor.headlemma(@@node)
1107
- end
1108
- end
1109
-
1110
- ################
1111
- # part of speech of the head of this constituent
1112
- class HeadPosFeature < RosySingleFeatureExtractor
1113
- HeadPosFeature.announce_me()
1114
-
1115
- def HeadPosFeature.feature_name()
1116
- return "const_head_pos"
1117
- end
1118
- def HeadPosFeature.sql_type()
1119
- return "VARCHAR(10)"
1120
- end
1121
- def HeadPosFeature.feature_type()
1122
- return "syn"
1123
- end
1124
-
1125
- #####
1126
- private
1127
-
1128
- def compute_feature_instanceOK()
1129
- return RosyFeatureExtractor.headpos(@@node)
1130
- end
1131
- end
1132
-
1133
- ################
1134
- # informative content word (see AbstractSynFeature): lemma and POS
1135
- class IcontLemmaFeature < RosyFeatureExtractor
1136
- IcontLemmaFeature.announce_me()
1137
-
1138
- def IcontLemmaFeature.designator()
1139
- return "icont_word"
1140
- end
1141
- def IcontLemmaFeature.feature_names()
1142
- return ["icont_lemma", "icont_pos"]
1143
- end
1144
- def IcontLemmaFeature.sql_type()
1145
- return "VARCHAR(20)"
1146
- end
1147
- def IcontLemmaFeature.feature_type()
1148
- return "sem"
1149
- end
1150
-
1151
- #####
1152
- private
1153
-
1154
- def compute_features_instanceOK()
1155
- icont_node = @@interpreter_class.informative_content_node(@@node)
1156
- if icont_node
1157
- return [RosyFeatureExtractor.headlemma(icont_node), RosyFeatureExtractor.headpos(icont_node)]
1158
- else
1159
- return [nil, nil]
1160
- end
1161
- end
1162
- end
1163
-
1164
-
1165
- ################
1166
- # leftmost terminal of this constituent
1167
- class FirstWordFeature < RosyFeatureExtractor
1168
- FirstWordFeature.announce_me()
1169
-
1170
- def FirstWordFeature.designator()
1171
- return "firstword"
1172
- end
1173
- def FirstWordFeature.feature_names()
1174
- return ["firstword", "firstword_pos"]
1175
- end
1176
- def FirstWordFeature.sql_type()
1177
- return "VARCHAR(20)"
1178
- end
1179
- def FirstWordFeature.feature_type()
1180
- return "sem"
1181
- end
1182
-
1183
- #####
1184
- private
1185
-
1186
- def compute_features_instanceOK()
1187
- if @@node_leftmost_terminal
1188
- return [RosyFeatureExtractor.headlemma(@@node_leftmost_terminal), RosyFeatureExtractor.headpos(@@node_leftmost_terminal)]
1189
- else
1190
- return [nil, nil]
1191
- end
1192
- end
1193
- end
1194
-
1195
-
1196
- ################
1197
- # rightmost terminal of this constituent
1198
- class LastWordFeature < RosyFeatureExtractor
1199
- LastWordFeature.announce_me()
1200
-
1201
- def LastWordFeature.designator()
1202
- return "lastword"
1203
- end
1204
- def LastWordFeature.feature_names()
1205
- return ["lastword", "lastword_pos"]
1206
- end
1207
- def LastWordFeature.sql_type()
1208
- return "VARCHAR(30)"
1209
- end
1210
- def LastWordFeature.feature_type()
1211
- return "sem"
1212
- end
1213
-
1214
- #####
1215
- private
1216
-
1217
- def compute_features_instanceOK()
1218
- if @@node_rightmost_terminal
1219
- return [RosyFeatureExtractor.headlemma(@@node_rightmost_terminal), RosyFeatureExtractor.headpos(@@node_rightmost_terminal)]
1220
- else
1221
- return [nil, nil]
1222
- end
1223
- end
1224
- end
1225
-
1226
- ################
1227
- # left sibling of the current node
1228
- class LeftSiblingFeature < RosyFeatureExtractor
1229
- LeftSiblingFeature.announce_me()
1230
-
1231
- def LeftSiblingFeature.designator()
1232
- return "leftsib"
1233
- end
1234
- def LeftSiblingFeature.feature_names()
1235
- return ["leftsib_pt", "leftsib_lemma"]
1236
- end
1237
- def LeftSiblingFeature.sql_type()
1238
- return "VARCHAR(20)"
1239
- end
1240
- def LeftSiblingFeature.feature_type()
1241
- return "sem"
1242
- end
1243
-
1244
- #####
1245
- private
1246
-
1247
- def compute_features_instanceOK()
1248
- # leftsib, rightsib (node)
1249
- # siblings with max lastword/firstword among those with lastword/firstword index
1250
- # smaller/greater than firstword/lastword index of self
1251
- if @@node.parent.nil?
1252
- return [nil, nil]
1253
- end
1254
-
1255
- node_ix = terminal_index(@@node_leftmost_terminal)
1256
- unless node_ix
1257
- return [nil, nil]
1258
- end
1259
-
1260
- leftsib_ix = nil
1261
- leftsib = nil
1262
- @@node.parent.children.each { |sibling|
1263
- sib_ix = terminal_index(@@interpreter_class.rightmost_terminal(sibling))
1264
- unless sib_ix
1265
- next
1266
- end
1267
-
1268
- if sib_ix < node_ix and
1269
- (leftsib.nil? or leftsib_ix < sib_ix)
1270
-
1271
- leftsib = sibling
1272
- leftsib_ix = sib_ix
1273
- end
1274
- }
1275
-
1276
- if leftsib
1277
- return [
1278
- @@interpreter_class.simplified_pt(leftsib),
1279
- @@interpreter_class.lemma_backoff(leftsib),
1280
- ]
1281
- else
1282
- return [nil, nil]
1283
- end
1284
- end
1285
-
1286
- ###
1287
- # returns: index(integer) of node in list of terminals of this sentence;
1288
- # nil if node is nil or does not occur in the list
1289
- def terminal_index(node) # SynNode, terminal
1290
- unless node
1291
- return nil
1292
- end
1293
-
1294
- return @@terminals_ordered[node] # word index (or nil)
1295
- end
1296
- end
1297
-
1298
- ################
1299
- # distance between head word of constituent and target (in words)
1300
- class WordDistanceFeature < RosySingleFeatureExtractor
1301
- WordDistanceFeature.announce_me()
1302
-
1303
- def WordDistanceFeature.feature_name()
1304
- return "worddistance"
1305
- end
1306
- def WordDistanceFeature.sql_type()
1307
- return "TINYINT"
1308
- end
1309
- def WordDistanceFeature.feature_type()
1310
- return "syn"
1311
- end
1312
-
1313
- #####
1314
- private
1315
-
1316
- def compute_feature_instanceOK()
1317
-
1318
- head_term = @@interpreter_class.head_terminal(@@node)
1319
- targ_term = @@interpreter_class.head_terminal(@@target)
1320
- if head_term.nil? or targ_term.nil?
1321
- return nil
1322
- end
1323
- h_id = @@terminals_ordered[head_term]
1324
- t_id = @@terminals_ordered[targ_term]
1325
- if h_id.nil? or t_id.nil?
1326
- return nil
1327
- else
1328
- return (h_id-t_id).abs
1329
- end
1330
- end
1331
- end
1332
-
1333
- ################
1334
- # is the current node a maximal projection?
1335
- # heuristic: is my category the same as my parent's?
1336
- class IsMaxProj < RosySingleFeatureExtractor
1337
- IsMaxProj.announce_me()
1338
-
1339
- def IsMaxProj.feature_name()
1340
- return "ismaxproj"
1341
- end
1342
- def IsMaxProj.sql_type()
1343
- return "TINYINT"
1344
- end
1345
- def IsMaxProj.feature_type()
1346
- return "syn"
1347
- end
1348
-
1349
- #####
1350
- private
1351
-
1352
- def compute_feature_instanceOK()
1353
- unless @@node.parent()
1354
- return 1
1355
- end
1356
- my_cat = @@interpreter_class.category(@@node)
1357
- parent_cat = @@interpreter_class.category(@@node.parent)
1358
- if my_cat == parent_cat
1359
- return 0
1360
- else
1361
- return 1
1362
- end
1363
- end
1364
- end
1365
-
1366
- ################
1367
- # right sibling of the current node
1368
- class RightSiblingFeature < RosyFeatureExtractor
1369
- RightSiblingFeature.announce_me()
1370
-
1371
- def RightSiblingFeature.designator()
1372
- return "rightsib"
1373
- end
1374
- def RightSiblingFeature.feature_names()
1375
- return ["rightsib_pt", "rightsib_lemma"]
1376
- end
1377
- def RightSiblingFeature.sql_type()
1378
- return "VARCHAR(20)"
1379
- end
1380
- def RightSiblingFeature.feature_type()
1381
- return "sem"
1382
- end
1383
-
1384
- #####
1385
- private
1386
-
1387
- def compute_features_instanceOK()
1388
- # leftsib, rightsib (node)
1389
- # siblings with max lastword/firstword among those with lastword/firstword index
1390
- # smaller/greater than firstword/lastword index of self
1391
- if @@node.parent.nil?
1392
- return [nil, nil]
1393
- end
1394
-
1395
- node_ix = terminal_index(@@node_rightmost_terminal)
1396
- unless node_ix
1397
- return [nil, nil]
1398
- end
1399
-
1400
- rightsib_ix = nil
1401
- rightsib = nil
1402
- @@node.parent.children.each { |sibling|
1403
- sib_ix = terminal_index(@@interpreter_class.leftmost_terminal(sibling))
1404
- unless sib_ix
1405
- next
1406
- end
1407
-
1408
- if sib_ix > node_ix and
1409
- (rightsib.nil? or sib_ix < rightsib_ix)
1410
-
1411
- rightsib = sibling
1412
- rightsib_ix = sib_ix
1413
- end
1414
- }
1415
-
1416
- if rightsib
1417
- return [
1418
- @@interpreter_class.simplified_pt(rightsib),
1419
- @@interpreter_class.lemma_backoff(rightsib),
1420
- ]
1421
- else
1422
- return [nil, nil]
1423
- end
1424
- end
1425
-
1426
- ###
1427
- # returns: index(integer) of node in list of terminals of this sentence;
1428
- # nil if node is nil or does not occur in the list
1429
- def terminal_index(node) # SynNode, terminal
1430
- unless node
1431
- return nil
1432
- end
1433
-
1434
- return @@terminals_ordered[node] # word index (or nil)
1435
- end
1436
- end
1437
-
1438
-
1439
- # ################
1440
- # # admin feature: word span of this constituent
1441
- # class WordSpanFeature < RosySingleFeatureExtractor
1442
- # WordSpanFeature.announce_me()
1443
-
1444
- # def WordSpanFeature.feature_name()
1445
- # return "wordspan"
1446
- # end
1447
- # def WordSpanFeature.sql_type()
1448
- # return "VARCHAR(30)"
1449
- # end
1450
- # def WordSpanFeature.feature_type()
1451
- # return "admin"
1452
- # end
1453
-
1454
- # #####
1455
- # private
1456
-
1457
- # def compute_feature_instanceOK()
1458
-
1459
- # fwh = RosyFeatureExtractor.headlemma(@@node_leftmost_terminal)
1460
- # lwh = RosyFeatureExtractor.headlemma(@@node_rightmost_terminal)
1461
-
1462
- # if fwh.nil?
1463
- # fwh = ""
1464
- # end
1465
- # if lwh.nil?
1466
- # lwh = ""
1467
- # end
1468
-
1469
- # return fwh+ "-" +lwh
1470
- # end
1471
- # end
1472
-
1473
-
1474
- ################
1475
- # admin feature: my node ID and my father's, separated by a space
1476
- # the highest node (topnode) has ID 0, and no father ID.
1477
- class NodeIDFeature < RosySingleFeatureExtractor
1478
- NodeIDFeature.announce_me()
1479
-
1480
- def NodeIDFeature.feature_name()
1481
- return "nodeID"
1482
- end
1483
- def NodeIDFeature.sql_type()
1484
- return "VARCHAR(100)"
1485
- end
1486
- def NodeIDFeature.feature_type()
1487
- return "admin"
1488
- end
1489
-
1490
- #####
1491
- private
1492
-
1493
- def compute_feature_instanceOK()
1494
-
1495
- if @@node.parent
1496
- return @@node.id.to_s+ " " + @@node.parent.id.to_s
1497
- else
1498
- return @@node.id.to_s
1499
- end
1500
- end
1501
- end
1502
-
1503
- ################
1504
- # admin feature: sentence ID
1505
- class SentidFeature < RosySingleFeatureExtractor
1506
- SentidFeature.announce_me()
1507
-
1508
- def SentidFeature.feature_name()
1509
- return "sentid"
1510
- end
1511
- def SentidFeature.sql_type()
1512
- return "VARCHAR(100)"
1513
- end
1514
- def SentidFeature.feature_type()
1515
- return "admin"
1516
- end
1517
- def SentidFeature.info()
1518
- # additional info: I am an index feature
1519
- return super().concat(["index"])
1520
- end
1521
-
1522
- #####
1523
- private
1524
-
1525
- def compute_feature_instanceOK()
1526
- return construct_instance_id(@@sent.id(), @@frame.id())
1527
- end
1528
- end
1529
-
1530
- # ################
1531
- # # admin feature: tokens spanned by this constituent
1532
- # class TokensFeature < RosySingleFeatureExtractor
1533
- # TokensFeature.announce_me()
1534
-
1535
- # def TokensFeature.feature_name()
1536
- # return "tokens"
1537
- # end
1538
- # def TokensFeature.sql_type()
1539
- # return "VARCHAR(100)"
1540
- # end
1541
- # def TokensFeature.feature_type()
1542
- # return "admin"
1543
- # end
1544
-
1545
- # #####
1546
- # private
1547
-
1548
- # def compute_feature_instanceOK()
1549
- # return @@node.to_s
1550
- # end
1551
- # end
1552
-
1553
- ################
1554
- # admin feature: frame assigned by FN
1555
- class FrameFeature < RosySingleFeatureExtractor
1556
- FrameFeature.announce_me()
1557
-
1558
- def FrameFeature.feature_name()
1559
- return "frame"
1560
- end
1561
- def FrameFeature.sql_type()
1562
- return "VARCHAR(35)"
1563
- end
1564
- def FrameFeature.feature_type()
1565
- return "ubiq"
1566
- end
1567
- def FrameFeature.info()
1568
- # additional info: I am an index feature
1569
- return super().concat(["index"])
1570
- end
1571
-
1572
- #####
1573
- private
1574
-
1575
- def compute_feature_instanceOK()
1576
- if @@frame
1577
- return @@frame.name()
1578
- else
1579
- return nil
1580
- end
1581
- end
1582
- end
1583
-
1584
- ################
1585
- # admin feature: is this node a terminal?
1586
- class TerminalFeature < RosySingleFeatureExtractor
1587
- TerminalFeature.announce_me()
1588
-
1589
- def TerminalFeature.feature_name()
1590
- return "term"
1591
- end
1592
- def TerminalFeature.sql_type()
1593
- return "TINYINT"
1594
- end
1595
- def TerminalFeature.feature_type()
1596
- return "admin"
1597
- end
1598
-
1599
- #####
1600
- private
1601
-
1602
- def compute_feature_instanceOK()
1603
- if @@node.is_terminal?
1604
- return 1
1605
- else
1606
- return 0
1607
- end
1608
- end
1609
- end