frprep 0.0.1.prealpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. data/.yardopts +8 -0
  2. data/CHANGELOG.rdoc +0 -0
  3. data/LICENSE.rdoc +0 -0
  4. data/README.rdoc +0 -0
  5. data/lib/common/AbstractSynInterface.rb +1227 -0
  6. data/lib/common/BerkeleyInterface.rb +375 -0
  7. data/lib/common/CollinsInterface.rb +1165 -0
  8. data/lib/common/ConfigData.rb +694 -0
  9. data/lib/common/Counter.rb +18 -0
  10. data/lib/common/DBInterface.rb +48 -0
  11. data/lib/common/EnduserMode.rb +27 -0
  12. data/lib/common/Eval.rb +480 -0
  13. data/lib/common/FixSynSemMapping.rb +196 -0
  14. data/lib/common/FrPrepConfigData.rb +66 -0
  15. data/lib/common/FrprepHelper.rb +1324 -0
  16. data/lib/common/Graph.rb +345 -0
  17. data/lib/common/ISO-8859-1.rb +24 -0
  18. data/lib/common/ML.rb +186 -0
  19. data/lib/common/Maxent.rb +215 -0
  20. data/lib/common/MiniparInterface.rb +1388 -0
  21. data/lib/common/Optimise.rb +195 -0
  22. data/lib/common/Parser.rb +213 -0
  23. data/lib/common/RegXML.rb +269 -0
  24. data/lib/common/RosyConventions.rb +171 -0
  25. data/lib/common/SQLQuery.rb +243 -0
  26. data/lib/common/STXmlTerminalOrder.rb +194 -0
  27. data/lib/common/SalsaTigerRegXML.rb +2347 -0
  28. data/lib/common/SalsaTigerXMLHelper.rb +99 -0
  29. data/lib/common/SleepyInterface.rb +384 -0
  30. data/lib/common/SynInterfaces.rb +275 -0
  31. data/lib/common/TabFormat.rb +720 -0
  32. data/lib/common/Tiger.rb +1448 -0
  33. data/lib/common/TntInterface.rb +44 -0
  34. data/lib/common/Tree.rb +61 -0
  35. data/lib/common/TreetaggerInterface.rb +303 -0
  36. data/lib/common/headz.rb +338 -0
  37. data/lib/common/option_parser.rb +13 -0
  38. data/lib/common/ruby_class_extensions.rb +310 -0
  39. data/lib/fred/Baseline.rb +150 -0
  40. data/lib/fred/FileZipped.rb +31 -0
  41. data/lib/fred/FredBOWContext.rb +863 -0
  42. data/lib/fred/FredConfigData.rb +182 -0
  43. data/lib/fred/FredConventions.rb +232 -0
  44. data/lib/fred/FredDetermineTargets.rb +324 -0
  45. data/lib/fred/FredEval.rb +312 -0
  46. data/lib/fred/FredFeatureExtractors.rb +321 -0
  47. data/lib/fred/FredFeatures.rb +1061 -0
  48. data/lib/fred/FredFeaturize.rb +596 -0
  49. data/lib/fred/FredNumTrainingSenses.rb +27 -0
  50. data/lib/fred/FredParameters.rb +402 -0
  51. data/lib/fred/FredSplit.rb +84 -0
  52. data/lib/fred/FredSplitPkg.rb +180 -0
  53. data/lib/fred/FredTest.rb +607 -0
  54. data/lib/fred/FredTrain.rb +144 -0
  55. data/lib/fred/PlotAndREval.rb +480 -0
  56. data/lib/fred/fred.rb +45 -0
  57. data/lib/fred/md5.rb +23 -0
  58. data/lib/fred/opt_parser.rb +250 -0
  59. data/lib/frprep/AbstractSynInterface.rb +1227 -0
  60. data/lib/frprep/Ampersand.rb +37 -0
  61. data/lib/frprep/BerkeleyInterface.rb +375 -0
  62. data/lib/frprep/CollinsInterface.rb +1165 -0
  63. data/lib/frprep/ConfigData.rb +694 -0
  64. data/lib/frprep/Counter.rb +18 -0
  65. data/lib/frprep/FNCorpusXML.rb +643 -0
  66. data/lib/frprep/FNDatabase.rb +144 -0
  67. data/lib/frprep/FixSynSemMapping.rb +196 -0
  68. data/lib/frprep/FrPrepConfigData.rb +66 -0
  69. data/lib/frprep/FrameXML.rb +513 -0
  70. data/lib/frprep/FrprepHelper.rb +1324 -0
  71. data/lib/frprep/Graph.rb +345 -0
  72. data/lib/frprep/ISO-8859-1.rb +24 -0
  73. data/lib/frprep/MiniparInterface.rb +1388 -0
  74. data/lib/frprep/Parser.rb +213 -0
  75. data/lib/frprep/RegXML.rb +269 -0
  76. data/lib/frprep/STXmlTerminalOrder.rb +194 -0
  77. data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
  78. data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
  79. data/lib/frprep/SleepyInterface.rb +384 -0
  80. data/lib/frprep/SynInterfaces.rb +275 -0
  81. data/lib/frprep/TabFormat.rb +720 -0
  82. data/lib/frprep/Tiger.rb +1448 -0
  83. data/lib/frprep/TntInterface.rb +44 -0
  84. data/lib/frprep/Tree.rb +61 -0
  85. data/lib/frprep/TreetaggerInterface.rb +303 -0
  86. data/lib/frprep/do_parses.rb +142 -0
  87. data/lib/frprep/frprep.rb +686 -0
  88. data/lib/frprep/headz.rb +338 -0
  89. data/lib/frprep/one_parsed_file.rb +28 -0
  90. data/lib/frprep/opt_parser.rb +94 -0
  91. data/lib/frprep/ruby_class_extensions.rb +310 -0
  92. data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
  93. data/lib/rosy/DBMySQL.rb +146 -0
  94. data/lib/rosy/DBSQLite.rb +280 -0
  95. data/lib/rosy/DBTable.rb +239 -0
  96. data/lib/rosy/DBWrapper.rb +176 -0
  97. data/lib/rosy/ExternalConfigData.rb +58 -0
  98. data/lib/rosy/FailedParses.rb +130 -0
  99. data/lib/rosy/FeatureInfo.rb +242 -0
  100. data/lib/rosy/GfInduce.rb +1115 -0
  101. data/lib/rosy/GfInduceFeature.rb +148 -0
  102. data/lib/rosy/InputData.rb +294 -0
  103. data/lib/rosy/RosyConfigData.rb +115 -0
  104. data/lib/rosy/RosyConfusability.rb +338 -0
  105. data/lib/rosy/RosyEval.rb +465 -0
  106. data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
  107. data/lib/rosy/RosyFeaturize.rb +280 -0
  108. data/lib/rosy/RosyInspect.rb +336 -0
  109. data/lib/rosy/RosyIterator.rb +477 -0
  110. data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
  111. data/lib/rosy/RosyPruning.rb +165 -0
  112. data/lib/rosy/RosyServices.rb +744 -0
  113. data/lib/rosy/RosySplit.rb +232 -0
  114. data/lib/rosy/RosyTask.rb +19 -0
  115. data/lib/rosy/RosyTest.rb +826 -0
  116. data/lib/rosy/RosyTrain.rb +232 -0
  117. data/lib/rosy/RosyTrainingTestTable.rb +786 -0
  118. data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
  119. data/lib/rosy/View.rb +418 -0
  120. data/lib/rosy/opt_parser.rb +379 -0
  121. data/lib/rosy/rosy.rb +77 -0
  122. data/lib/shalmaneser/version.rb +3 -0
  123. data/test/frprep/test_opt_parser.rb +94 -0
  124. data/test/functional/functional_test_helper.rb +40 -0
  125. data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
  126. data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
  127. data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
  128. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
  129. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
  130. data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
  131. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
  132. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
  133. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
  134. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
  135. data/test/functional/test_fred.rb +47 -0
  136. data/test/functional/test_frprep.rb +52 -0
  137. data/test/functional/test_rosy.rb +20 -0
  138. metadata +270 -0
@@ -0,0 +1,1609 @@
1
+ ####
2
+ # ke & sp
3
+ # adapted to new feature extractor class,
4
+ # Collins and Tiger features combined:
5
+ # KE November 2005
6
+ #
7
+ # Feature Extractors for Rosy
8
+ #
9
+ # Contract: each feature extractor inherits from the RosyFeatureExtractor class
10
+ #
11
+ # Feature extractors return nil if no feature value could be
12
+ # returned
13
+
14
+
15
+ # Salsa packages
16
+ require 'rosy/AbstractFeatureAndExternal'
17
+ require 'common/SalsaTigerRegXML'
18
+
19
+ # Fred and Rosy packages
20
+ require 'common/RosyConventions'
21
+
22
+
23
+ ################################
24
+ # base class for all following feature extractors
25
class RosyFeatureExtractor < AbstractFeatureExtractor
  # Base class for the Rosy feature extractors.
  #
  # All state is kept in class variables: set_sentence() and set_node()
  # precompute values once, and the individual feature classes then read them.
  # @@sent, @@frame, @@node and @@interpreter_class are not assigned here;
  # presumably the superclass sets them (in set_sentence/set_node) -- TODO confirm.

  @@instance_ok = nil             # Boolean: set_node(), set_sent() successful?
  @@split_nones = nil             # Boolean: split NONE value for gold feature?

  @@target = nil                  # SynNode: main target node
  @@target_pos = nil              # string: part of speech of main target
  @@target_voice = nil            # string: "active", "passive", or nil
  @@terminals_ordered = nil       # Hash: sentence terminals, mapped onto their word indices (starting with 1)
  @@target_gfs = nil              # Array of pairs [rel, node]: grammatical functions of the target

  @@paths = nil                   # Hash: node ID -> path object, path from main target node to the node with that ID
  @@relpos = nil                  # string: position of instance relative to target
  @@node_leftmost_terminal = nil  # SynNode objects: first and last terminal
  @@node_rightmost_terminal = nil # in the yield of @@node

  @@governing_verb = nil          # SynNode object: closest governing verb of @@target
  @@gv_paths = nil                # Hash: node ID -> path object, path from the governing verb to the node with that ID

  ###
  # returns a string: "phase 1" or "phase 2",
  # depending on whether the feature is computed
  # directly from the SalsaTigerSentence and the SynNode objects
  # or whether it is computed from the phase 1 features
  # computed for the training set
  #
  # Here: all features in this package are phase 1
  def RosyFeatureExtractor.phase()
    return "phase 1"
  end

  ###
  # returns an array of strings, providing information about
  # the feature extractor: the superclass info with "rosy" appended
  def RosyFeatureExtractor.info()
    return super().concat(["rosy"])
  end

  ###
  # set sentence, set node, set general settings: this is done prior to
  # feature computation using compute_feature_value()
  # such that computations that stay the same for
  # several features can be done in advance
  #
  # returns: true
  def RosyFeatureExtractor.set(var_hash) # hash. possible entries: split_nones=> true/false

    @@split_nones = var_hash["split_nones"]

    return true
  end

  ###
  # Precompute everything that depends only on the sentence and the frame:
  # terminal word indices, main target node, its POS, voice and grammatical
  # functions, paths from the target to all nodes, and the governing verb
  # of the target with its paths.
  #
  # returns: false if no main target node could be determined, else true
  def RosyFeatureExtractor.set_sentence(sent,  # SalsaTigerSentence object
                                        frame) # FrameNode object
    super(sent, frame)

    # Reset the governing-verb state first, so that a sentence without a
    # governing verb (or without a target) does not inherit the values
    # computed for the previously processed sentence.
    @@governing_verb = nil
    @@gv_paths = Hash.new

    root = @@sent.syn_roots.first()
    word_index_counter = 1
    @@terminals_ordered = Hash.new
    root.yield_nodes_ordered.each { |yield_node|
      @@terminals_ordered[yield_node] = word_index_counter
      word_index_counter += 1
    }

    # @@target: main target node (SynNode)
    # WARNING: at this moment, we are
    # not considering true multiword targets.
    # Remove the "no_mwe" parameter in determine_main_target
    # to change this
    unless frame.target
      @@target = nil
      return false
    end
    @@target = @@interpreter_class.main_node_of_expr(frame.target.children(), "no_mwe")

    unless @@target
      return false
    end

    # @@target_pos: string, target POS
    @@target_pos = @@interpreter_class.category(@@target)

    # @@target_voice:
    # for verb targets, string, active or passive
    # else nil
    @@target_voice = @@interpreter_class.voice(@@target)
    @@target_gfs = @@interpreter_class.gfs(@@target, @@sent)

    # paths from target to all other nodes in the graph
    @@paths = RosyFeatureExtractor.all_paths_from(@@target)

    # governing verb of target.
    # If something goes wrong, @@governing_verb stays nil and
    # @@gv_paths stays empty (both were reset above)
    if (targetlemma = RosyFeatureExtractor.headlemma(@@target))
      # determine governing verb: walk up the ancestors of the target
      parent = @@target
      while (parent = parent.parent)
        parentlemma = RosyFeatureExtractor.headlemma(parent)

        if @@interpreter_class.category(parent) == "verb" and
            parentlemma != targetlemma
          # success: found the governing verb of the target

          @@governing_verb = @@interpreter_class.head_terminal(parent)
          # paths from governing verb of target to all other nodes in the graph
          if @@governing_verb
            @@gv_paths = RosyFeatureExtractor.all_paths_from(@@governing_verb)
          end

          break
        end
      end
    end


    # paths: when printing, leave off the phrase type of the end node
    @@paths.each_value { |p| p.set_cutoff_last_pt_on_printing(true) }
    @@gv_paths.each_value { |p| p.set_cutoff_last_pt_on_printing(true) }

    return true
  end

  ###
  # node: SynNode of the sentence set in set_sentence
  #
  # Precompute the values that depend on the instance node.
  # returns: false (and marks the instance as not OK) if there is no target,
  # else true
  def RosyFeatureExtractor.set_node(node)
    super(node)

    @@instance_ok = true

    unless @@target
      # no target, nothing I can compute here
      @@instance_ok = false
      return false
    end

    # # path between target and current instance node
    # @@path = @@interpreter_class.path_between(@@target, @@node)
    # @@path.set_cutoff_last_pt_on_printing(true) # when printing path, cut off last node label


    # position of instance node relative to main target node
    @@relpos = @@interpreter_class.relative_position(@@node, @@target)
    # leftmost, rightmost terminal in the yield of @@node
    @@node_leftmost_terminal = @@interpreter_class.leftmost_terminal(@@node)
    @@node_rightmost_terminal = @@interpreter_class.rightmost_terminal(@@node)

    return true
  end

  ###
  # compute_features: first check if instance is OK
  #
  # returns: list of features (made safe for SQL),
  # or nil if the instance is not OK
  def compute_features()
    unless @@instance_ok
      return nil
    end

    return make_features_safe_for_sql(compute_features_instanceOK())
  end

  ############
  protected


  # to be overwritten by subclasses
  # returns: list of features
  def compute_features_instanceOK()
    raise "Overwrite me"
  end

  ###
  # in computed features:
  # replace "," by COMMA and "\" by BACK in order not to confuse SQL.
  # Non-string features are passed through unchanged.
  def make_features_safe_for_sql(feature_list)
    return feature_list.map { |feature|
      if feature.kind_of? String
        feature.gsub(/,/, "COMMA").gsub(/\\/, "BACK")
      else
        feature
      end
    }
  end


  ###
  # lemma of the head terminal of SynNode n,
  # nil if n is nil or has no head terminal
  def RosyFeatureExtractor.headlemma(n) # SynNode
    unless n
      return nil
    end

    h = @@interpreter_class.head_terminal(n)
    if h
      return @@interpreter_class.lemma_backoff(h)
    else
      return nil
    end
  end

  ###
  # part of speech of the head terminal of SynNode n,
  # nil if n is nil or has no head terminal
  def RosyFeatureExtractor.headpos(n) # SynNode
    unless n
      return nil
    end

    h = @@interpreter_class.head_terminal(n)
    if h
      return h.part_of_speech()
    else
      return nil
    end
  end

  ###
  # Given a SynNode n, recursively determine
  # the paths from n to all other reachable nodes,
  # skipping nodes that already have a path
  # listed in the given hash mapping node IDs to paths.
  # Paths are given as Path objects (see AbstractSynInterface).
  # It is assumed that the graph of n is a tree, which
  # is searched depth-first, first the children, then the parent of n.
  #
  # returns: the hash (the same object that was passed in, if any)
  def RosyFeatureExtractor.all_paths_from(n,          # SynNode
                                          hash = nil) # Hash: nodeID(string) => Path object
    # initial step of all: no hash existing yet
    if hash.nil?
      hash = Hash.new
      hash[n.id()] = Path.new(n)
    end

    # invariant at this point: n must be listed in hash
    unless hash[n.id()]
      raise "Shouldn't be here"
    end

    # for each child c of n: compute its path from the path of n,
    # and explore paths below c
    n.each_child_with_edgelabel { |label, c|
      if hash[c.id()].nil?
        hash[c.id()] = hash[n.id()].deep_clone().add_last_step("D",
                                                               label,
                                                               @@interpreter_class.simplified_pt(c),
                                                               c)
        RosyFeatureExtractor.all_paths_from(c, hash)
      end
    }

    # compute the path from n's parent p from the path of n,
    # and explore paths beyond p
    if (p = n.parent) and hash[p.id()].nil?
      # node has a parent, and it is not listed in the path hash
      # make a new path for parent: n's path, plus one up-step
      hash[p.id()] = hash[n.id()].deep_clone().add_last_step("U",
                                                             n.parent_label,
                                                             @@interpreter_class.simplified_pt(p),
                                                             p)
      RosyFeatureExtractor.all_paths_from(p, hash)
    end

    return hash

  end

end
288
+
289
+ ###############################
290
+ # Rosy single feature extractor, duplicating stuff from
291
+ # AbstractSingleFeatureExtractor
292
class RosySingleFeatureExtractor < RosyFeatureExtractor

  ###
  # returns a string: the designator for this feature extractor
  # (an extractor may compute several features, but
  # in the experiment file it is chosen by a single designator)
  #
  # here: single feature, and the feature name is the designator.
  # feature_name() is dispatched on the receiving class directly,
  # so no eval of the class name is needed.
  def RosySingleFeatureExtractor.designator()
    return self.feature_name()
  end

  ###
  # returns an array with the single feature name of this extractor
  def RosySingleFeatureExtractor.feature_names()
    return [self.feature_name()]
  end

  ###
  # compute_features: first check if instance is OK
  #
  # returns: one-element list with the feature value (made safe for SQL),
  # or nil if the instance is not OK
  def compute_features()
    unless @@instance_ok
      return nil
    end

    return make_features_safe_for_sql([compute_feature_instanceOK()])
  end

  ############
  private

  # to be overwritten by subclasses
  # returns: the single feature value
  def compute_feature_instanceOK()
    raise "Overwrite me"
  end

end
329
+
330
+ ##############################################
331
+ # Individual feature extractors
332
+ ##############################################
333
+
334
+ ####################
335
+ # gold role label
336
class GoldlabelFeature < RosySingleFeatureExtractor
  GoldlabelFeature.announce_me()

  # name (and designator) of this feature
  def self.feature_name
    "gold"
  end

  # SQL column type for this feature
  def self.sql_type
    "VARCHAR(30)"
  end

  # feature type
  def self.feature_type
    "gold"
  end

  # inherited info plus the marker "index": this is an index feature
  def self.info
    super().concat(["index"])
  end

  ################
  private

  # returns: name of the first frame element that has the instance node
  # among its children, or nil if the node carries no role label.
  # (Splitting the "no role" label by relative position is disabled.)
  def compute_feature_instanceOK
    @@frame.each_fe_by_name do |fe|
      return fe.name if fe.children.include?(@@node)
    end

    nil
  end
end
373
+
374
+ ####################
375
+ # path features
376
class AbstractPathFeature < RosySingleFeatureExtractor
  # SQL column type shared by the path features
  def self.sql_type
    "VARCHAR(80)"
  end

  # feature type
  def self.feature_type
    "syn"
  end

  ################
  private

  # returns: the path computed by the subclass, or nil if there is no
  # path from the target to the instance node or the result is nil/empty
  def compute_feature_instanceOK
    return nil if @@paths[@@node.id()].nil?

    computed = my_path_computation()
    (computed.nil? || computed.empty?) ? nil : computed
  end

  # hook: subclasses provide the actual path rendering
  def my_path_computation
    raise "overwrite me"
  end
end
405
+
406
+
407
+ ####################
408
+ # path consisting of nodelabels, dependencies and directions
409
class PathFeature < AbstractPathFeature
  PathFeature.announce_me()

  # SQL column type: wider than the default of the abstract class
  def self.sql_type
    "VARCHAR(120)"
  end

  # name (and designator) of this feature
  def self.feature_name
    "path"
  end

  ################
  private

  # returns: path from target to instance node, printed with all
  # three print flags enabled; nil if no such path is known
  def my_path_computation
    target_path = @@paths[@@node.id()]
    return nil if target_path.nil?

    target_path.print(true, true, true)
  end
end
430
+
431
+
432
+
433
+ ####################
434
+ # path consisting of phrase type and directions
435
class NodelabelPathFeature < AbstractPathFeature
  NodelabelPathFeature.announce_me()

  # name (and designator) of this feature
  def self.feature_name
    "pt_path"
  end

  ################
  private

  # returns: path from target to instance node, printed with
  # flags (true, false, true); nil if no such path is known
  def my_path_computation
    target_path = @@paths[@@node.id()]
    return nil if target_path.nil?

    target_path.print(true, false, true)
  end
end
453
+
454
+ ####################
455
+ # path consisting of dependencies and directions
456
class EdgelabelPathFeature < AbstractPathFeature
  EdgelabelPathFeature.announce_me()

  # name (and designator) of this feature
  def self.feature_name
    "gf_path"
  end

  ################
  private

  # returns: path from target to instance node, printed with
  # flags (true, true, false); nil if no such path is known
  def my_path_computation
    target_path = @@paths[@@node.id()]
    return nil if target_path.nil?

    target_path.print(true, true, false)
  end
end
474
+
475
+ ####################
476
+ # features: path from governing verb
477
class AbstractGVPathFeature < RosySingleFeatureExtractor
  # SQL column type shared by the governing-verb path features
  def self.sql_type
    "VARCHAR(80)"
  end

  # feature type
  def self.feature_type
    "syn"
  end

  ################
  private

  # returns: the path computed by the subclass, or nil if there is no path
  # from the governing verb to the instance node or the result is nil/empty
  def compute_feature_instanceOK
    return nil if @@gv_paths[@@node.id()].nil?

    computed = my_path_computation()
    (computed.nil? || computed.empty?) ? nil : computed
  end

  # hook: subclasses provide the actual path rendering
  def my_path_computation
    raise "overwrite me"
  end
end
506
+
507
+
508
+ ####################
509
+ # path from governing verb consisting of nodelabels, dependencies and directions
510
class GVPathFeature < AbstractGVPathFeature
  GVPathFeature.announce_me()

  # SQL column type: wider than the default of the abstract class
  def self.sql_type
    "VARCHAR(120)"
  end

  # name (and designator) of this feature
  def self.feature_name
    "gvpath"
  end

  ################
  private

  # returns: path from governing verb to instance node,
  # printed with all three print flags enabled
  def my_path_computation
    gv_path = @@gv_paths[@@node.id()]
    gv_path.print(true, true, true)
  end
end
527
+
528
+
529
+ ####################
530
+ # gov. verb path consisting of phrase type and directions
531
class GVNodelabelPathFeature < AbstractGVPathFeature
  GVNodelabelPathFeature.announce_me()

  # name (and designator) of this feature
  def self.feature_name
    "pt_gvpath"
  end

  ################
  private

  # returns: path from governing verb to instance node,
  # printed with flags (true, false, true)
  def my_path_computation
    gv_path = @@gv_paths[@@node.id()]
    gv_path.print(true, false, true)
  end
end
545
+
546
+ ####################
547
+ # gov. verb path consisting of dependencies and directions
548
class GVEdgelabelPathFeature < AbstractGVPathFeature
  GVEdgelabelPathFeature.announce_me()

  # name (and designator) of this feature
  def self.feature_name
    "gf_gvpath"
  end

  ################
  private

  # returns: path from governing verb to instance node,
  # printed with flags (true, true, false)
  def my_path_computation
    gv_path = @@gv_paths[@@node.id()]
    gv_path.print(true, true, false)
  end
end
562
+
563
+ ####################
564
+ # path length
565
class PathLengthFeature < RosySingleFeatureExtractor
  PathLengthFeature.announce_me()

  # name (and designator) of this feature
  def self.feature_name
    "path_length"
  end

  # SQL column type for this feature
  def self.sql_type
    "TINYINT"
  end

  # feature type
  def self.feature_type
    "syn"
  end

  ################
  private

  # returns: length of the path from target to instance node,
  # nil if no such path is known
  def compute_feature_instanceOK
    target_path = @@paths[@@node.id()]
    target_path.nil? ? nil : target_path.length()
  end
end
589
+
590
+ #########
591
+ # group of combined path features:
592
+ # path to target combined with target part of speech and
593
+ # info on whether the target is passive
594
class AbstractCombinedPathFeature < RosySingleFeatureExtractor

  # SQL column type shared by the combined path features
  def AbstractCombinedPathFeature.sql_type()
    return "VARCHAR(90)"
  end

  # feature type
  def AbstractCombinedPathFeature.feature_type()
    return "syn"
  end

  ################
  private

  # returns: string "<path>--<target POS>--<target voice>".
  # The path part is empty if no path from the target to the instance
  # node is known, or if the subclass computation yields nil
  # (to_s guards against nil, as for the other two components).
  def compute_feature_instanceOK()
    if @@paths[@@node.id()].nil?
      path = ""
    else
      path = my_path_computation()
    end
    return path.to_s + "--" + @@target_pos.to_s + "--" + @@target_voice.to_s
  end

  ###
  # hook: subclasses provide the actual path rendering
  def my_path_computation()
    raise "Overwrite me"
  end
end
620
+
621
+
622
+ ####################
623
+ # combined path based on nodelabels
624
class NodelabelCombinedPathFeature < AbstractCombinedPathFeature
  NodelabelCombinedPathFeature.announce_me()

  # name (and designator) of this feature
  def self.feature_name
    "pt_combined_path"
  end

  ################
  private

  # returns: path from target to instance node, printed with
  # flags (false, false, true); nil if no such path is known
  def my_path_computation
    target_path = @@paths[@@node.id()]
    return nil if target_path.nil?

    target_path.print(false, false, true)
  end
end
642
+
643
+ ####################
644
+ # combined path based on edgelabels
645
class EdgelabelCombinedPathFeature < AbstractCombinedPathFeature
  EdgelabelCombinedPathFeature.announce_me()

  # name (and designator) of this feature
  def self.feature_name
    "gf_combined_path"
  end

  ################
  private

  # returns: path from target to instance node, printed with
  # flags (false, true, false); nil if no such path is known
  def my_path_computation
    target_path = @@paths[@@node.id()]
    return nil if target_path.nil?

    target_path.print(false, true, false)
  end
end
663
+
664
+
665
+ ####################
666
+ # combined path based on nodelabels and edgelabels
667
class CombinedPathFeature < AbstractCombinedPathFeature
  CombinedPathFeature.announce_me()

  # SQL column type: wider than the default of the abstract class
  def self.sql_type
    "VARCHAR(130)"
  end

  # name (and designator) of this feature
  def self.feature_name
    "combined_path"
  end

  ################
  private

  # returns: path from target to instance node, printed with
  # flags (false, true, true); nil if no such path is known
  def my_path_computation
    target_path = @@paths[@@node.id()]
    return nil if target_path.nil?

    target_path.print(false, true, true)
  end
end
688
+
689
+
690
+ ##################
691
+ # group of features for computing
692
+ # partial path to target: only up to
693
+ # the lowest common ancestor of current node and target
694
class AbstractPartialPathFeature < RosySingleFeatureExtractor

  # SQL column type shared by the partial path features
  def AbstractPartialPathFeature.sql_type()
    return "VARCHAR(70)"
  end

  # feature type
  def AbstractPartialPathFeature.feature_type()
    return "syn"
  end

  ################
  private

  # returns: the partial path computed by the subclass, or nil if there is
  # no path from the target to the instance node or the result is nil/empty
  def compute_feature_instanceOK()
    if @@paths[@@node.id()].nil?
      path = nil
    else
      path = my_path_computation()
    end
    if path.nil? or path.empty?
      return nil
    else
      return path
    end
  end

  ###
  # hook: subclasses provide the actual path rendering.
  # Declared here, as in the other abstract path classes, so that a
  # subclass that forgets to override it fails with a clear message.
  def my_path_computation()
    raise "overwrite me"
  end
end
719
+
720
+ ####
721
+ # partial path based on node labels
722
class NodelabelPartialPathFeature < AbstractPartialPathFeature
  NodelabelPartialPathFeature.announce_me()

  # name (and designator) of this feature
  def self.feature_name
    "pt_partial_path"
  end

  ################
  private

  # returns: result of print_downpart with flags (true, false, true)
  # on the path from target to instance node; nil if no such path is known
  def my_path_computation
    target_path = @@paths[@@node.id()]
    return nil if target_path.nil?

    target_path.print_downpart(true, false, true)
  end
end
740
+
741
+ ####
742
+ # partial path based on edge labels
743
class EdgelabelPartialPathFeature < AbstractPartialPathFeature
  EdgelabelPartialPathFeature.announce_me()

  # name (and designator) of this feature
  def self.feature_name
    "gf_partial_path"
  end

  ################
  private

  # returns: result of print_downpart with flags (true, true, false)
  # on the path from target to instance node; nil if no such path is known
  def my_path_computation
    target_path = @@paths[@@node.id()]
    return nil if target_path.nil?

    target_path.print_downpart(true, true, false)
  end
end
761
+
762
+ ####
763
+ # partial path based on node and edge labels
764
class PartialPathFeature < AbstractPartialPathFeature
  PartialPathFeature.announce_me()

  # SQL column type: wider than the default of the abstract class
  def self.sql_type
    "VARCHAR(110)"
  end

  # name (and designator) of this feature
  def self.feature_name
    "partial_path"
  end

  ################
  private

  # returns: result of print_downpart with all three flags enabled
  # on the path from target to instance node; nil if no such path is known
  def my_path_computation
    target_path = @@paths[@@node.id()]
    return nil if target_path.nil?

    target_path.print_downpart(true, true, true)
  end
end
785
+
786
+
787
+
788
+ ##################
789
+ # ancestor rule: grammar rule
790
+ # expanding lowest common ancestor of current node and target
791
class AncestorRuleFeature < RosySingleFeatureExtractor
  AncestorRuleFeature.announce_me()

  # name (and designator) of this feature
  def self.feature_name
    "ancestor_rule"
  end

  # SQL column type for this feature
  def self.sql_type
    "VARCHAR(50)"
  end

  # feature type
  def self.feature_type
    "syn"
  end

  ################
  private

  # returns: string "<label of lca> -> <labels of lca's children>", where
  # lca is what the target-to-node path reports as lca() and labels are
  # simplified phrase types; nil if there is no path or no lca
  def compute_feature_instanceOK
    target_path = @@paths[@@node.id()]
    return nil if target_path.nil?

    ancestor = target_path.lca()
    return nil unless ancestor

    lhs = @@interpreter_class.simplified_pt(ancestor).to_s
    rhs = ancestor.children.map { |child| @@interpreter_class.simplified_pt(child).to_s }
    lhs + " -> " + rhs.join(" ")
  end
end
822
+
823
+ ##################
824
+ # relative position to target: left, right, including target
825
class RelativePositionFeature < RosySingleFeatureExtractor
  RelativePositionFeature.announce_me()

  # name (and designator) of this feature
  def self.feature_name
    "relpos"
  end

  # SQL column type for this feature
  def self.sql_type
    "CHAR(5)"
  end

  # feature type
  def self.feature_type
    "syn"
  end

  ################
  private

  # returns: the relative position precomputed in set_node()
  def compute_feature_instanceOK
    @@relpos
  end
end
845
+
846
+
847
+ ################
848
+ # phrase type of the instance node
849
class PhraseTypeFeature < RosySingleFeatureExtractor
  PhraseTypeFeature.announce_me()

  # name (and designator) of this feature
  def self.feature_name
    "pt"
  end

  # SQL column type for this feature
  def self.sql_type
    "VARCHAR(15)"
  end

  # feature type
  def self.feature_type
    "syn"
  end

  ################
  private

  # returns: simplified phrase type of the instance node
  def compute_feature_instanceOK
    @@interpreter_class.simplified_pt(@@node)
  end
end
869
+
870
+ ################
871
+ # grammatical function that this instance node fills for the target
872
class GFFeature < RosySingleFeatureExtractor
  GFFeature.announce_me()

  # name (and designator) of this feature
  def self.feature_name
    "gf"
  end

  # SQL column type for this feature
  def self.sql_type
    "VARCHAR(20)"
  end

  # feature type
  def self.feature_type
    "syn"
  end

  ################
  private

  # returns: relation of the first [rel, node] pair in the target's
  # grammatical functions whose node is the instance node;
  # nil if there are no grammatical functions or none matches
  def compute_feature_instanceOK
    return nil unless @@target_gfs

    @@target_gfs.each do |rel, gf_node|
      return rel if @@node == gf_node
    end

    nil
  end
end
902
+
903
+ ##################
904
+ # phrase type of parent of this node
905
class FatherPhraseTypeFeature < RosySingleFeatureExtractor
  FatherPhraseTypeFeature.announce_me()

  # name (and designator) of this feature
  def self.feature_name
    "father_pt"
  end

  # SQL column type for this feature
  def self.sql_type
    "VARCHAR(15)"
  end

  # feature type
  def self.feature_type
    "syn"
  end

  #####
  private

  # returns: simplified phrase type of the instance node's parent,
  # nil if the node has no parent
  def compute_feature_instanceOK
    return nil unless @@node.parent

    @@interpreter_class.simplified_pt(@@node.parent)
  end
end
929
+
930
+ ################
931
+ # target lemma
932
class TargetLemmaFeature < RosySingleFeatureExtractor
  TargetLemmaFeature.announce_me()

  # name (and designator) of this feature
  def self.feature_name
    "target"
  end

  # SQL column type for this feature
  def self.sql_type
    "VARCHAR(20)"
  end

  # feature type
  def self.feature_type
    "ubiq"
  end

  # inherited info plus the marker "index": this is an index feature
  def self.info
    super().concat(["index"])
  end

  #####
  private

  # returns: lemma_backoff of the main target node
  def compute_feature_instanceOK
    @@interpreter_class.lemma_backoff(@@target)
  end
end
956
+
957
################
# Part of speech of the target lemma.
class TargetPOSFeature < RosySingleFeatureExtractor
  TargetPOSFeature.announce_me()

  # name of the single feature this extractor computes
  def TargetPOSFeature.feature_name()
    return "target_pos"
  end

  # SQL column type declared for this feature
  def TargetPOSFeature.sql_type()
    return "VARCHAR(10)"
  end

  # feature type tag of this extractor
  def TargetPOSFeature.feature_type()
    return "ubiq"
  end

  # returns: the superclass info list extended by "index"
  #          (additional info: I am an index feature)
  def TargetPOSFeature.info()
    # Array#+ builds a new array; the previous concat() appended to the
    # array returned by the superclass in place, which would corrupt it
    # if the superclass ever hands out a shared object.
    return super() + ["index"]
  end

  #####
  private

  # returns: the value currently stored in @@target_pos
  def compute_feature_instanceOK()
    return @@target_pos
  end
end
984
+
985
################
# Fine-grained part of speech of the target: the interpreter's
# (non-simplified) pt value of the target node.
class TargetFineGrainedPOSFeature < RosySingleFeatureExtractor
  announce_me()

  # name of the single feature this extractor computes
  def self.feature_name()
    "finegrained_target_pos"
  end

  # SQL column type declared for this feature
  def self.sql_type()
    "VARCHAR(20)"
  end

  # feature type tag of this extractor
  def self.feature_type()
    "ubiq"
  end

  #####
  private

  # returns: result of the interpreter's pt for the target node
  def compute_feature_instanceOK()
    @@interpreter_class.pt(@@target)
  end
end
1008
+
1009
################
# Voice of the target lemma.
class TargetVoiceFeature < RosySingleFeatureExtractor
  announce_me()

  # name of the single feature this extractor computes
  def self.feature_name()
    "target_voice"
  end

  # SQL column type declared for this feature
  def self.sql_type()
    "CHAR(4)"
  end

  # feature type tag of this extractor
  def self.feature_type()
    "ubiq"
  end

  #####
  private

  # Asks the interpreter for the voice of the target and cuts the answer
  # down to its first four characters (the column is declared as CHAR(4)).
  #
  # returns: the truncated voice, or nil if the interpreter reports none
  def compute_feature_instanceOK()
    target_voice = @@interpreter_class.voice(@@target)
    target_voice ? target_voice.slice(0, 4) : nil
  end
end
1036
+
1037
################
# The governing verb of the target.
class GoverningVerbOfTargetFeature < RosySingleFeatureExtractor
  GoverningVerbOfTargetFeature.announce_me()

  # name of the single feature this extractor computes
  def GoverningVerbOfTargetFeature.feature_name()
    return "gov_verb"
  end

  # SQL column type declared for this feature
  # (spelling normalised from "VArCHAR(20)" to match the other extractors)
  def GoverningVerbOfTargetFeature.sql_type()
    return "VARCHAR(20)"
  end

  # feature type tag of this extractor
  def GoverningVerbOfTargetFeature.feature_type()
    return "sem"
  end

  #####
  private

  # returns: RosyFeatureExtractor.headlemma of @@governing_verb,
  #          or nil when no governing verb is set
  def compute_feature_instanceOK()
    if @@governing_verb
      return RosyFeatureExtractor.headlemma(@@governing_verb)
    else
      return nil
    end
  end
end
1063
+
1064
################
# Preposition for this constituent.
class PrepFeature < RosySingleFeatureExtractor
  announce_me()

  # name of the single feature this extractor computes
  def self.feature_name()
    "prep"
  end

  # SQL column type declared for this feature
  def self.sql_type()
    "VARCHAR(20)"
  end

  # feature type tag of this extractor
  def self.feature_type()
    "syn"
  end

  #####
  private

  # returns: result of the interpreter's preposition for the current node
  def compute_feature_instanceOK()
    @@interpreter_class.preposition(@@node)
  end
end
1086
+
1087
################
# Head lemma of this constituent.
class HeadFeature < RosySingleFeatureExtractor
  announce_me()

  # name of the single feature this extractor computes
  def self.feature_name()
    "const_head"
  end

  # SQL column type declared for this feature
  def self.sql_type()
    "VARCHAR(20)"
  end

  # feature type tag of this extractor
  def self.feature_type()
    "sem"
  end

  #####
  private

  # returns: RosyFeatureExtractor.headlemma of the current node
  def compute_feature_instanceOK()
    RosyFeatureExtractor.headlemma(@@node)
  end
end
1109
+
1110
################
# Part of speech of the head of this constituent.
class HeadPosFeature < RosySingleFeatureExtractor
  announce_me()

  # name of the single feature this extractor computes
  def self.feature_name()
    "const_head_pos"
  end

  # SQL column type declared for this feature
  def self.sql_type()
    "VARCHAR(10)"
  end

  # feature type tag of this extractor
  def self.feature_type()
    "syn"
  end

  #####
  private

  # returns: RosyFeatureExtractor.headpos of the current node
  def compute_feature_instanceOK()
    RosyFeatureExtractor.headpos(@@node)
  end
end
1132
+
1133
################
# Informative content word (see AbstractSynFeature): lemma and POS.
class IcontLemmaFeature < RosyFeatureExtractor
  announce_me()

  # designator of this (multi-feature) extractor
  def self.designator()
    "icont_word"
  end

  # names of the two features computed; same order as the pair returned
  # by compute_features_instanceOK
  def self.feature_names()
    ["icont_lemma", "icont_pos"]
  end

  # SQL column type declared for the features
  def self.sql_type()
    "VARCHAR(20)"
  end

  # feature type tag of this extractor
  def self.feature_type()
    "sem"
  end

  #####
  private

  # Asks the interpreter for the informative content node of the current
  # node and describes it by head lemma and head POS.
  #
  # returns: [lemma, pos]; [nil, nil] if there is no informative content node
  def compute_features_instanceOK()
    content_node = @@interpreter_class.informative_content_node(@@node)
    return [nil, nil] unless content_node

    [
      RosyFeatureExtractor.headlemma(content_node),
      RosyFeatureExtractor.headpos(content_node)
    ]
  end
end
1163
+
1164
+
1165
################
# Leftmost terminal of this constituent: lemma and POS.
class FirstWordFeature < RosyFeatureExtractor
  announce_me()

  # designator of this (multi-feature) extractor
  def self.designator()
    "firstword"
  end

  # names of the two features computed; same order as the pair returned
  # by compute_features_instanceOK
  def self.feature_names()
    ["firstword", "firstword_pos"]
  end

  # SQL column type declared for the features
  def self.sql_type()
    "VARCHAR(20)"
  end

  # feature type tag of this extractor
  def self.feature_type()
    "sem"
  end

  #####
  private

  # returns: [head lemma, head POS] of @@node_leftmost_terminal;
  #          [nil, nil] if no leftmost terminal is set
  def compute_features_instanceOK()
    first_terminal = @@node_leftmost_terminal
    return [nil, nil] unless first_terminal

    [
      RosyFeatureExtractor.headlemma(first_terminal),
      RosyFeatureExtractor.headpos(first_terminal)
    ]
  end
end
1194
+
1195
+
1196
################
# Rightmost terminal of this constituent: lemma and POS.
class LastWordFeature < RosyFeatureExtractor
  announce_me()

  # designator of this (multi-feature) extractor
  def self.designator()
    "lastword"
  end

  # names of the two features computed; same order as the pair returned
  # by compute_features_instanceOK
  def self.feature_names()
    ["lastword", "lastword_pos"]
  end

  # SQL column type declared for the features
  def self.sql_type()
    "VARCHAR(30)"
  end

  # feature type tag of this extractor
  def self.feature_type()
    "sem"
  end

  #####
  private

  # returns: [head lemma, head POS] of @@node_rightmost_terminal;
  #          [nil, nil] if no rightmost terminal is set
  def compute_features_instanceOK()
    last_terminal = @@node_rightmost_terminal
    return [nil, nil] unless last_terminal

    [
      RosyFeatureExtractor.headlemma(last_terminal),
      RosyFeatureExtractor.headpos(last_terminal)
    ]
  end
end
1225
+
1226
################
# Left sibling of the current node: phrase type and lemma.
class LeftSiblingFeature < RosyFeatureExtractor
  announce_me()

  # designator of this (multi-feature) extractor
  def self.designator()
    "leftsib"
  end

  # names of the two features computed; same order as the pair returned
  # by compute_features_instanceOK
  def self.feature_names()
    ["leftsib_pt", "leftsib_lemma"]
  end

  # SQL column type declared for the features
  def self.sql_type()
    "VARCHAR(20)"
  end

  # feature type tag of this extractor
  def self.feature_type()
    "sem"
  end

  #####
  private

  # The left sibling is the child of the parent whose rightmost terminal
  # has the largest word index that is still smaller than the word index
  # of this node's leftmost terminal. On equal indices the child seen
  # first wins.
  #
  # returns: [simplified_pt, lemma_backoff] of that sibling;
  #          [nil, nil] if the node has no parent, its leftmost terminal
  #          has no word index, or no child qualifies
  def compute_features_instanceOK()
    father = @@node.parent
    return [nil, nil] if father.nil?

    own_ix = terminal_index(@@node_leftmost_terminal)
    return [nil, nil] unless own_ix

    best = nil
    best_ix = nil
    father.children.each do |candidate|
      cand_ix = terminal_index(@@interpreter_class.rightmost_terminal(candidate))
      # children without an indexed rightmost terminal cannot be ranked
      next unless cand_ix
      # only children ending strictly before this node starts qualify
      next unless cand_ix < own_ix

      if best.nil? || best_ix < cand_ix
        best = candidate
        best_ix = cand_ix
      end
    end

    return [nil, nil] unless best

    [
      @@interpreter_class.simplified_pt(best),
      @@interpreter_class.lemma_backoff(best)
    ]
  end

  ###
  # returns: index (integer) of node in the list of terminals of this
  # sentence, as stored in @@terminals_ordered;
  # nil if node is nil or does not occur in the list
  def terminal_index(node) # SynNode, terminal
    return nil unless node

    @@terminals_ordered[node] # word index (or nil)
  end
end
1297
+
1298
################
# Distance between the head word of the constituent and the target (in words).
class WordDistanceFeature < RosySingleFeatureExtractor
  announce_me()

  # name of the single feature this extractor computes
  def self.feature_name()
    "worddistance"
  end

  # SQL column type declared for this feature
  # NOTE(review): the computed distance is not capped here; confirm that
  # TINYINT is wide enough for the sentence lengths that occur.
  def self.sql_type()
    "TINYINT"
  end

  # feature type tag of this extractor
  def self.feature_type()
    "syn"
  end

  #####
  private

  # Looks up the word indices of the head terminals of the current node
  # and of the target in @@terminals_ordered.
  #
  # returns: absolute difference of the two indices; nil if either head
  #          terminal or either index is missing
  def compute_feature_instanceOK()
    node_head = @@interpreter_class.head_terminal(@@node)
    target_head = @@interpreter_class.head_terminal(@@target)
    return nil if node_head.nil? || target_head.nil?

    node_ix = @@terminals_ordered[node_head]
    target_ix = @@terminals_ordered[target_head]
    return nil if node_ix.nil? || target_ix.nil?

    (node_ix - target_ix).abs
  end
end
1332
+
1333
################
# Is the current node a maximal projection?
# Heuristic: a node counts as maximal projection (1) if it has no parent
# or its category differs from its parent's; it does not (0) if the
# two categories are equal.
class IsMaxProj < RosySingleFeatureExtractor
  announce_me()

  # name of the single feature this extractor computes
  def self.feature_name()
    "ismaxproj"
  end

  # SQL column type declared for this feature
  def self.sql_type()
    "TINYINT"
  end

  # feature type tag of this extractor
  def self.feature_type()
    "syn"
  end

  #####
  private

  # returns: 1 or 0, see the heuristic described on the class
  def compute_feature_instanceOK()
    father = @@node.parent()
    return 1 unless father

    same_category =
      @@interpreter_class.category(@@node) == @@interpreter_class.category(father)
    same_category ? 0 : 1
  end
end
1365
+
1366
################
# Right sibling of the current node: phrase type and lemma.
class RightSiblingFeature < RosyFeatureExtractor
  announce_me()

  # designator of this (multi-feature) extractor
  def self.designator()
    "rightsib"
  end

  # names of the two features computed; same order as the pair returned
  # by compute_features_instanceOK
  def self.feature_names()
    ["rightsib_pt", "rightsib_lemma"]
  end

  # SQL column type declared for the features
  def self.sql_type()
    "VARCHAR(20)"
  end

  # feature type tag of this extractor
  def self.feature_type()
    "sem"
  end

  #####
  private

  # The right sibling is the child of the parent whose leftmost terminal
  # has the smallest word index that is still greater than the word index
  # of this node's rightmost terminal. On equal indices the child seen
  # first wins.
  #
  # returns: [simplified_pt, lemma_backoff] of that sibling;
  #          [nil, nil] if the node has no parent, its rightmost terminal
  #          has no word index, or no child qualifies
  def compute_features_instanceOK()
    father = @@node.parent
    return [nil, nil] if father.nil?

    own_ix = terminal_index(@@node_rightmost_terminal)
    return [nil, nil] unless own_ix

    best = nil
    best_ix = nil
    father.children.each do |candidate|
      cand_ix = terminal_index(@@interpreter_class.leftmost_terminal(candidate))
      # children without an indexed leftmost terminal cannot be ranked
      next unless cand_ix
      # only children starting strictly after this node ends qualify
      next unless cand_ix > own_ix

      if best.nil? || cand_ix < best_ix
        best = candidate
        best_ix = cand_ix
      end
    end

    return [nil, nil] unless best

    [
      @@interpreter_class.simplified_pt(best),
      @@interpreter_class.lemma_backoff(best)
    ]
  end

  ###
  # returns: index (integer) of node in the list of terminals of this
  # sentence, as stored in @@terminals_ordered;
  # nil if node is nil or does not occur in the list
  def terminal_index(node) # SynNode, terminal
    return nil unless node

    @@terminals_ordered[node] # word index (or nil)
  end
end
1437
+
1438
+
1439
+ # ################
1440
+ # # admin feature: word span of this constituent
1441
+ # class WordSpanFeature < RosySingleFeatureExtractor
1442
+ # WordSpanFeature.announce_me()
1443
+
1444
+ # def WordSpanFeature.feature_name()
1445
+ # return "wordspan"
1446
+ # end
1447
+ # def WordSpanFeature.sql_type()
1448
+ # return "VARCHAR(30)"
1449
+ # end
1450
+ # def WordSpanFeature.feature_type()
1451
+ # return "admin"
1452
+ # end
1453
+
1454
+ # #####
1455
+ # private
1456
+
1457
+ # def compute_feature_instanceOK()
1458
+
1459
+ # fwh = RosyFeatureExtractor.headlemma(@@node_leftmost_terminal)
1460
+ # lwh = RosyFeatureExtractor.headlemma(@@node_rightmost_terminal)
1461
+
1462
+ # if fwh.nil?
1463
+ # fwh = ""
1464
+ # end
1465
+ # if lwh.nil?
1466
+ # lwh = ""
1467
+ # end
1468
+
1469
+ # return fwh+ "-" +lwh
1470
+ # end
1471
+ # end
1472
+
1473
+
1474
################
# Admin feature: my node ID and my father's, separated by a space.
# A node without a father yields its own ID only (per the original note,
# the topnode has ID 0 -- not verifiable from this class alone).
class NodeIDFeature < RosySingleFeatureExtractor
  announce_me()

  # name of the single feature this extractor computes
  def self.feature_name()
    "nodeID"
  end

  # SQL column type declared for this feature
  def self.sql_type()
    "VARCHAR(100)"
  end

  # feature type tag of this extractor
  def self.feature_type()
    "admin"
  end

  #####
  private

  # returns: "<node id> <parent id>" if the node has a parent,
  #          otherwise "<node id>"
  def compute_feature_instanceOK()
    father = @@node.parent
    own_id = @@node.id.to_s
    return own_id unless father

    own_id + " " + father.id.to_s
  end
end
1502
+
1503
################
# Admin feature: sentence ID.
class SentidFeature < RosySingleFeatureExtractor
  SentidFeature.announce_me()

  # name of the single feature this extractor computes
  def SentidFeature.feature_name()
    return "sentid"
  end

  # SQL column type declared for this feature
  def SentidFeature.sql_type()
    return "VARCHAR(100)"
  end

  # feature type tag of this extractor
  def SentidFeature.feature_type()
    return "admin"
  end

  # returns: the superclass info list extended by "index"
  #          (additional info: I am an index feature)
  def SentidFeature.info()
    # Array#+ builds a new array; the previous concat() appended to the
    # array returned by the superclass in place, which would corrupt it
    # if the superclass ever hands out a shared object.
    return super() + ["index"]
  end

  #####
  private

  # returns: the instance ID built by construct_instance_id from the
  #          sentence ID and the frame ID
  # NOTE(review): @@sent and @@frame are dereferenced without a nil
  # check -- confirm both are always set when this runs.
  def compute_feature_instanceOK()
    return construct_instance_id(@@sent.id(), @@frame.id())
  end
end
1529
+
1530
+ # ################
1531
+ # # admin feature: tokens spanned by this constituent
1532
+ # class TokensFeature < RosySingleFeatureExtractor
1533
+ # TokensFeature.announce_me()
1534
+
1535
+ # def TokensFeature.feature_name()
1536
+ # return "tokens"
1537
+ # end
1538
+ # def TokensFeature.sql_type()
1539
+ # return "VARCHAR(100)"
1540
+ # end
1541
+ # def TokensFeature.feature_type()
1542
+ # return "admin"
1543
+ # end
1544
+
1545
+ # #####
1546
+ # private
1547
+
1548
+ # def compute_feature_instanceOK()
1549
+ # return @@node.to_s
1550
+ # end
1551
+ # end
1552
+
1553
################
# Admin feature: frame assigned by FN.
class FrameFeature < RosySingleFeatureExtractor
  FrameFeature.announce_me()

  # name of the single feature this extractor computes
  def FrameFeature.feature_name()
    return "frame"
  end

  # SQL column type declared for this feature
  def FrameFeature.sql_type()
    return "VARCHAR(35)"
  end

  # feature type tag of this extractor
  def FrameFeature.feature_type()
    return "ubiq"
  end

  # returns: the superclass info list extended by "index"
  #          (additional info: I am an index feature)
  def FrameFeature.info()
    # Array#+ builds a new array; the previous concat() appended to the
    # array returned by the superclass in place, which would corrupt it
    # if the superclass ever hands out a shared object.
    return super() + ["index"]
  end

  #####
  private

  # returns: name of the current frame, or nil when no frame is set
  def compute_feature_instanceOK()
    if @@frame
      return @@frame.name()
    else
      return nil
    end
  end
end
1583
+
1584
################
# Admin feature: is this node a terminal?
class TerminalFeature < RosySingleFeatureExtractor
  announce_me()

  # name of the single feature this extractor computes
  def self.feature_name()
    "term"
  end

  # SQL column type declared for this feature
  def self.sql_type()
    "TINYINT"
  end

  # feature type tag of this extractor
  def self.feature_type()
    "admin"
  end

  #####
  private

  # returns: 1 if the current node reports itself as terminal, else 0
  def compute_feature_instanceOK()
    @@node.is_terminal? ? 1 : 0
  end
end