shalmaneser 0.0.1.alpha

Sign up to get free protection for your applications and to get access to all the features.
Files changed (138) hide show
  1. data/.yardopts +8 -0
  2. data/CHANGELOG.rdoc +0 -0
  3. data/LICENSE.rdoc +0 -0
  4. data/README.rdoc +0 -0
  5. data/lib/common/AbstractSynInterface.rb +1227 -0
  6. data/lib/common/BerkeleyInterface.rb +375 -0
  7. data/lib/common/CollinsInterface.rb +1165 -0
  8. data/lib/common/ConfigData.rb +694 -0
  9. data/lib/common/Counter.rb +18 -0
  10. data/lib/common/DBInterface.rb +48 -0
  11. data/lib/common/EnduserMode.rb +27 -0
  12. data/lib/common/Eval.rb +480 -0
  13. data/lib/common/FixSynSemMapping.rb +196 -0
  14. data/lib/common/FrPrepConfigData.rb +66 -0
  15. data/lib/common/FrprepHelper.rb +1324 -0
  16. data/lib/common/Graph.rb +345 -0
  17. data/lib/common/ISO-8859-1.rb +24 -0
  18. data/lib/common/ML.rb +186 -0
  19. data/lib/common/Maxent.rb +215 -0
  20. data/lib/common/MiniparInterface.rb +1388 -0
  21. data/lib/common/Optimise.rb +195 -0
  22. data/lib/common/Parser.rb +213 -0
  23. data/lib/common/RegXML.rb +269 -0
  24. data/lib/common/RosyConventions.rb +171 -0
  25. data/lib/common/SQLQuery.rb +243 -0
  26. data/lib/common/STXmlTerminalOrder.rb +194 -0
  27. data/lib/common/SalsaTigerRegXML.rb +2347 -0
  28. data/lib/common/SalsaTigerXMLHelper.rb +99 -0
  29. data/lib/common/SleepyInterface.rb +384 -0
  30. data/lib/common/SynInterfaces.rb +275 -0
  31. data/lib/common/TabFormat.rb +720 -0
  32. data/lib/common/Tiger.rb +1448 -0
  33. data/lib/common/TntInterface.rb +44 -0
  34. data/lib/common/Tree.rb +61 -0
  35. data/lib/common/TreetaggerInterface.rb +303 -0
  36. data/lib/common/headz.rb +338 -0
  37. data/lib/common/option_parser.rb +13 -0
  38. data/lib/common/ruby_class_extensions.rb +310 -0
  39. data/lib/fred/Baseline.rb +150 -0
  40. data/lib/fred/FileZipped.rb +31 -0
  41. data/lib/fred/FredBOWContext.rb +863 -0
  42. data/lib/fred/FredConfigData.rb +182 -0
  43. data/lib/fred/FredConventions.rb +232 -0
  44. data/lib/fred/FredDetermineTargets.rb +324 -0
  45. data/lib/fred/FredEval.rb +312 -0
  46. data/lib/fred/FredFeatureExtractors.rb +321 -0
  47. data/lib/fred/FredFeatures.rb +1061 -0
  48. data/lib/fred/FredFeaturize.rb +596 -0
  49. data/lib/fred/FredNumTrainingSenses.rb +27 -0
  50. data/lib/fred/FredParameters.rb +402 -0
  51. data/lib/fred/FredSplit.rb +84 -0
  52. data/lib/fred/FredSplitPkg.rb +180 -0
  53. data/lib/fred/FredTest.rb +607 -0
  54. data/lib/fred/FredTrain.rb +144 -0
  55. data/lib/fred/PlotAndREval.rb +480 -0
  56. data/lib/fred/fred.rb +45 -0
  57. data/lib/fred/md5.rb +23 -0
  58. data/lib/fred/opt_parser.rb +250 -0
  59. data/lib/frprep/AbstractSynInterface.rb +1227 -0
  60. data/lib/frprep/Ampersand.rb +37 -0
  61. data/lib/frprep/BerkeleyInterface.rb +375 -0
  62. data/lib/frprep/CollinsInterface.rb +1165 -0
  63. data/lib/frprep/ConfigData.rb +694 -0
  64. data/lib/frprep/Counter.rb +18 -0
  65. data/lib/frprep/FNCorpusXML.rb +643 -0
  66. data/lib/frprep/FNDatabase.rb +144 -0
  67. data/lib/frprep/FixSynSemMapping.rb +196 -0
  68. data/lib/frprep/FrPrepConfigData.rb +66 -0
  69. data/lib/frprep/FrameXML.rb +513 -0
  70. data/lib/frprep/FrprepHelper.rb +1324 -0
  71. data/lib/frprep/Graph.rb +345 -0
  72. data/lib/frprep/ISO-8859-1.rb +24 -0
  73. data/lib/frprep/MiniparInterface.rb +1388 -0
  74. data/lib/frprep/Parser.rb +213 -0
  75. data/lib/frprep/RegXML.rb +269 -0
  76. data/lib/frprep/STXmlTerminalOrder.rb +194 -0
  77. data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
  78. data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
  79. data/lib/frprep/SleepyInterface.rb +384 -0
  80. data/lib/frprep/SynInterfaces.rb +275 -0
  81. data/lib/frprep/TabFormat.rb +720 -0
  82. data/lib/frprep/Tiger.rb +1448 -0
  83. data/lib/frprep/TntInterface.rb +44 -0
  84. data/lib/frprep/Tree.rb +61 -0
  85. data/lib/frprep/TreetaggerInterface.rb +303 -0
  86. data/lib/frprep/do_parses.rb +142 -0
  87. data/lib/frprep/frprep.rb +686 -0
  88. data/lib/frprep/headz.rb +338 -0
  89. data/lib/frprep/one_parsed_file.rb +28 -0
  90. data/lib/frprep/opt_parser.rb +94 -0
  91. data/lib/frprep/ruby_class_extensions.rb +310 -0
  92. data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
  93. data/lib/rosy/DBMySQL.rb +146 -0
  94. data/lib/rosy/DBSQLite.rb +280 -0
  95. data/lib/rosy/DBTable.rb +239 -0
  96. data/lib/rosy/DBWrapper.rb +176 -0
  97. data/lib/rosy/ExternalConfigData.rb +58 -0
  98. data/lib/rosy/FailedParses.rb +130 -0
  99. data/lib/rosy/FeatureInfo.rb +242 -0
  100. data/lib/rosy/GfInduce.rb +1115 -0
  101. data/lib/rosy/GfInduceFeature.rb +148 -0
  102. data/lib/rosy/InputData.rb +294 -0
  103. data/lib/rosy/RosyConfigData.rb +115 -0
  104. data/lib/rosy/RosyConfusability.rb +338 -0
  105. data/lib/rosy/RosyEval.rb +465 -0
  106. data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
  107. data/lib/rosy/RosyFeaturize.rb +280 -0
  108. data/lib/rosy/RosyInspect.rb +336 -0
  109. data/lib/rosy/RosyIterator.rb +477 -0
  110. data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
  111. data/lib/rosy/RosyPruning.rb +165 -0
  112. data/lib/rosy/RosyServices.rb +744 -0
  113. data/lib/rosy/RosySplit.rb +232 -0
  114. data/lib/rosy/RosyTask.rb +19 -0
  115. data/lib/rosy/RosyTest.rb +826 -0
  116. data/lib/rosy/RosyTrain.rb +232 -0
  117. data/lib/rosy/RosyTrainingTestTable.rb +786 -0
  118. data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
  119. data/lib/rosy/View.rb +418 -0
  120. data/lib/rosy/opt_parser.rb +379 -0
  121. data/lib/rosy/rosy.rb +77 -0
  122. data/lib/shalmaneser/version.rb +3 -0
  123. data/test/frprep/test_opt_parser.rb +94 -0
  124. data/test/functional/functional_test_helper.rb +40 -0
  125. data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
  126. data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
  127. data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
  128. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
  129. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
  130. data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
  131. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
  132. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
  133. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
  134. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
  135. data/test/functional/test_fred.rb +47 -0
  136. data/test/functional/test_frprep.rb +52 -0
  137. data/test/functional/test_rosy.rb +20 -0
  138. metadata +284 -0
@@ -0,0 +1,1448 @@
1
+ # -*- coding: utf-8 -*-
2
+ require "common/headz"
3
+ require "common/SalsaTigerRegXML"
4
+ require "common/ruby_class_extensions"
5
# Mix the project's EnumerableDistribute helpers into every Array
# (EnumerableDistribute is expected to come from common/ruby_class_extensions).
Array.class_eval do
  include EnumerableDistribute
end
8
+
9
+
10
+ require "common/AbstractSynInterface"
11
+
12
+ #############################################
13
+ #
14
+ # max. projection:
15
+ #
16
+ # consists of methods that are 'building blocks' for computing
17
+ # the maximum projection of a verb in TIGER syntax
18
+ #
19
+ # basically, computing the max. projection is about moving an
20
+ # upper node upward. At the beginning it is the parent of the
21
+ # terminal node for the verb, and each building block moves it up
22
+ # to its parent, if the building block matches.
23
+ #
24
+ # Apart from the upper node, a lower node is also watched. At the
25
+ # beginning it is the terminal node for the verb, later it is usually
26
+ # the 'HD' child of the upper node. This lower node is needed for
27
+ # testing whether a building block matches.
28
+ #
29
+ # For handling conjunction, the upper node is split into two, a 'lower upper'
30
+ # and an 'upper upper' node. The 'lower upper' is used when some relation
31
+ # between the upper node and its descendants is tested, and the 'upper upper'
32
+ # is used when some relation between the upper node and its predecessors
33
+ # is tested. Usually the 'lower upper' and the 'upper upper' are the same,
34
+ # but conjunction building blocks move the 'upper upper' up to its parent
35
+ # while leaving the 'lower upper' unchanged.
36
+ #
37
+ # So all building block methods take three arguments: lower, upper_l and
38
+ # upper_u. All three are nodes given as SalsaTigerSentence objects
39
+ #
40
+ # All building block methods give as their return value a list of three
41
+ # nodes: [new_lower, new_upper_l, new_upper_u], if the building block
42
+ # matched. If it does not match, nil is returned.
43
+ #
44
+ # The method explain describes all building blocks,
45
+ # the conditions for the building blocks matching, and shows
46
+ # where the lower and the upper nodes will be after a building block matched.
47
+ #
48
+ # building blocks:
49
+ # pp_pp
50
+ # pp_fin
51
+ # inf_fin
52
+ # vzinf_fin
53
+ # cvzinf_fin
54
+ # modal
55
+ # othermodal
56
+ # conj
57
+ #
58
+ # To compute the maximal projection of a verb,
59
+ # we start at the parent of the terminal node for the verb
60
+ # "and move upwards.
61
+ # "The move upwards is broken up in little building blocks."
62
+ # "Each of them licenses one step upward in the syntactic tree."
63
+ #
64
+ # "Each building block needs information about two nodes:"
65
+ # "The current upper node (at the beginning, that is"
66
+ # "the parent of the terminal node for the verb) and"
67
+ # "one specific child of that current upper node"
68
+ # "(at the beginning, that is the terminal node for the verb)."
69
+ #
70
+ # "Each building block provides information of"
71
+ # "- where the new upper node is, depending on the current"
72
+ # " upper node, and"
73
+ # "- where the new specific child is."
74
+ #
75
+ # "For handling conjunction, we need to complicate this picture somewhat:"
76
+ # "We split the current upper node into an 'upper upper'"
77
+ # "and a 'lower upper' node."
78
+ # "If we want to check the edge from the current upper node upwards,"
79
+ # "we use the 'upper upper'."
80
+ # "If we want to check an edge from the current upper node downwards,"
81
+ # "we use the 'lower upper'."
82
+ # "Almost always, the 'lower upper' and the 'upper upper' will be the same."
83
+ # "Except for the building block for conjunction:"
84
+ # "It moves the 'upper upper' one level up,"
85
+ # "but leaves the 'lower upper' the same."
86
+ #
87
+ # "There are five levels of building blocks."
88
+ #
89
+ # "* 1st level: auxiliary verb constructions involving a participle"
90
+ # " The following building blocks are tried, in this order:"
91
+ # " CONJ, PP-PP, CONJ, PP_FIN"
92
+ #
93
+ # "* 2nd level: infinitive constructions"
94
+ # " The following building blocks are tried, in this order:"
95
+ # " CONJ, INF-FIN, VZINF-FIN, CVZINF-FIN"
96
+ #
97
+ # "* 3rd level: modals"
98
+ # " The following building blocks are tried, in this order:"
99
+ # " CONJ, MODAL, OTHERMODAL"
100
+ #
101
+ # "* 4th level = 1st level"
102
+ #
103
+ # "* 5th level = 2nd level"
104
+ #
105
+ #
106
+ # "***These are the building blocks:"
107
+ #
108
+ # "PP-PP"
109
+ # " VP (new uppermost node)"
110
+ # " / | \\OC"
111
+ # " HD/ | VP|CVP (current uppermost node)"
112
+ # " / | |"
113
+ # " o FE |HD|CJ"
114
+ # "POS: V[AMV]PP |"
115
+ # " new target current target"
116
+ # " POS: V[AMV]PP"
117
+ #
118
+ # "PP-FIN"
119
+ # " S/VP (new uppermost node)"
120
+ # " / | \\OC or PD"
121
+ # " HD/ | VP|CVP|CO (current uppermost node)"
122
+ # " / | |"
123
+ # " o FE |HD|CJ"
124
+ # "POS: V[AMV]FIN |"
125
+ # " V[AMV]INF current target"
126
+ # "or CAT: VZ POS: V[AMV]PP"
127
+ #
128
+ # "INF_FIN"
129
+ # " S/VP (new uppermost node)"
130
+ # " / | \\OC"
131
+ # " HD/ | VP|CVP (current uppermost node)"
132
+ # " / | |"
133
+ # " o FE |HD|CJ"
134
+ # "POS: VAFIN |"
135
+ # " VAINF current target"
136
+ # " VVINF POS: V[AMV]INF"
137
+ # " new target"
138
+ #
139
+ # "VZINF-FIN"
140
+ # " S/VP (new uppermost node)"
141
+ # " / | \\OC"
142
+ # " HD/ | VP (current uppermost node)"
143
+ # " / | |"
144
+ # " o FE |HD"
145
+ # "POS: V[AV]FIN |"
146
+ # " new target current target"
147
+ # " CAT: VZ"
148
+ #
149
+ # "CVZINF-FIN"
150
+ # " S/VP (new uppermost node)"
151
+ # " | \\OC"
152
+ # " | CVP (current uppermost node)"
153
+ # " | |"
154
+ # " FE |CJ"
155
+ # " |"
156
+ # " current and new target"
157
+ # " CAT: VZ"
158
+ #
159
+ # "MODAL"
160
+ # " S/VP (new uppermost node)"
161
+ # " / | \\OC"
162
+ # " HD/ | VP|CVP (current uppermost node)"
163
+ # " / | |"
164
+ # " o FE |HD|CJ"
165
+ # " POS: |"
166
+ # " VM(PP|FIN|INF) current target"
167
+ # " new target POS: V[AMV]INF"
168
+ #
169
+ # "OTHERMODAL"
170
+ # " S/VP (new uppermost node)"
171
+ # " / | \\OC"
172
+ # " HD/ | VP (current uppermost node)"
173
+ # " / | | \\"
174
+ # " o FE |HD \\OC"
175
+ # "POS: VMFIN | \\"
176
+ # " VMINF POS: current target"
177
+ # " VMPP V[AMV]INF POS: V[AMV]PP"
178
+ # " new target V[AMV]FIN"
179
+ #
180
+ # "CONJ"
181
+ # " CVP (new upper uppermost node)"
182
+ # " | \\CJ"
183
+ # " | VP (current and new uppermost node)"
184
+ # " | |"
185
+ # " FE |"
186
+ # " |"
187
+ # " current and new target"
188
+ ###
189
+ module TigerMaxProjection
190
+
191
# Compute the maximal projection of a verb.
#
# node: terminal node of the verb; must respond to +parent+.
#
# Returns a hash with two entries:
#   'max_proj'          => the upper node reached after all five levels
#   'max_proj_at_level' => array holding the verb's parent, followed by the
#                          'upper upper' node reached after each level
# If the node has no parent, the node itself is returned under both keys.
def max_projection(node)
  start = node.parent
  # node has no parent? recover by handing back the node itself
  return { 'max_proj' => node, 'max_proj_at_level' => [node] } if start.nil?

  levels = [start]
  # state is always [lower, upper_l, upper_u]
  state = [node, start, start]

  # the five levels: participle, infinitive, modal, participle, infinitive
  [:project_participle, :project_infinitive, :project_modal,
   :project_participle, :project_infinitive].each do |level|
    lower, upper_l, upper_u = send(level, *state)
    state = [lower, upper_l, upper_u]
    levels << upper_u
  end

  { 'max_proj' => state.last, 'max_proj_at_level' => levels }
end
223
+
224
+
225
+ ###
226
# Run a sequence of local-tree tests.
#
# path: array of step descriptions; each one is handed to test_step together
#       with the result of the preceding step (an empty hash for the first).
#
# Returns the result of the last step, or nil as soon as one step fails.
# An empty path yields the initial empty hash.
def test_localtrees(path)
  # TODO (carried over from the original author, translated from German):
  # continue here -- what exactly happens in this method?
  outcome = {}

  # test each step, feeding it the outcome of the one before
  path.each do |step|
    outcome = test_step(step, outcome)
    break if outcome.nil?
  end

  # result of the last step (nil if a step failed)
  outcome
end
246
+
247
+ ######
248
+ private
249
+
250
+ ###
251
# Test one step of a local-tree path.
#
# path:     hash with the entries
#             'from' => [node, 'pos'|'cat'|nil, pattern]
#             'to'   => [node or nil, 'pos'|'cat'|nil, pattern]
#             'edge' => ['up'|'dn', regexp for the edge label]
#           The node slot may hold the flags 'tp_prev_to' / 'tp_prev_from'
#           to refer to the result of the previous step.
# previous: result hash of the previous step
#
# Returns {'from' => start node, 'to' => [matching end nodes]},
# or nil if the step does not match.
# Prints an error and exits with status 1 if a path entry is missing
# or the direction is unknown.
def test_step(path, previous)
  if %w[from to edge].any? { |key| path[key].nil? }
    $stderr.puts 'TigerAux error: missing path hash entry'
    exit 1
  end

  start, *start_descr = path['from']
  target, *target_descr = path['to']

  # resolve the special flags tp_prev_to and tp_prev_from
  # against the 'previous' hash
  start = cf_previous(start, previous)
  target = cf_previous(target, previous)

  # the 'from' node must fit its own description
  return nil unless test_node(start, start_descr)

  # follow the edge and collect prospective end nodes
  direction, edgelabel = path['edge']
  candidates =
    case direction
    when 'up'
      start.parent_label() =~ edgelabel ? [start.parent()] : []
    when 'dn'
      below = []
      start.each_child { |child| below << child if child.parent_label() =~ edgelabel }
      below
    else
      $stderr.puts 'TigerAux error: unknown direction'
      exit 1
    end

  # keep end nodes that are the requested node (if one was given)
  # and that fit the 'to' description
  matching = candidates.select do |candidate|
    (target.nil? || target == candidate) && test_node(candidate, target_descr)
  end

  return nil if matching.empty?

  { 'from' => start, 'to' => matching }
end
309
+
310
+ ###
311
# Check a node against a description.
#
# node:  object responding to part_of_speech / category
# descr: pair [kind, pattern]; kind is 'pos', 'cat' or nil
#
# Returns true/false depending on whether the node's part of speech ('pos')
# or category ('cat') matches the pattern; a nil kind always yields true.
# Prints an error and exits with status 1 for a nil node or an unknown kind.
def test_node(node, descr)
  kind, pattern = descr

  if node.nil?
    $stderr.puts 'TigerAux error: test_node nil'
    exit 1
  end

  case kind
  when 'pos' then node.part_of_speech =~ pattern ? true : false
  when 'cat' then node.category =~ pattern ? true : false
  when nil then true
  else
    $stderr.puts 'TigerAux error: neither cat nor pos'
    exit 1
  end
end
339
+
340
+ ###
341
# Resolve the special node flags against the result of the previous step.
#
# 'tp_prev_to'   => first entry of previous['to']
# 'tp_prev_from' => previous['from']
# anything else is returned unchanged.
def cf_previous(node, previous)
  return previous['to'].first if 'tp_prev_to' == node
  return previous['from'] if 'tp_prev_from' == node

  node
end
351
+
352
+ ###
353
# Level for auxiliary constructions involving a participle:
# tries the building blocks conj, pp_pp, conj, pp_fin, in this order.
# Returns [lower, upper_l, upper_u] as computed by project_this.
def project_participle(lower, upper_l, upper_u)
  blocks = %w[conj pp_pp conj pp_fin].map { |name| self.method(name) }
  project_this(lower, upper_l, upper_u, blocks)
end
360
+
361
+ ###
362
# Level for infinitive constructions:
# tries the building blocks conj, inf_fin, vzinf_fin, cvzinf_fin, in this order.
# Returns [lower, upper_l, upper_u] as computed by project_this.
def project_infinitive(lower, upper_l, upper_u)
  blocks = %w[conj inf_fin vzinf_fin cvzinf_fin].map { |name| self.method(name) }
  project_this(lower, upper_l, upper_u, blocks)
end
370
+
371
+ ###
372
# Level for modals:
# tries the building blocks conj, modal, othermodal, in this order.
# Returns [lower, upper_l, upper_u] as computed by project_this.
def project_modal(lower, upper_l, upper_u)
  blocks = %w[conj modal othermodal].map { |name| self.method(name) }
  project_this(lower, upper_l, upper_u, blocks)
end
379
+
380
+ ###
381
# Same building-block sequence as project_participle
# (conj, pp_pp, conj, pp_fin); kept under its own name.
# Returns [lower, upper_l, upper_u] as computed by project_this.
def project_participle_(lower, upper_l, upper_u)
  blocks = %w[conj pp_pp conj pp_fin].map { |name| self.method(name) }
  project_this(lower, upper_l, upper_u, blocks)
end
388
+
389
+ ###
390
# Apply a list of building blocks one after the other.
#
# method_list: callables taking (lower, upper_l, upper_u) and returning
#              either a new triple or nil (= building block did not match)
#
# Each matching block updates the triple that is passed to the next block.
# Returns the final [lower, upper_l, upper_u].
def project_this(lower, upper_l, upper_u, method_list)
  state = [lower, upper_l, upper_u]

  method_list.each do |block|
    outcome = block.call(*state)
    # nil: the block did not match, keep the current nodes
    next if outcome.nil?

    new_lower, new_upper_l, new_upper_u = outcome
    state = [new_lower, new_upper_l, new_upper_u]
  end

  state
end
399
+
400
+ ###
401
# Building block PP-PP: a participle below a participle.
#
# Matches if
#  - lower is a participle (POS V[AMV]PP) linked to upper_l, a VP or CVP,
#    by an edge labeled exactly HD or CJ,
#  - upper_u (VP or CVP) is linked to a VP parent by an OC edge,
#  - that VP parent has an HD child with POS V[AMV]PP.
#
# Returns [new_lower, new_upper, new_upper], where new_upper is the VP parent
# and new_lower its first matching HD child; nil if the block does not match.
#
# The edge pattern is written /^(HD|CJ)$/: the earlier form /^(HD)|(CJ)$/
# only anchored each alternative on one side and so also accepted labels
# that merely start with HD or end in CJ.
def pp_pp(lower, upper_l, upper_u)
  retv =
    test_localtrees([
      {'from' => [lower, 'pos', /^V[AMV]PP$/],
       'to' => [upper_l, 'cat', /^C?VP$/],
       'edge' => ['up', /^(HD|CJ)$/]},
      {'from' => [upper_u, 'cat', /^C?VP$/],
       'to' => [nil, 'cat', /^VP$/],
       'edge' => ['up', /^OC$/]},
      {'from' => ['tp_prev_to', 'cat', /^VP$/],
       'to' => [nil, 'pos', /^V[AMV]PP$/],
       'edge' => ['dn', /^HD$/]}
    ])

  return nil if retv.nil?

  [retv['to'].first, retv['from'], retv['from']]
end
422
+
423
+ ###
424
# Building block PP-FIN: a participle below a finite or infinite verb (or VZ).
#
# Matches if
#  - lower is a participle (POS V[AMV]PP) linked to upper_l, a VP or CVP,
#    by an edge labeled exactly HD or CJ,
#  - upper_u (VP or CVP) is linked to a VP or S parent by an OC or PD edge,
#  - that parent has an HD child that is either a terminal with POS
#    V[AMV]FIN / V[AMV]INF or a node of category VZ.
#
# Returns [new_lower, new_upper, new_upper], where new_upper is the VP/S
# parent and new_lower its first matching HD child; nil if no match.
#
# Alternations are grouped inside the anchors (/^(VP|S)$/ etc.): the earlier
# forms such as /^(VP)|S$/ anchored each alternative on one side only and
# so also accepted e.g. the category CS.
def pp_fin(lower, upper_l, upper_u)
  climbed =
    test_localtrees([
      {'from' => [lower, 'pos', /^V[AMV]PP$/],
       'to' => [upper_l, 'cat', /^C?VP$/],
       'edge' => ['up', /^(HD|CJ)$/]},
      {'from' => [upper_u, 'cat', /^C?VP$/],
       'to' => [nil, 'cat', /^(VP|S)$/],
       'edge' => ['up', /^(OC|PD)$/]}
    ])

  return nil if climbed.nil?

  new_upper = climbed['to'].first

  # test two alternatives:
  # head child of new_upper is either a VXFIN or VXINF terminal...
  head =
    test_localtrees([
      {'from' => [new_upper, 'cat', /^(VP|S)$/],
       'to' => [nil, 'pos', /^V[AMV](FIN|INF)$/],
       'edge' => ['dn', /^HD$/]}
    ])

  # ... or a VZ nonterminal
  if head.nil?
    head =
      test_localtrees([
        {'from' => [new_upper, 'cat', /^(VP|S)$/],
         'to' => [nil, 'cat', /^VZ$/],
         'edge' => ['dn', /^HD$/]}
      ])
  end

  return nil if head.nil?

  [head['to'].first, new_upper, new_upper]
end
467
+
468
+
469
+ ###
470
# Building block INF-FIN: an infinitive below an auxiliary or infinitive.
#
# Matches if
#  - lower is an infinitive (POS V[AMV]INF) linked to upper_l, a VP or CVP,
#    by an edge labeled exactly HD or CJ,
#  - upper_u (VP or CVP) is linked to a VP or S parent by an OC edge,
#  - that parent has an HD child with POS VAFIN, VAINF or VVINF.
#
# Returns [new_lower, new_upper, new_upper], where new_upper is the VP/S
# parent and new_lower its first matching HD child; nil if no match.
#
# Alternations are grouped inside the anchors; the earlier forms such as
# /^(VAFIN)|(VAINF)|(VVINF)$/ left most alternatives partly or wholly
# unanchored.
def inf_fin(lower, upper_l, upper_u)
  retv =
    test_localtrees([
      {'from' => [lower, 'pos', /^V[AMV]INF$/],
       'to' => [upper_l, 'cat', /^C?VP$/],
       'edge' => ['up', /^(HD|CJ)$/]},
      {'from' => [upper_u, 'cat', /^C?VP$/],
       'to' => [nil, 'cat', /^(VP|S)$/],
       'edge' => ['up', /^OC$/]},
      {'from' => ['tp_prev_to', 'cat', /^(VP|S)$/],
       'to' => [nil, 'pos', /^(VAFIN|VAINF|VVINF)$/],
       'edge' => ['dn', /^HD$/]}
    ])

  return nil if retv.nil?

  [retv['to'].first, retv['from'], retv['from']]
end
490
+
491
+
492
+ ###
493
# Building block VZINF-FIN: a VZ (zu-infinitive) below a finite verb.
#
# Matches if
#  - lower is a node of category VZ linked to upper_l, a VP, by an HD edge,
#  - upper_u (VP) is linked to a VP or S parent by an OC edge,
#  - that parent has an HD child with POS V[AV]FIN.
#
# Returns [new_lower, new_upper, new_upper], where new_upper is the VP/S
# parent and new_lower its first matching HD child; nil if no match.
#
# The parent category is tested with /^(VP|S)$/; the earlier /^(VP)|S$/
# anchored each alternative on one side only and also accepted e.g. CS.
def vzinf_fin(lower, upper_l, upper_u)
  retv =
    test_localtrees([
      {'from' => [lower, 'cat', /^VZ$/],
       'to' => [upper_l, 'cat', /^VP$/],
       'edge' => ['up', /^HD$/]},
      {'from' => [upper_u, 'cat', /^VP$/],
       'to' => [nil, 'cat', /^(VP|S)$/],
       'edge' => ['up', /^OC$/]},
      {'from' => ['tp_prev_to', 'cat', /^(VP|S)$/],
       'to' => [nil, 'pos', /^V[AV]FIN$/],
       'edge' => ['dn', /^HD$/]}
    ])

  return nil if retv.nil?

  [retv['to'].first, retv['from'], retv['from']]
end
514
+
515
+ ###
516
# Building block CVZINF-FIN: a VZ that is a conjunct of a CVP.
#
# Matches if
#  - lower is a node of category VZ linked to upper_l, a CVP, by a CJ edge,
#  - upper_u (CVP) is linked to a VP or S parent by an OC edge.
#
# Returns [lower, upper_l, new_upper_u]: only the 'upper upper' node moves
# up to the VP/S parent; nil if the block does not match.
#
# The parent category is tested with /^(VP|S)$/; the earlier /^(VP)|S$/
# anchored each alternative on one side only and also accepted e.g. CS.
def cvzinf_fin(lower, upper_l, upper_u)
  retv =
    test_localtrees([
      {'from' => [lower, 'cat', /^VZ$/],
       'to' => [upper_l, 'cat', /^CVP$/],
       'edge' => ['up', /^CJ$/]},
      {'from' => [upper_u, 'cat', /^CVP$/],
       'to' => [nil, 'cat', /^(VP|S)$/],
       'edge' => ['up', /^OC$/]}
    ])

  return nil if retv.nil?

  [lower, upper_l, retv['to'].first]
end
534
+
535
+ ###
536
# Building block MODAL: an infinitive below a modal verb.
#
# Matches if
#  - lower is an infinitive (POS V[AMV]INF) linked to upper_l, a VP or CVP,
#    by an edge labeled exactly HD or CJ,
#  - upper_u (VP or CVP) is linked to a VP or S parent by an OC edge,
#  - that parent has an HD child with POS VMPP, VMFIN or VMINF.
#
# Returns [new_lower, new_upper, new_upper], where new_upper is the VP/S
# parent and new_lower its first matching HD child; nil if no match.
#
# Alternations are grouped inside the anchors (/^(HD|CJ)$/, /^(VP|S)$/);
# the earlier forms anchored each alternative on one side only.
def modal(lower, upper_l, upper_u)
  retv =
    test_localtrees([
      {'from' => [lower, 'pos', /^V[AMV]INF$/],
       'to' => [upper_l, 'cat', /^C?VP$/],
       'edge' => ['up', /^(HD|CJ)$/]},
      {'from' => [upper_u, 'cat', /^C?VP$/],
       'to' => [nil, 'cat', /^(VP|S)$/],
       'edge' => ['up', /^OC$/]},
      {'from' => ['tp_prev_to', 'cat', /^(VP|S)$/],
       'to' => [nil, 'pos', /^VM(PP|FIN|INF)$/],
       'edge' => ['dn', /^HD$/]}
    ])

  return nil if retv.nil?

  [retv['to'].first, retv['from'], retv['from']]
end
557
+
558
+ ###
559
# Building block OTHERMODAL: a participle attached by OC below a VP whose
# head is an infinitive or finite verb, which in turn sits below a modal.
#
# Matches if
#  - lower is a participle (POS V[AMV]PP) linked to upper_l, a VP, by an
#    OC edge,
#  - upper_l has an HD child with POS V[AMV]INF or V[AMV]FIN,
#  - upper_u (VP) is linked to a VP or S parent by an OC edge,
#  - that parent has an HD child with POS VMPP, VMFIN or VMINF.
#
# Returns [new_lower, new_upper, new_upper], where new_upper is the VP/S
# parent and new_lower its first matching HD child; nil if no match.
#
# The parent category is tested with /^(VP|S)$/; the earlier /^(VP)|S$/
# anchored each alternative on one side only and also accepted e.g. CS.
def othermodal(lower, upper_l, upper_u)
  retv =
    test_localtrees([
      {'from' => [lower, 'pos', /^V[AMV]PP$/],
       'to' => [upper_l, 'cat', /^VP$/],
       'edge' => ['up', /^OC$/]},
      {'from' => [upper_l, 'cat', /^VP$/],
       'to' => [nil, 'pos', /^V[AMV](INF|FIN)$/],
       'edge' => ['dn', /^HD$/]},
      {'from' => [upper_u, 'cat', /^VP$/],
       'to' => [nil, 'cat', /^(VP|S)$/],
       'edge' => ['up', /^OC$/]},
      {'from' => ['tp_prev_to', 'cat', /^(VP|S)$/],
       'to' => [nil, 'pos', /^VM(PP|FIN|INF)$/],
       'edge' => ['dn', /^HD$/]}
    ])

  return nil if retv.nil?

  [retv['to'].first, retv['from'], retv['from']]
end
583
+
584
+ ###
585
# Building block CONJ: the current VP is a conjunct of a CVP.
#
# Matches if
#  - lower is linked (by any edge) to upper_l, a VP,
#  - upper_u (VP) is linked to a CVP parent by a CJ edge.
#
# Returns [lower, upper_l, cvp]: only the 'upper upper' node moves up to
# the CVP; nil if the block does not match.
def conj(lower, upper_l, upper_u)
  below = {'from' => [lower, nil, //],
           'to' => [upper_l, 'cat', /^VP$/],
           'edge' => ['up', //]}
  above = {'from' => [upper_u, 'cat', /^VP$/],
           'to' => [nil, 'cat', /^CVP$/],
           'edge' => ['up', /^CJ$/]}

  found = test_localtrees([below, above])

  found.nil? ? nil : [lower, upper_l, found['to'].first]
end
602
+ end
603
+
604
+ ###########################################################3
605
+ class Tiger < SynInterpreter
606
+
607
+ extend TigerMaxProjection
608
+
609
+ @@heads_obj = Headz.new()
610
+
611
+ ###
612
+ # generalize over POS tags.
613
+ #
614
+ # returns one of:
615
+ #
616
+ # adj: adjective (phrase)
617
+ # adv: adverb (phrase)
618
+ # card: numbers, quantity phrases
619
+ # con: conjunction
620
+ # det: determiner, including possessive/demonstrative pronouns etc.
621
+ # for: foreign material
622
+ # noun: noun (phrase), including personal pronouns, proper names, expletives
623
+ # part: particles, truncated words (German compound parts)
624
+ # prep: preposition (phrase)
625
+ # pun: punctuation, brackets, etc.
626
+ # sent: sentence
627
+ # top: top node of a sentence
628
+ # verb: verb (phrase)
629
+ # nil: something went wrong
630
+ #
631
+ # default: return phrase type as is
632
def Tiger.category(node) # SynNode
  # Maps the phrase type / POS tag delivered by Tiger.pt(node) onto one of
  # the coarse labels listed above. The branches are tried top to bottom and
  # the first matching pattern decides. Returns nil if Tiger.pt yields nil
  # or no pattern matches.
  pt = Tiger.pt(node)
  # phrase type could not be determined
  return nil if pt.nil?

  case pt.to_s.strip()
  # APPR has to be tested before the AP pattern below: the trailing
  # [^A-Za-z]? there is optional, so that pattern matches every tag
  # starting with AP (APPR included) and the APPR entry of the "prep"
  # branch could never be reached.
  when /^APPR/ then return "prep"
  when /^C?ADJ/, /^PIS/, /^C?AP[^A-Za-z]?/ then return "adj"
  when /^C?ADV/, /^C?AVP/, /^PROAV/ then return "adv"
  when /^CARD/ then return "card"
  when /^C?KO/ then return "con"
  when /^PPOS/, /^ART/, /^PIAT/, /^PD/, /^PRELAT/, /^PWAT/ then return "det"
  when /^FM/, /^XY/ then return "for"
  when /^C?N/, /^PPER/, /^PN/, /^PRELS/, /^PWS/ then return "noun"
  when /^ITJ/ then return "sent"
  when /^PRF/, /^PTK/, /^TRUNC/ then return "part"
  when /^C?PP/, /^APPR/, /^PWAV/ then return "prep"
  when /^\$/ then return "pun"
  # original author's note: CO/DL are placed here for lack of a better label
  when /^C?S$/, /^CO/, /^DL/, /^CH/, /^ISU/ then return "sent"
  when /^TOP/ then return "top"
  when /^C?V/ then return "verb"
  else
    # unknown category/POS
    return nil
  end
end
659
+
660
+ ###
661
+ # is relative pronoun?
662
+ #
663
def Tiger.relative_pronoun?(node) # SynNode
  # true if the phrase type / POS tag from Tiger.pt starts with
  # PREL, PWAV or PWAT; false otherwise; nil if Tiger.pt yields nil
  pt = Tiger.pt(node)
  # phrase type could not be determined
  return nil if pt.nil?

  tag = pt.to_s.strip()
  [/^PREL/, /^PWAV/, /^PWAT/].any? { |pattern| pattern === tag }
end
677
+
678
+
679
+ ###
680
+ # lemma_backoff:
681
+ #
682
+ # if we have lemma information, return that,
683
+ # and failing that, return the word
684
+ #
685
+ # returns: string or nil
686
def Tiger.lemma_backoff(node)
  # Takes the lemma (or word) from the superclass implementation. If it
  # lists several alternatives separated by "|", only the first is kept.
  # Returns a string, or nil if the superclass returned nil.
  lemma = super(node)

  # The "|" must be escaped: unescaped it is the regexp alternation
  # operator, which added an always-matching empty alternative, so that
  # an empty lemma or one starting with "|" came back as nil.
  lemma =~ /^([^|]+)\|/ ? $1 : lemma
end
696
+
697
+ ###
698
+ # particle_of_verb:
699
+ #
700
+ # given a node and a nodelist,
701
+ # if the node represents a verb:
702
+ # see if the verb has a particle among the nodes in nodelist
703
+ # if so, return it
704
def Tiger.particle_of_verb(node, # SynNode
                           node_list) # array: SynNode
  # Returns the first sister of the verb node that is contained in
  # node_list and attached by an "SVP" edge; nil if node is not a verb,
  # has no parent, or has no such sister.

  # must be verb
  return nil unless Tiger.category(node) == "verb"

  # must have parent
  return nil unless node.parent

  particles = node.parent.children.select do |sister|
    # sisters of the verb that are in node_list and whose incoming
    # edge is labeled "SVP" ...
    node_list.include?(sister) && sister.parent_label() == "SVP" &&
      # ... except for ")": the Sleepy parser often tags it as a
      # separate verb particle
      !(particle_is_paren = (sister.get_attribute("lemma") == ")" or sister.word == ")"))
  end

  # first particle found, or nil if there is none
  particles.first
end
735
+
736
+
737
+ ###
738
+ # auxiliary?
739
+ #
740
+ # returns true if the given node is an auxiliary
741
+ # here: a node counts as an auxiliary if its part of speech starts with "VA"
742
def Tiger.auxiliary?(node)
  # auxiliary: the node has a part of speech and it starts with "VA"
  (node.part_of_speech() && node.part_of_speech =~ /^VA/) ? true : false
end
750
+
751
+ ###
752
+ # modal?
753
+ #
754
+ # returns true if the given node is a modal verb
755
+ #
756
+ # returns: boolean
757
def Tiger.modal?(node)
  # modal verb: the node has a part of speech and it starts with "VM"
  (node.part_of_speech() && node.part_of_speech =~ /^VM/) ? true : false
end
765
+
766
+ ###
767
+ # head_terminal
768
+ #
769
+ # given a constituent, return the terminal node
770
+ # that describes its headword
771
+ # default: a heuristic that assumes the existence of a 'head'
772
+ # attribute on nodes:
773
+ # find the first node in my yield corresponding to my head attribute.
774
+ # add-on: if this doesn't work, ask the headz package for the head
775
+ #
776
+ # returns: a SynNode object if successful, else nil
777
def Tiger.head_terminal(node)
  # first choice: whatever the superclass implementation finds
  inherited = super(node)
  return inherited if inherited

  # fallback: ask the headz object; prefer its "prep" entry over "head"
  head_hash = @@heads_obj.get_sem_head(node)
  return nil if head_hash.nil?

  head_hash["prep"] || head_hash["head"]
end
791
+
792
+
793
+ #####################################
794
+ # verbs(sobj) sobj is a sentence in SalsaTigerSentence format
795
+ #
796
+ # return a list of the nodes of full verbs in a given sentence:
797
+ # it is a list of lists. An item in that list is
798
+ # - either a pair [verb, svp]
799
+ # of the node of a verb with separable prefix
800
+ # and the node of its separate prefix
801
+ # - or a singleton [verb]
802
+ # of the node of a verb without separate prefix
803
def Tiger.verbs(sobj)
  # all terminals whose generalized category is "verb"
  verb_terminals = sobj.terminals().select { |t| Tiger.category(t) == "verb" }

  verb_terminals.map do |verb|
    # watch out for separate verb prefixes: SVP children of the verb's
    # parent (none if the verb is a root node, for whatever reason)
    parent = verb.parent
    svp_children = parent.nil? ? [] : parent.children_by_edgelabels(['SVP'])

    if svp_children.empty?
      # verb has no separate verb prefix
      [verb]
    else
      unless svp_children.length == 1
        # more than one separate verb prefix? weird. Use the first one.
        $stderr.print 'Tiger warning: more than one separate verb prefix '
        $stderr.print 'for node ', verb.id, "\n"
      end
      [verb, svp_children.first]
    end
  end
end
833
+
834
+ ###
835
+ # preposition
836
+ #
837
+ # if the given node represents a PP, return the preposition (string)
838
def Tiger.preposition(node) # SynNode
  # first choice: the "prep" entry delivered by the headz object
  sem_head = @@heads_obj.get_sem_head(node)
  return sem_head["prep"].to_s if sem_head and sem_head["prep"]

  # this didn't work, try something else:
  # first terminal (in sorted order) whose category is "prep"
  first_prep = node.terminals_sorted().detect { |n| Tiger.category(n) == "prep" }

  first_prep ? first_prep.word() : nil
end
854
+
855
+
856
+ ###
857
+ # voice
858
+ #
859
+ # given a constituent, return
860
+ # - "active"/"passive" if it is a verb
861
+ # - nil, else
862
def Tiger.voice(node)
  # Returns "passive" if node is a participle whose governing constituent
  # (found via an OC edge, see below) has an auxiliary head that looks like
  # a form of 'werden'; "active" for any other verb; nil for non-verbs.
  #
  # All alternations in the patterns are grouped inside the anchors, e.g.
  # /^(CVP|VP|S|CS)$/. The earlier forms such as /^(CVP)|(VP)|S|(CS)$/ left
  # most alternatives unanchored and matched any label merely containing
  # "VP" or "S".
  return nil unless Tiger.category(node) == "verb"

  participle = /^V[AMV]PP$/
  clause_like = /^(CVP|VP|S|CS)$/

  # node is a participle linked to its VP or S parent by HD or CJ
  retv = test_localtrees([{'from' => [node, 'pos', participle],
                           'to' => [nil, 'cat', clause_like],
                           'edge' => ['up', /^(HD|CJ)$/]}])

  if retv
    verb_parent = retv['to'].first

    # coordination?
    #      S/VP
    #       |OC
    #      CVP
    #       | CJ
    #      VP
    #       | HD
    #   participle
    retv = test_localtrees([{'from' => [verb_parent, nil, //],
                             'to' => [nil, 'cat', /^CVP$/],
                             'edge' => ['up', /^CJ$/]}])

    # with coordination the OC edge is looked for above the CVP,
    # otherwise directly above the participle's parent
    oc_start = retv ? retv['to'].first : verb_parent

    retv = test_localtrees([{'from' => [oc_start, nil, //],
                             'to' => [nil, 'cat', /^(VP|S)$/],
                             'edge' => ['up', /^OC$/]}])
  else
    # KE Dec 19: test whether the participle itself is linked to its
    # parent via an OC edge. If so, that parent has the same function
    # as the grandparent in the case above.
    retv = test_localtrees([{'from' => [node, 'pos', participle],
                             'to' => [nil, 'cat', clause_like],
                             'edge' => ['up', /^OC$/]}])
  end

  return "active" if retv.nil?

  verb_grandparent = retv['to'].first

  # the grandparent has a HD child that is a terminal node, an auxiliary;
  # the POS pattern already guarantees a VA tag, so no separate re-check
  retv = test_localtrees([{'from' => [verb_grandparent, nil, //],
                           'to' => [nil, 'pos', /^VA.*$/],
                           'edge' => ['dn', /^HD$/]}])

  return "active" if retv.nil?

  # that HD child must be a form of 'werden';
  # no morphology available, so approximate it using regexps
  word = retv['to'].first.word
  return "active" unless "geworden" == word or /^w.+rd(e|en|et|st|est)?$/ === word

  # all tests passed successfully
  "passive"
end
961
+
962
###
# gfs
#
# grammatical functions of a constituent, dispatched on the category of
# the node: adjectives, nouns and verbs each have their own method;
# every other category has no grammatical functions.
#
# returns: a list of pairs [relation(string), node(SynNode)]
#   where <node> stands in the relation <relation> to the parameter
#   that the method was called with
#
def Tiger.gfs(node, # SynNode object
              sent) # SalsaTigerSentence object
  case Tiger.category(node)
  when "adj"  then Tiger.gfs_adj(node)
  when "noun" then Tiger.gfs_noun(node, sent)
  when "verb" then Tiger.gfs_verb(node)
  else []
  end
end
985
+
986
+
987
###
# informative_content_node
#
# for most constituents: nil
# for S/CS and VP/CVP: preferably a child whose head is verbal
# for PP/CPP: preferably a child whose head is nominal
#
# Only children that have a head terminal, and whose head lemma differs
# from the head lemma of the node itself, are considered.
# If exactly one such child exists, it is returned. Otherwise the first
# child with a head POS of the preferred kind is returned, or, failing
# that, simply the first candidate (nil if there is none).
def Tiger.informative_content_node(node)
  phrase_type = Tiger.simplified_pt(node)
  return nil unless ["S", "CS", "VP", "CVP", "PP", "CPP"].include? phrase_type

  head = Tiger.head_terminal(node)
  return nil unless head

  head_lemma = Tiger.lemma_backoff(head)

  # keep the children that have a head of their own which is not the
  # head of the whole constituent
  candidates = node.children().select { |child|
    child_head = Tiger.head_terminal(child)
    child_head && Tiger.lemma_backoff(child_head) != head_lemma
  }
  return candidates.first if candidates.length() == 1

  # zero or several candidates: prefer one whose head POS
  # starts in V (clauses, verb phrases) or in N (prepositional phrases)
  wanted_pos =
    case phrase_type
    when /^C?S/, /^C?VP/
      /^V/
    when /^C?PP/
      /^N/
    else
      raise "Shouldn't be here"
    end

  preferred = candidates.detect { |child|
    child_head = Tiger.head_terminal(child)
    child_head && child_head.part_of_speech() =~ wanted_pos
  }

  preferred || candidates.first
end
1040
+
1041
###
# main node of expression
#
# second argument non-nil:
# don't handle multiword expressions beyond verbs with separate particles
#
# Special case handled here: the expression consists of exactly two
# terminals, an infinitive particle (POS "PTKZU") and a verb, which share
# a parent of category "VZ"; then the verb is the main node.
# Everything else is delegated to the superclass implementation.
#
# returns: SynNode, main node, if found
# else nil
def Tiger.main_node_of_expr(nodelist,
                            no_mwes = nil)

  # map nodes to terminals
  terminals = nodelist.map { |n| n.yield_nodes() }.flatten

  if terminals.length() == 2
    # split into the particle(s) and the rest
    particles, others = terminals.distribute { |t| t.part_of_speech() == "PTKZU" }
    verb = others.first

    if particles.length() == 1 &&
       Tiger.category(verb) == "verb" &&
       verb.parent == particles.first.parent &&
       verb.parent.category() == "VZ"
      return verb
    end
  end

  # no joy: try method offered by abstract class
  super(terminals, no_mwes)
end
1071
+
1072
+
1073
########
# prune?
# given a target node t and another node n of the syntactic structure,
# decide whether n is likely to instantiate a semantic role
# of t. If not, recommend n for pruning.
#
# This method implements a slight variant of Xue and Palmer (EMNLP 2004):
# "Step 1: Designate the predicate as the current node and
#  collect its sisters (constituents attached at the same level
#  as the predicate) unless its sisters are coordinated with the
#  predicate.
#
#  Step 2: Reset the current node to its parent and repeat Step 1
#  till it reaches the top level node."
#
# Modifications made here:
# - paths of length 0 accepted in any case
# - TIGER coordination allowed (phrase types CX)
#
# returns: the integer 0 to recommend n for pruning, else the integer 1.
#   NB: both values are truthy in Ruby, so the result has to be compared
#   against 0/1 rather than used directly as a condition.
def Tiger.prune?(node,            # SynNode
                 paths_to_target, # hash: node ID -> Path object: paths from nodes to target
                 terminal_index)  # hash: terminal node -> word index in sentence (not used here)

  path_to_target = paths_to_target[node.id()]

  # no path from target to node: suggest for pruning
  return 0 unless path_to_target

  # target may be its own role: definite accept
  return 1 if path_to_target.length == 0

  # consider path from target to node:
  # (1) If the path to the current node includes at least one Up
  #     and exactly one Down, keep.
  # (2) If the path to the current node includes at least one Up
  #     and two Down and the roof node is a C-something, keep (coordination).
  # (3) else discard

  # count number of up and down steps in path to target
  num_up = 0
  num_down = 0
  path_to_target.each_step { |direction, _edgelabel, _nodelabel, _endnode|
    case direction
    when /U/
      num_up += 1
    when /D/
      num_down += 1
    end
  }

  if num_up >= 1 and num_down == 1
    # case (1)
    1
  elsif num_up >= 1 and num_down == 2 and
        Tiger.simplified_pt(path_to_target.lca()) =~ /^C/
    # case (2): the roof node is tested by its TIGER phrase type
    # (CS, CVP, CPP, ...). This used to go through
    # CollinsTntInterpreter.category, i.e. another parser's interpreter
    # and a word-class rather than phrase-type accessor.
    1
  else
    # case (3)
    0
  end
end
1138
+
1139
+
1140
+ ################################
1141
+ private
1142
+ ################################
1143
+
1144
###
# subject
#
# Subject of a verb, taking voice into account.
#
# returns: nil if verb_node is not a verb;
#   for a passive verb, the SBP children of the verb's parent
#   ([] if the verb has no parent);
#   otherwise whatever Tiger.surface_subject yields
def Tiger.subject(verb_node)
  return nil unless Tiger.category(verb_node) == "verb"

  # not passive: the subject of the verb
  # is actually its subject in this sentence
  return Tiger.surface_subject(verb_node) unless Tiger.voice(verb_node) == "passive"

  # passive: what we would like to return as subject
  # is the SBP sibling of this verb
  mother = verb_node.parent

  # verb_node seems to be the root, strangely enough
  return [] if mother.nil?

  mother.children_by_edgelabels(['SBP'])
end
1175
+
1176
+
1177
###
# direct_object
#
# Direct object of a verb, taking voice into account.
#
# returns: nil if verb_node is not a verb;
#   for a passive verb, whatever Tiger.surface_subject yields;
#   otherwise the OA children of the verb's parent
#   ([] if the verb has no parent)
def Tiger.direct_object(verb_node)
  return nil unless Tiger.category(verb_node) == "verb"

  # passive: what we would like to return as direct object
  # is the subject of this verb
  return Tiger.surface_subject(verb_node) if Tiger.voice(verb_node) == "passive"

  # not passive: the direct object
  # is an OA sibling of the node verb_node
  mother = verb_node.parent

  # a missing parent means verb_node seems to be the root, strangely enough
  mother.nil? ? [] : mother.children_by_edgelabels(['OA'])
end
1202
+
1203
###
# dative_object
#
# Dative object of a verb: the DA siblings of the verb node.
#
# returns: nil if verb_node is not a verb;
#   [] if the verb has no parent;
#   otherwise the DA children of the verb's parent
def Tiger.dative_object(verb_node)
  return nil unless Tiger.category(verb_node) == "verb"

  mother = verb_node.parent
  mother.nil? ? [] : mother.children_by_edgelabels(['DA'])
end
1218
+
1219
###
# prep_object
#
# PP siblings of a verb, optionally restricted to a given preposition.
#
# verb_node: the verb
# preposition: nil to get all PP siblings, otherwise only PPs are kept
#   that have an AC child whose word equals this value
#
# returns: nil if verb_node is not a verb;
#   [] if the verb has no parent;
#   otherwise the list of matching PP nodes
def Tiger.prep_object(verb_node, preposition)
  return nil unless Tiger.category(verb_node) == "verb"

  mother = verb_node.parent()

  # verb_node seems to be the root, strangely enough
  return [] if mother.nil?

  # find all PPs that are siblings of verb_node
  pp_siblings = []
  mother.each_child { |sibling|
    pp_siblings << sibling if sibling.category == 'PP'
  }

  # no preposition requested: all of them qualify
  return pp_siblings if preposition.nil?

  # now filter for those with the right preposition:
  # prepositions are terminal words that are AC children of PP nodes
  pp_siblings.select { |pp_node|
    pp_node.children_by_edgelabels(['AC']).any? { |prep_node|
      prep_node.word() == preposition
    }
  }
end
1254
+
1255
###
# surface_subject
#
# Walk through the levels recorded while computing the maximal projection
# of the verb, in the order in which Tiger.max_projection lists them
# under 'max_proj_at_level', and take the SB children of the first level
# that has any.
#
# returns: list of SB children, [] if no level has one
def Tiger.surface_subject(verb_node)
  projection = Tiger.max_projection(verb_node)

  projection['max_proj_at_level'].each do |level_node|
    # does this level have a SB child? if so, use it
    subjects = level_node.children_by_edgelabels(['SB'])
    return subjects unless subjects.empty?
  end

  []
end
1273
+
1274
+
1275
##################
# gfs_verb
#
# given a node (a SynNode object) that is a terminal node
# representing a verb, determine
# all grammatical functions of this verb
#
# verb_node: SynNode object, terminal node representing a verb
#
# returns: a list of pairs [relation(string), node(SynNode)]
#   'relation' is 'SB', 'OA', 'DA' (at most one pair each, the first
#   node found), 'OC' (one pair per OC sibling), or, for PP siblings,
#   '<edge label>-<preposition>' with 'MO' standing in for a missing
#   edge label
#   'node' is the constituent that stands in this relation to verb_node
#
#   [] if verb_node is not a verb
def Tiger.gfs_verb(verb_node)
  return [] unless Tiger.category(verb_node) == "verb"

  # the list of pairs [relation, node] under construction
  relations = []

  # subject
  subjects = Tiger.subject(verb_node)
  relations << ["SB", subjects.first] if subjects.length() > 0

  # direct object
  direct_objects = Tiger.direct_object(verb_node)
  relations << ["OA", direct_objects.first] if direct_objects.length() > 0

  # dative object
  dative_objects = Tiger.dative_object(verb_node)
  relations << ["DA", dative_objects.first] if dative_objects.length() > 0

  # pp objects and adjuncts: labelled by edge label plus preposition
  Tiger.prep_object(verb_node, nil).each { |pp_node|
    label = pp_node.parent_label || "MO"
    relations << [label + "-" + Tiger.preposition(pp_node).to_s, pp_node]
  }

  # sentence complement:
  # verb node's parent has an OC child
  mother = verb_node.parent
  unless mother.nil?
    mother.children_by_edgelabels(["OC"]).each { |complement|
      relations << ["OC", complement]
    }
  end

  relations
end
1347
+
1348
###
# gfs_noun
#
# determine relation names and relation-bearing syntax nodes
# for noun targets
#
# Three kinds of relations are collected:
# (1) the noun is attached by an NK edge: every sibling that is not
#     itself attached by NK, labelled with that sibling's edge label
# (2) the parent is NP/PP/PN/CNP, the grandparent is NP/PP/PN, and the
#     two are not linked by an NK edge: the grandparent, labelled with
#     the parent's edge label
# (3) the grandparent is a CNP: its other NP/PP/PN/CNP children,
#     labelled "CJ"
#
# returns: a list of pairs
#   [rel(string), node(SynNode)]
def Tiger.gfs_noun(noun_node, # SynNode object: terminal, noun
                   sent_obj)  # SalsaTigerSentence object: sentence in which this noun occurs (not used here)

  relations = []

  mother = noun_node.parent()
  nominal_labels = ["NP", "PP", "PN"]
  nominal_or_coord_labels = ["NP", "PP", "PN", "CNP"]

  # (1) non-NK siblings of an NK-attached noun
  if mother && noun_node.parent_label() == "NK"
    mother.children().each { |sibling|
      next if sibling.parent_label() == "NK"
      next if sibling == noun_node

      relations << [sibling.parent_label(), sibling]
    }
  end

  grandmother = mother.parent() if mother

  # (2) nominal grandparent that the parent is not a kernel element of
  if mother && grandmother &&
     nominal_or_coord_labels.include?(mother.category()) &&
     nominal_labels.include?(grandmother.category()) &&
     mother.parent_label() != "NK"

    relations << [mother.parent_label(), grandmother]
  end

  # (3) the other conjuncts of a coordinated noun phrase
  if mother && grandmother && grandmother.category() == "CNP"
    grandmother.each_child() { |conjunct|
      if nominal_or_coord_labels.include?(conjunct.category()) && conjunct != mother
        relations << ["CJ", conjunct]
      end
    }
  end

  relations
end
1418
+
1419
###
# gfs_adj
#
# determine relation names and relation-bearing syntax nodes
# for adjective targets
#
# returns: a list of pairs
#   [rel(string), node(SynNode)]
#
# In practice the list holds at most one pair: ["HD", parent] if the
# adjective's parent is an NP, CNP, PP, CPP or PN; it is empty if the
# adjective has no parent or the parent is of any other category.
def Tiger.gfs_adj(adj_node) # SynNode object: terminal, adjective
  mother = adj_node.parent()
  return [] if mother.nil?

  nominal_parent = ["NP", "CNP", "PP", "CPP", "PN"].include?(mother.category())
  nominal_parent ? [["HD", mother]] : []
end
1444
+
1445
+
1446
+ end
1447
+
1448
+ #( (TOP (S (KON_JU Und) (ADV_MO schon) (VVFIN_HD weiß) (NP_SB (ART_NK der) (ADJA_NK Berliner) (NN_NK Verkehrsverein)) ($, ,) (S_OC (PWS_SB was) (ADV_MO da) (PIS_MNR alles) (PP_MO (APPR_AC auf) (ART_NK die) (NN_NK Stadt) ($, ,) (PPOSAT_NK seine) (AP_NK (PP_MO (APPR_AC durch) (ART_NK eine) (NN_NK Rekonstruktion)) (ADV_MO so) (ADJA_HD prachtvoll)) (ADJA_NK markierte) (NN_NK Mitte) ($, ,) (AP_NK (PIS_HD alles))) (VVFIN_HD zukommt))) ($. .)))