frprep 0.0.1.prealpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. data/.yardopts +8 -0
  2. data/CHANGELOG.rdoc +0 -0
  3. data/LICENSE.rdoc +0 -0
  4. data/README.rdoc +0 -0
  5. data/lib/common/AbstractSynInterface.rb +1227 -0
  6. data/lib/common/BerkeleyInterface.rb +375 -0
  7. data/lib/common/CollinsInterface.rb +1165 -0
  8. data/lib/common/ConfigData.rb +694 -0
  9. data/lib/common/Counter.rb +18 -0
  10. data/lib/common/DBInterface.rb +48 -0
  11. data/lib/common/EnduserMode.rb +27 -0
  12. data/lib/common/Eval.rb +480 -0
  13. data/lib/common/FixSynSemMapping.rb +196 -0
  14. data/lib/common/FrPrepConfigData.rb +66 -0
  15. data/lib/common/FrprepHelper.rb +1324 -0
  16. data/lib/common/Graph.rb +345 -0
  17. data/lib/common/ISO-8859-1.rb +24 -0
  18. data/lib/common/ML.rb +186 -0
  19. data/lib/common/Maxent.rb +215 -0
  20. data/lib/common/MiniparInterface.rb +1388 -0
  21. data/lib/common/Optimise.rb +195 -0
  22. data/lib/common/Parser.rb +213 -0
  23. data/lib/common/RegXML.rb +269 -0
  24. data/lib/common/RosyConventions.rb +171 -0
  25. data/lib/common/SQLQuery.rb +243 -0
  26. data/lib/common/STXmlTerminalOrder.rb +194 -0
  27. data/lib/common/SalsaTigerRegXML.rb +2347 -0
  28. data/lib/common/SalsaTigerXMLHelper.rb +99 -0
  29. data/lib/common/SleepyInterface.rb +384 -0
  30. data/lib/common/SynInterfaces.rb +275 -0
  31. data/lib/common/TabFormat.rb +720 -0
  32. data/lib/common/Tiger.rb +1448 -0
  33. data/lib/common/TntInterface.rb +44 -0
  34. data/lib/common/Tree.rb +61 -0
  35. data/lib/common/TreetaggerInterface.rb +303 -0
  36. data/lib/common/headz.rb +338 -0
  37. data/lib/common/option_parser.rb +13 -0
  38. data/lib/common/ruby_class_extensions.rb +310 -0
  39. data/lib/fred/Baseline.rb +150 -0
  40. data/lib/fred/FileZipped.rb +31 -0
  41. data/lib/fred/FredBOWContext.rb +863 -0
  42. data/lib/fred/FredConfigData.rb +182 -0
  43. data/lib/fred/FredConventions.rb +232 -0
  44. data/lib/fred/FredDetermineTargets.rb +324 -0
  45. data/lib/fred/FredEval.rb +312 -0
  46. data/lib/fred/FredFeatureExtractors.rb +321 -0
  47. data/lib/fred/FredFeatures.rb +1061 -0
  48. data/lib/fred/FredFeaturize.rb +596 -0
  49. data/lib/fred/FredNumTrainingSenses.rb +27 -0
  50. data/lib/fred/FredParameters.rb +402 -0
  51. data/lib/fred/FredSplit.rb +84 -0
  52. data/lib/fred/FredSplitPkg.rb +180 -0
  53. data/lib/fred/FredTest.rb +607 -0
  54. data/lib/fred/FredTrain.rb +144 -0
  55. data/lib/fred/PlotAndREval.rb +480 -0
  56. data/lib/fred/fred.rb +45 -0
  57. data/lib/fred/md5.rb +23 -0
  58. data/lib/fred/opt_parser.rb +250 -0
  59. data/lib/frprep/AbstractSynInterface.rb +1227 -0
  60. data/lib/frprep/Ampersand.rb +37 -0
  61. data/lib/frprep/BerkeleyInterface.rb +375 -0
  62. data/lib/frprep/CollinsInterface.rb +1165 -0
  63. data/lib/frprep/ConfigData.rb +694 -0
  64. data/lib/frprep/Counter.rb +18 -0
  65. data/lib/frprep/FNCorpusXML.rb +643 -0
  66. data/lib/frprep/FNDatabase.rb +144 -0
  67. data/lib/frprep/FixSynSemMapping.rb +196 -0
  68. data/lib/frprep/FrPrepConfigData.rb +66 -0
  69. data/lib/frprep/FrameXML.rb +513 -0
  70. data/lib/frprep/FrprepHelper.rb +1324 -0
  71. data/lib/frprep/Graph.rb +345 -0
  72. data/lib/frprep/ISO-8859-1.rb +24 -0
  73. data/lib/frprep/MiniparInterface.rb +1388 -0
  74. data/lib/frprep/Parser.rb +213 -0
  75. data/lib/frprep/RegXML.rb +269 -0
  76. data/lib/frprep/STXmlTerminalOrder.rb +194 -0
  77. data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
  78. data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
  79. data/lib/frprep/SleepyInterface.rb +384 -0
  80. data/lib/frprep/SynInterfaces.rb +275 -0
  81. data/lib/frprep/TabFormat.rb +720 -0
  82. data/lib/frprep/Tiger.rb +1448 -0
  83. data/lib/frprep/TntInterface.rb +44 -0
  84. data/lib/frprep/Tree.rb +61 -0
  85. data/lib/frprep/TreetaggerInterface.rb +303 -0
  86. data/lib/frprep/do_parses.rb +142 -0
  87. data/lib/frprep/frprep.rb +686 -0
  88. data/lib/frprep/headz.rb +338 -0
  89. data/lib/frprep/one_parsed_file.rb +28 -0
  90. data/lib/frprep/opt_parser.rb +94 -0
  91. data/lib/frprep/ruby_class_extensions.rb +310 -0
  92. data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
  93. data/lib/rosy/DBMySQL.rb +146 -0
  94. data/lib/rosy/DBSQLite.rb +280 -0
  95. data/lib/rosy/DBTable.rb +239 -0
  96. data/lib/rosy/DBWrapper.rb +176 -0
  97. data/lib/rosy/ExternalConfigData.rb +58 -0
  98. data/lib/rosy/FailedParses.rb +130 -0
  99. data/lib/rosy/FeatureInfo.rb +242 -0
  100. data/lib/rosy/GfInduce.rb +1115 -0
  101. data/lib/rosy/GfInduceFeature.rb +148 -0
  102. data/lib/rosy/InputData.rb +294 -0
  103. data/lib/rosy/RosyConfigData.rb +115 -0
  104. data/lib/rosy/RosyConfusability.rb +338 -0
  105. data/lib/rosy/RosyEval.rb +465 -0
  106. data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
  107. data/lib/rosy/RosyFeaturize.rb +280 -0
  108. data/lib/rosy/RosyInspect.rb +336 -0
  109. data/lib/rosy/RosyIterator.rb +477 -0
  110. data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
  111. data/lib/rosy/RosyPruning.rb +165 -0
  112. data/lib/rosy/RosyServices.rb +744 -0
  113. data/lib/rosy/RosySplit.rb +232 -0
  114. data/lib/rosy/RosyTask.rb +19 -0
  115. data/lib/rosy/RosyTest.rb +826 -0
  116. data/lib/rosy/RosyTrain.rb +232 -0
  117. data/lib/rosy/RosyTrainingTestTable.rb +786 -0
  118. data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
  119. data/lib/rosy/View.rb +418 -0
  120. data/lib/rosy/opt_parser.rb +379 -0
  121. data/lib/rosy/rosy.rb +77 -0
  122. data/lib/shalmaneser/version.rb +3 -0
  123. data/test/frprep/test_opt_parser.rb +94 -0
  124. data/test/functional/functional_test_helper.rb +40 -0
  125. data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
  126. data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
  127. data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
  128. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
  129. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
  130. data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
  131. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
  132. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
  133. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
  134. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
  135. data/test/functional/test_fred.rb +47 -0
  136. data/test/functional/test_frprep.rb +52 -0
  137. data/test/functional/test_rosy.rb +20 -0
  138. metadata +270 -0
@@ -0,0 +1,1448 @@
1
+ # -*- coding: utf-8 -*-
2
+ require "common/headz"
3
+ require "common/SalsaTigerRegXML"
4
+ require "common/ruby_class_extensions"
5
# Mix EnumerableDistribute into every Array.
# NOTE(review): EnumerableDistribute is not defined in this file; presumably
# it comes from one of the files required above -- confirm.
Array.class_eval do
  include EnumerableDistribute
end
8
+
9
+
10
+ require "common/AbstractSynInterface"
11
+
12
+ #############################################
13
+ #
14
+ # max. projection:
15
+ #
16
+ # consists of methods that are 'building blocks' for computing
17
+ # the maximum projection of a verb in TIGER syntax
18
+ #
19
+ # basically, computing the max. projection is about moving an
20
+ # upper node upward. At the beginning it is the parent of the
21
+ # terminal node for the verb, and each building block moves it up
22
+ # to its parent, if the building block matches.
23
+ #
24
+ # Apart from the upper node, a lower node is also watched. At the
25
+ # beginning it is the terminal node for the verb, later it is usually
26
+ # the 'HD' child of the upper node. This lower node is needed for
27
+ # testing whether a building block matches.
28
+ #
29
+ # For handling conjunction, the upper node is split into two, a 'lower upper'
30
+ # and an 'upper upper' node. The 'lower upper' is used when some relation
31
+ # between the upper node and its descendants is tested, and the 'upper upper'
32
+ # is used when some relation between the upper node and its predecessors
33
+ # is tested. Usually the 'lower upper' and the 'upper upper' are the same,
34
+ # but conjunction building blocks move the 'upper upper' up to its parent
35
+ # while leaving the 'lower upper' unchanged.
36
+ #
37
+ # So all building block methods take three arguments: lower, upper_l and
38
+ # upper_u. All three are nodes given as SalsaTigerSentence objects
39
+ #
40
+ # All building block methods give as their return value a list of three
41
+ # nodes: [new_lower, new_upper_l, new_upper_u], if the building block
42
+ # matched. If it does not match, nil is returned.
43
+ #
44
+ # The method explain describes all building blocks,
45
+ # the conditions for the building blocks matching, and shows
46
+ # where the lower and the upper nodes will be after a building block matched.
47
+ #
48
+ # building blocks:
49
+ # pp_pp
50
+ # pp_fin
51
+ # inf_fin
52
+ # vzinf_fin
53
+ # cvzinf_fin
54
+ # modal
55
+ # othermodal
56
+ # conj
57
+ #
58
+ # To compute the maximal projection of a verb,
59
+ # we start at the parent of the terminal node for the verb
60
+ # "and move upwards."
61
+ # "The move upwards is broken up in little building blocks."
62
+ # "Each of them licenses one step upward in the syntactic tree."
63
+ #
64
+ # "Each building block needs information about two nodes:"
65
+ # "The current upper node (at the beginning, that is"
66
+ # "the parent of the terminal node for the verb) and"
67
+ # "one specific child of that current upper node"
68
+ # "(at the beginning, that is the terminal node for the verb)."
69
+ #
70
+ # "Each building block provides information of"
71
+ # "- where the new upper node is, depending on the current"
72
+ # " upper node, and"
73
+ # "- where the new specific child is."
74
+ #
75
+ # "For handling conjunction, we need to complicate this picture somewhat:"
76
+ # "We split the current upper node into an 'upper upper'"
77
+ # "and a 'lower upper' node."
78
+ # "If we want to check the edge from the current upper node upwards,"
79
+ # "we use the 'upper upper'."
80
+ # "If we want to check an edge from the current upper node downwards,"
81
+ # "we use the 'lower upper'."
82
+ # "Almost always, the 'lower upper' and the 'upper upper' will be the same."
83
+ # "Except for the building block for conjunction:"
84
+ # "It moves the 'upper upper' one level up,"
85
+ # "but leaves the 'lower upper' the same."
86
+ #
87
+ # "There are five levels of building blocks."
88
+ #
89
+ # "* 1st level: auxiliary verb constructions involving a participle"
90
+ # " The following building blocks are tried, in this order:"
91
+ # " CONJ, PP-PP, CONJ, PP_FIN"
92
+ #
93
+ # "* 2nd level: infinitive constructions"
94
+ # " The following building blocks are tried, in this order:"
95
+ # " CONJ, INF-FIN, VZINF-FIN, CVZINF-FIN"
96
+ #
97
+ # "* 3rd level: modals"
98
+ # " The following building blocks are tried, in this order:"
99
+ # " CONJ, MODAL, OTHERMODAL"
100
+ #
101
+ # "* 4th level = 1st level"
102
+ #
103
+ # "* 5th level = 2nd level"
104
+ #
105
+ #
106
+ # "***These are the building blocks:"
107
+ #
108
+ # "PP-PP"
109
+ # " VP (new uppermost node)"
110
+ # " / | \\OC"
111
+ # " HD/ | VP|CVP (current uppermost node)"
112
+ # " / | |"
113
+ # " o FE |HD|CJ"
114
+ # "POS: V[AMV]PP |"
115
+ # " new target current target"
116
+ # " POS: V[AMV]PP"
117
+ #
118
+ # "PP-FIN"
119
+ # " S/VP (new uppermost node)"
120
+ # " / | \\OC or PD"
121
+ # " HD/ | VP|CVP|CO (current uppermost node)"
122
+ # " / | |"
123
+ # " o FE |HD|CJ"
124
+ # "POS: V[AMV]FIN |"
125
+ # " V[AMV]INF current target"
126
+ # "or CAT: VZ POS: V[AMV]PP"
127
+ #
128
+ # "INF_FIN"
129
+ # " S/VP (new uppermost node)"
130
+ # " / | \\OC"
131
+ # " HD/ | VP|CVP (current uppermost node)"
132
+ # " / | |"
133
+ # " o FE |HD|CJ"
134
+ # "POS: VAFIN |"
135
+ # " VAINF current target"
136
+ # " VVINF POS: V[AMV]INF"
137
+ # " new target"
138
+ #
139
+ # "VZINF-FIN"
140
+ # " S/VP (new uppermost node)"
141
+ # " / | \\OC"
142
+ # " HD/ | VP (current uppermost node)"
143
+ # " / | |"
144
+ # " o FE |HD"
145
+ # "POS: V[AV]FIN |"
146
+ # " new target current target"
147
+ # " CAT: VZ"
148
+ #
149
+ # "CVZINF-FIN"
150
+ # " S/VP (new uppermost node)"
151
+ # " | \\OC"
152
+ # " | CVP (current uppermost node)"
153
+ # " | |"
154
+ # " FE |CJ"
155
+ # " |"
156
+ # " current and new target"
157
+ # " CAT: VZ"
158
+ #
159
+ # "MODAL"
160
+ # " S/VP (new uppermost node)"
161
+ # " / | \\OC"
162
+ # " HD/ | VP|CVP (current uppermost node)"
163
+ # " / | |"
164
+ # " o FE |HD|CJ"
165
+ # " POS: |"
166
+ # " VM(PP|FIN|INF) current target"
167
+ # " new target POS: V[AMV]INF"
168
+ #
169
+ # "OTHERMODAL"
170
+ # " S/VP (new uppermost node)"
171
+ # " / | \\OC"
172
+ # " HD/ | VP (current uppermost node)"
173
+ # " / | | \\"
174
+ # " o FE |HD \\OC"
175
+ # "POS: VMFIN | \\"
176
+ # " VMINF POS: current target"
177
+ # " VMPP V[AMV]INF POS: V[AMV]PP"
178
+ # " new target V[AMV]FIN"
179
+ #
180
+ # "CONJ"
181
+ # " CVP (new upper uppermost node)"
182
+ # " | \\CJ"
183
+ # " | VP (current and new uppermost node)"
184
+ # " | |"
185
+ # " FE |"
186
+ # " |"
187
+ # " current and new target"
188
+ ###
189
module TigerMaxProjection

  ###
  # Compute the maximal projection of a verb.
  #
  # node: the terminal node of the verb; it must respond to parent()
  #
  # The parent of the verb is the first upper node. Then five levels of
  # building blocks are tried in this order: participle, infinitive, modal,
  # participle, infinitive. Each level may move the upper node further up.
  #
  # returns: a hash with two entries
  #   'max_proj'          => the 'upper upper' node after the last level
  #   'max_proj_at_level' => array: the parent of the verb, followed by the
  #                          'upper upper' node after each of the five levels
  # If the node has no parent, both entries just contain the node itself.
  def max_projection(node)
    parent = node.parent
    # node has no parent? recover somehow
    if parent.nil?
      return {'max_proj' => node,
              'max_proj_at_level' => [node]}
    end

    maxproj_at_level = [parent]

    lower = node
    upper_u = upper_l = parent

    # the five levels, in the order in which they are tried
    [:project_participle, :project_infinitive, :project_modal,
     :project_participle, :project_infinitive].each { |level|
      lower, upper_l, upper_u = send(level, lower, upper_l, upper_u)
      maxproj_at_level << upper_u
    }

    return {'max_proj' => upper_u,
            'max_proj_at_level' => maxproj_at_level}
  end


  ###
  # Test a sequence of local tree configurations.
  #
  # path: array of hashes, one per step, each with the entries
  #   'from' => [node, 'pos' or 'cat' or nil, regexp]  start node of the step
  #   'to'   => [node, 'pos' or 'cat' or nil, regexp]  end node of the step;
  #             if the node is nil, any node reached via the edge is tried
  #   'edge' => ['up' or 'dn', regexp]  direction and label of the edge
  #   Instead of a node, the strings 'tp_prev_to' and 'tp_prev_from' may be
  #   given: they stand for the (first) end node and the start node of the
  #   previous step.
  #
  # returns: the result of the last step, a hash
  #   {'from' => start node, 'to' => array of matching end nodes},
  #   or nil as soon as one step does not match.
  #   An empty path yields an empty hash.
  def test_localtrees(path)
    retv = Hash.new

    # test each step, handing the result of one step on to the next
    path.each { |step|
      retv = test_step(step, retv)
      return nil if retv.nil?
    }

    # return result of last step
    return retv
  end

  ######
  private

  ###
  # Test a single step of a path (see test_localtrees).
  #
  # path: hash with entries 'from', 'to', 'edge'
  # previous: result hash of the previous step
  #
  # returns: {'from' => start node, 'to' => array of matching end nodes},
  #   or nil if the step does not match.
  # Exits the program with status 1 if a hash entry is missing or the
  # direction is neither 'up' nor 'dn'.
  def test_step(path, previous)
    if path['from'].nil? or path['to'].nil? or path['edge'].nil?
      $stderr.puts 'TigerAux error: missing path hash entry'
      exit 1
    end

    from_node, *from_descr = path['from']
    to_node, *to_descr = path['to']

    # using the special flags tp_prev_to and tp_prev_from,
    # a node can also be set to be the value in the
    # 'previous' hash
    from_node = cf_previous(from_node, previous)
    to_node = cf_previous(to_node, previous)

    # test if 'from' node description matches
    return nil unless test_node(from_node, from_descr)

    # try path: collect the nodes reachable via a matching edge
    direction, edgelabel = path['edge']
    case direction
    when 'up'
      if from_node.parent_label() =~ edgelabel
        end_nodes = [from_node.parent()]
      else
        end_nodes = []
      end
    when 'dn'
      end_nodes = []
      from_node.each_child { |child|
        end_nodes << child if child.parent_label() =~ edgelabel
      }
    else
      $stderr.puts 'TigerAux error: unknown direction'
      exit 1
    end

    # check all prospective end nodes: if a 'to' node was given, only
    # that node is acceptable; in any case the description has to match
    remaining_end_nodes = end_nodes.select { |prosp_to_node|
      (to_node.nil? || to_node == prosp_to_node) &&
        test_node(prosp_to_node, to_descr)
    }

    return nil if remaining_end_nodes.empty?

    return {'from' => from_node,
            'to' => remaining_end_nodes}
  end

  ###
  # Test whether a node matches a description.
  #
  # node: the node to test
  # descr: pair ['pos' or 'cat' or nil, regexp]
  #
  # returns: true if the part of speech ('pos') or the category ('cat')
  #   of the node matches the regexp, or if the first entry is nil;
  #   false otherwise.
  # Exits the program with status 1 if the node is nil or the first entry
  # is something other than 'pos', 'cat' or nil.
  def test_node(node, descr)
    cat_or_pos, pattern = descr
    if node.nil?
      $stderr.puts 'TigerAux error: test_node nil'
      exit 1
    end

    case cat_or_pos
    when 'pos'
      return (node.part_of_speech =~ pattern) ? true : false
    when 'cat'
      return (node.category =~ pattern) ? true : false
    when nil
      return true
    else
      $stderr.puts 'TigerAux error: neither cat nor pos'
      exit 1
    end
  end

  ###
  # Resolve the special flags 'tp_prev_to' and 'tp_prev_from' to the first
  # end node and the start node of the previous step; anything else is
  # returned unchanged.
  def cf_previous(node, previous)
    case node
    when 'tp_prev_to'
      return previous['to'].first
    when 'tp_prev_from'
      return previous['from']
    else
      return node
    end
  end

  ###
  # 1st and 4th level: auxiliary verb constructions involving a participle.
  # Tries CONJ, PP-PP, CONJ, PP-FIN, in this order.
  def project_participle(lower, upper_l, upper_u)
    return project_this(lower, upper_l, upper_u,
                        [method(:conj),
                         method(:pp_pp),
                         method(:conj),
                         method(:pp_fin)])
  end

  ###
  # 2nd and 5th level: infinitive constructions.
  # Tries CONJ, INF-FIN, VZINF-FIN, CVZINF-FIN, in this order.
  def project_infinitive(lower, upper_l, upper_u)
    return project_this(lower, upper_l, upper_u,
                        [method(:conj),
                         method(:inf_fin),
                         method(:vzinf_fin),
                         method(:cvzinf_fin)])
  end

  ###
  # 3rd level: modals.
  # Tries CONJ, MODAL, OTHERMODAL, in this order.
  def project_modal(lower, upper_l, upper_u)
    return project_this(lower, upper_l, upper_u,
                        [method(:conj),
                         method(:modal),
                         method(:othermodal)])
  end

  ###
  # Same building blocks as project_participle. The name is kept so that
  # existing references to it keep working.
  def project_participle_(lower, upper_l, upper_u)
    return project_participle(lower, upper_l, upper_u)
  end

  ###
  # Apply the building blocks in method_list one after the other.
  # A building block that matches replaces the three nodes by its result,
  # a building block that returns nil leaves them as they are.
  #
  # returns: [lower, upper_l, upper_u] after the last building block
  def project_this(lower, upper_l, upper_u, method_list)
    method_list.each { |building_block|
      retv = building_block.call(lower, upper_l, upper_u)
      lower, upper_l, upper_u = retv unless retv.nil?
    }
    return [lower, upper_l, upper_u]
  end

  ###
  # PP-PP: lower is a participle (POS V[AMV]PP) that is the HD or CJ child
  # of upper_l (VP or CVP); upper_u (VP or CVP) is the OC child of a VP
  # whose HD child is again a participle.
  #
  # returns: [that HD participle, the new VP, the new VP], or nil
  def pp_pp(lower, upper_l, upper_u)
    retv =
      test_localtrees([
                        {'from' => [lower, 'pos', /^V[AMV]PP$/],
                         'to' => [upper_l, 'cat', /^C?VP$/],
                         'edge' => ['up', /^(HD|CJ)$/]},
                        {'from' => [upper_u, 'cat', /^C?VP$/],
                         'to' => [nil, 'cat', /^VP$/],
                         'edge' => ['up', /^OC$/]},
                        {'from' => ['tp_prev_to', 'cat', /^VP$/],
                         'to' => [nil, 'pos', /^V[AMV]PP$/],
                         'edge' => ['dn', /^HD$/]}
                      ])
    return nil if retv.nil?

    return [retv['to'].first, retv['from'], retv['from']]
  end

  ###
  # PP-FIN: lower is a participle (POS V[AMV]PP) that is the HD or CJ child
  # of upper_l (VP or CVP); upper_u (VP or CVP) is the OC or PD child of a
  # VP or S whose HD child is either a finite verb or infinitive terminal
  # or a VZ nonterminal.
  #
  # returns: [that HD child, the new VP/S, the new VP/S], or nil
  def pp_fin(lower, upper_l, upper_u)
    retv =
      test_localtrees([
                        {'from' => [lower, 'pos', /^V[AMV]PP$/],
                         'to' => [upper_l, 'cat', /^C?VP$/],
                         'edge' => ['up', /^(HD|CJ)$/]},
                        {'from' => [upper_u, 'cat', /^C?VP$/],
                         'to' => [nil, 'cat', /^(VP|S)$/],
                         'edge' => ['up', /^(OC|PD)$/]}
                      ])
    return nil if retv.nil?

    new_upper = retv['to'].first

    # test two alternatives:
    # head child of new_upper is either a VXFIN or VXINF terminal...
    retv =
      test_localtrees([
                        {'from' => [new_upper, 'cat', /^(VP|S)$/],
                         'to' => [nil, 'pos', /^V[AMV]((FIN)|(INF))$/],
                         'edge' => ['dn', /^HD$/]}
                      ])

    # ... or a VZ nonterminal
    if retv.nil?
      retv =
        test_localtrees([
                          {'from' => [new_upper, 'cat', /^(VP|S)$/],
                           'to' => [nil, 'cat', /^VZ$/],
                           'edge' => ['dn', /^HD$/]}
                        ])
    end
    return nil if retv.nil?

    return [retv['to'].first, new_upper, new_upper]
  end


  ###
  # INF-FIN: lower is an infinitive (POS V[AMV]INF) that is the HD or CJ
  # child of upper_l (VP or CVP); upper_u (VP or CVP) is the OC child of a
  # VP or S whose HD child has the POS VAFIN, VAINF or VVINF.
  #
  # returns: [that HD child, the new VP/S, the new VP/S], or nil
  def inf_fin(lower, upper_l, upper_u)
    retv =
      test_localtrees([
                        {'from' => [lower, 'pos', /^V[AMV]INF$/],
                         'to' => [upper_l, 'cat', /^C?VP$/],
                         'edge' => ['up', /^(HD|CJ)$/]},
                        {'from' => [upper_u, 'cat', /^C?VP$/],
                         'to' => [nil, 'cat', /^(VP|S)$/],
                         'edge' => ['up', /^OC$/]},
                        {'from' => ['tp_prev_to', 'cat', /^(VP|S)$/],
                         'to' => [nil, 'pos', /^(VAFIN|VAINF|VVINF)$/],
                         'edge' => ['dn', /^HD$/]}
                      ])
    return nil if retv.nil?

    return [retv['to'].first, retv['from'], retv['from']]
  end


  ###
  # VZINF-FIN: lower is a VZ nonterminal that is the HD child of upper_l
  # (VP); upper_u (VP) is the OC child of a VP or S whose HD child is a
  # finite verb (POS V[AV]FIN).
  #
  # returns: [that HD child, the new VP/S, the new VP/S], or nil
  def vzinf_fin(lower, upper_l, upper_u)
    retv =
      test_localtrees([
                        {'from' => [lower, 'cat', /^VZ$/],
                         'to' => [upper_l, 'cat', /^VP$/],
                         'edge' => ['up', /^HD$/]},
                        {'from' => [upper_u, 'cat', /^VP$/],
                         'to' => [nil, 'cat', /^(VP|S)$/],
                         'edge' => ['up', /^OC$/]},
                        {'from' => ['tp_prev_to', 'cat', /^(VP|S)$/],
                         'to' => [nil, 'pos', /^V[AV]FIN$/],
                         'edge' => ['dn', /^HD$/]}
                      ])
    return nil if retv.nil?

    return [retv['to'].first, retv['from'], retv['from']]
  end

  ###
  # CVZINF-FIN: lower is a VZ nonterminal that is the CJ child of upper_l
  # (CVP); upper_u (CVP) is the OC child of a VP or S.
  #
  # returns: [lower, upper_l, the VP/S above the CVP], or nil.
  #   Only the 'upper upper' node moves.
  def cvzinf_fin(lower, upper_l, upper_u)
    retv =
      test_localtrees([
                        {'from' => [lower, 'cat', /^VZ$/],
                         'to' => [upper_l, 'cat', /^CVP$/],
                         'edge' => ['up', /^CJ$/]},
                        {'from' => [upper_u, 'cat', /^CVP$/],
                         'to' => [nil, 'cat', /^(VP|S)$/],
                         'edge' => ['up', /^OC$/]}
                      ])
    return nil if retv.nil?

    return [lower, upper_l, retv['to'].first]
  end

  ###
  # MODAL: lower is an infinitive (POS V[AMV]INF) that is the HD or CJ
  # child of upper_l (VP or CVP); upper_u (VP or CVP) is the OC child of a
  # VP or S whose HD child is a modal (POS VMPP, VMFIN or VMINF).
  #
  # returns: [the modal, the new VP/S, the new VP/S], or nil
  def modal(lower, upper_l, upper_u)
    retv =
      test_localtrees([
                        {'from' => [lower, 'pos', /^V[AMV]INF$/],
                         'to' => [upper_l, 'cat', /^C?VP$/],
                         'edge' => ['up', /^(HD|CJ)$/]},
                        {'from' => [upper_u, 'cat', /^C?VP$/],
                         'to' => [nil, 'cat', /^(VP|S)$/],
                         'edge' => ['up', /^OC$/]},
                        {'from' => ['tp_prev_to', 'cat', /^(VP|S)$/],
                         'to' => [nil, 'pos', /^VM((PP)|(FIN)|(INF))$/],
                         'edge' => ['dn', /^HD$/]}
                      ])
    return nil if retv.nil?

    return [retv['to'].first, retv['from'], retv['from']]
  end

  ###
  # OTHERMODAL: lower is a participle (POS V[AMV]PP) that is the OC child
  # of upper_l (VP); upper_l has an HD child that is an infinitive or
  # finite verb; upper_u (VP) is the OC child of a VP or S whose HD child
  # is a modal (POS VMPP, VMFIN or VMINF).
  #
  # returns: [the modal, the new VP/S, the new VP/S], or nil
  def othermodal(lower, upper_l, upper_u)
    retv =
      test_localtrees([
                        {'from' => [lower, 'pos', /^V[AMV]PP$/],
                         'to' => [upper_l, 'cat', /^VP$/],
                         'edge' => ['up', /^OC$/]},
                        {'from' => [upper_l, 'cat', /^VP$/],
                         'to' => [nil, 'pos', /^V[AMV]((INF)|(FIN))$/],
                         'edge' => ['dn', /^HD$/]},
                        {'from' => [upper_u, 'cat', /^VP$/],
                         'to' => [nil, 'cat', /^(VP|S)$/],
                         'edge' => ['up', /^OC$/]},
                        {'from' => ['tp_prev_to', 'cat', /^(VP|S)$/],
                         'to' => [nil, 'pos', /^VM((PP)|(FIN)|(INF))$/],
                         'edge' => ['dn', /^HD$/]}
                      ])
    return nil if retv.nil?

    return [retv['to'].first, retv['from'], retv['from']]
  end

  ###
  # CONJ: lower is a child (any edge label) of upper_l (VP); upper_u (VP)
  # is the CJ child of a CVP.
  #
  # returns: [lower, upper_l, the CVP], or nil.
  #   Only the 'upper upper' node moves.
  def conj(lower, upper_l, upper_u)
    retv =
      test_localtrees([
                        {'from' => [lower, nil, //],
                         'to' => [upper_l, 'cat', /^VP$/],
                         'edge' => ['up', //]},
                        {'from' => [upper_u, 'cat', /^VP$/],
                         'to' => [nil, 'cat', /^CVP$/],
                         'edge' => ['up', /^CJ$/]}
                      ])
    return nil if retv.nil?

    return [lower, upper_l, retv['to'].first]
  end
end
603
+
604
+ ###########################################################
605
+ class Tiger < SynInterpreter
606
+
607
+ extend TigerMaxProjection
608
+
609
+ @@heads_obj = Headz.new()
610
+
611
+ ###
612
+ # generalize over POS tags.
613
+ #
614
+ # returns one of:
615
+ #
616
+ # adj: adjective (phrase)
617
+ # adv: adverb (phrase)
618
+ # card: numbers, quantity phrases
619
+ # con: conjunction
620
+ # det: determiner, including possessive/demonstrative pronouns etc.
621
+ # for: foreign material
622
+ # noun: noun (phrase), including personal pronouns, proper names, expletives
623
+ # part: particles, truncated words (German compound parts)
624
+ # prep: preposition (phrase)
625
+ # pun: punctuation, brackets, etc.
626
+ # sent: sentence
627
+ # top: top node of a sentence
628
+ # verb: verb (phrase)
629
+ # nil: something went wrong
630
+ #
631
+ # default: return phrase type as is
632
def Tiger.category(node) # SynNode
  # Maps the phrase type / POS tag reported by Tiger.pt(node) to one of the
  # generalized labels. The patterns are tried from top to bottom and the
  # first match wins, so their order matters.
  pt = Tiger.pt(node)
  # phrase type could not be determined
  return nil if pt.nil?

  case pt.to_s.strip()
  # Prepositions have to be tested before adjectives: the trailing
  # character class of /^C?AP[^A-Za-z]?/ is optional, so that pattern also
  # matches "APPR...". With /^APPR/ listed after it, the "prep" branch for
  # APPR was unreachable and prepositions were labeled "adj".
  when /^APPR/ then return "prep"
  when /^C?ADJ/, /^PIS/, /^C?AP[^A-Za-z]?/ then return "adj"
  when /^C?ADV/, /^C?AVP/, /^PROAV/ then return "adv"
  when /^CARD/ then return "card"
  when /^C?KO/ then return "con"
  when /^PPOS/, /^ART/, /^PIAT/, /^PD/, /^PRELAT/, /^PWAT/ then return "det"
  when /^FM/, /^XY/ then return "for"
  when /^C?N/, /^PPER/, /^PN/, /^PRELS/, /^PWS/ then return "noun"
  when /^ITJ/ then return "sent"
  when /^PRF/, /^PTK/, /^TRUNC/ then return "part"
  when /^C?PP/, /^PWAV/ then return "prep"
  when /^\$/ then return "pun"
  when /^C?S$/, /^CO/, /^DL/, /^CH/, /^ISU/ then return "sent" # I don't like to put CO/DL in here, but where should they go?
  when /^TOP/ then return "top"
  when /^C?V/ then return "verb"
  else
    # unknown category/POS
    return nil
  end
end
659
+
660
+ ###
661
+ # is relative pronoun?
662
+ #
663
def Tiger.relative_pronoun?(node) # SynNode
  # returns: true if the phrase type reported by Tiger.pt starts with
  # PREL, PWAV or PWAT, false if it does not,
  # nil if the phrase type could not be determined
  phrase_type = Tiger.pt(node)
  return nil if phrase_type.nil?

  tag = phrase_type.to_s.strip()
  if tag =~ /^PREL/ || tag =~ /^PWAV/ || tag =~ /^PWAT/
    return true
  end
  return false
end
677
+
678
+
679
+ ###
680
+ # lemma_backoff:
681
+ #
682
+ # if we have lemma information, return that,
683
+ # and failing that, return the word
684
+ #
685
+ # returns: string or nil
686
def Tiger.lemma_backoff(node)
  lemma = super(node)
  # The lemmatizer may have returned several possible lemma forms,
  # separated by "|": just accept the first.
  # The separator has to be escaped. Unescaped, "|" is an alternation with
  # the empty pattern: the expression then matches every string, and a
  # lemma that is empty or starts with "|" was turned into nil.
  if lemma =~ /^([^|]+)\|/
    return $1
  else
    # a single lemma form (or nil): return as is
    return lemma
  end
end
696
+
697
+ ###
698
+ # particle_of_verb:
699
+ #
700
+ # given a node and a nodelist,
701
+ # if the node represents a verb:
702
+ # see if the verb has a particle among the nodes in nodelist
703
+ # if so, return it
704
def Tiger.particle_of_verb(node, # SynNode
                           node_list) # array: SynNode
  # returns: the first sister of the verb that is in node_list and is
  # attached by an "SVP" edge; nil if node is not a verb, has no parent,
  # or has no such sister

  # must be verb, and must have parent
  return nil unless Tiger.category(node) == "verb"
  return nil unless node.parent

  # sisters of the verb node that are in node_list
  listed_sisters = node.parent.children.select { |sister|
    node_list.include? sister
  }
  # ... whose incoming edge is labeled "SVP"
  svp_sisters = listed_sisters.select { |sister|
    sister.parent_label() == "SVP"
  }
  # Sleepy parser problem: it often tags ")" as a separate verb particle
  particles = svp_sisters.reject { |candidate|
    candidate.get_attribute("lemma") == ")" || candidate.word == ")"
  }

  # first of an empty list is nil
  return particles.first
end
735
+
736
+
737
+ ###
738
+ # auxiliary?
739
+ #
740
+ # returns true if the given node is an auxiliary
741
+ # here: a node whose part of speech starts with VA
742
def Tiger.auxiliary?(node)
  # returns: true if the node has a part of speech starting with VA,
  # false otherwise (including nodes without part of speech)
  return false unless node.part_of_speech()

  return (node.part_of_speech =~ /^VA/) ? true : false
end
750
+
751
+ ###
752
+ # modal?
753
+ #
754
+ # returns true if the given node is a modal verb
755
+ #
756
+ # returns: boolean
757
def Tiger.modal?(node)
  # returns: true if the node has a part of speech starting with VM,
  # false otherwise (including nodes without part of speech)
  return false unless node.part_of_speech()

  return (node.part_of_speech =~ /^VM/) ? true : false
end
765
+
766
+ ###
767
+ # head_terminal
768
+ #
769
+ # given a constituent, return the terminal node
770
+ # that describes its headword
771
+ # default: a heuristic that assumes the existence of a 'head'
772
+ # attribute on nodes:
773
+ # find the first node in my yield corresponding to my head attribute.
774
+ # add-on: if this doesn't work, ask the headz package for the head
775
+ #
776
+ # returns: a SynNode object if successful, else nil
777
def Tiger.head_terminal(node)
  # first choice: whatever the superclass implementation finds
  inherited_head = super(node)
  return inherited_head if inherited_head

  # fallback: ask the headz object; its "prep" entry, if set,
  # takes precedence over its "head" entry
  sem_head = @@heads_obj.get_sem_head(node)
  return nil if sem_head.nil?

  return sem_head["prep"] || sem_head["head"]
end
791
+
792
+
793
+ #####################################
794
+ # verbs(sobj) sobj is a sentence in SalsaTigerSentence format
795
+ #
796
+ # return a list of the nodes of full verbs in a given sentence:
797
+ # it is a list of lists. An item in that list is
798
+ # - either a pair [verb, svp]
799
+ # of the node of a verb with separable prefix
800
+ # and the node of its separate prefix
801
+ # - or a singleton [verb]
802
+ # of the node of a verb without separate prefix
803
def Tiger.verbs(sobj)
  # returns: one entry per terminal of category "verb":
  # [verb, prefix] if the parent of the verb has children attached by an
  # 'SVP' edge (the first of them is used), [verb] otherwise
  verb_terminals = sobj.terminals().select { |t| Tiger.category(t) == "verb" }

  return verb_terminals.map { |verb|
    # watch out for separate verb prefixes;
    # a verb without parent is a root node, for whatever reason
    mother = verb.parent
    prefixes = mother.nil? ? [] : mother.children_by_edgelabels(['SVP'])

    if prefixes.length > 1
      # more than one separate verb prefix? weird.
      $stderr.print 'Tiger warning: more than one separate verb prefix '
      $stderr.print 'for node ', verb.id, "\n"
    end

    if prefixes.empty?
      [verb]
    else
      [verb, prefixes.first]
    end
  }
end
833
+
834
+ ###
835
+ # preposition
836
+ #
837
+ # if the given node represents a PP, return the preposition (string)
838
def Tiger.preposition(node) # SynNode
  # first choice: the "prep" entry delivered by the headz object
  sem_head = @@heads_obj.get_sem_head(node)
  return sem_head["prep"].to_s if sem_head && sem_head["prep"]

  # this didn't work, try something else:
  # the word of the first terminal (in sorted order) of category "prep"
  first_prep = node.terminals_sorted().detect { |terminal|
    Tiger.category(terminal) == "prep"
  }
  return first_prep ? first_prep.word() : nil
end
854
+
855
+
856
+ ###
857
+ # voice
858
+ #
859
+ # given a constituent, return
860
+ # - "active"/"passive" if it is a verb
861
+ # - nil, else
862
def Tiger.voice(node)
  # returns: "passive" if node is a participle governed (directly, or via
  # its VP, possibly inside a coordination) by a form of 'werden';
  # "active" for every other verb; nil if node is not a verb.
  #
  # All alternations in the category and edge patterns are grouped, so
  # that both anchors apply to every alternative. Ungrouped, e.g.
  # /^(CVP)|(VP)|S|(CS)$/ accepted every category that merely contains
  # "VP" or "S", and /^S|(VP)$/ accepted CVP.

  unless Tiger.category(node) == "verb"
    return nil
  end

  # node is a participle linked to its VP or S parent by HD or CJ
  retv = test_localtrees([{'from' => [node, 'pos', /^V[AMV]PP$/],
                           'to' => [nil, 'cat', /^(CVP|VP|S|CS)$/],
                           'edge' => ['up', /^(HD|CJ)$/]}])

  if retv
    verb_parent = retv['to'].first

    # coordination?
    retv = test_localtrees([{'from' => [verb_parent, nil, //],
                             'to' => [nil, 'cat', /^CVP$/],
                             'edge' => ['up', /^CJ$/]}])
    if retv
      # yes, coordination
      #      S/VP
      #       |OC
      #      CVP
      #       | CJ
      #       VP
      #       | HD
      #    participle
      cvp = retv['to'].first

      retv = test_localtrees([{'from' => [cvp, nil, //],
                               'to' => [nil, 'cat', /^(S|VP)$/],
                               'edge' => ['up', /^OC$/]}])
    else
      # node's parent is linked to its parent via an OC edge
      retv = test_localtrees([{'from' => [verb_parent, nil, //],
                               'to' => [nil, 'cat', /^(VP|S)$/],
                               'edge' => ['up', /^OC$/]}])
    end

    return "active" if retv.nil?

    verb_grandparent = retv['to'].first

  else
    # KE Dec 19: test whether the participle
    # is linked to its parent via an OC edge.
    # if so, it has the same function as the
    # verb_grandparent above

    # node is a participle linked to its VP or S parent by OC
    retv = test_localtrees([{'from' => [node, 'pos', /^V[AMV]PP$/],
                             'to' => [nil, 'cat', /^(CVP|VP|S|CS)$/],
                             'edge' => ['up', /^OC$/]}])

    # this test has failed
    return "active" unless retv

    verb_grandparent = retv['to'].first
  end

  # node's grandparent has a HD child that is a terminal node, an auxiliary
  retv = test_localtrees([{'from' => [verb_grandparent, nil, //],
                           'to' => [nil, 'pos', /^VA.*$/],
                           'edge' => ['dn', /^HD$/]}])

  return "active" if retv.nil?

  # that HD child is a form of 'werden'
  may_be_werden = retv['to'].first

  unless may_be_werden.part_of_speech() =~ /^VA/
    return "active"
  end

  # no morphology, so approximate it using regexp.s
  case may_be_werden.word
  when "geworden", /^w.+rd(e|en|et|st|est)?$/
    # all tests passed successfully
    return "passive"
  else
    return "active"
  end
end
961
+
962
+ ###
963
+ # gfs
964
+ #
965
+ # grammatical functions of a constituent:
966
+ #
967
+ # returns: a list of pairs [relation(string), node(SynNode)]
968
+ # where <node> stands in the relation <relation> to the parameter
969
+ # that the method was called with
970
+ #
971
def Tiger.gfs(node, # SynNode object
              sent) # SalsaTigerSentence object
  # Dispatch on the category of the node: adjectives, nouns and verbs each
  # have their own extraction method; every other category has no
  # grammatical functions, hence the empty list.
  category = Tiger.category(node)

  if category == "adj"
    Tiger.gfs_adj(node)
  elsif category == "noun"
    # only the noun variant is handed the sentence object
    Tiger.gfs_noun(node, sent)
  elsif category == "verb"
    Tiger.gfs_verb(node)
  else
    []
  end
end
985
+
986
+
987
+ ###
988
+ # informative_content_node
989
+ #
990
+ # for most constituents: nil
991
+ # for a PP, the NP
992
+ # for an SBAR, the VP
993
+ # for a VP, the embedded VP
994
def Tiger.informative_content_node(node)
  # Only sentence-, VP- and PP-like constituents (and their coordinated
  # variants) have an informative content node.
  phrase_type = Tiger.simplified_pt(node)
  return nil unless ["S", "CS", "VP", "CVP", "PP", "CPP"].include? phrase_type

  # Without a head terminal there is nothing to compare children against.
  head = Tiger.head_terminal(node)
  return nil unless head
  head_lemma = Tiger.lemma_backoff(head)

  # Candidates: children that have a head terminal of their own whose
  # lemma differs from the lemma of this node's head.
  candidates = node.children().select { |child|
    child_head = Tiger.head_terminal(child)
    child_head and Tiger.lemma_backoff(child_head) != head_lemma
  }

  # exactly one candidate: no further choice to make
  return candidates.first if candidates.length() == 1

  # Otherwise prefer a candidate by the POS of its head terminal:
  # POS starting with V for (C)S and (C)VP, POS starting with N for (C)PP.
  wanted_pos =
    case phrase_type
    when /^C?S/, /^C?VP/
      /^V/
    when /^C?PP/
      /^N/
    else
      raise "Shouldn't be here"
    end

  preferred = candidates.detect { |child|
    child_head = Tiger.head_terminal(child)
    child_head and child_head.part_of_speech() =~ wanted_pos
  }

  # fall back to the first candidate (nil if there is none at all)
  preferred || candidates.first
end
1040
+
1041
+ ###
1042
+ # main node of expression
1043
+ #
1044
+ # second argument non-nil:
1045
+ # don't handle multiword expressions beyond verbs with separate particles
1046
+ #
1047
+ # returns: SynNode, main node, if found
1048
+ # else nil
1049
def Tiger.main_node_of_expr(nodelist,
                            no_mwes = nil)
  # Work on the terminals covered by the given nodes.
  terminals = nodelist.map { |n| n.yield_nodes() }.flatten

  # Special case: exactly two terminals, one of them the particle "zu"
  # (POS PTKZU), the other a verb, both directly below the same VZ node.
  # In that case the verb is the main node.
  if terminals.length() == 2
    particles, others = terminals.distribute { |t| t.part_of_speech() == "PTKZU" }

    if particles.length() == 1
      candidate = others.first
      if Tiger.category(candidate) == "verb" and
          candidate.parent == particles.first.parent and
          candidate.parent.category() == "VZ"
        return candidate
      end
    end
  end

  # Everything else is delegated to the inherited implementation,
  # which receives the terminal list rather than the original nodes.
  return super(terminals, no_mwes)
end
1071
+
1072
+
1073
+ ########
1074
+ # prune?
1075
+ # given a target node t and another node n of the syntactic structure,
1076
+ # decide whether n is likely to instantiate a semantic role
1077
+ # of t. If not, recommend n for pruning.
1078
+ #
1079
+ # This method implements a slight variant of Xue and Palmer (EMNLP 2004).
1080
+ # Pruning according to Xue & Palmer, EMNLP 2004.
1081
+ # "Step 1: Designate the predicate as the current node and
1082
+ # collect its sisters (constituents attached at the same level
1083
+ # as the predicate) unless its sisters are coordinated with the
1084
+ # predicate.
1085
+ #
1086
+ # Step 2: Reset the current node to its parent and repeat Step 1
1087
+ # till it reaches the top level node.
1088
+ #
1089
+ # Modifications made here:
1090
+ # - paths of length 0 accepted in any case
1091
+ # - TIGER coordination allowed (phrase types CX)
1092
+ #
1093
+ # returns: 0 to recommend n for pruning, else 1 (both are truthy in Ruby, so compare against the number)
1094
def Tiger.prune?(node, # SynNode
                 paths_to_target, # hash: node ID -> Path object: paths from nodes to target
                 terminal_index) # hash: terminal node -> word index in sentence
  # Returns the integer 0 (recommend pruning) or 1 (keep).
  # terminal_index is not used by this implementation.
  path = paths_to_target[node.id()]

  # no path between target and node: suggest for pruning
  return 0 unless path

  # empty path: target may be its own role, definite accept
  return 1 if path.length == 0

  # Count the Up and Down steps on the path.
  ups = 0
  downs = 0
  path.each_step { |direction, _edgelabel, _nodelabel, _endnode|
    case direction
    when /U/ then ups += 1
    when /D/ then downs += 1
    end
  }

  # Decision on the path shape:
  # (1) at least one Up and exactly one Down: keep
  # (2) at least one Up, exactly two Down, and the category of the
  #     roof node (lca) starts with "C": keep (coordination)
  # (3) anything else: discard
  return 0 unless ups >= 1
  return 1 if downs == 1

  # NOTE(review): this asks CollinsTntInterpreter for the category although
  # we are inside the Tiger interpreter, while the method description talks
  # about TIGER phrase types CX. Looks like a leftover - confirm which
  # category lookup is intended here.
  if downs == 2 and CollinsTntInterpreter.category(path.lca()) =~ /^C/
    return 1
  end

  return 0
end
1138
+
1139
+
1140
+ ################################
1141
+ private
1142
+ ################################
1143
+
1144
+ ###
1145
def Tiger.subject(verb_node)
  # returns: nil if verb_node is not a verb, otherwise a (possibly empty)
  # list of nodes
  return nil unless Tiger.category(verb_node) == "verb"

  # not passive: the subject of the verb is its surface subject
  unless Tiger.voice(verb_node) == "passive"
    return Tiger.surface_subject(verb_node)
  end

  # passive: what we want to return as subject are the
  # SBP-labelled children of the verb's parent
  mother = verb_node.parent

  # verb_node seems to be the root, strangely enough
  return [] if mother.nil?

  mother.children_by_edgelabels(['SBP'])
end
1175
+
1176
+
1177
+ ###
1178
def Tiger.direct_object(verb_node)
  # returns: nil if verb_node is not a verb, otherwise a (possibly empty)
  # list of nodes
  return nil unless Tiger.category(verb_node) == "verb"

  # passive: what we want to return as direct object
  # is the surface subject of this verb
  return Tiger.surface_subject(verb_node) if Tiger.voice(verb_node) == "passive"

  # not passive: the OA-labelled children of the verb's parent
  mother = verb_node.parent

  # verb_node seems to be the root, strangely enough
  return [] if mother.nil?

  mother.children_by_edgelabels(['OA'])
end
1202
+
1203
+ ###
1204
def Tiger.dative_object(verb_node)
  # returns: nil if verb_node is not a verb, otherwise the (possibly empty)
  # list of DA-labelled children of the verb's parent
  return nil unless Tiger.category(verb_node) == "verb"

  mother = verb_node.parent

  # no parent, hence no siblings
  mother.nil? ? [] : mother.children_by_edgelabels(['DA'])
end
1218
+
1219
+ ###
1220
def Tiger.prep_object(verb_node, preposition)
  # verb_node: SynNode
  # preposition: string, or nil to accept any preposition
  #
  # returns: nil if verb_node is not a verb, otherwise a (possibly empty)
  # list of PP nodes
  return nil unless Tiger.category(verb_node) == "verb"

  mother = verb_node.parent()

  # verb_node seems to be the root, strangely enough
  return [] if mother.nil?

  # collect all PPs among the children of the verb's parent
  sibling_pps = []
  mother.each_child { |child|
    sibling_pps << child if child.category == 'PP'
  }

  # no preposition requested: every PP qualifies
  return sibling_pps if preposition.nil?

  # otherwise keep the PPs that have the requested preposition
  # among the words of their AC children
  sibling_pps.select { |pp_node|
    prep_words = pp_node.children_by_edgelabels(['AC']).map { |prep_node|
      prep_node.word()
    }
    prep_words.include? preposition
  }
end
1254
+
1255
+ ###
1256
def Tiger.surface_subject(verb_node)
  # Walk through the nodes recorded under 'max_proj_at_level' in the
  # maximal projection of the verb, in the order given there, and return
  # the SB-labelled children of the first node that has any.
  #
  # returns: list of nodes, empty if no level has an SB child
  projection = Tiger.max_projection(verb_node)

  projection['max_proj_at_level'].each { |level_node|
    subjects = level_node.children_by_edgelabels(['SB'])
    return subjects unless subjects.empty?
  }

  []
end
1273
+
1274
+
1275
+ ##################
1276
+ # gfs_verb
1277
+ #
1278
+ # given a node (a SynNode object) that is a terminal node
1279
+ # representing a verb, determine
1280
+ # all grammatical functions of this verb
1281
+ # along with their head words
1282
+ #
1283
+ # verb_node: SynNode object, terminal node representing a verb
1284
+ #
1285
+ # returns: a list of pairs [relation(string), node(SynNode)]
1286
+ # 'relation' is 'SB', 'OA', 'DA', 'MO', 'OC'
1287
+ # 'node' is the constituent that stands in this relation to verb_node
1288
+
1289
def Tiger.gfs_verb(verb_node)
  # anything but a verb has no verbal grammatical functions
  return [] unless Tiger.category(verb_node) == "verb"

  # the list of pairs [relation, node] under construction
  relations = []

  # subject, direct object, dative object (in this order):
  # of each, only the first node found is recorded
  core_arguments = [
    ["SB", Tiger.subject(verb_node)],
    ["OA", Tiger.direct_object(verb_node)],
    ["DA", Tiger.dative_object(verb_node)]
  ]
  core_arguments.each { |label, found|
    relations << [label, found.first] if found.length() > 0
  }

  # pp objects and adjuncts: the relation name is the edge label of the PP
  # ("MO" if it has none), a dash, and the preposition of the PP
  Tiger.prep_object(verb_node, nil).each { |pp_node|
    label = pp_node.parent_label || "MO"
    relations << [label + "-" + Tiger.preposition(pp_node).to_s, pp_node]
  }

  # sentence complements: all OC children of the verb node's parent
  mother = verb_node.parent
  unless mother.nil?
    relations.concat(mother.children_by_edgelabels(["OC"]).map { |complement|
      ["OC", complement]
    })
  end

  relations
end
1347
+
1348
+ ###
1349
+ # gfs_noun
1350
+ #
1351
+ # determine relation names and relation-bearing syntax nodes
1352
+ # for noun targets
1353
+ #
1354
+ # returns: a list of pairs
1355
+ # [rel(string), node(SynNode)]
1356
def Tiger.gfs_noun(noun_node, # SynNode object: terminal, noun
                   sent_obj) # SalsaTigerSentence object: sentence in which this noun occurs
  # sent_obj is accepted but not used by this implementation.

  # the list of pairs [relation, node] under construction
  relations = []

  nominal_cats = ["NP", "PP", "PN"]
  nominal_or_coord_cats = ["NP", "PP", "PN", "CNP"]

  mother = noun_node.parent()
  grandmother = mother ? mother.parent() : nil

  # (1) the noun is attached by an NK edge: every child of its parent
  #     that is not attached by NK is related to the noun,
  #     under that child's own edge label
  if mother and noun_node.parent_label() == "NK"
    mother.children().each { |sibling|
      next if sibling.parent_label() == "NK"
      next if sibling == noun_node

      relations << [sibling.parent_label(), sibling]
    }
  end

  if mother and grandmother
    # (2) parent is NP/PP/PN/CNP, grandparent is NP/PP/PN, and the two
    #     are not linked by an NK edge: the grandparent is related to the
    #     noun, under the edge label between parent and grandparent
    if nominal_or_coord_cats.include?(mother.category()) and
        nominal_cats.include?(grandmother.category()) and
        mother.parent_label() != "NK"

      relations << [mother.parent_label(), grandmother]
    end

    # (3) grandparent is a CNP: its other NP/PP/PN/CNP children
    #     are related to the noun as conjuncts
    if grandmother.category() == "CNP"
      grandmother.each_child() { |conjunct|
        if nominal_or_coord_cats.include?(conjunct.category()) and
            conjunct != mother

          relations << ["CJ", conjunct]
        end
      }
    end
  end

  relations
end
1418
+
1419
+ ###
1420
+ # gfs_adj
1421
+ #
1422
+ # determine relation names and relation-bearing syntax nodes
1423
+ # for adjective targets
1424
+ #
1425
+ # returns: a list of pairs
1426
+ # [rel(string), node(SynNode)]
1427
+ #
1428
+ # although in this case it's just one pair (if we can find it),
1429
+ # describing the head noun
1430
def Tiger.gfs_adj(adj_node) # SynNode object: terminal, adjective
  mother = adj_node.parent()

  # no parent: nothing the adjective could be related to
  return [] if mother.nil?

  # the parent counts as the head-bearing constituent only
  # if it is one of these nominal or prepositional phrase types
  nominal = ["NP", "CNP", "PP", "CPP", "PN"].include?(mother.category())
  nominal ? [["HD", mother]] : []
end
1444
+
1445
+
1446
+ end
1447
+
1448
+ #( (TOP (S (KON_JU Und) (ADV_MO schon) (VVFIN_HD weiß) (NP_SB (ART_NK der) (ADJA_NK Berliner) (NN_NK Verkehrsverein)) ($, ,) (S_OC (PWS_SB was) (ADV_MO da) (PIS_MNR alles) (PP_MO (APPR_AC auf) (ART_NK die) (NN_NK Stadt) ($, ,) (PPOSAT_NK seine) (AP_NK (PP_MO (APPR_AC durch) (ART_NK eine) (NN_NK Rekonstruktion)) (ADV_MO so) (ADJA_HD prachtvoll)) (ADJA_NK markierte) (NN_NK Mitte) ($, ,) (AP_NK (PIS_HD alles))) (VVFIN_HD zukommt))) ($. .)))