shalmaneser 0.0.1.alpha → 1.2.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +2 -2
  3. data/CHANGELOG.md +4 -0
  4. data/LICENSE.md +4 -0
  5. data/README.md +49 -0
  6. data/bin/fred +18 -0
  7. data/bin/frprep +34 -0
  8. data/bin/rosy +17 -0
  9. data/lib/common/AbstractSynInterface.rb +35 -33
  10. data/lib/common/Mallet.rb +236 -0
  11. data/lib/common/Maxent.rb +26 -12
  12. data/lib/common/Parser.rb +5 -5
  13. data/lib/common/SynInterfaces.rb +13 -6
  14. data/lib/common/TabFormat.rb +7 -6
  15. data/lib/common/Tiger.rb +4 -4
  16. data/lib/common/Timbl.rb +144 -0
  17. data/lib/common/{FrprepHelper.rb → frprep_helper.rb} +14 -8
  18. data/lib/common/headz.rb +1 -1
  19. data/lib/common/ruby_class_extensions.rb +3 -3
  20. data/lib/fred/FredBOWContext.rb +14 -2
  21. data/lib/fred/FredDetermineTargets.rb +4 -9
  22. data/lib/fred/FredEval.rb +1 -1
  23. data/lib/fred/FredFeatureExtractors.rb +4 -3
  24. data/lib/fred/FredFeaturize.rb +1 -1
  25. data/lib/frprep/CollinsInterface.rb +6 -6
  26. data/lib/frprep/MiniparInterface.rb +5 -5
  27. data/lib/frprep/SleepyInterface.rb +7 -7
  28. data/lib/frprep/TntInterface.rb +1 -1
  29. data/lib/frprep/TreetaggerInterface.rb +29 -5
  30. data/lib/frprep/do_parses.rb +1 -0
  31. data/lib/frprep/frprep.rb +36 -32
  32. data/lib/{common/BerkeleyInterface.rb → frprep/interfaces/berkeley_interface.rb} +69 -95
  33. data/lib/frprep/interfaces/stanford_interface.rb +353 -0
  34. data/lib/frprep/interpreters/berkeley_interpreter.rb +22 -0
  35. data/lib/frprep/interpreters/stanford_interpreter.rb +22 -0
  36. data/lib/frprep/opt_parser.rb +2 -2
  37. data/lib/rosy/AbstractFeatureAndExternal.rb +5 -3
  38. data/lib/rosy/RosyIterator.rb +11 -10
  39. data/lib/rosy/rosy.rb +1 -0
  40. data/lib/shalmaneser/version.rb +1 -1
  41. data/test/functional/sample_experiment_files/fred_test.salsa.erb +1 -1
  42. data/test/functional/sample_experiment_files/fred_train.salsa.erb +1 -1
  43. data/test/functional/sample_experiment_files/prp_test.salsa.erb +2 -2
  44. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +2 -2
  45. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +2 -2
  46. data/test/functional/sample_experiment_files/prp_train.salsa.erb +2 -2
  47. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +2 -2
  48. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +2 -2
  49. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +1 -1
  50. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +7 -7
  51. data/test/functional/test_frprep.rb +3 -3
  52. data/test/functional/test_rosy.rb +20 -0
  53. metadata +215 -224
  54. data/CHANGELOG.rdoc +0 -0
  55. data/LICENSE.rdoc +0 -0
  56. data/README.rdoc +0 -0
  57. data/lib/common/CollinsInterface.rb +0 -1165
  58. data/lib/common/MiniparInterface.rb +0 -1388
  59. data/lib/common/SleepyInterface.rb +0 -384
  60. data/lib/common/TntInterface.rb +0 -44
  61. data/lib/common/TreetaggerInterface.rb +0 -303
  62. data/lib/frprep/AbstractSynInterface.rb +0 -1227
  63. data/lib/frprep/BerkeleyInterface.rb +0 -375
  64. data/lib/frprep/ConfigData.rb +0 -694
  65. data/lib/frprep/FixSynSemMapping.rb +0 -196
  66. data/lib/frprep/FrPrepConfigData.rb +0 -66
  67. data/lib/frprep/FrprepHelper.rb +0 -1324
  68. data/lib/frprep/ISO-8859-1.rb +0 -24
  69. data/lib/frprep/Parser.rb +0 -213
  70. data/lib/frprep/SalsaTigerRegXML.rb +0 -2347
  71. data/lib/frprep/SalsaTigerXMLHelper.rb +0 -99
  72. data/lib/frprep/SynInterfaces.rb +0 -275
  73. data/lib/frprep/TabFormat.rb +0 -720
  74. data/lib/frprep/Tiger.rb +0 -1448
  75. data/lib/frprep/Tree.rb +0 -61
  76. data/lib/frprep/headz.rb +0 -338
data/lib/frprep/Tiger.rb DELETED
@@ -1,1448 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- require "frprep/headz"
3
- require "frprep/SalsaTigerRegXML"
4
- require "frprep/ruby_class_extensions"
5
- class Array
6
- include EnumerableDistribute
7
- end
8
-
9
-
10
- require "frprep/AbstractSynInterface"
11
-
12
- #############################################
13
- #
14
- # max. projection:
15
- #
16
- # consists of methods that are 'building blocks' for computing
17
- # the maximum projection of a verb in TIGER syntax
18
- #
19
- # basically, computing the max. projection is about moving an
20
- # upper node upward. At the beginning it is the parent of the
21
- # terminal node for the verb, and each building block moves it up
22
- # to its parent, if the building block matches.
23
- #
24
- # Apart from the upper node, a lower node is also watched. At the
25
- # beginning it is the terminal node for the verb, later it is usually
26
- # the 'HD' child of the upper node. This lower node is needed for
27
- # testing whether a building block matches.
28
- #
29
- # For handling conjunction, the upper node is split into two, a 'lower upper'
30
- # and an 'upper upper' node. The 'lower upper' is used when some relation
31
- # between the upper node and its descendants is tested, and the 'upper upper'
32
- # is used when some relation between the upper node and its predecessors
33
- # is tested. Usually the 'lower upper' and the 'upper upper' are the same,
34
- # but conjunction building blocks move the 'upper upper' up to its parent
35
- # while leaving the 'lower upper' unchanged.
36
- #
37
- # So all building block methods take three arguments: lower, upper_l and
38
- # upper_u. All three are nodes given as SalsaTigerSentence objects
39
- #
40
- # All building block methods give as their return value a list of three
41
- # nodes: [new_lower, new_upper_l, new_upper_u], if the building block
42
- # matched. If it does not match, nil is returned.
43
- #
44
- # The method explain describes all building blocks,
45
- # the conditions for the building blocks matching, and shows
46
- # where the lower and the upper nodes will be after a building block matched.
47
- #
48
- # building blocks:
49
- # pp_pp
50
- # pp_fin
51
- # inf_fin
52
- # vzinf_fin
53
- # cvzinf_fin
54
- # modal
55
- # othermodal
56
- # conj
57
- #
58
- # To compute the maximal projection of a verb,
59
- # we start at the parent of the terminal node for the verb
60
- # "and move upwards.
61
- # "The move upwards is broken up in little building blocks."
62
- # "Each of them licenses one step upward in the syntactic tree."
63
- #
64
- # "Each building block needs information about two nodes:"
65
- # "The current upper node (at the beginning, that is"
66
- # "the parent of the terminal node for the verb) and"
67
- # "one specific child of that current upper node"
68
- # "(at the beginning, that is the terminal node for the verb)."
69
- #
70
- # "Each building block provides information of"
71
- # "- where the new upper node is, depending on the current"
72
- # " upper node, and"
73
- # "- where the new specific child is."
74
- #
75
- # "For handling conjunction, we need to complicate this picture somewhat:"
76
- # "We split the current upper node into an 'upper upper'"
77
- # "and a 'lower upper' node."
78
- # "If we want to check the edge from the current upper node upwards,"
79
- # "we use the 'upper upper'."
80
- # "If we want to check an edge from the current upper node downwards,"
81
- # "we use the 'lower upper'."
82
- # "Almost always, the 'lower upper' and the 'upper upper' will be the same."
83
- # "Except for the building block for conjunction:"
84
- # "It moves the 'upper upper' one level up,"
85
- # "but leaves the 'lower upper' the same."
86
- #
87
- # "There are five levels of building blocks."
88
- #
89
- # "* 1st level: auxiliary verb constructions involving a participle"
90
- # " The following building blocks are tried, in this order:"
91
- # " CONJ, PP-PP, CONJ, PP_FIN"
92
- #
93
- # "* 2nd level: infinitive constructions"
94
- # " The following building blocks are tried, in this order:"
95
- # " CONJ, INF-FIN, VZINF-FIN, CVZINF-FIN"
96
- #
97
- # "* 3rd level: modals"
98
- # " The following building blocks are tried, in this order:"
99
- # " CONJ, MODAL, OTHERMODAL"
100
- #
101
- # "* 4th level = 1st level"
102
- #
103
- # "* 5th level = 2nd level"
104
- #
105
- #
106
- # "***These are the building blocks:"
107
- #
108
- # "PP-PP"
109
- # " VP (new uppermost node)"
110
- # " / | \\OC"
111
- # " HD/ | VP|CVP (current uppermost node)"
112
- # " / | |"
113
- # " o FE |HD|CJ"
114
- # "POS: V[AMV]PP |"
115
- # " new target current target"
116
- # " POS: V[AMV]PP"
117
- #
118
- # "PP-FIN"
119
- # " S/VP (new uppermost node)"
120
- # " / | \\OC or PD"
121
- # " HD/ | VP|CVP|CO (current uppermost node)"
122
- # " / | |"
123
- # " o FE |HD|CJ"
124
- # "POS: V[AMV]FIN |"
125
- # " V[AMV]INF current target"
126
- # "or CAT: VZ POS: V[AMV]PP"
127
- #
128
- # "INF_FIN"
129
- # " S/VP (new uppermost node)"
130
- # " / | \\OC"
131
- # " HD/ | VP|CVP (current uppermost node)"
132
- # " / | |"
133
- # " o FE |HD|CJ"
134
- # "POS: VAFIN |"
135
- # " VAINF current target"
136
- # " VVINF POS: V[AMV]INF"
137
- # " new target"
138
- #
139
- # "VZINF-FIN"
140
- # " S/VP (new uppermost node)"
141
- # " / | \\OC"
142
- # " HD/ | VP (current uppermost node)"
143
- # " / | |"
144
- # " o FE |HD"
145
- # "POS: V[AV]FIN |"
146
- # " new target current target"
147
- # " CAT: VZ"
148
- #
149
- # "CVZINF-FIN"
150
- # " S/VP (new uppermost node)"
151
- # " | \\OC"
152
- # " | CVP (current uppermost node)"
153
- # " | |"
154
- # " FE |CJ"
155
- # " |"
156
- # " current and new target"
157
- # " CAT: VZ"
158
- #
159
- # "MODAL"
160
- # " S/VP (new uppermost node)"
161
- # " / | \\OC"
162
- # " HD/ | VP|CVP (current uppermost node)"
163
- # " / | |"
164
- # " o FE |HD|CJ"
165
- # " POS: |"
166
- # " VM(PP|FIN|INF) current target"
167
- # " new target POS: V[AMV]INF"
168
- #
169
- # "OTHERMODAL"
170
- # " S/VP (new uppermost node)"
171
- # " / | \\OC"
172
- # " HD/ | VP (current uppermost node)"
173
- # " / | | \\"
174
- # " o FE |HD \\OC"
175
- # "POS: VMFIN | \\"
176
- # " VMINF POS: current target"
177
- # " VMPP V[AMV]INF POS: V[AMV]PP"
178
- # " new target V[AMV]FIN"
179
- #
180
- # "CONJ"
181
- # " CVP (new upper uppermost node)"
182
- # " | \\CJ"
183
- # " | VP (current and new uppermost node)"
184
- # " | |"
185
- # " FE |"
186
- # " |"
187
- # " current and new target"
188
- ###
189
- module TigerMaxProjection
190
-
191
- def max_projection(node)
192
- parent = node.parent
193
- # node has no parent? recover somehow
194
- if parent.nil?
195
- return {'max_proj' => node,
196
- 'max_proj_at_level' => [node]}
197
- end
198
-
199
- maxproj_at_level = Array.new
200
- maxproj_at_level << parent
201
-
202
- lower = node
203
- upper_u = upper_l = parent
204
-
205
- lower, upper_l, upper_u = project_participle(lower, upper_l, upper_u)
206
- maxproj_at_level << upper_u
207
-
208
- lower, upper_l, upper_u = project_infinitive(lower, upper_l, upper_u)
209
- maxproj_at_level << upper_u
210
-
211
- lower, upper_l, upper_u = project_modal(lower, upper_l, upper_u)
212
- maxproj_at_level << upper_u
213
-
214
- lower, upper_l, upper_u = project_participle(lower, upper_l, upper_u)
215
- maxproj_at_level << upper_u
216
-
217
- lower, upper_l, upper_u = project_infinitive(lower, upper_l, upper_u)
218
- maxproj_at_level << upper_u
219
-
220
- return {'max_proj' => upper_u,
221
- 'max_proj_at_level' => maxproj_at_level}
222
- end
223
-
224
-
225
- ###
226
- def test_localtrees(path)
227
-
228
-
229
- #HIER WEITER: was genau passiert hier?
230
-
231
-
232
- retv = Hash.new
233
-
234
- # test each step
235
- path.each { |step|
236
- retv = test_step(step, retv)
237
-
238
- if retv.nil?
239
- return nil
240
- end
241
- }
242
-
243
- # return result of last step
244
- return retv
245
- end
246
-
247
- ######
248
- private
249
-
250
- ###
251
- def test_step(path, previous)
252
- if path['from'].nil? or path['to'].nil? or path['edge'].nil?
253
- $stderr.puts 'TigerAux error: missing path hash entry'
254
- exit 1
255
- end
256
-
257
- from_node, *from_descr = path['from']
258
- to_node, *to_descr = path['to']
259
-
260
- # using the special flags tp_prev_to and tp_prev_from,
261
- # a node can also be set to be the value in the
262
- # 'previous' hash
263
- from_node = cf_previous(from_node, previous)
264
- to_node = cf_previous(to_node, previous)
265
-
266
- # test if 'from' node description matches
267
- unless test_node(from_node, from_descr)
268
- return nil
269
- end
270
-
271
- # try path
272
- direction, edgelabel = path['edge']
273
- case direction
274
- when 'up'
275
- label = from_node.parent_label()
276
- if label =~ edgelabel
277
- end_nodes = [from_node.parent()]
278
- else
279
- end_nodes = []
280
- end
281
- when 'dn'
282
- end_nodes = []
283
- from_node.each_child { |child|
284
- if child.parent_label() =~ edgelabel
285
- end_nodes << child
286
- end
287
- }
288
- else
289
- $stderr.puts 'TigerAux error: unknown direction'
290
- exit 1
291
- end
292
-
293
- # check all prospective end nodes
294
- remaining_end_nodes = end_nodes.select { |prosp_to_node|
295
- if to_node.nil? or to_node == prosp_to_node
296
- test_node(prosp_to_node, to_descr)
297
- else
298
- false
299
- end
300
- }
301
-
302
- if remaining_end_nodes.empty?
303
- return nil
304
- else
305
- return {'from' => from_node,
306
- 'to' => remaining_end_nodes}
307
- end
308
- end
309
-
310
- ###
311
- def test_node(node, descr)
312
-
313
- cat_or_pos, pattern = descr
314
- if node.nil?
315
- $stderr.puts 'TigerAux error: test_node nil'
316
- exit 1
317
- end
318
-
319
- case cat_or_pos
320
- when 'pos'
321
- if node.part_of_speech =~ pattern
322
- return true
323
- else
324
- return false
325
- end
326
- when 'cat'
327
- if node.category =~ pattern
328
- return true
329
- else
330
- return false
331
- end
332
- when nil
333
- return true
334
- else
335
- $stderr.puts 'TigerAux error: neither cat nor pos'
336
- exit 1
337
- end
338
- end
339
-
340
- ###
341
- def cf_previous(node, previous)
342
- case node
343
- when 'tp_prev_to'
344
- return previous['to'].first
345
- when 'tp_prev_from'
346
- return previous['from']
347
- else
348
- return node
349
- end
350
- end
351
-
352
- ###
353
- def project_participle(lower, upper_l, upper_u)
354
- return project_this(lower, upper_l, upper_u,
355
- [self.method('conj'),
356
- self.method('pp_pp'),
357
- self.method('conj'),
358
- self.method('pp_fin')])
359
- end
360
-
361
- ###
362
- def project_infinitive(lower, upper_l, upper_u)
363
- return project_this(lower, upper_l, upper_u,
364
- [self.method('conj'),
365
- self.method('inf_fin'),
366
- self.method('vzinf_fin'),
367
- self.method('cvzinf_fin')
368
- ])
369
- end
370
-
371
- ###
372
- def project_modal(lower, upper_l, upper_u)
373
- return project_this(lower, upper_l, upper_u,
374
- [self.method('conj'),
375
- self.method('modal'),
376
- self.method('othermodal')
377
- ])
378
- end
379
-
380
- ###
381
- def project_participle_(lower, upper_l, upper_u)
382
- return project_this(lower, upper_l, upper_u,
383
- [self.method('conj'),
384
- self.method('pp_pp'),
385
- self.method('conj'),
386
- self.method('pp_fin')])
387
- end
388
-
389
- ###
390
- def project_this(lower, upper_l, upper_u, method_list)
391
- method_list.each { |method|
392
- retv = method.call(lower, upper_l, upper_u)
393
- unless retv.nil?
394
- lower, upper_l, upper_u = retv
395
- end
396
- }
397
- return [lower, upper_l, upper_u]
398
- end
399
-
400
- ###
401
- def pp_pp(lower, upper_l, upper_u)
402
-
403
- retv =
404
- test_localtrees([
405
- {'from' => [lower, 'pos', /^V[AMV]PP$/],
406
- 'to' => [upper_l, 'cat', /^C?VP$/],
407
- 'edge' => ['up', /^(HD)|(CJ)$/]},
408
- {'from' => [upper_u, 'cat', /^C?VP$/],
409
- 'to' => [nil, 'cat', /^VP$/],
410
- 'edge' => ['up', /^OC$/]},
411
- {'from' => ['tp_prev_to', 'cat', /^VP$/],
412
- 'to' => [nil, 'pos', /^V[AMV]PP$/],
413
- 'edge' => ['dn', /^HD$/]}
414
- ])
415
-
416
- if retv.nil?
417
- return nil
418
- else
419
- return [retv['to'].first, retv['from'], retv['from']]
420
- end
421
- end
422
-
423
- ###
424
- def pp_fin(lower, upper_l, upper_u)
425
-
426
- retv =
427
- test_localtrees([
428
- {'from' => [lower, 'pos', /^V[AMV]PP$/],
429
- 'to' => [upper_l, 'cat', /^C?VP$/],
430
- 'edge' => ['up', /^(HD)|(CJ)$/]},
431
- {'from' => [upper_u,'cat', /^C?VP$/],
432
- 'to' => [nil, 'cat', /^(VP)|S$/],
433
- 'edge' => ['up', /^(OC)|(PD)$/]}
434
- ])
435
-
436
- if retv.nil?
437
- return nil
438
- end
439
-
440
- new_upper = retv['to'].first
441
-
442
- # test two alternatives:
443
- # head child of new_upper is either a VXFIN or VXINF terminal...
444
- retv =
445
- test_localtrees([
446
- {'from' => [new_upper, 'cat', /^(VP)|S$/],
447
- 'to' => [nil, 'pos', /^V[AMV]((FIN)|(INF))$/],
448
- 'edge' => ['dn', /^HD$/]}
449
- ])
450
-
451
- # ... or a VZ nonterminal
452
- if retv.nil?
453
- retv =
454
- test_localtrees([
455
- {'from' => [new_upper, 'cat', /^(VP)|S$/],
456
- 'to' => [nil, 'cat', /^VZ$/],
457
- 'edge' => ['dn', /^HD$/]}
458
- ])
459
- end
460
-
461
- if retv.nil?
462
- return nil
463
- else
464
- return [retv['to'].first, new_upper, new_upper]
465
- end
466
- end
467
-
468
-
469
- ###
470
- def inf_fin(lower, upper_l, upper_u)
471
-
472
- retv =
473
- test_localtrees([
474
- {'from' => [lower, 'pos', /^V[AMV]INF$/],
475
- 'to' => [upper_l, 'cat', /^C?VP$/],
476
- 'edge' => ['up', /^(HD)|(CJ)$/]},
477
- {'from' => [upper_u,'cat', /^C?VP$/],
478
- 'to' => [nil, 'cat', /^(VP)|S$/],
479
- 'edge' => ['up', /^OC$/]},
480
- {'from' => ['tp_prev_to', 'cat', /^(VP)|S$/],
481
- 'to' => [nil, 'pos', /^(VAFIN)|(VAINF)|(VVINF)$/],
482
- 'edge' => ['dn', /^HD$/]}
483
- ])
484
- if retv.nil?
485
- return nil
486
- else
487
- return [retv['to'].first, retv['from'], retv['from']]
488
- end
489
- end
490
-
491
-
492
- ###
493
- def vzinf_fin(lower, upper_l, upper_u)
494
-
495
- retv =
496
- test_localtrees([
497
- {'from' => [lower, 'cat', /^VZ$/],
498
- 'to' => [upper_l, 'cat', /^VP$/],
499
- 'edge' => ['up', /^HD$/]},
500
- {'from' => [upper_u,'cat', /^VP$/],
501
- 'to' => [nil, 'cat', /^(VP)|S$/],
502
- 'edge' => ['up', /^OC$/]},
503
- {'from' => ['tp_prev_to', 'cat', /^(VP)|S$/],
504
- 'to' => [nil, 'pos', /^V[AV]FIN$/],
505
- 'edge' => ['dn', /^HD$/]}
506
- ])
507
-
508
- if retv.nil?
509
- return nil
510
- else
511
- return [retv['to'].first, retv['from'], retv['from']]
512
- end
513
- end
514
-
515
- ###
516
- def cvzinf_fin(lower, upper_l, upper_u)
517
-
518
- retv =
519
- test_localtrees([
520
- {'from' => [lower, 'cat', /^VZ$/],
521
- 'to' => [upper_l, 'cat', /^CVP$/],
522
- 'edge' => ['up', /^CJ$/]},
523
- {'from' => [upper_u,'cat', /^CVP$/],
524
- 'to' => [nil, 'cat', /^(VP)|S$/],
525
- 'edge' => ['up', /^OC$/]}
526
- ])
527
-
528
- if retv.nil?
529
- return nil
530
- else
531
- return [lower, upper_l, retv['to'].first]
532
- end
533
- end
534
-
535
- ###
536
- def modal(lower, upper_l, upper_u)
537
-
538
- retv =
539
- test_localtrees([
540
- {'from' => [lower, 'pos', /^V[AMV]INF$/],
541
- 'to' => [upper_l, 'cat', /^C?VP$/],
542
- 'edge' => ['up', /^(HD)|(CJ)$/]},
543
- {'from' => [upper_u,'cat', /^C?VP$/],
544
- 'to' => [nil, 'cat', /^(VP)|S$/],
545
- 'edge' => ['up', /^OC$/]},
546
- {'from' => ['tp_prev_to', 'cat', /^(VP)|S$/],
547
- 'to' => [nil, 'pos', /^VM((PP)|(FIN)|(INF))$/],
548
- 'edge' => ['dn', /^HD$/]}
549
- ])
550
-
551
- if retv.nil?
552
- return nil
553
- else
554
- return [retv['to'].first, retv['from'], retv['from']]
555
- end
556
- end
557
-
558
- ###
559
- def othermodal(lower, upper_l, upper_u)
560
-
561
- retv =
562
- test_localtrees([
563
- {'from' => [lower, 'pos', /^V[AMV]PP$/],
564
- 'to' => [upper_l, 'cat', /^VP$/],
565
- 'edge' => ['up', /^OC$/]},
566
- {'from' => [upper_l, 'cat', /^VP$/],
567
- 'to' => [nil, 'pos', /^V[AMV]((INF)|(FIN))$/],
568
- 'edge' => ['dn', /^HD$/]},
569
- {'from' => [upper_u,'cat', /^VP$/],
570
- 'to' => [nil, 'cat', /^(VP)|S$/],
571
- 'edge' => ['up', /^OC$/]},
572
- {'from' => ['tp_prev_to', 'cat', /^(VP)|S$/],
573
- 'to' => [nil, 'pos', /^VM((PP)|(FIN)|(INF))$/],
574
- 'edge' => ['dn', /^HD$/]}
575
- ])
576
-
577
- if retv.nil?
578
- return nil
579
- else
580
- return [retv['to'].first, retv['from'], retv['from']]
581
- end
582
- end
583
-
584
- ###
585
- def conj(lower, upper_l, upper_u)
586
-
587
- retv = test_localtrees([
588
- {'from' => [lower, nil, //],
589
- 'to' => [upper_l, 'cat', /^VP$/],
590
- 'edge' => ['up', //]},
591
- {'from' => [upper_u,'cat', /^VP$/],
592
- 'to' => [nil, 'cat', /^CVP$/],
593
- 'edge' => ['up', /^CJ$/]}
594
- ])
595
-
596
- if retv.nil?
597
- return nil
598
- else
599
- return [lower, upper_l, retv['to'].first]
600
- end
601
- end
602
- end
603
-
604
- ###########################################################3
605
- class Tiger < SynInterpreter
606
-
607
- extend TigerMaxProjection
608
-
609
- @@heads_obj = Headz.new()
610
-
611
- ###
612
- # generalize over POS tags.
613
- #
614
- # returns one of:
615
- #
616
- # adj: adjective (phrase)
617
- # adv: adverb (phrase)
618
- # card: numbers, quantity phrases
619
- # con: conjunction
620
- # det: determiner, including possessive/demonstrative pronouns etc.
621
- # for: foreign material
622
- # noun: noun (phrase), including personal pronouns, proper names, expletives
623
- # part: particles, truncated words (German compound parts)
624
- # prep: preposition (phrase)
625
- # pun: punctuation, brackets, etc.
626
- # sent: sentence
627
- # top: top node of a sentence
628
- # verb: verb (phrase)
629
- # nil: something went wrong
630
- #
631
- # default: return phrase type as is
632
- def Tiger.category(node) # SynNode
633
- pt = Tiger.pt(node)
634
- if pt.nil?
635
- # phrase type could not be determined
636
- return nil
637
- end
638
-
639
- case pt.to_s.strip()
640
- when /^C?ADJ/, /^PIS/, /^C?AP[^A-Za-z]?/ then return "adj"
641
- when /^C?ADV/, /^C?AVP/, /^PROAV/ then return "adv"
642
- when /^CARD/ then return "card"
643
- when /^C?KO/ then return "con"
644
- when /^PPOS/, /^ART/ ,/^PIAT/, /^PD/, /^PRELAT/, /^PWAT/ then return "det"
645
- when /^FM/ , /^XY/ then return "for"
646
- when /^C?N/, /^PPER/, /^PN/, /^PRELS/, /^PWS/ then return "noun"
647
- when /^ITJ/ then return "sent"
648
- when /^PRF/, /^PTK/, /^TRUNC/ then return "part"
649
- when /^C?PP/ , /^APPR/, /^PWAV/ then return "prep"
650
- when /^\$/ then return "pun"
651
- when /^C?S$/, /^CO/, /^DL/, /^CH/, /^ISU/ then return "sent" # I don't like to put CO/DL in here, but where should they go?
652
- when /^TOP/ then return "top"
653
- when /^C?V/ then return "verb"
654
- else
655
- # $stderr.puts "WARNING Unknown category/POS "+c.to_s+" (German data)"
656
- return nil
657
- end
658
- end
659
-
660
- ###
661
- # is relative pronoun?
662
- #
663
- def Tiger.relative_pronoun?(node) # SynNode
664
- pt = Tiger.pt(node)
665
- if pt.nil?
666
- # phrase type could not be determined
667
- return nil
668
- end
669
-
670
- case pt.to_s.strip()
671
- when /^PREL/, /^PWAV/, /^PWAT/
672
- return true
673
- else
674
- return false
675
- end
676
- end
677
-
678
-
679
- ###
680
- # lemma_backoff:
681
- #
682
- # if we have lemma information, return that,
683
- # and failing that, return the word
684
- #
685
- # returns: string or nil
686
- def Tiger.lemma_backoff(node)
687
- lemma = super(node)
688
- # lemmatizer has returned more than one possible lemma form:
689
- # just accept the first
690
- if lemma =~ /^([^|]+)|/
691
- return $1
692
- else
693
- return lemma
694
- end
695
- end
696
-
697
- ###
698
- # verb_with_particle:
699
- #
700
- # given a node and a nodelist,
701
- # if the node represents a verb:
702
- # see if the verb has a particle among the nodes in nodelist
703
- # if so, return it
704
- def Tiger.particle_of_verb(node, # SynNode
705
- node_list) # array: SynNode
706
-
707
- # must be verb
708
- unless Tiger.category(node) == "verb"
709
- return nil
710
- end
711
-
712
- # must have parent
713
- unless node.parent
714
- return nil
715
- end
716
-
717
- particles = node.parent.children.select { |sister|
718
- # look for sisters of the verb node that are in node_list
719
- node_list.include? sister
720
- }.select { |sister|
721
- # see if its incoming edge is labeled "SVP"
722
- sister.parent_label() == "SVP"
723
- }.reject { |particle|
724
- # Sleepy parser problem: it often tags ")" as a separate verb particle
725
- particle.get_attribute("lemma") == ")" or
726
- particle.word == ")"
727
- }
728
-
729
- if particles.length == 0
730
- return nil
731
- else
732
- return particles.first
733
- end
734
- end
735
-
736
-
737
- ###
738
- # auxiliary?
739
- #
740
- # returns true if the given node is an auxiliary
741
- # default: no recognition of auxiliaries
742
- def Tiger.auxiliary?(node)
743
- if node.part_of_speech() and
744
- node.part_of_speech =~ /^VA/
745
- return true
746
- else
747
- return false
748
- end
749
- end
750
-
751
- ###
752
- # modal?
753
- #
754
- # returns true if the given node is a modal verb
755
- #
756
- # returns: boolean
757
- def Tiger.modal?(node)
758
- if node.part_of_speech() and
759
- node.part_of_speech =~ /^VM/
760
- return true
761
- else
762
- return false
763
- end
764
- end
765
-
766
- ###
767
- # head_terminal
768
- #
769
- # given a constituent, return the terminal node
770
- # that describes its headword
771
- # default: a heuristic that assumes the existence of a 'head'
772
- # attribute on nodes:
773
- # find the first node in my yield corresponding to my head attribute.
774
- # add-on: if this doesn't work, ask the headz package for the head
775
- #
776
- # returns: a SynNode object if successful, else nil
777
- def Tiger.head_terminal(node)
778
- if (head = super(node))
779
- return head
780
- end
781
-
782
- head_hash = @@heads_obj.get_sem_head(node)
783
- if head_hash.nil?
784
- return nil
785
- elsif head_hash["prep"]
786
- return head_hash["prep"]
787
- else
788
- return head_hash["head"]
789
- end
790
- end
791
-
792
-
793
- #####################################
794
- # verbs(sobj) sobj is a sentence in SalsaTigerSentence format
795
- #
796
- # return a list of the nodes of full verbs in a given sentence:
797
- # it is a list of lists. An item in that list is
798
- # - either a pair [verb, svp]
799
- # of the node of a verb with separable prefix
800
- # and the node of its separate prefix
801
- # - or a singleton [verb]
802
- # of the node of a verb without separate prefix
803
- def Tiger.verbs(sobj)
804
- return sobj.terminals().select { |t|
805
- # verbs
806
-
807
- Tiger.category(t) == "verb"
808
- }.map { |verb|
809
-
810
- # watch out for separate verb prefixes
811
- parent = verb.parent
812
- if parent.nil?
813
- # verb is root node, for whatever reason
814
- [verb]
815
- else
816
-
817
- svp_children = parent.children_by_edgelabels(['SVP'])
818
- if svp_children.empty?
819
- # verb has no separate verb prefix
820
- [verb]
821
- elsif svp_children.length == 1
822
- # verb has exactly one separate verb prefix
823
- [verb, svp_children.first]
824
- else
825
- # more than one separate verb prefix? weird.
826
- $stderr.print 'Tiger warning: more than one separate verb prefix '
827
- $stderr.print 'for node ', verb.id, "\n"
828
- [verb, svp_children.first]
829
- end
830
- end
831
- }
832
- end
833
-
834
- ###
835
- # preposition
836
- #
837
- # if the given node represents a PP, return the preposition (string)
838
- def Tiger.preposition(node) # SynNode
839
- hash = @@heads_obj.get_sem_head(node)
840
- if hash and hash["prep"]
841
- return hash["prep"].to_s
842
- end
843
-
844
- # this didn't work, try something else: first preposition among my terminals
845
- pnode = node.terminals_sorted().detect { |n|
846
- Tiger.category(n) == "prep"
847
- }
848
- if pnode
849
- return pnode.word()
850
- else
851
- return nil
852
- end
853
- end
854
-
855
-
856
- ###
857
- # voice
858
- #
859
- # given a constituent, return
860
- # - "active"/"passive" if it is a verb
861
- # - nil, else
862
- def Tiger.voice(node)
863
-
864
- unless Tiger.category(node) == "verb"
865
- return nil
866
- end
867
-
868
- # node is a participle linked to its VP or S parent by HD or CJ
869
- retv = test_localtrees([ {'from' => [node, 'pos', /^V[AMV]PP$/],
870
- 'to' => [nil, 'cat', /^(CVP)|(VP)|S|(CS)$/],
871
- 'edge' => ['up', /^(HD)|(CJ)$/]}])
872
-
873
- if retv
874
- verb_parent = retv['to'].first
875
-
876
- # coordination?
877
- retv = test_localtrees([{'from' => [verb_parent, nil, //],
878
- 'to' => [nil, 'cat', /^CVP$/],
879
- 'edge' => ['up', /^CJ$/]}])
880
- if retv
881
-
882
- # yes, coordination
883
- # S/VP
884
- # |OC
885
- # CVP
886
- # | CJ
887
- # VP
888
- # | HD
889
- # participle
890
-
891
- cvp = retv['to'].first
892
-
893
- retv = test_localtrees([{'from' => [cvp, nil, //],
894
- 'to' => [nil, 'cat', /^S|(VP)$/],
895
- 'edge' => ['up', /^OC$/]}])
896
-
897
- else
898
- # node's parent is linked to its parent via an OC edge
899
- retv = test_localtrees([{'from' => [verb_parent, nil, //],
900
- 'to' => [nil, 'cat', /^(VP)|S$/],
901
- 'edge' => ['up', /^OC$/]}])
902
- end
903
-
904
- if retv.nil?
905
- return "active"
906
- end
907
-
908
- verb_grandparent = retv['to'].first
909
-
910
- else
911
- # KE Dec 19: test whether the participle
912
- # is linked to its parent via an OC edge.
913
- # if so, it has the same function as the
914
- # verb_grandparent above
915
-
916
- # node is a participle linked to its VP or S parent by OC
917
- retv = test_localtrees([ {'from' => [node, 'pos', /^V[AMV]PP$/],
918
- 'to' => [nil, 'cat', /^(CVP)|(VP)|S|(CS)$/],
919
- 'edge' => ['up', /^OC$/]}])
920
-
921
- if retv
922
- verb_grandparent = retv['to'].first
923
-
924
- else
925
- # this test has failed
926
- return "active"
927
- end
928
- end
929
-
930
- #puts test_localtrees([{'from' => [verb_grandparent, nil, //],
931
- # 'to' => [nil, 'pos', /^VA.*$/],
932
- # 'edge' => ['dn', /^HD$/]}])
933
-
934
- # node's grandparent has a HD child that is a terminal node, an auxiliary
935
- retv = test_localtrees([{'from' => [verb_grandparent, nil, //],
936
- 'to' => [nil, 'pos', /^VA.*$/],
937
- 'edge' => ['dn', /^HD$/]}])
938
-
939
- if retv.nil?
940
- return "active"
941
- end
942
-
943
- # that HD child is a form of 'werden'
944
- may_be_werden = retv['to'].first
945
-
946
- unless may_be_werden.part_of_speech() =~ /^VA/
947
- return "active"
948
- end
949
-
950
- # no morphology, so approximate it using regexp.s
951
- case may_be_werden.word
952
- when "geworden"
953
- when /^w.+rd(e|en|et|st|est)?$/
954
- else
955
- return "active"
956
- end
957
-
958
- # all tests passed successfully
959
- return "passive"
960
- end
961
-
962
- ###
963
- # gfs
964
- #
965
- # grammatical functions of a constituent:
966
- #
967
- # returns: a list of pairs [relation(string), node(SynNode)]
968
- # where <node> stands in the relation <relation> to the parameter
969
- # that the method was called with
970
- #
971
- def Tiger.gfs(node, # SynNode object
972
- sent) # SalsaTigerSentence object
973
-
974
- case Tiger.category(node)
975
- when "adj"
976
- return Tiger.gfs_adj(node)
977
- when "noun"
978
- return Tiger.gfs_noun(node, sent)
979
- when "verb"
980
- return Tiger.gfs_verb(node)
981
- else
982
- return []
983
- end
984
- end
985
-
986
-
987
- ###
988
- # informative_content_node
989
- #
990
- # for most constituents: nil
991
- # for a PP, the NP
992
- # for an SBAR, the VP
993
- # for a VP, the embedded VP
994
- def Tiger.informative_content_node(node)
995
- this_pt = Tiger.simplified_pt(node)
996
-
997
- unless ["S", "CS", "VP", "CVP", "PP", "CPP"].include? this_pt
998
- return nil
999
- end
1000
-
1001
- nh = Tiger.head_terminal(node)
1002
- unless nh
1003
- return nil
1004
- end
1005
- headlemma = Tiger.lemma_backoff(nh)
1006
-
1007
- nonhead_children = node.children().reject { |n|
1008
- nnh = Tiger.head_terminal(n)
1009
- not(nnh) or
1010
- Tiger.lemma_backoff(nnh) == headlemma
1011
- }
1012
- if nonhead_children.length() == 1
1013
- return nonhead_children.first
1014
- end
1015
-
1016
- # more than one child:
1017
- # for S/CS and VP/CVP take the child whose head POS starts with V,
1017
- # for PP/CPP the child whose head POS starts with N
1019
- case this_pt
1020
- when /^C?S/, /^C?VP/
1021
- icont_child = nonhead_children.detect { |n|
1022
- h = Tiger.head_terminal(n)
1023
- h and h.part_of_speech() =~ /^V/
1024
- }
1025
- when /^C?PP/
1026
- icont_child = nonhead_children.detect { |n|
1027
- h = Tiger.head_terminal(n)
1028
- h and h.part_of_speech() =~ /^N/
1029
- }
1030
- else
1031
- raise "Shouldn't be here"
1032
- end
1033
-
1034
- if icont_child
1035
- return icont_child
1036
- else
1037
- return nonhead_children.first
1038
- end
1039
- end
1040
-
1041
- ###
1042
- # main node of expression
1043
- #
1044
- # second argument non-nil:
1045
- # don't handle multiword expressions beyond verbs with separate particles
1046
- #
1047
- # returns: SynNode, main node, if found
1048
- # else nil
1049
- def Tiger.main_node_of_expr(nodelist,
1050
- no_mwes = nil)
1051
-
1052
- # map nodes to terminals
1053
- nodelist = nodelist.map { |n| n.yield_nodes() }.flatten
1054
-
1055
- # do we have a list of length 2,
1056
- # one member being "zu", the other a verb, with a common parent "VZ"?
1057
- # then return the verb
1058
- if nodelist.length() == 2
1059
- zu, verb = nodelist.distribute { |n| n.part_of_speech() == "PTKZU" }
1060
- if zu.length() == 1 and
1061
- Tiger.category(verb.first) == "verb" and
1062
- verb.first.parent == zu.first.parent and
1063
- verb.first.parent.category() == "VZ"
1064
- return verb.first
1065
- end
1066
- end
1067
-
1068
- # no joy: try method offered by abstract class
1069
- return super(nodelist, no_mwes)
1070
- end
1071
-
1072
-
1073
- ########
1074
- # prune?
1075
- # given a target node t and another node n of the syntactic structure,
1076
- # decide whether n is likely to instantiate a semantic role
1077
- # of t. If not, recommend n for pruning.
1078
- #
1079
- # This method implements a slight variant of Xue and Palmer (EMNLP 2004).
1080
- # Pruning according to Xue & Palmer, EMNLP 2004.
1081
- # "Step 1: Designate the predicate as the current node and
1082
- # collect its sisters (constituents attached at the same level
1083
- # as the predicate) unless its sisters are coordinated with the
1084
- # predicate.
1085
- #
1086
- # Step 2: Reset the current node to its parent and repeat Step 1
1087
- # till it reaches the top level node."
1088
- #
1089
- # Modifications made here:
1090
- # - paths of length 0 accepted in any case
1091
- # - TIGER coordination allowed (phrase types CX)
1092
- #
1093
- # returns: 0 to recommend n for pruning, else 1 (integers, not booleans: both are truthy in Ruby)
1094
- def Tiger.prune?(node, # SynNode
1095
- paths_to_target, # hash: node ID -> Path object: paths from nodes to target
1096
- terminal_index) # hash: terminal node -> word index in sentence
1097
-
1098
- path_to_target = paths_to_target[node.id()]
1099
-
1100
- if not path_to_target
1101
- # no path from target to node: suggest for pruning
1102
- return 0
1103
- elsif path_to_target.length == 0
1104
- # target may be its own role: definite accept
1105
- return 1
1106
- else
1107
- # consider path from target to node:
1108
- # (1) If the path to the current node includes at least one Up
1109
- # and exactly one Down, keep.
1110
- # (2) If the path to the current node includes at least one Up
1111
- # and two Down and the roof node is a C-something, keep (coordination).
1112
- # (3) else discard
1113
-
1114
- # count number of up and down steps in path to target
1115
- num_up = 0
1116
- num_down = 0
1117
- path_to_target.each_step { |direction, edgelabel, nodelabel, endnode|
1118
- case direction
1119
- when /U/
1120
- num_up += 1
1121
- when /D/
1122
- num_down += 1
1123
- end
1124
- }
1125
-
1126
- if num_up >= 1 and num_down == 1
1127
- # case (1)
1128
- return 1
1129
- elsif num_up >= 1 and num_down == 2 and CollinsTntInterpreter.category(path_to_target.lca()) =~ /^C/
1130
- # case (2)
1131
- return 1
1132
- else
1133
- # case (3)
1134
- return 0
1135
- end
1136
- end
1137
- end
1138
-
1139
-
1140
- ################################
1141
- private
1142
- ################################
1143
-
1144
- ###
1145
- def Tiger.subject(verb_node)
1146
-
1147
- unless Tiger.category(verb_node) == "verb"
1148
- return nil
1149
- end
1150
-
1151
- if Tiger.voice(verb_node) == "passive"
1152
- # passive: then what we would like to return as subject
1153
- # is the SBP sibling of this verb
1154
-
1155
- parent = verb_node.parent
1156
-
1157
- if parent.nil?
1158
- # verb_node seems to be the root, strangely enough
1159
- return []
1160
- end
1161
- return parent.children_by_edgelabels(['SBP'])
1162
-
1163
- else
1164
- # not passive: then the subject of the verb
1165
- # is actually its subject in this sentence
1166
-
1167
- # needed???
1168
- # return if there is no surface subject
1169
- # e.g. parser errors like ADJD => VVPP
1170
-
1171
- return Tiger.surface_subject(verb_node)
1172
- end
1173
-
1174
- end
1175
-
1176
-
1177
- ###
1178
- def Tiger.direct_object(verb_node)
1179
-
1180
- unless Tiger.category(verb_node) == "verb"
1181
- return nil
1182
- end
1183
-
1184
- if Tiger.voice(verb_node) == "passive"
1185
- # passive: then what we would like to return as direct object
1186
- # is the subject of this verb
1187
- return Tiger.surface_subject(verb_node)
1188
- else
1189
-
1190
- # not passive: then the direct object
1191
- # is an OA sibling of the node verb_node
1192
- parent = verb_node.parent
1193
-
1194
- if parent.nil?
1195
- # verb_node seems to be the root, strangely enough
1196
- return []
1197
- end
1198
-
1199
- return parent.children_by_edgelabels(['OA'])
1200
- end
1201
- end
1202
-
1203
- ###
1204
- def Tiger.dative_object(verb_node)
1205
-
1206
- unless Tiger.category(verb_node) == "verb"
1207
- return nil
1208
- end
1209
-
1210
- parent = verb_node.parent
1211
-
1212
- if parent.nil?
1213
- return []
1214
- end
1215
-
1216
- return parent.children_by_edgelabels(['DA'])
1217
- end
1218
-
1219
- ###
1220
- def Tiger.prep_object(verb_node, preposition)
1221
-
1222
- unless Tiger.category(verb_node) == "verb"
1223
- return nil
1224
- end
1225
-
1226
- parent = verb_node.parent()
1227
- if parent.nil?
1228
- # verb_node seems to be the root, strangely enough
1229
- return []
1230
- end
1231
-
1232
- # find all PPs that are siblings of verb_node
1233
- pps = []
1234
- parent.each_child { |child|
1235
- if child.category == 'PP'
1236
- pps << child
1237
- end
1238
- }
1239
-
1240
- # now filter for those with the right preposition
1241
- if preposition.nil?
1242
- return pps
1243
- else
1244
- return pps.find_all { |node|
1245
- # prepositions are AC children of PP nodes
1246
- node.children_by_edgelabels(['AC']).map { |prep_node|
1247
- # prepositions are terminal words
1248
- prep_node.word()
1249
- # we are interested in those that match the parameter 'preposition'
1250
- }.include? preposition
1251
- }
1252
- end
1253
- end
1254
-
1255
- ###
1256
- def Tiger.surface_subject(verb_node)
1257
-
1258
- max_proj = Tiger.max_projection(verb_node)
1259
- # test each level in the computation of the maximal projection,
1260
- # from the lowest (the parent of verb_node)
1261
- # to the highest
1262
- max_proj['max_proj_at_level'].each { |node|
1263
- # test if this node has a SB child
1264
- # if so, use it
1265
- sb_children = node.children_by_edgelabels(['SB'])
1266
-
1267
- unless sb_children.empty?
1268
- return sb_children
1269
- end
1270
- }
1271
- return []
1272
- end
1273
-
1274
-
1275
- ##################
1276
- # gfs_verb
1277
- #
1278
- # given a node (a SynNode object) that is a terminal node
1279
- # representing a verb, determine
1280
- # all grammatical functions of this verb
1281
- # along with their head words
1282
- #
1283
- # verb_node: SynNode object, terminal node representing a verb
1284
- #
1285
- # returns: a list of pairs [relation(string), node(SynNode)]
1286
- # 'relation' is 'SB', 'OA', 'DA', 'MO', 'OC'
1287
- # 'node' is the constituent that stands in this relation to verb_node
1288
-
1289
- def Tiger.gfs_verb(verb_node)
1290
-
1291
- unless Tiger.category(verb_node) == "verb"
1292
- return []
1293
- end
1294
-
1295
- # construct a list of pairs [relation, node]
1296
- nodes = Array.new
1297
- # subjects:
1298
- n_arr = Tiger.subject(verb_node)
1299
-
1300
- if n_arr.length() > 0
1301
- nodes << ["SB", n_arr.first]
1302
- end
1303
-
1304
- #extrem frustrierend , sondern auch schädlich
1305
- #sagte
1306
- #Däubler-Gmelin
1307
- #&apos;&apos;
1308
- #die gesamte SPD
1309
- #nicht nur für Euch extrem frustrierend , sondern auch schädlich für die gesamte SPD &apos;&apos; gewesen
1310
- #die Streitigkeiten zwischen Führungsmitgliedern
1311
- #gewesen
1312
- #die Streitigkeiten zwischen Führungsmitgliedern
1313
- #frustrierend
1314
-
1315
- # direct object:
1316
- n_arr = Tiger.direct_object(verb_node)
1317
- if n_arr.length() > 0
1318
- nodes << ["OA", n_arr.first]
1319
- end
1320
-
1321
- # dative object:
1322
- n_arr = Tiger.dative_object(verb_node)
1323
- if n_arr.length() > 0
1324
- nodes << ["DA", n_arr.first]
1325
- end
1326
-
1327
-
1328
- # pp objects and adjuncts:
1329
- nodes.concat Tiger.prep_object(verb_node, nil).map { |n|
1330
- unless (edgelabel = n.parent_label)
1331
- edgelabel = "MO"
1332
- end
1333
- [edgelabel + "-" + Tiger.preposition(n).to_s, n]
1334
- }
1335
-
1336
- # sentence complement:
1337
- # verb node's parent has an OC child
1338
- parent = verb_node.parent
1339
- unless parent.nil?
1340
- parent.children_by_edgelabels(["OC"]).each { |n|
1341
- nodes << ["OC", n]
1342
- }
1343
- end
1344
-
1345
- return nodes
1346
- end
1347
-
1348
- ###
1349
- # gfs_noun
1350
- #
1351
- # determine relation names and relation-bearing syntax nodes
1352
- # for noun targets
1353
- #
1354
- # returns: a list of pairs
1355
- # [rel(string), node(SynNode)]
1356
- def Tiger.gfs_noun(noun_node, # SynNode object: terminal, noun
1357
- sent_obj) # SalsaTigerSentence object: sentence in which this noun occurs
1358
-
1359
-
1360
- # construct a list of pairs [relation, node]
1361
- retv = Array.new
1362
-
1363
- ##
1364
- # determine noun-noun relations:
1365
- # (1) edge label leading to this node is NK, and
1366
- # parent of this node has child with edge label not NK
1367
- # then: that child
1368
- # (2) or parent of this node is NP/PP, the grandparent is NP,
1369
- # and parent and grandparent are not linked by an NK edge
1370
- # then: the grandparent
1371
- # (3) or grandparent of this node is CNP
1372
- # then: that CNP's other children
1373
- parent = noun_node.parent()
1374
- np_pp_labels_without_cnp = ["NP", "PP", "PN"]
1375
- np_pp_labels = ["NP", "PP", "PN", "CNP"]
1376
-
1377
- if parent and
1378
- noun_node.parent_label() == "NK"
1379
- # (1)
1380
- parent.children().select { |n|
1381
- n.parent_label() != "NK"
1382
- }.each { |n|
1383
- unless n == noun_node
1384
-
1385
- retv << [n.parent_label(), n]
1386
- end
1387
- }
1388
- end
1389
-
1390
- # (2)
1391
- if parent
1392
- grandparent = parent.parent()
1393
- end
1394
-
1395
- if parent and grandparent and
1396
- np_pp_labels.include? parent.category() and
1397
- np_pp_labels_without_cnp.include? grandparent.category() and
1398
- parent.parent_label() != "NK"
1399
-
1400
- retv << [parent.parent_label(), grandparent]
1401
- end
1402
-
1403
- # (3)
1404
- if parent and grandparent and
1405
- grandparent.category() == "CNP"
1406
-
1407
- grandparent.each_child() { |n|
1408
- if np_pp_labels.include? n.category() and
1409
- n != parent
1410
-
1411
- retv << ["CJ", n]
1412
- end
1413
- }
1414
- end
1415
-
1416
- return retv
1417
- end
1418
-
1419
- ###
1420
- # gfs_adj
1421
- #
1422
- # determine relation names and relation-bearing syntax nodes
1423
- # for adjective targets
1424
- #
1425
- # returns: a list of pairs
1426
- # [rel(string), node(SynNode)]
1427
- #
1428
- # although in this case it's just one pair (if we can find it),
1429
- # describing the head noun
1430
- def Tiger.gfs_adj(adj_node) # SynNode object: terminal, adjective
1431
-
1432
- parent = adj_node.parent()
1433
-
1434
- if parent.nil?
1435
- return []
1436
- end
1437
-
1438
- if ["NP", "CNP", "PP", "CPP", "PN"].include? parent.category()
1439
- return [["HD", parent]]
1440
- else
1441
- return []
1442
- end
1443
- end
1444
-
1445
-
1446
- end
1447
-
1448
- #( (TOP (S (KON_JU Und) (ADV_MO schon) (VVFIN_HD weiß) (NP_SB (ART_NK der) (ADJA_NK Berliner) (NN_NK Verkehrsverein)) ($, ,) (S_OC (PWS_SB was) (ADV_MO da) (PIS_MNR alles) (PP_MO (APPR_AC auf) (ART_NK die) (NN_NK Stadt) ($, ,) (PPOSAT_NK seine) (AP_NK (PP_MO (APPR_AC durch) (ART_NK eine) (NN_NK Rekonstruktion)) (ADV_MO so) (ADJA_HD prachtvoll)) (ADJA_NK markierte) (NN_NK Mitte) ($, ,) (AP_NK (PIS_HD alles))) (VVFIN_HD zukommt))) ($. .)))