shalmaneser-frappe 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +10 -0
  3. data/CHANGELOG.md +4 -0
  4. data/LICENSE.md +4 -0
  5. data/README.md +122 -0
  6. data/lib/frappe/Ampersand.rb +41 -0
  7. data/lib/frappe/file_parser.rb +126 -0
  8. data/lib/frappe/fix_syn_sem_mapping.rb +196 -0
  9. data/lib/frappe/frappe.rb +217 -0
  10. data/lib/frappe/frappe_flat_syntax.rb +89 -0
  11. data/lib/frappe/frappe_read_stxml.rb +48 -0
  12. data/lib/frappe/interfaces/berkeley_interface.rb +380 -0
  13. data/lib/frappe/interfaces/collins_interface.rb +340 -0
  14. data/lib/frappe/interfaces/counter.rb +19 -0
  15. data/lib/frappe/interfaces/stanford_interface.rb +353 -0
  16. data/lib/frappe/interfaces/treetagger_interface.rb +74 -0
  17. data/lib/frappe/interfaces/treetagger_module.rb +111 -0
  18. data/lib/frappe/interfaces/treetagger_pos_interface.rb +80 -0
  19. data/lib/frappe/interpreters/berkeley_interpreter.rb +27 -0
  20. data/lib/frappe/interpreters/collins_tnt_interpreter.rb +807 -0
  21. data/lib/frappe/interpreters/collins_treetagger_interpreter.rb +16 -0
  22. data/lib/frappe/interpreters/empty_interpreter.rb +26 -0
  23. data/lib/frappe/interpreters/headz.rb +265 -0
  24. data/lib/frappe/interpreters/headz_helpers.rb +54 -0
  25. data/lib/frappe/interpreters/stanford_interpreter.rb +28 -0
  26. data/lib/frappe/interpreters/syn_interpreter.rb +727 -0
  27. data/lib/frappe/interpreters/tiger_interpreter.rb +1846 -0
  28. data/lib/frappe/interpreters/treetagger_interpreter.rb +89 -0
  29. data/lib/frappe/one_parsed_file.rb +31 -0
  30. data/lib/frappe/opt_parser.rb +92 -0
  31. data/lib/frappe/path.rb +199 -0
  32. data/lib/frappe/plain_converter.rb +59 -0
  33. data/lib/frappe/salsa_tab_converter.rb +154 -0
  34. data/lib/frappe/salsa_tab_with_pos_converter.rb +531 -0
  35. data/lib/frappe/stxml_converter.rb +666 -0
  36. data/lib/frappe/syn_interface.rb +76 -0
  37. data/lib/frappe/syn_interface_stxml.rb +173 -0
  38. data/lib/frappe/syn_interface_tab.rb +39 -0
  39. data/lib/frappe/utf_iso.rb +27 -0
  40. data/lib/shalmaneser/frappe.rb +1 -0
  41. metadata +130 -0
@@ -0,0 +1,1846 @@
1
+ # coding: utf-8
2
+
3
+ # @todo Investigate the dependency between this class and STXML.
4
+ # Probably they can be combined.
5
+
6
+ #############################################
7
+ #
8
+ # max. projection:
9
+ #
10
+ # consists of methods that are 'building blocks' for computing
11
+ # the maximum projection of a verb in TIGER syntax
12
+ #
13
+ # basically, computing the max. projection is about moving an
14
+ # upper node upward. At the beginning it is the parent of the
15
+ # terminal node for the verb, and each building block moves it up
16
+ # to its parent, if the building block matches.
17
+ #
18
+ # Apart from the upper node, a lower node is also watched. At the
19
+ # beginning it is the terminal node for the verb, later it is usually
20
+ # the 'HD' child of the upper node. This lower node is needed for
21
+ # testing whether a building block matches.
22
+ #
23
+ # For handling conjunction, the upper node is split into two, a 'lower upper'
24
+ # and an 'upper upper' node. The 'lower upper' is used when some relation
25
+ # between the upper node and its descendants is tested, and the 'upper upper'
26
+ # is used when some relation between the upper node and its predecessors
27
+ # is tested. Usually the 'lower upper' and the 'upper upper' are the same,
28
+ # but conjunction building blocks move the 'upper upper' up to its parent
29
+ # while leaving the 'lower upper' unchanged.
30
+ #
31
+ # So all building block methods take three arguments: lower, upper_l and
32
+ # upper_u. All three are nodes given as SalsaTigerSentence objects
33
+ #
34
+ # All building block methods give as their return value a list of three
35
+ # nodes: [new_lower, new_upper_l, new_upper_u], if the building block
36
+ # matched. If it does not match, nil is returned.
37
+ #
38
+ # The method explain describes all building blocks,
39
+ # the conditions for the building blocks matching, and shows
40
+ # where the lower and the upper nodes will be after a building block matched.
41
+ #
42
+ # building blocks:
43
+ # pp_pp
44
+ # pp_fin
45
+ # inf_fin
46
+ # vzinf_fin
47
+ # cvzinf_fin
48
+ # modal
49
+ # othermodal
50
+ # conj
51
+ #
52
+ # To compute the maximal projection of a verb,
53
+ # we start at the parent of the terminal node for the verb
54
+ # "and move upwards."
55
+ # "The move upwards is broken up in little building blocks."
56
+ # "Each of them licenses one step upward in the syntactic tree."
57
+ #
58
+ # "Each building block needs information about two nodes:"
59
+ # "The current upper node (at the beginning, that is"
60
+ # "the parent of the terminal node for the verb) and"
61
+ # "one specific child of that current upper node"
62
+ # "(at the beginning, that is the terminal node for the verb)."
63
+ #
64
+ # "Each building block provides information of"
65
+ # "- where the new upper node is, depending on the current"
66
+ # " upper node, and"
67
+ # "- where the new specific child is."
68
+ #
69
+ # "For handling conjunction, we need to complicate this picture somewhat:"
70
+ # "We split the current upper node into an 'upper upper'"
71
+ # "and a 'lower upper' node."
72
+ # "If we want to check the edge from the current upper node upwards,"
73
+ # "we use the 'upper upper'."
74
+ # "If we want to check an edge from the current upper node downwards,"
75
+ # "we use the 'lower upper'."
76
+ # "Almost always, the 'lower upper' and the 'upper upper' will be the same."
77
+ # "Except for the building block for conjunction:"
78
+ # "It moves the 'upper upper' one level up,"
79
+ # "but leaves the 'lower upper' the same."
80
+ #
81
+ # "There are five levels of building blocks."
82
+ #
83
+ # "* 1st level: auxiliary verb constructions involving a participle"
84
+ # " The following building blocks are tried, in this order:"
85
+ # " CONJ, PP-PP, CONJ, PP_FIN"
86
+ #
87
+ # "* 2nd level: infinitive constructions"
88
+ # " The following building blocks are tried, in this order:"
89
+ # " CONJ, INF-FIN, VZINF-FIN, CVZINF-FIN"
90
+ #
91
+ # "* 3rd level: modals"
92
+ # " The following building blocks are tried, in this order:"
93
+ # " CONJ, MODAL, OTHERMODAL"
94
+ #
95
+ # "* 4th level = 1st level"
96
+ #
97
+ # "* 5th level = 2nd level"
98
+ #
99
+ #
100
+ # "***These are the building blocks:"
101
+ #
102
+ # "PP-PP"
103
+ # " VP (new uppermost node)"
104
+ # " / | \\OC"
105
+ # " HD/ | VP|CVP (current uppermost node)"
106
+ # " / | |"
107
+ # " o FE |HD|CJ"
108
+ # "POS: V[AMV]PP |"
109
+ # " new target current target"
110
+ # " POS: V[AMV]PP"
111
+ #
112
+ # "PP-FIN"
113
+ # " S/VP (new uppermost node)"
114
+ # " / | \\OC or PD"
115
+ # " HD/ | VP|CVP|CO (current uppermost node)"
116
+ # " / | |"
117
+ # " o FE |HD|CJ"
118
+ # "POS: V[AMV]FIN |"
119
+ # " V[AMV]INF current target"
120
+ # "or CAT: VZ POS: V[AMV]PP"
121
+ #
122
+ # "INF_FIN"
123
+ # " S/VP (new uppermost node)"
124
+ # " / | \\OC"
125
+ # " HD/ | VP|CVP (current uppermost node)"
126
+ # " / | |"
127
+ # " o FE |HD|CJ"
128
+ # "POS: VAFIN |"
129
+ # " VAINF current target"
130
+ # " VVINF POS: V[AMV]INF"
131
+ # " new target"
132
+ #
133
+ # "VZINF-FIN"
134
+ # " S/VP (new uppermost node)"
135
+ # " / | \\OC"
136
+ # " HD/ | VP (current uppermost node)"
137
+ # " / | |"
138
+ # " o FE |HD"
139
+ # "POS: V[AV]FIN |"
140
+ # " new target current target"
141
+ # " CAT: VZ"
142
+ #
143
+ # "CVZINF-FIN"
144
+ # " S/VP (new uppermost node)"
145
+ # " | \\OC"
146
+ # " | CVP (current uppermost node)"
147
+ # " | |"
148
+ # " FE |CJ"
149
+ # " |"
150
+ # " current and new target"
151
+ # " CAT: VZ"
152
+ #
153
+ # "MODAL"
154
+ # " S/VP (new uppermost node)"
155
+ # " / | \\OC"
156
+ # " HD/ | VP|CVP (current uppermost node)"
157
+ # " / | |"
158
+ # " o FE |HD|CJ"
159
+ # " POS: |"
160
+ # " VM(PP|FIN|INF) current target"
161
+ # " new target POS: V[AMV]INF"
162
+ #
163
+ # "OTHERMODAL"
164
+ # " S/VP (new uppermost node)"
165
+ # " / | \\OC"
166
+ # " HD/ | VP (current uppermost node)"
167
+ # " / | | \\"
168
+ # " o FE |HD \\OC"
169
+ # "POS: VMFIN | \\"
170
+ # " VMINF POS: current target"
171
+ # " VMPP V[AMV]INF POS: V[AMV]PP"
172
+ # " new target V[AMV]FIN"
173
+ #
174
+ # "CONJ"
175
+ # " CVP (new upper uppermost node)"
176
+ # " | \\CJ"
177
+ # " | VP (current and new uppermost node)"
178
+ # " | |"
179
+ # " FE |"
180
+ # " |"
181
+ # " current and new target"
182
+ ###
183
+ =begin
184
+ module Shalmaneser
185
+ module Frappe
186
+ module TigerMaxProjection
187
+
188
+ def self.max_projection(node)
189
+ parent = node.parent
190
+ # node has no parent? recover somehow
191
+ if parent.nil?
192
+ return {'max_proj' => node,
193
+ 'max_proj_at_level' => [node]}
194
+ end
195
+
196
+ maxproj_at_level = []
197
+ maxproj_at_level << parent
198
+
199
+ lower = node
200
+ upper_u = upper_l = parent
201
+
202
+ lower, upper_l, upper_u = project_participle(lower, upper_l, upper_u)
203
+ maxproj_at_level << upper_u
204
+
205
+ lower, upper_l, upper_u = project_infinitive(lower, upper_l, upper_u)
206
+ maxproj_at_level << upper_u
207
+
208
+ lower, upper_l, upper_u = project_modal(lower, upper_l, upper_u)
209
+ maxproj_at_level << upper_u
210
+
211
+ lower, upper_l, upper_u = project_participle(lower, upper_l, upper_u)
212
+ maxproj_at_level << upper_u
213
+
214
+ lower, upper_l, upper_u = project_infinitive(lower, upper_l, upper_u)
215
+ maxproj_at_level << upper_u
216
+
217
+ return {'max_proj' => upper_u,
218
+ 'max_proj_at_level' => maxproj_at_level}
219
+ end
220
+
221
+
222
+ ###
223
+ def self.test_localtrees(path)
224
+ # HIER WEITER: was genau passiert hier?
225
+ retv = {}
226
+
227
+ # test each step
228
+ path.each { |step|
229
+ retv = test_step(step, retv)
230
+
231
+ if retv.nil?
232
+ return nil
233
+ end
234
+ }
235
+
236
+ # return result of last step
237
+ return retv
238
+ end
239
+
240
+ ######
241
+ private
242
+
243
+ ###
244
+ def self.test_step(path, previous)
245
+ if path['from'].nil? or path['to'].nil? or path['edge'].nil?
246
+ $stderr.puts 'TigerAux error: missing path hash entry'
247
+ exit 1
248
+ end
249
+
250
+ from_node, *from_descr = path['from']
251
+ to_node, *to_descr = path['to']
252
+
253
+ # using the special flags tp_prev_to and tp_prev_from,
254
+ # a node can also be set to be the value in the
255
+ # 'previous' hash
256
+ from_node = cf_previous(from_node, previous)
257
+ to_node = cf_previous(to_node, previous)
258
+
259
+ # test if 'from' node description matches
260
+ unless test_node(from_node, from_descr)
261
+ return nil
262
+ end
263
+
264
+ # try path
265
+ direction, edgelabel = path['edge']
266
+ case direction
267
+ when 'up'
268
+ label = from_node.parent_label
269
+ if label =~ edgelabel
270
+ end_nodes = [from_node.parent]
271
+ else
272
+ end_nodes = []
273
+ end
274
+ when 'dn'
275
+ end_nodes = []
276
+ from_node.each_child { |child|
277
+ if child.parent_label =~ edgelabel
278
+ end_nodes << child
279
+ end
280
+ }
281
+ else
282
+ $stderr.puts 'TigerAux error: unknown direction'
283
+ exit 1
284
+ end
285
+
286
+ # check all prospective end nodes
287
+ remaining_end_nodes = end_nodes.select { |prosp_to_node|
288
+ if to_node.nil? or to_node == prosp_to_node
289
+ test_node(prosp_to_node, to_descr)
290
+ else
291
+ false
292
+ end
293
+ }
294
+
295
+ if remaining_end_nodes.empty?
296
+ return nil
297
+ else
298
+ return {'from' => from_node,
299
+ 'to' => remaining_end_nodes}
300
+ end
301
+ end
302
+
303
+ ###
304
+ def self.test_node(node, descr)
305
+
306
+ cat_or_pos, pattern = descr
307
+ if node.nil?
308
+ $stderr.puts 'TigerAux error: test_node nil'
309
+ exit 1
310
+ end
311
+
312
+ case cat_or_pos
313
+ when 'pos'
314
+ if node.part_of_speech =~ pattern
315
+ return true
316
+ else
317
+ return false
318
+ end
319
+ when 'cat'
320
+ if node.category =~ pattern
321
+ return true
322
+ else
323
+ return false
324
+ end
325
+ when nil
326
+ return true
327
+ else
328
+ $stderr.puts 'TigerAux error: neither cat nor pos'
329
+ exit 1
330
+ end
331
+ end
332
+
333
+ ###
334
+ def self.cf_previous(node, previous)
335
+ case node
336
+ when 'tp_prev_to'
337
+ return previous['to'].first
338
+ when 'tp_prev_from'
339
+ return previous['from']
340
+ else
341
+ return node
342
+ end
343
+ end
344
+
345
+ ###
346
+ def self.project_participle(lower, upper_l, upper_u)
347
+ return project_this(lower, upper_l, upper_u,
348
+ [self.method('conj'),
349
+ self.method('pp_pp'),
350
+ self.method('conj'),
351
+ self.method('pp_fin')])
352
+ end
353
+
354
+ ###
355
+ def self.project_infinitive(lower, upper_l, upper_u)
356
+ return project_this(lower, upper_l, upper_u,
357
+ [self.method('conj'),
358
+ self.method('inf_fin'),
359
+ self.method('vzinf_fin'),
360
+ self.method('cvzinf_fin')
361
+ ])
362
+ end
363
+
364
+ ###
365
+ def self.project_modal(lower, upper_l, upper_u)
366
+ return project_this(lower, upper_l, upper_u,
367
+ [self.method('conj'),
368
+ self.method('modal'),
369
+ self.method('othermodal')
370
+ ])
371
+ end
372
+
373
+ ###
374
+ def self.project_participle_(lower, upper_l, upper_u)
375
+ return project_this(lower, upper_l, upper_u,
376
+ [self.method('conj'),
377
+ self.method('pp_pp'),
378
+ self.method('conj'),
379
+ self.method('pp_fin')])
380
+ end
381
+
382
+ ###
383
+ def self.project_this(lower, upper_l, upper_u, method_list)
384
+ method_list.each { |method|
385
+ retv = method.call(lower, upper_l, upper_u)
386
+ unless retv.nil?
387
+ lower, upper_l, upper_u = retv
388
+ end
389
+ }
390
+ return [lower, upper_l, upper_u]
391
+ end
392
+
393
+ ###
394
+ def self.pp_pp(lower, upper_l, upper_u)
395
+
396
+ retv =
397
+ test_localtrees([
398
+ {'from' => [lower, 'pos', /^V[AMV]PP$/],
399
+ 'to' => [upper_l, 'cat', /^C?VP$/],
400
+ 'edge' => ['up', /^(HD)|(CJ)$/]},
401
+ {'from' => [upper_u, 'cat', /^C?VP$/],
402
+ 'to' => [nil, 'cat', /^VP$/],
403
+ 'edge' => ['up', /^OC$/]},
404
+ {'from' => ['tp_prev_to', 'cat', /^VP$/],
405
+ 'to' => [nil, 'pos', /^V[AMV]PP$/],
406
+ 'edge' => ['dn', /^HD$/]}
407
+ ])
408
+
409
+ if retv.nil?
410
+ return nil
411
+ else
412
+ return [retv['to'].first, retv['from'], retv['from']]
413
+ end
414
+ end
415
+
416
+ ###
417
+ def self.pp_fin(lower, upper_l, upper_u)
418
+
419
+ retv =
420
+ test_localtrees([
421
+ {'from' => [lower, 'pos', /^V[AMV]PP$/],
422
+ 'to' => [upper_l, 'cat', /^C?VP$/],
423
+ 'edge' => ['up', /^(HD)|(CJ)$/]},
424
+ {'from' => [upper_u,'cat', /^C?VP$/],
425
+ 'to' => [nil, 'cat', /^(VP)|S$/],
426
+ 'edge' => ['up', /^(OC)|(PD)$/]}
427
+ ])
428
+
429
+ if retv.nil?
430
+ return nil
431
+ end
432
+
433
+ new_upper = retv['to'].first
434
+
435
+ # test two alternatives:
436
+ # head child of new_upper is either a VXFIN or VXINF terminal...
437
+ retv =
438
+ test_localtrees([
439
+ {'from' => [new_upper, 'cat', /^(VP)|S$/],
440
+ 'to' => [nil, 'pos', /^V[AMV]((FIN)|(INF))$/],
441
+ 'edge' => ['dn', /^HD$/]}
442
+ ])
443
+
444
+ # ... or a VZ nonterminal
445
+ if retv.nil?
446
+ retv =
447
+ test_localtrees([
448
+ {'from' => [new_upper, 'cat', /^(VP)|S$/],
449
+ 'to' => [nil, 'cat', /^VZ$/],
450
+ 'edge' => ['dn', /^HD$/]}
451
+ ])
452
+ end
453
+
454
+ if retv.nil?
455
+ return nil
456
+ else
457
+ return [retv['to'].first, new_upper, new_upper]
458
+ end
459
+ end
460
+
461
+
462
+ ###
463
+ def self.inf_fin(lower, upper_l, upper_u)
464
+
465
+ retv =
466
+ test_localtrees([
467
+ {'from' => [lower, 'pos', /^V[AMV]INF$/],
468
+ 'to' => [upper_l, 'cat', /^C?VP$/],
469
+ 'edge' => ['up', /^(HD)|(CJ)$/]},
470
+ {'from' => [upper_u,'cat', /^C?VP$/],
471
+ 'to' => [nil, 'cat', /^(VP)|S$/],
472
+ 'edge' => ['up', /^OC$/]},
473
+ {'from' => ['tp_prev_to', 'cat', /^(VP)|S$/],
474
+ 'to' => [nil, 'pos', /^(VAFIN)|(VAINF)|(VVINF)$/],
475
+ 'edge' => ['dn', /^HD$/]}
476
+ ])
477
+ if retv.nil?
478
+ return nil
479
+ else
480
+ return [retv['to'].first, retv['from'], retv['from']]
481
+ end
482
+ end
483
+
484
+
485
+ ###
486
+ def self.vzinf_fin(lower, upper_l, upper_u)
487
+
488
+ retv =
489
+ test_localtrees([
490
+ {'from' => [lower, 'cat', /^VZ$/],
491
+ 'to' => [upper_l, 'cat', /^VP$/],
492
+ 'edge' => ['up', /^HD$/]},
493
+ {'from' => [upper_u,'cat', /^VP$/],
494
+ 'to' => [nil, 'cat', /^(VP)|S$/],
495
+ 'edge' => ['up', /^OC$/]},
496
+ {'from' => ['tp_prev_to', 'cat', /^(VP)|S$/],
497
+ 'to' => [nil, 'pos', /^V[AV]FIN$/],
498
+ 'edge' => ['dn', /^HD$/]}
499
+ ])
500
+
501
+ if retv.nil?
502
+ return nil
503
+ else
504
+ return [retv['to'].first, retv['from'], retv['from']]
505
+ end
506
+ end
507
+
508
+ ###
509
+ def self.cvzinf_fin(lower, upper_l, upper_u)
510
+
511
+ retv =
512
+ test_localtrees([
513
+ {'from' => [lower, 'cat', /^VZ$/],
514
+ 'to' => [upper_l, 'cat', /^CVP$/],
515
+ 'edge' => ['up', /^CJ$/]},
516
+ {'from' => [upper_u,'cat', /^CVP$/],
517
+ 'to' => [nil, 'cat', /^(VP)|S$/],
518
+ 'edge' => ['up', /^OC$/]}
519
+ ])
520
+
521
+ if retv.nil?
522
+ return nil
523
+ else
524
+ return [lower, upper_l, retv['to'].first]
525
+ end
526
+ end
527
+
528
+ ###
529
+ def self.modal(lower, upper_l, upper_u)
530
+
531
+ retv =
532
+ test_localtrees([
533
+ {'from' => [lower, 'pos', /^V[AMV]INF$/],
534
+ 'to' => [upper_l, 'cat', /^C?VP$/],
535
+ 'edge' => ['up', /^(HD)|(CJ)$/]},
536
+ {'from' => [upper_u,'cat', /^C?VP$/],
537
+ 'to' => [nil, 'cat', /^(VP)|S$/],
538
+ 'edge' => ['up', /^OC$/]},
539
+ {'from' => ['tp_prev_to', 'cat', /^(VP)|S$/],
540
+ 'to' => [nil, 'pos', /^VM((PP)|(FIN)|(INF))$/],
541
+ 'edge' => ['dn', /^HD$/]}
542
+ ])
543
+
544
+ if retv.nil?
545
+ return nil
546
+ else
547
+ return [retv['to'].first, retv['from'], retv['from']]
548
+ end
549
+ end
550
+
551
+ ###
552
+ def self.othermodal(lower, upper_l, upper_u)
553
+
554
+ retv =
555
+ test_localtrees([
556
+ {'from' => [lower, 'pos', /^V[AMV]PP$/],
557
+ 'to' => [upper_l, 'cat', /^VP$/],
558
+ 'edge' => ['up', /^OC$/]},
559
+ {'from' => [upper_l, 'cat', /^VP$/],
560
+ 'to' => [nil, 'pos', /^V[AMV]((INF)|(FIN))$/],
561
+ 'edge' => ['dn', /^HD$/]},
562
+ {'from' => [upper_u,'cat', /^VP$/],
563
+ 'to' => [nil, 'cat', /^(VP)|S$/],
564
+ 'edge' => ['up', /^OC$/]},
565
+ {'from' => ['tp_prev_to', 'cat', /^(VP)|S$/],
566
+ 'to' => [nil, 'pos', /^VM((PP)|(FIN)|(INF))$/],
567
+ 'edge' => ['dn', /^HD$/]}
568
+ ])
569
+
570
+ if retv.nil?
571
+ return nil
572
+ else
573
+ return [retv['to'].first, retv['from'], retv['from']]
574
+ end
575
+ end
576
+
577
+ ###
578
+ def self.conj(lower, upper_l, upper_u)
579
+
580
+ retv = test_localtrees([
581
+ {'from' => [lower, nil, //],
582
+ 'to' => [upper_l, 'cat', /^VP$/],
583
+ 'edge' => ['up', //]},
584
+ {'from' => [upper_u,'cat', /^VP$/],
585
+ 'to' => [nil, 'cat', /^CVP$/],
586
+ 'edge' => ['up', /^CJ$/]}
587
+ ])
588
+
589
+ if retv.nil?
590
+ return nil
591
+ else
592
+ return [lower, upper_l, retv['to'].first]
593
+ end
594
+ end
595
+ end
596
+ end
597
+ end
598
+ =end
599
+
600
+ require_relative 'headz'
601
+ require_relative 'syn_interpreter'
602
+
603
+ require 'ruby_class_extensions'
604
+
605
+ # @todo AB: [2015-12-16 Wed 00:05]
606
+ # Rename this class to TigerInterpreter.
607
+
608
+ module Shalmaneser
609
+ module Frappe
610
+ class TigerInterpreter < SynInterpreter
611
+ @@heads_obj = Headz.new
612
+
613
+ ###
614
+ # generalize over POS tags.
615
+ #
616
+ # returns one of:
617
+ #
618
+ # adj: adjective (phrase)
619
+ # adv: adverb (phrase)
620
+ # card: numbers, quantity phrases
621
+ # con: conjunction
622
+ # det: determiner, including possessive/demonstrative pronouns etc.
623
+ # for: foreign material
624
+ # noun: noun (phrase), including personal pronouns, proper names, expletives
625
+ # part: particles, truncated words (German compound parts)
626
+ # prep: preposition (phrase)
627
+ # pun: punctuation, brackets, etc.
628
+ # sent: sentence
629
+ # top: top node of a sentence
630
+ # verb: verb (phrase)
631
+ # nil: something went wrong
632
+ #
633
+ # default: return phrase type as is
634
def self.category(node) # SynNode
  # Generalize the TIGER phrase type / POS tag of +node+ to a coarse
  # category name ("adj", "adv", "card", "con", "det", "for", "noun",
  # "part", "prep", "pun", "sent", "top", "verb").
  # Returns nil when the phrase type is missing or not covered below.
  pt = self.pt(node)
  # phrase type could not be determined
  return nil if pt.nil?

  case pt.to_s.strip
  # AP / CAP must NOT be followed by another letter. The previous pattern
  # /^C?AP[^A-Za-z]?/ made the trailing class optional, so it also matched
  # the adposition tags APPR / APPRART: prepositions were reported as "adj"
  # and the /^APPR/ test in the "prep" branch could never be reached.
  # NOTE(review): APPO / APZR were also caught by the old pattern; they now
  # fall through to nil (unknown) -- confirm whether they belong to "prep".
  when /^C?ADJ/, /^PIS/, /^C?AP(?![A-Za-z])/ then return "adj"
  when /^C?ADV/, /^C?AVP/, /^PROAV/ then return "adv"
  when /^CARD/ then return "card"
  when /^C?KO/ then return "con"
  when /^PPOS/, /^ART/, /^PIAT/, /^PD/, /^PRELAT/, /^PWAT/ then return "det"
  when /^FM/, /^XY/ then return "for"
  when /^C?N/, /^PPER/, /^PN/, /^PRELS/, /^PWS/ then return "noun"
  when /^ITJ/ then return "sent"
  when /^PRF/, /^PTK/, /^TRUNC/ then return "part"
  when /^C?PP/, /^APPR/, /^PWAV/ then return "prep"
  when /^\$/ then return "pun"
  # CO/DL are filed under "sent" for lack of a better place
  when /^C?S$/, /^CO/, /^DL/, /^CH/, /^ISU/ then return "sent"
  when /^TOP/ then return "top"
  when /^C?V/ then return "verb"
  else
    # $stderr.puts "WARNING Unknown category/POS "+c.to_s+" (German data)"
    return nil
  end
end
661
+
662
+ ###
663
+ # is relative pronoun?
664
+ #
665
def self.relative_pronoun?(node) # SynNode
  # Decide from the phrase type of +node+ whether it is a relative pronoun.
  # Returns nil if no phrase type is available, otherwise true/false.
  phrase_type = self.pt(node)
  # phrase type could not be determined
  return nil if phrase_type.nil?

  label = phrase_type.to_s.strip
  [/^PREL/, /^PWAV/, /^PWAT/].any? { |pattern| pattern === label }
end
679
+
680
+
681
+ ###
682
+ # lemma_backoff:
683
+ #
684
+ # if we have lemma information, return that,
685
+ # and failing that, return the word
686
+ #
687
+ # returns: string or nil
688
def self.lemma_backoff(node)
  # Ask the superclass for the lemma (or word) of +node+.
  lemma = super(node)

  # The lemmatizer may return several candidate lemmas separated by "|":
  # keep only the first one.
  # The pipe must be escaped. Unescaped, it is regexp alternation with an
  # empty right-hand branch, which matches every string and made this method
  # return nil (instead of the lemma itself) for "" or for a lemma that
  # starts with "|".
  if lemma =~ /^([^|]+)\|/
    return $1
  else
    # single lemma, or nil if the superclass found nothing
    return lemma
  end
end
698
+
699
+ ###
700
+ # particle_of_verb:
701
+ #
702
+ # given a node and a nodelist,
703
+ # if the node represents a verb:
704
+ # see if the verb has a particle among the nodes in nodelist
705
+ # if so, return it
706
def self.particle_of_verb(node, # SynNode
                          node_list) # array: SynNode
  # Only verbs that hang below some parent can have a separated particle.
  return nil unless self.category(node) == "verb"
  return nil unless node.parent

  # First sister that is in node_list, is attached by an "SVP" edge and is
  # not a stray ")" (Sleepy parser problem: it often tags ")" as a separate
  # verb particle). Yields nil when no sister qualifies.
  node.parent.children.detect do |sister|
    node_list.include?(sister) &&
      sister.parent_label == "SVP" &&
      !(sister.get_attribute("lemma") == ")" || sister.word == ")")
  end
end
737
+
738
+
739
+ ###
740
+ # auxiliary?
741
+ #
742
+ # returns true if the given node is an auxiliary
743
+ # TIGER: a node counts as an auxiliary if its POS tag starts with "VA"
744
def self.auxiliary?(node)
  # true iff the node has a POS tag and that tag starts with "VA"
  pos = node.part_of_speech
  (pos and pos =~ /^VA/) ? true : false
end
752
+
753
+ ###
754
+ # modal?
755
+ #
756
+ # returns true if the given node is a modal verb
757
+ #
758
+ # returns: boolean
759
def self.modal?(node)
  # A modal verb is recognised purely by a POS tag starting with "VM".
  tag = node.part_of_speech
  return false unless tag

  tag =~ /^VM/ ? true : false
end
767
+
768
+ ###
769
+ # head_terminal
770
+ #
771
+ # given a constituent, return the terminal node
772
+ # that describes its headword
773
+ # default: a heuristic that assumes the existence of a 'head'
774
+ # attribute on nodes:
775
+ # find the first node in my yield corresponding to my head attribute.
776
+ # add-on: if this doesn't work, ask the headz package for the head
777
+ #
778
+ # returns: a SynNode object if successful, else nil
779
def self.head_terminal(node)
  # First choice: whatever the superclass heuristic finds.
  inherited = super(node)
  return inherited if inherited

  # Fallback: ask the shared Headz object for the semantic head.
  sem_head = @@heads_obj.get_sem_head(node)
  return nil if sem_head.nil?

  # a "prep" entry, when present, takes precedence over the "head" entry
  sem_head["prep"] || sem_head["head"]
end
793
+
794
+
795
+ #####################################
796
+ # verbs(sobj) sobj is a sentence in SalsaTigerSentence format
797
+ #
798
+ # return a list of the nodes of full verbs in a given sentence:
799
+ # it is a list of lists. An item in that list is
800
+ # - either a pair [verb, svp]
801
+ # of the node of a verb with separable prefix
802
+ # and the node of its separate prefix
803
+ # - or a singleton [verb]
804
+ # of the node of a verb without separate prefix
805
def self.verbs(sobj)
  # Collect all terminals categorised as verbs ...
  verb_terminals = sobj.terminals.select { |terminal| self.category(terminal) == "verb" }

  # ... and pair each one with its separate verb prefix, if it has one.
  verb_terminals.map do |verb|
    mother = verb.parent
    # verb is root node, for whatever reason: no sisters to inspect
    next [verb] if mother.nil?

    prefixes = mother.children_by_edgelabels(['SVP'])
    if prefixes.length > 1
      # more than one separate verb prefix? weird. Warn and use the first.
      $stderr.print 'Tiger warning: more than one separate verb prefix '
      $stderr.print 'for node ', verb.id, "\n"
    end

    prefixes.empty? ? [verb] : [verb, prefixes.first]
  end
end
835
+
836
+ ###
837
+ # preposition
838
+ #
839
+ # if the given node represents a PP, return the preposition (string)
840
def self.preposition(node) # SynNode
  # Preferred source: the "prep" entry of the semantic-head analysis.
  sem_head = @@heads_obj.get_sem_head(node)
  return sem_head["prep"].to_s if sem_head and sem_head["prep"]

  # Otherwise fall back to the first terminal (in sorted order) whose
  # category is "prep" and return its word; nil if there is none.
  first_prep = node.terminals_sorted.detect { |terminal| self.category(terminal) == "prep" }
  first_prep ? first_prep.word : nil
end
856
+
857
+
858
+ ###
859
+ # voice
860
+ #
861
+ # given a constituent, return
862
+ # - "active"/"passive" if it is a verb
863
+ # - nil, else
864
def self.voice(node)
  # Returns "active" or "passive" if +node+ is a verb, nil otherwise.
  #
  # A verb is called passive when it is a participle whose governing
  # S/VP has an auxiliary HD child that looks like a form of 'werden'.
  #
  # test_localtrees is defined outside this excerpt; as used here it yields
  # nil when the described local tree does not match, and otherwise a hash
  # whose 'to' entry lists the matching end nodes.
  #
  # All alternations below are written as /^(A|B)$/. The former spellings
  # such as /^(CVP)|(VP)|S|(CS)$/ or /^(VP)|S$/ anchored only the first and
  # last branch, so e.g. any category merely containing "S" or "VP" matched.
  return nil unless self.category(node) == "verb"

  # node is a participle linked to its VP or S parent by HD or CJ
  retv = test_localtrees([{'from' => [node, 'pos', /^V[AMV]PP$/],
                           'to' => [nil, 'cat', /^(CVP|VP|S|CS)$/],
                           'edge' => ['up', /^(HD|CJ)$/]}])

  if retv
    verb_parent = retv['to'].first

    # coordination?
    retv = test_localtrees([{'from' => [verb_parent, nil, //],
                             'to' => [nil, 'cat', /^CVP$/],
                             'edge' => ['up', /^CJ$/]}])
    if retv
      # yes, coordination
      #    S/VP
      #     |OC
      #    CVP
      #     | CJ
      #     VP
      #     | HD
      #    participle
      cvp = retv['to'].first

      retv = test_localtrees([{'from' => [cvp, nil, //],
                               'to' => [nil, 'cat', /^(S|VP)$/],
                               'edge' => ['up', /^OC$/]}])
    else
      # node's parent is linked to its parent via an OC edge
      retv = test_localtrees([{'from' => [verb_parent, nil, //],
                               'to' => [nil, 'cat', /^(VP|S)$/],
                               'edge' => ['up', /^OC$/]}])
    end

    return "active" if retv.nil?

    verb_grandparent = retv['to'].first
  else
    # KE Dec 19: test whether the participle itself is linked to its
    # parent via an OC edge. If so, that parent plays the same role as
    # the verb_grandparent above.
    retv = test_localtrees([{'from' => [node, 'pos', /^V[AMV]PP$/],
                             'to' => [nil, 'cat', /^(CVP|VP|S|CS)$/],
                             'edge' => ['up', /^OC$/]}])

    # this test has failed
    return "active" unless retv

    verb_grandparent = retv['to'].first
  end

  # node's grandparent has a HD child that is a terminal node, an auxiliary
  retv = test_localtrees([{'from' => [verb_grandparent, nil, //],
                           'to' => [nil, 'pos', /^VA.*$/],
                           'edge' => ['dn', /^HD$/]}])
  return "active" if retv.nil?

  # that HD child must be a form of 'werden'
  may_be_werden = retv['to'].first
  return "active" unless may_be_werden.part_of_speech =~ /^VA/

  # no morphology, so approximate it using regexp.s
  case may_be_werden.word
  when "geworden", /^w.+rd(e|en|et|st|est)?$/
    # all tests passed successfully
    return "passive"
  else
    return "active"
  end
end
963
+
964
+ ###
965
+ # gfs
966
+ #
967
+ # grammatical functions of a constituent:
968
+ #
969
+ # returns: a list of pairs [relation(string), node(SynNode)]
970
+ # where <node> stands in the relation <relation> to the parameter
971
+ # that the method was called with
972
+ #
973
def self.gfs(node, # SynNode object
             sent) # SalsaTigerSentence object
  # Dispatch on the coarse category of the node; categories other than
  # adjective, noun and verb have no grammatical functions here.
  kind = self.category(node)

  return self.gfs_adj(node) if "adj" == kind
  return self.gfs_noun(node, sent) if "noun" == kind
  return self.gfs_verb(node) if "verb" == kind

  []
end
987
+
988
+
989
+ ###
990
+ # informative_content_node
991
+ #
992
+ # for most constituents: nil
993
+ # for a PP, the NP
994
+ # for an SBAR, the VP
995
+ # for a VP, the embedded VP
996
def self.informative_content_node(node)
  # Only sentence, verb phrase and prepositional phrase nodes (plain or
  # coordinated) have an informative content node.
  this_pt = self.simplified_pt(node)
  return nil unless ["S", "CS", "VP", "CVP", "PP", "CPP"].include? this_pt

  # Without a head terminal there is nothing to compare the children to.
  nh = self.head_terminal(node)
  return nil unless nh
  headlemma = self.lemma_backoff(nh)

  # Keep the children that have a head terminal of their own whose lemma
  # differs from the lemma of this node's head.
  candidates = node.children.select do |child|
    child_head = self.head_terminal(child)
    child_head && !(self.lemma_backoff(child_head) == headlemma)
  end
  return candidates.first if candidates.length == 1

  # Zero or several candidates: prefer a child headed by a verb (for S/VP)
  # or by a noun (for PP).
  wanted_pos =
    case this_pt
    when /^C?S/, /^C?VP/ then /^V/
    when /^C?PP/ then /^N/
    else raise "Shouldn't be here"
    end

  preferred = candidates.detect do |child|
    child_head = self.head_terminal(child)
    child_head and child_head.part_of_speech =~ wanted_pos
  end

  # no preferred child: settle for the first candidate (nil if there is none)
  preferred || candidates.first
end
1042
+
1043
+ ###
1044
+ # main node of expression
1045
+ #
1046
+ # second argument non-nil:
1047
+ # don't handle multiword expressions beyond verbs with separate particles
1048
+ #
1049
+ # returns: SynNode, main node, if found
1050
+ # else nil
1051
def self.main_node_of_expr(nodelist,
                           no_mwes = nil)
  # Work on the terminals covered by the given nodes.
  terminals = nodelist.map(&:yield_nodes).flatten

  # Special case: exactly two terminals, one of them "zu" (POS PTKZU), the
  # other a verb, both directly below the same VZ node -> the verb is the
  # main node.
  # NOTE(review): distribute comes from ruby_class_extensions; it presumably
  # splits the list into [matching, non-matching] -- confirm.
  if terminals.length == 2
    zu_nodes, other_nodes = terminals.distribute { |t| t.part_of_speech == "PTKZU" }
    if zu_nodes.length == 1
      candidate = other_nodes.first
      if self.category(candidate) == "verb" and
         candidate.parent == zu_nodes.first.parent and
         candidate.parent.category == "VZ"
        return candidate
      end
    end
  end

  # no joy: try method offered by abstract class
  super(terminals, no_mwes)
end
1073
+
1074
+
1075
+ ########
1076
+ # prune?
1077
+ # given a target node t and another node n of the syntactic structure,
1078
+ # decide whether n is likely to instantiate a semantic role
1079
+ # of t. If not, recommend n for pruning.
1080
+ #
1081
+ # This method implements a slight variant of Xue and Palmer (EMNLP 2004).
1082
+ # Pruning according to Xue & Palmer, EMNLP 2004.
1083
+ # "Step 1: Designate the predicate as the current node and
1084
+ # collect its sisters (constituents attached at the same level
1085
+ # as the predicate) unless its sisters are coordinated with the
1086
+ # predicate.
1087
+ #
1088
+ # Step 2: Reset the current node to its parent and repeat Step 1
1089
+ # till it reaches the top level node.
1090
+ #
1091
+ # Modifications made here:
1092
+ # - paths of length 0 accepted in any case
1093
+ # - TIGER coordination allowed (phrase types CX)
1094
+ #
1095
+ # returns: 0 to recommend n for pruning, else 1 (both values are truthy in Ruby, so compare against 0/1)
1096
def self.prune?(node,            # SynNode
                paths_to_target, # hash: node ID -> Path object: paths from nodes to target
                terminal_index)  # hash: terminal node -> word index in sentence (not used here)
  # Decide whether `node` should be kept as a candidate role filler.
  #
  # returns: 0 to recommend `node` for pruning, 1 to keep it.
  # Both values are truthy in Ruby, so callers have to compare the result
  # against 0 or 1 instead of using it directly as a condition.

  path_to_target = paths_to_target[node.id]

  # no path between node and target: suggest for pruning
  return 0 unless path_to_target

  # path of length 0: the target may be its own role, definite accept
  return 1 if path_to_target.length == 0

  # count the Up and Down steps on the path
  num_up = 0
  num_down = 0
  path_to_target.each_step { |direction, _edgelabel, _nodelabel, _endnode|
    case direction
    when /U/
      num_up += 1
    when /D/
      num_down += 1
    end
  }

  # every accepted path contains at least one Up step
  return 0 if num_up < 1

  case num_down
  when 1
    # (1) at least one Up and exactly one Down: keep
    1
  when 2
    # (2) at least one Up, two Downs, and the roof node of the path is a
    #     coordination, i.e. its phrase type starts with C (CNP, CS, CVP, ...):
    #     keep. The phrase type is read from the roof node itself; the
    #     previous code asked CollinsTntInterpreter, the interpreter of a
    #     different parser, for the category of the roof node.
    roof = path_to_target.lca
    (roof and roof.category =~ /^C/) ? 1 : 0
  else
    # (3) anything else: discard
    0
  end
end
1140
+
1141
+
1142
+ ################################
1143
+ private
1144
+ ################################
1145
+
1146
+ ###
1147
# Deep subject of a verb.
#
# verb_node: SynNode, expected to be a verb terminal
#
# returns: nil if verb_node is not a verb; otherwise an array of nodes:
#   for a passive verb the SBP children of the verb's parent
#   (empty if the verb has no parent), for any other voice the
#   surface subject
def self.subject(verb_node)
  return nil unless self.category(verb_node) == "verb"

  # not passive: the subject of the verb is its subject in this sentence
  return self.surface_subject(verb_node) unless self.voice(verb_node) == "passive"

  # passive: what we would like to return is the SBP sibling of this verb
  mother = verb_node.parent

  # verb_node seems to be the root, strangely enough
  return [] if mother.nil?

  mother.children_by_edgelabels(['SBP'])
end
1177
+
1178
+
1179
+ ###
1180
# Deep direct object of a verb.
#
# verb_node: SynNode, expected to be a verb terminal
#
# returns: nil if verb_node is not a verb; otherwise an array of nodes:
#   for a passive verb the surface subject, for any other voice the
#   OA children of the verb's parent (empty if the verb has no parent)
def self.direct_object(verb_node)
  return nil unless self.category(verb_node) == "verb"

  # passive: what we would like to return as direct object
  # is the surface subject of this verb
  return self.surface_subject(verb_node) if self.voice(verb_node) == "passive"

  # not passive: the direct object is an OA sibling of verb_node
  mother = verb_node.parent

  # verb_node seems to be the root, strangely enough
  return [] if mother.nil?

  mother.children_by_edgelabels(['OA'])
end
1204
+
1205
+ ###
1206
# Dative object of a verb.
#
# verb_node: SynNode, expected to be a verb terminal
#
# returns: nil if verb_node is not a verb; otherwise the DA children of
#   the verb's parent (empty array if the verb has no parent)
def self.dative_object(verb_node)
  return nil unless self.category(verb_node) == "verb"

  mother = verb_node.parent
  mother.nil? ? [] : mother.children_by_edgelabels(['DA'])
end
1220
+
1221
+ ###
1222
# PP siblings of a verb, optionally restricted to one preposition.
#
# verb_node:   SynNode, expected to be a verb terminal
# preposition: word form of the preposition to filter for, or nil for no filter
#
# returns: nil if verb_node is not a verb; otherwise an array of PP nodes
#   (empty if the verb has no parent)
def self.prep_object(verb_node, preposition)
  return nil unless self.category(verb_node) == "verb"

  mother = verb_node.parent
  # verb_node seems to be the root, strangely enough
  return [] if mother.nil?

  # collect all PPs that are siblings of verb_node
  pp_siblings = []
  mother.each_child { |sibling| pp_siblings << sibling if sibling.category == 'PP' }

  # no preposition given: no filtering
  return pp_siblings if preposition.nil?

  # prepositions are the AC children of a PP node, and they are terminal
  # words; keep the PPs that have the requested word among them
  pp_siblings.select { |pp_node|
    prep_words = pp_node.children_by_edgelabels(['AC']).map(&:word)
    prep_words.include?(preposition)
  }
end
1256
+
1257
+ ###
1258
# Surface subject of a verb.
#
# Walks through the nodes recorded under 'max_proj_at_level' by
# max_projection, in the order in which they were recorded, and returns
# the SB children of the first node that has any.
#
# returns: array of nodes, empty if no level has an SB child
def self.surface_subject(verb_node)
  levels = self.max_projection(verb_node)['max_proj_at_level']

  levels.each do |level_node|
    subjects = level_node.children_by_edgelabels(['SB'])
    return subjects unless subjects.empty?
  end

  []
end
1275
+
1276
+
1277
+ ##################
1278
+ # gfs_verb
1279
+ #
1280
+ # given a node (a SynNode object) that is a terminal node
1281
+ # representing a verb, determine
1282
+ # all grammatical functions of this verb
1283
+ # along with their head words
1284
+ #
1285
+ # verb_node: SynNode object, terminal node representing a verb
1286
+ #
1287
+ # returns: a list of pairs [relation(string), node(SynNode)]
1288
+ # 'relation' is 'SB', 'OA', 'DA', 'OC', or, for PP siblings, '<edge label>-<preposition>' (edge label defaults to 'MO')
1289
+ # 'node' is the constituent that stands in this relation to verb_node
1290
+
1291
def self.gfs_verb(verb_node)
  # returns: list of pairs [relation(string), node], in the order
  #   SB, OA, DA, PP relations, OC; empty list if verb_node is not a verb
  return [] unless self.category(verb_node) == "verb"

  relations = []

  # subject, direct object, dative object: at most one pair each,
  # using the first node the respective method finds
  [["SB", :subject], ["OA", :direct_object], ["DA", :dative_object]].each do |label, finder|
    fillers = self.send(finder, verb_node)
    relations << [label, fillers.first] if fillers.length > 0
  end

  # PP objects and adjuncts: labelled "<edge label>-<preposition>",
  # with "MO" standing in when the PP has no edge label
  self.prep_object(verb_node, nil).each do |pp_node|
    edge = pp_node.parent_label || "MO"
    relations << [edge + "-" + self.preposition(pp_node).to_s, pp_node]
  end

  # sentence complements: OC children of the verb's parent
  mother = verb_node.parent
  unless mother.nil?
    mother.children_by_edgelabels(["OC"]).each { |clause| relations << ["OC", clause] }
  end

  relations
end
1338
+
1339
+ ###
1340
+ # gfs_noun
1341
+ #
1342
+ # determine relation names and relation-bearing syntax nodes
1343
+ # for noun targets
1344
+ #
1345
+ # returns: a list of pairs
1346
+ # [rel(string), node(SynNode)]
1347
def self.gfs_noun(noun_node, # SynNode object: terminal, noun
                  sent_obj)  # SalsaTigerSentence object: sentence in which this noun occurs (not used here)
  # returns: list of pairs [relation(string), node]
  relations = []

  mother = noun_node.parent
  # without a parent none of the three cases below can apply
  return relations unless mother

  nominal_cats = ["NP", "PP", "PN"]
  nominal_or_coord_cats = ["NP", "PP", "PN", "CNP"]

  # (1) the noun hangs on an NK edge: every sibling that is not attached
  #     by NK is related to it, under the sibling's own edge label
  if noun_node.parent_label == "NK"
    mother.children.each do |sibling|
      next if sibling.parent_label == "NK"
      next if sibling == noun_node
      relations << [sibling.parent_label, sibling]
    end
  end

  grandmother = mother.parent
  # cases (2) and (3) both need a grandparent
  return relations unless grandmother

  # (2) parent is NP/PP/PN/CNP, grandparent is NP/PP/PN, and the edge
  #     between them is not NK: the grandparent, under that edge label
  if nominal_or_coord_cats.include?(mother.category) &&
     nominal_cats.include?(grandmother.category) &&
     mother.parent_label != "NK"
    relations << [mother.parent_label, grandmother]
  end

  # (3) grandparent is a CNP: its other NP/PP/PN/CNP children, as "CJ"
  if grandmother.category == "CNP"
    grandmother.each_child do |conjunct|
      if nominal_or_coord_cats.include?(conjunct.category) && conjunct != mother
        relations << ["CJ", conjunct]
      end
    end
  end

  relations
end
1409
+
1410
+ ###
1411
+ # gfs_adj
1412
+ #
1413
+ # determine relation names and relation-bearing syntax nodes
1414
+ # for adjective targets
1415
+ #
1416
+ # returns: a list of pairs
1417
+ # [rel(string), node(SynNode)]
1418
+ #
1419
+ # although in this case it's just one pair (if we can find it),
1420
+ # describing the head noun
1421
def self.gfs_adj(adj_node) # SynNode object: terminal, adjective
  # returns: [["HD", parent]] if the adjective's parent is an
  #   NP, CNP, PP, CPP or PN node, otherwise an empty list
  mother = adj_node.parent
  return [] if mother.nil?

  %w[NP CNP PP CPP PN].include?(mother.category) ? [["HD", mother]] : []
end
1435
+
1436
+
1437
# Maximal projection of a node.
#
# Starting from the node and its parent, applies the projection steps
# participle, infinitive, modal, participle, infinitive in this order and
# records the upper node reached after each step.
#
# returns: hash with
#   'max_proj'          => the upper node after the last step
#   'max_proj_at_level' => list starting with the parent, followed by the
#                          upper node after each of the five steps
#   For a node without parent both entries refer to the node itself.
def self.max_projection(node)
  mother = node.parent

  # node has no parent? recover somehow
  if mother.nil?
    return { 'max_proj' => node, 'max_proj_at_level' => [node] }
  end

  levels = [mother]
  # state is [lower, upper_l, upper_u], threaded through the steps
  state = [node, mother, mother]

  steps = [:project_participle, :project_infinitive, :project_modal,
           :project_participle, :project_infinitive]
  steps.each do |step|
    lower, upper_l, upper_u = send(step, *state)
    state = [lower, upper_l, upper_u]
    levels << upper_u
  end

  { 'max_proj' => state[2], 'max_proj_at_level' => levels }
end
1469
+
1470
+
1471
+ ###
1472
# Test a sequence of local-tree steps.
#
# path: list of step descriptions as accepted by test_step
#
# Each step is handed the result of the previous one (an empty hash for
# the first step).
#
# returns: nil as soon as one step fails, otherwise the result of the
#   last step (an empty hash if path is empty)
def self.test_localtrees(path)
  # TODO (translated from the original German note): continue here,
  # what exactly happens here?
  outcome = {}

  path.each do |step|
    outcome = test_step(step, outcome)
    return nil if outcome.nil?
  end

  outcome
end
1488
+
1489
+ ######
1490
+ private
1491
+
1492
+ ###
1493
# Test one step of a local-tree pattern.
#
# path:     hash with the entries
#   'from' => [node, 'pos'|'cat', regexp]  start node and its description
#   'to'   => [node, 'pos'|'cat', regexp]  end node (nil: any) and its description
#   'edge' => ['up'|'dn', regexp]          direction and edge label pattern
# previous: result hash of the preceding step
#
# Prints a message to stderr and exits with status 1 if an entry is
# missing or the direction is unknown.
#
# returns: nil if the step does not match, otherwise
#   {'from' => start node, 'to' => list of matching end nodes}
def self.test_step(path, previous)
  if [path['from'], path['to'], path['edge']].any?(&:nil?)
    $stderr.puts 'TigerAux error: missing path hash entry'
    exit 1
  end

  start_node, *start_descr = path['from']
  goal_node, *goal_descr = path['to']

  # the special flags tp_prev_to and tp_prev_from stand for values
  # taken from the 'previous' hash
  start_node = cf_previous(start_node, previous)
  goal_node = cf_previous(goal_node, previous)

  # the start node has to fit its description
  return nil unless test_node(start_node, start_descr)

  # follow the edge to collect prospective end nodes
  direction, edgelabel = path['edge']
  candidates =
    case direction
    when 'up'
      (start_node.parent_label =~ edgelabel) ? [start_node.parent] : []
    when 'dn'
      reached = []
      start_node.each_child { |child| reached << child if child.parent_label =~ edgelabel }
      reached
    else
      $stderr.puts 'TigerAux error: unknown direction'
      exit 1
    end

  # keep the candidates that are the requested end node (if one was
  # given) and fit the end node description
  matching = candidates.select { |candidate|
    (goal_node.nil? || goal_node == candidate) && test_node(candidate, goal_descr)
  }

  return nil if matching.empty?

  { 'from' => start_node, 'to' => matching }
end
1551
+
1552
+ ###
1553
# Test whether a node fits a description.
#
# node:  the node to test; must not be nil
# descr: [kind, pattern]: kind 'pos' matches the pattern against the
#        node's part of speech, kind 'cat' against its category,
#        kind nil accepts any node
#
# Prints a message to stderr and exits with status 1 if node is nil or
# kind is none of the above.
#
# returns: true or false
def self.test_node(node, descr)
  kind, pattern = descr

  if node.nil?
    $stderr.puts 'TigerAux error: test_node nil'
    exit 1
  end

  case kind
  when 'pos' then !!(node.part_of_speech =~ pattern)
  when 'cat' then !!(node.category =~ pattern)
  when nil then true
  else
    $stderr.puts 'TigerAux error: neither cat nor pos'
    exit 1
  end
end
1581
+
1582
+ ###
1583
# Resolve the special node flags used in step descriptions.
#
# node:     a node, or one of the strings 'tp_prev_to' / 'tp_prev_from'
# previous: result hash of the preceding step
#
# returns: the first entry of previous['to'] for 'tp_prev_to',
#   previous['from'] for 'tp_prev_from', otherwise node itself
def self.cf_previous(node, previous)
  return previous['to'].first if 'tp_prev_to' == node
  return previous['from'] if 'tp_prev_from' == node

  node
end
1593
+
1594
+ ###
1595
# Participle projection: runs the patterns conj, pp_pp, conj, pp_fin
# in this order through project_this.
#
# returns: [lower, upper_l, upper_u] as returned by project_this
def self.project_participle(lower, upper_l, upper_u)
  patterns = %w[conj pp_pp conj pp_fin].map { |name| self.method(name) }
  project_this(lower, upper_l, upper_u, patterns)
end
1602
+
1603
+ ###
1604
# Infinitive projection: runs the patterns conj, inf_fin, vzinf_fin,
# cvzinf_fin in this order through project_this.
#
# returns: [lower, upper_l, upper_u] as returned by project_this
def self.project_infinitive(lower, upper_l, upper_u)
  patterns = %w[conj inf_fin vzinf_fin cvzinf_fin].map { |name| self.method(name) }
  project_this(lower, upper_l, upper_u, patterns)
end
1612
+
1613
+ ###
1614
# Modal projection: runs the patterns conj, modal, othermodal
# in this order through project_this.
#
# returns: [lower, upper_l, upper_u] as returned by project_this
def self.project_modal(lower, upper_l, upper_u)
  patterns = %w[conj modal othermodal].map { |name| self.method(name) }
  project_this(lower, upper_l, upper_u, patterns)
end
1621
+
1622
+ ###
1623
# Variant of the participle projection with the same pattern list as
# project_participle (conj, pp_pp, conj, pp_fin).
# NOTE(review): not called from any method visible in this part of the
# file; check for other callers before removing it.
#
# returns: [lower, upper_l, upper_u] as returned by project_this
def self.project_participle_(lower, upper_l, upper_u)
  patterns = %w[conj pp_pp conj pp_fin].map { |name| self.method(name) }
  project_this(lower, upper_l, upper_u, patterns)
end
1630
+
1631
+ ###
1632
# Apply a list of projection patterns one after the other.
#
# lower, upper_l, upper_u: the current nodes
# method_list: callables taking (lower, upper_l, upper_u) and returning
#   either nil (pattern does not apply) or a new triple
#
# A pattern that returns nil leaves the triple unchanged; any other
# result replaces the triple handed to the following patterns.
#
# returns: [lower, upper_l, upper_u] after the last pattern
def self.project_this(lower, upper_l, upper_u, method_list)
  method_list.inject([lower, upper_l, upper_u]) do |state, pattern|
    outcome = pattern.call(*state)
    if outcome.nil?
      state
    else
      new_lower, new_upper_l, new_upper_u = outcome
      [new_lower, new_upper_l, new_upper_u]
    end
  end
end
1641
+
1642
+ ###
1643
# Pattern: a participle (V[AMV]PP) that is HD or CJ of a (C)VP, which is
# itself the OC of a VP headed by another participle.
#
# returns: nil if the pattern does not match, otherwise
#   [head participle of the upper VP, upper VP, upper VP]
def self.pp_pp(lower, upper_l, upper_u)
  # The edge label alternatives are grouped inside the anchors:
  # /^(HD)|(CJ)$/ anchored only one side of each alternative and so
  # also accepted labels that merely start with HD or end in CJ.
  retv = test_localtrees([
    {'from' => [lower, 'pos', /^V[AMV]PP$/],
     'to' => [upper_l, 'cat', /^C?VP$/],
     'edge' => ['up', /^(HD|CJ)$/]},
    {'from' => [upper_u, 'cat', /^C?VP$/],
     'to' => [nil, 'cat', /^VP$/],
     'edge' => ['up', /^OC$/]},
    {'from' => ['tp_prev_to', 'cat', /^VP$/],
     'to' => [nil, 'pos', /^V[AMV]PP$/],
     'edge' => ['dn', /^HD$/]}
  ])

  return nil if retv.nil?

  [retv['to'].first, retv['from'], retv['from']]
end
1664
+
1665
+ ###
1666
# Pattern: a participle (V[AMV]PP) that is HD or CJ of a (C)VP, which is
# the OC or PD of a VP or S whose head is a finite or infinite verb
# terminal or a VZ node.
#
# returns: nil if the pattern does not match, otherwise
#   [head of the upper node, upper node, upper node]
def self.pp_fin(lower, upper_l, upper_u)
  # All alternations are grouped inside the anchors: the former
  # /^(HD)|(CJ)$/, /^(VP)|S$/ and /^(OC)|(PD)$/ anchored only one side of
  # each alternative, so that e.g. the category CS passed as "VP or S".
  retv = test_localtrees([
    {'from' => [lower, 'pos', /^V[AMV]PP$/],
     'to' => [upper_l, 'cat', /^C?VP$/],
     'edge' => ['up', /^(HD|CJ)$/]},
    {'from' => [upper_u, 'cat', /^C?VP$/],
     'to' => [nil, 'cat', /^(VP|S)$/],
     'edge' => ['up', /^(OC|PD)$/]}
  ])

  return nil if retv.nil?

  new_upper = retv['to'].first

  # test two alternatives:
  # head child of new_upper is either a VXFIN or VXINF terminal...
  retv = test_localtrees([
    {'from' => [new_upper, 'cat', /^(VP|S)$/],
     'to' => [nil, 'pos', /^V[AMV]((FIN)|(INF))$/],
     'edge' => ['dn', /^HD$/]}
  ])

  # ... or a VZ nonterminal
  if retv.nil?
    retv = test_localtrees([
      {'from' => [new_upper, 'cat', /^(VP|S)$/],
       'to' => [nil, 'cat', /^VZ$/],
       'edge' => ['dn', /^HD$/]}
    ])
  end

  return nil if retv.nil?

  [retv['to'].first, new_upper, new_upper]
end
1709
+
1710
+
1711
+ ###
1712
# Pattern: an infinitive (V[AMV]INF) that is HD or CJ of a (C)VP, which
# is the OC of a VP or S headed by a VAFIN, VAINF or VVINF terminal.
#
# returns: nil if the pattern does not match, otherwise
#   [head of the upper node, upper node, upper node]
def self.inf_fin(lower, upper_l, upper_u)
  # All alternations are grouped inside the anchors: the former
  # /^(HD)|(CJ)$/, /^(VP)|S$/ and /^(VAFIN)|(VAINF)|(VVINF)$/ anchored
  # only one side of the outer alternatives and none of the middle one.
  retv = test_localtrees([
    {'from' => [lower, 'pos', /^V[AMV]INF$/],
     'to' => [upper_l, 'cat', /^C?VP$/],
     'edge' => ['up', /^(HD|CJ)$/]},
    {'from' => [upper_u, 'cat', /^C?VP$/],
     'to' => [nil, 'cat', /^(VP|S)$/],
     'edge' => ['up', /^OC$/]},
    {'from' => ['tp_prev_to', 'cat', /^(VP|S)$/],
     'to' => [nil, 'pos', /^(VAFIN|VAINF|VVINF)$/],
     'edge' => ['dn', /^HD$/]}
  ])

  return nil if retv.nil?

  [retv['to'].first, retv['from'], retv['from']]
end
1732
+
1733
+
1734
+ ###
1735
# Pattern: a VZ node that is HD of a VP, which is the OC of a VP or S
# headed by a finite verb (V[AV]FIN).
#
# returns: nil if the pattern does not match, otherwise
#   [head of the upper node, upper node, upper node]
def self.vzinf_fin(lower, upper_l, upper_u)
  # "VP or S" is grouped inside the anchors: the former /^(VP)|S$/
  # matched anything starting with VP or ending in S, e.g. CS.
  retv = test_localtrees([
    {'from' => [lower, 'cat', /^VZ$/],
     'to' => [upper_l, 'cat', /^VP$/],
     'edge' => ['up', /^HD$/]},
    {'from' => [upper_u, 'cat', /^VP$/],
     'to' => [nil, 'cat', /^(VP|S)$/],
     'edge' => ['up', /^OC$/]},
    {'from' => ['tp_prev_to', 'cat', /^(VP|S)$/],
     'to' => [nil, 'pos', /^V[AV]FIN$/],
     'edge' => ['dn', /^HD$/]}
  ])

  return nil if retv.nil?

  [retv['to'].first, retv['from'], retv['from']]
end
1756
+
1757
+ ###
1758
# Pattern: a VZ node that is CJ of a CVP, which is the OC of a VP or S.
#
# returns: nil if the pattern does not match, otherwise
#   [lower, upper_l, the VP or S node above the CVP]
def self.cvzinf_fin(lower, upper_l, upper_u)
  # "VP or S" is grouped inside the anchors: the former /^(VP)|S$/
  # matched anything starting with VP or ending in S, e.g. CS.
  retv = test_localtrees([
    {'from' => [lower, 'cat', /^VZ$/],
     'to' => [upper_l, 'cat', /^CVP$/],
     'edge' => ['up', /^CJ$/]},
    {'from' => [upper_u, 'cat', /^CVP$/],
     'to' => [nil, 'cat', /^(VP|S)$/],
     'edge' => ['up', /^OC$/]}
  ])

  return nil if retv.nil?

  [lower, upper_l, retv['to'].first]
end
1776
+
1777
+ ###
1778
# Pattern: an infinitive (V[AMV]INF) that is HD or CJ of a (C)VP, which
# is the OC of a VP or S headed by a modal verb (VMPP, VMFIN or VMINF).
#
# returns: nil if the pattern does not match, otherwise
#   [modal head of the upper node, upper node, upper node]
def self.modal(lower, upper_l, upper_u)
  # The alternations are grouped inside the anchors: the former
  # /^(HD)|(CJ)$/ and /^(VP)|S$/ anchored only one side of each
  # alternative, so that e.g. the category CS passed as "VP or S".
  retv = test_localtrees([
    {'from' => [lower, 'pos', /^V[AMV]INF$/],
     'to' => [upper_l, 'cat', /^C?VP$/],
     'edge' => ['up', /^(HD|CJ)$/]},
    {'from' => [upper_u, 'cat', /^C?VP$/],
     'to' => [nil, 'cat', /^(VP|S)$/],
     'edge' => ['up', /^OC$/]},
    {'from' => ['tp_prev_to', 'cat', /^(VP|S)$/],
     'to' => [nil, 'pos', /^VM((PP)|(FIN)|(INF))$/],
     'edge' => ['dn', /^HD$/]}
  ])

  return nil if retv.nil?

  [retv['to'].first, retv['from'], retv['from']]
end
1799
+
1800
+ ###
1801
# Pattern: a participle (V[AMV]PP) attached by OC to a VP that is headed
# by an infinite or finite verb, where this VP is in turn the OC of a VP
# or S headed by a modal verb (VMPP, VMFIN or VMINF).
#
# returns: nil if the pattern does not match, otherwise
#   [modal head of the upper node, upper node, upper node]
def self.othermodal(lower, upper_l, upper_u)
  # "VP or S" is grouped inside the anchors: the former /^(VP)|S$/
  # matched anything starting with VP or ending in S, e.g. CS.
  retv = test_localtrees([
    {'from' => [lower, 'pos', /^V[AMV]PP$/],
     'to' => [upper_l, 'cat', /^VP$/],
     'edge' => ['up', /^OC$/]},
    {'from' => [upper_l, 'cat', /^VP$/],
     'to' => [nil, 'pos', /^V[AMV]((INF)|(FIN))$/],
     'edge' => ['dn', /^HD$/]},
    {'from' => [upper_u, 'cat', /^VP$/],
     'to' => [nil, 'cat', /^(VP|S)$/],
     'edge' => ['up', /^OC$/]},
    {'from' => ['tp_prev_to', 'cat', /^(VP|S)$/],
     'to' => [nil, 'pos', /^VM((PP)|(FIN)|(INF))$/],
     'edge' => ['dn', /^HD$/]}
  ])

  return nil if retv.nil?

  [retv['to'].first, retv['from'], retv['from']]
end
1825
+
1826
+ ###
1827
# Pattern: any node attached (by any edge) to a VP that is a CJ of a CVP.
#
# returns: nil if the pattern does not match, otherwise
#   [lower, upper_l, the CVP node]
def self.conj(lower, upper_l, upper_u)
  steps = [
    {'from' => [lower, nil, //],
     'to' => [upper_l, 'cat', /^VP$/],
     'edge' => ['up', //]},
    {'from' => [upper_u, 'cat', /^VP$/],
     'to' => [nil, 'cat', /^CVP$/],
     'edge' => ['up', /^CJ$/]}
  ]

  match = test_localtrees(steps)
  match.nil? ? nil : [lower, upper_l, match['to'].first]
end
1844
+ end
1845
+ end
1846
+ end