proiel-cli 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,165 @@
1
+ module PROIEL
2
+ module Commands
3
+ class Tokenize < Command
4
+ class << self
5
# Registers the +tokenize+ subcommand on the given program object.
#
# @param prog the command-line program object; it must respond to
#   +command+ and yield a subcommand object that accepts +syntax+,
#   +description+ and +action+
def init_with_program(prog)
  prog.command(:tokenize) do |c|
    # One syntax string only. The original called +syntax+ twice
    # ('tokenize', then '[options] filename'); presumably +syntax+ is a
    # plain setter, so the second call dropped the command name -- TODO
    # confirm against the CLI framework in use.
    c.syntax 'tokenize [options] filename'
    c.description 'Tokenize raw text'

    c.action { |args, options| process(args, options) }
  end
end
14
+
15
# Reads one raw text file and writes a tokenised PROIEL XML document to
# STDOUT.
#
# Prints a message on STDERR and exits with status 1 unless exactly one
# filename is given.
#
# @param args [Array<String>] positional arguments; must hold exactly one filename
# @param options command-line options (not used by this method)
def process(args, options)
  usage_error =
    if args.empty?
      'Missing filename. Use --help for more information.'
    elsif args.length > 1
      'Too many filenames. Use --help for more information.'
    end

  if usage_error
    STDERR.puts usage_error
    exit 1
  end

  builder = Builder::XmlMarkup.new(target: STDOUT, indent: 2)
  builder.instruct! :xml, version: '1.0', encoding: 'UTF-8'

  File.open(args.first, 'r') do |file|
    # Both readers rewind the file themselves, so the order is free.
    header = read_header(file)
    body = read_body(file)

    builder.proiel('export-time' => header.export_time, 'schema-version' => '2.0') do
      builder.source(id: header.id, language: header.language) do
        builder.title header.title
        builder.author header.author
        builder.tag!('citation-part', header.citation_part)

        tokenize(builder, body)
      end
    end
  end
end
46
+
47
# Splits each source division into sentences and tokens and emits them
# through +builder+ as +div+ / +sentence+ / +token+ elements.
#
# Two inline annotations are recognised in the text, each running up to
# the next space:
# * +@...+ : the text after the @ is not a token; it is prepended to the
#   +presentation_before+ of the next token.
# * +§...+ : the text after the § becomes the +citation_part+ of all
#   following tokens (it carries over sentence and division boundaries).
#
# @param builder object responding to +div+, +sentence+ (both taking an
#   attribute hash and a block) and +token+ (taking an attribute hash)
# @param body [Array<Hash>] divisions as produced by +read_body+, each
#   with +:title+ and +:contents+
def tokenize(builder, body)
  citation_part = nil

  body.each do |sd_body|
    builder.div(title: sd_body[:title]) do
      chunks = sd_body[:contents].split(/(@[^ ]+|§[^ ]+)/).map do |chunk|
        if chunk.start_with?('§', '@')
          # Annotations are never searched for sentence breaks.
          chunk
        else
          # It's sensible to place the break not immediately after probable
          # sentence-breaking punctuation like periods and question marks, but
          # after the punctuation mark and characters typically used in pairs,
          # like brackets and apostrophes.
          chunk.gsub(/([\.:;\?!]+[\s†\]\)"']*)/, '\1|')
        end
      end

      chunks.join.split('|').each do |s_body|
        builder.sentence(status_tag: 'unannotated') do
          leftover_before = ''

          # Preserve linebreaks in the text.
          text = s_body.gsub(/\s*[\n\r]/, "\u2028")

          text.scan(/([^@§\p{Word}]*)([\p{Word}]+|@[^ ]+|§[^ ]+)([^@§\p{Word}]*)/).each do |before, form, after|
            case form
            when /^@(.*)$/
              leftover_before += before.to_s + Regexp.last_match(1) + after.to_s
            when /^§(.*)$/
              citation_part = Regexp.last_match(1)
              leftover_before += before.to_s + after.to_s
            else
              before = leftover_before + before
              leftover_before = ''

              attrs = { citation_part: citation_part, form: form }
              attrs[:presentation_before] = before unless before == ''
              attrs[:presentation_after] = after unless after == ''

              builder.token(attrs)
            end
          end
        end
      end
    end
  end
end
95
+
96
# Names of the metadata fields that +read_header+ accepts in the
# %-prefixed header of a raw text file. Frozen so the list cannot be
# changed by accident at runtime.
VALID_METADATA_FIELDS =
  %w(title author citation_part language id

     principal funder distributor distributor_address date
     license license_url
     reference_system
     editor editorial_note
     annotator reviewer

     electronic_text_editor electronic_text_title
     electronic_text_version
     electronic_text_publisher electronic_text_place electronic_text_date
     electronic_text_original_url
     electronic_text_license electronic_text_license_url

     printed_text_editor printed_text_title
     printed_text_edition
     printed_text_publisher printed_text_place printed_text_date).freeze
114
+
115
# Reads the metadata header from the start of a raw text file.
#
# The header is the leading run of lines starting with %, each of the
# form "% field = value"; it ends at the first line that does not start
# with %. Unknown fields, and known fields that have no "= value" part,
# are reported on STDERR and skipped.
#
# @param f an IO-like object responding to +rewind+ and +each_line+
# @return [OpenStruct] one attribute per accepted header field
def read_header(f)
  f.rewind

  OpenStruct.new.tap do |hdr|
    f.each_line do |line|
      # The first non-% line ends the header.
      break unless line.start_with?('%')

      field, value = line.chomp.sub(/^%\s*/, '').split(/\s*=\s*/, 2)

      case field
      when 'id', 'export_time', *VALID_METADATA_FIELDS
        if value.nil?
          # "% title" without "=" used to crash with NoMethodError on nil.
          STDERR.puts "Missing value for header field #{field}. Ignoring.".yellow
        else
          hdr[field] = value.strip
        end
      else
        STDERR.puts "Invalid header field #{field}. Ignoring.".yellow
      end
    end
  end
end
141
+
142
# Reads the text body of a raw text file, grouped into source divisions.
#
# Lines starting with % (header lines) and blank lines are skipped. A
# line starting with # opens a new division whose title is the rest of
# that line. Every other line is appended verbatim, line ending
# included, to the current division; text that appears before any #
# line goes into a division with an empty title.
#
# @param f an IO-like object responding to +rewind+ and +each_line+
# @return [Array<Hash>] hashes with +:title+ and +:contents+ strings
def read_body(f)
  f.rewind

  f.each_line.each_with_object([]) do |line, divisions|
    case line
    when /^%/, /^\s*$/
      # Ignore header lines and empty lines.
    when /^#/
      # New source division started
      divisions << { title: line.sub(/^#/, '').strip, contents: ''.dup }
    else
      divisions << { title: '', contents: ''.dup } if divisions.empty?
      divisions.last[:contents] << line
    end
  end
end
162
+ end
163
+ end
164
+ end
165
+ end
@@ -0,0 +1,42 @@
1
+ module PROIEL
2
+ module Commands
3
+ class Validate < Command
4
+ class << self
5
# Registers the +validate+ subcommand on the given program object.
#
# @param prog the command-line program object; it must respond to
#   +command+ and yield a subcommand object that accepts +syntax+,
#   +description+ and +action+
def init_with_program(prog)
  prog.command(:validate) do |cmd|
    cmd.syntax 'validate'
    cmd.description 'Validate input data'
    cmd.action { |args, options| process(args, options) }
  end
end
12
+
13
# Validates every file named in +args+ and reports the result of each
# on STDOUT, listing the validator's errors for invalid files.
#
# Exits with status 0 when all files are valid and with status 1 when
# at least one is invalid (or when no filename is given). The original
# called +exit+ inside the loop, so only the first file was ever
# checked.
#
# @param args [Array<String>] names of the files to validate
# @param options command-line options (not used by this method)
def process(args, options)
  if args.empty?
    STDERR.puts 'Missing filename(s). Use --help for more information.'
    exit 1
  end

  # NOTE(review): not read anywhere in this method; kept in case other
  # code relies on it being reset here -- confirm and remove if unused.
  @schemas = {}

  all_valid = true

  args.each do |filename|
    validator = PROIEL::PROIELXML::Validator.new(filename)

    if validator.valid?
      puts "#{filename} is valid".green
    else
      all_valid = false

      puts "#{filename} is invalid".red
      validator.errors.each { |error| puts "* #{error}" }
    end
  end

  exit(all_valid ? 0 : 1)
end
39
+ end
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,589 @@
1
+ require 'proiel/cli/converters/conll-u/morphology'
2
+ require 'proiel/cli/converters/conll-u/syntax'
3
+
4
+ module PROIEL
5
+ module Converter
6
+ class CoNLLU
7
+ class << self
8
# Converts every sentence of every div of every source in +tb+ to
# CoNLL-U and prints it to STDOUT, followed by a blank line.
#
# A sentence whose conversion raises is reported on STDERR and counted;
# a summary line with the counts is written to STDERR at the end.
#
# @param tb treebank object responding to +sources+ (each source
#   responds to +divs+, each div to +sentences+)
# @param options not used by this method
def process(tb, options = [])
  error_count = 0
  sentence_count = 0

  tb.sources.each do |source|
    source.divs.each do |div|
      div.sentences.each do |sentence|
        sentence_count += 1
        converted = Sentence.new sentence

        # Unlike other conversions, this one has to rely on certain
        # assumptions about correct linguistic annotation in order to
        # produce a meaningful representation in CoNLL-U.
        begin
          puts converted.convert.to_conll
          puts
        rescue => e
          error_count += 1
          STDERR.puts "Cannot convert #{sentence.id} (#{sentence.citation}): #{e}"
        end
      end
    end
  end

  STDERR.puts "#{error_count} sentences out of #{sentence_count} could not be converted"
end
32
+ end
33
+
34
# A sentence held as a flat list of Token objects that together form a
# dependency graph: each token refers to its head through +head_id+,
# and a +head_id+ of 0 marks a root.
class Sentence
  attr_accessor :tokens

  # Initializes a PROIEL::Convert::Sentence from PROIEL::PROIELXML::Sentence.
  #
  # Tokens whose +empty_token_sort+ is 'P' are dropped, the remaining
  # tokens are numbered from 1 in their original order, and whitespace
  # in forms and lemmas is replaced by '.'.
  def initialize(sentence)
    # Returns 0 (i.e. root) for nil and for ids that were dropped.
    id_to_number = Hash.new(0)

    kept = sentence.tokens.reject { |t| t.empty_token_sort == 'P' }
    kept.each_with_index { |t, index| id_to_number[t.id] = index + 1 }

    @tokens = kept.map do |t|
      Token.new(id_to_number[t.id],
                id_to_number[t.head_id],
                t.form.to_s.gsub(/[[:space:]]/, '.'),
                t.lemma.to_s.gsub(/[[:space:]]/, '.'),
                t.part_of_speech,
                t.language,
                t.morphology,
                t.relation,
                t.empty_token_sort,
                t.slashes.map { |relation, target_id| [id_to_number[target_id], relation] },
                self)
    end
  end

  # Runs the whole conversion (restructure, relabel, map parts of
  # speech) in place and returns +self+.
  def convert
    restructure_graph!
    relabel_graph!
    map_part_of_speech!
    self
  end

  # Returns the token with the given id, or +nil+ if there is none.
  def find_token(identifier)
    @tokens.find { |t| t.id == identifier }
  end

  # Removes +token+ from the sentence.
  def remove_token!(token)
    @tokens.delete(token)
  end

  def to_s
    @tokens.map(&:to_s).join("\n")
  end

  # Sum of +count_subgraph+ over all roots.
  def count_tokens
    roots.map(&:count_subgraph).inject(0, :+)
  end

  # Tokens whose +head_id+ is 0, ordered by id.
  def roots
    @tokens.select { |t| t.head_id == 0 }.sort_by(&:id)
  end

  def to_graph
    roots.map(&:to_graph).join("\n")
  end

  def to_conll
    @tokens.map(&:to_conll).join("\n")
  end

  # Removes empty ('V') root nodes by promoting their first dependent
  # to root, repeating until no empty root is left.
  #
  # TODO: this will leave several root nodes in many cases.
  #
  # Raises RuntimeError if an empty root has no dependent to promote
  # (this used to surface as a NoMethodError on nil).
  def prune_empty_rootnodes!
    empty_roots = roots.select { |r| r.empty_token_sort == 'V' }
    return if empty_roots.empty?

    empty_roots.each do |r|
      # promote the first dependent to root
      new_root = r.dependents.first
      raise "Empty root node #{r.id} has no dependents to promote" if new_root.nil?

      new_root.head_id = 0
      new_root.relation = r.relation
      # new_root no longer depends on r, so this only moves the others.
      r.dependents.each { |d| d.head_id = new_root.id }
      remove_token! r
    end

    prune_empty_rootnodes!
  end

  def demote_subjunctions!
    @tokens.select { |t| t.part_of_speech == 'G-' }.each(&:process_subjunction!)
  end

  # Attaches root-level vocatives ('voc') and parentheticals
  # ('parpred') under the single remaining root. Raises if there is no
  # unique root to attach them to.
  def demote_parentheticals_and_vocatives!
    main_roots, loose_roots = partition_roots
    if loose_roots.any? && main_roots.none?
      # promote the first vocative/parenthetical to head in case there's nothing else
      loose_roots.first.relation = 'pred'
      main_roots, loose_roots = partition_roots
    end
    raise "No unique root in this tree:\n#{to_graph}" if loose_roots.any? && !main_roots.one?
    loose_roots.each { |x| x.head_id = main_roots.first.id }
  end

  def relabel_graph!
    roots.each(&:relabel_graph!)
  end

  def map_part_of_speech!
    roots.each(&:map_part_of_speech!)
  end

  # Rewrites the dependency graph in place; the order of the steps
  # matters.
  def restructure_graph!
    @tokens.delete_if { |n| n.empty_token_sort == 'P' }
    @tokens.select(&:preposition?).each(&:process_preposition!)
    roots.each(&:change_coordinations!)
    @tokens.select(&:copula?).each(&:process_copula!)
    prune_empty_rootnodes!
    # do ellipses from left to right for proper remnant treatment
    @tokens.select(&:ellipsis?).sort_by { |e| e.left_corner.id }.each(&:process_ellipsis!)
    demote_subjunctions!
    # DIRTY: remove the rest of the empty nodes by attaching them
    # to their grandmother with remnant. This is the best way to
    # do it given the current state of the UDEP scheme, but
    # revisions will come.
    roots.each(&:remove_empties!)
    demote_parentheticals_and_vocatives!
  end

  private

  # Splits the roots into [ordinary roots, 'voc'/'parpred' roots].
  def partition_roots
    roots.partition { |n| !['voc', 'parpred'].include?(n.relation) }
  end
end
155
+
156
# A node in a sentence's dependency graph. A token finds its head,
# siblings and dependents by looking through the token list of the
# sentence it belongs to, so the graph is changed simply by assigning
# +head_id+ and +relation+.
class Token
  # Meaning of each position in a positional morphology tag.
  MORPHOLOGY_POSITIONAL_TAG_SEQUENCE = [
    :person, :number, :tense, :mood, :voice, :gender, :case,
    :degree, :strength, :inflection
  ].freeze

  attr_accessor :head_id
  attr_accessor :upos
  attr_reader :relation
  attr_reader :part_of_speech
  attr_reader :id
  attr_reader :lemma
  attr_reader :language
  attr_reader :empty_token_sort
  attr_reader :form

  # +morphology+ may be nil, in which case the token has no features.
  # +slashes+ is a list of [target id, relation] pairs; +sentence+ must
  # respond to +tokens+, +remove_token!+ and +to_graph+.
  def initialize(id, head_id, form, lemma, part_of_speech, language, morphology, relation, empty_token_sort, slashes, sentence)
    @id = id
    @head_id = head_id
    @form = form
    @lemma = lemma
    @part_of_speech = part_of_speech
    @language = language
    @morphology = morphology
    @relation = relation
    @empty_token_sort = empty_token_sort
    @slashes = slashes
    @sentence = sentence
    @features = (morphology ? map_morphology(morphology) : '')
    @upos = nil
  end

  # Translates a positional morphology tag into a '|'-separated feature
  # string via MORPHOLOGY_MAP; positions without a mapping are skipped.
  def map_morphology(morph)
    mapped = (0...morph.length).map do |position|
      MORPHOLOGY_MAP[MORPHOLOGY_POSITIONAL_TAG_SEQUENCE[position]][morph[position]]
    end
    mapped.compact.join('|')
  end

  # returns +true+ if the node is an adjective or an ordinal
  def adjectival?
    @part_of_speech == 'A-' || @part_of_speech == 'Mo'
  end

  def adverb?
    @part_of_speech =~ /\AD/
  end

  def cardinal?
    @part_of_speech == 'Ma'
  end

  # A node is clausal if it is a verb and not nominalized; or it has a
  # copula dependent; or it has a subject (e.g. in an absolute
  # construction without a verb); or if it is the root (e.g. in a
  # nominal clause)
  def clausal?
    (@part_of_speech == 'V-' && !nominalized?) ||
      dependents.any?(&:copula?) ||
      dependents.any? { |d| ['sub', 'nsubj', 'nsubjpass', 'csubj', 'csubjpass'].include?(d.relation) } ||
      root?
  end

  def conjunction?
    part_of_speech == 'C-' || @empty_token_sort == 'C'
  end

  def coordinated?
    head && head.conjunction? && head.relation == @relation
  end

  # Returns +true+ if the relation is already 'cop', or if the node has
  # an xobj dependent and either 1) the lemma is copular or 2) the node
  # is empty and has no pid slash, a pid slash to an empty node, or a
  # pid slash to a node with a copular lemma
  def copula?
    return true if @relation == 'cop'

    copular = COPULAR_LEMMATA.include?([lemma, part_of_speech, language].join(',')) ||
              (@empty_token_sort == 'V' &&
               (pid.nil? || pid.is_empty? ||
                COPULAR_LEMMATA.include?([pid.lemma, pid.part_of_speech, pid.language].join(','))))

    copular && dependents.any? { |d| d.relation == 'xobj' }
  end

  def determiner?
    DETERMINERS.include? @part_of_speech
  end

  def ellipsis?
    @empty_token_sort == 'V'
  end

  def foreign?
    @part_of_speech == 'F-'
  end

  def has_content?
    @empty_token_sort.nil? || @empty_token_sort == ''
  end

  def interjection?
    @part_of_speech == 'I-'
  end

  def is_empty?
    !has_content?
  end

  # Falsy (instead of raising) for tokens without morphology.
  def mediopassive?
    @morphology && @morphology[4] =~ /[mpe]/
  end

  def negation?
    NEGATION_LEMMATA.include?([lemma, part_of_speech, language].join(','))
  end

  def nominal?
    (@part_of_speech =~ /\A[NPM]/) || nominalized?
  end

  def nominalized?
    dependents.any? do |d|
      d.determiner? && ['atr', 'aux', 'det'].include?(d.relation)
    end
  end

  def particle?
    @relation == 'aux' && PARTICLE_LEMMATA.include?([lemma, part_of_speech, language].join(','))
  end

  # +false+ (instead of raising) for tokens without morphology.
  def passive?
    !@morphology.nil? && @morphology[4] == 'p'
  end

  def preposition?
    @part_of_speech == 'R-'
  end

  def proper_noun?
    @part_of_speech == 'Ne'
  end

  def root?
    @head_id == 0
  end

  # Sets the relation. On a conjunction the new relation is first
  # copied down to every dependent that shares the current relation.
  def relation=(rel)
    if conjunction?
      dependents.select { |d| d.relation == @relation }.each do |c|
        c.relation = rel
      end
    end
    @relation = rel
  end

  # Number of non-empty tokens in the subgraph rooted in this token.
  def count_subgraph
    dependents.map(&:count_subgraph).inject(0, :+) + (is_empty? ? 0 : 1)
  end

  # This token and all tokens below it.
  def subgraph_set
    [self] + dependents.flat_map(&:subgraph_set)
  end

  # The token with the lowest id among this token and its direct dependents.
  def left_corner
    ([self] + dependents).min_by(&:id)
  end

  def conj_head
    raise "Not a conjunct" unless @relation == 'conj'
    if head.relation == 'conj'
      head.conj_head
    else
      head
    end
  end

  # The token targeted by this token's 'pid' slash, or +nil+.
  def pid
    slash = @slashes.find { |_target, relation| relation == 'pid' }
    return nil unless slash

    @sentence.tokens.find { |t| t.id == slash.first }
  end

  def format_features(features)
    if features == ''
      '_'
    else
      features.split("|").sort.join("|")
    end
  end

  # The token as one tab-separated CoNLL-U line.
  def to_conll
    [@id,
     @form,
     @lemma,
     @upos,
     @part_of_speech,
     format_features(@features),
     @head_id,
     (@head_id == 0 ? 'root' : @relation), # override non-root relations on root until we've found out how to handle unembedded reports etc
     '_', # slashes here
     '_'].join("\t")
  end

  def to_s
    [@id, @form, @head_id, @relation].join("\t")
  end

  # Short label used in graph dumps and error messages. Falls back to
  # the empty token sort when the form is nil or '' (the original
  # `@form || ...` never fell back for '' forms, which is what empty
  # tokens get from Sentence#initialize).
  def to_n
    label = (@form.nil? || @form.empty?) ? @empty_token_sort : @form
    [@relation, @id, label, (@upos || @part_of_speech)].join('-')
  end

  def to_graph(indents = 0)
    ([("\t" * indents) + to_n] + dependents.map { |d| d.to_graph(indents + 1) }).join("\n")
  end

  def siblings
    @sentence.tokens.select { |t| t.head_id == @head_id } - [self]
  end

  def head
    @sentence.tokens.find { |t| t.id == @head_id }
  end

  def dependents
    @sentence.tokens.select { |t| t.head_id == @id }.sort_by(&:id)
  end

  def find_appositive_head
    raise "Not an apposition" unless @relation == 'apos'
    if head.conjunction? && head.relation == 'apos'
      head.find_appositive_head
    else
      head
    end
  end

  # Takes a list of [relation, criterion] pairs and sets the relation to
  # the first one whose criterion (called with this token) holds. The
  # list is consumed, so callers pass a copy. Leaves the relation
  # unchanged when nothing matches.
  def find_relation(possible_relations)
    rel, crit = possible_relations.shift
    if rel.nil?
      # raise "Found no relation"
    elsif crit.call self
      @relation = rel
    else
      find_relation possible_relations
    end
  end

  # Sets +upos+ (and appends any extra feature) from POS_MAP for this
  # token and everything below it. Raises RuntimeError for a part of
  # speech that has no usable entry; the original called +first+ on the
  # lookup result before checking it, so a missing entry surfaced as a
  # NoMethodError on nil instead.
  def map_part_of_speech!
    dependents.each(&:map_part_of_speech!)
    entry = POS_MAP[@part_of_speech]
    @upos = entry && entry.first
    raise "No match found for pos #{part_of_speech.inspect}" unless @upos
    if (feat = entry[1])
      @features += ((@features.empty? ? '' : '|') + feat)
    end
    # ugly, but the ugliness comes from UDEP
    @upos = 'ADJ' if @upos == 'DET' && @relation != 'det'
  end

  # Relabels the subgraph bottom-up using RELATION_MAPPING.
  def relabel_graph!
    dependents.each(&:relabel_graph!)
    possible_relations = RELATION_MAPPING[@relation]
    case possible_relations
    when String
      @relation = possible_relations
    when Array
      find_relation possible_relations.dup
    when nil
      # do nothing: the token has already changed its relation
    else
      raise "Unknown value #{possible_relations.inspect} for #{@relation}"
    end
  end

  # attach subjunctions with 'mark' under their verbs and promote
  # the verb to take over the subjunction's relation. If the verb
  # is empty, the subjunction stays as head.
  def process_subjunction!
    # ignore if the subjunction has no dependents or only conj dependents.
    # NB: this requires that the function is called *after* processing conjunctions
    return if dependents.reject { |d| ['conj', 'cc'].include?(d.relation) }.empty?
    pred = dependents.select { |d| d.relation == 'pred' }
    raise "#{pred.size} PREDs under the subjunction #{to_n}:\n#{@sentence.to_graph}" unless pred.one?
    pred = pred.first
    if pred.is_empty?
      # promote the subjunction if the verb is empty
      pred.dependents.each { |d| d.head_id = id }
      @sentence.remove_token! pred
    else
      # else demote the subjunction
      pred.invert!('mark')
    end
  end

  # TODO: process "implicit pid" through APOS chain too
  def process_ellipsis!
    # First we find the corresponding overt token.
    # If there's an explicit pid slash, we'll grab that one.
    if pid && !subgraph_set.include?(pid)
      overt = pid
    # otherwise, try a conjunct
    elsif @relation == 'conj'
      overt = conj_head
    elsif @relation == 'apos'
      overt = find_appositive_head
    else
      return
    end

    dependents.each do |d|
      # check if there's a partner with the same relation under the overt node.
      # (overt.dependents is looked up afresh each time, so it already
      # includes dependents moved there earlier in this loop.)
      # TODO: this isn't really very convincing when it comes to ADVs
      partner = overt.dependents.find { |p| p != self && p.relation == d.relation }
      if partner
        partner = partner.find_remnant
        d.head_id = partner.id
        d.relation = 'remnant'
      else
        # if there's no partner, just attach under the overt node, preserving the relation
        d.head_id = overt.id
      end
    end
    @sentence.remove_token!(self)
  end

  # Follows the chain of 'remnant' dependents down to its last member.
  def find_remnant
    if (r = dependents.find { |d| d.relation == 'remnant' })
      r.find_remnant
    else
      self
    end
  end

  def process_copula!
    predicates = dependents.select { |d| d.relation == 'xobj' }
    raise "#{predicates.size} predicates under #{to_n}\n#{to_graph}" if predicates.size != 1
    predicates.first.promote!(nil, 'cop')
  end

  def process_preposition!
    raise "Only prepositions can be processed this way!" unless part_of_speech == 'R-'
    obliques = dependents.select { |d| d.relation == 'obl' }
    raise "#{obliques.size} oblique dependents under #{to_n}\n#{to_graph}" if obliques.size > 1
    return if obliques.empty? # shouldn't really happen, but in practice
    obliques.first.invert!("case") # , "adv")
  end

  # Removes empty tokens bottom-up, re-attaching their dependents to
  # the removed token's head with the relation 'remnant'.
  def remove_empties!
    dependents.each(&:remove_empties!)
    if is_empty?
      dependents.each do |d|
        d.head_id = head_id
        d.relation = 'remnant'
      end
      @sentence.remove_token! self
    end
  end

  # Changes coordinations recursively from the bottom of the graph
  def change_coordinations!
    dependents.each(&:change_coordinations!)
    process_coordination! if conjunction?
  end

  def process_coordination!
    raise "Only coordinations can be processed this way!" unless conjunction?
    return if dependents.reject { |d| d.relation == 'aux' }.empty?
    distribute_shared_modifiers!
    # Looked up again on purpose: the step above re-attaches modifiers.
    dependents.reject { |d| d.relation == 'aux' }.first.promote!("conj", "cc")
  end

  # Moves modifiers shared by the conjuncts under the first conjunct
  # and records them as slashes on the other conjuncts.
  def distribute_shared_modifiers!
    raise "Can only distribute over a conjunction!" unless conjunction?
    conjuncts, modifiers = dependents.reject { |d| d.relation == 'aux' }.partition do |d|
      d.relation == @relation || (d.relation == 'adv' && @relation == 'xadv')
    end
    first_conjunct = conjuncts.shift
    raise "No first conjunct under #{to_n}\n#{to_graph}" unless first_conjunct
    raise "The first conjunct is a misannotated conjunction in #{to_n}\n#{to_graph}" if first_conjunct.conjunction? && first_conjunct.dependents.empty?
    modifiers.each do |m|
      m.head_id = first_conjunct.id
      conjuncts.each { |c| c.add_slash! [m.id, m.relation] }
    end
  end

  def add_slash!(slash)
    @slashes << slash
  end

  # Inverts the direction of a dependency relation. By default the
  # labels are also swapped, but new relations can be specified
  # for both the new dependent and the new head.
  def invert!(new_dependent_relation = nil, new_head_relation = nil)
    raise "Cannot promote a token under root!" if @head_id == 0
    former_head = head
    new_dependent_relation ||= @relation
    new_head_relation ||= former_head.relation
    new_head_id = former_head.head_id

    # The order of these assignments matters: relation= on a
    # conjunction looks at its dependents as they are at that moment.
    former_head.head_id = @id
    former_head.relation = new_dependent_relation
    @head_id = new_head_id
    self.relation = new_head_relation
  end

  # promotes a node to its head's place. The node takes over its
  # former head's relation and all dependents. The new relation
  # for these dependents can be specified; if it is not, they will
  # keep their former relation. The former head is made a
  # dependent of the node (with a specified relation) or,
  # if it is an empty node, destroyed.
  def promote!(new_sibling_relation = nil, new_dependent_relation = 'aux')
    raise "Cannot promote a token under root!" if @head_id == 0
    former_head = head
    new_head_relation = former_head.relation
    new_head_id = former_head.head_id

    # move all dependents of the former head to the new one
    siblings.each do |t|
      t.head_id = @id
      # ugly hack to avoid overwriting the aux relation here (aux siblings aren't really siblings)
      t.relation = new_sibling_relation if new_sibling_relation && t.relation != 'aux'
    end

    if former_head.is_empty?
      # remove the former head if it was empty
      @sentence.remove_token!(former_head)
    else
      # else make it a dependent of the new head
      former_head.head_id = @id
      former_head.relation = new_dependent_relation
    end

    @head_id = new_head_id
    # don't use relation=, as we don't want this relation to be
    # copied down a tree of conjunctions
    @relation = new_head_relation
  end
end
587
+ end
588
+ end
589
+ end