proiel-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,165 @@
1
+ module PROIEL
2
+ module Commands
3
+ class Tokenize < Command
4
+ class << self
5
# Registers the +tokenize+ subcommand on the given command-line program.
#
# @param prog the program object; it must respond to +command+ and yield a
#   command object that responds to +syntax+, +description+ and +action+
def init_with_program(prog)
  prog.command(:tokenize) do |c|
    # Declared once, with the command name included. Two separate
    # declarations would presumably let the later one replace the earlier
    # and drop the command name from the usage line (verify against the
    # CLI library in use).
    c.syntax 'tokenize [options] filename'
    c.description 'Tokenize raw text'

    c.action { |args, options| process(args, options) }
  end
end
14
+
15
+ def process(args, options)
16
+ if args.empty?
17
+ STDERR.puts 'Missing filename. Use --help for more information.'
18
+ exit 1
19
+ end
20
+
21
+ if args.length > 1
22
+ STDERR.puts 'Too many filenames. Use --help for more information.'
23
+ exit 1
24
+ end
25
+
26
+ builder = Builder::XmlMarkup.new(target: STDOUT, indent: 2)
27
+ builder.instruct! :xml, version: '1.0', encoding: 'UTF-8'
28
+
29
+ filename = args.first
30
+
31
+ File.open(filename, 'r') do |file|
32
+ header = read_header(file)
33
+ body = read_body(file)
34
+
35
+ builder.proiel('export-time' => header.export_time, 'schema-version' => '2.0') do
36
+ builder.source(id: header.id, language: header.language) do
37
+ builder.title header.title
38
+ builder.author header.author
39
+ builder.tag!('citation-part', header.citation_part)
40
+
41
+ tokenize(builder, body)
42
+ end
43
+ end
44
+ end
45
+ end
46
+
47
+ def tokenize(builder, body)
48
+ citation_part = nil
49
+
50
+ body.each_with_index do |sd_body, i|
51
+ builder.div(title: sd_body[:title]) do
52
+ sd_body[:contents].split(/(@[^ ]+|§[^ ]+)/).map do |s|
53
+ if s[0] == '§' or s[0] == '@'
54
+ s
55
+ else
56
+ # It's sensible to place the break not immediately after probable
57
+ # sentence-breaking punctuation like periods and question marks, but
58
+ # after the punctuation mark and characters typically used in pairs,
59
+ # like brackets and apostrophes.
60
+ s.gsub(/([\.:;\?!]+[\s†\]\)"']*)/, '\1|')
61
+ end
62
+ end.join.split('|').each_with_index do |s_body, j|
63
+ builder.sentence(status_tag: 'unannotated') do
64
+ leftover_before = ''
65
+
66
+ # Preserve linebreaks in the text.
67
+ s_body.gsub!(/\s*[\n\r]/, "\u2028")
68
+
69
+ s_body.scan(/([^@§\p{Word}]*)([\p{Word}]+|@[^ ]+|§[^ ]+)([^@§\p{Word}]*)/).each do |(before, form, after)|
70
+ case form
71
+ when /^@(.*)$/
72
+ leftover_before += before unless before.nil?
73
+ leftover_before += $1
74
+ leftover_before += after unless after.nil?
75
+ when /^§(.*)$/
76
+ leftover_before += before unless before.nil?
77
+ citation_part = $1
78
+ leftover_before += after unless after.nil?
79
+ else
80
+ before = leftover_before + before
81
+ leftover_before = ''
82
+
83
+ attrs = { citation_part: citation_part, form: form }
84
+ attrs[:presentation_before] = before unless before == ''
85
+ attrs[:presentation_after] = after unless after == ''
86
+
87
+ builder.token(attrs)
88
+ end
89
+ end
90
+ end
91
+ end
92
+ end
93
+ end
94
+ end
95
+
96
+ VALID_METADATA_FIELDS =
97
+ %w(title author citation_part language id
98
+
99
+ principal funder distributor distributor_address date
100
+ license license_url
101
+ reference_system
102
+ editor editorial_note
103
+ annotator reviewer
104
+
105
+ electronic_text_editor electronic_text_title
106
+ electronic_text_version
107
+ electronic_text_publisher electronic_text_place electronic_text_date
108
+ electronic_text_original_url
109
+ electronic_text_license electronic_text_license_url
110
+
111
+ printed_text_editor printed_text_title
112
+ printed_text_edition
113
+ printed_text_publisher printed_text_place printed_text_date)
114
+
115
# Reads the metadata header from the start of +f+.
#
# The header is the leading run of lines that start with +%+, each of the
# form <tt>% field = value</tt>. Reading stops at the first line that does
# not start with +%+. Unknown fields are reported on STDERR and skipped.
#
# @param f an IO-like object responding to +rewind+ and +each_line+
# @return [OpenStruct] one attribute per recognised header field
def read_header(f)
  f.rewind

  OpenStruct.new.tap do |hdr|
    f.each_line do |l|
      l.chomp!

      # The header ends with the first line that does not start with %.
      break unless l =~ /^%/

      field, value = l.sub(/^%\s*/, '').split(/\s*=\s*/, 2)

      case field
      when 'id', 'export_time', *VALID_METADATA_FIELDS
        # A recognised field written without '=' yields no value; store an
        # empty string instead of failing on nil.
        hdr[field] = value.to_s.strip
      else
        STDERR.puts "Invalid header field #{field}. Ignoring.".yellow
      end
    end
  end
end
141
+
142
# Reads the text body of +f+ and groups it into source divisions.
#
# Lines starting with +%+ (header) and blank lines are skipped. A line
# starting with +#+ opens a new division whose title is the rest of that
# line. Any other line is appended to the contents of the current division;
# if no division has been opened yet, an untitled one is created first.
#
# @param f an IO-like object responding to +rewind+ and +each_line+
# @return [Array<Hash>] hashes with the keys +:title+ and +:contents+
def read_body(f)
  f.rewind

  divisions = []

  f.each_line do |line|
    # Header lines and empty lines contribute nothing to the body.
    next if line =~ /^%/ || line =~ /^\s*$/

    if line =~ /^#/
      divisions << { title: line.sub(/^#/, '').strip, contents: '' }
    else
      divisions << { title: '', contents: '' } if divisions.empty?
      divisions.last[:contents] += line
    end
  end

  divisions
end
162
+ end
163
+ end
164
+ end
165
+ end
@@ -0,0 +1,42 @@
1
+ module PROIEL
2
+ module Commands
3
+ class Validate < Command
4
+ class << self
5
+ def init_with_program(prog)
6
+ prog.command(:validate) do |c|
7
+ c.syntax 'validate'
8
+ c.description 'Validate input data'
9
+ c.action { |args, options| process(args, options) }
10
+ end
11
+ end
12
+
13
# Validates every file named in +args+ and prints the result for each one.
#
# Exits with status 0 when all files are valid. Exits with status 1 when
# any file is invalid or when no filename is given.
#
# @param args [Array<String>] names of the files to validate
# @param options options passed in by the command-line framework (unused)
def process(args, options)
  if args.empty?
    STDERR.puts 'Missing filename(s). Use --help for more information.'
    exit 1
  end

  @schemas = {}

  # Check every file before exiting, so that one run reports on all of them
  # instead of stopping after the first.
  all_valid = true

  args.each do |filename|
    v = PROIEL::PROIELXML::Validator.new(filename)

    if v.valid?
      puts "#{filename} is valid".green
    else
      all_valid = false

      puts "#{filename} is invalid".red

      v.errors.each do |error|
        puts "* #{error}"
      end
    end
  end

  exit(all_valid ? 0 : 1)
end
39
+ end
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,589 @@
1
+ require 'proiel/cli/converters/conll-u/morphology'
2
+ require 'proiel/cli/converters/conll-u/syntax'
3
+
4
+ module PROIEL
5
+ module Converter
6
+ class CoNLLU
7
+ class << self
8
+ def process(tb, options = [])
9
+ error_count = 0
10
+ sentence_count = 0
11
+ tb.sources.each do |source|
12
+ source.divs.each do |div|
13
+ div.sentences.each do |sentence|
14
+ sentence_count += 1
15
+ n = Sentence.new sentence
16
+ # Unlike other conversions, this one has to rely on
17
+ # certain assumptions about correct linguistic
18
+ # annotation in order to produce a meaningful
19
+ # representation in CoNLL-U
20
+ begin
21
+ puts n.convert.to_conll
22
+ puts
23
+ rescue => e
24
+ error_count += 1
25
+ STDERR.puts "Cannot convert #{sentence.id} (#{sentence.citation}): #{e}"
26
+ end
27
+ end
28
+ end
29
+ end
30
+ STDERR.puts "#{error_count} sentences out of #{sentence_count} could not be converted"
31
+ end
32
+ end
33
+
34
+ class Sentence
35
+
36
+ attr_accessor :tokens
37
+
38
+ # Initializes a PROIEL::Converter::CoNLLU::Sentence from a PROIEL::PROIELXML::Sentence
39
+ def initialize(sentence)
40
+
41
+ id_to_number = Hash.new(0) #will return id 0 (i.e. root) for nil
42
+
43
+ tk = sentence.tokens.reject { |t| t.empty_token_sort == 'P' }
44
+
45
+ tk.map(&:id).each_with_index.each do |id, i|
46
+ id_to_number[id] = i + 1
47
+ end
48
+
49
+ @tokens = tk.map do |t|
50
+ Token.new(id_to_number[t.id],
51
+ id_to_number[t.head_id],
52
+ t.form.to_s.gsub(/[[:space:]]/, '.'),
53
+ t.lemma.to_s.gsub(/[[:space:]]/, '.'),
54
+ t.part_of_speech,
55
+ t.language,
56
+ t.morphology,
57
+ t.relation,
58
+ t.empty_token_sort,
59
+ t.slashes.map { |relation, target_id| [id_to_number[target_id], relation] },
60
+ self
61
+ )
62
+ end
63
+ end
64
+
65
+ def convert
66
+ restructure_graph!
67
+ relabel_graph!
68
+ map_part_of_speech!
69
+ self
70
+ end
71
+
72
# Returns the first token in this sentence whose +id+ equals +identifier+,
# or +nil+ if there is none.
def find_token(identifier)
  @tokens.find { |candidate| candidate.id == identifier }
end
75
+
76
+ def remove_token!(token)
77
+ @tokens.delete(token)
78
+ end
79
+
80
+ def to_s
81
+ @tokens.map(&:to_s).join("\n")
82
+ end
83
+
84
+ def count_tokens
85
+ roots.map(&:count_subgraph).inject(0, :+)
86
+ end
87
+
88
+ def roots
89
+ @tokens.select { |t| t.head_id == 0 }.sort_by(&:id)
90
+ end
91
+
92
+ def to_graph
93
+ roots.map(&:to_graph).join("\n")
94
+ end
95
+
96
+ def to_conll
97
+ @tokens.map(&:to_conll).join("\n")
98
+ end
99
+
100
+ # TODO: this will leave several root nodes in many cases. For now, raise an error
101
+ def prune_empty_rootnodes!
102
+ unless (empty_roots = roots.select { |r| r.empty_token_sort == 'V' }).empty?
103
+ empty_roots.each do |r|
104
+ # promote the first dependent to root
105
+ new_root = r.dependents.first
106
+ new_root.head_id = 0
107
+ new_root.relation = r.relation
108
+ r.dependents.each { |d| d.head_id = new_root.id }
109
+ remove_token! r
110
+ end
111
+ prune_empty_rootnodes!
112
+ end
113
+ end
114
+
115
+ def demote_subjunctions!
116
+ @tokens.select { |t| t.part_of_speech == 'G-' }.each(&:process_subjunction!)
117
+ end
118
+
119
+ def demote_parentheticals_and_vocatives!
120
+ r, p = roots.partition { |n| !['voc', 'parpred'].include? n.relation }
121
+ if p.any? and r.none?
122
+ # promote the first vocative/parenthetical to head in case there's nothing else
123
+ p.first.relation = 'pred'
124
+ r, p = roots.partition { |n| !['voc', 'parpred'].include? n.relation }
125
+ end
126
+ raise "No unique root in this tree:\n#{to_graph}" if p.any? and !r.one?
127
+ p.each { |x| x.head_id = r.first.id }
128
+ end
129
+
130
+ def relabel_graph!
131
+ roots.each(&:relabel_graph!)
132
+ end
133
+
134
+ def map_part_of_speech!
135
+ roots.each(&:map_part_of_speech!)
136
+ end
137
+
138
+ def restructure_graph!
139
+ @tokens.delete_if { |n| n.empty_token_sort == 'P' }
140
+ @tokens.select(&:preposition?).each(&:process_preposition!)
141
+ roots.each(&:change_coordinations!)
142
+ @tokens.select(&:copula?).each(&:process_copula!)
143
+ prune_empty_rootnodes!
144
+ # do ellipses from left to right for proper remnant treatment
145
+ @tokens.select(&:ellipsis?).sort_by { |e| e.left_corner.id }.each(&:process_ellipsis!)
146
+ demote_subjunctions!
147
+ # DIRTY: remove the rest of the empty nodes by attaching them
148
+ # to their grandmother with remnant. This is the best way to
149
+ # do it given the current state of the UDEP scheme, but
150
+ # revisions will come.
151
+ roots.each(&:remove_empties!)
152
+ demote_parentheticals_and_vocatives!
153
+ end
154
+ end
155
+
156
+ class Token
157
+
158
+ attr_accessor :head_id
159
+ attr_accessor :upos
160
+ attr_reader :relation
161
+ attr_reader :part_of_speech
162
+ attr_reader :id
163
+ attr_reader :lemma
164
+ attr_reader :language
165
+ attr_reader :empty_token_sort
166
+ attr_reader :form
167
+
168
+ def initialize(id, head_id, form, lemma, part_of_speech, language, morphology, relation, empty_token_sort, slashes, sentence)
169
+ @id = id
170
+ @head_id = head_id
171
+ @form = form
172
+ @lemma = lemma
173
+ @part_of_speech = part_of_speech
174
+ @language = language
175
+ @morphology = morphology
176
+ @relation = relation
177
+ @empty_token_sort = empty_token_sort
178
+ @slashes = slashes
179
+ @sentence = sentence
180
+ @features = (morphology ? map_morphology(morphology) : '' )
181
+ @upos = nil
182
+ end
183
+
184
+ MORPHOLOGY_POSITIONAL_TAG_SEQUENCE = [
185
+ :person, :number, :tense, :mood, :voice, :gender, :case,
186
+ :degree, :strength, :inflection
187
+ ]
188
+
189
# Translates a positional morphology tag into a '|'-separated feature
# string.
#
# Position i of +morph+ is looked up in the MORPHOLOGY_MAP entry for the
# i-th field of MORPHOLOGY_POSITIONAL_TAG_SEQUENCE. Positions that map to
# nothing are left out.
def map_morphology(morph)
  features = morph.length.times.map do |position|
    field = MORPHOLOGY_POSITIONAL_TAG_SEQUENCE[position]
    MORPHOLOGY_MAP[field][morph[position]]
  end

  features.compact.join('|')
end
196
+
197
+ # returns +true+ if the node is an adjective or an ordinal
198
+ def adjectival?
199
+ @part_of_speech == 'A-' or @part_of_speech == 'Mo'
200
+ end
201
+
202
+ def adverb?
203
+ @part_of_speech =~ /\AD/
204
+ end
205
+
206
+ def cardinal?
207
+ @part_of_speech == 'Ma'
208
+ end
209
+
210
+ # A node is clausal if it is a verb and not nominalized; or it has a copula dependent; or it has a subject (e.g. in an absolute construction without a verb); or if it is the root (e.g. in a nominal clause)
211
+ def clausal?
212
+ (@part_of_speech == 'V-' and !nominalized?) or
213
+ dependents.any?(&:copula?) or
214
+ dependents.any? { |d| ['sub', 'nsubj', 'nsubjpass', 'csubj', 'csubjpass'].include? d.relation } or
215
+ root?
216
+ end
217
+
218
+ def conjunction?
219
+ part_of_speech == 'C-' or @empty_token_sort == 'C'
220
+ end
221
+
222
+ def coordinated?
223
+ head and head.conjunction? and head.relation == @relation
224
+ end
225
+
226
+ # Returns +true+ if the node has an xobj dependent and either 1)
227
+ # the lemma is copular or 2) the node is empty and has no pid
228
+ # slash or a pid slash to a node with a copular lemma
229
+ def copula?
230
+ @relation == 'cop' or
231
+ (COPULAR_LEMMATA.include?([lemma, part_of_speech, language].join(',')) or
232
+ (@empty_token_sort == 'V' and (pid.nil? or pid.is_empty? or COPULAR_LEMMATA.include?([pid.lemma, pid.part_of_speech, pid.language].join(',')))) and
233
+ dependents.any? { |d| d.relation == 'xobj' } )
234
+ end
235
+
236
+ def determiner?
237
+ DETERMINERS.include? @part_of_speech
238
+ end
239
+
240
+ def ellipsis?
241
+ @empty_token_sort == 'V'
242
+ end
243
+
244
+ def foreign?
245
+ @part_of_speech == 'F-'
246
+ end
247
+
248
+ def has_content?
249
+ @empty_token_sort.nil? or @empty_token_sort == ''
250
+ end
251
+
252
+ def interjection?
253
+ @part_of_speech == 'I-'
254
+ end
255
+
256
+ def is_empty?
257
+ !has_content?
258
+ end
259
+
260
# Returns a truthy value if the fifth position of the morphology tag (the
# :voice slot in MORPHOLOGY_POSITIONAL_TAG_SEQUENCE) is one of m, p or e.
# Tokens without a morphology tag are never mediopassive.
def mediopassive?
  # The constructor accepts a nil morphology, so guard against it here.
  @morphology && @morphology[4] =~ /[mpe]/
end
263
+
264
+ def negation?
265
+ NEGATION_LEMMATA.include?([lemma, part_of_speech, language].join(','))
266
+ end
267
+
268
+ def nominal?
269
+ @part_of_speech =~ /\A[NPM]/ or nominalized?
270
+ end
271
+
272
+ def nominalized?
273
+ dependents.any? do |d|
274
+ d.determiner? and ['atr', 'aux', 'det'].include? d.relation
275
+ end
276
+ end
277
+
278
+ def particle?
279
+ @relation == 'aux' and PARTICLE_LEMMATA.include?([lemma, part_of_speech, language].join(','))
280
+ end
281
+
282
# Returns +true+ if the fifth position of the morphology tag (the :voice
# slot in MORPHOLOGY_POSITIONAL_TAG_SEQUENCE) is 'p'. Tokens without a
# morphology tag are never passive.
def passive?
  # The constructor accepts a nil morphology, so guard against it here.
  !@morphology.nil? && @morphology[4] == 'p'
end
285
+
286
+ def preposition?
287
+ @part_of_speech == 'R-'
288
+ end
289
+
290
+ def proper_noun?
291
+ @part_of_speech == 'Ne'
292
+ end
293
+
294
+ def root?
295
+ @head_id == 0
296
+ end
297
+
298
+ def relation=(rel)
299
+ if conjunction?
300
+ dependents.select { |d| d.relation == @relation }.each do |c|
301
+ c.relation = rel
302
+ end
303
+ end
304
+ @relation = rel
305
+ end
306
+
307
+ def count_subgraph
308
+ dependents.map(&:count_subgraph).inject(0, :+) + (is_empty? ? 0 : 1)
309
+ end
310
+
311
+ def subgraph_set
312
+ [self] + dependents.map(&:subgraph_set).flatten
313
+ end
314
+
315
+ def left_corner
316
+ ([self] + dependents).sort_by(&:id).first
317
+ end
318
+
319
+ def conj_head
320
+ raise "Not a conjunct" unless @relation == 'conj'
321
+ if head.relation == 'conj'
322
+ head.conj_head
323
+ else
324
+ head
325
+ end
326
+ end
327
+
328
+ def pid
329
+ if pid = @slashes.select { |t, r| r == 'pid' }.first
330
+ @sentence.tokens.select { |t| pid.first == t.id}.first
331
+ else
332
+ nil
333
+ end
334
+ end
335
+
336
# Formats a '|'-separated feature string for output: an empty string
# becomes '_', anything else has its features sorted.
def format_features(features)
  return '_' if features == ''

  features.split("|").sort.join("|")
end
343
+
344
+ def to_conll
345
+ [@id,
346
+ @form,
347
+ @lemma,
348
+ @upos,
349
+ @part_of_speech,
350
+ format_features(@features),
351
+ @head_id,
352
+ (@head_id == 0 ? 'root' : @relation), # override non-root relations on root until we've found out how to handle unembedded reports etc
353
+ '_', # slashes here
354
+ '_'].join("\t")
355
+ end
356
+
357
+ def to_s
358
+ [@id, @form, @head_id, @relation].join("\t")
359
+ end
360
+
361
+ def to_n
362
+ [@relation, @id, (@form || @empty_token_sort), (@upos || @part_of_speech) ].join('-')
363
+ end
364
+
365
+ def to_graph(indents = 0)
366
+ ([("\t" * indents) + (to_n)] + dependents.map { |d| d.to_graph(indents + 1) }).join("\n")
367
+ end
368
+
369
+ def siblings
370
+ @sentence.tokens.select { |t| t.head_id == @head_id } - [self]
371
+ end
372
+
373
# Returns the token in the same sentence whose +id+ equals this token's
# +head_id+, or +nil+ if there is no such token.
def head
  @sentence.tokens.find { |candidate| candidate.id == @head_id }
end
376
+
377
+ def dependents
378
+ @sentence.tokens.select { |t| t.head_id == @id }.sort_by(&:id)
379
+ end
380
+
381
+ def find_appositive_head
382
+ raise "Not an apposition" unless @relation == 'apos'
383
+ if head.conjunction? and head.relation == 'apos'
384
+ head.find_appositive_head
385
+ else
386
+ head
387
+ end
388
+ end
389
+
390
# Walks through +possible_relations+, a list of [relation, criterion]
# pairs, and assigns the first relation whose criterion returns a truthy
# value when called with this token.
#
# Pairs are removed from the front of +possible_relations+ as they are
# tried, so callers should pass a copy. If the list runs out, or a pair
# with a nil relation is reached, the relation is left unchanged and +nil+
# is returned.
def find_relation(possible_relations)
  loop do
    candidate, criterion = possible_relations.shift

    # Nothing left to try: keep the current relation.
    return nil if candidate.nil?

    return @relation = candidate if criterion.call(self)
  end
end
400
+
401
+ def map_part_of_speech!
402
+ dependents.each(&:map_part_of_speech!)
403
+ @upos = POS_MAP[@part_of_speech].first
404
+ raise "No match found for pos #{part_of_speech.inspect}" unless @upos
405
+ if feat = POS_MAP[@part_of_speech][1]
406
+ @features += ((@features.empty? ? '' : '|') + feat)
407
+ end
408
+ # ugly, but the ugliness comes from UDEP
409
+ @upos = 'ADJ' if @upos == 'DET' and @relation != 'det'
410
+ end
411
+
412
+ def relabel_graph!
413
+ dependents.each(&:relabel_graph!)
414
+ possible_relations = RELATION_MAPPING[@relation]
415
+ case possible_relations
416
+ when String
417
+ @relation = possible_relations
418
+ when Array
419
+ find_relation possible_relations.dup
420
+ when nil
421
+ # do nothing: the token has already changed its relation
422
+ else
423
+ raise "Unknown value #{possible_relations.inspect} for #{@relation}"
424
+ end
425
+ end
426
+
427
+ # attach subjunctions with 'mark' under their verbs and promote
428
+ # the verb to take over the subjunction's relation. If the verb
429
+ # is empty, the subjunction stays as head.
430
+ def process_subjunction!
431
+ # ignore if the subjunction has no dependents or only conj dependents.
432
+ # NB: this requires that the function is called *after* processing conjunctions
433
+ return if dependents.reject { |d| ['conj', 'cc'].include? d.relation }.empty?
434
+ pred = dependents.select { |d| d.relation == 'pred' }
435
+ raise "#{pred.size} PREDs under the subjunction #{to_n}:\n#{@sentence.to_graph}" unless pred.one?
436
+ pred = pred.first
437
+ # promote the subjunction if the verb is empty
438
+ if pred.is_empty?
439
+ pred.dependents.each { |d| d.head_id = id }
440
+ @sentence.remove_token! pred
441
+ # else demote the subjunction
442
+ else
443
+ pred.invert!('mark')
444
+ end
445
+ end
446
+
447
+
448
+
449
+ # TODO: process "implicit pid" through APOS chain too
450
+ def process_ellipsis!
451
+ # First we find the corresponding overt token.
452
+ # If there's an explicit pid slash, we'll grab that one.
453
+ if pid and !subgraph_set.include?(pid)
454
+ overt = pid
455
+ # otherwise, try a conjunct
456
+ elsif @relation == 'conj'
457
+ overt = conj_head
458
+ elsif @relation == 'apos'
459
+ overt = find_appositive_head
460
+ else
461
+ return
462
+ end
463
+
464
+ dependents.each do |d|
465
+ # check if there's a partner with the same relation under the overt node.
466
+ # TODO: this isn't really very convincing when it comes to ADVs
467
+ if partner = overt.dependents.select { |p| p != self and p.relation == d.relation }.first #inserted p != self
468
+ partner = partner.find_remnant
469
+ d.head_id = partner.id
470
+ d.relation = 'remnant'
471
+ # if there's no partner, just attach under the overt node, preserving the relation
472
+ else
473
+ d.head_id = overt.id
474
+ end
475
+ end
476
+ @sentence.remove_token!(self)
477
+ end
478
+
479
+ def find_remnant
480
+ if r = dependents.select { |d| d.relation == 'remnant' }.first
481
+ r.find_remnant
482
+ else
483
+ self
484
+ end
485
+ end
486
+
487
+ def process_copula!
488
+ predicates = dependents.select { |d| d.relation == 'xobj' }
489
+ raise "#{predicates.size} predicates under #{to_n}\n#{to_graph}" if predicates.size != 1
490
+ predicates.first.promote!(nil, 'cop')
491
+ end
492
+
493
+ def process_preposition!
494
+ raise "Only prepositions can be processed this way!" unless part_of_speech == 'R-'
495
+ obliques = dependents.select { |d| d.relation == 'obl' }
496
+ raise "#{obliques.size} oblique dependents under #{to_n}\n#{to_graph}" if obliques.size > 1
497
+ return if obliques.empty? #shouldn't really happen, but in practice
498
+ obliques.first.invert!("case") # , "adv")
499
+ end
500
+
501
+ def remove_empties!
502
+ dependents.each(&:remove_empties!)
503
+ if is_empty?
504
+ dependents.each { |d| d.head_id = head_id; d.relation = 'remnant' }
505
+ @sentence.remove_token! self
506
+ end
507
+ end
508
+
509
+ # Changes coordinations recursively from the bottom of the graph
510
+ def change_coordinations!
511
+ dependents.each(&:change_coordinations!)
512
+ process_coordination! if conjunction?
513
+ end
514
+
515
+ def process_coordination!
516
+ raise "Only coordinations can be processed this way!" unless conjunction?
517
+ return if dependents.reject { |d| d.relation == 'aux' }.empty?
518
+ distribute_shared_modifiers!
519
+ dependents.reject { |d| d.relation == 'aux' }.first.promote!("conj", "cc")
520
+ end
521
+
522
+ def distribute_shared_modifiers!
523
+ raise "Can only distribute over a conjunction!" unless conjunction?
524
+ conjuncts, modifiers = dependents.reject { |d| d.relation == 'aux' }.partition { |d| d.relation == @relation or (d.relation == 'adv' and @relation == 'xadv') }
525
+ first_conjunct = conjuncts.shift
526
+ raise "No first conjunct under #{to_n}\n#{to_graph}" unless first_conjunct
527
+ raise "The first conjunct is a misannotated conjunction in #{to_n}\n#{to_graph}" if first_conjunct.conjunction? and first_conjunct.dependents.empty?
528
+ modifiers.each do |m|
529
+ m.head_id = first_conjunct.id
530
+ conjuncts.each { |c| c.add_slash! [m.id, m.relation] }
531
+ end
532
+ end
533
+
534
+ def add_slash!(slash)
535
+ @slashes << slash
536
+ end
537
+
538
+ # Inverts the direction of a dependency relation. By default the
539
+ # labels are also swapped, but new relations can be specified
540
+ # for both the new dependent and the new head.
541
+ def invert!(new_dependent_relation = nil, new_head_relation = nil)
542
+ raise "Cannot promote a token under root!" if @head_id == 0
543
+ new_dependent_relation ||= @relation
544
+ new_head_relation ||= head.relation
545
+ new_head_id = head.head_id
546
+
547
+ head.head_id = @id
548
+ head.relation = new_dependent_relation
549
+ @head_id = new_head_id
550
+ self.relation = new_head_relation
551
+ end
552
+
553
+ # promotes a node to its head's place. The node takes over its
554
+ # former head's relation and all dependents. The new relation
555
+ # for these dependents can be specified; if it is not, they will
556
+ # keep their former relation. The former head is made a
557
+ # dependent of the node (with a specified relation) or,
558
+ # if it is an empty node, destroyed.
559
+
560
+ def promote!(new_sibling_relation = nil, new_dependent_relation = 'aux')
561
+ raise "Cannot promote a token under root!" if @head_id == 0
562
+ new_head_relation = head.relation
563
+ new_head_id = head.head_id
564
+
565
+ # move all dependents of the former head to the new one
566
+ siblings.each do |t|
567
+ t.head_id = @id
568
+ # ugly hack to avoid overwriting the aux relation here (aux siblings aren't really siblings)
569
+ t.relation = new_sibling_relation if (new_sibling_relation and t.relation != 'aux')
570
+ end
571
+
572
+ # remove the former head if it was empty
573
+ if head.is_empty?
574
+ @sentence.remove_token!(head)
575
+ # else make it a dependent of the new head
576
+ else
577
+ head.head_id = @id
578
+ head.relation = new_dependent_relation
579
+ end
580
+
581
+ @head_id = new_head_id
582
+ # don't use relation=, as we don't want this relation to be
583
+ # copied down a tree of conjunctions
584
+ @relation = new_head_relation
585
+ end
586
+ end
587
+ end
588
+ end
589
+ end