proiel-cli 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,680 +1,780 @@
1
1
  require 'proiel/cli/converters/conll-u/morphology'
2
2
  require 'proiel/cli/converters/conll-u/syntax'
3
3
 
4
- # Unlike other conversions, this one has to rely on
5
- # certain assumptions about correct linguistic
6
- # annotation in order to produce a meaningful
7
- # representation in CoNLL-U
8
-
9
-
10
- module PROIEL
11
- module Converter
12
- class CoNLLU
13
- class << self
14
- def process(tb, options = [])
15
- error_count = 0
16
- sentence_count = 0
17
- tb.sources.each do |source|
18
- source.divs.each do |div|
19
- div.sentences.each do |sentence|
20
- sentence_count += 1
21
- n = Sentence.new sentence
22
- begin
23
- # Do the conversion first to avoid spurious headers if the conversion fails
24
- a = n.convert.to_conll
25
- puts "# source = #{source.title}, #{div.title}"
26
- # using printable_form would give us punctuation, which must then be added to the tree
27
- puts "# text = #{sentence.tokens.map(&:form).compact.join(' ')}"
28
- puts "# sent_id = #{sentence.id}"
29
- puts a
30
- puts
31
- rescue => e
32
- error_count += 1
33
- STDERR.puts "Cannot convert #{sentence.id} (#{sentence.citation}): #{e}"
34
- STDERR.puts e.backtrace.join("\n") unless e.is_a? RuntimeError
35
- end
4
+ module PROIEL::Converter
5
+ # Converter that outputs CoNLL-U.
6
+ #
7
+ # This converter relies on certain assumptions about correct linguistic
8
+ # annotation in order to produce a meaningful representation in CoNLL-U.
9
+ class CoNLLU
10
+ class << self
11
+ def process(tb, options = [])
12
+ error_count = 0
13
+ sentence_count = 0
14
+ tb.sources.each do |source|
15
+ source.divs.each do |div|
16
+ div.sentences.each do |sentence|
17
+ sentence_count += 1
18
+ n = Sentence.new sentence
19
+ begin
20
+ # Do the conversion first to avoid spurious headers if the conversion fails
21
+ a = n.convert.to_conll
22
+ puts "# source = #{source.title}, #{div.title}"
23
+ # using printable_form would give us punctuation, which must then be added to the tree
24
+ puts "# text = #{sentence.tokens.map(&:form).compact.join(' ')}"
25
+ puts "# sent_id = #{sentence.id}"
26
+ puts a
27
+ puts
28
+ rescue => e
29
+ error_count += 1
30
+ STDERR.puts "Cannot convert #{sentence.id} (#{sentence.citation}): #{e}"
31
+ STDERR.puts e.backtrace.join("\n") unless e.is_a? RuntimeError
36
32
  end
37
33
  end
38
34
  end
39
- STDERR.puts "#{error_count} sentences out of #{sentence_count} could not be converted"
40
35
  end
36
+ STDERR.puts "#{error_count} sentences out of #{sentence_count} could not be converted"
41
37
  end
38
+ end
42
39
 
43
- class Sentence
44
-
45
- attr_accessor :tokens
46
-
47
- # initializes a PROIEL::Convert::Sentence from PROIEL::PROIELXML::Sentence
48
- def initialize(sentence)
49
-
50
- id_to_number = Hash.new(0) #will return id 0 (i.e. root) for nil
51
-
52
- # initialize array to hold the sentence tokens
53
- tks = []
54
- # keep track of how many new tokens have been created
55
- offset = 0
56
-
57
- sentence.tokens.reject { |t| t.empty_token_sort == 'P' }.each do |tk|
58
-
59
- if tk.form =~ /[[:space:]]/
60
- subtoks = tk.form.split(/[[:space:]]/)
61
-
62
- subtoks.each_with_index do |subtok, i|
63
- tks << PROIEL::Token.new(sentence,
64
- (i == 0 ? tk.id : 1000 + offset), # id
65
- (i == 0 ? tk.head_id : tk.id), # head_id
66
- subtok,
67
- # hope the lemmas split the same way as the tokens. Grab the form is you don't find a lemma
68
- (tk.lemma.split(/[[:space:]]/)[i] || subtok),
69
- tk.part_of_speech, # copy the postag
70
- tk.morphology,
71
- (i == 0 ? tk.relation : "flat"),
72
- nil, #empty_token_sort
73
- tk.citation_part,
74
- (i == 0 ? tk.presentation_before : nil),
75
- (i == (subtoks.size - 1) ? tk.presentation_after : nil),
76
- (i == 0 ? tk.antecedent_id : nil),
77
- (i == 0 ? tk.information_status : nil),
78
- (i == 0 ? tk.contrast_group : nil),
79
- (i == 0 ? tk.foreign_ids : nil),
80
- (i == 0 ? tk.slashes.map { |rel, target| PROIEL::PROIELXML::Reader::Slash.new({:'target_id' => target, :relation => rel} ) } : []), # This needs to be given a real slash object for the initialization, although it throws away the info
81
- (subtok == subtoks.first ? tk.alignment_id : nil)
82
- )
83
- offset += 1
84
- end
85
- else
86
- tks << tk
40
+ class Sentence
41
+
42
+ attr_accessor :tokens
43
+
44
+ # initializes a PROIEL::Convert::Sentence from PROIEL::PROIELXML::Sentence
45
+ def initialize(sentence)
46
+
47
+ id_to_number = Hash.new(0) #will return id 0 (i.e. root) for nil
48
+
49
+ # initialize array to hold the sentence tokens
50
+ tks = []
51
+ # keep track of how many new tokens have been created
52
+ offset = 0
53
+
54
+ sentence.tokens.reject { |t| t.empty_token_sort == 'P' }.each do |tk|
55
+
56
+ if tk.form =~ /[[:space:]]/
57
+ subtoks = tk.form.split(/[[:space:]]/)
58
+
59
+ subtoks.each_with_index do |subtok, i|
60
+ tks << PROIEL::Token.new(sentence,
61
+ (i == 0 ? tk.id : 1000 + offset), # id
62
+ (i == 0 ? tk.head_id : tk.id), # head_id
63
+ subtok,
64
+ # hope the lemmas split the same way as the tokens. Grab the form if you don't find a lemma
65
+ (tk.lemma.split(/[[:space:]]/)[i] || subtok),
66
+ tk.part_of_speech, # copy the postag
67
+ tk.morphology,
68
+ (i == 0 ? tk.relation : 'fixed'),
69
+ nil, #empty_token_sort
70
+ tk.citation_part,
71
+ (i == 0 ? tk.presentation_before : nil),
72
+ (i == (subtoks.size - 1) ? tk.presentation_after : nil),
73
+ (i == 0 ? tk.antecedent_id : nil),
74
+ (i == 0 ? tk.information_status : nil),
75
+ (i == 0 ? tk.contrast_group : nil),
76
+ (i == 0 ? tk.foreign_ids : nil),
77
+ (i == 0 ? tk.slashes.map { |rel, target| PROIEL::PROIELXML::Reader::Slash.new({:'target_id' => target, :relation => rel} ) } : []), # This needs to be given a real slash object for the initialization, although it throws away the info
78
+ (subtok == subtoks.first ? tk.alignment_id : nil)
79
+ )
80
+ offset += 1
87
81
  end
82
+ else
83
+ tks << tk
88
84
  end
85
+ end
89
86
 
90
-
91
- tks.map(&:id).each_with_index.each do |id, i|
92
- id_to_number[id] = i + 1
93
- end
94
87
 
95
- @tokens = tks.map do |t|
96
-
97
- Token.new(id_to_number[t.id],
98
- id_to_number[t.head_id],
99
- #insert dots in any whitespace inside words and lemmata
100
- t.form.to_s.gsub(/[[:space:]]/, '.'),
101
- t.lemma.to_s.gsub(/[[:space:]]/, '.'),
102
- t.part_of_speech,
103
- t.language,
104
- t.morphology,
105
- t.relation,
106
- t.empty_token_sort,
107
- t.slashes.map { |relation, target_id| [id_to_number[target_id], relation] },
108
- t.citation_part,
109
- self
110
- )
111
- end
88
+ tks.map(&:id).each_with_index.each do |id, i|
89
+ id_to_number[id] = i + 1
112
90
  end
113
91
 
114
- def convert
115
- restructure_graph!
116
- relabel_graph!
117
- map_part_of_speech!
118
- self
119
- end
92
+ @tokens = tks.map do |t|
120
93
 
121
- def find_token(identifier)
122
- @tokens.select { |t| t.id == identifier }.first
94
+ Token.new(id_to_number[t.id],
95
+ id_to_number[t.head_id],
96
+ #insert dots in any whitespace inside words and lemmata
97
+ t.form.to_s.gsub(/[[:space:]]/, '.'),
98
+ t.lemma.to_s.gsub(/[[:space:]]/, '.'),
99
+ t.part_of_speech,
100
+ t.language,
101
+ t.morphology,
102
+ t.relation,
103
+ t.empty_token_sort,
104
+ t.slashes.map { |relation, target_id| [id_to_number[target_id], relation] },
105
+ t.citation_part,
106
+ self
107
+ )
123
108
  end
109
+ end
124
110
 
125
- def remove_token!(token)
126
- @tokens.delete(token)
127
- end
111
+ def convert
112
+ restructure_graph!
113
+ relabel_graph!
114
+ check_directionality!
115
+ distribute_conjunctions!
116
+ map_part_of_speech!
117
+ self
118
+ end
128
119
 
129
- def to_s
130
- @tokens.map(&:to_s).join("\n")
120
+ def distribute_conjunctions!
121
+ @tokens.select { |t| t.has_conjunct? }.each do |h|
122
+ conjuncts = h.dependents.select { |d| d.relation == 'conj' }
123
+ conjunctions = h.dependents.select { |d| d.relation == 'cc' }
124
+ conjunctions.each do |c|
125
+ if c.id > h.id
126
+ new_head = conjuncts.select { |cj| cj.id > c.id }.first
127
+ c.head_id = new_head.id if new_head
128
+ end
129
+ end
131
130
  end
131
+ end
132
132
 
133
- def count_tokens
134
- roots.map(&:count_subgraph).inject(0, :+)
133
+ def check_directionality!
134
+ @tokens.select { |t| ['fixed', 'flat:foreign', 'flat:name'].include? t.relation }.each do |f|
135
+ f.promote!(nil, f.relation) if f.id < f.head.id
135
136
  end
136
-
137
- def roots
138
- @tokens.select { |t| t.head_id == 0 }.sort_by(&:id)
137
+ @tokens.select { |t| t.relation == 'conj' }.each do |f|
138
+ raise "conj must go left-to-right (id: #{f.id}, head_id: #{f.head.id}, form: #{f.form}, head_form: #{f.head.form})" if f.id < f.head.id
139
139
  end
140
+ end
140
141
 
141
- def to_graph
142
- roots.map(&:to_graph).join("\n")
143
- end
142
+ def find_token(identifier)
143
+ @tokens.select { |t| t.id == identifier }.first
144
+ end
144
145
 
145
- def to_conll
146
- @tokens.map(&:to_conll).join("\n")
147
- end
146
+ def remove_token!(token)
147
+ @tokens.delete(token)
148
+ end
148
149
 
149
- # TODO: this will leave several root nodes in many cases. For now, raise an error
150
- def prune_empty_rootnodes!
151
- unless (empty_roots = roots.select { |r| r.empty_token_sort == 'V' }).empty?
152
- empty_roots.each do |r|
153
- # promote the first dependent to root
154
- new_root = r.dependents.first
150
+ def to_s
151
+ @tokens.map(&:to_s).join("\n")
152
+ end
153
+
154
+ def count_tokens
155
+ roots.map(&:count_subgraph).inject(0, :+)
156
+ end
157
+
158
+ def roots
159
+ @tokens.select { |t| t.head_id == 0 }.sort_by(&:id)
160
+ end
161
+
162
+ def to_graph
163
+ roots.map(&:to_graph).join("\n")
164
+ end
165
+
166
+ def to_conll
167
+ @tokens.map(&:to_conll).join("\n")
168
+ end
169
+
170
+ # TODO: this will leave several root nodes in many cases. For now, raise an error
171
+ def prune_empty_rootnodes!
172
+ unless (empty_roots = roots.select { |r| r.empty_token_sort == 'V' }).empty?
173
+ empty_roots.each do |r|
174
+ # promote xobj to root if there is one
175
+ xobjs = r.dependents.select { |d| d.relation == 'xobj' }
176
+ if xobjs.any?
177
+ new_root = xobjs.first
155
178
  new_root.head_id = 0
156
179
  new_root.relation = r.relation
157
180
  r.dependents.each { |d| d.head_id = new_root.id }
158
181
  remove_token! r
159
182
  end
160
- prune_empty_rootnodes!
161
183
  end
184
+ #prune_empty_rootnodes!
162
185
  end
186
+ end
163
187
 
164
- def demote_subjunctions!
165
- @tokens.select { |t| t.part_of_speech == 'G-' }.each(&:process_subjunction!)
166
- end
188
+ def demote_subjunctions!
189
+ @tokens.select { |t| t.part_of_speech == 'G-' }.each(&:process_subjunction!)
190
+ end
167
191
 
168
- def demote_parentheticals_and_vocatives!
192
+ def demote_parentheticals_and_vocatives!
193
+ r, p = roots.partition { |n| !['voc', 'parpred'].include? n.relation }
194
+ if p.any? and r.none?
195
+ # promote the first vocative/parenthetical to head in case there's nothing else
196
+ p.first.relation = 'pred'
169
197
  r, p = roots.partition { |n| !['voc', 'parpred'].include? n.relation }
170
- if p.any? and r.none?
171
- # promote the first vocative/parenthetical to head in case there's nothing else
172
- p.first.relation = 'pred'
173
- r, p = roots.partition { |n| !['voc', 'parpred'].include? n.relation }
174
- end
175
- raise "No unique root in this tree:\n#{to_graph}" if p.any? and !r.one?
176
- p.each { |x| x.head_id = r.first.id }
177
198
  end
199
+ raise "No unique root in this tree:\n#{to_graph}" if p.any? and !r.one?
200
+ p.each { |x| x.head_id = r.first.id }
201
+ end
178
202
 
179
- def relabel_graph!
180
- roots.each(&:relabel_graph!)
181
- end
203
+ def relabel_graph!
204
+ roots.each(&:relabel_graph!)
205
+ end
182
206
 
183
- def map_part_of_speech!
184
- roots.each(&:map_part_of_speech!)
185
- end
207
+ def map_part_of_speech!
208
+ roots.each(&:map_part_of_speech!)
209
+ end
186
210
 
187
- def restructure_graph!
188
- @tokens.delete_if { |n| n.empty_token_sort == 'P' }
189
- @tokens.select(&:preposition?).each(&:process_preposition!)
190
- roots.each(&:change_coordinations!)
191
- @tokens.select(&:copula?).each(&:process_copula!)
192
- prune_empty_rootnodes!
193
- # do ellipses from left to right for proper remnant treatment
194
- @tokens.select(&:ellipsis?).sort_by { |e| e.left_corner.id }.each(&:process_ellipsis!)
195
- demote_subjunctions!
196
- # DIRTY: remove the rest of the empty nodes by attaching them
197
- # to their grandmother with remnant. This is the best way to
198
- # do it given the current state of the UDEP scheme, but
199
- # revisions will come.
200
- roots.each(&:remove_empties!)
201
- demote_parentheticals_and_vocatives!
202
- end
211
+ def restructure_graph!
212
+ @tokens.delete_if { |n| n.empty_token_sort == 'P' }
213
+ @tokens.select(&:preposition?).each(&:process_preposition!)
214
+ @tokens.select { |t| t.comparison_word? and t.dependents and t.dependents.select { |d| ['sub','obj','obl','comp','adv'].include?(d.relation) }.any? }.each(&:process_comparison!)
215
+ roots.each(&:change_coordinations!)
216
+ @tokens.select(&:copula?).each(&:process_copula!)
217
+ demote_subjunctions!
218
+ prune_empty_rootnodes!
219
+ # do ellipses from left to right for proper remnant treatment
220
+ @tokens.select(&:ellipsis?).sort_by { |e| e.left_corner.id }.each(&:process_ellipsis!)
221
+ #NB! apos gets overridden by process_comparison so some dislocations are lost
222
+ @tokens.select { |t| t.relation == 'apos' and t.id < t.head_id }.each(&:process_dislocation!)
223
+ # DIRTY: remove the rest of the empty nodes by attaching them
224
+ # to their grandmother with remnant. This is the best way to
225
+ # do it given the current state of the UDEP scheme, but
226
+ # revisions will come.
227
+ roots.each(&:remove_empties!)
228
+ demote_parentheticals_and_vocatives!
203
229
  end
230
+ end
204
231
 
205
- class Token
206
-
207
- attr_accessor :head_id
208
- attr_accessor :upos
209
- attr_reader :relation
210
- attr_reader :part_of_speech
211
- attr_reader :id
212
- attr_reader :lemma
213
- attr_reader :language
214
- attr_reader :empty_token_sort
215
- attr_reader :form
216
- attr_reader :citation_part
217
-
218
- def initialize(id, head_id, form, lemma, part_of_speech, language, morphology, relation, empty_token_sort, slashes, citation_part, sentence)
219
- @id = id
220
- @head_id = head_id
221
- @form = form
222
- @lemma = lemma
223
- @part_of_speech = part_of_speech
224
- @language = language
225
- @morphology = morphology
226
- @relation = relation
227
- @empty_token_sort = empty_token_sort
228
- @slashes = slashes
229
- @sentence = sentence
230
- @features = (morphology ? map_morphology(morphology) : '' )
231
- @citation_part = "ref=" + (citation_part ? citation_part : "").gsub(/\s/, '_')
232
- @upos = nil
233
- end
232
+ class Token
233
+
234
+ attr_accessor :head_id
235
+ attr_accessor :upos
236
+ attr_reader :relation
237
+ attr_reader :part_of_speech
238
+ attr_reader :id
239
+ attr_reader :lemma
240
+ attr_reader :language
241
+ attr_reader :empty_token_sort
242
+ attr_reader :form
243
+ attr_reader :citation_part
244
+
245
+ def initialize(id, head_id, form, lemma, part_of_speech, language, morphology, relation, empty_token_sort, slashes, citation_part, sentence)
246
+ @id = id
247
+ @head_id = head_id
248
+ @form = form
249
+ @lemma = lemma
250
+ @baselemma, @variant = @lemma.split('#')
251
+ @part_of_speech = part_of_speech
252
+ @language = language
253
+ @morphology = morphology
254
+ @relation = relation
255
+ @empty_token_sort = empty_token_sort
256
+ @slashes = slashes
257
+ @sentence = sentence
258
+ @features = (morphology ? map_morphology(morphology) : '' )
259
+ @citation_part = 'ref=' + (citation_part ? citation_part : '').gsub(/\s/, '_')
260
+ @upos = nil
261
+ end
234
262
 
235
- MORPHOLOGY_POSITIONAL_TAG_SEQUENCE = [
236
- :person, :number, :tense, :mood, :voice, :gender, :case,
237
- :degree, :strength, :inflection
238
- ]
263
+ MORPHOLOGY_POSITIONAL_TAG_SEQUENCE = [
264
+ :person, :number, :tense, :mood, :voice, :gender, :case,
265
+ :degree, :strength, :inflection
266
+ ]
239
267
 
240
- def map_morphology morph
241
- res = []
242
- for tag in 0..morph.length - 1
243
- res << MORPHOLOGY_MAP[MORPHOLOGY_POSITIONAL_TAG_SEQUENCE[tag]][morph[tag]]
244
- end
245
- res.compact.join('|')
246
- end
268
+ def map_morphology morph
269
+ res = []
270
+ for tag in 0..morph.length - 1
271
+ res << MORPHOLOGY_MAP[MORPHOLOGY_POSITIONAL_TAG_SEQUENCE[tag]][morph[tag]]
272
+ end
273
+ res = res.reject {|v| v == 'VerbForm=Part'} if res.include?('VerbForm=PartRes|Tense=Past')
274
+ res = res.reject {|s| s == 'Strength=Weak' } unless @language == 'got'
275
+ res = res.map { |s| s == 'Strength=Strong' ? 'Variant=Short' : s } unless @language == 'got'
276
+ res << 'Polarity=Neg' if ['не.быти','не.бꙑти'].include?(@lemma)
277
+ res.compact.join('|')
278
+ end
247
279
 
248
- def genitive?
249
- @morphology =~ /......g.*/
250
- end
280
+ def genitive?
281
+ @morphology =~ /......g.*/
282
+ end
251
283
 
252
- # returns +true+ if the node is an adjective or an ordinal
253
- def adjectival?
254
- @part_of_speech == 'A-' or @part_of_speech == 'Mo'
255
- end
284
+ # returns +true+ if the node is an adjective or an ordinal
285
+ def adjectival?
286
+ @part_of_speech == 'A-' or @part_of_speech == 'Mo'
287
+ end
256
288
 
257
- def subjunction?
258
- @part_of_speech == 'G-'
259
- end
289
+ def subjunction?
290
+ @part_of_speech == 'G-'
291
+ end
260
292
 
261
- def adverb?
262
- @part_of_speech =~ /\AD/
263
- end
293
+ def adverb?
294
+ @part_of_speech =~ /\AD/
295
+ end
264
296
 
265
- def cardinal?
266
- @part_of_speech == 'Ma'
267
- end
297
+ def cardinal?
298
+ @part_of_speech == 'Ma'
299
+ end
268
300
 
269
- # A node is clausal if it is a verb and not nominalized; or it has a copula dependent; or it has a subject (e.g. in an absolute constructino without a verb; or if it is the root (e.g. in a nominal clause)
270
- def clausal?
271
- (@part_of_speech == 'V-' and !nominalized?) or
272
- dependents.any?(&:copula?) or
273
- dependents.any? { |d| ['sub', 'nsubj', 'nsubjpass', 'csubj', 'csubjpass'].include? d.relation } or
274
- root?
275
- end
301
+ def relative?
302
+ @part_of_speech == 'Pr' or @part_of_speech == 'Dq'
303
+ end
276
304
 
277
- def conjunction?
278
- part_of_speech == 'C-' or @empty_token_sort == 'C'
279
- end
305
+ def verb?
306
+ @part_of_speech == 'V-' or @empty_token_sort == 'V'
307
+ end
280
308
 
281
- def coordinated?
282
- head and head.conjunction? and head.relation == @relation
283
- end
309
+ def orphan?
310
+ relation == 'orphan'
311
+ end
284
312
 
285
- # Returns +true+ if the node has an xobj dependent and either 1)
286
- # the lemma is copular or 2) the node is empty and has no pid
287
- # slash or a pid slash to a node with a copular lemma
288
- def copula?
289
- @relation == 'cop' or
290
- (COPULAR_LEMMATA.include?([lemma, part_of_speech, language].join(',')) or
291
- (@empty_token_sort == 'V' and (pid.nil? or pid.is_empty? or COPULAR_LEMMATA.include?([pid.lemma, pid.part_of_speech, pid.language].join(',')))) and
292
- dependents.any? { |d| d.relation == 'xobj' } )
293
- end
313
+ # A node is clausal if it is a verb and not nominalized; or it has a copula dependent; or it has a subject (e.g. in an absolute construction without a verb; or it has a subjunction dependent; or it is a relative pronoun/adverb or has a relative pronoun/adverb dependent; or if it is the root (e.g. in a nominal clause)
314
+ def clausal?
315
+ (@part_of_speech == 'V-' and !nominalized? and !has_preposition?) or
316
+ dependents.any?(&:copula?) or
317
+ dependents.any? { |d| ['sub', 'nsubj','nsubj:outer', 'nsubj:pass', 'csubj', 'csubj:pass'].include? d.relation } or
318
+ dependents.any?(&:subjunction?) or
319
+ relative? or
320
+ dependents.any?(&:relative?) or
321
+ dependents.any?(&:orphan?) or
322
+ root?
323
+ end
294
324
 
295
- def auxiliary?
296
- AUXILIARIES.include?([lemma, part_of_speech, language].join(','))
297
- end
298
-
299
- def determiner?
300
- DETERMINERS.include? @part_of_speech
301
- end
325
+ def conjunction?
326
+ part_of_speech == 'C-' or @empty_token_sort == 'C'
327
+ end
302
328
 
303
- def ellipsis?
304
- @empty_token_sort == 'V'
305
- end
329
+ def coordinated?
330
+ head and head.conjunction? and head.relation == @relation
331
+ end
306
332
 
307
- def foreign?
308
- @part_of_speech == 'F-'
309
- end
333
+ def has_conjunct?
334
+ dependents.any? { |d| d.relation == 'conj' }
335
+ end
310
336
 
311
- def has_content?
312
- @empty_token_sort.nil? or @empty_token_sort == ''
313
- end
337
+ # Returns +true+ if the node has an xobj dependent and either 1)
338
+ # the lemma is copular or 2) the node is empty and has no pid
339
+ # slash or a pid slash to a node with a copular lemma
340
+ def copula?
341
+ @relation == 'cop' or
342
+ (COPULAR_LEMMATA.include?([lemma, part_of_speech, language].join(',')) or
343
+ (@empty_token_sort == 'V' and (pid.nil? or pid.is_empty? or COPULAR_LEMMATA.include?([pid.lemma, pid.part_of_speech, pid.language].join(',')))) and
344
+ dependents.any? { |d| d.relation == 'xobj' } )
345
+ end
314
346
 
315
- def interjection?
316
- @part_of_speech == 'I-'
317
- end
347
+ def has_copula?
348
+ dependents.any?(&:copula?)
349
+ end
318
350
 
319
- def is_empty?
320
- !has_content?
321
- end
351
+ def auxiliary?
352
+ AUXILIARIES.include?([lemma, part_of_speech, language].join(',')) or (part_of_speech == "V-" and relation == 'aux')
353
+ end
322
354
 
323
- def deponent?
324
- DEPONENTS[@language] and DEPONENTS[@language].match(@lemma)
325
- end
355
+ def comparison_word?
356
+ COMPARISON_LEMMATA.include?([lemma,part_of_speech,language].join(','))
357
+ end
326
358
 
327
- def mediopassive?
328
- (!deponent? and @morphology) ? @morphology[4] =~/[mpe]/ : false
329
- end
359
+ def determiner?
360
+ DETERMINERS.include? @part_of_speech
361
+ end
330
362
 
331
- def passive?
332
- (!deponent? and @morphology) ? @morphology[4] == 'p' : false
333
- end
363
+ def ellipsis?
364
+ @empty_token_sort == 'V'
365
+ end
334
366
 
335
- def negation?
336
- NEGATION_LEMMATA.include?([lemma, part_of_speech, language].join(','))
337
- end
367
+ def foreign?
368
+ @part_of_speech == 'F-'
369
+ end
338
370
 
339
- def nominal?
340
- @part_of_speech =~ /\A[NPM]/ or nominalized?
341
- end
371
+ def has_content?
372
+ @empty_token_sort.nil? or @empty_token_sort == ''
373
+ end
342
374
 
343
- def nominalized?
344
- dependents.any? do |d|
345
- d.determiner? and ['atr', 'aux', 'det'].include? d.relation
346
- end
347
- end
348
-
349
- def TAM_particle?
350
- @relation == 'aux' and TAM_PARTICLE_LEMMATA.include?([lemma, part_of_speech, language].join(','))
351
- end
352
-
353
- def particle?
354
- @relation == 'aux' and PARTICLE_LEMMATA.include?([lemma, part_of_speech, language].join(','))
355
- end
375
+ def has_subject?
376
+ dependents.any? { |d| ['sub','nsubj','nsubj:pass','csubj','csubj:pass','nsubj:outer'].include?(d.relation) }
377
+ end
356
378
 
357
- def pronominal?
358
- @part_of_speech =~ /\AP[^st]/ # no evidence that possessives are pronoun/determiner-like
359
- end
360
-
361
- def preposition?
362
- @part_of_speech == 'R-'
363
- end
379
+ def interjection?
380
+ @part_of_speech == 'I-'
381
+ end
364
382
 
365
- def proper_noun?
366
- @part_of_speech == 'Ne'
367
- end
383
+ def is_empty?
384
+ !has_content?
385
+ end
368
386
 
369
- def root?
370
- @head_id == 0
371
- end
387
+ def deponent?
388
+ DEPONENTS[@language] and DEPONENTS[@language].match(@lemma)
389
+ end
372
390
 
373
- def relation=(rel)
374
- if conjunction?
375
- dependents.select { |d| d.relation == @relation }.each do |c|
376
- c.relation = rel
377
- end
378
- end
379
- @relation = rel
380
- end
391
+ def mediopassive?
392
+ (!deponent? and @morphology) ? @morphology[4] =~/[mpe]/ : false
393
+ end
381
394
 
382
- def count_subgraph
383
- dependents.map(&:count_subgraph).inject(0, :+) + (is_empty? ? 0 : 1)
384
- end
395
+ def passive?
396
+ (!deponent? and @morphology) ? @morphology[4] == 'p' : false
397
+ end
385
398
 
386
- def subgraph_set
387
- [self] + dependents.map(&:subgraph_set).flatten
388
- end
399
+ def negation?
400
+ NEGATION_LEMMATA.include?([lemma, part_of_speech, language].join(','))
401
+ end
389
402
 
390
- def left_corner
391
- ([self] + dependents).sort_by(&:id).first
392
- end
403
+ def nominal?
404
+ @part_of_speech =~ /\A[NPM]/ or nominalized?
405
+ end
393
406
 
394
- def conj_head
395
- raise "Not a conjunct" unless @relation == 'conj'
396
- if head.relation == 'conj'
397
- head.conj_head
398
- else
399
- head
400
- end
401
- end
407
+ def long?
408
+ @morphology[8] == 'w'
409
+ end
402
410
 
403
- def pid
404
- if pid = @slashes.select { |t, r| r == 'pid' }.first
405
- @sentence.tokens.select { |t| pid.first == t.id}.first
406
- else
407
- nil
408
- end
411
+ def nominalized?
412
+ dependents.any? do |d|
413
+ d.determiner? and ['atr', 'aux', 'det'].include? d.relation
409
414
  end
415
+ end
410
416
 
411
- def format_features(features)
412
- if features == ''
413
- '_'
414
- else
415
- features.split("|").sort.join("|")
417
+ def tam_particle?
418
+ @relation == 'aux' and TAM_PARTICLE_LEMMATA.include?([lemma, part_of_speech, language].join(','))
419
+ end
420
+
421
+ def particle?
422
+ @relation == 'aux' and PARTICLE_LEMMATA.include?([lemma, part_of_speech, language].join(','))
423
+ end
424
+
425
+ def pronominal?
426
+ @part_of_speech =~ /\AP./ # no evidence that possessives are pronoun/determiner-like
427
+ end
428
+
429
+ def preposition?
430
+ @part_of_speech == 'R-'
431
+ end
432
+
433
+ def proper_noun?
434
+ @part_of_speech == 'Ne'
435
+ end
436
+
437
+ def root?
438
+ @head_id == 0
439
+ end
440
+
441
+ def relation=(rel)
442
+ if conjunction?
443
+ dependents.select { |d| d.relation == @relation }.each do |c|
444
+ c.relation = rel
416
445
  end
417
446
  end
447
+ @relation = rel
448
+ end
418
449
 
419
- def to_conll
420
- [@id,
421
- @form,
422
- @lemma,
423
- @upos,
424
- @part_of_speech,
425
- format_features(@features),
426
- @head_id,
427
- (@head_id == 0 ? 'root' : @relation), # override non-root relations on root until we've found out how to handle unembedded reports etc
428
- '_', # slashes here
429
- @citation_part].join("\t")
430
- end
450
+ def count_subgraph
451
+ dependents.map(&:count_subgraph).inject(0, :+) + (is_empty? ? 0 : 1)
452
+ end
431
453
 
432
- def to_s
433
- [@id, @form, @head_id, @relation].join("\t")
434
- end
454
+ def subgraph_set
455
+ [self] + dependents.map(&:subgraph_set).flatten
456
+ end
435
457
 
436
- def to_n
437
- [@relation, @id, (@form || @empty_token_sort), (@upos || @part_of_speech) ].join('-')
438
- end
458
+ def left_corner
459
+ ([self] + dependents).sort_by(&:id).first
460
+ end
439
461
 
440
- def to_graph(indents = 0)
441
- ([("\t" * indents) + (to_n)] + dependents.map { |d| d.to_graph(indents + 1) }).join("\n")
462
+ def conj_head
463
+ raise 'Not a conjunct' unless @relation == 'conj'
464
+ if head.relation == 'conj'
465
+ head.conj_head
466
+ else
467
+ head
442
468
  end
469
+ end
443
470
 
444
- def siblings
445
- @sentence.tokens.select { |t| t.head_id == @head_id } - [self]
471
+ def pid
472
+ if pid = @slashes.select { |t, r| r == 'pid' }.first
473
+ @sentence.tokens.select { |t| pid.first == t.id}.first
474
+ else
475
+ nil
446
476
  end
477
+ end
447
478
 
448
- def head
449
- @sentence.tokens.select { |t| t.id == @head_id }.first
479
+ def format_features(features)
480
+ if features == ''
481
+ '_'
482
+ else
483
+ features.split('|').sort.join('|')
450
484
  end
485
+ end
451
486
 
452
- def dependents
453
- @sentence.tokens.select { |t| t.head_id == @id }.sort_by(&:id)
454
- end
487
+ def miscellaneous
488
+ m = @citation_part
489
+ m += "|LId=#{@variant}" if @variant
490
+ m
491
+ end
455
492
 
456
- def find_appositive_head
457
- raise "Not an apposition" unless @relation == 'apos'
458
- if head.conjunction? and head.relation == 'apos'
459
- head.find_appositive_head
460
- else
461
- head
462
- end
463
- end
493
+ def to_conll
494
+ [@id,
495
+ @form,
496
+ @baselemma.gsub(/не\./,''),
497
+ @upos,
498
+ @part_of_speech,
499
+ format_features(@features),
500
+ @head_id,
501
+ (@head_id == 0 ? 'root' : @relation), # override non-root relations on root until we've found out how to handle unembedded reports etc
502
+ '_', # slashes here
503
+ miscellaneous].join("\t")
504
+ end
464
505
 
465
- def find_postag possible_postags
466
- tag, crit, feats = possible_postags.shift
467
- if tag.nil?
468
- # raise "Found no postag"
469
- elsif crit.call self
470
- @upos = tag
471
- @features += ((@features.empty? ? '' : '|') + feats) if feats
472
- else
473
- find_postag possible_postags
474
- end
506
+ def to_s
507
+ [@id, @form, @head_id, @relation].join("\t")
508
+ end
509
+
510
+ def to_n
511
+ [@relation, @id, (@form || @empty_token_sort), (@upos || @part_of_speech) ].join('-')
512
+ end
513
+
514
+ def to_graph(indents = 0)
515
+ ([("\t" * indents) + (to_n)] + dependents.map { |d| d.to_graph(indents + 1) }).join("\n")
516
+ end
517
+
518
+ def siblings
519
+ @sentence.tokens.select { |t| t.head_id == @head_id } - [self]
520
+ end
521
+
522
+ def head
523
+ @sentence.tokens.select { |t| t.id == @head_id }.first
524
+ end
525
+
526
+ def dependents
527
+ @sentence.tokens.select { |t| t.head_id == @id }.sort_by(&:id)
528
+ end
529
+
530
+ def find_appositive_head
531
+ raise 'Not an apposition' unless @relation == 'apos'
532
+ if head.conjunction? and head.relation == 'apos'
533
+ head.find_appositive_head
534
+ else
535
+ head
475
536
  end
476
-
477
- def find_relation possible_relations
478
- rel, crit = possible_relations.shift
479
- if rel.nil?
480
- # raise "Found no relation"
481
- elsif crit.call self
482
- rel
483
- else
484
- find_relation possible_relations
485
- end
537
+ end
538
+
539
+ def find_postag possible_postags
540
+ tag, crit, feats = possible_postags.shift
541
+ if tag.nil?
542
+ # raise "Found no postag"
543
+ elsif crit.call self
544
+ @upos = tag
545
+ @features += ((@features.empty? ? '' : '|') + feats) if feats
546
+ else
547
+ find_postag possible_postags
486
548
  end
549
+ end
487
550
 
488
- def map_part_of_speech!
489
- dependents.each(&:map_part_of_speech!)
490
- possible_postags = POS_MAP[@part_of_speech]
491
- find_postag possible_postags.dup
492
- # ugly, but the ugliness comes from UDEP
493
- @upos = 'ADJ' if @upos == 'DET' and @relation != 'det'
551
+ def find_relation possible_relations
552
+ rel, crit = possible_relations.shift
553
+ if rel.nil?
554
+ # raise "Found no relation"
555
+ elsif crit.call self
556
+ rel
557
+ else
558
+ find_relation possible_relations
494
559
  end
560
+ end
495
561
 
496
- def relabel_graph!
497
- dependents.each(&:relabel_graph!)
498
- # TODO: if there are iobjs without an obj among the dependents, one of them should be promoted to obj
499
- @relation = map_relation
500
- raise "No relation for #{form}" unless @relation
562
+ def map_part_of_speech!
563
+ dependents.each(&:map_part_of_speech!)
564
+ possible_postags = POS_MAP[@part_of_speech]
565
+ find_postag possible_postags.dup
566
+ # ugly, but the ugliness comes from UDEP
567
+ @upos = 'PRON' if @upos == 'DET' and @relation != 'det'
568
+ @upos = REL_TO_POS[@relation] if @upos == 'X'
569
+ end
570
+
571
+ def relabel_graph!
572
+ dependents.each(&:relabel_graph!)
573
+ # TODO: if there are iobjs without an obj among the dependents, one of them should be promoted to obj
574
+ @relation = map_relation
575
+ raise "No relation for #{form}" unless @relation
576
+ end
577
+
578
+ def map_relation
579
+ possible_relations = RELATION_MAPPING[@relation]
580
+ case possible_relations
581
+ when String
582
+ possible_relations
583
+ when Array
584
+ x = find_relation possible_relations.dup
585
+ when nil
586
+ # do nothing: the token has already changed its relation
587
+ @relation
588
+ else
589
+ raise "Unknown value #{possible_relations.inspect} for #{@relation}"
501
590
  end
591
+ end
502
592
 
503
- def map_relation
504
- possible_relations = RELATION_MAPPING[@relation]
505
- case possible_relations
506
- when String
507
- possible_relations
508
- when Array
509
- x = find_relation possible_relations.dup
510
- when nil
511
- # do nothing: the token has already changed its relation
512
- @relation
513
- else
514
- raise "Unknown value #{possible_relations.inspect} for #{@relation}"
515
- end
593
+ # attach subjunctions with 'mark' under their verbs and promote
594
+ # the verb to take over the subjunction's relation. If the verb
595
+ # is empty, the subjunction stays as head.
596
+ def process_subjunction!
597
+ # ignore if the subjunction has no dependents or only conj dependents.
598
+ # NB: this requires that the function is called *after* processing conjunctions
599
+ return if dependents.reject { |d| ['conj', 'cc'].include? d.relation }.empty?
600
+ pred = dependents.select { |d| d.relation == 'pred' }
601
+ raise "#{pred.size} PREDs under the subjunction #{to_n}:\n#{@sentence.to_graph}" unless pred.one?
602
+ pred = pred.first
603
+ # promote the subjunction if the verb is empty
604
+ if pred.is_empty?
605
+ pred.dependents.each { |d| d.head_id = id }
606
+ @sentence.remove_token! pred
607
+ # else demote the subjunction
608
+ else
609
+ pred.invert!('mark')
610
+ # move any remaining discourse children to the new head (note that we need to keep some aux'es to get them as "fixed" dependents
611
+ dependents.each { |d| d.head_id = pred.id unless (d.relation == 'aux' and ['Px', 'Pr'].include? d.part_of_speech) or d.relation == 'fixed' }
516
612
  end
613
+ end
517
614
 
518
- # attach subjunctions with 'mark' under their verbs and promote
519
- # the verb to take over the subjunction's relation. If the verb
520
- # is empty, the subjunction stays as head.
521
- def process_subjunction!
522
- # ignore if the subjunction has no dependents or only conj dependents.
523
- # NB: this requires that the function is called *after* processing conjunctions
524
- return if dependents.reject { |d| ['conj', 'cc'].include? d.relation }.empty?
525
- pred = dependents.select { |d| d.relation == 'pred' }
526
- raise "#{pred.size} PREDs under the subjunction #{to_n}:\n#{@sentence.to_graph}" unless pred.one?
527
- pred = pred.first
528
- # promote the subjunction if the verb is empty
529
- if pred.is_empty?
530
- pred.dependents.each { |d| d.head_id = id }
531
- @sentence.remove_token! pred
532
- # else demote the subjunction
533
- else
534
- pred.invert!('mark')
535
- end
615
+ def process_comparison!
616
+ cl = dependents.select { |d| ['sub','obj','obl','comp','adv'].include?(d.relation) }
617
+ head.relation = 'advcl:cmp' if head and head.part_of_speech == 'C-' and head.relation == relation
618
+ comp = cl.first
619
+ comp.invert!('mark','advcl:cmp')
620
+ dependents.each { |d| d.head_id = comp.id }
621
+ end
622
+
623
+ def process_dislocation!
624
+ self.head_id = head.head_id unless head.root?
625
+ self.relation = "dislocated"
626
+ end
627
+
628
+ def process_ellipsis!
629
+ aux = dependents.select(&:auxiliary?).first
630
+ if aux
631
+ aux.promote!
632
+ return
536
633
  end
537
634
 
538
- def process_ellipsis!
539
- aux = dependents.select(&:auxiliary?).first
540
- if aux
541
- aux.promote!
542
- return
543
- end
635
+ sub = dependents.select { |d| d.relation == 'sub' }.first
636
+ new_head = find_highest_daughter
637
+ new_head_sub = new_head.dependents.select { |d| d.relation == 'sub' }.first
638
+ sub.relation = 'nsubj:outer' if sub and new_head_sub
639
+ new_head.promote!('orphan')
544
640
 
545
- new_head = find_highest_daughter
546
- new_head.promote!('orphan')
547
-
548
641
  # dependents.each do |d|
549
- # check if there's a partner with the same relation under the overt node.
550
- # TODO: this isn't really very convincing when it comes to ADVs
642
+ # check if there's a partner with the same relation under the overt node.
643
+ # TODO: this isn't really very convincing when it comes to ADVs
551
644
  # if partner = overt.dependents.select { |p| p != self and p.relation == d.relation }.first #inserted p != self
552
645
  # partner = partner.find_remnant
553
646
  # d.head_id = partner.id
554
647
  # d.relation = 'remnant'
555
- # if there's no partner, just attach under the overt node, preserving the relation
648
+ # if there's no partner, just attach under the overt node, preserving the relation
556
649
  # else
557
650
  # d.head_id = overt.id
558
651
  # end
559
652
  # end
560
- @sentence.remove_token!(self)
561
- end
653
+ @sentence.remove_token!(self)
654
+ end
562
655
 
563
- def find_remnant
564
- if r = dependents.select { |d| d.relation == 'remnant' }.first
565
- r.find_remnant
566
- else
567
- self
568
- end
656
+ def find_remnant
657
+ if r = dependents.select { |d| d.relation == 'remnant' }.first
658
+ r.find_remnant
659
+ else
660
+ self
569
661
  end
662
+ end
570
663
 
571
- def find_highest_daughter
572
- dependents.min_by { |d| OBLIQUENESS_HIERARCHY.find_index(d.map_relation[/[^:]*/]) || 1000 }
573
- end
664
+ def find_highest_daughter
665
+ dependents.min_by { |d| OBLIQUENESS_HIERARCHY.find_index(d.map_relation[/[^:]*/]) || 1000 }
666
+ end
574
667
 
575
- def process_copula!
576
- predicates = dependents.select { |d| d.relation == 'xobj' }
577
- raise "#{predicates.size} predicates under #{to_n}\n#{to_graph}" if predicates.size != 1
578
- predicates.first.promote!(nil, 'cop')
579
- end
668
+ def process_copula!
669
+ predicates = dependents.select { |d| d.relation == 'xobj' }
670
+ raise "#{predicates.size} predicates under #{to_n}\n#{to_graph}" if predicates.size != 1
671
+ sub = dependents.select { |d| d.relation == 'sub' }.first
672
+ new_head = predicates.first
673
+ new_head_sub = new_head.dependents.select { |d| d.relation == 'sub' }.first
674
+ sub.relation = 'nsubj:outer' if sub and new_head_sub
675
+ predicates.first.promote!(nil, 'cop')
676
+ end
580
677
 
581
- def has_preposition?
582
- dependents.any? { |d| d.preposition? and d.relation == "case" }
583
- end
678
+ def has_preposition?
679
+ dependents.any? { |d| d.preposition? and d.relation == 'case' }
680
+ end
584
681
 
585
- def process_preposition!
586
- raise "Only prepositions can be processed this way!" unless part_of_speech == 'R-'
587
- obliques = dependents.select { |d| d.relation == 'obl' }
588
- raise "#{obliques.size} oblique dependents under #{to_n}\n#{to_graph}" if obliques.size > 1
589
- return if obliques.empty? #shouldn't really happen, but in practice
590
- obliques.first.invert!("case") # , "adv")
591
- end
682
+ def process_preposition!
683
+ raise 'Only prepositions can be processed this way!' unless part_of_speech == 'R-'
684
+ obliques = dependents.select { |d| d.relation == 'obl' }
685
+ doublepreps = dependents.select { |d| d.relation == 'aux' and d.preposition? }
686
+ mods = dependents.select { |d| d.relation != 'obl' and !(d.relation == 'aux' and d.preposition?) }
687
+ raise "#{obliques.size} oblique dependents under #{to_n}\n#{to_graph}" if obliques.size > 1
688
+ return if obliques.empty? #shouldn't really happen, but in practice
689
+ obliques.first.invert!('case') # , "adv")
690
+ doublepreps.each { |p| p.head_id = obliques.first.id and p.relation = 'case' }
691
+ mods.each { |m| m.head_id = obliques.first.id }
692
+ end
592
693
 
593
- def remove_empties!
594
- dependents.each(&:remove_empties!)
595
- if is_empty?
596
- dependents.each { |d| d.head_id = head_id; d.relation = 'remnant' }
597
- @sentence.remove_token! self
598
- end
694
+ def remove_empties!
695
+ dependents.each(&:remove_empties!)
696
+ if is_empty?
697
+ dependents.each { |d| d.head_id = head_id; d.relation = 'remnant' }
698
+ @sentence.remove_token! self
599
699
  end
700
+ end
600
701
 
601
- # Changes coordinations recursively from the bottom of the graph
602
- def change_coordinations!
603
- dependents.each(&:change_coordinations!)
604
- process_coordination! if conjunction?
605
- end
702
+ # Changes coordinations recursively from the bottom of the graph
703
+ def change_coordinations!
704
+ dependents.each(&:change_coordinations!)
705
+ process_coordination! if conjunction?
706
+ end
606
707
 
607
- def process_coordination!
608
- raise "Only coordinations can be processed this way!" unless conjunction?
609
- return if dependents.reject { |d| d.relation == 'aux' }.empty?
610
- distribute_shared_modifiers!
611
- dependents.reject { |d| d.relation == 'aux' }.sort_by { |d| d.left_corner.id }.first.promote!("conj", "cc")
612
- end
613
-
614
- def distribute_shared_modifiers!
615
- raise "Can only distribute over a conjunction!" unless conjunction?
616
- conjuncts, modifiers = dependents.reject { |d| d.relation == 'aux' }.partition { |d| d.relation == @relation or (d.relation == 'adv' and @relation == 'xadv') }
617
- first_conjunct = conjuncts.shift
618
- raise "No first conjunct under #{to_n}\n#{to_graph}" unless first_conjunct
619
- raise "The first conjunct is a misannotated conjunction in #{to_n}\n#{to_graph}" if first_conjunct.conjunction? and first_conjunct.dependents.empty?
620
- modifiers.each do |m|
621
- m.head_id = first_conjunct.id
622
- conjuncts.each { |c| c.add_slash! [m.id, m.relation] }
623
- end
624
- end
708
+ def process_coordination!
709
+ raise 'Only coordinations can be processed this way!' unless conjunction?
710
+ return if dependents.reject { |d| d.relation == 'aux' }.empty?
711
+ distribute_shared_modifiers!
712
+ dependents.reject { |d| d.relation == 'aux' }.sort_by { |d| d.left_corner.id }.first.promote!('conj', 'cc')
713
+ end
625
714
 
626
- def add_slash!(slash)
627
- @slashes << slash
715
+ def distribute_shared_modifiers!
716
+ raise 'Can only distribute over a conjunction!' unless conjunction?
717
+ conjuncts, modifiers = dependents.reject { |d| d.relation == 'aux' }.partition { |d| d.relation == @relation or (d.relation == 'adv' and @relation == 'xadv') }
718
+ first_conjunct = conjuncts.shift
719
+ raise "No first conjunct under #{to_n}\n#{to_graph}" unless first_conjunct
720
+ raise "The first conjunct is a misannotated conjunction in #{to_n}\n#{to_graph}" if first_conjunct.conjunction? and first_conjunct.dependents.empty?
721
+ modifiers.each do |m|
722
+ m.head_id = first_conjunct.id
723
+ conjuncts.each { |c| c.add_slash! [m.id, m.relation] }
628
724
  end
725
+ end
629
726
 
630
- # Inverts the direction of a dependency relation. By default the
631
- # labels are also swapped, but new relations can be specified
632
- # for both the new dependent and the new head.
633
- def invert!(new_dependent_relation = nil, new_head_relation = nil)
634
- raise "Cannot promote a token under root!" if @head_id == 0
635
- new_dependent_relation ||= @relation
636
- new_head_relation ||= head.relation
637
- new_head_id = head.head_id
727
+ def add_slash!(slash)
728
+ @slashes << slash
729
+ end
730
+
731
+ # Inverts the direction of a dependency relation. By default the
732
+ # labels are also swapped, but new relations can be specified
733
+ # for both the new dependent and the new head.
734
+ def invert!(new_dependent_relation = nil, new_head_relation = nil)
735
+ raise 'Cannot promote a token under root!' if @head_id == 0
736
+ new_dependent_relation ||= @relation
737
+ new_head_relation ||= head.relation
738
+ new_head_id = head.head_id
739
+
740
+ head.head_id = @id
741
+ head.relation = new_dependent_relation
742
+ @head_id = new_head_id
743
+ self.relation = new_head_relation
744
+ end
638
745
 
746
+ # promotes a node to its head's place. The node takes over its
747
+ # former head's relation and all dependents. The new relation
748
+ # for these dependents can be specified; if it is not, they will
749
+ # keep their former relation. The former head is made a
750
+ # dependent of the node (with a specified relation) or,
751
+ # if it is an empty node, destroyed.
752
+
753
+ def promote!(new_sibling_relation = nil, new_dependent_relation = 'aux')
754
+ raise 'Cannot promote a token under root!' if @head_id == 0
755
+ new_head_relation = head.relation
756
+ new_head_id = head.head_id
757
+
758
+ # move all dependents of the former head to the new one
759
+ siblings.each do |t|
760
+ t.head_id = @id
761
+ # ugly hack to avoid overwriting the aux relation here (aux siblings aren't really siblings), now also includes conj, cc
762
+ t.relation = new_sibling_relation if (new_sibling_relation and !['aux','conj','cc'].include?(t.relation))
763
+ end
764
+
765
+ # remove the former head if it was empty
766
+ if head.is_empty?
767
+ @sentence.remove_token!(head)
768
+ # else make it a dependent of the new head
769
+ else
639
770
  head.head_id = @id
640
771
  head.relation = new_dependent_relation
641
- @head_id = new_head_id
642
- self.relation = new_head_relation
643
772
  end
644
773
 
645
- # promotes a node to its head's place. The node takes over its
646
- # former head's relation and all dependents. The new relation
647
- # for these dependents can be specified; if it is not, they will
648
- # keep their former relation. The former head is made a
649
- # dependent of the node (with a specified relation) or,
650
- # if it is an empty node, destroyed.
651
-
652
- def promote!(new_sibling_relation = nil, new_dependent_relation = 'aux')
653
- raise "Cannot promote a token under root!" if @head_id == 0
654
- new_head_relation = head.relation
655
- new_head_id = head.head_id
656
-
657
- # move all dependents of the former head to the new one
658
- siblings.each do |t|
659
- t.head_id = @id
660
- # ugly hack to avoid overwriting the aux relation here (aux siblings aren't really siblings)
661
- t.relation = new_sibling_relation if (new_sibling_relation and t.relation != 'aux')
662
- end
663
-
664
- # remove the former head if it was empty
665
- if head.is_empty?
666
- @sentence.remove_token!(head)
667
- # else make it a dependent of the new head
668
- else
669
- head.head_id = @id
670
- head.relation = new_dependent_relation
671
- end
672
-
673
- @head_id = new_head_id
674
- # don't use relation=, as we don't want this relation to be
675
- # copied down a tree of conjunctions
676
- @relation = new_head_relation
677
- end
774
+ @head_id = new_head_id
775
+ # don't use relation=, as we don't want this relation to be
776
+ # copied down a tree of conjunctions
777
+ @relation = new_head_relation
678
778
  end
679
779
  end
680
780
  end