proiel-cli 1.2.1 → 1.3.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
1
  module PROIEL
3
2
  module Converter
4
3
  class CoNLLU
@@ -6,10 +5,10 @@ module PROIEL
6
5
  # try to guess deponency based on the lemma
7
6
  DEPONENTS = { 'lat' => /r\Z/,
8
7
  'grc' => /ομαι\Z/ }
9
- COPULAR_LEMMATA = ['sum,V-,lat', 'εἰμί#1,V-,grc']
8
+ COPULAR_LEMMATA = ['sum,V-,lat', 'eo#2,V-,lat','εἰμί#1,V-,grc', 'быти,V-,orv','стати#2,V-,orv','бꙑти,V-,chu']
10
9
  AUXILIARIES = COPULAR_LEMMATA + []
11
- DETERMINERS = ['S-', 'Pd', 'Px']
12
- NEGATION_LEMMATA = ['non,Df,lat', 'ne,Df,lat',
10
+ DETERMINERS = ['S-', 'Pd', 'Px']
11
+ NEGATION_LEMMATA = ['non,Df,lat', 'ne,Df,lat',
13
12
  'μή,Df,grc',
14
13
  'μήγε,Df,grc',
15
14
  'μηδαμῶς,Df,grc',
@@ -39,28 +38,38 @@ module PROIEL
39
38
  'ni,Df,got',
40
39
  'nibai#2,Df,got',
41
40
  'nih,Df,got',
41
+ 'не,Df,orv',
42
+ 'ни,Df,orv',
43
+ 'ниже,Df,orv',
44
+ 'нѣ,Df,orv',
42
45
  ]
43
-
46
+
44
47
  TAM_PARTICLE_LEMMATA = ['ἄν,Df,grc',
45
48
  ]
46
-
49
+
47
50
  PARTICLE_LEMMATA = [ 'at,Df,lat',
48
51
  'atque,Df,lat',
49
52
  'autem,Df,lat',
50
53
  'certe,Df,lat',
54
+ 'en,Df,lat',
55
+ 'equidem,Df,lat',
51
56
  'ergo,Df,lat',
52
57
  'et,Df,lat',
53
58
  'enim,Df,lat',
59
+ 'etenim,Df,lat',
54
60
  'etiam,Df,lat',
55
61
  'igitur,Df,lat',
56
62
  'immo,Df,lat',
57
63
  'itaque,Df,lat',
58
64
  'nam,Df,lat',
65
+ 'namque,Df,lat',
59
66
  'nonne,Df,lat',
60
67
  'nonne,Du,lat',
68
+ 'num,Df,lat',
61
69
  'quidem,Df,lat',
62
70
  'quoque,Df,lat',
63
71
  'sic,Df,lat',
72
+ 'siquidem,Df,lat',
64
73
  'tamen,Df,lat',
65
74
  'tum,Df,lat',
66
75
  'tunc,Df,lat',
@@ -138,82 +147,162 @@ module PROIEL
138
147
  'þannu,Df,got',
139
148
  'þanuh,Df,got',
140
149
  'þaruh,Df,got',
150
+ 'али,Df,orv',
151
+ 'аль,Df,orv',
152
+ 'ано,Df,orv',
153
+ 'атъ,Df,orv',
154
+ 'ать,Df,orv',
155
+ 'бо,Df,orv',
156
+ 'вѣдь,Df,orv',
157
+ 'да#2,Df,orv',
158
+ 'еда,Df,orv',
159
+ 'же,Df,orv',
160
+ 'зане,Df,orv',
161
+ 'занеже,Df,orv',
162
+ 'ибо,Df,orv',
163
+ 'ино,Df,orv',
164
+ 'ли,Df,orv',
165
+ 'ну,Df,orv',
166
+ 'понеже,Df,orv',
167
+ 'си,Df,orv',
168
+ 'ти,Df,orv',
169
+ 'убо,Df,orv',
170
+ 'ужь,Df,orv',
171
+ 'ци,Df,orv',
172
+ 'яко,Df,orv',
173
+ 'якоже,Df,orv',
141
174
  ]
142
175
 
143
-
144
- POS_MAP =
145
- {
176
+ COMPARISON_LEMMATA = ['alja,Df,got',
177
+ 'ar̄awel,Df,xcl',
178
+ 'atque,Df,lat',
179
+ 'baycʻ,Df,xcl',
180
+ 'etʻe,Df,xcl',
181
+ 'ibrew,Df,xcl',
182
+ 'ibrew z-,Df,xcl',
183
+ 'kʻan z,Df,xcl',
184
+ 'licet,Df,lat',
185
+ 'nibai,Df,got',
186
+ 'nisi,Df,lat',
187
+ 'orpēs,Df,xcl',
188
+ 'praeterquam,Df,lat',
189
+ 'quam,Df,lat',
190
+ 'quasi,Df,lat',
191
+ 'quemadmodum,Df,lat',
192
+ 'si,Df,lat',
193
+ 'sicut,Df,lat',
194
+ 'swaswe,Df,got',
195
+ 'swe,Df,got',
196
+ 'tamquam,Df,lat',
197
+ 'tʻe,Df,xcl',
198
+ 'ut,Df,lat',
199
+ 'velut,Df,lat',
200
+ 'þau,Df,got',
201
+ 'ἅτε,Df,grc',
202
+ 'εἰ,Df,grc',
203
+ 'ἤ,Df,grc',
204
+ 'ἤπερ,Df,grc',
205
+ 'καθάπερ,Df,grc',
206
+ 'καθώς,Df,grc',
207
+ 'οἷα,Df,grc',
208
+ 'ὁμοίως,Df,grc',
209
+ 'ὅτι,Df,grc',
210
+ 'ὡς,Df,grc',
211
+ 'ὡσεί,Df,grc',
212
+ 'ὥσπερ,Df,grc',
213
+ 'ако,Df,orv',
214
+ 'акъже,Df,orv',
215
+ 'акы,Df,orv',
216
+ 'акꙑ,Df,chu',
217
+ 'будьто,Df,orv',
218
+ 'како,Df,orv',
219
+ 'ли,Df,chu',
220
+ 'неже,Df,chu',
221
+ 'нежели,Df,chu',
222
+ 'нежели,Df,orv',
223
+ 'окꙑ,Df,chu',
224
+ 'развѣ,Df,chu',
225
+ 'тъкъмо,Df,chu',
226
+ 'чьто,Df,orv',
227
+ 'яко,Df,orv',
228
+ 'якоже,Df,orv',
229
+ 'ꙗко,Df,chu',
230
+ 'ꙗкоже,Df,chu'
231
+ ]
232
+
233
+ POS_MAP =
234
+ {
146
235
  'A-' => [['ADJ', lambda { |x| true } ]],
147
236
  'C-' => [['CCONJ', lambda { |x| true } ]],
148
- 'Df' => [['AUX', lambda(&:TAM_particle?)],
149
- ['ADV', lambda(&:negation?), "Polarity=Neg"],
237
+ 'Df' => [['AUX', lambda(&:tam_particle?)],
238
+ ['ADV', lambda(&:negation?), 'Polarity=Neg'],
150
239
  ['ADV', lambda { |x| true } ]
151
240
  ],
152
- 'Dq' => [['ADV', lambda { |x| true }, "PronType=Rel"]],
153
- 'Du' => [['ADV', lambda { |x| true }, "PronType=Int"]],
241
+ 'Dq' => [['ADV', lambda { |x| true }, 'PronType=Rel']],
242
+ 'Du' => [['ADV', lambda { |x| true }, 'PronType=Int']],
154
243
  'F-' => [['X', lambda { |x| true } ]],
155
244
  'G-' => [['SCONJ', lambda { |x| true } ]],
156
245
  'I-' => [['INTJ', lambda { |x| true } ]],
157
- 'Ma' => [['NUM', lambda { |x| true } ]],
158
- 'Mo' => [['ADJ', lambda { |x| true } ]],
246
+ 'Ma' => [['NUM', lambda { |x| true } ]],
247
+ 'Mo' => [['ADJ', lambda { |x| true } ]],
159
248
  'N-' => [['SCONJ', lambda { |x| true } ]], #irrelevant for our purposes
160
249
  'Nb' => [['NOUN', lambda { |x| true } ]],
161
250
  'Ne' => [['PROPN', lambda { |x| true } ]],
162
- 'Pc' => [['PRON', lambda { |x| true }, "PronType=Rcp"]],
163
- 'Pd' => [['DET', lambda { |x| true } ]],
164
- 'Pi' => [['PRON', lambda { |x| true }, "PronType=Int"]],
251
+ 'Pc' => [['PRON', lambda { |x| true }, 'PronType=Rcp']],
252
+ 'Pd' => [['DET', lambda { |x| true } ]],
253
+ 'Pi' => [['PRON', lambda { |x| true }, 'PronType=Int']],
165
254
  'Pk' => [['AUX', lambda { |x| x.relation == 'aux' }],
166
- ['PRON', lambda { |x| true }, "PronType=Prs|Reflex=Yes"]],
167
- 'Pp' => [['PRON', lambda { |x| true }, "PronType=Prs"]],
168
- 'Pr' => [['PRON', lambda { |x| true }, "PronType=Rel"]],
169
- 'Ps' => [['ADJ', lambda { |x| true }, "Poss=Yes"]], ### NB no evidence for a pronominal/determiner-like nature here
170
- 'Pt' => [['ADJ', lambda { |x| true }, "Poss=Yes|Reflex=Yes" ]], ### NB no evidence for a pronominal/determiner-like nature here
171
- 'Px' => [['DET', lambda { |x| true } ]],
172
- 'Py' => [['PRON', lambda { |x| true } ]],
255
+ ['PRON', lambda { |x| true }, 'PronType=Prs|Reflex=Yes']],
256
+ 'Pp' => [['PRON', lambda { |x| true }, 'PronType=Prs']],
257
+ 'Pr' => [['PRON', lambda { |x| true }, 'PronType=Rel']],
258
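+ 'Ps' => [['DET', lambda { |x| true }, 'Poss=Yes']], ### NB no evidence for a pronominal/determiner-like nature here (NOTE(review): this remark was written when the tag was ADJ; confirm it still holds now that the tag is DET)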
259
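+ 'Pt' => [['DET', lambda { |x| true }, 'Poss=Yes|Reflex=Yes' ]], ### NB no evidence for a pronominal/determiner-like nature here (NOTE(review): this remark was written when the tag was ADJ; confirm it still holds now that the tag is DET)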
260
+ 'Px' => [['DET', lambda { |x| true } ]],
261
+ 'Py' => [['PRON', lambda { |x| true } ]],
173
262
  'R-' => [['ADP', lambda { |x| true } ]],
174
263
  'V-' => [['AUX', lambda(&:auxiliary?)],
175
264
  ['VERB', lambda { |x| true } ]],
176
- 'S-' => [['DET', lambda { |x| true }, "Definite=Def|PronType=Dem"]], # (we only have definite articles)
265
+ 'S-' => [['DET', lambda { |x| true }, 'Definite=Def|PronType=Dem']], # (we only have definite articles)
177
266
  'X-' => [['X', lambda { |x| true } ]]
178
- }
179
-
267
+ }
268
+
180
269
  MORPHOLOGY_MAP = {
181
- :person => {'1' => 'Person=1',
182
- '2' => 'Person=2',
183
- '3' => 'Person=3' } ,
184
- :number => {'s' => 'Number=Sing',
185
- 'd' => 'Number=Dual',
270
+ :person => {'1' => 'Person=1',
271
+ '2' => 'Person=2',
272
+ '3' => 'Person=3' } ,
273
+ :number => {'s' => 'Number=Sing',
274
+ 'd' => 'Number=Dual',
186
275
  'p' => 'Number=Plur' } ,
187
- :tense => {'p' => 'Tense=Pres',
188
- 'i' => 'Tense=Past|Aspect=Imp',
189
- 'r' => 'Tense=Past|Aspect=Perf', #'Tense=Perfect',
190
- 's' => 'Aspect=Res',
276
+ :tense => {'p' => 'Tense=Pres',
277
+ 'i' => 'Tense=Past|Aspect=Imp',
278
+ 'r' => 'Tense=Past|Aspect=Perf', #'Tense=Perfect',
279
+ 's' => 'VerbForm=PartRes|Tense=Past',
191
280
  # tag Perf is not universal
192
- 'a' => 'Tense=Past|Aspect=Perf',
193
- 'u' => 'Tense=Past',
194
- 'l' => 'Tense=Pqp',
195
- 'f' => 'Tense=Fut',
281
+ 'a' => 'Tense=Past|Aspect=Perf',
282
+ 'u' => 'Tense=Past',
283
+ 'l' => 'Tense=Pqp',
284
+ 'f' => 'Tense=Fut',
196
285
  # tag FutPerfect is not universal
197
- 't' => 'Tense=Fut|Aspect=Perf', #FutPerfect'
286
+ 't' => 'Tense=Fut|Aspect=Perf', #FutPerfect'
198
287
  },
199
- :mood => {'i' => 'VerbForm=Fin|Mood=Ind',
200
- 's' => 'VerbForm=Fin|Mood=Sub',
201
- 'm' => 'VerbForm=Fin|Mood=Imp',
202
- 'o' => 'VerbForm=Fin|Mood=Opt',
203
- 'n' => 'VerbForm=Inf',
204
- 'p' => 'VerbForm=Part',
205
- 'd' => 'VerbForm=Ger',
288
+ :mood => {'i' => 'VerbForm=Fin|Mood=Ind',
289
+ 's' => 'VerbForm=Fin|Mood=Sub',
290
+ 'm' => 'VerbForm=Fin|Mood=Imp',
291
+ 'o' => 'VerbForm=Fin|Mood=Opt',
292
+ 'n' => 'VerbForm=Inf',
293
+ 'p' => 'VerbForm=Part',
294
+ 'd' => 'VerbForm=Ger',
206
295
  # Gdv (gerundive) is not universal
207
- 'g' => 'VerbForm=Gdv',
208
- 'u' => 'VerbForm=Sup',
209
- 'e'=> 'VerbForm=Fin|Mood=Ind,Sub',
210
- 'f'=> 'VerbForm=Fin|Mood=Imp,Ind',
211
- 'h'=> 'VerbForm=Fin|Mood=Imp,Sub',
296
+ 'g' => 'VerbForm=Gdv',
297
+ 'u' => 'VerbForm=Sup',
298
+ 'e'=> 'VerbForm=Fin|Mood=Ind,Sub',
299
+ 'f'=> 'VerbForm=Fin|Mood=Imp,Ind',
300
+ 'h'=> 'VerbForm=Fin|Mood=Imp,Sub',
212
301
  't' => 'VerbForm=Fin' },
213
- :voice => {'a' => 'Voice=Act',
302
+ :voice => {'a' => 'Voice=Act',
214
303
  # Med is not universal
215
- 'm' => 'Voice=Mid',
216
- 'p' => 'Voice=Pass',
304
+ 'm' => 'Voice=Mid',
305
+ 'p' => 'Voice=Pass',
217
306
  'e' => 'Voice=Mid,Pass' },
218
307
  :gender => {'m' => 'Gender=Masc',
219
308
  'f' => 'Gender=Fem',
@@ -221,27 +310,28 @@ module PROIEL
221
310
  'p' => 'Gender=Fem,Masc',
222
311
  'o' => 'Gender=Masc,Neut',
223
312
  'r' => 'Gender=Fem,Neut' },
224
- :case => {'n' => 'Case=Nom',
225
- 'a' => 'Case=Acc',
313
+ :case => {'n' => 'Case=Nom',
314
+ 'a' => 'Case=Acc',
226
315
  # Obl(ique) is not universal
227
- 'o' => 'Case=Obl',
228
- 'g' => 'Case=Gen',
229
- 'c' => 'Case=Dat,Gen',
230
- 'e' => 'Case=Acc,Dat',
231
- 'd' => 'Case=Dat',
232
- 'b' => 'Case=Abl',
233
- 'i' => 'Case=Ins',
234
- 'l' => 'Case=Loc',
316
+ 'o' => 'Case=Obl',
317
+ 'g' => 'Case=Gen',
318
+ 'c' => 'Case=Dat,Gen',
319
+ 'e' => 'Case=Acc,Dat',
320
+ 'd' => 'Case=Dat',
321
+ 'b' => 'Case=Abl',
322
+ 'i' => 'Case=Ins',
323
+ 'l' => 'Case=Loc',
235
324
  'v' => 'Case=Voc' },
236
- :degree => {'p' => 'Degree=Pos',
237
- 'c' => 'Degree=Cmp',
325
+ :degree => {'p' => 'Degree=Pos',
326
+ 'c' => 'Degree=Cmp',
238
327
  's' => 'Degree=Sup' },
239
328
  # The whole strength category is not universal
240
- :strength => {'w' => 'Strength=Weak',
241
- 's' => 'Strength=Strong'},
329
+ :strength => {'s' => 'Strength=Strong',
330
+ 'w' => 'Strength=Weak' },
331
+
242
332
  :inflection => {},
243
333
  }
244
334
  end
245
335
  end
246
336
  end
247
-
337
+
@@ -2,86 +2,132 @@ module PROIEL
2
2
  module Converter
3
3
  class CoNLLU
4
4
 
5
- OBLIQUENESS_HIERARCHY = ["nsubj", "obj", "iobj", "obl", "advmod", "csubj", "xcomp", "ccomp", "advcl"]
6
-
5
+ OBLIQUENESS_HIERARCHY = ['nsubj', 'obj', 'iobj', 'obl', 'advmod', 'csubj', 'xcomp', 'ccomp', 'advcl']
6
+ REL_TO_POS = {
7
+ 'acl' => 'VERB',
8
+ 'advcl' => 'VERB',
9
+ 'advcl:cmp' => 'NOUN',
10
+ 'advmod' => 'ADV',
11
+ 'amod' => 'ADJ',
12
+ 'appos' => 'NOUN',
13
+ 'ccomp' => 'VERB',
14
+ 'conj' => 'X',
15
+ 'csubj' => 'VERB',
16
+ 'csubj:pass' => 'NOUN',
17
+ 'dep' => 'X',
18
+ 'det' => 'DET',
19
+ 'dislocated' => 'X',
20
+ 'fixed' => 'X',
21
+ 'flat:foreign' => 'X',
22
+ 'flat:name' => 'PROPN',
23
+ 'nmod' => 'NOUN',
24
+ 'nsubj' => 'NOUN',
25
+ 'nsubj:pass' => 'NOUN',
26
+ 'nsubj:outer' => 'NOUN',
27
+ 'nummod' => 'NUM',
28
+ 'obj' => 'NOUN',
29
+ 'obl' => 'NOUN',
30
+ 'obl:agent' => 'NOUN',
31
+ 'obl:arg' => 'NOUN',
32
+ 'orphan' => 'NOUN',
33
+ 'parataxis' => 'VERB',
34
+ 'root' => 'VERB',
35
+ 'vocative' => 'NOUN',
36
+ 'xcomp' => 'VERB'
37
+ }
38
+
7
39
  RELATION_MAPPING = {
8
- "adnom" => "dep",
9
- "adv" => [["advcl", lambda(&:clausal?) ],
10
- ["advmod", lambda { |x| x.adverb? or x.preposition? } ],
11
- ["advmod", lambda(&:adjectival?) ], # adjective for adverb
12
- ["obl", lambda(&:nominal?) ],
13
- ["advmod", lambda { |x| true } ],
40
+ 'adnom' => 'dep',
41
+ 'adv' => [['advcl', lambda(&:clausal?) ],
42
+ ['advmod', lambda { |x| x.adverb? } ],
43
+ ['advmod', lambda(&:adjectival?) ], # adjective for adverb
44
+ ['obl', lambda { |x| x.nominal? or x.preposition? or x.has_preposition? } ],
45
+ ['advcl', lambda(&:subjunction?) ],
46
+ ['obl', lambda { |x| true } ],
14
47
  ],
15
- "ag" => "obl:agent", # add :agent" once defined
16
- "apos" => [["flat:name", lambda { |x| x.proper_noun? and x.head and x.head.proper_noun? } ],
17
- ["appos", lambda { |x| (x.nominal? or x.adjectival?) and x.head and x.head.nominal? } ],
18
- ["acl", lambda { |x| x.clausal? and x.head and x.head.nominal? } ], # add :relcl ?
19
- # what to do about sentential appositions?
20
- ["advcl", lambda(&:clausal?) ],
21
- ["appos", lambda { |x| true } ],
48
+ 'ag' => 'obl:agent', # add :agent once defined
49
+ 'apos' => [['flat:name', lambda { |x| x.proper_noun? and x.head and x.head.proper_noun? } ],
50
+ ['acl', lambda { |x| x.clausal? and x.head and x.head.nominal? } ], # add :relcl ?
51
+
52
+ ['appos', lambda { |x| (x.nominal? or x.adjectival?) and x.head and x.head.nominal? } ],
53
+ ['parataxis', lambda { |x| x.clausal? and x.head and x.head.clausal? } ],
54
+ # what to do about sentential appositions? attempt here to make them parataxis, but there are some legitimate nominal appos under root nominals, so overgenerates slightly
55
+ ['advcl', lambda(&:clausal?) ],
56
+ ['appos', lambda { |x| true } ],
22
57
  ],
23
- "arg" => "dep",
24
- "atr" => [["nummod", lambda(&:cardinal?) ],
25
- ["det", lambda { |x| x.pronominal? and !(!x.genitive? and x.head and x.head.genitive?) } ], #TODO check
26
- ["nmod", lambda(&:nominal?) ],
27
- ["acl", lambda { |x| x.clausal? } ], # add :relcl?
28
- ["advmod", lambda { |x| x.head and x.head.clausal? } ],
29
- ["amod", lambda { |x| true } ], #default
58
+ 'arg' => 'dep',
59
+ 'atr' => [['nummod', lambda(&:cardinal?) ],
60
+ ['det', lambda { |x| x.pronominal? and !x.clausal? and !(!x.genitive? and x.head and x.head.genitive?) } ], #TODO check
61
+ ['acl', lambda { |x| x.clausal? } ], # add :relcl?
62
+ ['nmod', lambda(&:nominal?) ],
63
+ ['advmod', lambda { |x| x.head and !x.head.nominal? and x.head.clausal? } ],
64
+ ['amod', lambda { |x| true } ], #default
30
65
  ],
31
- "aux" => [["det", lambda(&:determiner?) ],
32
- ["aux:pass", lambda { |x| x.clausal? and x.head.passive? } ],
33
- ["aux", lambda(&:clausal?) ], #v2 probably want the modal particle an to go here too in
34
- ["advmod", lambda(&:negation?) ],
35
- ["discourse", lambda { |x| x.particle? or x.interjection? } ],
36
- # include subjunctions that are aux here; (root sentences with subjunction)
37
- ["advmod", lambda { |x| x.adjectival? or x.adverb? or x.subjunction? } ],
38
- ["cc", lambda(&:conjunction?) ],
39
- ["flat:foreign", lambda(&:foreign?) ],
66
+ 'aux' => [['det', lambda(&:determiner?) ],
67
+ ['fixed', lambda { |x| x.head and x.head.subjunction? } ],
68
+ ['fixed', lambda { |x| x.head and x.head.conjunction? } ],
69
+ ['fixed', lambda { |x| x.head and x.head.adverb? and x.relative? } ],
70
+ ['fixed', lambda { |x| x.head and x.head.pronominal? and x.verb? } ],
71
+ ['aux:pass', lambda { |x| x.clausal? and x.head.passive? } ],
72
+ ['aux', lambda(&:clausal?) ], # v2: probably want the modal particle an (ἄν) to go here too
73
+ ['advmod', lambda(&:negation?) ],
74
+ ['discourse', lambda { |x| x.particle? or x.interjection? } ],
75
+ ['advmod', lambda { |x| x.adjectival? or x.adverb? } ],
76
+ # make subjunctions in root sentences "mark"
77
+ ['mark', lambda { |x| x.subjunction? } ],
78
+ ['cc', lambda(&:conjunction?) ],
79
+ ['flat:foreign', lambda(&:foreign?) ],
40
80
  # We need some more distinctions to get Gothic and Armenian. Introduce language in the treebank? (Read from xml)
41
- ["mark", lambda { |x| ['R-'].include? x.part_of_speech } ], #'R-' as infinitive marker in Gothic
42
- ["aux", lambda { |x| ['Pk' ].include? x.part_of_speech } ], #reflexive as valency reducer
81
+ ['mark', lambda { |x| ['R-'].include? x.part_of_speech } ], #"R-" as infinitive marker in Gothic
82
+ ['expl:pv', lambda { |x| ['Pk' ].include? x.part_of_speech } ], #reflexive as valency reducer
43
83
  ['amod', lambda { |x| x.preposition? } ], # Armenian DOM
44
84
  ['fixed', lambda { |x| ['Px', 'Pr'].include? x.part_of_speech } ], # NB there are a lot of bogus annotations with 'Px'
45
-
85
+
46
86
  # MISANNOTATION IF A NOUN or a 'Pi' or a 'Pp' or a 'Ps'
47
87
  ],
48
- "comp" => [['csubj:pass', lambda { |x| x.head and x.head.passive? } ],
49
- ['csubj', lambda { |x| x.head and x.head.copula? } ],
88
+ 'comp' => [['csubj:pass', lambda { |x| x.head and x.head.passive? and !x.head.has_subject?} ],
89
+ ['csubj', lambda { |x| x.head and x.head.has_copula? and !x.head.has_subject?} ],
50
90
  ['ccomp', lambda { |x| true } ],
51
91
  ],
52
- "expl" => "expl",
53
- "narg" => [['acl', lambda(&:clausal?) ],
54
- ['nmod', lambda(&:nominal?) ],
92
+ 'expl' => 'expl',
93
+ 'narg' => [['acl', lambda(&:clausal?) ],
94
+ ['nmod', lambda(&:nominal?) ],
55
95
  ['nmod', lambda(&:adjectival?) ], # nominalized in this function
56
96
  ['nmod', lambda { |x| true } ],
57
97
  ],
58
- "nonsub" => "dep",
59
- "obj" => "obj:dir",
60
- "obl" => [# normally a preposition will be subordinate to its noun, this captures adverbial use of prepositions
61
- ["advmod", lambda { |x| x.adverb? or x.preposition? } ],
62
- ["obl", lambda { |x| x.has_preposition? } ],
63
- ["iobj", lambda(&:nominal?) ],# if nominal (NB check for presence of article!) TODO: should be "obj" if the verb is monovalent (even by elision)
64
- ["iobj", lambda(&:adjectival?) ], # OBL adjectives are nominalized
65
- ["advcl", lambda(&:clausal?) ], # this seems to happen with ad libros legendos etc. but check closer!
66
- ["iobj", lambda { |x| true } ],
98
+ 'nonsub' => 'dep',
99
+ 'obj' => 'obj',
100
+ 'obl' => [# normally a preposition will be subordinate to its noun, this captures adverbial use of prepositions
101
+ ['advmod', lambda { |x| x.adverb? } ],
102
+ ['obl', lambda { |x| x.has_preposition? or x.preposition? } ],
103
+ ['obl', lambda { |x| x.head and x.head.adverb? } ],
104
+ ['obl:arg', lambda { |x| (x.nominal? or x.adjectival?) and x.head and x.head.clausal? } ],# if nominal (NB check for presence of article!) TODO: should be 'obj' if the verb is monovalent (even by elision)
105
+ #['obl:arg', lambda(&:adjectival?) ], # OBL adjectives are nominalized
106
+ ['advcl', lambda(&:clausal?) ], # this seems to happen with ad libros legendos etc. but check closer!
107
+ ['obl', lambda { |x| true } ],
67
108
  ],
68
- "parpred" => "parataxis",
69
- "part" => "nmod",
70
- "per" => "dep",
71
- "pid" => ["ERROR", lambda { |x| raise "Remaining pid edge!" } ],
72
- "pred" => [["root", lambda(&:root?) ],
73
- ["ERROR", lambda { |x| raise "#{x.to_n} (head_id #{x.head_id}) is not a root!" }],
109
+ 'parpred' => 'parataxis',
110
+ 'part' => 'nmod',
111
+ 'per' => 'dep',
112
+ 'pid' => ['ERROR', lambda { |x| raise 'Remaining pid edge!' } ],
113
+ 'pred' => [['root', lambda(&:root?) ],
114
+ ['ERROR', lambda { |x| raise "#{x.to_n} (head_id #{x.head_id}) is not a root!" }],
74
115
  ],
75
- "rel" => "acl", # add :relcl?
76
- "sub" => [["nsubj:pass", lambda { |x| x.head and x.head.passive? } ],
77
- ["nsubj", lambda { |x| true }],
116
+ 'rel' => 'acl', # add :relcl?
117
+ 'sub' => [['nsubj:pass', lambda { |x| x.head and x.head.passive? } ],
118
+ #['obl', lambda { |x| x.head and x.head.part_of_speech == 'Df' } ],
119
+ ['nsubj', lambda { |x| true }],
120
+ ],
121
+ 'voc' => [['discourse', lambda { |x| x.part_of_speech == 'I-' } ],
122
+ ['vocative', lambda { |x| true } ],
78
123
  ],
79
- "voc" => "vocative",
80
- "xadv" => [["advcl", lambda(&:clausal?)], #add :contr ?
81
- ["advmod", lambda { |x| true } ], # add :contr ?
124
+ 'xadv' => [['advcl', lambda(&:clausal?)], #add :contr ?
125
+ ['xcomp', lambda { |x| x.nominal? or x.pronominal? or x.cardinal?} ],
126
+ ['advcl', lambda(&:subjunction?)],
127
+ ['advmod', lambda { |x| true } ], # add :contr ?
82
128
  ],
83
- "xobj" => "xcomp", # copula cases have already been taken care of
84
- "xsub" => "xsub",
129
+ 'xobj' => 'xcomp', # copula cases have already been taken care of
130
+ 'xsub' => 'xsub',
85
131
  }
86
132
  end
87
133
  end