proiel-cli 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +18 -3
- data/bin/proiel +1 -1
- data/lib/proiel/cli/commands/build.rb +91 -0
- data/lib/proiel/cli/commands/convert.rb +7 -2
- data/lib/proiel/cli/commands/dictionary.rb +46 -0
- data/lib/proiel/cli/commands/info.rb +1 -1
- data/lib/proiel/cli/commands/shell.rb +34 -0
- data/lib/proiel/cli/commands/tokenize.rb +2 -2
- data/lib/proiel/cli/commands/validate.rb +6 -4
- data/lib/proiel/cli/commands/visualize.rb +14 -11
- data/lib/proiel/cli/converters/conll-u/morphology.rb +162 -72
- data/lib/proiel/cli/converters/conll-u/syntax.rb +108 -62
- data/lib/proiel/cli/converters/conll-u.rb +648 -548
- data/lib/proiel/cli/converters/conll-x.rb +67 -52
- data/lib/proiel/cli/converters/lexc.rb +21 -23
- data/lib/proiel/cli/converters/proielxml.rb +173 -132
- data/lib/proiel/cli/converters/text.rb +69 -71
- data/lib/proiel/cli/converters/tiger.rb +110 -114
- data/lib/proiel/cli/converters/tiger2.rb +139 -141
- data/lib/proiel/cli/converters/tnt.rb +19 -15
- data/lib/proiel/cli/version.rb +1 -1
- data/lib/proiel/cli.rb +26 -1
- metadata +43 -58
- data/bin/setup +0 -8
- data/contrib/proiel-tnt-train +0 -15
- data/lib/proiel/cli/commands.rb +0 -28
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
1
|
module PROIEL
|
|
3
2
|
module Converter
|
|
4
3
|
class CoNLLU
|
|
@@ -6,10 +5,10 @@ module PROIEL
|
|
|
6
5
|
# try to guess deponency based on the lemma
|
|
7
6
|
DEPONENTS = { 'lat' => /r\Z/,
|
|
8
7
|
'grc' => /ομαι\Z/ }
|
|
9
|
-
COPULAR_LEMMATA = ['sum,V-,lat', 'εἰμί#1,V-,grc']
|
|
8
|
+
COPULAR_LEMMATA = ['sum,V-,lat', 'eo#2,V-,lat','εἰμί#1,V-,grc', 'быти,V-,orv','стати#2,V-,orv','бꙑти,V-,chu']
|
|
10
9
|
AUXILIARIES = COPULAR_LEMMATA + []
|
|
11
|
-
DETERMINERS = ['S-', 'Pd', 'Px']
|
|
12
|
-
NEGATION_LEMMATA = ['non,Df,lat', 'ne,Df,lat',
|
|
10
|
+
DETERMINERS = ['S-', 'Pd', 'Px']
|
|
11
|
+
NEGATION_LEMMATA = ['non,Df,lat', 'ne,Df,lat',
|
|
13
12
|
'μή,Df,grc',
|
|
14
13
|
'μήγε,Df,grc',
|
|
15
14
|
'μηδαμῶς,Df,grc',
|
|
@@ -39,28 +38,38 @@ module PROIEL
|
|
|
39
38
|
'ni,Df,got',
|
|
40
39
|
'nibai#2,Df,got',
|
|
41
40
|
'nih,Df,got',
|
|
41
|
+
'не,Df,orv',
|
|
42
|
+
'ни,Df,orv',
|
|
43
|
+
'ниже,Df,orv',
|
|
44
|
+
'нѣ,Df,orv',
|
|
42
45
|
]
|
|
43
|
-
|
|
46
|
+
|
|
44
47
|
TAM_PARTICLE_LEMMATA = ['ἄν,Df,grc',
|
|
45
48
|
]
|
|
46
|
-
|
|
49
|
+
|
|
47
50
|
PARTICLE_LEMMATA = [ 'at,Df,lat',
|
|
48
51
|
'atque,Df,lat',
|
|
49
52
|
'autem,Df,lat',
|
|
50
53
|
'certe,Df,lat',
|
|
54
|
+
'en,Df,lat',
|
|
55
|
+
'equidem,Df,lat',
|
|
51
56
|
'ergo,Df,lat',
|
|
52
57
|
'et,Df,lat',
|
|
53
58
|
'enim,Df,lat',
|
|
59
|
+
'etenim,Df,lat',
|
|
54
60
|
'etiam,Df,lat',
|
|
55
61
|
'igitur,Df,lat',
|
|
56
62
|
'immo,Df,lat',
|
|
57
63
|
'itaque,Df,lat',
|
|
58
64
|
'nam,Df,lat',
|
|
65
|
+
'namque,Df,lat',
|
|
59
66
|
'nonne,Df,lat',
|
|
60
67
|
'nonne,Du,lat',
|
|
68
|
+
'num,Df,lat',
|
|
61
69
|
'quidem,Df,lat',
|
|
62
70
|
'quoque,Df,lat',
|
|
63
71
|
'sic,Df,lat',
|
|
72
|
+
'siquidem,Df,lat',
|
|
64
73
|
'tamen,Df,lat',
|
|
65
74
|
'tum,Df,lat',
|
|
66
75
|
'tunc,Df,lat',
|
|
@@ -138,82 +147,162 @@ module PROIEL
|
|
|
138
147
|
'þannu,Df,got',
|
|
139
148
|
'þanuh,Df,got',
|
|
140
149
|
'þaruh,Df,got',
|
|
150
|
+
'али,Df,orv',
|
|
151
|
+
'аль,Df,orv',
|
|
152
|
+
'ано,Df,orv',
|
|
153
|
+
'атъ,Df,orv',
|
|
154
|
+
'ать,Df,orv',
|
|
155
|
+
'бо,Df,orv',
|
|
156
|
+
'вѣдь,Df,orv',
|
|
157
|
+
'да#2,Df,orv',
|
|
158
|
+
'еда,Df,orv',
|
|
159
|
+
'же,Df,orv',
|
|
160
|
+
'зане,Df,orv',
|
|
161
|
+
'занеже,Df,orv',
|
|
162
|
+
'ибо,Df,orv',
|
|
163
|
+
'ино,Df,orv',
|
|
164
|
+
'ли,Df,orv',
|
|
165
|
+
'ну,Df,orv',
|
|
166
|
+
'понеже,Df,orv',
|
|
167
|
+
'си,Df,orv',
|
|
168
|
+
'ти,Df,orv',
|
|
169
|
+
'убо,Df,orv',
|
|
170
|
+
'ужь,Df,orv',
|
|
171
|
+
'ци,Df,orv',
|
|
172
|
+
'яко,Df,orv',
|
|
173
|
+
'якоже,Df,orv',
|
|
141
174
|
]
|
|
142
175
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
176
|
+
COMPARISON_LEMMATA = ['alja,Df,got',
|
|
177
|
+
'ar̄awel,Df,xcl',
|
|
178
|
+
'atque,Df,lat',
|
|
179
|
+
'baycʻ,Df,xcl',
|
|
180
|
+
'etʻe,Df,xcl',
|
|
181
|
+
'ibrew,Df,xcl',
|
|
182
|
+
'ibrew z-,Df,xcl',
|
|
183
|
+
'kʻan z,Df,xcl',
|
|
184
|
+
'licet,Df,lat',
|
|
185
|
+
'nibai,Df,got',
|
|
186
|
+
'nisi,Df,lat',
|
|
187
|
+
'orpēs,Df,xcl',
|
|
188
|
+
'praeterquam,Df,lat',
|
|
189
|
+
'quam,Df,lat',
|
|
190
|
+
'quasi,Df,lat',
|
|
191
|
+
'quemadmodum,Df,lat',
|
|
192
|
+
'si,Df,lat',
|
|
193
|
+
'sicut,Df,lat',
|
|
194
|
+
'swaswe,Df,got',
|
|
195
|
+
'swe,Df,got',
|
|
196
|
+
'tamquam,Df,lat',
|
|
197
|
+
'tʻe,Df,xcl',
|
|
198
|
+
'ut,Df,lat',
|
|
199
|
+
'velut,Df,lat',
|
|
200
|
+
'þau,Df,got',
|
|
201
|
+
'ἅτε,Df,grc',
|
|
202
|
+
'εἰ,Df,grc',
|
|
203
|
+
'ἤ,Df,grc',
|
|
204
|
+
'ἤπερ,Df,grc',
|
|
205
|
+
'καθάπερ,Df,grc',
|
|
206
|
+
'καθώς,Df,grc',
|
|
207
|
+
'οἷα,Df,grc',
|
|
208
|
+
'ὁμοίως,Df,grc',
|
|
209
|
+
'ὅτι,Df,grc',
|
|
210
|
+
'ὡς,Df,grc',
|
|
211
|
+
'ὡσεί,Df,grc',
|
|
212
|
+
'ὥσπερ,Df,grc',
|
|
213
|
+
'ако,Df,orv',
|
|
214
|
+
'акъже,Df,orv',
|
|
215
|
+
'акы,Df,orv',
|
|
216
|
+
'акꙑ,Df,chu',
|
|
217
|
+
'будьто,Df,orv',
|
|
218
|
+
'како,Df,orv',
|
|
219
|
+
'ли,Df,chu',
|
|
220
|
+
'неже,Df,chu',
|
|
221
|
+
'нежели,Df,chu',
|
|
222
|
+
'нежели,Df,orv',
|
|
223
|
+
'окꙑ,Df,chu',
|
|
224
|
+
'развѣ,Df,chu',
|
|
225
|
+
'тъкъмо,Df,chu',
|
|
226
|
+
'чьто,Df,orv',
|
|
227
|
+
'яко,Df,orv',
|
|
228
|
+
'якоже,Df,orv',
|
|
229
|
+
'ꙗко,Df,chu',
|
|
230
|
+
'ꙗкоже,Df,chu'
|
|
231
|
+
]
|
|
232
|
+
|
|
233
|
+
POS_MAP =
|
|
234
|
+
{
|
|
146
235
|
'A-' => [['ADJ', lambda { |x| true } ]],
|
|
147
236
|
'C-' => [['CCONJ', lambda { |x| true } ]],
|
|
148
|
-
'Df' => [['AUX', lambda(&:
|
|
149
|
-
['ADV', lambda(&:negation?),
|
|
237
|
+
'Df' => [['AUX', lambda(&:tam_particle?)],
|
|
238
|
+
['ADV', lambda(&:negation?), 'Polarity=Neg'],
|
|
150
239
|
['ADV', lambda { |x| true } ]
|
|
151
240
|
],
|
|
152
|
-
'Dq' => [['ADV', lambda { |x| true },
|
|
153
|
-
'Du' => [['ADV', lambda { |x| true },
|
|
241
|
+
'Dq' => [['ADV', lambda { |x| true }, 'PronType=Rel']],
|
|
242
|
+
'Du' => [['ADV', lambda { |x| true }, 'PronType=Int']],
|
|
154
243
|
'F-' => [['X', lambda { |x| true } ]],
|
|
155
244
|
'G-' => [['SCONJ', lambda { |x| true } ]],
|
|
156
245
|
'I-' => [['INTJ', lambda { |x| true } ]],
|
|
157
|
-
'Ma' => [['NUM', lambda { |x| true } ]],
|
|
158
|
-
'Mo' => [['ADJ', lambda { |x| true } ]],
|
|
246
|
+
'Ma' => [['NUM', lambda { |x| true } ]],
|
|
247
|
+
'Mo' => [['ADJ', lambda { |x| true } ]],
|
|
159
248
|
'N-' => [['SCONJ', lambda { |x| true } ]], #irrelevant for our purposes
|
|
160
249
|
'Nb' => [['NOUN', lambda { |x| true } ]],
|
|
161
250
|
'Ne' => [['PROPN', lambda { |x| true } ]],
|
|
162
|
-
'Pc' => [['PRON', lambda { |x| true },
|
|
163
|
-
'Pd' => [['DET', lambda { |x| true } ]],
|
|
164
|
-
'Pi' => [['PRON', lambda { |x| true },
|
|
251
|
+
'Pc' => [['PRON', lambda { |x| true }, 'PronType=Rcp']],
|
|
252
|
+
'Pd' => [['DET', lambda { |x| true } ]],
|
|
253
|
+
'Pi' => [['PRON', lambda { |x| true }, 'PronType=Int']],
|
|
165
254
|
'Pk' => [['AUX', lambda { |x| x.relation == 'aux' }],
|
|
166
|
-
['PRON', lambda { |x| true },
|
|
167
|
-
'Pp' => [['PRON', lambda { |x| true },
|
|
168
|
-
'Pr' => [['PRON', lambda { |x| true },
|
|
169
|
-
'Ps' => [['
|
|
170
|
-
'Pt' => [['
|
|
171
|
-
'Px' => [['DET', lambda { |x| true } ]],
|
|
172
|
-
'Py' => [['PRON', lambda { |x| true } ]],
|
|
255
|
+
['PRON', lambda { |x| true }, 'PronType=Prs|Reflex=Yes']],
|
|
256
|
+
'Pp' => [['PRON', lambda { |x| true }, 'PronType=Prs']],
|
|
257
|
+
'Pr' => [['PRON', lambda { |x| true }, 'PronType=Rel']],
|
|
258
|
+
'Ps' => [['DET', lambda { |x| true }, 'Poss=Yes']], ### NB no evidence for a pronominal/determiner-like nature here
|
|
259
|
+
'Pt' => [['DET', lambda { |x| true }, 'Poss=Yes|Reflex=Yes' ]], ### NB no evidence for a pronominal/determiner-like nature here
|
|
260
|
+
'Px' => [['DET', lambda { |x| true } ]],
|
|
261
|
+
'Py' => [['PRON', lambda { |x| true } ]],
|
|
173
262
|
'R-' => [['ADP', lambda { |x| true } ]],
|
|
174
263
|
'V-' => [['AUX', lambda(&:auxiliary?)],
|
|
175
264
|
['VERB', lambda { |x| true } ]],
|
|
176
|
-
'S-' => [['DET', lambda { |x| true },
|
|
265
|
+
'S-' => [['DET', lambda { |x| true }, 'Definite=Def|PronType=Dem']], # (we only have definite articles)
|
|
177
266
|
'X-' => [['X', lambda { |x| true } ]]
|
|
178
|
-
|
|
179
|
-
|
|
267
|
+
}
|
|
268
|
+
|
|
180
269
|
MORPHOLOGY_MAP = {
|
|
181
|
-
:person => {'1' => 'Person=1',
|
|
182
|
-
'2' => 'Person=2',
|
|
183
|
-
'3' => 'Person=3' } ,
|
|
184
|
-
:number => {'s' => 'Number=Sing',
|
|
185
|
-
'd' => 'Number=Dual',
|
|
270
|
+
:person => {'1' => 'Person=1',
|
|
271
|
+
'2' => 'Person=2',
|
|
272
|
+
'3' => 'Person=3' } ,
|
|
273
|
+
:number => {'s' => 'Number=Sing',
|
|
274
|
+
'd' => 'Number=Dual',
|
|
186
275
|
'p' => 'Number=Plur' } ,
|
|
187
|
-
:tense => {'p' => 'Tense=Pres',
|
|
188
|
-
'i' => 'Tense=Past|Aspect=Imp',
|
|
189
|
-
'r' => 'Tense=Past|Aspect=Perf', #'Tense=Perfect',
|
|
190
|
-
's' => '
|
|
276
|
+
:tense => {'p' => 'Tense=Pres',
|
|
277
|
+
'i' => 'Tense=Past|Aspect=Imp',
|
|
278
|
+
'r' => 'Tense=Past|Aspect=Perf', #'Tense=Perfect',
|
|
279
|
+
's' => 'VerbForm=PartRes|Tense=Past',
|
|
191
280
|
# tags Perf is not universal
|
|
192
|
-
'a' => 'Tense=Past|Aspect=Perf',
|
|
193
|
-
'u' => 'Tense=Past',
|
|
194
|
-
'l' => 'Tense=Pqp',
|
|
195
|
-
'f' => 'Tense=Fut',
|
|
281
|
+
'a' => 'Tense=Past|Aspect=Perf',
|
|
282
|
+
'u' => 'Tense=Past',
|
|
283
|
+
'l' => 'Tense=Pqp',
|
|
284
|
+
'f' => 'Tense=Fut',
|
|
196
285
|
# tag FutPerfect is not universal
|
|
197
|
-
't' => 'Tense=Fut|Aspect=Perf', #FutPerfect'
|
|
286
|
+
't' => 'Tense=Fut|Aspect=Perf', #FutPerfect'
|
|
198
287
|
},
|
|
199
|
-
:mood => {'i' => 'VerbForm=Fin|Mood=Ind',
|
|
200
|
-
's' => 'VerbForm=Fin|Mood=Sub',
|
|
201
|
-
'm' => 'VerbForm=Fin|Mood=Imp',
|
|
202
|
-
'o' => 'VerbForm=Fin|Mood=Opt',
|
|
203
|
-
'n' => 'VerbForm=Inf',
|
|
204
|
-
'p' => 'VerbForm=Part',
|
|
205
|
-
'd' => 'VerbForm=Ger',
|
|
288
|
+
:mood => {'i' => 'VerbForm=Fin|Mood=Ind',
|
|
289
|
+
's' => 'VerbForm=Fin|Mood=Sub',
|
|
290
|
+
'm' => 'VerbForm=Fin|Mood=Imp',
|
|
291
|
+
'o' => 'VerbForm=Fin|Mood=Opt',
|
|
292
|
+
'n' => 'VerbForm=Inf',
|
|
293
|
+
'p' => 'VerbForm=Part',
|
|
294
|
+
'd' => 'VerbForm=Ger',
|
|
206
295
|
# Gdv (gerundive) is not universal
|
|
207
|
-
'g' => 'VerbForm=Gdv',
|
|
208
|
-
'u' => 'VerbForm=Sup',
|
|
209
|
-
'e'=> 'VerbForm=Fin|Mood=Ind,Sub',
|
|
210
|
-
'f'=> 'VerbForm=Fin|Mood=Imp,Ind',
|
|
211
|
-
'h'=> 'VerbForm=Fin|Mood=Imp,Sub',
|
|
296
|
+
'g' => 'VerbForm=Gdv',
|
|
297
|
+
'u' => 'VerbForm=Sup',
|
|
298
|
+
'e'=> 'VerbForm=Fin|Mood=Ind,Sub',
|
|
299
|
+
'f'=> 'VerbForm=Fin|Mood=Imp,Ind',
|
|
300
|
+
'h'=> 'VerbForm=Fin|Mood=Imp,Sub',
|
|
212
301
|
't' => 'VerbForm=Fin' },
|
|
213
|
-
:voice => {'a' => 'Voice=Act',
|
|
302
|
+
:voice => {'a' => 'Voice=Act',
|
|
214
303
|
# Med is not universal
|
|
215
|
-
'm' => 'Voice=Mid',
|
|
216
|
-
'p' => 'Voice=Pass',
|
|
304
|
+
'm' => 'Voice=Mid',
|
|
305
|
+
'p' => 'Voice=Pass',
|
|
217
306
|
'e' => 'Voice=Mid,Pass' },
|
|
218
307
|
:gender => {'m' => 'Gender=Masc',
|
|
219
308
|
'f' => 'Gender=Fem',
|
|
@@ -221,27 +310,28 @@ module PROIEL
|
|
|
221
310
|
'p' => 'Gender=Fem,Masc',
|
|
222
311
|
'o' => 'Gender=Masc,Neut',
|
|
223
312
|
'r' => 'Gender=Fem,Neut' },
|
|
224
|
-
:case => {'n' => 'Case=Nom',
|
|
225
|
-
'a' => 'Case=Acc',
|
|
313
|
+
:case => {'n' => 'Case=Nom',
|
|
314
|
+
'a' => 'Case=Acc',
|
|
226
315
|
# Obl(ique) is not universal
|
|
227
|
-
'o' => 'Case=Obl',
|
|
228
|
-
'g' => 'Case=Gen',
|
|
229
|
-
'c' => 'Case=Dat,Gen',
|
|
230
|
-
'e' => 'Case=Acc,Dat',
|
|
231
|
-
'd' => 'Case=Dat',
|
|
232
|
-
'b' => 'Case=Abl',
|
|
233
|
-
'i' => 'Case=Ins',
|
|
234
|
-
'l' => 'Case=Loc',
|
|
316
|
+
'o' => 'Case=Obl',
|
|
317
|
+
'g' => 'Case=Gen',
|
|
318
|
+
'c' => 'Case=Dat,Gen',
|
|
319
|
+
'e' => 'Case=Acc,Dat',
|
|
320
|
+
'd' => 'Case=Dat',
|
|
321
|
+
'b' => 'Case=Abl',
|
|
322
|
+
'i' => 'Case=Ins',
|
|
323
|
+
'l' => 'Case=Loc',
|
|
235
324
|
'v' => 'Case=Voc' },
|
|
236
|
-
:degree => {'p' => 'Degree=Pos',
|
|
237
|
-
'c' => 'Degree=Cmp',
|
|
325
|
+
:degree => {'p' => 'Degree=Pos',
|
|
326
|
+
'c' => 'Degree=Cmp',
|
|
238
327
|
's' => 'Degree=Sup' },
|
|
239
328
|
# The whole strength category is not universal
|
|
240
|
-
:strength => {'
|
|
241
|
-
'
|
|
329
|
+
:strength => {'s' => 'Strength=Strong',
|
|
330
|
+
'w' => 'Strength=Weak' },
|
|
331
|
+
|
|
242
332
|
:inflection => {},
|
|
243
333
|
}
|
|
244
334
|
end
|
|
245
335
|
end
|
|
246
336
|
end
|
|
247
|
-
|
|
337
|
+
|
|
@@ -2,86 +2,132 @@ module PROIEL
|
|
|
2
2
|
module Converter
|
|
3
3
|
class CoNLLU
|
|
4
4
|
|
|
5
|
-
OBLIQUENESS_HIERARCHY = [
|
|
6
|
-
|
|
5
|
+
OBLIQUENESS_HIERARCHY = ['nsubj', 'obj', 'iobj', 'obl', 'advmod', 'csubj', 'xcomp', 'ccomp', 'advcl']
|
|
6
|
+
REL_TO_POS = {
|
|
7
|
+
'acl' => 'VERB',
|
|
8
|
+
'advcl' => 'VERB',
|
|
9
|
+
'advcl:cmp' => 'NOUN',
|
|
10
|
+
'advmod' => 'ADV',
|
|
11
|
+
'amod' => 'ADJ',
|
|
12
|
+
'appos' => 'NOUN',
|
|
13
|
+
'ccomp' => 'VERB',
|
|
14
|
+
'conj' => 'X',
|
|
15
|
+
'csubj' => 'VERB',
|
|
16
|
+
'csubj:pass' => 'NOUN',
|
|
17
|
+
'dep' => 'X',
|
|
18
|
+
'det' => 'DET',
|
|
19
|
+
'dislocated' => 'X',
|
|
20
|
+
'fixed' => 'X',
|
|
21
|
+
'flat:foreign' => 'X',
|
|
22
|
+
'flat:name' => 'PROPN',
|
|
23
|
+
'nmod' => 'NOUN',
|
|
24
|
+
'nsubj' => 'NOUN',
|
|
25
|
+
'nsubj:pass' => 'NOUN',
|
|
26
|
+
'nsubj:outer' => 'NOUN',
|
|
27
|
+
'nummod' => 'NUM',
|
|
28
|
+
'obj' => 'NOUN',
|
|
29
|
+
'obl' => 'NOUN',
|
|
30
|
+
'obl:agent' => 'NOUN',
|
|
31
|
+
'obl:arg' => 'NOUN',
|
|
32
|
+
'orphan' => 'NOUN',
|
|
33
|
+
'parataxis' => 'VERB',
|
|
34
|
+
'root' => 'VERB',
|
|
35
|
+
'vocative' => 'NOUN',
|
|
36
|
+
'xcomp' => 'VERB'
|
|
37
|
+
}
|
|
38
|
+
|
|
7
39
|
RELATION_MAPPING = {
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
[
|
|
11
|
-
[
|
|
12
|
-
[
|
|
13
|
-
[
|
|
40
|
+
'adnom' => 'dep',
|
|
41
|
+
'adv' => [['advcl', lambda(&:clausal?) ],
|
|
42
|
+
['advmod', lambda { |x| x.adverb? } ],
|
|
43
|
+
['advmod', lambda(&:adjectival?) ], # adjective for adverb
|
|
44
|
+
['obl', lambda { |x| x.nominal? or x.preposition? or x.has_preposition? } ],
|
|
45
|
+
['advcl', lambda(&:subjunction?) ],
|
|
46
|
+
['obl', lambda { |x| true } ],
|
|
14
47
|
],
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
[
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
[
|
|
21
|
-
|
|
48
|
+
'ag' => 'obl:agent', # add :agent' once defined
|
|
49
|
+
'apos' => [['flat:name', lambda { |x| x.proper_noun? and x.head and x.head.proper_noun? } ],
|
|
50
|
+
['acl', lambda { |x| x.clausal? and x.head and x.head.nominal? } ], # add :relcl ?
|
|
51
|
+
|
|
52
|
+
['appos', lambda { |x| (x.nominal? or x.adjectival?) and x.head and x.head.nominal? } ],
|
|
53
|
+
['parataxis', lambda { |x| x.clausal? and x.head and x.head.clausal? } ],
|
|
54
|
+
# what to do about sentential appositions? attempt here to make them parataxis, but there are some legitimate nominal appos under root nominals, so overgenerates slightly
|
|
55
|
+
['advcl', lambda(&:clausal?) ],
|
|
56
|
+
['appos', lambda { |x| true } ],
|
|
22
57
|
],
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
[
|
|
26
|
-
[
|
|
27
|
-
[
|
|
28
|
-
[
|
|
29
|
-
[
|
|
58
|
+
'arg' => 'dep',
|
|
59
|
+
'atr' => [['nummod', lambda(&:cardinal?) ],
|
|
60
|
+
['det', lambda { |x| x.pronominal? and !x.clausal? and !(!x.genitive? and x.head and x.head.genitive?) } ], #TODO check
|
|
61
|
+
['acl', lambda { |x| x.clausal? } ], # add :relcl?
|
|
62
|
+
['nmod', lambda(&:nominal?) ],
|
|
63
|
+
['advmod', lambda { |x| x.head and !x.head.nominal? and x.head.clausal? } ],
|
|
64
|
+
['amod', lambda { |x| true } ], #default
|
|
30
65
|
],
|
|
31
|
-
|
|
32
|
-
[
|
|
33
|
-
[
|
|
34
|
-
[
|
|
35
|
-
[
|
|
36
|
-
|
|
37
|
-
[
|
|
38
|
-
[
|
|
39
|
-
[
|
|
66
|
+
'aux' => [['det', lambda(&:determiner?) ],
|
|
67
|
+
['fixed', lambda { |x| x.head and x.head.subjunction? } ],
|
|
68
|
+
['fixed', lambda { |x| x.head and x.head.conjunction? } ],
|
|
69
|
+
['fixed', lambda { |x| x.head and x.head.adverb? and x.relative? } ],
|
|
70
|
+
['fixed', lambda { |x| x.head and x.head.pronominal? and x.verb? } ],
|
|
71
|
+
['aux:pass', lambda { |x| x.clausal? and x.head.passive? } ],
|
|
72
|
+
['aux', lambda(&:clausal?) ], #v2 probably want the modal particle an to go here too in
|
|
73
|
+
['advmod', lambda(&:negation?) ],
|
|
74
|
+
['discourse', lambda { |x| x.particle? or x.interjection? } ],
|
|
75
|
+
['advmod', lambda { |x| x.adjectival? or x.adverb? } ],
|
|
76
|
+
# make subjunctions in root sentences "mark"
|
|
77
|
+
['mark', lambda { |x| x.subjunction? } ],
|
|
78
|
+
['cc', lambda(&:conjunction?) ],
|
|
79
|
+
['flat:foreign', lambda(&:foreign?) ],
|
|
40
80
|
# We need some more distinctions to get Gothic and Armenian. Introduce language in the treebank? (Read from xml)
|
|
41
|
-
[
|
|
42
|
-
[
|
|
81
|
+
['mark', lambda { |x| ['R-'].include? x.part_of_speech } ], #"R-" as infinitive marker in Gothic
|
|
82
|
+
['expl:pv', lambda { |x| ['Pk' ].include? x.part_of_speech } ], #reflexive as valency reducer
|
|
43
83
|
['amod', lambda { |x| x.preposition? } ], # Armenian DOM
|
|
44
84
|
['fixed', lambda { |x| ['Px', 'Pr'].include? x.part_of_speech } ], # NB there are a lot of bogus annotations with 'Px'
|
|
45
|
-
|
|
85
|
+
|
|
46
86
|
# MISANNOTATION IF A NOUN or a 'Pi' or a 'Pp' or a 'Ps'
|
|
47
87
|
],
|
|
48
|
-
|
|
49
|
-
['csubj', lambda { |x| x.head and x.head.
|
|
88
|
+
'comp' => [['csubj:pass', lambda { |x| x.head and x.head.passive? and !x.head.has_subject?} ],
|
|
89
|
+
['csubj', lambda { |x| x.head and x.head.has_copula? and !x.head.has_subject?} ],
|
|
50
90
|
['ccomp', lambda { |x| true } ],
|
|
51
91
|
],
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
['nmod', lambda(&:nominal?) ],
|
|
92
|
+
'expl' => 'expl',
|
|
93
|
+
'narg' => [['acl', lambda(&:clausal?) ],
|
|
94
|
+
['nmod', lambda(&:nominal?) ],
|
|
55
95
|
['nmod', lambda(&:adjectival?) ], # nominaliezed in this function
|
|
56
96
|
['nmod', lambda { |x| true } ],
|
|
57
97
|
],
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
[
|
|
62
|
-
[
|
|
63
|
-
[
|
|
64
|
-
[
|
|
65
|
-
[
|
|
66
|
-
[
|
|
98
|
+
'nonsub' => 'dep',
|
|
99
|
+
'obj' => 'obj',
|
|
100
|
+
'obl' => [# normally a preposition will be subordinate to its noun, this captures adverbial use of prepositions
|
|
101
|
+
['advmod', lambda { |x| x.adverb? } ],
|
|
102
|
+
['obl', lambda { |x| x.has_preposition? or x.preposition? } ],
|
|
103
|
+
['obl', lambda { |x| x.head and x.head.adverb? } ],
|
|
104
|
+
['obl:arg', lambda { |x| (x.nominal? or x.adjectival?) and x.head and x.head.clausal? } ],# if nominal (NB check for presence of article!) TODO: should be 'obj' if the verb is monovalent (even by elision)
|
|
105
|
+
#['obl:arg', lambda(&:adjectival?) ], # OBL adjectives are nominalized
|
|
106
|
+
['advcl', lambda(&:clausal?) ], # this seems to happen with ad libros legendos etc. but check closer!
|
|
107
|
+
['obl', lambda { |x| true } ],
|
|
67
108
|
],
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
[
|
|
109
|
+
'parpred' => 'parataxis',
|
|
110
|
+
'part' => 'nmod',
|
|
111
|
+
'per' => 'dep',
|
|
112
|
+
'pid' => ['ERROR', lambda { |x| raise 'Remaining pid edge!' } ],
|
|
113
|
+
'pred' => [['root', lambda(&:root?) ],
|
|
114
|
+
['ERROR', lambda { |x| raise "#{x.to_n} (head_id #{x.head_id}) is not a root!" }],
|
|
74
115
|
],
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
[
|
|
116
|
+
'rel' => 'acl', # add :relcl?
|
|
117
|
+
'sub' => [['nsubj:pass', lambda { |x| x.head and x.head.passive? } ],
|
|
118
|
+
#['obl', lambda { |x| x.head and x.head.part_of_speech == 'Df' } ],
|
|
119
|
+
['nsubj', lambda { |x| true }],
|
|
120
|
+
],
|
|
121
|
+
'voc' => [['discourse', lambda { |x| x.part_of_speech == 'I-' } ],
|
|
122
|
+
['vocative', lambda { |x| true } ],
|
|
78
123
|
],
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
[
|
|
124
|
+
'xadv' => [['advcl', lambda(&:clausal?)], #add :contr ?
|
|
125
|
+
['xcomp', lambda { |x| x.nominal? or x.pronominal? or x.cardinal?} ],
|
|
126
|
+
['advcl', lambda(&:subjunction?)],
|
|
127
|
+
['advmod', lambda { |x| true } ], # add :contr ?
|
|
82
128
|
],
|
|
83
|
-
|
|
84
|
-
|
|
129
|
+
'xobj' => 'xcomp', # copula cases have already been taken care of
|
|
130
|
+
'xsub' => 'xsub',
|
|
85
131
|
}
|
|
86
132
|
end
|
|
87
133
|
end
|