# encoding: utf-8

# The Russian MULTEXT-East specifications were developed in
# the scope of an effort to produce a publicly available tagged
# corpus of Russian; this corpus and accompanying resources are
# available from http://corpus.leeds.ac.uk/mocky/. The
# morphosyntactic specifications and corpus are documented in:
# Serge Sharoff, Mikhail Kopotev, Tomaž Erjavec, Anna Feldman,
# Dagmar Divjak. Designing and evaluating Russian tagsets.
# In Proc. LREC 2008, Marrakech, May, 2008.
#
# http://nl.ijs.si/ME/V4/msd/html/msd-ru.html
#
# This specification was translated into the Ruby language
# by [Dmitry Ustalov](http://eveel.ru).
#
module Myasorubka::MSD::Russian
  # Layout shared by every category table in this module:
  #
  #   code:  the one-character code of the category;
  #   attrs: an ordered Array of [attribute, { value_name => code }]
  #          pairs, where each code is a one-character String.
  #
  # NOTE(review): the order of the pairs presumably mirrors the
  # positional layout of a MULTEXT-East MSD tag, so entries must not
  # be reordered -- confirm against the code that consumes CATEGORIES.

  # Russian Noun.
  #
  NOUN = {
    code: 'N',
    attrs: [
      [:type, { common: 'c', proper: 'p' }],
      [:gender, { masculine: 'm', feminine: 'f', neuter: 'n', common: 'c' }],
      [:number, { singular: 's', plural: 'p' }],
      [:case, {
        nominative: 'n',
        genitive: 'g',
        dative: 'd',
        accusative: 'a',
        vocative: 'v',
        locative: 'l',
        instrumental: 'i'
      }],
      [:animate, { no: 'n', yes: 'y' }],
      [:case2, { partitive: 'p', locative: 'l' }]
    ]
  }

  # Russian Verb.
  #
  VERB = {
    code: 'V',
    attrs: [
      [:type, { main: 'm', auxiliary: 'a' }],
      [:vform, {
        indicative: 'i',
        imperative: 'm',
        conditional: 'c',
        infinitive: 'n',
        participle: 'p',
        gerund: 'g'
      }],
      [:tense, { present: 'p', future: 'f', past: 's' }],
      [:person, { first: '1', second: '2', third: '3' }],
      [:number, { singular: 's', plural: 'p' }],
      [:gender, { masculine: 'm', feminine: 'f', neuter: 'n' }],
      [:voice, { active: 'a', passive: 'p', medial: 'm' }],
      [:definiteness, { short_art: 's', full_art: 'f' }],
      [:aspect, { progressive: 'p', perfective: 'e', biaspectual: 'b' }],
      [:case, {
        nominative: 'n',
        genitive: 'g',
        dative: 'd',
        accusative: 'a',
        locative: 'l',
        instrumental: 'i'
      }]
    ]
  }

  # Russian Adjective.
  #
  ADJECTIVE = {
    code: 'A',
    attrs: [
      [:type, { qualificative: 'f', possessive: 's' }],
      [:degree, { positive: 'p', comparative: 'c', superlative: 's' }],
      [:gender, { masculine: 'm', feminine: 'f', neuter: 'n' }],
      [:number, { singular: 's', plural: 'p' }],
      [:case, {
        nominative: 'n',
        genitive: 'g',
        dative: 'd',
        accusative: 'a',
        locative: 'l',
        instrumental: 'i'
      }],
      [:definiteness, { short_art: 's', full_art: 'f' }]
    ]
  }

  # Russian Pronoun.
  #
  PRONOUN = {
    code: 'P',
    attrs: [
      [:type, {
        personal: 'p',
        demonstrative: 'd',
        indefinite: 'i',
        possessive: 's',
        interrogative: 'q',
        relative: 'r',
        reflexive: 'x',
        negative: 'z',
        nonspecific: 'n'
      }],
      [:person, { first: '1', second: '2', third: '3' }],
      [:gender, { masculine: 'm', feminine: 'f', neuter: 'n' }],
      [:number, { singular: 's', plural: 'p' }],
      [:case, {
        nominative: 'n',
        genitive: 'g',
        dative: 'd',
        accusative: 'a',
        vocative: 'v',
        locative: 'l',
        instrumental: 'i'
      }],
      [:syntactic_type, { nominal: 'n', adjectival: 'a', adverbial: 'r' }],
      [:animate, { no: 'n', yes: 'y' }]
    ]
  }

  # Russian Adverb.
  #
  ADVERB = {
    code: 'R',
    attrs: [
      [:degree, { positive: 'p', comparative: 'c', superlative: 's' }]
    ]
  }

  # Russian Adposition.
  #
  ADPOSITION = {
    code: 'S',
    attrs: [
      [:type, { preposition: 'p' }],
      [:formation, { simple: 's', compound: 'c' }],
      [:case, {
        nominative: 'n',
        genitive: 'g',
        dative: 'd',
        accusative: 'a',
        locative: 'l',
        instrumental: 'i'
      }]
    ]
  }

  # Russian Conjunction.
  #
  CONJUNCTION = {
    code: 'C',
    attrs: [
      [:type, { coordinating: 'c', subordinating: 's' }],
      [:formation, { simple: 's', compound: 'c' }],
      [:coord_type, { sentence: 'p', words: 'w' }],
      [:sub_type, { negative: 'z', positive: 'p' }]
    ]
  }

  # Russian Numeral.
  #
  NUMERAL = {
    code: 'M',
    attrs: [
      [:type, { cardinal: 'c', ordinal: 'o', multiple: 'm', collect: 'l' }],
      [:gender, { masculine: 'm', feminine: 'f', neuter: 'n' }],
      [:number, { singular: 's', plural: 'p' }],
      [:case, {
        nominative: 'n',
        genitive: 'g',
        dative: 'd',
        accusative: 'a',
        locative: 'l',
        instrumental: 'i'
      }],
      [:form, { digit: 'd', roman: 'r', letter: 'l' }],
      # NOTE(review): three value-less :blank entries sit between :form
      # and :animate; presumably they pad out positions that have no
      # values here -- confirm before removing or merging them.
      [:blank, {}],
      [:blank, {}],
      [:blank, {}],
      [:animate, { no: 'n', yes: 'y' }]
    ]
  }

  # Russian Particle.
  #
  PARTICLE = {
    code: 'Q',
    attrs: [
      [:formation, { simple: 's', compound: 'c' }]
    ]
  }

  # Russian Interjection.
  #
  INTERJECTION = {
    code: 'I',
    attrs: [
      [:formation, { simple: 's', compound: 'c' }]
    ]
  }

  # Russian Abbreviation.
  #
  ABBREVIATION = {
    code: 'Y',
    attrs: [
      [:syntactic_type, { nominal: 'n', adverbial: 'r' }],
      [:gender, { masculine: 'm', feminine: 'f', neuter: 'n' }],
      [:number, { singular: 's', plural: 'p' }],
      [:case, {
        nominative: 'n',
        genitive: 'g',
        dative: 'd',
        accusative: 'a',
        locative: 'l',
        instrumental: 'i'
      }]
    ]
  }

  # Russian Residual. It carries no attributes.
  #
  RESIDUAL = {
    code: 'X',
    attrs: []
  }

  # Russian Crutch.
  #
  # Some AOT definitions are written for meta `*` part of speech,
  # so we have to implement it.
  #
  CRUTCH = {
    code: '*',
    attrs: [
      [:gender, { masculine: 'm', feminine: 'f', neuter: 'n', common: 'c' }],
      [:animate, { no: 'n', yes: 'y' }],
      [:number, { singular: 's', plural: 'p' }],
      [:case, {
        nominative: 'n',
        genitive: 'g',
        dative: 'd',
        accusative: 'a',
        vocative: 'v',
        locative: 'l',
        instrumental: 'i'
      }],
      [:case2, { partitive: 'p', locative: 'l' }],
      [:aspect, { progressive: 'p', perfective: 'e', biaspectual: 'b' }],
      [:voice, { active: 'a', passive: 'p', medial: 'm' }],
      [:tense, { present: 'p', future: 'f', past: 's' }],
      [:person, { first: '1', second: '2', third: '3' }],
      [:definiteness, { short_art: 's', full_art: 'f' }],
      [:degree, { positive: 'p', comparative: 'c', superlative: 's' }]
    ]
  }

  # Actual part-of-speech mapping: category name => category table.
  #
  CATEGORIES = {
    noun: NOUN,
    verb: VERB,
    adjective: ADJECTIVE,
    pronoun: PRONOUN,
    adverb: ADVERB,
    adposition: ADPOSITION,
    conjunction: CONJUNCTION,
    numeral: NUMERAL,
    particle: PARTICLE,
    interjection: INTERJECTION,
    abbreviation: ABBREVIATION,
    residual: RESIDUAL,
    crutch: CRUTCH
  }
end