myasorubka 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,454 @@
1
+ # encoding: utf-8
2
+
3
+ # The Russian MULTEXT-East specifications were developed in
4
+ # the scope of an effort to produce a publicly available tagged
5
+ # corpus of Russian; this corpus and accompanying resources are
6
+ # available from http://corpus.leeds.ac.uk/mocky/. The
7
+ # morphosyntactic specifications and corpus are documented in:
8
+ # Serge Sharoff, Mikhail Kopotev, Tomaž Erjavec, Anna Feldman,
9
+ # Dagmar Divjak. Designing and evaluating Russian tagsets.
10
+ # In Proc. LREC 2008, Marrakech, May, 2008.
11
+ #
12
+ # http://nl.ijs.si/ME/V4/msd/html/msd-ru.html
13
+ #
14
+ # This specification was translated into the Ruby language
15
+ # by [Dmitry Ustalov](http://eveel.ru).
16
+ #
17
+ module Myasorubka::MSD::Russian
# Russian Noun.
#
# Category descriptor with the code 'N'. `:attrs` is an ordered list of
# `[attribute, { value_name => code }]` pairs; the order presumably
# mirrors the attribute positions of the MULTEXT-East specification
# (TODO confirm against the code that reads this table).
# Only the top-level hash is frozen; nested arrays and hashes are not.
#
NOUN = {
  code: 'N',
  attrs: [
    [:type, {
      common: 'c',
      proper: 'p'
    }],
    [:gender, {
      masculine: 'm',
      feminine: 'f',
      neuter: 'n',
      common: 'c'
    }],
    [:number, {
      singular: 's',
      plural: 'p'
    }],
    [:case, {
      nominative: 'n',
      genitive: 'g',
      dative: 'd',
      accusative: 'a',
      vocative: 'v',
      locative: 'l',
      instrumental: 'i'
    }],
    [:animate, {
      no: 'n',
      yes: 'y'
    }],
    [:case2, {
      partitive: 'p',
      locative: 'l'
    }]
  ]
}.freeze
56
+
# Russian Verb.
#
# Category descriptor with the code 'V'. `:attrs` is an ordered list of
# `[attribute, { value_name => code }]` pairs; the order presumably
# mirrors the attribute positions of the MULTEXT-East specification
# (TODO confirm against the code that reads this table).
# Only the top-level hash is frozen; nested arrays and hashes are not.
#
VERB = {
  code: 'V',
  attrs: [
    [:type, {
      main: 'm',
      auxiliary: 'a'
    }],
    [:vform, {
      indicative: 'i',
      imperative: 'm',
      conditional: 'c',
      infinitive: 'n',
      participle: 'p',
      gerund: 'g'
    }],
    [:tense, {
      present: 'p',
      future: 'f',
      past: 's'
    }],
    [:person, {
      first: '1',
      second: '2',
      third: '3'
    }],
    [:number, {
      singular: 's',
      plural: 'p'
    }],
    [:gender, {
      masculine: 'm',
      feminine: 'f',
      neuter: 'n'
    }],
    [:voice, {
      active: 'a',
      passive: 'p',
      medial: 'm'
    }],
    [:definiteness, {
      short_art: 's',
      full_art: 'f'
    }],
    [:aspect, {
      progressive: 'p',
      perfective: 'e',
      biaspectual: 'b'
    }],
    [:case, {
      nominative: 'n',
      genitive: 'g',
      dative: 'd',
      accusative: 'a',
      locative: 'l',
      instrumental: 'i'
    }]
  ]
}.freeze
117
+
# Russian Adjective.
#
# Category descriptor with the code 'A'. `:attrs` is an ordered list of
# `[attribute, { value_name => code }]` pairs; the order presumably
# mirrors the attribute positions of the MULTEXT-East specification
# (TODO confirm against the code that reads this table).
# Only the top-level hash is frozen; nested arrays and hashes are not.
#
ADJECTIVE = {
  code: 'A',
  attrs: [
    [:type, {
      qualificative: 'f',
      possessive: 's'
    }],
    [:degree, {
      positive: 'p',
      comparative: 'c',
      superlative: 's'
    }],
    [:gender, {
      masculine: 'm',
      feminine: 'f',
      neuter: 'n'
    }],
    [:number, {
      singular: 's',
      plural: 'p'
    }],
    [:case, {
      nominative: 'n',
      genitive: 'g',
      dative: 'd',
      accusative: 'a',
      locative: 'l',
      instrumental: 'i'
    }],
    [:definiteness, {
      short_art: 's',
      full_art: 'f'
    }]
  ]
}.freeze
155
+
# Russian Pronoun.
#
# Category descriptor with the code 'P'. `:attrs` is an ordered list of
# `[attribute, { value_name => code }]` pairs; the order presumably
# mirrors the attribute positions of the MULTEXT-East specification
# (TODO confirm against the code that reads this table).
# Only the top-level hash is frozen; nested arrays and hashes are not.
#
PRONOUN = {
  code: 'P',
  attrs: [
    [:type, {
      personal: 'p',
      demonstrative: 'd',
      indefinite: 'i',
      possessive: 's',
      interrogative: 'q',
      relative: 'r',
      reflexive: 'x',
      negative: 'z',
      nonspecific: 'n'
    }],
    [:person, {
      first: '1',
      second: '2',
      third: '3'
    }],
    [:gender, {
      masculine: 'm',
      feminine: 'f',
      neuter: 'n'
    }],
    [:number, {
      singular: 's',
      plural: 'p'
    }],
    [:case, {
      nominative: 'n',
      genitive: 'g',
      dative: 'd',
      accusative: 'a',
      vocative: 'v',
      locative: 'l',
      instrumental: 'i'
    }],
    [:syntactic_type, {
      nominal: 'n',
      adjectival: 'a',
      adverbial: 'r'
    }],
    [:animate, {
      no: 'n',
      yes: 'y'
    }]
  ]
}.freeze
206
+
# Russian Adverb.
#
# Category descriptor with the code 'R'. `:attrs` holds a single
# `[attribute, { value_name => code }]` pair for the degree.
# Only the top-level hash is frozen; nested arrays and hashes are not.
#
ADVERB = {
  code: 'R',
  attrs: [
    [:degree, {
      positive: 'p',
      comparative: 'c',
      superlative: 's'
    }]
  ]
}.freeze
219
+
# Russian Adposition.
#
# Category descriptor with the code 'S'. `:attrs` is an ordered list of
# `[attribute, { value_name => code }]` pairs; the order presumably
# mirrors the attribute positions of the MULTEXT-East specification
# (TODO confirm against the code that reads this table).
# Only the top-level hash is frozen; nested arrays and hashes are not.
#
ADPOSITION = {
  code: 'S',
  attrs: [
    [:type, {
      preposition: 'p'
    }],
    [:formation, {
      simple: 's',
      compound: 'c'
    }],
    [:case, {
      nominative: 'n',
      genitive: 'g',
      dative: 'd',
      accusative: 'a',
      locative: 'l',
      instrumental: 'i'
    }]
  ]
}.freeze
242
+
# Russian Conjunction.
#
# Category descriptor with the code 'C'. `:attrs` is an ordered list of
# `[attribute, { value_name => code }]` pairs; the order presumably
# mirrors the attribute positions of the MULTEXT-East specification
# (TODO confirm against the code that reads this table).
# Only the top-level hash is frozen; nested arrays and hashes are not.
#
CONJUNCTION = {
  code: 'C',
  attrs: [
    [:type, {
      coordinating: 'c',
      subordinating: 's'
    }],
    [:formation, {
      simple: 's',
      compound: 'c'
    }],
    [:coord_type, {
      sentence: 'p',
      words: 'w'
    }],
    [:sub_type, {
      negative: 'z',
      positive: 'p'
    }]
  ]
}.freeze
266
+
# Russian Numeral.
#
# Category descriptor with the code 'M'. `:attrs` is an ordered list of
# `[attribute, { value_name => code }]` pairs; the order presumably
# mirrors the attribute positions of the MULTEXT-East specification
# (TODO confirm against the code that reads this table).
# Only the top-level hash is frozen; nested arrays and hashes are not.
#
NUMERAL = {
  code: 'M',
  attrs: [
    [:type, {
      cardinal: 'c',
      ordinal: 'o',
      multiple: 'm',
      collect: 'l'
    }],
    [:gender, {
      masculine: 'm',
      feminine: 'f',
      neuter: 'n'
    }],
    [:number, {
      singular: 's',
      plural: 'p'
    }],
    [:case, {
      nominative: 'n',
      genitive: 'g',
      dative: 'd',
      accusative: 'a',
      locative: 'l',
      instrumental: 'i'
    }],
    [:form, {
      digit: 'd',
      roman: 'r',
      letter: 'l'
    }],
    # Three entries with no values sit between :form and :animate;
    # presumably placeholders for positions unused in Russian, kept so
    # that :animate stays at its index (TODO confirm against the spec).
    [:blank, {}],
    [:blank, {}],
    [:blank, {}],
    [:animate, {
      no: 'n',
      yes: 'y'
    }]
  ]
}.freeze
309
+
# Russian Particle.
#
# Category descriptor with the code 'Q'. `:attrs` holds a single
# `[attribute, { value_name => code }]` pair for the formation.
# Only the top-level hash is frozen; nested arrays and hashes are not.
#
PARTICLE = {
  code: 'Q',
  attrs: [
    [:formation, {
      simple: 's',
      compound: 'c'
    }]
  ]
}.freeze
321
+
# Russian Interjection.
#
# Category descriptor with the code 'I'. `:attrs` holds a single
# `[attribute, { value_name => code }]` pair for the formation.
# Only the top-level hash is frozen; nested arrays and hashes are not.
#
INTERJECTION = {
  code: 'I',
  attrs: [
    [:formation, {
      simple: 's',
      compound: 'c'
    }]
  ]
}.freeze
333
+
# Russian Abbreviation.
#
# Category descriptor with the code 'Y'. `:attrs` is an ordered list of
# `[attribute, { value_name => code }]` pairs; the order presumably
# mirrors the attribute positions of the MULTEXT-East specification
# (TODO confirm against the code that reads this table).
# Only the top-level hash is frozen; nested arrays and hashes are not.
#
ABBREVIATION = {
  code: 'Y',
  attrs: [
    [:syntactic_type, {
      nominal: 'n',
      adverbial: 'r'
    }],
    [:gender, {
      masculine: 'm',
      feminine: 'f',
      neuter: 'n'
    }],
    [:number, {
      singular: 's',
      plural: 'p'
    }],
    [:case, {
      nominative: 'n',
      genitive: 'g',
      dative: 'd',
      accusative: 'a',
      locative: 'l',
      instrumental: 'i'
    }]
  ]
}.freeze
362
+
# Russian Residual.
#
# Category descriptor with the code 'X' and an empty attribute list.
# Only the top-level hash is frozen; the nested array is not.
#
RESIDUAL = {
  code: 'X',
  attrs: []
}.freeze
369
+
# Russian Crutch.
#
# Some AOT definitions are written for meta `*` part of speech,
# so we have to implement it.
#
# Category descriptor with the code '*'. `:attrs` is an ordered list of
# `[attribute, { value_name => code }]` pairs.
# Only the top-level hash is frozen; nested arrays and hashes are not.
#
CRUTCH = {
  code: '*',
  attrs: [
    [:gender, {
      masculine: 'm',
      feminine: 'f',
      neuter: 'n',
      common: 'c'
    }],
    [:animate, {
      no: 'n',
      yes: 'y'
    }],
    [:number, {
      singular: 's',
      plural: 'p'
    }],
    [:case, {
      nominative: 'n',
      genitive: 'g',
      dative: 'd',
      accusative: 'a',
      vocative: 'v',
      locative: 'l',
      instrumental: 'i'
    }],
    [:case2, {
      partitive: 'p',
      locative: 'l'
    }],
    [:aspect, {
      progressive: 'p',
      perfective: 'e',
      biaspectual: 'b'
    }],
    [:voice, {
      active: 'a',
      passive: 'p',
      medial: 'm'
    }],
    [:tense, {
      present: 'p',
      future: 'f',
      past: 's'
    }],
    [:person, {
      first: '1',
      second: '2',
      third: '3'
    }],
    [:definiteness, {
      short_art: 's',
      full_art: 'f'
    }],
    [:degree, {
      positive: 'p',
      comparative: 'c',
      superlative: 's'
    }]
  ]
}.freeze
436
+
# Actual part-of-speech mapping.
#
# Maps each category name to the descriptor constant defined for it in
# this module. Only this lookup hash is frozen here.
#
CATEGORIES = {
  noun: NOUN,
  verb: VERB,
  adjective: ADJECTIVE,
  pronoun: PRONOUN,
  adverb: ADVERB,
  adposition: ADPOSITION,
  conjunction: CONJUNCTION,
  numeral: NUMERAL,
  particle: PARTICLE,
  interjection: INTERJECTION,
  abbreviation: ABBREVIATION,
  residual: RESIDUAL,
  crutch: CRUTCH
}.freeze
454
+ end