scylla 0.4.3 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. data/Gemfile +1 -0
  2. data/Gemfile.lock +10 -0
  3. data/VERSION +1 -1
  4. data/lib/scylla/generator.rb +1 -1
  5. data/lib/scylla/lms/13375P33K.lm +156 -156
  6. data/lib/scylla/lms/arabic.lm +133 -133
  7. data/lib/scylla/lms/bulgarian.lm +122 -122
  8. data/lib/scylla/lms/catalan.lm +151 -151
  9. data/lib/scylla/lms/danish.lm +137 -137
  10. data/lib/scylla/lms/english.lm +207 -207
  11. data/lib/scylla/lms/french.lm +400 -400
  12. data/lib/scylla/lms/japanese.lm +400 -400
  13. data/lib/scylla/lms/korean.lm +233 -233
  14. data/lib/scylla/lms/norwegian.lm +398 -398
  15. data/lib/scylla/lms/spanish.lm +98 -98
  16. data/lib/scylla/lms/swedish.lm +123 -123
  17. data/lib/scylla/lms/tagalog.lm +223 -223
  18. data/lib/scylla/lms/welsh.lm +234 -234
  19. data/lib/scylla/resources.rb +10 -10
  20. data/scylla.gemspec +17 -40
  21. data/source_texts/catalan.txt +28 -28
  22. data/source_texts/danish.txt +62 -62
  23. data/source_texts/english.txt +10 -10
  24. data/source_texts/french.txt +470 -77
  25. data/source_texts/japanese.txt +453 -199
  26. data/source_texts/norwegian.txt +96 -63
  27. data/source_texts/spanish.txt +269 -269
  28. data/test/classifier_test.rb +2 -2
  29. data/test/fixtures/lms/13375p33k.lm +156 -156
  30. data/test/fixtures/lms/danish.lm +137 -137
  31. data/test/fixtures/lms/english.lm +207 -207
  32. data/test/fixtures/lms/french.lm +400 -400
  33. data/test/fixtures/lms/hindi.lm +400 -0
  34. data/test/fixtures/lms/italian.lm +400 -0
  35. data/test/fixtures/lms/japanese.lm +400 -400
  36. data/test/fixtures/lms/norwegian.lm +400 -0
  37. data/test/fixtures/lms/spanish.lm +98 -98
  38. data/test/fixtures/source_texts/danish.txt +62 -62
  39. data/test/fixtures/source_texts/english.txt +10 -10
  40. data/test/fixtures/source_texts/french.txt +470 -77
  41. data/test/fixtures/source_texts/hindi.txt +199 -0
  42. data/test/fixtures/source_texts/italian.txt +120 -0
  43. data/test/fixtures/source_texts/japanese.txt +453 -199
  44. data/test/fixtures/source_texts/norwegian.txt +190 -0
  45. data/test/fixtures/source_texts/spanish.txt +269 -269
  46. data/test/fixtures/test_languages/english +61 -0
  47. data/test/fixtures/test_languages/french +0 -0
  48. data/test/fixtures/test_languages/german +29 -0
  49. data/test/fixtures/test_languages/hindi +3 -0
  50. data/test/fixtures/test_languages/italian +6 -0
  51. data/test/fixtures/test_languages/japanese +79 -0
  52. data/test/fixtures/test_languages/norwegian +14 -0
  53. data/test/fixtures/test_languages/spanish +22 -0
  54. data/test/generator_test.rb +0 -1
  55. data/test/language_test.rb +28 -0
  56. metadata +20 -43
  57. data/lib/scylla/lms/esperanto.lm +0 -400
  58. data/lib/scylla/lms/hungarian.lm +0 -400
  59. data/lib/scylla/lms/irish.lm +0 -400
  60. data/lib/scylla/lms/kannada.lm +0 -400
  61. data/lib/scylla/lms/latin.lm +0 -400
  62. data/lib/scylla/lms/malay.lm +0 -400
  63. data/lib/scylla/lms/marathi.lm +0 -400
  64. data/lib/scylla/lms/mingo.lm +0 -400
  65. data/lib/scylla/lms/nepali.lm +0 -400
  66. data/lib/scylla/lms/quechua.lm +0 -400
  67. data/lib/scylla/lms/rumantsch.lm +0 -400
  68. data/lib/scylla/lms/sanskrit.lm +0 -400
  69. data/lib/scylla/lms/scots_gaelic.lm +0 -400
  70. data/lib/scylla/lms/serbian.lm +0 -400
  71. data/lib/scylla/lms/swahili.lm +0 -400
  72. data/lib/scylla/lms/tamil.lm +0 -400
  73. data/lib/scylla/lms/ukrainian.lm +0 -400
  74. data/lib/scylla/lms/yiddish.lm +0 -400
  75. data/source_texts/esperanto.txt +0 -199
  76. data/source_texts/hungarian.txt +0 -102
  77. data/source_texts/irish.txt +0 -209
  78. data/source_texts/kannada.txt +0 -283
  79. data/source_texts/latin.txt +0 -120
  80. data/source_texts/malay.txt +0 -108
  81. data/source_texts/marathi.txt +0 -100
  82. data/source_texts/mingo.txt +0 -146
  83. data/source_texts/nepali.txt +0 -131
  84. data/source_texts/quechua.txt +0 -108
  85. data/source_texts/rumantsch.txt +0 -110
  86. data/source_texts/sanskrit.txt +0 -135
  87. data/source_texts/scots_gaelic.txt +0 -93
  88. data/source_texts/serbian.txt +0 -121
  89. data/source_texts/swahili.txt +0 -120
  90. data/source_texts/tamil.txt +0 -167
  91. data/source_texts/ukrainian.txt +0 -214
  92. data/source_texts/yiddish-utf.txt +0 -83
  93. data/test/fixtures/lms/kannada.lm +0 -400
  94. data/test/fixtures/source_texts/kannada.txt +0 -283
@@ -0,0 +1,400 @@
1
+ _ 1548
2
+ a 544
3
+ e 410
4
+ i 389
5
+ o 341
6
+ l 278
7
+ n 244
8
+ r 243
9
+ c 205
10
+ s 202
11
+ a_ 182
12
+ t 176
13
+ d 170
14
+ e_ 140
15
+ u 123
16
+ v 115
17
+ o_ 110
18
+ i_ 103
19
+ m 100
20
+ p 99
21
+ _d 84
22
+ _c 84
23
+ _s 79
24
+ , 74
25
+ ,_ 69
26
+ _a 63
27
+ la 62
28
+ g 60
29
+ ar 60
30
+ an 59
31
+ er 59
32
+ h 56
33
+ ri 52
34
+ co 52
35
+ ll 52
36
+ _p 51
37
+ re 49
38
+ ch 46
39
+ ra 46
40
+ el 45
41
+ _i 45
42
+ _m 44
43
+ di 44
44
+ no 44
45
+ to 44
46
+ b 43
47
+ va 43
48
+ l_ 42
49
+ _l 42
50
+ ia 42
51
+ se 41
52
+ in 40
53
+ n_ 40
54
+ f 39
55
+ av 38
56
+ la_ 38
57
+ do 37
58
+ _di 37
59
+ on 36
60
+ al 35
61
+ ca 34
62
+ ta 34
63
+ na 34
64
+ _e 34
65
+ ' 34
66
+ li 34
67
+ en 34
68
+ or 33
69
+ da 32
70
+ si 32
71
+ _n 32
72
+ as 32
73
+ _co 31
74
+ to_ 31
75
+ ol 30
76
+ le 30
77
+ de 30
78
+ lla 30
79
+ pe 29
80
+ cc 29
81
+ re_ 29
82
+ ve 28
83
+ il 28
84
+ ma 28
85
+ . 27
86
+ io 27
87
+ o, 27
88
+ va_ 27
89
+ _v 27
90
+ nd 26
91
+ ne 26
92
+ tt 26
93
+ gl 25
94
+ nt 25
95
+ st 25
96
+ gli 25
97
+ o,_ 24
98
+ sa 24
99
+ _il 24
100
+ ell 24
101
+ me 23
102
+ _f 23
103
+ he 23
104
+ om 23
105
+ at 23
106
+ che 23
107
+ ti 23
108
+ _de 23
109
+ _ch 22
110
+ qu 22
111
+ _qu 22
112
+ a, 22
113
+ ci 22
114
+ _q 22
115
+ q 22
116
+ _b 22
117
+ il_ 22
118
+ _pe 21
119
+ a,_ 21
120
+ te 21
121
+ _ca 21
122
+ _se 21
123
+ _il_ 21
124
+ vi 21
125
+ un 21
126
+ di_ 20
127
+ he_ 20
128
+ ava 20
129
+ ra_ 20
130
+ _che 20
131
+ pa 20
132
+ che_ 20
133
+ del 20
134
+ _e_ 20
135
+ _di_ 20
136
+ lla_ 20
137
+ z 20
138
+ _u 19
139
+ ._ 19
140
+ _del 19
141
+ ss 19
142
+ no_ 19
143
+ ev 19
144
+ es 19
145
+ et 18
146
+ _che_ 18
147
+ and 18
148
+ _ma 18
149
+ is 18
150
+ lo 18
151
+ vo 18
152
+ ia_ 17
153
+ l' 17
154
+ na_ 17
155
+ era 17
156
+ _un 17
157
+ os 17
158
+ ic 17
159
+ si_ 17
160
+ hi 17
161
+ le_ 17
162
+ _la 17
163
+ per 16
164
+ ano 16
165
+ ie 16
166
+ ua 16
167
+ _g 16
168
+ po 16
169
+ chi 16
170
+ sc 16
171
+ ad 15
172
+ it 15
173
+ io_ 15
174
+ are 15
175
+ ava_ 15
176
+ _si 15
177
+ tr 15
178
+ ac 15
179
+ eva 15
180
+ tto 15
181
+ com 15
182
+ _in 15
183
+ su 15
184
+ se_ 15
185
+ 'a 15
186
+ _per 15
187
+ mo 14
188
+ fa 14
189
+ are_ 14
190
+ _la_ 14
191
+ _a_ 14
192
+ _r 14
193
+ ndo 14
194
+ ari 14
195
+ e,_ 14
196
+ _su 14
197
+ oc 14
198
+ e, 14
199
+ li_ 14
200
+ ro 14
201
+ in_ 13
202
+ id 13
203
+ ella 13
204
+ _in_ 13
205
+ on_ 13
206
+ un_ 13
207
+ gli_ 13
208
+ el_ 13
209
+ all 13
210
+ mp 13
211
+ so 13
212
+ ur 13
213
+ a. 12
214
+ � 12
215
+ ue 12
216
+ r_ 12
217
+ ni 12
218
+ ut 12
219
+ _qua 12
220
+ col 12
221
+ pr 12
222
+ sse 12
223
+ mi 12
224
+ qua 12
225
+ era_ 12
226
+ ig 12
227
+ be 12
228
+ uo 12
229
+ tto_ 12
230
+ ce 12
231
+ acc 11
232
+ asa 11
233
+ cas 11
234
+ l'a 11
235
+ _gl 11
236
+ me_ 11
237
+ _com 11
238
+ _cas 11
239
+ _no 11
240
+ ano_ 11
241
+ _al 11
242
+ _vi 11
243
+ ave 11
244
+ _da 11
245
+ cch 11
246
+ _gli 11
247
+ gn 11
248
+ ta_ 11
249
+ pi 11
250
+ �_ 11
251
+ ul 11
252
+ _un_ 11
253
+ ome 11
254
+ dd 11
255
+ iv 11
256
+ _col 11
257
+ _av 10
258
+ _que 10
259
+ van 10
260
+ man 10
261
+ ale 10
262
+ _fa 10
263
+ dell 10
264
+ zi 10
265
+ cchi 10
266
+ _do 10
267
+ _dell 10
268
+ _t 10
269
+ casa 10
270
+ og 10
271
+ _ri 10
272
+ _gli_ 10
273
+ ato 10
274
+ ent 10
275
+ non 10
276
+ que 10
277
+ am 10
278
+ _casa 10
279
+ rr 10
280
+ _ne 10
281
+ far 9
282
+ del_ 9
283
+ _mo 9
284
+ rid 9
285
+ er_ 9
286
+ _an 9
287
+ anda 9
288
+ �_ 9
289
+ ella_ 9
290
+ ne_ 9
291
+ bi 9
292
+ fi 9
293
+ vano 9
294
+ _me 9
295
+ ando 9
296
+ uri 9
297
+ ti_ 9
298
+ da_ 9
299
+ ess 9
300
+ oi 9
301
+ par 9
302
+ _le 9
303
+ come 9
304
+ � 9
305
+ nda 9
306
+ ome_ 9
307
+ ot 9
308
+ _be 9
309
+ eva_ 9
310
+ sta 9
311
+ _del_ 8
312
+ _come 8
313
+ i, 8
314
+ della 8
315
+ ridd 8
316
+ come_ 8
317
+ idd 8
318
+ una_ 8
319
+ ina 8
320
+ L 8
321
+ ba 8
322
+ nz 8
323
+ uel 8
324
+ _l' 8
325
+ una 8
326
+ ! 8
327
+ _ave 8
328
+ ene 8
329
+ con 8
330
+ non_ 8
331
+ ato_ 8
332
+ ant 8
333
+ _st 8
334
+ cco 8
335
+ ser 8
336
+ Tu 8
337
+ T 8
338
+ do_ 8
339
+ occ 8
340
+ S 8
341
+ ; 8
342
+ ir 8
343
+ _all 8
344
+ ec 7
345
+ mpa 7
346
+ zia 7
347
+ dava 7
348
+ Turi 7
349
+ _era 7
350
+ ed 7
351
+ ett 7
352
+ rv 7
353
+ lo_ 7
354
+ cia 7
355
+ ino 7
356
+ per_ 7
357
+ quel 7
358
+ i,_ 7
359
+ _coll 7
360
+ Tur 7
361
+ _non 7
362
+ ap 7
363
+ pre 7
364
+ hi_ 7
365
+ nto 7
366
+ _pr 7
367
+ utt 7
368
+ rc 7
369
+ llo 7
370
+ coll 7
371
+ sse_ 7
372
+ _pa 7
373
+ 'e 7
374
+ sp 7
375
+ ai 7
376
+ _le_ 7
377
+ ogl 7
378
+ dav 7
379
+ ola 7
380
+ _quel 7
381
+ _vo 7
382
+ vano_ 7
383
+ _ad 7
384
+ za 7
385
+ sa_ 7
386
+ oll 7
387
+ _pi 7
388
+ tra 7
389
+ ogli 7
390
+ ndo_ 7
391
+ alla 7
392
+ _si_ 7
393
+ gi 7
394
+ _er 7
395
+ ere 7
396
+ nu 6
397
+ riv 6
398
+ avev 6
399
+ dis 6
400
+ _anda 6