scylla 0.4.3 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. data/Gemfile +1 -0
  2. data/Gemfile.lock +10 -0
  3. data/VERSION +1 -1
  4. data/lib/scylla/generator.rb +1 -1
  5. data/lib/scylla/lms/13375P33K.lm +156 -156
  6. data/lib/scylla/lms/arabic.lm +133 -133
  7. data/lib/scylla/lms/bulgarian.lm +122 -122
  8. data/lib/scylla/lms/catalan.lm +151 -151
  9. data/lib/scylla/lms/danish.lm +137 -137
  10. data/lib/scylla/lms/english.lm +207 -207
  11. data/lib/scylla/lms/french.lm +400 -400
  12. data/lib/scylla/lms/japanese.lm +400 -400
  13. data/lib/scylla/lms/korean.lm +233 -233
  14. data/lib/scylla/lms/norwegian.lm +398 -398
  15. data/lib/scylla/lms/spanish.lm +98 -98
  16. data/lib/scylla/lms/swedish.lm +123 -123
  17. data/lib/scylla/lms/tagalog.lm +223 -223
  18. data/lib/scylla/lms/welsh.lm +234 -234
  19. data/lib/scylla/resources.rb +10 -10
  20. data/scylla.gemspec +17 -40
  21. data/source_texts/catalan.txt +28 -28
  22. data/source_texts/danish.txt +62 -62
  23. data/source_texts/english.txt +10 -10
  24. data/source_texts/french.txt +470 -77
  25. data/source_texts/japanese.txt +453 -199
  26. data/source_texts/norwegian.txt +96 -63
  27. data/source_texts/spanish.txt +269 -269
  28. data/test/classifier_test.rb +2 -2
  29. data/test/fixtures/lms/13375p33k.lm +156 -156
  30. data/test/fixtures/lms/danish.lm +137 -137
  31. data/test/fixtures/lms/english.lm +207 -207
  32. data/test/fixtures/lms/french.lm +400 -400
  33. data/test/fixtures/lms/hindi.lm +400 -0
  34. data/test/fixtures/lms/italian.lm +400 -0
  35. data/test/fixtures/lms/japanese.lm +400 -400
  36. data/test/fixtures/lms/norwegian.lm +400 -0
  37. data/test/fixtures/lms/spanish.lm +98 -98
  38. data/test/fixtures/source_texts/danish.txt +62 -62
  39. data/test/fixtures/source_texts/english.txt +10 -10
  40. data/test/fixtures/source_texts/french.txt +470 -77
  41. data/test/fixtures/source_texts/hindi.txt +199 -0
  42. data/test/fixtures/source_texts/italian.txt +120 -0
  43. data/test/fixtures/source_texts/japanese.txt +453 -199
  44. data/test/fixtures/source_texts/norwegian.txt +190 -0
  45. data/test/fixtures/source_texts/spanish.txt +269 -269
  46. data/test/fixtures/test_languages/english +61 -0
  47. data/test/fixtures/test_languages/french +0 -0
  48. data/test/fixtures/test_languages/german +29 -0
  49. data/test/fixtures/test_languages/hindi +3 -0
  50. data/test/fixtures/test_languages/italian +6 -0
  51. data/test/fixtures/test_languages/japanese +79 -0
  52. data/test/fixtures/test_languages/norwegian +14 -0
  53. data/test/fixtures/test_languages/spanish +22 -0
  54. data/test/generator_test.rb +0 -1
  55. data/test/language_test.rb +28 -0
  56. metadata +20 -43
  57. data/lib/scylla/lms/esperanto.lm +0 -400
  58. data/lib/scylla/lms/hungarian.lm +0 -400
  59. data/lib/scylla/lms/irish.lm +0 -400
  60. data/lib/scylla/lms/kannada.lm +0 -400
  61. data/lib/scylla/lms/latin.lm +0 -400
  62. data/lib/scylla/lms/malay.lm +0 -400
  63. data/lib/scylla/lms/marathi.lm +0 -400
  64. data/lib/scylla/lms/mingo.lm +0 -400
  65. data/lib/scylla/lms/nepali.lm +0 -400
  66. data/lib/scylla/lms/quechua.lm +0 -400
  67. data/lib/scylla/lms/rumantsch.lm +0 -400
  68. data/lib/scylla/lms/sanskrit.lm +0 -400
  69. data/lib/scylla/lms/scots_gaelic.lm +0 -400
  70. data/lib/scylla/lms/serbian.lm +0 -400
  71. data/lib/scylla/lms/swahili.lm +0 -400
  72. data/lib/scylla/lms/tamil.lm +0 -400
  73. data/lib/scylla/lms/ukrainian.lm +0 -400
  74. data/lib/scylla/lms/yiddish.lm +0 -400
  75. data/source_texts/esperanto.txt +0 -199
  76. data/source_texts/hungarian.txt +0 -102
  77. data/source_texts/irish.txt +0 -209
  78. data/source_texts/kannada.txt +0 -283
  79. data/source_texts/latin.txt +0 -120
  80. data/source_texts/malay.txt +0 -108
  81. data/source_texts/marathi.txt +0 -100
  82. data/source_texts/mingo.txt +0 -146
  83. data/source_texts/nepali.txt +0 -131
  84. data/source_texts/quechua.txt +0 -108
  85. data/source_texts/rumantsch.txt +0 -110
  86. data/source_texts/sanskrit.txt +0 -135
  87. data/source_texts/scots_gaelic.txt +0 -93
  88. data/source_texts/serbian.txt +0 -121
  89. data/source_texts/swahili.txt +0 -120
  90. data/source_texts/tamil.txt +0 -167
  91. data/source_texts/ukrainian.txt +0 -214
  92. data/source_texts/yiddish-utf.txt +0 -83
  93. data/test/fixtures/lms/kannada.lm +0 -400
  94. data/test/fixtures/source_texts/kannada.txt +0 -283
@@ -1,400 +0,0 @@
1
- _ 1413
2
- a 829
3
- n 397
4
- e 319
5
- i 290
6
- an 239
7
- k 213
8
- d 188
9
- u 174
10
- r 172
11
- t 168
12
- g 162
13
- m 159
14
- s 128
15
- l 126
16
- __ 123
17
- p 123
18
- n_ 120
19
- ng 114
20
- b 108
21
- da 106
22
- an_ 105
23
- h 102
24
- a_ 93
25
- _d 89
26
- ka 83
27
- y 74
28
- ang 74
29
- en 68
30
- ya 64
31
- la 62
32
- ah 62
33
- er 61
34
- i_ 60
35
- _m 60
36
- o 59
37
- . 59
38
- ___ 56
39
- ar 55
40
- ____ 54
41
- me 53
42
- at 53
43
- di 53
44
- _____ 52
45
- ra 52
46
- ng_ 51
47
- g_ 51
48
- ba 48
49
- al 47
50
- ak 47
51
- ang_ 46
52
- _da 45
53
- ga 45
54
- _k 44
55
- ta 44
56
- _me 43
57
- kan 43
58
- _p 42
59
- ._ 41
60
- _b 41
61
- ke 40
62
- pa 39
63
- h_ 38
64
- am 37
65
- as 36
66
- kan_ 36
67
- j 36
68
- k_ 36
69
- _s 36
70
- in 35
71
- sa 35
72
- _di 35
73
- se 34
74
- pe 34
75
- _a 33
76
- yang 33
77
- yan 33
78
- em 32
79
- dan 32
80
- ti 32
81
- c 32
82
- ha 31
83
- it 30
84
- be 30
85
- _y 29
86
- _t 29
87
- _yang 29
88
- el 29
89
- _yan 29
90
- ah_ 29
91
- _ya 29
92
- ala 28
93
- ad 28
94
- tu 28
95
- ik 28
96
- nd 27
97
- yang_ 27
98
- men 26
99
- _ke 26
100
- un 25
101
- ma 25
102
- t_ 25
103
- _pe 25
104
- ia 24
105
- ri 24
106
- li 24
107
- ny 24
108
- _dan 24
109
- na 23
110
- S 23
111
- , 23
112
- ada 23
113
- r_ 22
114
- ,_ 22
115
- te 22
116
- ja 22
117
- nga 22
118
- _men 21
119
- eng 21
120
- ep 21
121
- ap 21
122
- nya 21
123
- _se 21
124
- _i 20
125
- u_ 20
126
- mb 20
127
- si 20
128
- m_ 19
129
- w 19
130
- _ba 19
131
- et 19
132
- aka 19
133
- ber 18
134
- dan_ 18
135
- uk 18
136
- ua 18
137
- nda 18
138
- ara 18
139
- wa 17
140
- M 17
141
- ran 17
142
- gk 17
143
- ai 17
144
- per 17
145
- at_ 17
146
- ek 17
147
- ata 17
148
- ngk 17
149
- aha 17
150
- ak_ 17
151
- pu 17
152
- mem 16
153
- lam 16
154
- _be 16
155
- _dan_ 16
156
- ir 16
157
- gan 15
158
- de 15
159
- _mem 15
160
- end 15
161
- id 15
162
- da_ 15
163
- ur 15
164
- eb 15
165
- dal 15
166
- lah 15
167
- ngan 15
168
- am_ 15
169
- - 14
170
- gi 14
171
- le 14
172
- ada_ 14
173
- pad 14
174
- pada 14
175
- asa 14
176
- tan 14
177
- ki 14
178
- dala 13
179
- ca 13
180
- f 13
181
- ban 13
182
- gka 13
183
- il 13
184
- ut 13
185
- _M 13
186
- ni 13
187
- era 13
188
- ing 13
189
- ari 13
190
- dar 13
191
- ngka 13
192
- mp 13
193
- A 13
194
- gan_ 12
195
- ngan_ 12
196
- mu 12
197
- ag 12
198
- _. 12
199
- ay 12
200
- alam 12
201
- itu 12
202
- pi 12
203
- enga 12
204
- pada_ 12
205
- _S 12
206
- ku 12
207
- ela 12
208
- es 12
209
- and 12
210
- p_ 11
211
- emb 11
212
- _c 11
213
- bi 11
214
- _._ 11
215
- nt 11
216
- us 11
217
- ih 11
218
- dalam 11
219
- _te 11
220
- N 11
221
- bu 11
222
- pat 11
223
- _ber 11
224
- ika 11
225
- s_ 11
226
- angk 11
227
- au 11
228
- _itu 10
229
- ya_ 10
230
- lan 10
231
- dak 10
232
- _ban 10
233
- " 10
234
- enda 10
235
- mba 10
236
- epa 10
237
- _it 10
238
- alam_ 10
239
- di_ 10
240
- lah_ 10
241
- ali 10
242
- ar_ 10
243
- ter 10
244
- ko 10
245
- apa 10
246
- K 10
247
- ed 10
248
- Se 10
249
- akan 10
250
- atan 10
251
- _ti 10
252
- lam_ 10
253
- on 10
254
- _pa 9
255
- T 9
256
- _dala 9
257
- _ma 9
258
- _per 9
259
- _j 9
260
- aj 9
261
- ru 9
262
- eg 9
263
- _dal 9
264
- ib 9
265
- asi 9
266
- tu_ 9
267
- _h 9
268
- awa 9
269
- ka_ 9
270
- angka 9
271
- nj 9
272
- is 9
273
- _l 9
274
- rang 9
275
- nya_ 9
276
- hu 9
277
- rk 9
278
- aw 9
279
- du 9
280
- mi 9
281
- dari 8
282
- ita 8
283
- ju 8
284
- eli 8
285
- ana 8
286
- aa 8
287
- ert 8
288
- re 8
289
- iha 8
290
- ri_ 8
291
- dik 8
292
- atan_ 8
293
- _K 8
294
- ta_ 8
295
- ida 8
296
- ena 8
297
- um 8
298
- ian 8
299
- ai_ 8
300
- bag 8
301
- nc 8
302
- or 8
303
- pun 8
304
- memb 8
305
- sa_ 8
306
- emp 8
307
- ol 8
308
- den 8
309
- B 8
310
- tan_ 8
311
- a. 8
312
- anda 8
313
- eh 8
314
- aya 8
315
- rt 8
316
- akan_ 8
317
- eta 8
318
- su 8
319
- nja 8
320
- _di_ 8
321
- _o 8
322
- ura 8
323
- _de 8
324
- ul 7
325
- ti_ 7
326
- ras 7
327
- kep 7
328
- man 7
329
- ati 7
330
- tid 7
331
- seb 7
332
- _n 7
333
- epada 7
334
- un_ 7
335
- ra_ 7
336
- _sa 7
337
- n. 7
338
- aan 7
339
- gu 7
340
- ama 7
341
- meng 7
342
- si_ 7
343
- apat 7
344
- na_ 7
345
- engan 7
346
- _ka 7
347
- pen 7
348
- idak 7
349
- sep 7
350
- dapa 7
351
- _den 7
352
- eba 7
353
- Me 7
354
- I 7
355
- ira 7
356
- dapat 7
357
- ne 7
358
- inya 7
359
- pat_ 7
360
- kepa 7
361
- bah 7
362
- _memb 7
363
- om 7
364
- _pen 7
365
- pert 7
366
- asa_ 7
367
- ab 7
368
- kepad 7
369
- dak_ 7
370
- ig 7
371
- lu 7
372
- agi 7
373
- epad 7
374
- iny 7
375
- dap 7
376
- ge 7
377
- ci 7
378
- _B 7
379
- an. 6
380
- oleh 6
381
- _kepa 6
382
- u. 6
383
- band 6
384
- uan 6
385
- car 6
386
- leh 6
387
- tidak 6
388
- alah 6
389
- ) 6
390
- _dari 6
391
- ip 6
392
- ap_ 6
393
- Ma 6
394
- aja 6
395
- rah 6
396
- _T 6
397
- ( 6
398
- nk 6
399
- epe 6
400
- i. 6