scylla 0.4.3 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. data/Gemfile +1 -0
  2. data/Gemfile.lock +10 -0
  3. data/VERSION +1 -1
  4. data/lib/scylla/generator.rb +1 -1
  5. data/lib/scylla/lms/13375P33K.lm +156 -156
  6. data/lib/scylla/lms/arabic.lm +133 -133
  7. data/lib/scylla/lms/bulgarian.lm +122 -122
  8. data/lib/scylla/lms/catalan.lm +151 -151
  9. data/lib/scylla/lms/danish.lm +137 -137
  10. data/lib/scylla/lms/english.lm +207 -207
  11. data/lib/scylla/lms/french.lm +400 -400
  12. data/lib/scylla/lms/japanese.lm +400 -400
  13. data/lib/scylla/lms/korean.lm +233 -233
  14. data/lib/scylla/lms/norwegian.lm +398 -398
  15. data/lib/scylla/lms/spanish.lm +98 -98
  16. data/lib/scylla/lms/swedish.lm +123 -123
  17. data/lib/scylla/lms/tagalog.lm +223 -223
  18. data/lib/scylla/lms/welsh.lm +234 -234
  19. data/lib/scylla/resources.rb +10 -10
  20. data/scylla.gemspec +17 -40
  21. data/source_texts/catalan.txt +28 -28
  22. data/source_texts/danish.txt +62 -62
  23. data/source_texts/english.txt +10 -10
  24. data/source_texts/french.txt +470 -77
  25. data/source_texts/japanese.txt +453 -199
  26. data/source_texts/norwegian.txt +96 -63
  27. data/source_texts/spanish.txt +269 -269
  28. data/test/classifier_test.rb +2 -2
  29. data/test/fixtures/lms/13375p33k.lm +156 -156
  30. data/test/fixtures/lms/danish.lm +137 -137
  31. data/test/fixtures/lms/english.lm +207 -207
  32. data/test/fixtures/lms/french.lm +400 -400
  33. data/test/fixtures/lms/hindi.lm +400 -0
  34. data/test/fixtures/lms/italian.lm +400 -0
  35. data/test/fixtures/lms/japanese.lm +400 -400
  36. data/test/fixtures/lms/norwegian.lm +400 -0
  37. data/test/fixtures/lms/spanish.lm +98 -98
  38. data/test/fixtures/source_texts/danish.txt +62 -62
  39. data/test/fixtures/source_texts/english.txt +10 -10
  40. data/test/fixtures/source_texts/french.txt +470 -77
  41. data/test/fixtures/source_texts/hindi.txt +199 -0
  42. data/test/fixtures/source_texts/italian.txt +120 -0
  43. data/test/fixtures/source_texts/japanese.txt +453 -199
  44. data/test/fixtures/source_texts/norwegian.txt +190 -0
  45. data/test/fixtures/source_texts/spanish.txt +269 -269
  46. data/test/fixtures/test_languages/english +61 -0
  47. data/test/fixtures/test_languages/french +0 -0
  48. data/test/fixtures/test_languages/german +29 -0
  49. data/test/fixtures/test_languages/hindi +3 -0
  50. data/test/fixtures/test_languages/italian +6 -0
  51. data/test/fixtures/test_languages/japanese +79 -0
  52. data/test/fixtures/test_languages/norwegian +14 -0
  53. data/test/fixtures/test_languages/spanish +22 -0
  54. data/test/generator_test.rb +0 -1
  55. data/test/language_test.rb +28 -0
  56. metadata +20 -43
  57. data/lib/scylla/lms/esperanto.lm +0 -400
  58. data/lib/scylla/lms/hungarian.lm +0 -400
  59. data/lib/scylla/lms/irish.lm +0 -400
  60. data/lib/scylla/lms/kannada.lm +0 -400
  61. data/lib/scylla/lms/latin.lm +0 -400
  62. data/lib/scylla/lms/malay.lm +0 -400
  63. data/lib/scylla/lms/marathi.lm +0 -400
  64. data/lib/scylla/lms/mingo.lm +0 -400
  65. data/lib/scylla/lms/nepali.lm +0 -400
  66. data/lib/scylla/lms/quechua.lm +0 -400
  67. data/lib/scylla/lms/rumantsch.lm +0 -400
  68. data/lib/scylla/lms/sanskrit.lm +0 -400
  69. data/lib/scylla/lms/scots_gaelic.lm +0 -400
  70. data/lib/scylla/lms/serbian.lm +0 -400
  71. data/lib/scylla/lms/swahili.lm +0 -400
  72. data/lib/scylla/lms/tamil.lm +0 -400
  73. data/lib/scylla/lms/ukrainian.lm +0 -400
  74. data/lib/scylla/lms/yiddish.lm +0 -400
  75. data/source_texts/esperanto.txt +0 -199
  76. data/source_texts/hungarian.txt +0 -102
  77. data/source_texts/irish.txt +0 -209
  78. data/source_texts/kannada.txt +0 -283
  79. data/source_texts/latin.txt +0 -120
  80. data/source_texts/malay.txt +0 -108
  81. data/source_texts/marathi.txt +0 -100
  82. data/source_texts/mingo.txt +0 -146
  83. data/source_texts/nepali.txt +0 -131
  84. data/source_texts/quechua.txt +0 -108
  85. data/source_texts/rumantsch.txt +0 -110
  86. data/source_texts/sanskrit.txt +0 -135
  87. data/source_texts/scots_gaelic.txt +0 -93
  88. data/source_texts/serbian.txt +0 -121
  89. data/source_texts/swahili.txt +0 -120
  90. data/source_texts/tamil.txt +0 -167
  91. data/source_texts/ukrainian.txt +0 -214
  92. data/source_texts/yiddish-utf.txt +0 -83
  93. data/test/fixtures/lms/kannada.lm +0 -400
  94. data/test/fixtures/source_texts/kannada.txt +0 -283
@@ -1,400 +0,0 @@
1
- _ 1474
2
- a 417
3
- o 413
4
- i 372
5
- e 330
6
- n 254
7
- s 218
8
- j 192
9
- t 190
10
- u 162
11
- r 153
12
- d 148
13
- l 147
14
- a_ 129
15
- v 127
16
- y 124
17
- m 120
18
- k 116
19
- e_ 116
20
- p 107
21
- o_ 105
22
- je 94
23
- c 91
24
- z 90
25
- i_ 84
26
- b 74
27
- _s 74
28
- , 65
29
- g 64
30
- _n 60
31
- u_ 59
32
- _i 59
33
- ,_ 55
34
- _p 55
35
- st 55
36
- sy 53
37
- je_ 53
38
- _d 51
39
- cy 50
40
- ta 49
41
- na 48
42
- _j 46
43
- . 45
44
- os 45
45
- da 44
46
- ko 43
47
- _je 42
48
- po 42
49
- ni 41
50
- _u 40
51
- bi 39
52
- to 38
53
- _b 37
54
- ti 37
55
- no 36
56
- _o 36
57
- ra 35
58
- ne 35
59
- an 35
60
- _je_ 34
61
- en 34
62
- is 34
63
- re 33
64
- la 32
65
- yi 32
66
- ov 32
67
- li 31
68
- il 31
69
- ._ 31
70
- vo 30
71
- _m 29
72
- _po 29
73
- m_ 29
74
- lo 29
75
- _z 28
76
- og 28
77
- _bi 28
78
- iz 28
79
- cj 27
80
- ja 27
81
- ro 27
82
- pr 27
83
- in 27
84
- ij 27
85
- oj 27
86
- le 26
87
- om 26
88
- _i_ 26
89
- on 26
90
- im 26
91
- za 25
92
- nj 25
93
- _k 25
94
- es 24
95
- _da 24
96
- cyi 23
97
- od 23
98
- va 23
99
- av 23
100
- ar 22
101
- _t 22
102
- da_ 22
103
- n_ 22
104
- na_ 22
105
- ka 22
106
- ju 22
107
- ad 22
108
- vi 21
109
- it 21
110
- ma 21
111
- ve 21
112
- ed 21
113
- _ne 21
114
- sa 21
115
- se 20
116
- ak 20
117
- _iz 20
118
- zy 20
119
- ao 19
120
- nu 19
121
- al 19
122
- mo 19
123
- _da_ 19
124
- de 19
125
- _v 18
126
- _za 18
127
- h 18
128
- tr 18
129
- ek 18
130
- syt 18
131
- lj 18
132
- _na 18
133
- do 18
134
- yt 18
135
- to_ 17
136
- bil 17
137
- ri 17
138
- ac 17
139
- a, 17
140
- ob 17
141
- et 16
142
- _c 16
143
- ye 16
144
- ec 16
145
- ji 16
146
- _u_ 16
147
- _pr 16
148
- go 16
149
- a,_ 16
150
- ik 16
151
- or 16
152
- ot 15
153
- ic 15
154
- _ko 15
155
- ije 15
156
- at 15
157
- osy 15
158
- d_ 15
159
- sta 15
160
- no_ 15
161
- _se 15
162
- ost 15
163
- sto 15
164
- __ 14
165
- _r 14
166
- koj 14
167
- _g 14
168
- em 14
169
- ih 14
170
- _bil 14
171
- te 13
172
- iv 13
173
- sl 13
174
- su 13
175
- ti_ 13
176
- _cy 13
177
- mi 13
178
- ao_ 13
179
- ta_ 13
180
- _l 13
181
- dn 13
182
- ovo 13
183
- jo 13
184
- me 12
185
- pos 12
186
- _na_ 12
187
- e, 12
188
- om_ 12
189
- ga 12
190
- ol 12
191
- h_ 12
192
- ki 12
193
- _on 12
194
- la_ 12
195
- im_ 12
196
- ih_ 12
197
- am 12
198
- ut 12
199
- up 12
200
- lo_ 12
201
- az 11
202
- _a 11
203
- pa 11
204
- io 11
205
- gl 11
206
- _koj 11
207
- _sy 11
208
- a. 11
209
- e,_ 11
210
- nje 11
211
- as 11
212
- _se_ 11
213
- _sa 11
214
- j_ 11
215
- se_ 11
216
- li_ 11
217
- yn 11
218
- gu 10
219
- acy 10
220
- ist 10
221
- aj 10
222
- _ni 10
223
- nos 10
224
- _do 10
225
- iti 10
226
- ba 10
227
- ju_ 10
228
- uc 10
229
- sti 10
230
- io_ 10
231
- ima 10
232
- ko_ 10
233
- jen 10
234
- pi 10
235
- _pos 9
236
- yi_ 9
237
- di 9
238
- ke 9
239
- _is 9
240
- er 9
241
- e. 9
242
- ga_ 9
243
- yl 9
244
- sk 9
245
- nij 9
246
- est 9
247
- el 8
248
- le_ 8
249
- ili 8
250
- ecy 8
251
- sp 8
252
- N 8
253
- _to 8
254
- lik 8
255
- iz_ 8
256
- ap 8
257
- ilo 8
258
- _sv 8
259
- oc 8
260
- ni_ 8
261
- _zy 8
262
- a._ 8
263
- _re 8
264
- icy 8
265
- ucj 8
266
- jao 8
267
- anj 8
268
- ila 8
269
- P 8
270
- pre 8
271
- _ka 8
272
- vr 8
273
- enu 8
274
- _iz_ 8
275
- esy 8
276
- zn 8
277
- sv 8
278
- isy 8
279
- z_ 8
280
- cje 8
281
- on_ 8
282
- nije 7
283
- t_ 7
284
- syto 7
285
- pot 7
286
- nji 7
287
- yu 7
288
- _st 7
289
- mog 7
290
- nu_ 7
291
- sam 7
292
- u, 7
293
- ne_ 7
294
- bio_ 7
295
- ila_ 7
296
- jed 7
297
- _syt 7
298
- bio 7
299
- edn 7
300
- V 7
301
- ru 7
302
- gov 7
303
- odi 7
304
- nek 7
305
- ren 7
306
- be 7
307
- ecj 7
308
- bila 7
309
- _ve 7
310
- jim 7
311
- T 7
312
- jao_ 7
313
- dan 7
314
- nut 7
315
- cyin 7
316
- un 7
317
- ako 7
318
- recy 7
319
- ora 7
320
- yto 7
321
- ije_ 7
322
- ok 7
323
- ovi 7
324
- S 7
325
- eg 7
326
- id 7
327
- pro 7
328
- oje 7
329
- kom 7
330
- du 7
331
- rec 7
332
- yin 7
333
- _mo 7
334
- ogu 6
335
- ma_ 6
336
- _no 6
337
- govo 6
338
- br 6
339
- an_ 6
340
- _od 6
341
- ros 6
342
- ocy 6
343
- tan 6
344
- _su 6
345
- _bila 6
346
- oje_ 6
347
- i, 6
348
- e._ 6
349
- lic 6
350
- su_ 6
351
- rat 6
352
- uta 6
353
- ez 6
354
- _rec 6
355
- za_ 6
356
- _za_ 6
357
- jima 6
358
- tav 6
359
- zna 6
360
- ye_ 6
361
- va_ 6
362
- bila_ 6
363
- _os 6
364
- tu 6
365
- ata 6
366
- I 6
367
- cja 6
368
- ad_ 6
369
- _go 6
370
- slo 6
371
- tv 6
372
- ogl 6
373
- ija 6
374
- syl 6
375
- dj 6
376
- te_ 6
377
- vn 6
378
- tim 6
379
- bilo 6
380
- _de 6
381
- iti_ 6
382
- ka_ 6
383
- vo_ 6
384
- vl 6
385
- y_ 6
386
- _S 6
387
- dno 6
388
- _tr 6
389
- ja_ 6
390
- cyi_ 6
391
- ya 6
392
- _nije 6
393
- ita 6
394
- renu 6
395
- lje 6
396
- oj_ 6
397
- _P 6
398
- i,_ 6
399
- zye 6
400
- ilo_ 6