scylla 0.4.3 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. data/Gemfile +1 -0
  2. data/Gemfile.lock +10 -0
  3. data/VERSION +1 -1
  4. data/lib/scylla/generator.rb +1 -1
  5. data/lib/scylla/lms/13375P33K.lm +156 -156
  6. data/lib/scylla/lms/arabic.lm +133 -133
  7. data/lib/scylla/lms/bulgarian.lm +122 -122
  8. data/lib/scylla/lms/catalan.lm +151 -151
  9. data/lib/scylla/lms/danish.lm +137 -137
  10. data/lib/scylla/lms/english.lm +207 -207
  11. data/lib/scylla/lms/french.lm +400 -400
  12. data/lib/scylla/lms/japanese.lm +400 -400
  13. data/lib/scylla/lms/korean.lm +233 -233
  14. data/lib/scylla/lms/norwegian.lm +398 -398
  15. data/lib/scylla/lms/spanish.lm +98 -98
  16. data/lib/scylla/lms/swedish.lm +123 -123
  17. data/lib/scylla/lms/tagalog.lm +223 -223
  18. data/lib/scylla/lms/welsh.lm +234 -234
  19. data/lib/scylla/resources.rb +10 -10
  20. data/scylla.gemspec +17 -40
  21. data/source_texts/catalan.txt +28 -28
  22. data/source_texts/danish.txt +62 -62
  23. data/source_texts/english.txt +10 -10
  24. data/source_texts/french.txt +470 -77
  25. data/source_texts/japanese.txt +453 -199
  26. data/source_texts/norwegian.txt +96 -63
  27. data/source_texts/spanish.txt +269 -269
  28. data/test/classifier_test.rb +2 -2
  29. data/test/fixtures/lms/13375p33k.lm +156 -156
  30. data/test/fixtures/lms/danish.lm +137 -137
  31. data/test/fixtures/lms/english.lm +207 -207
  32. data/test/fixtures/lms/french.lm +400 -400
  33. data/test/fixtures/lms/hindi.lm +400 -0
  34. data/test/fixtures/lms/italian.lm +400 -0
  35. data/test/fixtures/lms/japanese.lm +400 -400
  36. data/test/fixtures/lms/norwegian.lm +400 -0
  37. data/test/fixtures/lms/spanish.lm +98 -98
  38. data/test/fixtures/source_texts/danish.txt +62 -62
  39. data/test/fixtures/source_texts/english.txt +10 -10
  40. data/test/fixtures/source_texts/french.txt +470 -77
  41. data/test/fixtures/source_texts/hindi.txt +199 -0
  42. data/test/fixtures/source_texts/italian.txt +120 -0
  43. data/test/fixtures/source_texts/japanese.txt +453 -199
  44. data/test/fixtures/source_texts/norwegian.txt +190 -0
  45. data/test/fixtures/source_texts/spanish.txt +269 -269
  46. data/test/fixtures/test_languages/english +61 -0
  47. data/test/fixtures/test_languages/french +0 -0
  48. data/test/fixtures/test_languages/german +29 -0
  49. data/test/fixtures/test_languages/hindi +3 -0
  50. data/test/fixtures/test_languages/italian +6 -0
  51. data/test/fixtures/test_languages/japanese +79 -0
  52. data/test/fixtures/test_languages/norwegian +14 -0
  53. data/test/fixtures/test_languages/spanish +22 -0
  54. data/test/generator_test.rb +0 -1
  55. data/test/language_test.rb +28 -0
  56. metadata +20 -43
  57. data/lib/scylla/lms/esperanto.lm +0 -400
  58. data/lib/scylla/lms/hungarian.lm +0 -400
  59. data/lib/scylla/lms/irish.lm +0 -400
  60. data/lib/scylla/lms/kannada.lm +0 -400
  61. data/lib/scylla/lms/latin.lm +0 -400
  62. data/lib/scylla/lms/malay.lm +0 -400
  63. data/lib/scylla/lms/marathi.lm +0 -400
  64. data/lib/scylla/lms/mingo.lm +0 -400
  65. data/lib/scylla/lms/nepali.lm +0 -400
  66. data/lib/scylla/lms/quechua.lm +0 -400
  67. data/lib/scylla/lms/rumantsch.lm +0 -400
  68. data/lib/scylla/lms/sanskrit.lm +0 -400
  69. data/lib/scylla/lms/scots_gaelic.lm +0 -400
  70. data/lib/scylla/lms/serbian.lm +0 -400
  71. data/lib/scylla/lms/swahili.lm +0 -400
  72. data/lib/scylla/lms/tamil.lm +0 -400
  73. data/lib/scylla/lms/ukrainian.lm +0 -400
  74. data/lib/scylla/lms/yiddish.lm +0 -400
  75. data/source_texts/esperanto.txt +0 -199
  76. data/source_texts/hungarian.txt +0 -102
  77. data/source_texts/irish.txt +0 -209
  78. data/source_texts/kannada.txt +0 -283
  79. data/source_texts/latin.txt +0 -120
  80. data/source_texts/malay.txt +0 -108
  81. data/source_texts/marathi.txt +0 -100
  82. data/source_texts/mingo.txt +0 -146
  83. data/source_texts/nepali.txt +0 -131
  84. data/source_texts/quechua.txt +0 -108
  85. data/source_texts/rumantsch.txt +0 -110
  86. data/source_texts/sanskrit.txt +0 -135
  87. data/source_texts/scots_gaelic.txt +0 -93
  88. data/source_texts/serbian.txt +0 -121
  89. data/source_texts/swahili.txt +0 -120
  90. data/source_texts/tamil.txt +0 -167
  91. data/source_texts/ukrainian.txt +0 -214
  92. data/source_texts/yiddish-utf.txt +0 -83
  93. data/test/fixtures/lms/kannada.lm +0 -400
  94. data/test/fixtures/source_texts/kannada.txt +0 -283
@@ -1,400 +0,0 @@
1
- _ 1062
2
- e 329
3
- a 276
4
- t 261
5
- k 203
6
- s 200
7
- l 188
8
- n 172
9
- r 154
10
- z 143
11
- i 140
12
- � 136
13
- o 126
14
- g 121
15
- � 114
16
- m 93
17
- y 80
18
- b 73
19
- d 71
20
- _a 70
21
- v 68
22
- t_ 63
23
- a_ 62
24
- , 54
25
- sz 53
26
- gy 52
27
- k_ 49
28
- ,_ 49
29
- � 48
30
- et 46
31
- h 44
32
- p 43
33
- _m 42
34
- � 41
35
- _a_ 40
36
- en 39
37
- u 38
38
- eg 37
39
- _k 37
40
- le 34
41
- el 33
42
- _e 33
43
- . 33
44
- j 32
45
- n_ 31
46
- s_ 31
47
- _s 31
48
- te 30
49
- i_ 30
50
- ek 29
51
- al 29
52
- ak 29
53
- _� 29
54
- f 28
55
- �s 27
56
- �r 27
57
- tt 26
58
- � 26
59
- er 26
60
- ta 26
61
- y_ 26
62
- es 25
63
- A 25
64
- e_ 25
65
- ke 25
66
- _h 25
67
- � 24
68
- ze 24
69
- re 24
70
- t� 23
71
- _i 23
72
- _t 23
73
- or 23
74
- _sz 22
75
- az 22
76
- z_ 22
77
- ag 21
78
- ny 20
79
- ki 20
80
- ne 20
81
- zt 20
82
- _v 20
83
- gy_ 20
84
- ko 20
85
- ll 19
86
- _n 19
87
- �r 19
88
- me 18
89
- �n 18
90
- at 18
91
- _az 18
92
- t� 18
93
- an 18
94
- ve 18
95
- is 17
96
- �s 17
97
- l_ 17
98
- egy 17
99
- ol 17
100
- _f 17
101
- va 16
102
- � 16
103
- os 16
104
- _b 16
105
- �t 16
106
- �g 16
107
- nk 16
108
- ye 16
109
- na 16
110
- __ 16
111
- be 15
112
- ho 15
113
- rt 15
114
- agy 15
115
- c 15
116
- mi 15
117
- A_ 14
118
- ra 14
119
- �p 14
120
- on 14
121
- tt_ 14
122
- se 14
123
- la 14
124
- z� 14
125
- ak_ 14
126
- em 14
127
- �l 13
128
- _az_ 13
129
- _ki 13
130
- za 13
131
- sze 13
132
- _l 13
133
- �s_ 13
134
- � 13
135
- ok 13
136
- _eg 13
137
- az_ 13
138
- s� 13
139
- to 12
140
- _�s 12
141
- k� 12
142
- ik 12
143
- ar 12
144
- it 12
145
- �n 12
146
- ro 12
147
- _�s_ 12
148
- r_ 12
149
- sa 12
150
- _egy 12
151
- ez 11
152
- g_ 11
153
- ma 11
154
- ett 11
155
- _ho 11
156
- S 11
157
- da 11
158
- �t 11
159
- _is 11
160
- nd 11
161
- ba 11
162
- bb 11
163
- kor 10
164
- ._ 10
165
- et_ 10
166
- ge 10
167
- m� 10
168
- � 10
169
- oz 10
170
- lt 10
171
- rs 10
172
- en_ 10
173
- mo 10
174
- og 10
175
- ek_ 10
176
- ap 10
177
- an_ 10
178
- E 10
179
- iz 10
180
- sz� 10
181
- zer 10
182
- �l 10
183
- _me 10
184
- cs 10
185
- in 10
186
- _va 10
187
- _d 9
188
- v� 9
189
- egy_ 9
190
- �g 9
191
- _mi 9
192
- z� 9
193
- ha 9
194
- ai 9
195
- n� 9
196
- l� 9
197
- pe 9
198
- so 9
199
- li 9
200
- m� 9
201
- nt 9
202
- _j 9
203
- ka 9
204
- t. 9
205
- as 9
206
- �_ 9
207
- M 9
208
- de 9
209
- st 9
210
- ban 8
211
- �v 8
212
- s� 8
213
- �ll 8
214
- ban_ 8
215
- do 8
216
- �ny 8
217
- a, 8
218
- meg 8
219
- �k 8
220
- szer 8
221
- lle 8
222
- k, 8
223
- bi 8
224
- nn 8
225
- ele 8
226
- ut 8
227
- : 8
228
- _le 8
229
- _el 8
230
- ss 8
231
- s�t 8
232
- ad 8
233
- s�g 8
234
- ul 8
235
- k� 8
236
- let 8
237
- l� 8
238
- zi 8
239
- t� 8
240
- _egy_ 8
241
- v� 7
242
- k�r 7
243
- m_ 7
244
- �l 7
245
- hog 7
246
- �k 7
247
- sza 7
248
- � 7
249
- ya 7
250
- _ke 7
251
- gye 7
252
- b_ 7
253
- ket 7
254
- re_ 7
255
- ud 7
256
- _ha 7
257
- _A 7
258
- tet 7
259
- e, 7
260
- v�n 7
261
- �r 7
262
- N 7
263
- t�s 7
264
- _hog 7
265
- �t 7
266
- min 7
267
- ga 7
268
- _n� 7
269
- �b 7
270
- �t 7
271
- kr 7
272
- nak 7
273
- _r 7
274
- k,_ 7
275
- �_ 7
276
- r� 7
277
- _�r 7
278
- ri 7
279
- _meg 7
280
- �v 7
281
- ot 7
282
- ik_ 7
283
- _� 7
284
- ett_ 7
285
- �k 7
286
- am 6
287
- izto 6
288
- j� 6
289
- ai_ 6
290
- _te 6
291
- id 6
292
- hogy 6
293
- _k� 6
294
- yi 6
295
- _hogy 6
296
- esz 6
297
- eh 6
298
- sk 6
299
- ben 6
300
- _mo 6
301
- k. 6
302
- rz 6
303
- izt 6
304
- yek 6
305
- nt_ 6
306
- �n 6
307
- _�v 6
308
- _es 6
309
- e,_ 6
310
- ell 6
311
- rv 6
312
- zs 6
313
- os� 6
314
- tos�t 6
315
- pa 6
316
- il 6
317
- t� 6
318
- ogy 6
319
- rd 6
320
- t, 6
321
- _min 6
322
- z� 6
323
- t,_ 6
324
- zto 6
325
- _be 6
326
- biz 6
327
- kb 6
328
- Sz 6
329
- H 6
330
- ps 6
331
- bizt 6
332
- ly 6
333
- ztos 6
334
- zet 6
335
- ere 6
336
- os�t 6
337
- k� 6
338
- bizto 6
339
- di 6
340
- om 6
341
- tos� 6
342
- tos 6
343
- nye 6
344
- iztos 6
345
- fe 6
346
- a,_ 6
347
- K 6
348
- sok 6
349
- uda 6
350
- - 6
351
- agy_ 6
352
- _p 6
353
- ztos� 6
354
- �t�s 5
355
- em_ 5
356
- ln 5
357
- d� 5
358
- �v� 5
359
- ev 5
360
- isk 5
361
- ny_ 5
362
- elle 5
363
- lm 5
364
- vagy 5
365
- fa 5
366
- ren 5
367
- ors 5
368
- �g_ 5
369
- bb_ 5
370
- _A_ 5
371
- �ps 5
372
- zak 5
373
- _ma 5
374
- ben_ 5
375
- vagy_ 5
376
- vag 5
377
- po 5
378
- kel 5
379
- h� 5
380
- yo 5
381
- I 5
382
- �l 5
383
- _t� 5
384
- t._ 5
385
- �v�n 5
386
- nyek 5
387
- tal 5
388
- lj 5
389
- �t�s 5
390
- �nye 5
391
- �sz 5
392
- _vagy 5
393
- gya 5
394
- _vag 5
395
- v�ny 5
396
- ete 5
397
- s�t� 5
398
- G 5
399
- est 5
400
- �t� 5