scylla 0.4.3 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. data/Gemfile +1 -0
  2. data/Gemfile.lock +10 -0
  3. data/VERSION +1 -1
  4. data/lib/scylla/generator.rb +1 -1
  5. data/lib/scylla/lms/13375P33K.lm +156 -156
  6. data/lib/scylla/lms/arabic.lm +133 -133
  7. data/lib/scylla/lms/bulgarian.lm +122 -122
  8. data/lib/scylla/lms/catalan.lm +151 -151
  9. data/lib/scylla/lms/danish.lm +137 -137
  10. data/lib/scylla/lms/english.lm +207 -207
  11. data/lib/scylla/lms/french.lm +400 -400
  12. data/lib/scylla/lms/japanese.lm +400 -400
  13. data/lib/scylla/lms/korean.lm +233 -233
  14. data/lib/scylla/lms/norwegian.lm +398 -398
  15. data/lib/scylla/lms/spanish.lm +98 -98
  16. data/lib/scylla/lms/swedish.lm +123 -123
  17. data/lib/scylla/lms/tagalog.lm +223 -223
  18. data/lib/scylla/lms/welsh.lm +234 -234
  19. data/lib/scylla/resources.rb +10 -10
  20. data/scylla.gemspec +17 -40
  21. data/source_texts/catalan.txt +28 -28
  22. data/source_texts/danish.txt +62 -62
  23. data/source_texts/english.txt +10 -10
  24. data/source_texts/french.txt +470 -77
  25. data/source_texts/japanese.txt +453 -199
  26. data/source_texts/norwegian.txt +96 -63
  27. data/source_texts/spanish.txt +269 -269
  28. data/test/classifier_test.rb +2 -2
  29. data/test/fixtures/lms/13375p33k.lm +156 -156
  30. data/test/fixtures/lms/danish.lm +137 -137
  31. data/test/fixtures/lms/english.lm +207 -207
  32. data/test/fixtures/lms/french.lm +400 -400
  33. data/test/fixtures/lms/hindi.lm +400 -0
  34. data/test/fixtures/lms/italian.lm +400 -0
  35. data/test/fixtures/lms/japanese.lm +400 -400
  36. data/test/fixtures/lms/norwegian.lm +400 -0
  37. data/test/fixtures/lms/spanish.lm +98 -98
  38. data/test/fixtures/source_texts/danish.txt +62 -62
  39. data/test/fixtures/source_texts/english.txt +10 -10
  40. data/test/fixtures/source_texts/french.txt +470 -77
  41. data/test/fixtures/source_texts/hindi.txt +199 -0
  42. data/test/fixtures/source_texts/italian.txt +120 -0
  43. data/test/fixtures/source_texts/japanese.txt +453 -199
  44. data/test/fixtures/source_texts/norwegian.txt +190 -0
  45. data/test/fixtures/source_texts/spanish.txt +269 -269
  46. data/test/fixtures/test_languages/english +61 -0
  47. data/test/fixtures/test_languages/french +0 -0
  48. data/test/fixtures/test_languages/german +29 -0
  49. data/test/fixtures/test_languages/hindi +3 -0
  50. data/test/fixtures/test_languages/italian +6 -0
  51. data/test/fixtures/test_languages/japanese +79 -0
  52. data/test/fixtures/test_languages/norwegian +14 -0
  53. data/test/fixtures/test_languages/spanish +22 -0
  54. data/test/generator_test.rb +0 -1
  55. data/test/language_test.rb +28 -0
  56. metadata +20 -43
  57. data/lib/scylla/lms/esperanto.lm +0 -400
  58. data/lib/scylla/lms/hungarian.lm +0 -400
  59. data/lib/scylla/lms/irish.lm +0 -400
  60. data/lib/scylla/lms/kannada.lm +0 -400
  61. data/lib/scylla/lms/latin.lm +0 -400
  62. data/lib/scylla/lms/malay.lm +0 -400
  63. data/lib/scylla/lms/marathi.lm +0 -400
  64. data/lib/scylla/lms/mingo.lm +0 -400
  65. data/lib/scylla/lms/nepali.lm +0 -400
  66. data/lib/scylla/lms/quechua.lm +0 -400
  67. data/lib/scylla/lms/rumantsch.lm +0 -400
  68. data/lib/scylla/lms/sanskrit.lm +0 -400
  69. data/lib/scylla/lms/scots_gaelic.lm +0 -400
  70. data/lib/scylla/lms/serbian.lm +0 -400
  71. data/lib/scylla/lms/swahili.lm +0 -400
  72. data/lib/scylla/lms/tamil.lm +0 -400
  73. data/lib/scylla/lms/ukrainian.lm +0 -400
  74. data/lib/scylla/lms/yiddish.lm +0 -400
  75. data/source_texts/esperanto.txt +0 -199
  76. data/source_texts/hungarian.txt +0 -102
  77. data/source_texts/irish.txt +0 -209
  78. data/source_texts/kannada.txt +0 -283
  79. data/source_texts/latin.txt +0 -120
  80. data/source_texts/malay.txt +0 -108
  81. data/source_texts/marathi.txt +0 -100
  82. data/source_texts/mingo.txt +0 -146
  83. data/source_texts/nepali.txt +0 -131
  84. data/source_texts/quechua.txt +0 -108
  85. data/source_texts/rumantsch.txt +0 -110
  86. data/source_texts/sanskrit.txt +0 -135
  87. data/source_texts/scots_gaelic.txt +0 -93
  88. data/source_texts/serbian.txt +0 -121
  89. data/source_texts/swahili.txt +0 -120
  90. data/source_texts/tamil.txt +0 -167
  91. data/source_texts/ukrainian.txt +0 -214
  92. data/source_texts/yiddish-utf.txt +0 -83
  93. data/test/fixtures/lms/kannada.lm +0 -400
  94. data/test/fixtures/source_texts/kannada.txt +0 -283
@@ -1,400 +0,0 @@
1
- _ 1540
2
- a 492
3
- e 342
4
- i 317
5
- s 308
6
- n 295
7
- r 240
8
- t 188
9
- l 181
10
- u 172
11
- a_ 168
12
- c 157
13
- d 147
14
- o 132
15
- g 107
16
- m 104
17
- h 100
18
- s_ 92
19
- er 81
20
- p 78
21
- n_ 74
22
- v 72
23
- ch 72
24
- _s 69
25
- _d 64
26
- in 63
27
- _e 62
28
- en 60
29
- sc 57
30
- , 57
31
- da 56
32
- f 54
33
- . 53
34
- r_ 53
35
- _c 50
36
- ra 48
37
- __ 48
38
- ,_ 47
39
- l_ 46
40
- an 45
41
- sch 43
42
- ar 43
43
- _da 42
44
- z 41
45
- _p 40
46
- na 40
47
- ta 40
48
- ai 39
49
- un 38
50
- nt 37
51
- ma 37
52
- _f 37
53
- _m 36
54
- la 35
55
- ._ 34
56
- _i 34
57
- da_ 33
58
- ' 32
59
- as 32
60
- b 29
61
- _a 29
62
- i_ 29
63
- es 29
64
- ia 28
65
- _da_ 28
66
- er_ 28
67
- t_ 28
68
- al 27
69
- re 27
70
- is 27
71
- va 27
72
- _t 26
73
- st 26
74
- gl 25
75
- ei 25
76
- de 25
77
- e_ 25
78
- cu 25
79
- or 24
80
- di 24
81
- el 24
82
- _v 24
83
- he 24
84
- ha 24
85
- ve 23
86
- _l 23
87
- sa 23
88
- ad 23
89
- ts 22
90
- il 22
91
- te 22
92
- � 22
93
- ss 22
94
- pe 21
95
- on 21
96
- d_ 20
97
- ns 20
98
- ain 20
99
- ur 20
100
- as_ 20
101
- rt 20
102
- _n 20
103
- u_ 20
104
- tg 19
105
- ig 19
106
- ng 19
107
- _sc 18
108
- rs 18
109
- per 18
110
- ir 18
111
- li 18
112
- _g 17
113
- se 17
114
- ti 17
115
- q 17
116
- to 17
117
- eg 17
118
- _in 17
119
- qu 17
120
- che 17
121
- _o 17
122
- ie 17
123
- na_ 16
124
- ha_ 16
125
- _ch 16
126
- et 16
127
- iv 16
128
- in_ 16
129
- en_ 16
130
- un_ 16
131
- iu 16
132
- ll 16
133
- ls 16
134
- g_ 16
135
- nd 15
136
- za 15
137
- ent 15
138
- mi 15
139
- ns_ 15
140
- fi 15
141
- cha 15
142
- at 15
143
- I 15
144
- _en 15
145
- ic 14
146
- _qu 14
147
- sta 14
148
- ra_ 14
149
- gn 14
150
- co 14
151
- vi 14
152
- ri 14
153
- _b 14
154
- _cu 14
155
- _q 14
156
- igl 14
157
- gi 13
158
- it 13
159
- " 13
160
- pr 13
161
- _ma 13
162
- _h 13
163
- _e_ 13
164
- ta_ 13
165
- me 13
166
- pa 13
167
- int 13
168
- ni 13
169
- us 13
170
- el_ 13
171
- ed 12
172
- nz 12
173
- nu 12
174
- uo 12
175
- ca 12
176
- sa_ 12
177
- _fi 12
178
- s, 12
179
- ue 12
180
- av 12
181
- su 12
182
- ga 12
183
- ot 12
184
- hi 12
185
- _per 12
186
- _pe 12
187
- A 12
188
- le 11
189
- era 11
190
- lla 11
191
- au 11
192
- um 11
193
- ls_ 11
194
- mai 11
195
- ei_ 11
196
- chi 11
197
- per_ 11
198
- gl_ 11
199
- _di 11
200
- ia_ 11
201
- a. 11
202
- lu 11
203
- _in_ 11
204
- que 11
205
- scha 11
206
- iss 11
207
- - 11
208
- si 11
209
- M 11
210
- am 11
211
- rt_ 10
212
- _r 10
213
- _mi 10
214
- ge 10
215
- a._ 10
216
- mp 10
217
- o_ 10
218
- main 10
219
- os 10
220
- s. 10
221
- esc 10
222
- L 10
223
- fe 10
224
- mo 10
225
- S 10
226
- nza 10
227
- ar_ 10
228
- ut 10
229
- zi 10
230
- _ve 10
231
- man 10
232
- pi 10
233
- _per_ 10
234
- ter 10
235
- iun 10
236
- _que 9
237
- _nu 9
238
- nta 9
239
- cun 9
240
- P 9
241
- �n 9
242
- ing 9
243
- a, 9
244
- ama 9
245
- des 9
246
- esch 9
247
- tsc 9
248
- em 9
249
- _ha 9
250
- igl_ 9
251
- _ha_ 9
252
- fa 9
253
- ac 9
254
- tsch 9
255
- la_ 9
256
- s,_ 9
257
- ts_ 9
258
- gli 9
259
- las 9
260
- 'e 9
261
- _st 9
262
- be 9
263
- om 9
264
- rm 9
265
- _se 8
266
- ch' 8
267
- no 8
268
- ro 8
269
- ver 8
270
- _A 8
271
- ng_ 8
272
- _la 8
273
- a,_ 8
274
- an_ 8
275
- va_ 8
276
- ev 8
277
- h' 8
278
- _u 8
279
- _pa 8
280
- ne 8
281
- x 8
282
- ssa 8
283
- nc 8
284
- cl 8
285
- desc 8
286
- _M 8
287
- _il 8
288
- ier 8
289
- E 8
290
- ins 7
291
- h_ 7
292
- _me 7
293
- za_ 7
294
- R 7
295
- im 7
296
- vai 7
297
- us_ 7
298
- ura 7
299
- schi 7
300
- al_ 7
301
- cha_ 7
302
- nte 7
303
- ed_ 7
304
- sche 7
305
- C 7
306
- pl 7
307
- ag 7
308
- _er 7
309
- eu 7
310
- nas 7
311
- od 7
312
- aint 7
313
- _fa 7
314
- ran 7
315
- _na 7
316
- las_ 7
317
- s._ 7
318
- _ch' 7
319
- n. 7
320
- c_ 7
321
- _sa 7
322
- nts 7
323
- end 7
324
- ava 7
325
- _or 7
326
- sen 7
327
- ch_ 7
328
- il_ 7
329
- sp 7
330
- az 7
331
- _sch 7
332
- _cun 7
333
- ert 7
334
- lla_ 7
335
- _I 7
336
- ors 7
337
- _su 7
338
- _vi 7
339
- gia 7
340
- all 7
341
- uel 7
342
- desch 7
343
- l' 7
344
- _co 7
345
- ir_ 7
346
- sta_ 6
347
- cun_ 6
348
- _fe 6
349
- _pr 6
350
- lar 6
351
- _E 6
352
- _bu 6
353
- _C 6
354
- una 6
355
- vair 6
356
- ziun 6
357
- _las 6
358
- ad_ 6
359
- Il 6
360
- oz 6
361
- _re 6
362
- egn 6
363
- art 6
364
- tg_ 6
365
- air 6
366
- _las_ 6
367
- _en_ 6
368
- _ed 6
369
- _�n 6
370
- _. 6
371
- quel 6
372
- ! 6
373
- _es 6
374
- maing 6
375
- _l' 6
376
- amain 6
377
- res 6
378
- _te 6
379
- rd 6
380
- fiss 6
381
- ert_ 6
382
- _ed_ 6
383
- ats 6
384
- ina 6
385
- fis 6
386
- �s 6
387
- ziu 6
388
- hen 6
389
- ist 6
390
- _R 6
391
- aing 6
392
- _fiss 6
393
- _� 6
394
- nt_ 6
395
- ari 6
396
- chen 6
397
- _de 6
398
- _fis 6
399
- tad 6
400
- G 6