scylla 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. data/Gemfile +0 -1
  2. data/Gemfile.lock +0 -10
  3. data/README.rdoc +22 -0
  4. data/VERSION +1 -1
  5. data/bin/scylla +13 -0
  6. data/lib/scylla/classifier.rb +2 -2
  7. data/lib/scylla/generator.rb +1 -1
  8. data/lib/scylla/lms/13375P33K.lm +400 -0
  9. data/lib/scylla/lms/afrikaans.lm +400 -0
  10. data/lib/scylla/lms/arabic.lm +400 -0
  11. data/lib/scylla/lms/bulgarian.lm +400 -0
  12. data/lib/scylla/lms/catalan.lm +400 -0
  13. data/lib/scylla/lms/chinese.lm +400 -0
  14. data/lib/scylla/lms/danish.lm +400 -0
  15. data/lib/scylla/lms/english.lm +400 -0
  16. data/lib/scylla/lms/esperanto.lm +400 -0
  17. data/lib/scylla/lms/finnish.lm +400 -0
  18. data/lib/scylla/lms/french.lm +400 -0
  19. data/lib/scylla/lms/german.lm +400 -0
  20. data/lib/scylla/lms/greek-iso8859-7.lm +400 -0
  21. data/lib/scylla/lms/hebrew.lm +400 -0
  22. data/lib/scylla/lms/hindi.lm +400 -0
  23. data/lib/scylla/lms/hungarian.lm +400 -0
  24. data/lib/scylla/lms/icelandic.lm +400 -0
  25. data/lib/scylla/lms/indonesian.lm +400 -0
  26. data/lib/scylla/lms/irish.lm +400 -0
  27. data/lib/scylla/lms/italian.lm +400 -0
  28. data/lib/scylla/lms/japanese.lm +400 -0
  29. data/lib/scylla/lms/kannada.lm +400 -0
  30. data/lib/scylla/lms/korean.lm +400 -0
  31. data/lib/scylla/lms/latin.lm +400 -0
  32. data/lib/scylla/lms/malay.lm +400 -0
  33. data/lib/scylla/lms/marathi.lm +400 -0
  34. data/lib/scylla/lms/mingo.lm +400 -0
  35. data/lib/scylla/lms/nepali.lm +400 -0
  36. data/lib/scylla/lms/norwegian.lm +400 -0
  37. data/lib/scylla/lms/polish.lm +400 -0
  38. data/lib/scylla/lms/portuguese.lm +400 -0
  39. data/lib/scylla/lms/quechua.lm +400 -0
  40. data/lib/scylla/lms/romanian.lm +400 -0
  41. data/lib/scylla/lms/rumantsch.lm +400 -0
  42. data/lib/scylla/lms/russian.lm +400 -0
  43. data/lib/scylla/lms/sanskrit.lm +400 -0
  44. data/lib/scylla/lms/scots_gaelic.lm +400 -0
  45. data/lib/scylla/lms/serbian-ascii.lm +400 -0
  46. data/lib/scylla/lms/slovak-ascii.lm +400 -0
  47. data/lib/scylla/lms/slovenian-ascii.lm +400 -0
  48. data/lib/scylla/lms/spanish.lm +400 -0
  49. data/lib/scylla/lms/swahili.lm +400 -0
  50. data/lib/scylla/lms/swedish.lm +400 -0
  51. data/lib/scylla/lms/tagalog.lm +400 -0
  52. data/lib/scylla/lms/tamil.lm +400 -0
  53. data/lib/scylla/lms/thai.lm +400 -0
  54. data/lib/scylla/lms/turkish.lm +400 -0
  55. data/lib/scylla/lms/ukrainian-koi8_u.lm +400 -0
  56. data/lib/scylla/lms/vietnamese.lm +400 -0
  57. data/lib/scylla/lms/welsh.lm +400 -0
  58. data/lib/scylla/lms/yiddish-utf.lm +400 -0
  59. data/lib/scylla/loader.rb +8 -1
  60. data/scylla-0.1.0.gem +0 -0
  61. data/scylla.gemspec +69 -3
  62. data/source_texts/kannada.txt +283 -0
  63. data/test/classifier_test.rb +7 -0
  64. data/test/fixtures/lms/13375p33k.lm +400 -0
  65. data/test/fixtures/lms/danish.lm +400 -0
  66. data/test/fixtures/lms/english.lm +400 -0
  67. data/test/fixtures/lms/french.lm +400 -0
  68. data/test/fixtures/lms/german.lm +400 -0
  69. data/test/fixtures/lms/japanese.lm +400 -0
  70. data/test/fixtures/lms/kannada.lm +400 -0
  71. data/test/fixtures/lms/spanish.lm +400 -0
  72. data/test/fixtures/source_texts/13375P33K.txt +199 -0
  73. data/test/fixtures/source_texts/japanese.txt +199 -0
  74. data/test/fixtures/source_texts/kannada.txt +283 -0
  75. data/test/generator_test.rb +10 -7
  76. data/test/helper.rb +5 -6
  77. data/test/loader_test.rb +1 -0
  78. data/test/scylla_test.rb +1 -0
  79. metadata +78 -14
  80. data/source_texts/armenian.txt +0 -86
@@ -0,0 +1,400 @@
1
+ _ 1232
2
+ e 467
3
+ i 418
4
+ a 351
5
+ u 343
6
+ t 320
7
+ s 309
8
+ r 278
9
+ m 237
10
+ o 226
11
+ n 223
12
+ c 153
13
+ l 139
14
+ p 115
15
+ e_ 108
16
+ s_ 99
17
+ d 99
18
+ , 90
19
+ q 79
20
+ qu 79
21
+ er 78
22
+ is 71
23
+ v 67
24
+ um 67
25
+ t_ 67
26
+ m_ 66
27
+ ,_ 65
28
+ re 63
29
+ a_ 60
30
+ te 57
31
+ it 55
32
+ ue 55
33
+ in 55
34
+ us 54
35
+ _a 52
36
+ _s 51
37
+ b 49
38
+ que 48
39
+ nt 48
40
+ ra 47
41
+ _c 46
42
+ at 46
43
+ en 44
44
+ _i 44
45
+ ti 43
46
+ or 43
47
+ g 42
48
+ _e 42
49
+ ue_ 41
50
+ _p 41
51
+ que_ 41
52
+ f 41
53
+ am 40
54
+ tu 39
55
+ et 39
56
+ em 38
57
+ ro 38
58
+ li 37
59
+ _m 36
60
+ um_ 35
61
+ os 33
62
+ h 33
63
+ an 32
64
+ _t 32
65
+ _d 32
66
+ is_ 31
67
+ ta 31
68
+ us_ 31
69
+ mi 31
70
+ et_ 30
71
+ de 30
72
+ ur 30
73
+ i_ 30
74
+ _f 29
75
+ ri 29
76
+ on 29
77
+ si 28
78
+ ae 28
79
+ ui 28
80
+ ia 28
81
+ pe 27
82
+ ni 27
83
+ es 27
84
+ _v 26
85
+ im 26
86
+ s, 26
87
+ la 26
88
+ ic 26
89
+ ma 25
90
+ o_ 25
91
+ na 25
92
+ . 24
93
+ st 24
94
+ vi 23
95
+ ns 22
96
+ su 22
97
+ id 22
98
+ r_ 22
99
+ _et_ 22
100
+ ve 22
101
+ _et 22
102
+ di 22
103
+ as 21
104
+ _r 21
105
+ _in 21
106
+ to 21
107
+ ci 21
108
+ ul 20
109
+ el 20
110
+ ct 20
111
+ c_ 20
112
+ ne 20
113
+ un 20
114
+ re_ 20
115
+ s,_ 20
116
+ cu 20
117
+ se 20
118
+ co 20
119
+ ere 19
120
+ I 19
121
+ ru 19
122
+ m, 19
123
+ _h 19
124
+ mu 19
125
+ am_ 18
126
+ ol 18
127
+ le 18
128
+ _l 18
129
+ pr 18
130
+ ec 17
131
+ tr 17
132
+ ar 17
133
+ _n 17
134
+ au 17
135
+ te_ 17
136
+ ent 16
137
+ n_ 16
138
+ ll 16
139
+ no 16
140
+ _qu 16
141
+ _q 16
142
+ sa 16
143
+ qui 15
144
+ hi 15
145
+ ca 15
146
+ al 15
147
+ rt 15
148
+ pi 15
149
+ t, 14
150
+ ce 14
151
+ _te 14
152
+ om 14
153
+ per 14
154
+ _o 14
155
+ squ 14
156
+ sq 14
157
+ sque 14
158
+ os_ 14
159
+ il 14
160
+ nu 14
161
+ ter 14
162
+ me 14
163
+ mo 14
164
+ lu 13
165
+ tis 13
166
+ ib 13
167
+ pro 13
168
+ _su 13
169
+ do 13
170
+ er_ 13
171
+ ant 13
172
+ _de 13
173
+ x 13
174
+ em_ 13
175
+ ss 12
176
+ uis 12
177
+ it_ 12
178
+ lo 12
179
+ vo 12
180
+ T 12
181
+ _co 12
182
+ fe 12
183
+ ere_ 12
184
+ sque_ 12
185
+ _pe 12
186
+ ir 12
187
+ A 12
188
+ unt 12
189
+ _si 12
190
+ ens 12
191
+ pa 12
192
+ pu 12
193
+ _re 12
194
+ _ma 12
195
+ tem 12
196
+ po 12
197
+ nd 12
198
+ era 12
199
+ t,_ 11
200
+ ; 11
201
+ _ca 11
202
+ eq 11
203
+ equ 11
204
+ ? 11
205
+ na_ 11
206
+ ia_ 11
207
+ nte 11
208
+ mp 11
209
+ _pa 11
210
+ nti 11
211
+ _la 11
212
+ rum 11
213
+ _u 11
214
+ ag 11
215
+ _au 11
216
+ iu 11
217
+ uo 11
218
+ up 11
219
+ cum 11
220
+ av 10
221
+ oc 10
222
+ ibu 10
223
+ gi 10
224
+ bi 10
225
+ ros 10
226
+ rr 10
227
+ rat 10
228
+ ac 10
229
+ tor 10
230
+ ba 10
231
+ m,_ 10
232
+ ect 10
233
+ ev 10
234
+ du 10
235
+ : 10
236
+ da 10
237
+ ic_ 10
238
+ ut 10
239
+ ie 10
240
+ bu 10
241
+ ra_ 10
242
+ H 10
243
+ ex 10
244
+ nc 10
245
+ gn 10
246
+ _A 10
247
+ nis 10
248
+ _ve 10
249
+ as_ 10
250
+ ora 10
251
+ tum 10
252
+ ibus 9
253
+ ta_ 9
254
+ eri 9
255
+ sp 9
256
+ ite 9
257
+ op 9
258
+ bus 9
259
+ e,_ 9
260
+ _I 9
261
+ e, 9
262
+ itu 9
263
+ eli 9
264
+ at_ 9
265
+ tur 9
266
+ sc 9
267
+ ver 9
268
+ _sa 9
269
+ ad 9
270
+ _cu 9
271
+ _se 9
272
+ ep 9
273
+ _pr 9
274
+ fer 9
275
+ si_ 9
276
+ _vi 9
277
+ ate 9
278
+ us, 8
279
+ lia 8
280
+ ng 8
281
+ ab 8
282
+ mq 8
283
+ ap 8
284
+ ntem 8
285
+ ed 8
286
+ quo 8
287
+ mqu 8
288
+ mn 8
289
+ eb 8
290
+ rem 8
291
+ min 8
292
+ oe 8
293
+ _no 8
294
+ _me 8
295
+ cr 8
296
+ oq 8
297
+ iam 8
298
+ emi 8
299
+ imp 8
300
+ fu 8
301
+ tus 8
302
+ ibus_ 8
303
+ i, 8
304
+ ig 8
305
+ ill 8
306
+ _hi 8
307
+ _mo 8
308
+ ua 8
309
+ oqu 8
310
+ uc 8
311
+ tis_ 8
312
+ _T 8
313
+ us,_ 8
314
+ um, 8
315
+ d_ 8
316
+ cto 8
317
+ ;_ 8
318
+ bus_ 8
319
+ non 7
320
+ ine 7
321
+ eu 7
322
+ tque_ 7
323
+ tque 7
324
+ ns_ 7
325
+ lt 7
326
+ lle 7
327
+ ud 7
328
+ enti 7
329
+ _per 7
330
+ aqu 7
331
+ err 7
332
+ ina 7
333
+ in_ 7
334
+ _im 7
335
+ _po 7
336
+ est 7
337
+ _om 7
338
+ a, 7
339
+ Qu 7
340
+ _fe 7
341
+ mit 7
342
+ _fer 7
343
+ on_ 7
344
+ _pro 7
345
+ pt 7
346
+ D 7
347
+ equi 7
348
+ uit 7
349
+ aq 7
350
+ tq 7
351
+ Q 7
352
+ nt_ 7
353
+ omn 7
354
+ tqu 7
355
+ _omn 7
356
+ imu 7
357
+ ris 7
358
+ ctor 6
359
+ _ho 6
360
+ inc 6
361
+ io 6
362
+ qua 6
363
+ ini 6
364
+ ora_ 6
365
+ ali 6
366
+ erat 6
367
+ _da 6
368
+ _sup 6
369
+ s: 6
370
+ fa 6
371
+ tra 6
372
+ usq 6
373
+ _fo 6
374
+ lis 6
375
+ tum_ 6
376
+ mag 6
377
+ to_ 6
378
+ cum_ 6
379
+ os, 6
380
+ mque 6
381
+ _quo 6
382
+ ho 6
383
+ se_ 6
384
+ mor 6
385
+ usqu 6
386
+ _vo 6
387
+ oru 6
388
+ lit 6
389
+ _al 6
390
+ _il 6
391
+ non_ 6
392
+ oque 6
393
+ _do 6
394
+ _in_ 6
395
+ t. 6
396
+ ut_ 6
397
+ be 6
398
+ fo 6
399
+ usque 6
400
+ sup 6