scylla 0.4.3 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. data/Gemfile +1 -0
  2. data/Gemfile.lock +10 -0
  3. data/VERSION +1 -1
  4. data/lib/scylla/generator.rb +1 -1
  5. data/lib/scylla/lms/13375P33K.lm +156 -156
  6. data/lib/scylla/lms/arabic.lm +133 -133
  7. data/lib/scylla/lms/bulgarian.lm +122 -122
  8. data/lib/scylla/lms/catalan.lm +151 -151
  9. data/lib/scylla/lms/danish.lm +137 -137
  10. data/lib/scylla/lms/english.lm +207 -207
  11. data/lib/scylla/lms/french.lm +400 -400
  12. data/lib/scylla/lms/japanese.lm +400 -400
  13. data/lib/scylla/lms/korean.lm +233 -233
  14. data/lib/scylla/lms/norwegian.lm +398 -398
  15. data/lib/scylla/lms/spanish.lm +98 -98
  16. data/lib/scylla/lms/swedish.lm +123 -123
  17. data/lib/scylla/lms/tagalog.lm +223 -223
  18. data/lib/scylla/lms/welsh.lm +234 -234
  19. data/lib/scylla/resources.rb +10 -10
  20. data/scylla.gemspec +17 -40
  21. data/source_texts/catalan.txt +28 -28
  22. data/source_texts/danish.txt +62 -62
  23. data/source_texts/english.txt +10 -10
  24. data/source_texts/french.txt +470 -77
  25. data/source_texts/japanese.txt +453 -199
  26. data/source_texts/norwegian.txt +96 -63
  27. data/source_texts/spanish.txt +269 -269
  28. data/test/classifier_test.rb +2 -2
  29. data/test/fixtures/lms/13375p33k.lm +156 -156
  30. data/test/fixtures/lms/danish.lm +137 -137
  31. data/test/fixtures/lms/english.lm +207 -207
  32. data/test/fixtures/lms/french.lm +400 -400
  33. data/test/fixtures/lms/hindi.lm +400 -0
  34. data/test/fixtures/lms/italian.lm +400 -0
  35. data/test/fixtures/lms/japanese.lm +400 -400
  36. data/test/fixtures/lms/norwegian.lm +400 -0
  37. data/test/fixtures/lms/spanish.lm +98 -98
  38. data/test/fixtures/source_texts/danish.txt +62 -62
  39. data/test/fixtures/source_texts/english.txt +10 -10
  40. data/test/fixtures/source_texts/french.txt +470 -77
  41. data/test/fixtures/source_texts/hindi.txt +199 -0
  42. data/test/fixtures/source_texts/italian.txt +120 -0
  43. data/test/fixtures/source_texts/japanese.txt +453 -199
  44. data/test/fixtures/source_texts/norwegian.txt +190 -0
  45. data/test/fixtures/source_texts/spanish.txt +269 -269
  46. data/test/fixtures/test_languages/english +61 -0
  47. data/test/fixtures/test_languages/french +0 -0
  48. data/test/fixtures/test_languages/german +29 -0
  49. data/test/fixtures/test_languages/hindi +3 -0
  50. data/test/fixtures/test_languages/italian +6 -0
  51. data/test/fixtures/test_languages/japanese +79 -0
  52. data/test/fixtures/test_languages/norwegian +14 -0
  53. data/test/fixtures/test_languages/spanish +22 -0
  54. data/test/generator_test.rb +0 -1
  55. data/test/language_test.rb +28 -0
  56. metadata +20 -43
  57. data/lib/scylla/lms/esperanto.lm +0 -400
  58. data/lib/scylla/lms/hungarian.lm +0 -400
  59. data/lib/scylla/lms/irish.lm +0 -400
  60. data/lib/scylla/lms/kannada.lm +0 -400
  61. data/lib/scylla/lms/latin.lm +0 -400
  62. data/lib/scylla/lms/malay.lm +0 -400
  63. data/lib/scylla/lms/marathi.lm +0 -400
  64. data/lib/scylla/lms/mingo.lm +0 -400
  65. data/lib/scylla/lms/nepali.lm +0 -400
  66. data/lib/scylla/lms/quechua.lm +0 -400
  67. data/lib/scylla/lms/rumantsch.lm +0 -400
  68. data/lib/scylla/lms/sanskrit.lm +0 -400
  69. data/lib/scylla/lms/scots_gaelic.lm +0 -400
  70. data/lib/scylla/lms/serbian.lm +0 -400
  71. data/lib/scylla/lms/swahili.lm +0 -400
  72. data/lib/scylla/lms/tamil.lm +0 -400
  73. data/lib/scylla/lms/ukrainian.lm +0 -400
  74. data/lib/scylla/lms/yiddish.lm +0 -400
  75. data/source_texts/esperanto.txt +0 -199
  76. data/source_texts/hungarian.txt +0 -102
  77. data/source_texts/irish.txt +0 -209
  78. data/source_texts/kannada.txt +0 -283
  79. data/source_texts/latin.txt +0 -120
  80. data/source_texts/malay.txt +0 -108
  81. data/source_texts/marathi.txt +0 -100
  82. data/source_texts/mingo.txt +0 -146
  83. data/source_texts/nepali.txt +0 -131
  84. data/source_texts/quechua.txt +0 -108
  85. data/source_texts/rumantsch.txt +0 -110
  86. data/source_texts/sanskrit.txt +0 -135
  87. data/source_texts/scots_gaelic.txt +0 -93
  88. data/source_texts/serbian.txt +0 -121
  89. data/source_texts/swahili.txt +0 -120
  90. data/source_texts/tamil.txt +0 -167
  91. data/source_texts/ukrainian.txt +0 -214
  92. data/source_texts/yiddish-utf.txt +0 -83
  93. data/test/fixtures/lms/kannada.lm +0 -400
  94. data/test/fixtures/source_texts/kannada.txt +0 -283
@@ -0,0 +1,400 @@
1
+ _ 4750
2
+ e 1600
3
+ r 1018
4
+ n 1012
5
+ t 924
6
+ a 857
7
+ s 711
8
+ i 662
9
+ o 640
10
+ l 548
11
+ g 500
12
+ k 479
13
+ d 416
14
+ m 398
15
+ � 323
16
+ e_ 305
17
+ r_ 305
18
+ t_ 291
19
+ er 290
20
+ en 287
21
+ v 284
22
+ n_ 275
23
+ _s 215
24
+ et 214
25
+ an 213
26
+ g_ 205
27
+ or 203
28
+ � 194
29
+ å 194
30
+ de 191
31
+ f 188
32
+ u 188
33
+ h 172
34
+ p 166
35
+ er_ 164
36
+ te 162
37
+ . 161
38
+ re 160
39
+ en_ 158
40
+ _o 156
41
+ ar 145
42
+ _e 144
43
+ st 143
44
+ _h 142
45
+ et_ 138
46
+ , 134
47
+ _d 132
48
+ in 130
49
+ ,_ 130
50
+ sk 129
51
+ i_ 122
52
+ ng 117
53
+ ke 117
54
+ _i 117
55
+ ne 117
56
+ ti 110
57
+ le 110
58
+ _f 109
59
+ om 109
60
+ og 108
61
+ _m 105
62
+ me 105
63
+ j 104
64
+ b 103
65
+ _og 101
66
+ og_ 100
67
+ la 99
68
+ _og_ 97
69
+ _de 94
70
+ _a 94
71
+ m_ 94
72
+ ha 94
73
+ ei 93
74
+ ø 92
75
+ � 92
76
+ se 88
77
+ a_ 88
78
+ å_ 88
79
+ �_ 88
80
+ _t 87
81
+ nd 84
82
+ _v 84
83
+ el 84
84
+ ri 84
85
+ ge 84
86
+ eg 84
87
+ _ha 84
88
+ tt 83
89
+ is 80
90
+ nn 80
91
+ y 79
92
+ ta 79
93
+ av 78
94
+ _i_ 77
95
+ _n 77
96
+ ._ 77
97
+ al 75
98
+ om_ 74
99
+ rs 74
100
+ il 74
101
+ no 73
102
+ _k 73
103
+ on 71
104
+ _b 70
105
+ v_ 70
106
+ l_ 70
107
+ rt 69
108
+ _me 69
109
+ d_ 68
110
+ ve 67
111
+ ar_ 66
112
+ _. 66
113
+ va 65
114
+ ing 63
115
+ _, 62
116
+ det 59
117
+ _p 59
118
+ at 59
119
+ _g 59
120
+ fo 58
121
+ _,_ 58
122
+ ll 58
123
+ ke_ 57
124
+ av_ 57
125
+ un 57
126
+ an_ 56
127
+ k_ 56
128
+ ik 55
129
+ _av 55
130
+ _er 54
131
+ N 54
132
+ so 54
133
+ li 53
134
+ _l 53
135
+ am 52
136
+ nor 52
137
+ je 52
138
+ lan 51
139
+ ed 50
140
+ ors 50
141
+ ns 50
142
+ _er_ 50
143
+ r� 50
144
+ eg_ 49
145
+ _ti 49
146
+ es 49
147
+ ter 49
148
+ ne_ 48
149
+ D 48
150
+ for 48
151
+ til 48
152
+ rsk 48
153
+ _av_ 47
154
+ and 47
155
+ nde 47
156
+ ra 46
157
+ H 46
158
+ it 46
159
+ _r 46
160
+ ka 46
161
+ _N 45
162
+ ske 45
163
+ _til 45
164
+ ga 44
165
+ te_ 44
166
+ orsk 44
167
+ _so 43
168
+ S 43
169
+ som 43
170
+ No 43
171
+ ko 43
172
+ na 43
173
+ nge 42
174
+ det_ 42
175
+ Nor 42
176
+ _fo 42
177
+ ste 42
178
+ _som 41
179
+ _no 41
180
+ il_ 41
181
+ p� 40
182
+ _No 40
183
+ tte 40
184
+ som_ 40
185
+ rå 40
186
+ nt 39
187
+ _Nor 39
188
+ ett 39
189
+ _som_ 39
190
+ på 38
191
+ rd 38
192
+ kk 38
193
+ da 38
194
+ em 38
195
+ _� 38
196
+ ma 38
197
+ ig 38
198
+ De 37
199
+ �r 37
200
+ _ei 37
201
+ ør 37
202
+ han 37
203
+ sa 37
204
+ _u 37
205
+ ten 37
206
+ norsk 36
207
+ ed_ 36
208
+ _han 36
209
+ nors 36
210
+ _p� 36
211
+ _på 36
212
+ gj 36
213
+ vi 36
214
+ den 35
215
+ kt 35
216
+ til_ 35
217
+ reg 35
218
+ ske_ 35
219
+ _for 35
220
+ på_ 35
221
+ re_ 34
222
+ rt_ 34
223
+ _va 34
224
+ med 34
225
+ _på_ 34
226
+ _til_ 34
227
+ ore 33
228
+ lle 33
229
+ _nor 33
230
+ var 33
231
+ de_ 32
232
+ m� 32
233
+ _det 32
234
+ _S 32
235
+ kr 31
236
+ _med 31
237
+ _se 31
238
+ pe 31
239
+ or_ 31
240
+ tr 31
241
+ to 31
242
+ den_ 31
243
+ sp 31
244
+ ag 31
245
+ land 31
246
+ _st 31
247
+ ol 30
248
+ sk_ 30
249
+ ell 30
250
+ fr 29
251
+ le_ 29
252
+ pr 29
253
+ s_ 29
254
+ må 29
255
+ har 29
256
+ ak 29
257
+ ni 29
258
+ med_ 29
259
+ rk 28
260
+ _har 28
261
+ der 28
262
+ si 28
263
+ ss 28
264
+ ek 28
265
+ har_ 28
266
+ mi 28
267
+ _D 28
268
+ _med_ 27
269
+ _re 27
270
+ �l 27
271
+ _ko 27
272
+ æ 27
273
+ �r 27
274
+ år 27
275
+ ål 27
276
+ as 27
277
+ _fr 27
278
+ ks 27
279
+ ie 27
280
+ _la 27
281
+ � 27
282
+ ei_ 27
283
+ _har_ 27
284
+ men 26
285
+ _den 26
286
+ tin 26
287
+ ut 26
288
+ _var 26
289
+ nne 26
290
+ enn 26
291
+ ng_ 26
292
+ ord 26
293
+ ande 26
294
+ ans 26
295
+ - 26
296
+ _han_ 25
297
+ Nore 25
298
+ mål 25
299
+ _den_ 25
300
+ Noreg 25
301
+ oreg 25
302
+ _ve 25
303
+ han_ 25
304
+ ere 25
305
+ ru 25
306
+ ver 25
307
+ ette 25
308
+ eri 25
309
+ gje 25
310
+ A 24
311
+ ts 24
312
+ sj 24
313
+ gen 24
314
+ E 24
315
+ ting 23
316
+ tt_ 23
317
+ __ 23
318
+ ter_ 23
319
+ nga 23
320
+ _å 23
321
+ ten_ 23
322
+ lo 23
323
+ Ha 23
324
+ bl 23
325
+ kke 23
326
+ he 23
327
+ ene 23
328
+ isk 22
329
+ _det_ 22
330
+ _om 22
331
+ s� 22
332
+ kon 22
333
+ ds 22
334
+ ong 22
335
+ sta 22
336
+ _Nore 22
337
+ " 21
338
+ _H 21
339
+ ok 21
340
+ ane 21
341
+ øy 21
342
+ rm 21
343
+ �r 21
344
+ gr 21
345
+ kri 21
346
+ ir 21
347
+ us 21
348
+ �y 21
349
+ ær 21
350
+ art 20
351
+ for_ 20
352
+ _A 20
353
+ _nors 20
354
+ _bl 20
355
+ ad 20
356
+ sl 20
357
+ ld 20
358
+ ist 20
359
+ ndet 20
360
+ _E 19
361
+ orske 19
362
+ _lan 19
363
+ rsk_ 19
364
+ sam 19
365
+ oreg_ 19
366
+ inga 19
367
+ ret 19
368
+ _ka 19
369
+ _ut 19
370
+ f� 19
371
+ orsk_ 19
372
+ _s� 19
373
+ lt 19
374
+ hu 19
375
+ jo 19
376
+ seg 19
377
+ andet 19
378
+ ort 19
379
+ fa 19
380
+ id 19
381
+ ale 19
382
+ _De 19
383
+ un_ 19
384
+ rske 19
385
+ reg_ 19
386
+ ert 19
387
+ _et 18
388
+ ikk 18
389
+ tor 18
390
+ gs 18
391
+ n� 18
392
+ tter 18
393
+ est 18
394
+ _sa 18
395
+ inge 18
396
+ _vi 18
397
+ ov 18
398
+ mn 18
399
+ ro 18
400
+ rin 18