scylla 0.4.3 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. data/Gemfile +1 -0
  2. data/Gemfile.lock +10 -0
  3. data/VERSION +1 -1
  4. data/lib/scylla/generator.rb +1 -1
  5. data/lib/scylla/lms/13375P33K.lm +156 -156
  6. data/lib/scylla/lms/arabic.lm +133 -133
  7. data/lib/scylla/lms/bulgarian.lm +122 -122
  8. data/lib/scylla/lms/catalan.lm +151 -151
  9. data/lib/scylla/lms/danish.lm +137 -137
  10. data/lib/scylla/lms/english.lm +207 -207
  11. data/lib/scylla/lms/french.lm +400 -400
  12. data/lib/scylla/lms/japanese.lm +400 -400
  13. data/lib/scylla/lms/korean.lm +233 -233
  14. data/lib/scylla/lms/norwegian.lm +398 -398
  15. data/lib/scylla/lms/spanish.lm +98 -98
  16. data/lib/scylla/lms/swedish.lm +123 -123
  17. data/lib/scylla/lms/tagalog.lm +223 -223
  18. data/lib/scylla/lms/welsh.lm +234 -234
  19. data/lib/scylla/resources.rb +10 -10
  20. data/scylla.gemspec +17 -40
  21. data/source_texts/catalan.txt +28 -28
  22. data/source_texts/danish.txt +62 -62
  23. data/source_texts/english.txt +10 -10
  24. data/source_texts/french.txt +470 -77
  25. data/source_texts/japanese.txt +453 -199
  26. data/source_texts/norwegian.txt +96 -63
  27. data/source_texts/spanish.txt +269 -269
  28. data/test/classifier_test.rb +2 -2
  29. data/test/fixtures/lms/13375p33k.lm +156 -156
  30. data/test/fixtures/lms/danish.lm +137 -137
  31. data/test/fixtures/lms/english.lm +207 -207
  32. data/test/fixtures/lms/french.lm +400 -400
  33. data/test/fixtures/lms/hindi.lm +400 -0
  34. data/test/fixtures/lms/italian.lm +400 -0
  35. data/test/fixtures/lms/japanese.lm +400 -400
  36. data/test/fixtures/lms/norwegian.lm +400 -0
  37. data/test/fixtures/lms/spanish.lm +98 -98
  38. data/test/fixtures/source_texts/danish.txt +62 -62
  39. data/test/fixtures/source_texts/english.txt +10 -10
  40. data/test/fixtures/source_texts/french.txt +470 -77
  41. data/test/fixtures/source_texts/hindi.txt +199 -0
  42. data/test/fixtures/source_texts/italian.txt +120 -0
  43. data/test/fixtures/source_texts/japanese.txt +453 -199
  44. data/test/fixtures/source_texts/norwegian.txt +190 -0
  45. data/test/fixtures/source_texts/spanish.txt +269 -269
  46. data/test/fixtures/test_languages/english +61 -0
  47. data/test/fixtures/test_languages/french +0 -0
  48. data/test/fixtures/test_languages/german +29 -0
  49. data/test/fixtures/test_languages/hindi +3 -0
  50. data/test/fixtures/test_languages/italian +6 -0
  51. data/test/fixtures/test_languages/japanese +79 -0
  52. data/test/fixtures/test_languages/norwegian +14 -0
  53. data/test/fixtures/test_languages/spanish +22 -0
  54. data/test/generator_test.rb +0 -1
  55. data/test/language_test.rb +28 -0
  56. metadata +20 -43
  57. data/lib/scylla/lms/esperanto.lm +0 -400
  58. data/lib/scylla/lms/hungarian.lm +0 -400
  59. data/lib/scylla/lms/irish.lm +0 -400
  60. data/lib/scylla/lms/kannada.lm +0 -400
  61. data/lib/scylla/lms/latin.lm +0 -400
  62. data/lib/scylla/lms/malay.lm +0 -400
  63. data/lib/scylla/lms/marathi.lm +0 -400
  64. data/lib/scylla/lms/mingo.lm +0 -400
  65. data/lib/scylla/lms/nepali.lm +0 -400
  66. data/lib/scylla/lms/quechua.lm +0 -400
  67. data/lib/scylla/lms/rumantsch.lm +0 -400
  68. data/lib/scylla/lms/sanskrit.lm +0 -400
  69. data/lib/scylla/lms/scots_gaelic.lm +0 -400
  70. data/lib/scylla/lms/serbian.lm +0 -400
  71. data/lib/scylla/lms/swahili.lm +0 -400
  72. data/lib/scylla/lms/tamil.lm +0 -400
  73. data/lib/scylla/lms/ukrainian.lm +0 -400
  74. data/lib/scylla/lms/yiddish.lm +0 -400
  75. data/source_texts/esperanto.txt +0 -199
  76. data/source_texts/hungarian.txt +0 -102
  77. data/source_texts/irish.txt +0 -209
  78. data/source_texts/kannada.txt +0 -283
  79. data/source_texts/latin.txt +0 -120
  80. data/source_texts/malay.txt +0 -108
  81. data/source_texts/marathi.txt +0 -100
  82. data/source_texts/mingo.txt +0 -146
  83. data/source_texts/nepali.txt +0 -131
  84. data/source_texts/quechua.txt +0 -108
  85. data/source_texts/rumantsch.txt +0 -110
  86. data/source_texts/sanskrit.txt +0 -135
  87. data/source_texts/scots_gaelic.txt +0 -93
  88. data/source_texts/serbian.txt +0 -121
  89. data/source_texts/swahili.txt +0 -120
  90. data/source_texts/tamil.txt +0 -167
  91. data/source_texts/ukrainian.txt +0 -214
  92. data/source_texts/yiddish-utf.txt +0 -83
  93. data/test/fixtures/lms/kannada.lm +0 -400
  94. data/test/fixtures/source_texts/kannada.txt +0 -283
@@ -1,400 +0,0 @@
1
- _ 1232
2
- e 467
3
- i 418
4
- a 351
5
- u 343
6
- t 320
7
- s 309
8
- r 278
9
- m 237
10
- o 226
11
- n 223
12
- c 153
13
- l 139
14
- p 115
15
- e_ 108
16
- s_ 99
17
- d 99
18
- , 90
19
- q 79
20
- qu 79
21
- er 78
22
- is 71
23
- v 67
24
- um 67
25
- t_ 67
26
- m_ 66
27
- ,_ 65
28
- re 63
29
- a_ 60
30
- te 57
31
- it 55
32
- ue 55
33
- in 55
34
- us 54
35
- _a 52
36
- _s 51
37
- b 49
38
- que 48
39
- nt 48
40
- ra 47
41
- _c 46
42
- at 46
43
- en 44
44
- _i 44
45
- ti 43
46
- or 43
47
- g 42
48
- _e 42
49
- ue_ 41
50
- _p 41
51
- que_ 41
52
- f 41
53
- am 40
54
- tu 39
55
- et 39
56
- em 38
57
- ro 38
58
- li 37
59
- _m 36
60
- um_ 35
61
- os 33
62
- h 33
63
- an 32
64
- _t 32
65
- _d 32
66
- is_ 31
67
- ta 31
68
- us_ 31
69
- mi 31
70
- et_ 30
71
- de 30
72
- ur 30
73
- i_ 30
74
- _f 29
75
- ri 29
76
- on 29
77
- si 28
78
- ae 28
79
- ui 28
80
- ia 28
81
- pe 27
82
- ni 27
83
- es 27
84
- _v 26
85
- im 26
86
- s, 26
87
- la 26
88
- ic 26
89
- ma 25
90
- o_ 25
91
- na 25
92
- . 24
93
- st 24
94
- vi 23
95
- ns 22
96
- su 22
97
- id 22
98
- r_ 22
99
- _et_ 22
100
- ve 22
101
- _et 22
102
- di 22
103
- as 21
104
- _r 21
105
- _in 21
106
- to 21
107
- ci 21
108
- ul 20
109
- el 20
110
- ct 20
111
- c_ 20
112
- ne 20
113
- un 20
114
- re_ 20
115
- s,_ 20
116
- cu 20
117
- se 20
118
- co 20
119
- ere 19
120
- I 19
121
- ru 19
122
- m, 19
123
- _h 19
124
- mu 19
125
- am_ 18
126
- ol 18
127
- le 18
128
- _l 18
129
- pr 18
130
- ec 17
131
- tr 17
132
- ar 17
133
- _n 17
134
- au 17
135
- te_ 17
136
- ent 16
137
- n_ 16
138
- ll 16
139
- no 16
140
- _qu 16
141
- _q 16
142
- sa 16
143
- qui 15
144
- hi 15
145
- ca 15
146
- al 15
147
- rt 15
148
- pi 15
149
- t, 14
150
- ce 14
151
- _te 14
152
- om 14
153
- per 14
154
- _o 14
155
- squ 14
156
- sq 14
157
- sque 14
158
- os_ 14
159
- il 14
160
- nu 14
161
- ter 14
162
- me 14
163
- mo 14
164
- lu 13
165
- tis 13
166
- ib 13
167
- pro 13
168
- _su 13
169
- do 13
170
- er_ 13
171
- ant 13
172
- _de 13
173
- x 13
174
- em_ 13
175
- ss 12
176
- uis 12
177
- it_ 12
178
- lo 12
179
- vo 12
180
- T 12
181
- _co 12
182
- fe 12
183
- ere_ 12
184
- sque_ 12
185
- _pe 12
186
- ir 12
187
- A 12
188
- unt 12
189
- _si 12
190
- ens 12
191
- pa 12
192
- pu 12
193
- _re 12
194
- _ma 12
195
- tem 12
196
- po 12
197
- nd 12
198
- era 12
199
- t,_ 11
200
- ; 11
201
- _ca 11
202
- eq 11
203
- equ 11
204
- ? 11
205
- na_ 11
206
- ia_ 11
207
- nte 11
208
- mp 11
209
- _pa 11
210
- nti 11
211
- _la 11
212
- rum 11
213
- _u 11
214
- ag 11
215
- _au 11
216
- iu 11
217
- uo 11
218
- up 11
219
- cum 11
220
- av 10
221
- oc 10
222
- ibu 10
223
- gi 10
224
- bi 10
225
- ros 10
226
- rr 10
227
- rat 10
228
- ac 10
229
- tor 10
230
- ba 10
231
- m,_ 10
232
- ect 10
233
- ev 10
234
- du 10
235
- : 10
236
- da 10
237
- ic_ 10
238
- ut 10
239
- ie 10
240
- bu 10
241
- ra_ 10
242
- H 10
243
- ex 10
244
- nc 10
245
- gn 10
246
- _A 10
247
- nis 10
248
- _ve 10
249
- as_ 10
250
- ora 10
251
- tum 10
252
- ibus 9
253
- ta_ 9
254
- eri 9
255
- sp 9
256
- ite 9
257
- op 9
258
- bus 9
259
- e,_ 9
260
- _I 9
261
- e, 9
262
- itu 9
263
- eli 9
264
- at_ 9
265
- tur 9
266
- sc 9
267
- ver 9
268
- _sa 9
269
- ad 9
270
- _cu 9
271
- _se 9
272
- ep 9
273
- _pr 9
274
- fer 9
275
- si_ 9
276
- _vi 9
277
- ate 9
278
- us, 8
279
- lia 8
280
- ng 8
281
- ab 8
282
- mq 8
283
- ap 8
284
- ntem 8
285
- ed 8
286
- quo 8
287
- mqu 8
288
- mn 8
289
- eb 8
290
- rem 8
291
- min 8
292
- oe 8
293
- _no 8
294
- _me 8
295
- cr 8
296
- oq 8
297
- iam 8
298
- emi 8
299
- imp 8
300
- fu 8
301
- tus 8
302
- ibus_ 8
303
- i, 8
304
- ig 8
305
- ill 8
306
- _hi 8
307
- _mo 8
308
- ua 8
309
- oqu 8
310
- uc 8
311
- tis_ 8
312
- _T 8
313
- us,_ 8
314
- um, 8
315
- d_ 8
316
- cto 8
317
- ;_ 8
318
- bus_ 8
319
- non 7
320
- ine 7
321
- eu 7
322
- tque_ 7
323
- tque 7
324
- ns_ 7
325
- lt 7
326
- lle 7
327
- ud 7
328
- enti 7
329
- _per 7
330
- aqu 7
331
- err 7
332
- ina 7
333
- in_ 7
334
- _im 7
335
- _po 7
336
- est 7
337
- _om 7
338
- a, 7
339
- Qu 7
340
- _fe 7
341
- mit 7
342
- _fer 7
343
- on_ 7
344
- _pro 7
345
- pt 7
346
- D 7
347
- equi 7
348
- uit 7
349
- aq 7
350
- tq 7
351
- Q 7
352
- nt_ 7
353
- omn 7
354
- tqu 7
355
- _omn 7
356
- imu 7
357
- ris 7
358
- ctor 6
359
- _ho 6
360
- inc 6
361
- io 6
362
- qua 6
363
- ini 6
364
- ora_ 6
365
- ali 6
366
- erat 6
367
- _da 6
368
- _sup 6
369
- s: 6
370
- fa 6
371
- tra 6
372
- usq 6
373
- _fo 6
374
- lis 6
375
- tum_ 6
376
- mag 6
377
- to_ 6
378
- cum_ 6
379
- os, 6
380
- mque 6
381
- _quo 6
382
- ho 6
383
- se_ 6
384
- mor 6
385
- usqu 6
386
- _vo 6
387
- oru 6
388
- lit 6
389
- _al 6
390
- _il 6
391
- non_ 6
392
- oque 6
393
- _do 6
394
- _in_ 6
395
- t. 6
396
- ut_ 6
397
- be 6
398
- fo 6
399
- usque 6
400
- sup 6