scylla 0.4.3 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. data/Gemfile +1 -0
  2. data/Gemfile.lock +10 -0
  3. data/VERSION +1 -1
  4. data/lib/scylla/generator.rb +1 -1
  5. data/lib/scylla/lms/13375P33K.lm +156 -156
  6. data/lib/scylla/lms/arabic.lm +133 -133
  7. data/lib/scylla/lms/bulgarian.lm +122 -122
  8. data/lib/scylla/lms/catalan.lm +151 -151
  9. data/lib/scylla/lms/danish.lm +137 -137
  10. data/lib/scylla/lms/english.lm +207 -207
  11. data/lib/scylla/lms/french.lm +400 -400
  12. data/lib/scylla/lms/japanese.lm +400 -400
  13. data/lib/scylla/lms/korean.lm +233 -233
  14. data/lib/scylla/lms/norwegian.lm +398 -398
  15. data/lib/scylla/lms/spanish.lm +98 -98
  16. data/lib/scylla/lms/swedish.lm +123 -123
  17. data/lib/scylla/lms/tagalog.lm +223 -223
  18. data/lib/scylla/lms/welsh.lm +234 -234
  19. data/lib/scylla/resources.rb +10 -10
  20. data/scylla.gemspec +17 -40
  21. data/source_texts/catalan.txt +28 -28
  22. data/source_texts/danish.txt +62 -62
  23. data/source_texts/english.txt +10 -10
  24. data/source_texts/french.txt +470 -77
  25. data/source_texts/japanese.txt +453 -199
  26. data/source_texts/norwegian.txt +96 -63
  27. data/source_texts/spanish.txt +269 -269
  28. data/test/classifier_test.rb +2 -2
  29. data/test/fixtures/lms/13375p33k.lm +156 -156
  30. data/test/fixtures/lms/danish.lm +137 -137
  31. data/test/fixtures/lms/english.lm +207 -207
  32. data/test/fixtures/lms/french.lm +400 -400
  33. data/test/fixtures/lms/hindi.lm +400 -0
  34. data/test/fixtures/lms/italian.lm +400 -0
  35. data/test/fixtures/lms/japanese.lm +400 -400
  36. data/test/fixtures/lms/norwegian.lm +400 -0
  37. data/test/fixtures/lms/spanish.lm +98 -98
  38. data/test/fixtures/source_texts/danish.txt +62 -62
  39. data/test/fixtures/source_texts/english.txt +10 -10
  40. data/test/fixtures/source_texts/french.txt +470 -77
  41. data/test/fixtures/source_texts/hindi.txt +199 -0
  42. data/test/fixtures/source_texts/italian.txt +120 -0
  43. data/test/fixtures/source_texts/japanese.txt +453 -199
  44. data/test/fixtures/source_texts/norwegian.txt +190 -0
  45. data/test/fixtures/source_texts/spanish.txt +269 -269
  46. data/test/fixtures/test_languages/english +61 -0
  47. data/test/fixtures/test_languages/french +0 -0
  48. data/test/fixtures/test_languages/german +29 -0
  49. data/test/fixtures/test_languages/hindi +3 -0
  50. data/test/fixtures/test_languages/italian +6 -0
  51. data/test/fixtures/test_languages/japanese +79 -0
  52. data/test/fixtures/test_languages/norwegian +14 -0
  53. data/test/fixtures/test_languages/spanish +22 -0
  54. data/test/generator_test.rb +0 -1
  55. data/test/language_test.rb +28 -0
  56. metadata +20 -43
  57. data/lib/scylla/lms/esperanto.lm +0 -400
  58. data/lib/scylla/lms/hungarian.lm +0 -400
  59. data/lib/scylla/lms/irish.lm +0 -400
  60. data/lib/scylla/lms/kannada.lm +0 -400
  61. data/lib/scylla/lms/latin.lm +0 -400
  62. data/lib/scylla/lms/malay.lm +0 -400
  63. data/lib/scylla/lms/marathi.lm +0 -400
  64. data/lib/scylla/lms/mingo.lm +0 -400
  65. data/lib/scylla/lms/nepali.lm +0 -400
  66. data/lib/scylla/lms/quechua.lm +0 -400
  67. data/lib/scylla/lms/rumantsch.lm +0 -400
  68. data/lib/scylla/lms/sanskrit.lm +0 -400
  69. data/lib/scylla/lms/scots_gaelic.lm +0 -400
  70. data/lib/scylla/lms/serbian.lm +0 -400
  71. data/lib/scylla/lms/swahili.lm +0 -400
  72. data/lib/scylla/lms/tamil.lm +0 -400
  73. data/lib/scylla/lms/ukrainian.lm +0 -400
  74. data/lib/scylla/lms/yiddish.lm +0 -400
  75. data/source_texts/esperanto.txt +0 -199
  76. data/source_texts/hungarian.txt +0 -102
  77. data/source_texts/irish.txt +0 -209
  78. data/source_texts/kannada.txt +0 -283
  79. data/source_texts/latin.txt +0 -120
  80. data/source_texts/malay.txt +0 -108
  81. data/source_texts/marathi.txt +0 -100
  82. data/source_texts/mingo.txt +0 -146
  83. data/source_texts/nepali.txt +0 -131
  84. data/source_texts/quechua.txt +0 -108
  85. data/source_texts/rumantsch.txt +0 -110
  86. data/source_texts/sanskrit.txt +0 -135
  87. data/source_texts/scots_gaelic.txt +0 -93
  88. data/source_texts/serbian.txt +0 -121
  89. data/source_texts/swahili.txt +0 -120
  90. data/source_texts/tamil.txt +0 -167
  91. data/source_texts/ukrainian.txt +0 -214
  92. data/source_texts/yiddish-utf.txt +0 -83
  93. data/test/fixtures/lms/kannada.lm +0 -400
  94. data/test/fixtures/source_texts/kannada.txt +0 -283
@@ -1,400 +0,0 @@
1
- _ 1474
2
- a 538
3
- i 304
4
- h 268
5
- n 236
6
- r 190
7
- s 181
8
- _a 176
9
- e 167
10
- t 144
11
- l 135
12
- c 128
13
- g 117
14
- o 115
15
- d 106
16
- a_ 100
17
- n_ 95
18
- m 87
19
- u 85
20
- an 82
21
- � 79
22
- � 76
23
- b 75
24
- ea 74
25
- ai 74
26
- � 73
27
- ch 71
28
- _s 68
29
- h_ 62
30
- ar 56
31
- in 55
32
- " 54
33
- , 53
34
- r_ 52
35
- . 51
36
- _b 50
37
- f 48
38
- ac 47
39
- _a_ 47
40
- an_ 47
41
- ag 47
42
- s_ 46
43
- ,_ 45
44
- ir 44
45
- _d 43
46
- bh 43
47
- ha 43
48
- �_ 42
49
- th 41
50
- ach 40
51
- _m 38
52
- _an 37
53
- _c 36
54
- _ag 35
55
- _l 35
56
- le 34
57
- �_ 33
58
- il 33
59
- na 31
60
- is 31
61
- id 31
62
- � 30
63
- _an_ 30
64
- g_ 30
65
- _t 29
66
- l_ 29
67
- _bh 28
68
- nn 28
69
- _i 27
70
- T 27
71
- ar_ 27
72
- p 27
73
- mh 27
74
- _f 27
75
- it 25
76
- _g 25
77
- am 25
78
- �_ 25
79
- dh 25
80
- ra 24
81
- s� 24
82
- ei 24
83
- �i 24
84
- _le 24
85
- _s� 23
86
- � 23
87
- ean 22
88
- o_ 22
89
- ._ 22
90
- �a 22
91
- he 22
92
- e_ 21
93
- al 21
94
- gu 21
95
- us 21
96
- ui 21
97
- s�_ 21
98
- t_ 21
99
- _s�_ 20
100
- _ch 20
101
- ig 20
102
- _ar 20
103
- _n 20
104
- ch_ 19
105
- de 19
106
- as 19
107
- hu 19
108
- A 19
109
- hai 19
110
- oi 19
111
- se 18
112
- te 18
113
- agu 18
114
- sa 18
115
- agus 18
116
- gus 18
117
- ir_ 18
118
- ach_ 18
119
- agus_ 17
120
- st 17
121
- ua 17
122
- ith 17
123
- C 17
124
- us_ 17
125
- gus_ 17
126
- ag_ 17
127
- _agu 16
128
- h� 16
129
- ad 16
130
- _agus 16
131
- ga 16
132
- rt 16
133
- h� 15
134
- tha 15
135
- each 15
136
- ia 15
137
- d_ 15
138
- ann 15
139
- �i 15
140
- na_ 15
141
- go 15
142
- dh_ 15
143
- eac 15
144
- _" 14
145
- _p 14
146
- gh 14
147
- _T 14
148
- ta 14
149
- amh 14
150
- re 14
151
- at 14
152
- M 13
153
- _ac 13
154
- la 13
155
- �o 13
156
- ear 13
157
- il_ 13
158
- nn_ 13
159
- go_ 13
160
- _ag_ 13
161
- hi 13
162
- in_ 13
163
- eo 13
164
- igh 13
165
- hea 13
166
- h�_ 13
167
- ait 13
168
- _ar_ 13
169
- r� 13
170
- ma 12
171
- B 12
172
- h�i 12
173
- inn 12
174
- air 12
175
- _go 12
176
- ! 12
177
- r, 12
178
- - 12
179
- : 11
180
- _r 11
181
- N 11
182
- ho 11
183
- fa 11
184
- _de 11
185
- �n 11
186
- fh 11
187
- rai 11
188
- or 11
189
- aid 11
190
- _go_ 11
191
- cha 10
192
- _se 10
193
- c_ 10
194
- aith 10
195
- ain 10
196
- acha 10
197
- h� 10
198
- _fh 10
199
- om 10
200
- ng 10
201
- har 10
202
- _in 10
203
- ann_ 10
204
- art 10
205
- ? 10
206
- _� 10
207
- F 10
208
- sc 10
209
- aig 10
210
- is_ 10
211
- im 9
212
- _M 9
213
- "_ 9
214
- ne 9
215
- bh� 9
216
- le_ 9
217
- eag 9
218
- e. 9
219
- idh 9
220
- ti 9
221
- be 9
222
- r,_ 9
223
- aga 9
224
- aigh 9
225
- io 9
226
- _dh 9
227
- si 9
228
- i_ 9
229
- �an 9
230
- _� 9
231
- nt 9
232
- ht 9
233
- idh_ 9
234
- th_ 9
235
- uil 9
236
- ile 9
237
- rea 9
238
- as_ 9
239
- li 9
240
- ." 8
241
- sea 8
242
- _m� 8
243
- id_ 8
244
- chu 8
245
- ist 8
246
- m� 8
247
- hr 8
248
- ean_ 8
249
- �s 8
250
- ca 8
251
- ic 8
252
- tea 8
253
- t� 8
254
- s� 8
255
- ri 8
256
- ail 8
257
- bea 8
258
- �i 8
259
- _s� 8
260
- t�_ 8
261
- bhe 8
262
- on 8
263
- hf 8
264
- oc 8
265
- _C 8
266
- hean 8
267
- ith_ 8
268
- N� 8
269
- _ach 8
270
- L 7
271
- bhf 7
272
- hui 7
273
- T�_ 7
274
- deir_ 7
275
- Bh 7
276
- mh_ 7
277
- �id 7
278
- adh 7
279
- _bea 7
280
- _lei 7
281
- eann 7
282
- rt_ 7
283
- _deir 7
284
- lt 7
285
- n� 7
286
- bh�_ 7
287
- mha 7
288
- _o 7
289
- _dei 7
290
- al_ 7
291
- _ma 7
292
- en 7
293
- eir 7
294
- eacha 7
295
- iste 7
296
- deir 7
297
- ba 7
298
- _na 7
299
- ing 7
300
- cu 7
301
- h� 7
302
- seo 7
303
- hua 7
304
- �n_ 7
305
- gh_ 7
306
- ' 7
307
- _bhe 7
308
- sean 7
309
- ," 7
310
- eir_ 7
311
- igh_ 7
312
- _bhf 7
313
- cht 7
314
- lei 7
315
- m_ 7
316
- T� 7
317
- ?" 7
318
- ll 7
319
- ha_ 7
320
- ste 7
321
- D 7
322
- ana 7
323
- dea 7
324
- _be 7
325
- dei 7
326
- rra 6
327
- amh_ 6
328
- _am 6
329
- _mh 6
330
- ta_ 6
331
- l� 6
332
- air_ 6
333
- sa_ 6
334
- sh 6
335
- !_ 6
336
- Bh� 6
337
- ob 6
338
- da 6
339
- bhea 6
340
- aidh 6
341
- u�_ 6
342
- uai 6
343
- acha_ 6
344
- u� 6
345
- ois 6
346
- eis 6
347
- _chu 6
348
- �_ 6
349
- _at 6
350
- _aga 6
351
- ic_ 6
352
- fe 6
353
- I 6
354
- adh_ 6
355
- arra 6
356
- arr 6
357
- aidh_ 6
358
- _ina_ 6
359
- _ina 6
360
- Bh�_ 6
361
- thair 6
362
- _n� 6
363
- _br 6
364
- �st 6
365
- ath 6
366
- teach 6
367
- thai 6
368
- ra_ 6
369
- t� 6
370
- _ach_ 6
371
- _si 6
372
- _sc 6
373
- tr 6
374
- �al 6
375
- _sa 6
376
- n,_ 6
377
- _m�_ 6
378
- Ta 6
379
- n, 6
380
- n. 6
381
- cha_ 6
382
- ns 6
383
- ina_ 6
384
- ,"_ 6
385
- teac 6
386
- hair 6
387
- os 6
388
- dr 6
389
- a� 6
390
- P 6
391
- uil_ 6
392
- pa 6
393
- a, 6
394
- Ch 6
395
- _le_ 6
396
- ina 6
397
- fai 6
398
- och 6
399
- br 6
400
- �il 6