scylla 0.4.3 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. data/Gemfile +1 -0
  2. data/Gemfile.lock +10 -0
  3. data/VERSION +1 -1
  4. data/lib/scylla/generator.rb +1 -1
  5. data/lib/scylla/lms/13375P33K.lm +156 -156
  6. data/lib/scylla/lms/arabic.lm +133 -133
  7. data/lib/scylla/lms/bulgarian.lm +122 -122
  8. data/lib/scylla/lms/catalan.lm +151 -151
  9. data/lib/scylla/lms/danish.lm +137 -137
  10. data/lib/scylla/lms/english.lm +207 -207
  11. data/lib/scylla/lms/french.lm +400 -400
  12. data/lib/scylla/lms/japanese.lm +400 -400
  13. data/lib/scylla/lms/korean.lm +233 -233
  14. data/lib/scylla/lms/norwegian.lm +398 -398
  15. data/lib/scylla/lms/spanish.lm +98 -98
  16. data/lib/scylla/lms/swedish.lm +123 -123
  17. data/lib/scylla/lms/tagalog.lm +223 -223
  18. data/lib/scylla/lms/welsh.lm +234 -234
  19. data/lib/scylla/resources.rb +10 -10
  20. data/scylla.gemspec +17 -40
  21. data/source_texts/catalan.txt +28 -28
  22. data/source_texts/danish.txt +62 -62
  23. data/source_texts/english.txt +10 -10
  24. data/source_texts/french.txt +470 -77
  25. data/source_texts/japanese.txt +453 -199
  26. data/source_texts/norwegian.txt +96 -63
  27. data/source_texts/spanish.txt +269 -269
  28. data/test/classifier_test.rb +2 -2
  29. data/test/fixtures/lms/13375p33k.lm +156 -156
  30. data/test/fixtures/lms/danish.lm +137 -137
  31. data/test/fixtures/lms/english.lm +207 -207
  32. data/test/fixtures/lms/french.lm +400 -400
  33. data/test/fixtures/lms/hindi.lm +400 -0
  34. data/test/fixtures/lms/italian.lm +400 -0
  35. data/test/fixtures/lms/japanese.lm +400 -400
  36. data/test/fixtures/lms/norwegian.lm +400 -0
  37. data/test/fixtures/lms/spanish.lm +98 -98
  38. data/test/fixtures/source_texts/danish.txt +62 -62
  39. data/test/fixtures/source_texts/english.txt +10 -10
  40. data/test/fixtures/source_texts/french.txt +470 -77
  41. data/test/fixtures/source_texts/hindi.txt +199 -0
  42. data/test/fixtures/source_texts/italian.txt +120 -0
  43. data/test/fixtures/source_texts/japanese.txt +453 -199
  44. data/test/fixtures/source_texts/norwegian.txt +190 -0
  45. data/test/fixtures/source_texts/spanish.txt +269 -269
  46. data/test/fixtures/test_languages/english +61 -0
  47. data/test/fixtures/test_languages/french +0 -0
  48. data/test/fixtures/test_languages/german +29 -0
  49. data/test/fixtures/test_languages/hindi +3 -0
  50. data/test/fixtures/test_languages/italian +6 -0
  51. data/test/fixtures/test_languages/japanese +79 -0
  52. data/test/fixtures/test_languages/norwegian +14 -0
  53. data/test/fixtures/test_languages/spanish +22 -0
  54. data/test/generator_test.rb +0 -1
  55. data/test/language_test.rb +28 -0
  56. metadata +20 -43
  57. data/lib/scylla/lms/esperanto.lm +0 -400
  58. data/lib/scylla/lms/hungarian.lm +0 -400
  59. data/lib/scylla/lms/irish.lm +0 -400
  60. data/lib/scylla/lms/kannada.lm +0 -400
  61. data/lib/scylla/lms/latin.lm +0 -400
  62. data/lib/scylla/lms/malay.lm +0 -400
  63. data/lib/scylla/lms/marathi.lm +0 -400
  64. data/lib/scylla/lms/mingo.lm +0 -400
  65. data/lib/scylla/lms/nepali.lm +0 -400
  66. data/lib/scylla/lms/quechua.lm +0 -400
  67. data/lib/scylla/lms/rumantsch.lm +0 -400
  68. data/lib/scylla/lms/sanskrit.lm +0 -400
  69. data/lib/scylla/lms/scots_gaelic.lm +0 -400
  70. data/lib/scylla/lms/serbian.lm +0 -400
  71. data/lib/scylla/lms/swahili.lm +0 -400
  72. data/lib/scylla/lms/tamil.lm +0 -400
  73. data/lib/scylla/lms/ukrainian.lm +0 -400
  74. data/lib/scylla/lms/yiddish.lm +0 -400
  75. data/source_texts/esperanto.txt +0 -199
  76. data/source_texts/hungarian.txt +0 -102
  77. data/source_texts/irish.txt +0 -209
  78. data/source_texts/kannada.txt +0 -283
  79. data/source_texts/latin.txt +0 -120
  80. data/source_texts/malay.txt +0 -108
  81. data/source_texts/marathi.txt +0 -100
  82. data/source_texts/mingo.txt +0 -146
  83. data/source_texts/nepali.txt +0 -131
  84. data/source_texts/quechua.txt +0 -108
  85. data/source_texts/rumantsch.txt +0 -110
  86. data/source_texts/sanskrit.txt +0 -135
  87. data/source_texts/scots_gaelic.txt +0 -93
  88. data/source_texts/serbian.txt +0 -121
  89. data/source_texts/swahili.txt +0 -120
  90. data/source_texts/tamil.txt +0 -167
  91. data/source_texts/ukrainian.txt +0 -214
  92. data/source_texts/yiddish-utf.txt +0 -83
  93. data/test/fixtures/lms/kannada.lm +0 -400
  94. data/test/fixtures/source_texts/kannada.txt +0 -283
@@ -1,400 +0,0 @@
1
- _ 926
2
- a 376
3
- h 237
4
- i 215
5
- n 185
6
- r 117
7
- e 116
8
- d 115
9
- _a 87
10
- __ 84
11
- g 81
12
- s 80
13
- l 79
14
- c 76
15
- an 72
16
- u 70
17
- ai 67
18
- n_ 66
19
- t 63
20
- h_ 59
21
- o 58
22
- ch 56
23
- dh 55
24
- ha 54
25
- ea 53
26
- a_ 48
27
- b 43
28
- id 41
29
- m 41
30
- na 39
31
- ir 38
32
- � 35
33
- nn 34
34
- ac 33
35
- ach 31
36
- idh 31
37
- r_ 31
38
- an_ 31
39
- ad 31
40
- s_ 26
41
- he 26
42
- in 25
43
- _an 25
44
- dh_ 24
45
- _b 24
46
- th 24
47
- bh 24
48
- ig 24
49
- - 23
50
- _c 23
51
- d_ 23
52
- T 23
53
- gh 23
54
- il 22
55
- ei 22
56
- gu 22
57
- ean 21
58
- ann 21
59
- , 20
60
- . 20
61
- ir_ 20
62
- air 19
63
- ? 19
64
- nn_ 19
65
- ar 19
66
- �i 19
67
- _T 18
68
- _t 18
69
- ag 18
70
- is 18
71
- la 17
72
- ha_ 17
73
- as 17
74
- _g 17
75
- C 17
76
- cha 16
77
- _n 16
78
- G 16
79
- ui 16
80
- _ai 16
81
- _na 16
82
- g_ 16
83
- oi 15
84
- adh 15
85
- am 15
86
- ._ 15
87
- aid 15
88
- hai 15
89
- gus 15
90
- us 15
91
- f 15
92
- _s 14
93
- ,_ 14
94
- _r 14
95
- air_ 14
96
- Th 14
97
- eac 14
98
- da 14
99
- le 13
100
- ig_ 13
101
- us_ 13
102
- idh_ 13
103
- hea 13
104
- gus_ 13
105
- e_ 13
106
- _ch 13
107
- ann_ 13
108
- on 13
109
- a? 13
110
- _a? 13
111
- _C 13
112
- ri 13
113
- hu 12
114
- each 12
115
- agus 12
116
- rd 12
117
- agu 12
118
- ch_ 12
119
- hl 12
120
- hi 12
121
- aidh 12
122
- _na_ 11
123
- _an_ 11
124
- na_ 11
125
- _e 11
126
- chd 11
127
- _f 11
128
- �id 11
129
- ao 11
130
- aig 11
131
- hd 11
132
- _d 11
133
- �idh 11
134
- ma 11
135
- it 10
136
- achd 10
137
- _ag 10
138
- _agu 10
139
- al 10
140
- te 10
141
- sa 10
142
- _agus 10
143
- _air 10
144
- l_ 10
145
- E 10
146
- ne 10
147
- B 10
148
- _ann 10
149
- ith 10
150
- _G 10
151
- ach_ 10
152
- _bh 10
153
- _a_ 10
154
- Tha 10
155
- Tha_ 10
156
- agus_ 10
157
- _Th 10
158
- ra 9
159
- � 9
160
- ad_ 9
161
- acha 9
162
- G�idh 9
163
- li 9
164
- inn 9
165
- ua 9
166
- ta 9
167
- aidh_ 9
168
- G�id 9
169
- m_ 9
170
- se 9
171
- G� 9
172
- lea 9
173
- adh_ 9
174
- ga 9
175
- mh 9
176
- G�i 9
177
- _ann_ 9
178
- h- 9
179
- chai 8
180
- ho 8
181
- ?_ 8
182
- lig 8
183
- hli 8
184
- _gu 8
185
- ia 8
186
- sg 8
187
- �r 8
188
- Ch 8
189
- re 8
190
- am_ 8
191
- dhe 8
192
- dhl 8
193
- haid 8
194
- � 8
195
- lt 8
196
- ain 8
197
- ba 8
198
- _air_ 8
199
- eil 8
200
- hei 8
201
- bh_ 7
202
- A 7
203
- _G� 7
204
- idhli 7
205
- D 7
206
- ana 7
207
- _i 7
208
- dhlig 7
209
- � 7
210
- h. 7
211
- _Tha_ 7
212
- idhe 7
213
- _a?_ 7
214
- a?_ 7
215
- u_ 7
216
- idhl 7
217
- _Tha 7
218
- _se 7
219
- _G�i 7
220
- dhli 7
221
- hui 7
222
- �idhl 7
223
- _B 7
224
- hlig 7
225
- _G�id 7
226
- igh 7
227
- _th 7
228
- I 7
229
- fh 7
230
- h� 6
231
- agh 6
232
- p 6
233
- ona 6
234
- han 6
235
- on_ 6
236
- gh_ 6
237
- haidh 6
238
- Ei 6
239
- _E 6
240
- ada 6
241
- ic 6
242
- lig_ 6
243
- tr 6
244
- ilt 6
245
- nan 6
246
- aoi 6
247
- nam 6
248
- _Ch 6
249
- _I 6
250
- tha 6
251
- _dh 6
252
- th_ 6
253
- hlig_ 6
254
- de 6
255
- ' 6
256
- ile 6
257
- chaid 6
258
- oin 6
259
- nea 6
260
- un 6
261
- o_ 6
262
- ean_ 6
263
- ich 5
264
- hd_ 5
265
- in_ 5
266
- _A 5
267
- _D 5
268
- _am 5
269
- _h- 5
270
- nach 5
271
- ain_ 5
272
- eacha 5
273
- F 5
274
- S 5
275
- _tr 5
276
- bhe 5
277
- eann 5
278
- gha 5
279
- ilea 5
280
- ll 5
281
- iad_ 5
282
- _fa 5
283
- hean 5
284
- eal 5
285
- _Ta 5
286
- _ba 5
287
- Ta 5
288
- is_ 5
289
- ithe 5
290
- ob 5
291
- N 5
292
- ine 5
293
- _h 5
294
- nac 5
295
- _aig 5
296
- idhea 5
297
- aoine 5
298
- eo 5
299
- t_ 5
300
- rd_ 5
301
- h._ 5
302
- had 5
303
- eir 5
304
- neach 5
305
- han_ 5
306
- ur 5
307
- iad 5
308
- ead 5
309
- im 5
310
- neac 5
311
- dhea 5
312
- _bar 5
313
- io 5
314
- eu 5
315
- rt 5
316
- at 5
317
- aig_ 5
318
- oine 5
319
- � 5
320
- the 5
321
- bar 5
322
- _ri 5
323
- chu 5
324
- fa 5
325
- aoin 5
326
- chd_ 5
327
- sm 4
328
- b� 4
329
- thea 4
330
- uid 4
331
- nai 4
332
- lean 4
333
- ro 4
334
- dac 4
335
- _gh 4
336
- sga 4
337
- il_ 4
338
- un_ 4
339
- fha 4
340
- _� 4
341
- hadh 4
342
- _iad 4
343
- achd_ 4
344
- uir 4
345
- ilean 4
346
- du 4
347
- nt 4
348
- n, 4
349
- irt 4
350
- eis 4
351
- ru 4
352
- obh 4
353
- sma 4
354
- idea 4
355
- r- 4
356
- sea 4
357
- ab 4
358
- ide 4
359
- ais 4
360
- oinea 4
361
- cl 4
362
- chadh 4
363
- ni 4
364
- _F 4
365
- me 4
366
- asg 4
367
- _aig_ 4
368
- gu_ 4
369
- ith_ 4
370
- Cha 4
371
- n- 4
372
- ar_ 4
373
- e� 4
374
- �rd 4
375
- eachd 4
376
- dach 4
377
- hith 4
378
- ail 4
379
- r� 4
380
- ug 4
381
- chi 4
382
- _l 4
383
- as_ 4
384
- �n 4
385
- Bh 4
386
- heil 4
387
- ithea 4
388
- _ia 4
389
- aigh 4
390
- hit 4
391
- _chi 4
392
- tha_ 4
393
- �na 4
394
- huir 4
395
- _chu 4
396
- P 4
397
- _P 4
398
- nan_ 4
399
- _bho 4
400
- bho 4