scylla 0.4.3 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. data/Gemfile +1 -0
  2. data/Gemfile.lock +10 -0
  3. data/VERSION +1 -1
  4. data/lib/scylla/generator.rb +1 -1
  5. data/lib/scylla/lms/13375P33K.lm +156 -156
  6. data/lib/scylla/lms/arabic.lm +133 -133
  7. data/lib/scylla/lms/bulgarian.lm +122 -122
  8. data/lib/scylla/lms/catalan.lm +151 -151
  9. data/lib/scylla/lms/danish.lm +137 -137
  10. data/lib/scylla/lms/english.lm +207 -207
  11. data/lib/scylla/lms/french.lm +400 -400
  12. data/lib/scylla/lms/japanese.lm +400 -400
  13. data/lib/scylla/lms/korean.lm +233 -233
  14. data/lib/scylla/lms/norwegian.lm +398 -398
  15. data/lib/scylla/lms/spanish.lm +98 -98
  16. data/lib/scylla/lms/swedish.lm +123 -123
  17. data/lib/scylla/lms/tagalog.lm +223 -223
  18. data/lib/scylla/lms/welsh.lm +234 -234
  19. data/lib/scylla/resources.rb +10 -10
  20. data/scylla.gemspec +17 -40
  21. data/source_texts/catalan.txt +28 -28
  22. data/source_texts/danish.txt +62 -62
  23. data/source_texts/english.txt +10 -10
  24. data/source_texts/french.txt +470 -77
  25. data/source_texts/japanese.txt +453 -199
  26. data/source_texts/norwegian.txt +96 -63
  27. data/source_texts/spanish.txt +269 -269
  28. data/test/classifier_test.rb +2 -2
  29. data/test/fixtures/lms/13375p33k.lm +156 -156
  30. data/test/fixtures/lms/danish.lm +137 -137
  31. data/test/fixtures/lms/english.lm +207 -207
  32. data/test/fixtures/lms/french.lm +400 -400
  33. data/test/fixtures/lms/hindi.lm +400 -0
  34. data/test/fixtures/lms/italian.lm +400 -0
  35. data/test/fixtures/lms/japanese.lm +400 -400
  36. data/test/fixtures/lms/norwegian.lm +400 -0
  37. data/test/fixtures/lms/spanish.lm +98 -98
  38. data/test/fixtures/source_texts/danish.txt +62 -62
  39. data/test/fixtures/source_texts/english.txt +10 -10
  40. data/test/fixtures/source_texts/french.txt +470 -77
  41. data/test/fixtures/source_texts/hindi.txt +199 -0
  42. data/test/fixtures/source_texts/italian.txt +120 -0
  43. data/test/fixtures/source_texts/japanese.txt +453 -199
  44. data/test/fixtures/source_texts/norwegian.txt +190 -0
  45. data/test/fixtures/source_texts/spanish.txt +269 -269
  46. data/test/fixtures/test_languages/english +61 -0
  47. data/test/fixtures/test_languages/french +0 -0
  48. data/test/fixtures/test_languages/german +29 -0
  49. data/test/fixtures/test_languages/hindi +3 -0
  50. data/test/fixtures/test_languages/italian +6 -0
  51. data/test/fixtures/test_languages/japanese +79 -0
  52. data/test/fixtures/test_languages/norwegian +14 -0
  53. data/test/fixtures/test_languages/spanish +22 -0
  54. data/test/generator_test.rb +0 -1
  55. data/test/language_test.rb +28 -0
  56. metadata +20 -43
  57. data/lib/scylla/lms/esperanto.lm +0 -400
  58. data/lib/scylla/lms/hungarian.lm +0 -400
  59. data/lib/scylla/lms/irish.lm +0 -400
  60. data/lib/scylla/lms/kannada.lm +0 -400
  61. data/lib/scylla/lms/latin.lm +0 -400
  62. data/lib/scylla/lms/malay.lm +0 -400
  63. data/lib/scylla/lms/marathi.lm +0 -400
  64. data/lib/scylla/lms/mingo.lm +0 -400
  65. data/lib/scylla/lms/nepali.lm +0 -400
  66. data/lib/scylla/lms/quechua.lm +0 -400
  67. data/lib/scylla/lms/rumantsch.lm +0 -400
  68. data/lib/scylla/lms/sanskrit.lm +0 -400
  69. data/lib/scylla/lms/scots_gaelic.lm +0 -400
  70. data/lib/scylla/lms/serbian.lm +0 -400
  71. data/lib/scylla/lms/swahili.lm +0 -400
  72. data/lib/scylla/lms/tamil.lm +0 -400
  73. data/lib/scylla/lms/ukrainian.lm +0 -400
  74. data/lib/scylla/lms/yiddish.lm +0 -400
  75. data/source_texts/esperanto.txt +0 -199
  76. data/source_texts/hungarian.txt +0 -102
  77. data/source_texts/irish.txt +0 -209
  78. data/source_texts/kannada.txt +0 -283
  79. data/source_texts/latin.txt +0 -120
  80. data/source_texts/malay.txt +0 -108
  81. data/source_texts/marathi.txt +0 -100
  82. data/source_texts/mingo.txt +0 -146
  83. data/source_texts/nepali.txt +0 -131
  84. data/source_texts/quechua.txt +0 -108
  85. data/source_texts/rumantsch.txt +0 -110
  86. data/source_texts/sanskrit.txt +0 -135
  87. data/source_texts/scots_gaelic.txt +0 -93
  88. data/source_texts/serbian.txt +0 -121
  89. data/source_texts/swahili.txt +0 -120
  90. data/source_texts/tamil.txt +0 -167
  91. data/source_texts/ukrainian.txt +0 -214
  92. data/source_texts/yiddish-utf.txt +0 -83
  93. data/test/fixtures/lms/kannada.lm +0 -400
  94. data/test/fixtures/source_texts/kannada.txt +0 -283
@@ -1,400 +0,0 @@
1
- _ 962
2
- a 844
3
- n 323
4
- i 270
5
- u 256
6
- h 194
7
- t 186
8
- s 185
9
- y 169
10
- k 165
11
- q 164
12
- p 155
13
- c 150
14
- m 149
15
- r 137
16
- ch 136
17
- a_ 130
18
- an 126
19
- ta 119
20
- ay 106
21
- qa 104
22
- e 96
23
- ma 96
24
- ha 96
25
- pa 88
26
- l 84
27
- ku 79
28
- j 76
29
- na 74
30
- o 73
31
- cha 69
32
- , 66
33
- in 66
34
- as 60
35
- un 58
36
- ar 57
37
- ,_ 54
38
- man 52
39
- ac 48
40
- am 44
41
- _k 42
42
- nt 42
43
- n_ 42
44
- ri 42
45
- at 42
46
- w 41
47
- qa_ 41
48
- ach 39
49
- ka 38
50
- _m 38
51
- y_ 38
52
- pi 37
53
- sq 37
54
- ta_ 37
55
- __ 37
56
- . 36
57
- wa 36
58
- nc 36
59
- sqa 35
60
- hu 35
61
- rq 35
62
- uy 34
63
- nch 34
64
- i_ 34
65
- _p 34
66
- aj 34
67
- rqa 34
68
- aq 32
69
- a, 32
70
- sp 32
71
- hay 31
72
- ata 31
73
- una 31
74
- nta 31
75
- us 30
76
- ll 29
77
- ni 29
78
- _ma 29
79
- ay_ 28
80
- is 28
81
- a,_ 27
82
- ' 27
83
- spa 27
84
- _ka 27
85
- _c 26
86
- ya 26
87
- hi 26
88
- ama 26
89
- _s 25
90
- _ch 25
91
- kun 25
92
- chay 24
93
- ant 24
94
- la 24
95
- acha 24
96
- ti 24
97
- ik 23
98
- yk 23
99
- u_ 23
100
- chu 22
101
- nk 22
102
- he 22
103
- ari 22
104
- an_ 21
105
- ak 21
106
- kuna 21
107
- lla 21
108
- chi 21
109
- a. 21
110
- ._ 21
111
- ap 20
112
- taj 20
113
- su 20
114
- s_ 20
115
- anta 20
116
- _t 20
117
- anch 20
118
- ayk 20
119
- tu 20
120
- al 20
121
- uk 20
122
- anc 20
123
- yt 19
124
- ki 19
125
- ana 19
126
- aw 19
127
- na_ 19
128
- im 19
129
- ra 19
130
- _q 18
131
- asqa 18
132
- _a 18
133
- st 18
134
- asq 18
135
- _n 18
136
- sqa_ 18
137
- nin 18
138
- yta 18
139
- awa 18
140
- qan 18
141
- iku 17
142
- J 17
143
- arq 17
144
- j_ 17
145
- A 17
146
- � 17
147
- _pa 17
148
- che 16
149
- ncha 16
150
- yn 16
151
- _j 16
152
- _r 16
153
- os 16
154
- _chay 16
155
- kuy 16
156
- pi_ 16
157
- q_ 16
158
- arqa 16
159
- _cha 16
160
- aq_ 15
161
- _man 15
162
- up 15
163
- a._ 15
164
- si 15
165
- pa_ 14
166
- _i 14
167
- nku 14
168
- mp 14
169
- ru 14
170
- man_ 14
171
- ej 14
172
- mant 14
173
- rik 14
174
- aman 14
175
- ayku 14
176
- yku 14
177
- manta 14
178
- ur 13
179
- er 13
180
- _J 13
181
- pu 13
182
- ayt 13
183
- taq 13
184
- ank 13
185
- nta_ 13
186
- en 13
187
- �a 13
188
- ina 13
189
- all 13
190
- C 13
191
- ima 13
192
- um 12
193
- ayta 12
194
- aku 12
195
- mi 12
196
- erqa 12
197
- mu 12
198
- uc 12
199
- tin 12
200
- li 12
201
- ancha 12
202
- it 12
203
- � 12
204
- nan 12
205
- _y 12
206
- n, 12
207
- uch 12
208
- nq 12
209
- _u 12
210
- es 12
211
- aj_ 12
212
- erq 12
213
- _h 11
214
- pay 11
215
- yta_ 11
216
- g 11
217
- _su 11
218
- iy 11
219
- M 11
220
- 'a 11
221
- ut 11
222
- Ch 11
223
- taq_ 11
224
- _w 11
225
- uma 10
226
- n,_ 10
227
- qa, 10
228
- yni 10
229
- asp 10
230
- sta 10
231
- chej 10
232
- mana 10
233
- ku_ 10
234
- qh 10
235
- ray 10
236
- io 10
237
- har 10
238
- yp 10
239
- _C 10
240
- hej 10
241
- kh 10
242
- hay_ 10
243
- _tu 10
244
- riku 10
245
- ia 10
246
- khu 9
247
- ypi 9
248
- kay 9
249
- anku 9
250
- inc 9
251
- usq 9
252
- nche 9
253
- uya 9
254
- nqa 9
255
- ayn 9
256
- _pu 9
257
- qha 9
258
- ita 9
259
- Je 9
260
- ynin 9
261
- K 9
262
- nchej 9
263
- ir 9
264
- ha_ 9
265
- rqan 9
266
- _mana 9
267
- rqa_ 9
268
- aspa 9
269
- may 9
270
- asqa_ 9
271
- : 9
272
- ayp 9
273
- qo 9
274
- qay 9
275
- _ni 9
276
- usqa 9
277
- ta, 9
278
- in_ 9
279
- ne 9
280
- Jes 9
281
- _Ch 9
282
- _wa 8
283
- nas 8
284
- manch 8
285
- S 8
286
- spa, 8
287
- _Dios 8
288
- _l 8
289
- pac 8
290
- _qh 8
291
- yki 8
292
- tukuy 8
293
- qa,_ 8
294
- yq 8
295
- _Di 8
296
- kar 8
297
- aypi 8
298
- upa 8
299
- tuku 8
300
- sn 8
301
- re 8
302
- int 8
303
- api 8
304
- alla 8
305
- taj_ 8
306
- una_ 8
307
- inch 8
308
- oq 8
309
- sus 8
310
- Di 8
311
- ataj 8
312
- uku 8
313
- pa,_ 8
314
- d 8
315
- _D 8
316
- apa 8
317
- sa 8
318
- ast 8
319
- Ma 8
320
- lo 8
321
- D 8
322
- kus 8
323
- _ya 8
324
- ios 8
325
- anta_ 8
326
- arik 8
327
- Dio 8
328
- ja 8
329
- Dios 8
330
- inta 8
331
- Cha 8
332
- ukuy 8
333
- ana_ 8
334
- spa,_ 8
335
- wan 8
336
- au 8
337
- _Dio 8
338
- tuk 8
339
- run 8
340
- cha_ 8
341
- pa, 8
342
- ayq 8
343
- manc 8
344
- _pay 8
345
- nat 7
346
- Chay 7
347
- _im 7
348
- ikun 7
349
- paj 7
350
- _ima 7
351
- yqa 7
352
- chay_ 7
353
- _M 7
354
- aj, 7
355
- aman_ 7
356
- ikuna 7
357
- nata 7
358
- unc 7
359
- _tuk 7
360
- _tuku 7
361
- ariku 7
362
- paq 7
363
- _kar 7
364
- mpa 7
365
- r� 7
366
- e_ 7
367
- Jesus 7
368
- pat 7
369
- an, 7
370
- _ku 7
371
- pach 7
372
- mana_ 7
373
- L 7
374
- em 7
375
- jt 7
376
- ispa 7
377
- sqan 7
378
- mach 7
379
- i, 7
380
- kay_ 7
381
- _may 7
382
- rayku 7
383
- rin 7
384
- ayqa 7
385
- ta,_ 7
386
- kuna_ 7
387
- achi 7
388
- uspa 7
389
- ym 7
390
- :_ 7
391
- Jesu 7
392
- was 7
393
- j, 7
394
- or 7
395
- mac 7
396
- _qha 7
397
- esu 7
398
- usp 7
399
- ic 7
400
- ayta_ 7