scylla 0.4.3 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. data/Gemfile +1 -0
  2. data/Gemfile.lock +10 -0
  3. data/VERSION +1 -1
  4. data/lib/scylla/generator.rb +1 -1
  5. data/lib/scylla/lms/13375P33K.lm +156 -156
  6. data/lib/scylla/lms/arabic.lm +133 -133
  7. data/lib/scylla/lms/bulgarian.lm +122 -122
  8. data/lib/scylla/lms/catalan.lm +151 -151
  9. data/lib/scylla/lms/danish.lm +137 -137
  10. data/lib/scylla/lms/english.lm +207 -207
  11. data/lib/scylla/lms/french.lm +400 -400
  12. data/lib/scylla/lms/japanese.lm +400 -400
  13. data/lib/scylla/lms/korean.lm +233 -233
  14. data/lib/scylla/lms/norwegian.lm +398 -398
  15. data/lib/scylla/lms/spanish.lm +98 -98
  16. data/lib/scylla/lms/swedish.lm +123 -123
  17. data/lib/scylla/lms/tagalog.lm +223 -223
  18. data/lib/scylla/lms/welsh.lm +234 -234
  19. data/lib/scylla/resources.rb +10 -10
  20. data/scylla.gemspec +17 -40
  21. data/source_texts/catalan.txt +28 -28
  22. data/source_texts/danish.txt +62 -62
  23. data/source_texts/english.txt +10 -10
  24. data/source_texts/french.txt +470 -77
  25. data/source_texts/japanese.txt +453 -199
  26. data/source_texts/norwegian.txt +96 -63
  27. data/source_texts/spanish.txt +269 -269
  28. data/test/classifier_test.rb +2 -2
  29. data/test/fixtures/lms/13375p33k.lm +156 -156
  30. data/test/fixtures/lms/danish.lm +137 -137
  31. data/test/fixtures/lms/english.lm +207 -207
  32. data/test/fixtures/lms/french.lm +400 -400
  33. data/test/fixtures/lms/hindi.lm +400 -0
  34. data/test/fixtures/lms/italian.lm +400 -0
  35. data/test/fixtures/lms/japanese.lm +400 -400
  36. data/test/fixtures/lms/norwegian.lm +400 -0
  37. data/test/fixtures/lms/spanish.lm +98 -98
  38. data/test/fixtures/source_texts/danish.txt +62 -62
  39. data/test/fixtures/source_texts/english.txt +10 -10
  40. data/test/fixtures/source_texts/french.txt +470 -77
  41. data/test/fixtures/source_texts/hindi.txt +199 -0
  42. data/test/fixtures/source_texts/italian.txt +120 -0
  43. data/test/fixtures/source_texts/japanese.txt +453 -199
  44. data/test/fixtures/source_texts/norwegian.txt +190 -0
  45. data/test/fixtures/source_texts/spanish.txt +269 -269
  46. data/test/fixtures/test_languages/english +61 -0
  47. data/test/fixtures/test_languages/french +0 -0
  48. data/test/fixtures/test_languages/german +29 -0
  49. data/test/fixtures/test_languages/hindi +3 -0
  50. data/test/fixtures/test_languages/italian +6 -0
  51. data/test/fixtures/test_languages/japanese +79 -0
  52. data/test/fixtures/test_languages/norwegian +14 -0
  53. data/test/fixtures/test_languages/spanish +22 -0
  54. data/test/generator_test.rb +0 -1
  55. data/test/language_test.rb +28 -0
  56. metadata +20 -43
  57. data/lib/scylla/lms/esperanto.lm +0 -400
  58. data/lib/scylla/lms/hungarian.lm +0 -400
  59. data/lib/scylla/lms/irish.lm +0 -400
  60. data/lib/scylla/lms/kannada.lm +0 -400
  61. data/lib/scylla/lms/latin.lm +0 -400
  62. data/lib/scylla/lms/malay.lm +0 -400
  63. data/lib/scylla/lms/marathi.lm +0 -400
  64. data/lib/scylla/lms/mingo.lm +0 -400
  65. data/lib/scylla/lms/nepali.lm +0 -400
  66. data/lib/scylla/lms/quechua.lm +0 -400
  67. data/lib/scylla/lms/rumantsch.lm +0 -400
  68. data/lib/scylla/lms/sanskrit.lm +0 -400
  69. data/lib/scylla/lms/scots_gaelic.lm +0 -400
  70. data/lib/scylla/lms/serbian.lm +0 -400
  71. data/lib/scylla/lms/swahili.lm +0 -400
  72. data/lib/scylla/lms/tamil.lm +0 -400
  73. data/lib/scylla/lms/ukrainian.lm +0 -400
  74. data/lib/scylla/lms/yiddish.lm +0 -400
  75. data/source_texts/esperanto.txt +0 -199
  76. data/source_texts/hungarian.txt +0 -102
  77. data/source_texts/irish.txt +0 -209
  78. data/source_texts/kannada.txt +0 -283
  79. data/source_texts/latin.txt +0 -120
  80. data/source_texts/malay.txt +0 -108
  81. data/source_texts/marathi.txt +0 -100
  82. data/source_texts/mingo.txt +0 -146
  83. data/source_texts/nepali.txt +0 -131
  84. data/source_texts/quechua.txt +0 -108
  85. data/source_texts/rumantsch.txt +0 -110
  86. data/source_texts/sanskrit.txt +0 -135
  87. data/source_texts/scots_gaelic.txt +0 -93
  88. data/source_texts/serbian.txt +0 -121
  89. data/source_texts/swahili.txt +0 -120
  90. data/source_texts/tamil.txt +0 -167
  91. data/source_texts/ukrainian.txt +0 -214
  92. data/source_texts/yiddish-utf.txt +0 -83
  93. data/test/fixtures/lms/kannada.lm +0 -400
  94. data/test/fixtures/source_texts/kannada.txt +0 -283
@@ -1,400 +0,0 @@
1
- a 1131
2
- _ 984
3
- h 314
4
- i 269
5
- t 206
6
- aa 186
7
- n 182
8
- s 181
9
- r 177
10
- | 171
11
- ha 141
12
- m 131
13
- v 130
14
- a_ 130
15
- d 121
16
- y 116
17
- e 108
18
- _| 103
19
- u 93
20
- p 90
21
- ya 90
22
- va 89
23
- na 89
24
- ar 88
25
- ta 85
26
- an 82
27
- A 78
28
- ra 78
29
- k 76
30
- am 75
31
- ma 75
32
- sh 74
33
- __ 72
34
- . 68
35
- || 68
36
- _|| 66
37
- at 66
38
- M 61
39
- as 59
40
- i_ 55
41
- o 55
42
- g 52
43
- N 50
44
- aM 50
45
- pa 49
46
- M_ 49
47
- b 48
48
- c 43
49
- ch 43
50
- aM_ 43
51
- e_ 42
52
- j 42
53
- ana 42
54
- hi 42
55
- it 41
56
- _s 40
57
- ad 40
58
- aan 40
59
- sa 37
60
- ama 37
61
- |_ 37
62
- av 36
63
- H 36
64
- ay 35
65
- da 35
66
- dh 34
67
- _n 34
68
- ah 34
69
- sha 33
70
- ka 32
71
- ||_ 32
72
- aN 32
73
- bh 31
74
- ara 31
75
- is 31
76
- ii 31
77
- l 30
78
- H_ 30
79
- _||_ 30
80
- ap 29
81
- aa_ 29
82
- aH 29
83
- a. 28
84
- _p 28
85
- na_ 28
86
- ja 28
87
- aH_ 27
88
- ai 26
89
- ga 26
90
- .n 26
91
- _na 26
92
- a.n 24
93
- ti 24
94
- aana 23
95
- aya 23
96
- Na 23
97
- vi 22
98
- aha 22
99
- st 22
100
- ava 22
101
- ata 22
102
- .h 22
103
- ish 22
104
- di 22
105
- _j 21
106
- haa 21
107
- la 21
108
- .h_ 21
109
- vaa 21
110
- _b 21
111
- h_ 21
112
- al 20
113
- ni 20
114
- _d 20
115
- hh 19
116
- es 19
117
- _sa 18
118
- ak 18
119
- esh 18
120
- ha_ 18
121
- _v 18
122
- ri 18
123
- ita 18
124
- chi 17
125
- _k 17
126
- yaa 17
127
- _ch 17
128
- _t 17
129
- _c 17
130
- maa 17
131
- nam 17
132
- _m 17
133
- x 16
134
- ala 16
135
- ag 16
136
- id 16
137
- me 16
138
- taa 16
139
- tva 16
140
- tv 16
141
- pr 16
142
- bha 16
143
- T 16
144
- _pa 15
145
- ^ 15
146
- aNa 15
147
- th 15
148
- har 15
149
- ya_ 15
150
- shh 15
151
- D 15
152
- cha 15
153
- _ja 15
154
- I 14
155
- aka 14
156
- paa 14
157
- sy 14
158
- ik 14
159
- yat 14
160
- tas 14
161
- ash 14
162
- _g 14
163
- sya 14
164
- te 14
165
- sta 13
166
- esha 13
167
- A_ 13
168
- _na_ 13
169
- in 13
170
- pra 13
171
- ru 13
172
- asya 13
173
- o_ 13
174
- hA 13
175
- su 13
176
- asy 13
177
- hy 13
178
- uu 13
179
- re 12
180
- naa 12
181
- R 12
182
- _ma 12
183
- eva 12
184
- ^i 12
185
- hya 12
186
- ab 12
187
- R^ 12
188
- hav 12
189
- ev 12
190
- ada 12
191
- nama 12
192
- par 12
193
- R^i 12
194
- _nam 11
195
- m.h_ 11
196
- mi 11
197
- sar 11
198
- rt 11
199
- tr 11
200
- ye 11
201
- m. 11
202
- \ 11
203
- ast 11
204
- haM 11
205
- dha 11
206
- _bh 11
207
- am.h_ 11
208
- m.h 11
209
- ir 11
210
- haM_ 11
211
- am. 11
212
- aas 11
213
- am.h 11
214
- aay 11
215
- daa 11
216
- aam 11
217
- tra 10
218
- aj 10
219
- ati 10
220
- hava 10
221
- kh 10
222
- _y 10
223
- ii_ 10
224
- jaa 10
225
- _sh 10
226
- .a 10
227
- _cha 10
228
- ham 10
229
- kaa 10
230
- ra_ 10
231
- ishh 10
232
- ed 10
233
- iv 10
234
- aah 10
235
- apa 10
236
- asta 10
237
- ur 9
238
- ai_ 9
239
- anaa 9
240
- nt 9
241
- ani 9
242
- adh 9
243
- vaM 9
244
- aga 9
245
- va_ 9
246
- he 9
247
- ari 9
248
- te_ 9
249
- _bha 9
250
- yai 9
251
- var 9
252
- Ax 9
253
- nd 9
254
- rA 9
255
- jaan 9
256
- ke 9
257
- dhi 9
258
- ami 9
259
- ^it 8
260
- mas 8
261
- tasya 8
262
- aya_ 8
263
- Ar 8
264
- ana_ 8
265
- R^it 8
266
- bhav 8
267
- raa 8
268
- syai 8
269
- tha 8
270
- rv 8
271
- art 8
272
- adi 8
273
- rva 8
274
- ti_ 8
275
- ram 8
276
- u_ 8
277
- amas 8
278
- are 8
279
- _jaa 8
280
- us 8
281
- hch 8
282
- vaan 8
283
- aar 8
284
- vaM_ 8
285
- ve 8
286
- ye_ 8
287
- ath 8
288
- asyai 8
289
- _pr 8
290
- _ya 8
291
- asha 8
292
- _nama 8
293
- tasy 8
294
- ut 8
295
- hit 8
296
- _vi 8
297
- hc 8
298
- aad 8
299
- na.n 7
300
- G 7
301
- stasy 7
302
- iit 7
303
- GY 7
304
- arva 7
305
- _par 7
306
- yam 7
307
- Am 7
308
- na. 7
309
- ame 7
310
- hva 7
311
- _ni 7
312
- tat 7
313
- .ah 7
314
- _jaan 7
315
- raN 7
316
- aaya 7
317
- vat 7
318
- hv 7
319
- aD 7
320
- |n 7
321
- aru 7
322
- aat 7
323
- Y 7
324
- _ga 7
325
- araN 7
326
- aaN 7
327
- stas 7
328
- Ad 7
329
- arv 7
330
- yai_ 7
331
- aa. 7
332
- saa 7
333
- mar 7
334
- hha 7
335
- amaa 7
336
- cha_ 7
337
- bhava 7
338
- man 7
339
- yo 7
340
- up 7
341
- om 7
342
- ika 7
343
- ida 7
344
- tu 7
345
- pi 7
346
- _h 7
347
- ba 7
348
- ho 7
349
- shi 7
350
- jaana 7
351
- avi 7
352
- de 7
353
- _dh 7
354
- _pra 7
355
- ip 7
356
- _va 7
357
- ta_ 7
358
- .aha 7
359
- raM 6
360
- hT 6
361
- hat 6
362
- atra 6
363
- to 6
364
- n_ 6
365
- shc 6
366
- aal 6
367
- a.n_ 6
368
- namas 6
369
- At 6
370
- mast 6
371
- gu 6
372
- _cha_ 6
373
- taa_ 6
374
- re_ 6
375
- kar 6
376
- tan 6
377
- atr 6
378
- avaa 6
379
- tar 6
380
- op 6
381
- ku 6
382
- aham 6
383
- syai_ 6
384
- hhT 6
385
- pu 6
386
- amast 6
387
- gat 6
388
- masta 6
389
- ac 6
390
- .\ 6
391
- |t 6
392
- sarva 6
393
- para 6
394
- rat 6
395
- ina 6
396
- sarv 6
397
- nA 6
398
- maH 6
399
- jay 6
400
- dar 6