scylla 0.4.3 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. data/Gemfile +1 -0
  2. data/Gemfile.lock +10 -0
  3. data/VERSION +1 -1
  4. data/lib/scylla/generator.rb +1 -1
  5. data/lib/scylla/lms/13375P33K.lm +156 -156
  6. data/lib/scylla/lms/arabic.lm +133 -133
  7. data/lib/scylla/lms/bulgarian.lm +122 -122
  8. data/lib/scylla/lms/catalan.lm +151 -151
  9. data/lib/scylla/lms/danish.lm +137 -137
  10. data/lib/scylla/lms/english.lm +207 -207
  11. data/lib/scylla/lms/french.lm +400 -400
  12. data/lib/scylla/lms/japanese.lm +400 -400
  13. data/lib/scylla/lms/korean.lm +233 -233
  14. data/lib/scylla/lms/norwegian.lm +398 -398
  15. data/lib/scylla/lms/spanish.lm +98 -98
  16. data/lib/scylla/lms/swedish.lm +123 -123
  17. data/lib/scylla/lms/tagalog.lm +223 -223
  18. data/lib/scylla/lms/welsh.lm +234 -234
  19. data/lib/scylla/resources.rb +10 -10
  20. data/scylla.gemspec +17 -40
  21. data/source_texts/catalan.txt +28 -28
  22. data/source_texts/danish.txt +62 -62
  23. data/source_texts/english.txt +10 -10
  24. data/source_texts/french.txt +470 -77
  25. data/source_texts/japanese.txt +453 -199
  26. data/source_texts/norwegian.txt +96 -63
  27. data/source_texts/spanish.txt +269 -269
  28. data/test/classifier_test.rb +2 -2
  29. data/test/fixtures/lms/13375p33k.lm +156 -156
  30. data/test/fixtures/lms/danish.lm +137 -137
  31. data/test/fixtures/lms/english.lm +207 -207
  32. data/test/fixtures/lms/french.lm +400 -400
  33. data/test/fixtures/lms/hindi.lm +400 -0
  34. data/test/fixtures/lms/italian.lm +400 -0
  35. data/test/fixtures/lms/japanese.lm +400 -400
  36. data/test/fixtures/lms/norwegian.lm +400 -0
  37. data/test/fixtures/lms/spanish.lm +98 -98
  38. data/test/fixtures/source_texts/danish.txt +62 -62
  39. data/test/fixtures/source_texts/english.txt +10 -10
  40. data/test/fixtures/source_texts/french.txt +470 -77
  41. data/test/fixtures/source_texts/hindi.txt +199 -0
  42. data/test/fixtures/source_texts/italian.txt +120 -0
  43. data/test/fixtures/source_texts/japanese.txt +453 -199
  44. data/test/fixtures/source_texts/norwegian.txt +190 -0
  45. data/test/fixtures/source_texts/spanish.txt +269 -269
  46. data/test/fixtures/test_languages/english +61 -0
  47. data/test/fixtures/test_languages/french +0 -0
  48. data/test/fixtures/test_languages/german +29 -0
  49. data/test/fixtures/test_languages/hindi +3 -0
  50. data/test/fixtures/test_languages/italian +6 -0
  51. data/test/fixtures/test_languages/japanese +79 -0
  52. data/test/fixtures/test_languages/norwegian +14 -0
  53. data/test/fixtures/test_languages/spanish +22 -0
  54. data/test/generator_test.rb +0 -1
  55. data/test/language_test.rb +28 -0
  56. metadata +20 -43
  57. data/lib/scylla/lms/esperanto.lm +0 -400
  58. data/lib/scylla/lms/hungarian.lm +0 -400
  59. data/lib/scylla/lms/irish.lm +0 -400
  60. data/lib/scylla/lms/kannada.lm +0 -400
  61. data/lib/scylla/lms/latin.lm +0 -400
  62. data/lib/scylla/lms/malay.lm +0 -400
  63. data/lib/scylla/lms/marathi.lm +0 -400
  64. data/lib/scylla/lms/mingo.lm +0 -400
  65. data/lib/scylla/lms/nepali.lm +0 -400
  66. data/lib/scylla/lms/quechua.lm +0 -400
  67. data/lib/scylla/lms/rumantsch.lm +0 -400
  68. data/lib/scylla/lms/sanskrit.lm +0 -400
  69. data/lib/scylla/lms/scots_gaelic.lm +0 -400
  70. data/lib/scylla/lms/serbian.lm +0 -400
  71. data/lib/scylla/lms/swahili.lm +0 -400
  72. data/lib/scylla/lms/tamil.lm +0 -400
  73. data/lib/scylla/lms/ukrainian.lm +0 -400
  74. data/lib/scylla/lms/yiddish.lm +0 -400
  75. data/source_texts/esperanto.txt +0 -199
  76. data/source_texts/hungarian.txt +0 -102
  77. data/source_texts/irish.txt +0 -209
  78. data/source_texts/kannada.txt +0 -283
  79. data/source_texts/latin.txt +0 -120
  80. data/source_texts/malay.txt +0 -108
  81. data/source_texts/marathi.txt +0 -100
  82. data/source_texts/mingo.txt +0 -146
  83. data/source_texts/nepali.txt +0 -131
  84. data/source_texts/quechua.txt +0 -108
  85. data/source_texts/rumantsch.txt +0 -110
  86. data/source_texts/sanskrit.txt +0 -135
  87. data/source_texts/scots_gaelic.txt +0 -93
  88. data/source_texts/serbian.txt +0 -121
  89. data/source_texts/swahili.txt +0 -120
  90. data/source_texts/tamil.txt +0 -167
  91. data/source_texts/ukrainian.txt +0 -214
  92. data/source_texts/yiddish-utf.txt +0 -83
  93. data/test/fixtures/lms/kannada.lm +0 -400
  94. data/test/fixtures/source_texts/kannada.txt +0 -283
@@ -1,400 +0,0 @@
1
- _ 1290
2
- ' 360
3
- a 318
4
- k 294
5
- t 293
6
- n 289
7
- e 236
8
- � 222
9
- h 215
10
- u 191
11
- y 187
12
- s 183
13
- w 171
14
- � 156
15
- '_ 141
16
- _n 122
17
- . 98
18
- i 90
19
- � 82
20
- e_ 77
21
- e' 76
22
- ne 73
23
- ._ 71
24
- a' 71
25
- _k 66
26
- " 65
27
- wa 64
28
- u' 64
29
- � 60
30
- _ne 59
31
- kw 56
32
- __ 56
33
- _t 55
34
- � 54
35
- � 54
36
- N 53
37
- e'_ 53
38
- hu 50
39
- _h 49
40
- � 48
41
- n� 47
42
- �' 47
43
- at 46
44
- ne_ 43
45
- yu 43
46
- _u 42
47
- ak 42
48
- 't 42
49
- _ne_ 41
50
- ha 41
51
- kh 41
52
- w� 39
53
- ta 38
54
- _� 38
55
- hs 38
56
- �' 37
57
- _N 36
58
- ka 35
59
- te 34
60
- �n 34
61
- s_ 33
62
- th 32
63
- � 30
64
- ny 30
65
- '. 30
66
- � 29
67
- _" 29
68
- �_ 28
69
- Ne' 28
70
- _w 28
71
- Ne 28
72
- k� 27
73
- ya 27
74
- t_ 27
75
- _. 27
76
- Ne'_ 27
77
- ty 27
78
- _._ 26
79
- yu' 26
80
- 'k 26
81
- ni 26
82
- n� 25
83
- , 25
84
- khu 25
85
- w�n 25
86
- �t 25
87
- �_ 24
88
- �t 24
89
- �y 24
90
- - 23
91
- y� 23
92
- kwa 23
93
- _wa 23
94
- _Ne' 22
95
- ye 22
96
- _Ne 22
97
- ek 21
98
- _Ne'_ 21
99
- aw 21
100
- _kh 21
101
- u'_ 21
102
- ,_ 21
103
- �'_ 21
104
- ae 21
105
- n�' 21
106
- ne' 21
107
- _khu 21
108
- hu_ 20
109
- sa 20
110
- �s 20
111
- u_ 20
112
- ? 20
113
- un 20
114
- ne'_ 20
115
- n�y 19
116
- n� 19
117
- "_ 19
118
- �k 19
119
- i' 19
120
- sh 19
121
- y� 19
122
- as 19
123
- iy 19
124
- '._ 19
125
- k� 18
126
- '� 18
127
- �yu 18
128
- '� 18
129
- ts 18
130
- ku 18
131
- 's 18
132
- � 18
133
- �_ 18
134
- �_ 18
135
- �h 18
136
- �yu' 17
137
- w�n� 17
138
- w�n�y 17
139
- n�yu 17
140
- �n�yu 17
141
- _hu 17
142
- n�yu' 17
143
- u't 17
144
- �k 17
145
- t� 17
146
- ut 17
147
- �n�y 17
148
- khu_ 17
149
- �n� 17
150
- wa' 16
151
- si 16
152
- us 16
153
- T 16
154
- _ne' 16
155
- _s 16
156
- t� 16
157
- st 16
158
- aw� 16
159
- na 16
160
- _na 16
161
- n� 16
162
- _k� 16
163
- ae_ 15
164
- �k 15
165
- an 15
166
- n- 15
167
- _ne'_ 15
168
- ah 15
169
- "N 15
170
- yu'_ 15
171
- _a 15
172
- ke 15
173
- _ha 15
174
- w� 15
175
- �t 15
176
- �t 15
177
- h� 14
178
- _un 14
179
- ay 14
180
- i_ 14
181
- _n- 14
182
- �n 14
183
- a't 14
184
- _"N 14
185
- _te 14
186
- �hs 14
187
- y� 14
188
- �n 13
189
- '�_ 13
190
- t� 13
191
- ta' 13
192
- k�s 13
193
- t� 13
194
- he 13
195
- _khu_ 13
196
- ." 13
197
- K 13
198
- aw�n 13
199
- �_ 13
200
- aw�n� 13
201
- 't_ 13
202
- y�_ 12
203
- y� 12
204
- n�_ 12
205
- �� 12
206
- _ka 12
207
- tak 12
208
- akw 12
209
- ey 12
210
- -a 12
211
- _�k 12
212
- �k 12
213
- tw 12
214
- we 12
215
- ?" 12
216
- a_ 12
217
- �n� 12
218
- ik 12
219
- sy 12
220
- �' 12
221
- ht 12
222
- niy 11
223
- _K 11
224
- _ni 11
225
- �k 11
226
- hsa 11
227
- �yu'_ 11
228
- w� 11
229
- nae 11
230
- h� 11
231
- uk 11
232
- ui 11
233
- _nae 11
234
- ha' 11
235
- sat 11
236
- �n 10
237
- _hui 10
238
- uik� 10
239
- _huik 10
240
- wat 10
241
- �� 10
242
- u'k 10
243
- ?"_ 10
244
- te' 10
245
- _�hs 10
246
- �n 10
247
- k�_ 10
248
- k_ 10
249
- huik 10
250
- tk 10
251
- uik�_ 10
252
- s. 10
253
- �h 10
254
- _sh 10
255
- sk 10
256
- uik 10
257
- huik� 10
258
- _k�s 10
259
- w� 10
260
- �s_ 10
261
- hui 10
262
- ik� 10
263
- ya' 10
264
- �' 10
265
- ik�_ 10
266
- ti 10
267
- _�h 10
268
- uw 10
269
- k�s_ 9
270
- �' 9
271
- k�� 9
272
- k� 9
273
- �'_ 9
274
- aty 9
275
- ai 9
276
- y� 9
277
- -aw 9
278
- n�t 9
279
- aa 9
280
- _nae_ 9
281
- nae_ 9
282
- '�� 9
283
- �s 9
284
- sn 9
285
- a'_ 9
286
- k� 9
287
- hsat 9
288
- Ka 9
289
- ', 9
290
- y�' 9
291
- ekh 9
292
- eh 9
293
- -aw�n 8
294
- _ut 8
295
- hsi 8
296
- sh� 8
297
- wai 8
298
- si' 8
299
- kwa' 8
300
- N-a 8
301
- Kan 8
302
- _un� 8
303
- ',_ 8
304
- �te 8
305
- �_ 8
306
- un� 8
307
- N-aw� 8
308
- Na 8
309
- ths 8
310
- N-aw 8
311
- se 8
312
- _sh� 8
313
- _N-a 8
314
- _N- 8
315
- t� 8
316
- he_ 8
317
- _u' 8
318
- h� 8
319
- N- 8
320
- _ha' 8
321
- tek 8
322
- _ta 8
323
- h�_ 8
324
- _�t 8
325
- _N-aw 8
326
- un� 8
327
- _wai 8
328
- tey 8
329
- _t� 8
330
- ?_ 8
331
- _T 8
332
- 'u' 8
333
- a'k 8
334
- kwe 8
335
- kw� 8
336
- ka' 8
337
- �n 8
338
- -aw� 8
339
- nek 8
340
- twa 8
341
- w�t 8
342
- 'u 8
343
- kn 7
344
- w�_ 7
345
- _Ka 7
346
- _y 7
347
- a� 7
348
- _he 7
349
- a'u 7
350
- kw� 7
351
- S 7
352
- N�_ 7
353
- a'u' 7
354
- niyu 7
355
- 't� 7
356
- nya 7
357
- �t_ 7
358
- _wai_ 7
359
- �'_ 7
360
- 'h 7
361
- iyu 7
362
- _ts 7
363
- ya't 7
364
- �_ 7
365
- w�' 7
366
- �� 7
367
- k�' 7
368
- u't_ 7
369
- y� 7
370
- �'s 7
371
- ata 7
372
- N� 7
373
- u't� 7
374
- �y 7
375
- tsi 7
376
- ai_ 7
377
- _un�_ 7
378
- _uk 7
379
- _he_ 7
380
- "T 7
381
- ��' 7
382
- �th 7
383
- �� 7
384
- _kw 7
385
- '� 7
386
- ust 7
387
- y�'_ 7
388
- '? 7
389
- un�_ 7
390
- wai_ 7
391
- s._ 7
392
- �' 7
393
- ku' 7
394
- hah 7
395
- �ka 7
396
- a'u't 7
397
- wi 7
398
- �'� 7
399
- H 7
400
- ahs 7