scylla 0.4.3 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. data/Gemfile +1 -0
  2. data/Gemfile.lock +10 -0
  3. data/VERSION +1 -1
  4. data/lib/scylla/generator.rb +1 -1
  5. data/lib/scylla/lms/13375P33K.lm +156 -156
  6. data/lib/scylla/lms/arabic.lm +133 -133
  7. data/lib/scylla/lms/bulgarian.lm +122 -122
  8. data/lib/scylla/lms/catalan.lm +151 -151
  9. data/lib/scylla/lms/danish.lm +137 -137
  10. data/lib/scylla/lms/english.lm +207 -207
  11. data/lib/scylla/lms/french.lm +400 -400
  12. data/lib/scylla/lms/japanese.lm +400 -400
  13. data/lib/scylla/lms/korean.lm +233 -233
  14. data/lib/scylla/lms/norwegian.lm +398 -398
  15. data/lib/scylla/lms/spanish.lm +98 -98
  16. data/lib/scylla/lms/swedish.lm +123 -123
  17. data/lib/scylla/lms/tagalog.lm +223 -223
  18. data/lib/scylla/lms/welsh.lm +234 -234
  19. data/lib/scylla/resources.rb +10 -10
  20. data/scylla.gemspec +17 -40
  21. data/source_texts/catalan.txt +28 -28
  22. data/source_texts/danish.txt +62 -62
  23. data/source_texts/english.txt +10 -10
  24. data/source_texts/french.txt +470 -77
  25. data/source_texts/japanese.txt +453 -199
  26. data/source_texts/norwegian.txt +96 -63
  27. data/source_texts/spanish.txt +269 -269
  28. data/test/classifier_test.rb +2 -2
  29. data/test/fixtures/lms/13375p33k.lm +156 -156
  30. data/test/fixtures/lms/danish.lm +137 -137
  31. data/test/fixtures/lms/english.lm +207 -207
  32. data/test/fixtures/lms/french.lm +400 -400
  33. data/test/fixtures/lms/hindi.lm +400 -0
  34. data/test/fixtures/lms/italian.lm +400 -0
  35. data/test/fixtures/lms/japanese.lm +400 -400
  36. data/test/fixtures/lms/norwegian.lm +400 -0
  37. data/test/fixtures/lms/spanish.lm +98 -98
  38. data/test/fixtures/source_texts/danish.txt +62 -62
  39. data/test/fixtures/source_texts/english.txt +10 -10
  40. data/test/fixtures/source_texts/french.txt +470 -77
  41. data/test/fixtures/source_texts/hindi.txt +199 -0
  42. data/test/fixtures/source_texts/italian.txt +120 -0
  43. data/test/fixtures/source_texts/japanese.txt +453 -199
  44. data/test/fixtures/source_texts/norwegian.txt +190 -0
  45. data/test/fixtures/source_texts/spanish.txt +269 -269
  46. data/test/fixtures/test_languages/english +61 -0
  47. data/test/fixtures/test_languages/french +0 -0
  48. data/test/fixtures/test_languages/german +29 -0
  49. data/test/fixtures/test_languages/hindi +3 -0
  50. data/test/fixtures/test_languages/italian +6 -0
  51. data/test/fixtures/test_languages/japanese +79 -0
  52. data/test/fixtures/test_languages/norwegian +14 -0
  53. data/test/fixtures/test_languages/spanish +22 -0
  54. data/test/generator_test.rb +0 -1
  55. data/test/language_test.rb +28 -0
  56. metadata +20 -43
  57. data/lib/scylla/lms/esperanto.lm +0 -400
  58. data/lib/scylla/lms/hungarian.lm +0 -400
  59. data/lib/scylla/lms/irish.lm +0 -400
  60. data/lib/scylla/lms/kannada.lm +0 -400
  61. data/lib/scylla/lms/latin.lm +0 -400
  62. data/lib/scylla/lms/malay.lm +0 -400
  63. data/lib/scylla/lms/marathi.lm +0 -400
  64. data/lib/scylla/lms/mingo.lm +0 -400
  65. data/lib/scylla/lms/nepali.lm +0 -400
  66. data/lib/scylla/lms/quechua.lm +0 -400
  67. data/lib/scylla/lms/rumantsch.lm +0 -400
  68. data/lib/scylla/lms/sanskrit.lm +0 -400
  69. data/lib/scylla/lms/scots_gaelic.lm +0 -400
  70. data/lib/scylla/lms/serbian.lm +0 -400
  71. data/lib/scylla/lms/swahili.lm +0 -400
  72. data/lib/scylla/lms/tamil.lm +0 -400
  73. data/lib/scylla/lms/ukrainian.lm +0 -400
  74. data/lib/scylla/lms/yiddish.lm +0 -400
  75. data/source_texts/esperanto.txt +0 -199
  76. data/source_texts/hungarian.txt +0 -102
  77. data/source_texts/irish.txt +0 -209
  78. data/source_texts/kannada.txt +0 -283
  79. data/source_texts/latin.txt +0 -120
  80. data/source_texts/malay.txt +0 -108
  81. data/source_texts/marathi.txt +0 -100
  82. data/source_texts/mingo.txt +0 -146
  83. data/source_texts/nepali.txt +0 -131
  84. data/source_texts/quechua.txt +0 -108
  85. data/source_texts/rumantsch.txt +0 -110
  86. data/source_texts/sanskrit.txt +0 -135
  87. data/source_texts/scots_gaelic.txt +0 -93
  88. data/source_texts/serbian.txt +0 -121
  89. data/source_texts/swahili.txt +0 -120
  90. data/source_texts/tamil.txt +0 -167
  91. data/source_texts/ukrainian.txt +0 -214
  92. data/source_texts/yiddish-utf.txt +0 -83
  93. data/test/fixtures/lms/kannada.lm +0 -400
  94. data/test/fixtures/source_texts/kannada.txt +0 -283
@@ -1,400 +0,0 @@
1
- � 44428
2
- � 33131
3
- � 31240
4
- � 14531
5
- � 13188
6
- _ 12544
7
- ் 7272
8
- � 7272
9
- �� 7272
10
- �� 5073
11
- ்� 5073
12
- ��� 5073
13
- ��� 5073
14
- ்� 5073
15
- �� 5073
16
- _� 4880
17
- _� 4880
18
- � 3855
19
- �� 3855
20
- க 3855
21
- �� 3598
22
- க� 3598
23
- ��� 3598
24
- �� 3518
25
- � 3518
26
- ி 3518
27
- �� 3248
28
- ி� 3248
29
- ��� 3248
30
- ி� 3248
31
- ��� 3248
32
- �� 3248
33
- ு 3027
34
- � 3027
35
- �� 3027
36
- � 2754
37
- த 2754
38
- �� 2754
39
- �� 2602
40
- த� 2602
41
- ��� 2602
42
- �� 2395
43
- � 2395
44
- ப 2395
45
- ப� 2388
46
- ��� 2388
47
- �� 2388
48
- ��� 2074
49
- �� 2074
50
- �� 2074
51
- ��� 2074
52
- ு� 2074
53
- ு� 2074
54
- � 2009
55
- �� 2009
56
- ட 2009
57
- �� 1919
58
- ்_ 1919
59
- ��_ 1919
60
- ��� 1919
61
- ட� 1919
62
- �_ 1919
63
- க� 1903
64
- ��� 1903
65
- �� 1903
66
- �� 1891
67
- ம 1891
68
- �� 1887
69
- ம� 1887
70
- ��� 1887
71
- �� 1695
72
- க� 1695
73
- ��� 1695
74
- ர 1564
75
- �� 1564
76
- � 1564
77
- ��� 1540
78
- �� 1540
79
- ர� 1540
80
- �� 1531
81
- ா 1531
82
- � 1531
83
- � 1475
84
- �� 1475
85
- வ 1475
86
- வ� 1470
87
- �� 1470
88
- ��� 1470
89
- ம� 1405
90
- ��� 1405
91
- �� 1405
92
- �� 1400
93
- ��� 1400
94
- த� 1400
95
- ��க 1382
96
- �க 1382
97
- �� 1371
98
- ா� 1371
99
- �� 1371
100
- ��� 1371
101
- ��� 1371
102
- ா� 1371
103
- ய 1343
104
- �� 1343
105
- ��� 1335
106
- ப� 1335
107
- �� 1335
108
- �க� 1329
109
- � 1249
110
- �� 1241
111
- ை 1241
112
- �� 1239
113
- ற 1239
114
- � 1239
115
- �� 1237
116
- � 1237
117
- ல 1237
118
- �� 1202
119
- ��� 1202
120
- த� 1202
121
- ��� 1197
122
- � 1197
123
- ற� 1197
124
- �� 1197
125
- �� 1197
126
- ன 1197
127
- ய� 1196
128
- ��� 1196
129
- �� 1196
130
- �� 1177
131
- ��� 1177
132
- ல� 1177
133
- ள 1143
134
- � 1143
135
- �� 1143
136
- ��� 1097
137
- ட� 1097
138
- �� 1097
139
- ��� 1091
140
- வ� 1091
141
- �� 1091
142
- ��� 1087
143
- �� 1087
144
- ள� 1087
145
- ப� 1053
146
- �� 1053
147
- ��� 1053
148
- ர� 1042
149
- �� 1042
150
- ��� 1042
151
- ��� 1020
152
- �� 1020
153
- ன� 1020
154
- �் 929
155
- ��் 929
156
- �ப 921
157
- ��ப 921
158
- �ப� 916
159
- ��� 890
160
- ய� 890
161
- �� 890
162
- ��் 876
163
- �் 876
164
- ல� 863
165
- �� 863
166
- ��� 863
167
- �்� 860
168
- __ 836
169
- ��� 822
170
- �� 822
171
- ட� 822
172
- ��த 816
173
- �த 816
174
- �� 810
175
- �� 810
176
- ��� 803
177
- ை� 803
178
- ை� 803
179
- ��� 803
180
- � 763
181
- ச 763
182
- �� 763
183
- �� 750
184
- ச� 750
185
- ��� 750
186
- ற� 749
187
- ன� 749
188
- ��� 749
189
- �� 749
190
- ��� 749
191
- �� 749
192
- ��் 736
193
- �் 736
194
- ு_ 701
195
- �_ 701
196
- ��_ 701
197
- �த� 697
198
- �்� 694
199
- �் 652
200
- ��் 652
201
- �்_ 646
202
- �ட 645
203
- ��ட 645
204
- ��் 630
205
- �் 630
206
- �� 627
207
- � 627
208
- �� 627
209
- �ு 625
210
- ��ு 625
211
- ��� 622
212
- �� 622
213
- ள� 622
214
- � 619
215
- ��ய 609
216
- �ய 609
217
- �ள� 584
218
- �ள 584
219
- ��ள 584
220
- _ப 574
221
- _ப� 574
222
- �ம 568
223
- ��ம 568
224
- �ட� 568
225
- �ம� 567
226
- �� 562
227
- �� 562
228
- _க� 558
229
- _க 558
230
- ��� 556
231
- �� 556
232
- ந 556
233
- ந� 556
234
- �� 556
235
- � 556
236
- ��ி 549
237
- �ி 549
238
- _வ� 546
239
- _வ 546
240
- ��் 536
241
- �் 536
242
- �ய� 529
243
- ��ி 525
244
- �ி 525
245
- �ி� 524
246
- �் 515
247
- ��் 515
248
- ��க 507
249
- �க 507
250
- �ி 503
251
- �க� 503
252
- ��ி 503
253
- ��் 500
254
- �் 500
255
- ��� 498
256
- ர� 498
257
- �� 498
258
- _ம 497
259
- _ம� 497
260
- �ி� 496
261
- �ி� 493
262
- �்� 488
263
- ம� 482
264
- �� 482
265
- ��� 482
266
- �ு 474
267
- ��ு 474
268
- ��� 465
269
- �்� 465
270
- �� 465
271
- ள� 465
272
- ற� 448
273
- ��� 448
274
- �� 448
275
- ��ி 434
276
- �ி 434
277
- �ற 424
278
- ��ற 424
279
- � 421
280
- ��� 421
281
- அ� 421
282
- அ� 421
283
- �� 421
284
- �� 421
285
- ��� 421
286
- �� 421
287
- அ 421
288
- ��ல 416
289
- �ல 416
290
- _அ 412
291
- _அ� 412
292
- � 411
293
- ண 411
294
- �� 411
295
- �்_ 407
296
- ண� 406
297
- �� 406
298
- ��� 406
299
- . 403
300
- ��ு 403
301
- �ு 403
302
- �ா 402
303
- ��ா 402
304
- �ி� 393
305
- �ற� 387
306
- ��க 387
307
- �க 387
308
- �_ 386
309
- ை_ 385
310
- ��_ 385
311
- �க� 383
312
- �ு� 383
313
- ��் 382
314
- �் 382
315
- �ல� 381
316
- வ� 379
317
- ��� 379
318
- �� 379
319
- ��� 378
320
- ச� 378
321
- �� 378
322
- ச� 372
323
- ��� 372
324
- �� 372
325
- _ச 368
326
- _ச� 368
327
- �்� 365
328
- ��� 362
329
- �� 362
330
- �ய 362
331
- ெ 362
332
- ெ� 362
333
- ��� 362
334
- ��ய 362
335
- ெ� 362
336
- ��ப 361
337
- ��் 361
338
- �ப 361
339
- �் 361
340
- �ப� 361
341
- �்� 360
342
- ��� 359
343
- �� 359
344
- ந� 359
345
- ��ு 357
346
- �ு 357
347
- _த� 347
348
- _த 347
349
- �்� 340
350
- ��ன 329
351
- �ன 329
352
- �ன� 328
353
- �� 318
354
- �� 318
355
- � 318
356
- �ட 315
357
- ��ட 315
358
- ல� 314
359
- �� 314
360
- ��� 314
361
- ே 312
362
- �� 312
363
- �ட� 312
364
- ._ 308
365
- ொ� 307
366
- ��� 307
367
- இ 307
368
- ��� 307
369
- ��� 307
370
- �� 307
371
- இ� 307
372
- இ� 307
373
- �� 307
374
- ொ 307
375
- ��� 307
376
- ொ� 307
377
- �� 306
378
- ��� 306
379
- ய� 306
380
- �ய� 303
381
- ��� 301
382
- ண� 301
383
- �� 301
384
- �ு 300
385
- ��ு 300
386
- � 292
387
- ீ 290
388
- �� 290
389
- _இ 288
390
- _இ� 288
391
- ீ� 284
392
- ��� 284
393
- �� 284
394
- ீ� 284
395
- ��� 284
396
- �� 284
397
- �த 283
398
- ��த 283
399
- ��க 283
400
- �க 283