scylla 0.9.3 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.rdoc +3 -12
- data/bin/scylla +1 -1
- data/lib/scylla/classifier.rb +3 -2
- data/lib/scylla/generator.rb +11 -4
- data/lib/scylla/lms/arabic.lm +400 -400
- data/lib/scylla/lms/bulgarian.lm +400 -400
- data/lib/scylla/lms/catalan.lm +400 -400
- data/lib/scylla/lms/chinese.lm +395 -395
- data/lib/scylla/lms/czech.lm +397 -397
- data/lib/scylla/lms/danish.lm +372 -372
- data/lib/scylla/lms/dutch.lm +382 -382
- data/lib/scylla/lms/english.lm +378 -378
- data/lib/scylla/lms/finnish.lm +388 -388
- data/lib/scylla/lms/french.lm +392 -392
- data/lib/scylla/lms/german.lm +396 -396
- data/lib/scylla/lms/greek.lm +400 -400
- data/lib/scylla/lms/hebrew.lm +400 -400
- data/lib/scylla/lms/hindi.lm +399 -399
- data/lib/scylla/lms/icelandic.lm +225 -225
- data/lib/scylla/lms/indonesian.lm +391 -391
- data/lib/scylla/lms/italian.lm +382 -382
- data/lib/scylla/lms/japanese.lm +400 -400
- data/lib/scylla/lms/kannada.lm +392 -392
- data/lib/scylla/lms/korean.lm +391 -391
- data/lib/scylla/lms/marathi.lm +388 -388
- data/lib/scylla/lms/norwegian.lm +375 -375
- data/lib/scylla/lms/persian.lm +399 -399
- data/lib/scylla/lms/polish.lm +399 -399
- data/lib/scylla/lms/portuguese.lm +390 -390
- data/lib/scylla/lms/romanian.lm +353 -353
- data/lib/scylla/lms/russian.lm +400 -400
- data/lib/scylla/lms/slovak.lm +361 -361
- data/lib/scylla/lms/slovenian.lm +273 -273
- data/lib/scylla/lms/spanish.lm +371 -371
- data/lib/scylla/lms/swedish.lm +400 -400
- data/lib/scylla/lms/tagalog.lm +295 -295
- data/lib/scylla/lms/thai.lm +400 -400
- data/lib/scylla/lms/turkish.lm +377 -377
- data/lib/scylla/lms/vietnamese.lm +400 -400
- data/lib/scylla/lms/welsh.lm +311 -311
- data/lib/scylla/loader.rb +1 -1
- data/test/classifier_test.rb +6 -4
- data/test/fixtures/lms/arabic.lm +400 -0
- data/test/fixtures/lms/bulgarian.lm +400 -0
- data/test/fixtures/lms/catalan.lm +400 -0
- data/test/fixtures/lms/chinese.lm +400 -0
- data/test/fixtures/lms/czech.lm +400 -0
- data/test/fixtures/lms/danish.lm +399 -399
- data/test/fixtures/lms/dutch.lm +400 -0
- data/test/fixtures/lms/english.lm +400 -400
- data/test/fixtures/lms/finnish.lm +400 -0
- data/test/fixtures/lms/french.lm +397 -397
- data/test/fixtures/lms/german.lm +400 -400
- data/test/fixtures/lms/greek.lm +400 -0
- data/test/fixtures/lms/hebrew.lm +400 -0
- data/test/fixtures/lms/hindi.lm +400 -400
- data/test/fixtures/lms/icelandic.lm +400 -0
- data/test/fixtures/lms/indonesian.lm +400 -0
- data/test/fixtures/lms/italian.lm +400 -400
- data/test/fixtures/lms/japanese.lm +400 -400
- data/test/fixtures/lms/kannada.lm +400 -0
- data/test/fixtures/lms/korean.lm +400 -0
- data/test/fixtures/lms/marathi.lm +400 -0
- data/test/fixtures/lms/norwegian.lm +399 -399
- data/test/fixtures/lms/persian.lm +400 -0
- data/test/fixtures/lms/polish.lm +400 -0
- data/test/fixtures/lms/portuguese.lm +400 -0
- data/test/fixtures/lms/romanian.lm +400 -0
- data/test/fixtures/lms/russian.lm +400 -0
- data/test/fixtures/lms/slovak.lm +400 -0
- data/test/fixtures/lms/slovenian.lm +400 -0
- data/test/fixtures/lms/spanish.lm +400 -400
- data/test/fixtures/lms/swedish.lm +400 -0
- data/test/fixtures/lms/tagalog.lm +400 -0
- data/test/fixtures/lms/thai.lm +400 -0
- data/test/fixtures/lms/turkish.lm +400 -0
- data/test/fixtures/lms/vietnamese.lm +400 -0
- data/test/fixtures/lms/welsh.lm +400 -0
- data/test/fixtures/test_languages/japanese +149 -67
- data/test/generator_test.rb +2 -44
- data/test/helper.rb +3 -3
- data/test/language_test.rb +6 -2
- data/test/loader_test.rb +2 -2
- data/test/scylla_test.rb +7 -7
- metadata +144 -95
data/lib/scylla/lms/icelandic.lm
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
_ 3270
|
2
|
-
� 1142
|
3
2
|
r 775
|
4
3
|
a 756
|
5
4
|
n 717
|
@@ -9,392 +8,393 @@ e 517
|
|
9
8
|
l 417
|
10
9
|
t 401
|
11
10
|
u 389
|
12
|
-
_� 355
|
13
|
-
� 327
|
14
11
|
ð 327
|
15
12
|
g 310
|
16
|
-
m 304
|
17
13
|
r_ 304
|
14
|
+
m 304
|
18
15
|
f 255
|
19
16
|
k 233
|
20
17
|
d 217
|
21
|
-
á 191
|
22
18
|
o 191
|
23
|
-
|
19
|
+
á 191
|
24
20
|
_s 185
|
25
|
-
í 176
|
26
21
|
ð_ 176
|
27
|
-
|
28
|
-
� 176
|
22
|
+
í 176
|
29
23
|
v 175
|
30
24
|
ar 162
|
31
25
|
_e 158
|
32
26
|
i_ 153
|
33
27
|
nd 152
|
34
28
|
in 147
|
35
|
-
an 145
|
36
29
|
a_ 145
|
30
|
+
an 145
|
37
31
|
h 137
|
38
|
-
st 129
|
39
32
|
m_ 129
|
33
|
+
st 129
|
40
34
|
er 123
|
41
|
-
� 122
|
42
35
|
þ 122
|
43
36
|
_á 113
|
44
37
|
_þ 110
|
38
|
+
_í 107
|
45
39
|
g_ 107
|
46
40
|
j 107
|
47
|
-
_í 107
|
48
41
|
_h 106
|
49
42
|
n_ 104
|
50
43
|
ar_ 104
|
51
|
-
r� 101
|
52
44
|
la 99
|
53
45
|
um 99
|
54
46
|
_f 92
|
55
47
|
_v 90
|
56
|
-
ö 89
|
57
|
-
a� 89
|
58
48
|
u_ 89
|
59
|
-
|
60
|
-
� 89
|
61
|
-
ir 88
|
62
|
-
� 88
|
63
|
-
ið 88
|
49
|
+
ö 89
|
64
50
|
að 88
|
65
51
|
ó 88
|
66
|
-
|
67
|
-
|
52
|
+
ið 88
|
53
|
+
ir 88
|
68
54
|
um_ 86
|
55
|
+
nn 86
|
69
56
|
and 86
|
70
57
|
_o 85
|
71
|
-
_a 84
|
72
58
|
t_ 84
|
73
|
-
|
59
|
+
_a 84
|
74
60
|
á_ 83
|
75
|
-
|
76
|
-
en 82
|
77
|
-
di 82
|
78
|
-
og_ 82
|
61
|
+
og 83
|
79
62
|
_og 82
|
63
|
+
og_ 82
|
64
|
+
di 82
|
65
|
+
en 82
|
80
66
|
ri 82
|
81
67
|
ti 79
|
82
68
|
ei 78
|
69
|
+
_er 74
|
83
70
|
sl 74
|
84
71
|
ur 74
|
85
|
-
_er 74
|
86
72
|
ir_ 72
|
87
73
|
na 71
|
74
|
+
ið_ 71
|
88
75
|
ndi 70
|
89
76
|
ng 70
|
90
77
|
lan 70
|
91
78
|
b 65
|
79
|
+
að_ 65
|
92
80
|
le 65
|
93
|
-
ta 64
|
94
|
-
� 64
|
95
81
|
æ 64
|
82
|
+
ta 64
|
96
83
|
ur_ 63
|
97
84
|
_t 60
|
98
|
-
y 59
|
99
85
|
s_ 59
|
86
|
+
y 59
|
87
|
+
_á_ 59
|
100
88
|
_n 58
|
101
|
-
ga 57
|
102
89
|
un 57
|
90
|
+
ga 57
|
103
91
|
ve 56
|
92
|
+
ísl 56
|
93
|
+
_ís 56
|
104
94
|
ís 56
|
105
|
-
�sl 56
|
106
|
-
�s 56
|
107
|
-
_l 55
|
108
95
|
_u 55
|
96
|
+
_l 55
|
109
97
|
_m 52
|
110
98
|
í_ 51
|
111
|
-
�_ 51
|
112
99
|
p 50
|
113
|
-
il 49
|
114
100
|
se 49
|
115
101
|
or 49
|
102
|
+
il 49
|
103
|
+
ra 48
|
116
104
|
af 48
|
105
|
+
sk 48
|
117
106
|
ki 48
|
118
107
|
jó 48
|
119
|
-
ra 48
|
120
|
-
sk 48
|
121
108
|
ing 47
|
122
109
|
ni 46
|
123
110
|
er_ 46
|
124
|
-
al 45
|
125
111
|
ru 45
|
126
|
-
|
127
|
-
|
128
|
-
nu 44
|
112
|
+
al 45
|
113
|
+
_í_ 45
|
129
114
|
_k 44
|
115
|
+
is 44
|
116
|
+
l_ 44
|
130
117
|
va 44
|
118
|
+
nu 44
|
131
119
|
ár 43
|
132
|
-
�r 43
|
133
120
|
sla 43
|
134
|
-
|
135
|
-
ns 40
|
136
|
-
s� 40
|
121
|
+
_ár 41
|
137
122
|
me 40
|
138
123
|
ú 40
|
124
|
+
ns 40
|
139
125
|
ha 39
|
140
126
|
ða 38
|
141
127
|
rn 38
|
128
|
+
rið 38
|
142
129
|
_r 38
|
143
|
-
|
144
|
-
�a 38
|
145
|
-
ði 37
|
146
|
-
_a� 37
|
147
|
-
di_ 37
|
148
|
-
eg 37
|
149
|
-
�i 37
|
130
|
+
_st 37
|
150
131
|
am 37
|
151
|
-
|
132
|
+
ði 37
|
152
133
|
_va 37
|
153
|
-
|
154
|
-
|
134
|
+
d_ 37
|
135
|
+
eg 37
|
136
|
+
_að 37
|
137
|
+
di_ 37
|
155
138
|
_se 36
|
139
|
+
gu 36
|
156
140
|
inn 35
|
157
|
-
lu 35
|
158
141
|
st_ 35
|
159
|
-
|
142
|
+
lu 35
|
160
143
|
ld 35
|
161
|
-
ef 34
|
162
144
|
þa 34
|
163
|
-
|
164
|
-
|
165
|
-
_s� 34
|
145
|
+
ef 34
|
146
|
+
_þa 34
|
166
147
|
em 33
|
167
|
-
tt 33
|
168
148
|
da 33
|
169
|
-
|
149
|
+
tt 33
|
170
150
|
il_ 32
|
171
|
-
rs 32
|
172
151
|
var 32
|
152
|
+
rs 32
|
173
153
|
he 32
|
174
|
-
|
175
|
-
_en 31
|
154
|
+
ru_ 32
|
176
155
|
_ha 31
|
177
|
-
|
178
|
-
|
179
|
-
n� 30
|
180
|
-
sa 30
|
181
|
-
fl 30
|
156
|
+
_en 31
|
157
|
+
_g 31
|
182
158
|
rð 30
|
183
|
-
�u 30
|
184
159
|
ðu 30
|
185
|
-
|
160
|
+
til 30
|
161
|
+
fl 30
|
162
|
+
sa 30
|
186
163
|
ór 30
|
187
|
-
|
188
|
-
sem 29
|
189
|
-
_r� 29
|
190
|
-
ein 29
|
164
|
+
þe 29
|
191
165
|
em_ 29
|
192
|
-
�e 29
|
193
|
-
_ti 29
|
194
166
|
tj 29
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
167
|
+
ein 29
|
168
|
+
_þe 29
|
169
|
+
et 29
|
170
|
+
_ti 29
|
171
|
+
sem 29
|
199
172
|
ka 28
|
200
|
-
|
173
|
+
ver 28
|
174
|
+
ári 28
|
175
|
+
ku 28
|
176
|
+
jór 27
|
177
|
+
eru 27
|
201
178
|
tu 27
|
202
|
-
|
179
|
+
re 27
|
203
180
|
gi 27
|
204
|
-
tj� 27
|
205
|
-
eru 27
|
206
181
|
gar 27
|
182
|
+
ma 27
|
207
183
|
_b 27
|
208
|
-
|
209
|
-
stj 26
|
184
|
+
órn 26
|
210
185
|
na_ 26
|
186
|
+
stj 26
|
187
|
+
tjó 26
|
211
188
|
_me 26
|
212
189
|
_um 26
|
213
|
-
|
214
|
-
ns_ 25
|
215
|
-
_n� 25
|
216
|
-
vi 25
|
217
|
-
�l 25
|
218
|
-
öl 25
|
190
|
+
_he 25
|
219
191
|
jö 25
|
220
|
-
|
192
|
+
öl 25
|
193
|
+
ns_ 25
|
221
194
|
eð 25
|
222
|
-
|
223
|
-
ta_ 24
|
224
|
-
end 24
|
225
|
-
nn_ 24
|
226
|
-
rí 24
|
227
|
-
f� 24
|
228
|
-
kk 24
|
229
|
-
� 24
|
195
|
+
vi 25
|
230
196
|
und 24
|
197
|
+
ý 24
|
198
|
+
nd_ 24
|
231
199
|
len 24
|
232
200
|
en_ 24
|
201
|
+
nn_ 24
|
202
|
+
_la 24
|
203
|
+
end 24
|
233
204
|
ko 24
|
234
|
-
nd_ 24
|
235
205
|
ti_ 24
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
fa 23
|
206
|
+
ta_ 24
|
207
|
+
kk 24
|
208
|
+
rí 24
|
240
209
|
_ve 23
|
241
|
-
ík 23
|
242
|
-
f_ 23
|
243
|
-
�k 23
|
244
210
|
ds 23
|
211
|
+
fa 23
|
212
|
+
ík 23
|
245
213
|
nar 23
|
246
|
-
nds 23
|
247
214
|
fn 23
|
248
|
-
|
215
|
+
f_ 23
|
216
|
+
nds 23
|
217
|
+
sv 22
|
218
|
+
ann 22
|
219
|
+
rá 22
|
249
220
|
_sa 22
|
250
221
|
nga 22
|
251
222
|
sam 22
|
252
|
-
sv 22
|
253
|
-
rá 22
|
254
|
-
_h� 22
|
255
|
-
ins 22
|
256
223
|
ll 22
|
257
|
-
|
258
|
-
|
259
|
-
|
224
|
+
ag 22
|
225
|
+
ins 22
|
226
|
+
fu 21
|
227
|
+
li 21
|
260
228
|
si 21
|
229
|
+
rík 21
|
261
230
|
_d 21
|
262
|
-
|
263
|
-
fu 21
|
231
|
+
fr 21
|
264
232
|
é 21
|
265
|
-
|
266
|
-
|
267
|
-
|
233
|
+
ði_ 21
|
234
|
+
_af 20
|
235
|
+
íki 20
|
268
236
|
ega 20
|
269
237
|
ss 20
|
270
|
-
_no 20
|
271
|
-
�ki 20
|
272
238
|
mi 20
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
rl 19
|
277
|
-
rr 19
|
278
|
-
fi 19
|
239
|
+
með 20
|
240
|
+
_no 20
|
241
|
+
no 20
|
279
242
|
num 19
|
280
|
-
�� 19
|
281
243
|
dir 19
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
244
|
+
fi 19
|
245
|
+
_rí 19
|
246
|
+
rr 19
|
247
|
+
rl 19
|
248
|
+
kis 19
|
249
|
+
_sk 18
|
250
|
+
óð 18
|
251
|
+
það 18
|
290
252
|
_un 18
|
253
|
+
nna 18
|
254
|
+
ey 18
|
291
255
|
yr 18
|
256
|
+
haf 18
|
292
257
|
sta 18
|
293
|
-
|
294
|
-
ey 18
|
295
|
-
�ð 18
|
258
|
+
es 17
|
296
259
|
fy 17
|
297
|
-
_vi 17
|
298
260
|
fo 17
|
299
|
-
|
300
|
-
|
261
|
+
for 17
|
262
|
+
jóð 17
|
263
|
+
við 17
|
264
|
+
þj 17
|
265
|
+
þjó 17
|
266
|
+
_ef 17
|
267
|
+
af_ 17
|
268
|
+
_vi 17
|
301
269
|
ut 17
|
302
270
|
_fo 17
|
303
|
-
|
271
|
+
k_ 17
|
304
272
|
ek 17
|
305
|
-
for 17
|
306
273
|
din 17
|
307
|
-
|
308
|
-
af_ 17
|
309
|
-
_l� 17
|
310
|
-
es 17
|
311
|
-
vi� 17
|
312
|
-
_ef 17
|
313
|
-
ör 16
|
314
|
-
_fy 16
|
315
|
-
du 16
|
316
|
-
_i 16
|
317
|
-
fj� 16
|
318
|
-
�r 16
|
319
|
-
ra_ 16
|
274
|
+
it 16
|
320
275
|
fj 16
|
321
|
-
|
322
|
-
ál 16
|
276
|
+
enn 16
|
323
277
|
el 16
|
324
|
-
sin 16
|
325
|
-
kv 16
|
326
|
-
ge 16
|
327
278
|
ga_ 16
|
328
|
-
|
279
|
+
jöl 16
|
280
|
+
nin 16
|
329
281
|
ön 16
|
330
|
-
|
282
|
+
du 16
|
283
|
+
sin 16
|
284
|
+
ge 16
|
285
|
+
ál 16
|
286
|
+
_i 16
|
331
287
|
au 16
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
288
|
+
kv 16
|
289
|
+
_fy 16
|
290
|
+
ör 16
|
291
|
+
_fr 16
|
292
|
+
ra_ 16
|
293
|
+
fyr 15
|
294
|
+
eð_ 15
|
295
|
+
_al 15
|
296
|
+
_sv 15
|
297
|
+
set 15
|
340
298
|
ok 15
|
341
|
-
|
299
|
+
sí 15
|
342
300
|
leg 15
|
343
|
-
|
301
|
+
fjö 15
|
302
|
+
ja 15
|
303
|
+
all 15
|
304
|
+
íð 15
|
305
|
+
ngu 15
|
344
306
|
_ge 15
|
307
|
+
tir 15
|
308
|
+
_ei 15
|
345
309
|
nni 15
|
346
|
-
|
347
|
-
|
348
|
-
set 15
|
349
|
-
_sv 15
|
350
|
-
tt_ 14
|
310
|
+
nda 14
|
311
|
+
ðar 14
|
351
312
|
fur 14
|
313
|
+
efn 14
|
314
|
+
vo 14
|
352
315
|
_ko 14
|
353
|
-
ms 14
|
354
316
|
gur 14
|
355
|
-
|
356
|
-
|
317
|
+
ms 14
|
318
|
+
_sí 14
|
319
|
+
ki_ 14
|
320
|
+
áð 14
|
357
321
|
lut 14
|
358
|
-
ni_ 14
|
359
322
|
in_ 14
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
á� 14
|
323
|
+
nor 14
|
324
|
+
ni_ 14
|
325
|
+
tt_ 14
|
326
|
+
þin 13
|
327
|
+
hi 13
|
328
|
+
hö 13
|
367
329
|
þi 13
|
368
|
-
|
369
|
-
sle 13
|
370
|
-
st� 13
|
330
|
+
as 13
|
371
331
|
væ 13
|
332
|
+
fle 13
|
333
|
+
sle 13
|
334
|
+
fa_ 13
|
372
335
|
kr 13
|
373
|
-
hl 13
|
374
|
-
gs 13
|
375
|
-
�i 13
|
376
336
|
ær 13
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
�r 13
|
381
|
-
fa_ 13
|
382
|
-
hi 13
|
383
|
-
rir 13
|
384
|
-
bo 13
|
337
|
+
ski 13
|
338
|
+
gs 13
|
339
|
+
_þj 13
|
385
340
|
dan 13
|
386
|
-
|
341
|
+
bo 13
|
342
|
+
ls 13
|
343
|
+
hl 13
|
387
344
|
ví 13
|
388
|
-
|
389
|
-
|
390
|
-
kv� 12
|
345
|
+
rir 13
|
346
|
+
erð 12
|
391
347
|
ors 12
|
392
|
-
|
393
|
-
|
394
|
-
|
348
|
+
rei 12
|
349
|
+
ald 12
|
350
|
+
já 12
|
351
|
+
æm 12
|
395
352
|
ist 12
|
396
|
-
|
353
|
+
ld_ 12
|
397
354
|
rt 12
|
355
|
+
fna 12
|
356
|
+
öf 12
|
357
|
+
ip 12
|
398
358
|
ðs 12
|
399
|
-
|
400
|
-
|
359
|
+
rst 12
|
360
|
+
æð 12
|
361
|
+
kum 12
|
362
|
+
ig 12
|
363
|
+
us 12
|
364
|
+
nu_ 11
|
365
|
+
vei 11
|
366
|
+
ds_ 11
|
367
|
+
im 11
|
368
|
+
un_ 11
|
369
|
+
ft 11
|
370
|
+
ekk 11
|
371
|
+
_da 11
|
372
|
+
han 11
|
373
|
+
ne 11
|
374
|
+
ndu 11
|
375
|
+
an_ 11
|
376
|
+
hef 11
|
377
|
+
slu 11
|
378
|
+
stu 11
|
379
|
+
_hl 11
|
380
|
+
kip 11
|
381
|
+
kvæ 11
|
382
|
+
ýs 11
|
383
|
+
da_ 11
|
384
|
+
_fj 11
|
385
|
+
okk 11
|
386
|
+
man 11
|
387
|
+
þar 11
|
388
|
+
öld 11
|
389
|
+
_fl 11
|
390
|
+
má 11
|
391
|
+
mál 11
|
392
|
+
ara 10
|
393
|
+
kj 10
|
394
|
+
up 10
|
395
|
+
yri 10
|
396
|
+
eir 10
|
397
|
+
val 10
|
398
|
+
eng 10
|
399
|
+
ett 10
|
400
|
+
rg 10
|