scylla 0.9.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/scylla/lms/arabic.lm +399 -399
- data/lib/scylla/lms/bulgarian.lm +400 -400
- data/lib/scylla/lms/catalan.lm +323 -323
- data/lib/scylla/lms/chinese.lm +389 -389
- data/lib/scylla/lms/czech.lm +377 -377
- data/lib/scylla/lms/danish.lm +383 -383
- data/lib/scylla/lms/dutch.lm +398 -398
- data/lib/scylla/lms/english.lm +355 -355
- data/lib/scylla/lms/finnish.lm +381 -381
- data/lib/scylla/lms/french.lm +379 -379
- data/lib/scylla/lms/german.lm +382 -382
- data/lib/scylla/lms/greek.lm +400 -400
- data/lib/scylla/lms/hebrew.lm +400 -400
- data/lib/scylla/lms/hindi.lm +400 -400
- data/lib/scylla/lms/icelandic.lm +219 -219
- data/lib/scylla/lms/indonesian.lm +364 -364
- data/lib/scylla/lms/italian.lm +381 -381
- data/lib/scylla/lms/japanese.lm +400 -400
- data/lib/scylla/lms/kannada.lm +392 -392
- data/lib/scylla/lms/korean.lm +389 -389
- data/lib/scylla/lms/marathi.lm +364 -364
- data/lib/scylla/lms/norwegian.lm +325 -325
- data/lib/scylla/lms/persian.lm +397 -397
- data/lib/scylla/lms/polish.lm +380 -380
- data/lib/scylla/lms/portuguese.lm +375 -375
- data/lib/scylla/lms/romanian.lm +318 -318
- data/lib/scylla/lms/russian.lm +398 -398
- data/lib/scylla/lms/slovak.lm +358 -358
- data/lib/scylla/lms/slovenian.lm +256 -256
- data/lib/scylla/lms/spanish.lm +353 -353
- data/lib/scylla/lms/swedish.lm +400 -400
- data/lib/scylla/lms/tagalog.lm +245 -245
- data/lib/scylla/lms/thai.lm +400 -400
- data/lib/scylla/lms/turkish.lm +379 -379
- data/lib/scylla/lms/vietnamese.lm +373 -373
- data/lib/scylla/lms/welsh.lm +293 -293
- data/test/classifier_test.rb +5 -3
- data/test/fixtures/lms/arabic.lm +400 -0
- data/test/fixtures/lms/bulgarian.lm +400 -0
- data/test/fixtures/lms/catalan.lm +400 -0
- data/test/fixtures/lms/chinese.lm +400 -0
- data/test/fixtures/lms/czech.lm +400 -0
- data/test/fixtures/lms/danish.lm +399 -399
- data/test/fixtures/lms/dutch.lm +400 -0
- data/test/fixtures/lms/english.lm +400 -400
- data/test/fixtures/lms/finnish.lm +400 -0
- data/test/fixtures/lms/french.lm +397 -397
- data/test/fixtures/lms/german.lm +400 -400
- data/test/fixtures/lms/greek.lm +400 -0
- data/test/fixtures/lms/hebrew.lm +400 -0
- data/test/fixtures/lms/hindi.lm +400 -400
- data/test/fixtures/lms/icelandic.lm +400 -0
- data/test/fixtures/lms/indonesian.lm +400 -0
- data/test/fixtures/lms/italian.lm +400 -400
- data/test/fixtures/lms/japanese.lm +400 -400
- data/test/fixtures/lms/kannada.lm +400 -0
- data/test/fixtures/lms/korean.lm +400 -0
- data/test/fixtures/lms/marathi.lm +400 -0
- data/test/fixtures/lms/norwegian.lm +399 -399
- data/test/fixtures/lms/persian.lm +400 -0
- data/test/fixtures/lms/polish.lm +400 -0
- data/test/fixtures/lms/portuguese.lm +400 -0
- data/test/fixtures/lms/romanian.lm +400 -0
- data/test/fixtures/lms/russian.lm +400 -0
- data/test/fixtures/lms/slovak.lm +400 -0
- data/test/fixtures/lms/slovenian.lm +400 -0
- data/test/fixtures/lms/spanish.lm +400 -400
- data/test/fixtures/lms/swedish.lm +400 -0
- data/test/fixtures/lms/tagalog.lm +400 -0
- data/test/fixtures/lms/thai.lm +400 -0
- data/test/fixtures/lms/turkish.lm +400 -0
- data/test/fixtures/lms/vietnamese.lm +400 -0
- data/test/fixtures/lms/welsh.lm +400 -0
- data/test/fixtures/test_languages/japanese +149 -67
- data/test/generator_test.rb +1 -43
- data/test/language_test.rb +5 -1
- data/test/loader_test.rb +1 -1
- data/test/scylla_test.rb +4 -4
- metadata +105 -63
data/lib/scylla/lms/icelandic.lm
CHANGED
@@ -1,31 +1,25 @@
|
|
1
|
-
_
|
2
|
-
� 1142
|
1
|
+
_ 3274
|
3
2
|
r 775
|
4
3
|
a 756
|
5
|
-
n
|
6
|
-
i
|
7
|
-
s
|
4
|
+
n 718
|
5
|
+
i 642
|
6
|
+
s 545
|
8
7
|
e 517
|
9
8
|
l 417
|
10
9
|
t 401
|
11
10
|
u 389
|
12
|
-
_� 355
|
13
|
-
� 327
|
14
11
|
ð 327
|
15
12
|
g 310
|
16
|
-
m 304
|
17
13
|
r_ 304
|
14
|
+
m 304
|
18
15
|
f 255
|
19
16
|
k 233
|
20
17
|
d 217
|
21
18
|
á 191
|
22
19
|
o 191
|
23
|
-
� 191
|
24
20
|
_s 185
|
25
|
-
í
|
21
|
+
í 177
|
26
22
|
ð_ 176
|
27
|
-
�_ 176
|
28
|
-
� 176
|
29
23
|
v 175
|
30
24
|
ar 162
|
31
25
|
_e 158
|
@@ -35,49 +29,41 @@ in 147
|
|
35
29
|
an 145
|
36
30
|
a_ 145
|
37
31
|
h 137
|
38
|
-
st 129
|
39
32
|
m_ 129
|
33
|
+
st 129
|
34
|
+
þ 123
|
40
35
|
er 123
|
41
|
-
� 122
|
42
|
-
þ 122
|
43
36
|
_á 113
|
44
|
-
_þ
|
37
|
+
_þ 111
|
45
38
|
g_ 107
|
46
39
|
j 107
|
47
40
|
_í 107
|
48
41
|
_h 106
|
49
|
-
n_
|
42
|
+
n_ 105
|
50
43
|
ar_ 104
|
51
|
-
r� 101
|
52
44
|
la 99
|
53
45
|
um 99
|
54
46
|
_f 92
|
55
47
|
_v 90
|
56
|
-
ö 89
|
57
|
-
a� 89
|
58
48
|
u_ 89
|
59
|
-
|
60
|
-
� 89
|
61
|
-
ir 88
|
62
|
-
� 88
|
49
|
+
ö 89
|
63
50
|
ið 88
|
64
|
-
að 88
|
65
51
|
ó 88
|
66
|
-
|
52
|
+
ir 88
|
53
|
+
að 88
|
54
|
+
and 86
|
67
55
|
nn 86
|
68
56
|
um_ 86
|
69
|
-
and 86
|
70
57
|
_o 85
|
71
|
-
_a 84
|
72
58
|
t_ 84
|
59
|
+
_a 84
|
73
60
|
og 83
|
74
61
|
á_ 83
|
75
|
-
|
76
|
-
|
62
|
+
ri 82
|
63
|
+
_og 82
|
77
64
|
di 82
|
78
65
|
og_ 82
|
79
|
-
|
80
|
-
ri 82
|
66
|
+
en 82
|
81
67
|
ti 79
|
82
68
|
ei 78
|
83
69
|
sl 74
|
@@ -85,316 +71,330 @@ ur 74
|
|
85
71
|
_er 74
|
86
72
|
ir_ 72
|
87
73
|
na 71
|
74
|
+
ið_ 71
|
75
|
+
lan 70
|
88
76
|
ndi 70
|
89
77
|
ng 70
|
90
|
-
lan 70
|
91
78
|
b 65
|
92
79
|
le 65
|
93
|
-
|
94
|
-
� 64
|
80
|
+
að_ 65
|
95
81
|
æ 64
|
82
|
+
ta 64
|
96
83
|
ur_ 63
|
97
84
|
_t 60
|
85
|
+
s_ 60
|
86
|
+
_á_ 59
|
98
87
|
y 59
|
99
|
-
s_ 59
|
100
88
|
_n 58
|
101
89
|
ga 57
|
102
90
|
un 57
|
103
|
-
ve 56
|
104
91
|
ís 56
|
105
|
-
|
106
|
-
|
107
|
-
|
92
|
+
ísl 56
|
93
|
+
_ís 56
|
94
|
+
ve 56
|
108
95
|
_u 55
|
96
|
+
_l 55
|
109
97
|
_m 52
|
110
98
|
í_ 51
|
111
|
-
�_ 51
|
112
99
|
p 50
|
113
100
|
il 49
|
114
101
|
se 49
|
115
102
|
or 49
|
116
|
-
|
103
|
+
sk 48
|
104
|
+
ra 48
|
117
105
|
ki 48
|
118
106
|
jó 48
|
119
|
-
|
120
|
-
sk 48
|
107
|
+
af 48
|
121
108
|
ing 47
|
122
|
-
ni 46
|
123
109
|
er_ 46
|
110
|
+
ni 46
|
124
111
|
al 45
|
112
|
+
_í_ 45
|
125
113
|
ru 45
|
126
|
-
|
127
|
-
|
114
|
+
is 45
|
115
|
+
va 44
|
128
116
|
nu 44
|
129
117
|
_k 44
|
130
|
-
|
131
|
-
ár 43
|
132
|
-
�r 43
|
118
|
+
l_ 44
|
133
119
|
sla 43
|
134
|
-
|
135
|
-
|
136
|
-
s� 40
|
137
|
-
me 40
|
120
|
+
ár 43
|
121
|
+
_ár 41
|
138
122
|
ú 40
|
123
|
+
me 40
|
124
|
+
ns 40
|
139
125
|
ha 39
|
140
126
|
ða 38
|
141
127
|
rn 38
|
142
128
|
_r 38
|
143
|
-
|
144
|
-
�a 38
|
145
|
-
ði 37
|
146
|
-
_a� 37
|
129
|
+
rið 38
|
147
130
|
di_ 37
|
131
|
+
_að 37
|
132
|
+
ði 37
|
148
133
|
eg 37
|
149
|
-
�i 37
|
150
|
-
am 37
|
151
134
|
d_ 37
|
152
|
-
_va 37
|
153
135
|
_st 37
|
136
|
+
_va 37
|
137
|
+
am 37
|
154
138
|
gu 36
|
155
139
|
_se 36
|
140
|
+
ld 35
|
156
141
|
inn 35
|
157
|
-
lu 35
|
158
142
|
st_ 35
|
159
|
-
|
160
|
-
ld 35
|
143
|
+
lu 35
|
161
144
|
ef 34
|
162
145
|
þa 34
|
163
|
-
|
164
|
-
|
165
|
-
_s� 34
|
146
|
+
_þa 34
|
147
|
+
da 33
|
166
148
|
em 33
|
167
149
|
tt 33
|
168
|
-
da 33
|
169
|
-
ru_ 32
|
170
|
-
il_ 32
|
171
150
|
rs 32
|
151
|
+
il_ 32
|
152
|
+
ru_ 32
|
172
153
|
var 32
|
173
154
|
he 32
|
174
|
-
_g 31
|
175
155
|
_en 31
|
156
|
+
_g 31
|
176
157
|
_ha 31
|
177
|
-
t� 30
|
178
|
-
til 30
|
179
|
-
n� 30
|
180
|
-
sa 30
|
181
|
-
fl 30
|
182
|
-
rð 30
|
183
|
-
�u 30
|
184
158
|
ðu 30
|
185
|
-
|
159
|
+
til 30
|
186
160
|
ór 30
|
161
|
+
rð 30
|
162
|
+
fl 30
|
163
|
+
sa 30
|
187
164
|
et 29
|
165
|
+
_þe 29
|
166
|
+
_ti 29
|
188
167
|
sem 29
|
189
|
-
|
190
|
-
ein 29
|
168
|
+
þe 29
|
191
169
|
em_ 29
|
192
|
-
�e 29
|
193
|
-
_ti 29
|
194
170
|
tj 29
|
195
|
-
|
196
|
-
ku 28
|
197
|
-
�ri 28
|
171
|
+
ein 29
|
198
172
|
ver 28
|
173
|
+
ári 28
|
199
174
|
ka 28
|
200
|
-
|
201
|
-
tu 27
|
202
|
-
h� 27
|
175
|
+
ku 28
|
203
176
|
gi 27
|
204
|
-
tj� 27
|
205
177
|
eru 27
|
206
|
-
gar 27
|
207
178
|
_b 27
|
208
179
|
re 27
|
180
|
+
jór 27
|
181
|
+
ma 27
|
182
|
+
gar 27
|
183
|
+
tu 27
|
209
184
|
stj 26
|
185
|
+
tjó 26
|
210
186
|
na_ 26
|
211
|
-
|
187
|
+
órn 26
|
212
188
|
_um 26
|
213
|
-
|
214
|
-
ns_ 25
|
215
|
-
_n� 25
|
189
|
+
_me 26
|
216
190
|
vi 25
|
217
|
-
�l 25
|
218
191
|
öl 25
|
219
|
-
jö 25
|
220
|
-
e� 25
|
221
|
-
eð 25
|
222
192
|
_he 25
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
rí 24
|
227
|
-
f� 24
|
228
|
-
kk 24
|
229
|
-
� 24
|
193
|
+
eð 25
|
194
|
+
jö 25
|
195
|
+
ns_ 25
|
230
196
|
und 24
|
231
|
-
len 24
|
232
197
|
en_ 24
|
198
|
+
_la 24
|
199
|
+
ta_ 24
|
200
|
+
kk 24
|
233
201
|
ko 24
|
202
|
+
nn_ 24
|
203
|
+
end 24
|
234
204
|
nd_ 24
|
235
|
-
ti_ 24
|
236
|
-
m� 24
|
237
|
-
_la 24
|
238
205
|
ý 24
|
239
|
-
|
240
|
-
|
206
|
+
ti_ 24
|
207
|
+
rí 24
|
208
|
+
len 24
|
241
209
|
ík 23
|
242
|
-
f_ 23
|
243
|
-
�k 23
|
244
210
|
ds 23
|
211
|
+
f_ 23
|
212
|
+
fa 23
|
213
|
+
_ve 23
|
245
214
|
nar 23
|
246
|
-
nds 23
|
247
215
|
fn 23
|
248
|
-
|
216
|
+
nds 23
|
217
|
+
ann 22
|
249
218
|
_sa 22
|
219
|
+
ag 22
|
220
|
+
ll 22
|
221
|
+
sv 22
|
222
|
+
ins 22
|
250
223
|
nga 22
|
251
224
|
sam 22
|
252
|
-
sv 22
|
253
225
|
rá 22
|
254
|
-
_h� 22
|
255
|
-
ins 22
|
256
|
-
ll 22
|
257
|
-
ann 22
|
258
|
-
fr 21
|
259
|
-
�i_ 21
|
260
226
|
si 21
|
261
227
|
_d 21
|
228
|
+
rík 21
|
229
|
+
fr 21
|
230
|
+
ði_ 21
|
231
|
+
é 21
|
262
232
|
li 21
|
263
233
|
fu 21
|
264
|
-
é 21
|
265
|
-
� 21
|
266
|
-
no 20
|
267
|
-
me� 20
|
268
|
-
ega 20
|
269
|
-
ss 20
|
270
234
|
_no 20
|
271
|
-
�ki 20
|
272
|
-
mi 20
|
273
235
|
_af 20
|
274
|
-
|
236
|
+
með 20
|
237
|
+
íki 20
|
238
|
+
ss 20
|
239
|
+
ega 20
|
240
|
+
mi 20
|
241
|
+
no 20
|
275
242
|
kis 19
|
276
|
-
rl 19
|
277
|
-
rr 19
|
278
243
|
fi 19
|
279
|
-
|
280
|
-
�� 19
|
244
|
+
rr 19
|
281
245
|
dir 19
|
282
|
-
|
283
|
-
|
246
|
+
_rí 19
|
247
|
+
num 19
|
248
|
+
rl 19
|
249
|
+
ey 18
|
284
250
|
haf 18
|
285
|
-
|
251
|
+
sta 18
|
252
|
+
óð 18
|
253
|
+
_sk 18
|
286
254
|
nna 18
|
287
|
-
|
288
|
-
�a� 18
|
289
|
-
ó� 18
|
255
|
+
það 18
|
290
256
|
_un 18
|
291
257
|
yr 18
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
258
|
+
_ef 17
|
259
|
+
es 17
|
260
|
+
af_ 17
|
261
|
+
við 17
|
262
|
+
din 17
|
263
|
+
_i 17
|
298
264
|
fo 17
|
299
|
-
|
265
|
+
jóð 17
|
266
|
+
ek 17
|
267
|
+
þj 17
|
268
|
+
fy 17
|
300
269
|
k_ 17
|
270
|
+
þjó 17
|
271
|
+
_vi 17
|
301
272
|
ut 17
|
302
273
|
_fo 17
|
303
|
-
þj 17
|
304
|
-
ek 17
|
305
274
|
for 17
|
306
|
-
|
307
|
-
�j� 17
|
308
|
-
af_ 17
|
309
|
-
_l� 17
|
310
|
-
es 17
|
311
|
-
vi� 17
|
312
|
-
_ef 17
|
313
|
-
ör 16
|
275
|
+
enn 16
|
314
276
|
_fy 16
|
277
|
+
sin 16
|
278
|
+
it 16
|
279
|
+
ör 16
|
315
280
|
du 16
|
316
|
-
_i 16
|
317
|
-
fj� 16
|
318
|
-
�r 16
|
319
|
-
ra_ 16
|
320
281
|
fj 16
|
321
282
|
nin 16
|
322
|
-
|
323
|
-
el 16
|
324
|
-
sin 16
|
325
|
-
kv 16
|
283
|
+
jöl 16
|
326
284
|
ge 16
|
327
|
-
ga_ 16
|
328
|
-
it 16
|
329
|
-
ön 16
|
330
285
|
_fr 16
|
331
286
|
au 16
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
�ð 15
|
287
|
+
ön 16
|
288
|
+
el 16
|
289
|
+
ga_ 16
|
290
|
+
ál 16
|
291
|
+
ra_ 16
|
292
|
+
kv 16
|
293
|
+
íð 15
|
340
294
|
ok 15
|
341
|
-
|
342
|
-
|
295
|
+
_al 15
|
296
|
+
_ei 15
|
343
297
|
fyr 15
|
344
|
-
_ge 15
|
345
298
|
nni 15
|
346
|
-
|
347
|
-
all 15
|
348
|
-
set 15
|
299
|
+
leg 15
|
349
300
|
_sv 15
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
301
|
+
_ge 15
|
302
|
+
fjö 15
|
303
|
+
sí 15
|
304
|
+
set 15
|
305
|
+
ja 15
|
306
|
+
tir 15
|
307
|
+
ngu 15
|
308
|
+
all 15
|
309
|
+
eð_ 15
|
357
310
|
lut 14
|
311
|
+
nor 14
|
358
312
|
ni_ 14
|
359
|
-
|
360
|
-
|
313
|
+
ðar 14
|
314
|
+
áð 14
|
315
|
+
_sí 14
|
316
|
+
gur 14
|
317
|
+
tt_ 14
|
361
318
|
nda 14
|
362
|
-
�ð 14
|
363
319
|
vo 14
|
320
|
+
in_ 14
|
321
|
+
efn 14
|
364
322
|
ki_ 14
|
365
|
-
|
366
|
-
|
323
|
+
_ko 14
|
324
|
+
ms 14
|
325
|
+
fur 14
|
326
|
+
rir 13
|
327
|
+
_þj 13
|
367
328
|
þi 13
|
368
|
-
ski 13
|
369
|
-
sle 13
|
370
|
-
st� 13
|
371
|
-
væ 13
|
372
329
|
kr 13
|
373
|
-
hl 13
|
374
330
|
gs 13
|
375
|
-
|
376
|
-
ær 13
|
377
|
-
er� 13
|
378
|
-
ls 13
|
379
|
-
as 13
|
380
|
-
�r 13
|
331
|
+
þin 13
|
381
332
|
fa_ 13
|
333
|
+
fle 13
|
382
334
|
hi 13
|
383
|
-
|
384
|
-
|
335
|
+
ls 13
|
336
|
+
as 13
|
385
337
|
dan 13
|
386
|
-
|
338
|
+
sle 13
|
339
|
+
ski 13
|
340
|
+
hl 13
|
341
|
+
ær 13
|
342
|
+
bo 13
|
343
|
+
væ 13
|
387
344
|
ví 13
|
388
|
-
fle 13
|
389
345
|
hö 13
|
390
|
-
|
346
|
+
æm 12
|
347
|
+
æð 12
|
348
|
+
rst 12
|
349
|
+
ip 12
|
350
|
+
öf 12
|
351
|
+
erð 12
|
352
|
+
rt 12
|
391
353
|
ors 12
|
392
|
-
�ð 12
|
393
|
-
æ� 12
|
394
|
-
�m 12
|
395
354
|
ist 12
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
355
|
+
ig 12
|
356
|
+
ald 12
|
357
|
+
us 12
|
358
|
+
ld_ 12
|
359
|
+
kum 12
|
360
|
+
fna 12
|
400
361
|
rei 12
|
362
|
+
já 12
|
363
|
+
ðs 12
|
364
|
+
okk 11
|
365
|
+
vei 11
|
366
|
+
ds_ 11
|
367
|
+
ýs 11
|
368
|
+
nu_ 11
|
369
|
+
ndu 11
|
370
|
+
kip 11
|
371
|
+
ft 11
|
372
|
+
ekk 11
|
373
|
+
ne 11
|
374
|
+
má 11
|
375
|
+
mál 11
|
376
|
+
öld 11
|
377
|
+
_fl 11
|
378
|
+
an_ 11
|
379
|
+
_hl 11
|
380
|
+
un_ 11
|
381
|
+
im 11
|
382
|
+
han 11
|
383
|
+
da_ 11
|
384
|
+
kvæ 11
|
385
|
+
man 11
|
386
|
+
stu 11
|
387
|
+
_fj 11
|
388
|
+
hef 11
|
389
|
+
þar 11
|
390
|
+
_da 11
|
391
|
+
slu 11
|
392
|
+
eim 10
|
393
|
+
ög 10
|
394
|
+
afa 10
|
395
|
+
_mi 10
|
396
|
+
gi_ 10
|
397
|
+
sti 10
|
398
|
+
ðh 10
|
399
|
+
ða_ 10
|
400
|
+
yri 10
|