scylla 0.9.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/scylla/lms/arabic.lm +399 -399
- data/lib/scylla/lms/bulgarian.lm +400 -400
- data/lib/scylla/lms/catalan.lm +323 -323
- data/lib/scylla/lms/chinese.lm +389 -389
- data/lib/scylla/lms/czech.lm +377 -377
- data/lib/scylla/lms/danish.lm +383 -383
- data/lib/scylla/lms/dutch.lm +398 -398
- data/lib/scylla/lms/english.lm +355 -355
- data/lib/scylla/lms/finnish.lm +381 -381
- data/lib/scylla/lms/french.lm +379 -379
- data/lib/scylla/lms/german.lm +382 -382
- data/lib/scylla/lms/greek.lm +400 -400
- data/lib/scylla/lms/hebrew.lm +400 -400
- data/lib/scylla/lms/hindi.lm +400 -400
- data/lib/scylla/lms/icelandic.lm +219 -219
- data/lib/scylla/lms/indonesian.lm +364 -364
- data/lib/scylla/lms/italian.lm +381 -381
- data/lib/scylla/lms/japanese.lm +400 -400
- data/lib/scylla/lms/kannada.lm +392 -392
- data/lib/scylla/lms/korean.lm +389 -389
- data/lib/scylla/lms/marathi.lm +364 -364
- data/lib/scylla/lms/norwegian.lm +325 -325
- data/lib/scylla/lms/persian.lm +397 -397
- data/lib/scylla/lms/polish.lm +380 -380
- data/lib/scylla/lms/portuguese.lm +375 -375
- data/lib/scylla/lms/romanian.lm +318 -318
- data/lib/scylla/lms/russian.lm +398 -398
- data/lib/scylla/lms/slovak.lm +358 -358
- data/lib/scylla/lms/slovenian.lm +256 -256
- data/lib/scylla/lms/spanish.lm +353 -353
- data/lib/scylla/lms/swedish.lm +400 -400
- data/lib/scylla/lms/tagalog.lm +245 -245
- data/lib/scylla/lms/thai.lm +400 -400
- data/lib/scylla/lms/turkish.lm +379 -379
- data/lib/scylla/lms/vietnamese.lm +373 -373
- data/lib/scylla/lms/welsh.lm +293 -293
- data/test/classifier_test.rb +5 -3
- data/test/fixtures/lms/arabic.lm +400 -0
- data/test/fixtures/lms/bulgarian.lm +400 -0
- data/test/fixtures/lms/catalan.lm +400 -0
- data/test/fixtures/lms/chinese.lm +400 -0
- data/test/fixtures/lms/czech.lm +400 -0
- data/test/fixtures/lms/danish.lm +399 -399
- data/test/fixtures/lms/dutch.lm +400 -0
- data/test/fixtures/lms/english.lm +400 -400
- data/test/fixtures/lms/finnish.lm +400 -0
- data/test/fixtures/lms/french.lm +397 -397
- data/test/fixtures/lms/german.lm +400 -400
- data/test/fixtures/lms/greek.lm +400 -0
- data/test/fixtures/lms/hebrew.lm +400 -0
- data/test/fixtures/lms/hindi.lm +400 -400
- data/test/fixtures/lms/icelandic.lm +400 -0
- data/test/fixtures/lms/indonesian.lm +400 -0
- data/test/fixtures/lms/italian.lm +400 -400
- data/test/fixtures/lms/japanese.lm +400 -400
- data/test/fixtures/lms/kannada.lm +400 -0
- data/test/fixtures/lms/korean.lm +400 -0
- data/test/fixtures/lms/marathi.lm +400 -0
- data/test/fixtures/lms/norwegian.lm +399 -399
- data/test/fixtures/lms/persian.lm +400 -0
- data/test/fixtures/lms/polish.lm +400 -0
- data/test/fixtures/lms/portuguese.lm +400 -0
- data/test/fixtures/lms/romanian.lm +400 -0
- data/test/fixtures/lms/russian.lm +400 -0
- data/test/fixtures/lms/slovak.lm +400 -0
- data/test/fixtures/lms/slovenian.lm +400 -0
- data/test/fixtures/lms/spanish.lm +400 -400
- data/test/fixtures/lms/swedish.lm +400 -0
- data/test/fixtures/lms/tagalog.lm +400 -0
- data/test/fixtures/lms/thai.lm +400 -0
- data/test/fixtures/lms/turkish.lm +400 -0
- data/test/fixtures/lms/vietnamese.lm +400 -0
- data/test/fixtures/lms/welsh.lm +400 -0
- data/test/fixtures/test_languages/japanese +149 -67
- data/test/generator_test.rb +1 -43
- data/test/language_test.rb +5 -1
- data/test/loader_test.rb +1 -1
- data/test/scylla_test.rb +4 -4
- metadata +105 -63
data/lib/scylla/lms/icelandic.lm
CHANGED
@@ -1,31 +1,25 @@
|
|
1
|
-
_
|
2
|
-
� 1142
|
1
|
+
_ 3274
|
3
2
|
r 775
|
4
3
|
a 756
|
5
|
-
n
|
6
|
-
i
|
7
|
-
s
|
4
|
+
n 718
|
5
|
+
i 642
|
6
|
+
s 545
|
8
7
|
e 517
|
9
8
|
l 417
|
10
9
|
t 401
|
11
10
|
u 389
|
12
|
-
_� 355
|
13
|
-
� 327
|
14
11
|
ð 327
|
15
12
|
g 310
|
16
|
-
m 304
|
17
13
|
r_ 304
|
14
|
+
m 304
|
18
15
|
f 255
|
19
16
|
k 233
|
20
17
|
d 217
|
21
18
|
á 191
|
22
19
|
o 191
|
23
|
-
� 191
|
24
20
|
_s 185
|
25
|
-
í
|
21
|
+
í 177
|
26
22
|
ð_ 176
|
27
|
-
�_ 176
|
28
|
-
� 176
|
29
23
|
v 175
|
30
24
|
ar 162
|
31
25
|
_e 158
|
@@ -35,49 +29,41 @@ in 147
|
|
35
29
|
an 145
|
36
30
|
a_ 145
|
37
31
|
h 137
|
38
|
-
st 129
|
39
32
|
m_ 129
|
33
|
+
st 129
|
34
|
+
þ 123
|
40
35
|
er 123
|
41
|
-
� 122
|
42
|
-
þ 122
|
43
36
|
_á 113
|
44
|
-
_þ
|
37
|
+
_þ 111
|
45
38
|
g_ 107
|
46
39
|
j 107
|
47
40
|
_í 107
|
48
41
|
_h 106
|
49
|
-
n_
|
42
|
+
n_ 105
|
50
43
|
ar_ 104
|
51
|
-
r� 101
|
52
44
|
la 99
|
53
45
|
um 99
|
54
46
|
_f 92
|
55
47
|
_v 90
|
56
|
-
ö 89
|
57
|
-
a� 89
|
58
48
|
u_ 89
|
59
|
-
|
60
|
-
� 89
|
61
|
-
ir 88
|
62
|
-
� 88
|
49
|
+
ö 89
|
63
50
|
ið 88
|
64
|
-
að 88
|
65
51
|
ó 88
|
66
|
-
|
52
|
+
ir 88
|
53
|
+
að 88
|
54
|
+
and 86
|
67
55
|
nn 86
|
68
56
|
um_ 86
|
69
|
-
and 86
|
70
57
|
_o 85
|
71
|
-
_a 84
|
72
58
|
t_ 84
|
59
|
+
_a 84
|
73
60
|
og 83
|
74
61
|
á_ 83
|
75
|
-
|
76
|
-
|
62
|
+
ri 82
|
63
|
+
_og 82
|
77
64
|
di 82
|
78
65
|
og_ 82
|
79
|
-
|
80
|
-
ri 82
|
66
|
+
en 82
|
81
67
|
ti 79
|
82
68
|
ei 78
|
83
69
|
sl 74
|
@@ -85,316 +71,330 @@ ur 74
|
|
85
71
|
_er 74
|
86
72
|
ir_ 72
|
87
73
|
na 71
|
74
|
+
ið_ 71
|
75
|
+
lan 70
|
88
76
|
ndi 70
|
89
77
|
ng 70
|
90
|
-
lan 70
|
91
78
|
b 65
|
92
79
|
le 65
|
93
|
-
|
94
|
-
� 64
|
80
|
+
að_ 65
|
95
81
|
æ 64
|
82
|
+
ta 64
|
96
83
|
ur_ 63
|
97
84
|
_t 60
|
85
|
+
s_ 60
|
86
|
+
_á_ 59
|
98
87
|
y 59
|
99
|
-
s_ 59
|
100
88
|
_n 58
|
101
89
|
ga 57
|
102
90
|
un 57
|
103
|
-
ve 56
|
104
91
|
ís 56
|
105
|
-
|
106
|
-
|
107
|
-
|
92
|
+
ísl 56
|
93
|
+
_ís 56
|
94
|
+
ve 56
|
108
95
|
_u 55
|
96
|
+
_l 55
|
109
97
|
_m 52
|
110
98
|
í_ 51
|
111
|
-
�_ 51
|
112
99
|
p 50
|
113
100
|
il 49
|
114
101
|
se 49
|
115
102
|
or 49
|
116
|
-
|
103
|
+
sk 48
|
104
|
+
ra 48
|
117
105
|
ki 48
|
118
106
|
jó 48
|
119
|
-
|
120
|
-
sk 48
|
107
|
+
af 48
|
121
108
|
ing 47
|
122
|
-
ni 46
|
123
109
|
er_ 46
|
110
|
+
ni 46
|
124
111
|
al 45
|
112
|
+
_í_ 45
|
125
113
|
ru 45
|
126
|
-
|
127
|
-
|
114
|
+
is 45
|
115
|
+
va 44
|
128
116
|
nu 44
|
129
117
|
_k 44
|
130
|
-
|
131
|
-
ár 43
|
132
|
-
�r 43
|
118
|
+
l_ 44
|
133
119
|
sla 43
|
134
|
-
|
135
|
-
|
136
|
-
s� 40
|
137
|
-
me 40
|
120
|
+
ár 43
|
121
|
+
_ár 41
|
138
122
|
ú 40
|
123
|
+
me 40
|
124
|
+
ns 40
|
139
125
|
ha 39
|
140
126
|
ða 38
|
141
127
|
rn 38
|
142
128
|
_r 38
|
143
|
-
|
144
|
-
�a 38
|
145
|
-
ði 37
|
146
|
-
_a� 37
|
129
|
+
rið 38
|
147
130
|
di_ 37
|
131
|
+
_að 37
|
132
|
+
ði 37
|
148
133
|
eg 37
|
149
|
-
�i 37
|
150
|
-
am 37
|
151
134
|
d_ 37
|
152
|
-
_va 37
|
153
135
|
_st 37
|
136
|
+
_va 37
|
137
|
+
am 37
|
154
138
|
gu 36
|
155
139
|
_se 36
|
140
|
+
ld 35
|
156
141
|
inn 35
|
157
|
-
lu 35
|
158
142
|
st_ 35
|
159
|
-
|
160
|
-
ld 35
|
143
|
+
lu 35
|
161
144
|
ef 34
|
162
145
|
þa 34
|
163
|
-
|
164
|
-
|
165
|
-
_s� 34
|
146
|
+
_þa 34
|
147
|
+
da 33
|
166
148
|
em 33
|
167
149
|
tt 33
|
168
|
-
da 33
|
169
|
-
ru_ 32
|
170
|
-
il_ 32
|
171
150
|
rs 32
|
151
|
+
il_ 32
|
152
|
+
ru_ 32
|
172
153
|
var 32
|
173
154
|
he 32
|
174
|
-
_g 31
|
175
155
|
_en 31
|
156
|
+
_g 31
|
176
157
|
_ha 31
|
177
|
-
t� 30
|
178
|
-
til 30
|
179
|
-
n� 30
|
180
|
-
sa 30
|
181
|
-
fl 30
|
182
|
-
rð 30
|
183
|
-
�u 30
|
184
158
|
ðu 30
|
185
|
-
|
159
|
+
til 30
|
186
160
|
ór 30
|
161
|
+
rð 30
|
162
|
+
fl 30
|
163
|
+
sa 30
|
187
164
|
et 29
|
165
|
+
_þe 29
|
166
|
+
_ti 29
|
188
167
|
sem 29
|
189
|
-
|
190
|
-
ein 29
|
168
|
+
þe 29
|
191
169
|
em_ 29
|
192
|
-
�e 29
|
193
|
-
_ti 29
|
194
170
|
tj 29
|
195
|
-
|
196
|
-
ku 28
|
197
|
-
�ri 28
|
171
|
+
ein 29
|
198
172
|
ver 28
|
173
|
+
ári 28
|
199
174
|
ka 28
|
200
|
-
|
201
|
-
tu 27
|
202
|
-
h� 27
|
175
|
+
ku 28
|
203
176
|
gi 27
|
204
|
-
tj� 27
|
205
177
|
eru 27
|
206
|
-
gar 27
|
207
178
|
_b 27
|
208
179
|
re 27
|
180
|
+
jór 27
|
181
|
+
ma 27
|
182
|
+
gar 27
|
183
|
+
tu 27
|
209
184
|
stj 26
|
185
|
+
tjó 26
|
210
186
|
na_ 26
|
211
|
-
|
187
|
+
órn 26
|
212
188
|
_um 26
|
213
|
-
|
214
|
-
ns_ 25
|
215
|
-
_n� 25
|
189
|
+
_me 26
|
216
190
|
vi 25
|
217
|
-
�l 25
|
218
191
|
öl 25
|
219
|
-
jö 25
|
220
|
-
e� 25
|
221
|
-
eð 25
|
222
192
|
_he 25
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
rí 24
|
227
|
-
f� 24
|
228
|
-
kk 24
|
229
|
-
� 24
|
193
|
+
eð 25
|
194
|
+
jö 25
|
195
|
+
ns_ 25
|
230
196
|
und 24
|
231
|
-
len 24
|
232
197
|
en_ 24
|
198
|
+
_la 24
|
199
|
+
ta_ 24
|
200
|
+
kk 24
|
233
201
|
ko 24
|
202
|
+
nn_ 24
|
203
|
+
end 24
|
234
204
|
nd_ 24
|
235
|
-
ti_ 24
|
236
|
-
m� 24
|
237
|
-
_la 24
|
238
205
|
ý 24
|
239
|
-
|
240
|
-
|
206
|
+
ti_ 24
|
207
|
+
rí 24
|
208
|
+
len 24
|
241
209
|
ík 23
|
242
|
-
f_ 23
|
243
|
-
�k 23
|
244
210
|
ds 23
|
211
|
+
f_ 23
|
212
|
+
fa 23
|
213
|
+
_ve 23
|
245
214
|
nar 23
|
246
|
-
nds 23
|
247
215
|
fn 23
|
248
|
-
|
216
|
+
nds 23
|
217
|
+
ann 22
|
249
218
|
_sa 22
|
219
|
+
ag 22
|
220
|
+
ll 22
|
221
|
+
sv 22
|
222
|
+
ins 22
|
250
223
|
nga 22
|
251
224
|
sam 22
|
252
|
-
sv 22
|
253
225
|
rá 22
|
254
|
-
_h� 22
|
255
|
-
ins 22
|
256
|
-
ll 22
|
257
|
-
ann 22
|
258
|
-
fr 21
|
259
|
-
�i_ 21
|
260
226
|
si 21
|
261
227
|
_d 21
|
228
|
+
rík 21
|
229
|
+
fr 21
|
230
|
+
ði_ 21
|
231
|
+
é 21
|
262
232
|
li 21
|
263
233
|
fu 21
|
264
|
-
é 21
|
265
|
-
� 21
|
266
|
-
no 20
|
267
|
-
me� 20
|
268
|
-
ega 20
|
269
|
-
ss 20
|
270
234
|
_no 20
|
271
|
-
�ki 20
|
272
|
-
mi 20
|
273
235
|
_af 20
|
274
|
-
|
236
|
+
með 20
|
237
|
+
íki 20
|
238
|
+
ss 20
|
239
|
+
ega 20
|
240
|
+
mi 20
|
241
|
+
no 20
|
275
242
|
kis 19
|
276
|
-
rl 19
|
277
|
-
rr 19
|
278
243
|
fi 19
|
279
|
-
|
280
|
-
�� 19
|
244
|
+
rr 19
|
281
245
|
dir 19
|
282
|
-
|
283
|
-
|
246
|
+
_rí 19
|
247
|
+
num 19
|
248
|
+
rl 19
|
249
|
+
ey 18
|
284
250
|
haf 18
|
285
|
-
|
251
|
+
sta 18
|
252
|
+
óð 18
|
253
|
+
_sk 18
|
286
254
|
nna 18
|
287
|
-
|
288
|
-
�a� 18
|
289
|
-
ó� 18
|
255
|
+
það 18
|
290
256
|
_un 18
|
291
257
|
yr 18
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
258
|
+
_ef 17
|
259
|
+
es 17
|
260
|
+
af_ 17
|
261
|
+
við 17
|
262
|
+
din 17
|
263
|
+
_i 17
|
298
264
|
fo 17
|
299
|
-
|
265
|
+
jóð 17
|
266
|
+
ek 17
|
267
|
+
þj 17
|
268
|
+
fy 17
|
300
269
|
k_ 17
|
270
|
+
þjó 17
|
271
|
+
_vi 17
|
301
272
|
ut 17
|
302
273
|
_fo 17
|
303
|
-
þj 17
|
304
|
-
ek 17
|
305
274
|
for 17
|
306
|
-
|
307
|
-
�j� 17
|
308
|
-
af_ 17
|
309
|
-
_l� 17
|
310
|
-
es 17
|
311
|
-
vi� 17
|
312
|
-
_ef 17
|
313
|
-
ör 16
|
275
|
+
enn 16
|
314
276
|
_fy 16
|
277
|
+
sin 16
|
278
|
+
it 16
|
279
|
+
ör 16
|
315
280
|
du 16
|
316
|
-
_i 16
|
317
|
-
fj� 16
|
318
|
-
�r 16
|
319
|
-
ra_ 16
|
320
281
|
fj 16
|
321
282
|
nin 16
|
322
|
-
|
323
|
-
el 16
|
324
|
-
sin 16
|
325
|
-
kv 16
|
283
|
+
jöl 16
|
326
284
|
ge 16
|
327
|
-
ga_ 16
|
328
|
-
it 16
|
329
|
-
ön 16
|
330
285
|
_fr 16
|
331
286
|
au 16
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
�ð 15
|
287
|
+
ön 16
|
288
|
+
el 16
|
289
|
+
ga_ 16
|
290
|
+
ál 16
|
291
|
+
ra_ 16
|
292
|
+
kv 16
|
293
|
+
íð 15
|
340
294
|
ok 15
|
341
|
-
|
342
|
-
|
295
|
+
_al 15
|
296
|
+
_ei 15
|
343
297
|
fyr 15
|
344
|
-
_ge 15
|
345
298
|
nni 15
|
346
|
-
|
347
|
-
all 15
|
348
|
-
set 15
|
299
|
+
leg 15
|
349
300
|
_sv 15
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
301
|
+
_ge 15
|
302
|
+
fjö 15
|
303
|
+
sí 15
|
304
|
+
set 15
|
305
|
+
ja 15
|
306
|
+
tir 15
|
307
|
+
ngu 15
|
308
|
+
all 15
|
309
|
+
eð_ 15
|
357
310
|
lut 14
|
311
|
+
nor 14
|
358
312
|
ni_ 14
|
359
|
-
|
360
|
-
|
313
|
+
ðar 14
|
314
|
+
áð 14
|
315
|
+
_sí 14
|
316
|
+
gur 14
|
317
|
+
tt_ 14
|
361
318
|
nda 14
|
362
|
-
�ð 14
|
363
319
|
vo 14
|
320
|
+
in_ 14
|
321
|
+
efn 14
|
364
322
|
ki_ 14
|
365
|
-
|
366
|
-
|
323
|
+
_ko 14
|
324
|
+
ms 14
|
325
|
+
fur 14
|
326
|
+
rir 13
|
327
|
+
_þj 13
|
367
328
|
þi 13
|
368
|
-
ski 13
|
369
|
-
sle 13
|
370
|
-
st� 13
|
371
|
-
væ 13
|
372
329
|
kr 13
|
373
|
-
hl 13
|
374
330
|
gs 13
|
375
|
-
|
376
|
-
ær 13
|
377
|
-
er� 13
|
378
|
-
ls 13
|
379
|
-
as 13
|
380
|
-
�r 13
|
331
|
+
þin 13
|
381
332
|
fa_ 13
|
333
|
+
fle 13
|
382
334
|
hi 13
|
383
|
-
|
384
|
-
|
335
|
+
ls 13
|
336
|
+
as 13
|
385
337
|
dan 13
|
386
|
-
|
338
|
+
sle 13
|
339
|
+
ski 13
|
340
|
+
hl 13
|
341
|
+
ær 13
|
342
|
+
bo 13
|
343
|
+
væ 13
|
387
344
|
ví 13
|
388
|
-
fle 13
|
389
345
|
hö 13
|
390
|
-
|
346
|
+
æm 12
|
347
|
+
æð 12
|
348
|
+
rst 12
|
349
|
+
ip 12
|
350
|
+
öf 12
|
351
|
+
erð 12
|
352
|
+
rt 12
|
391
353
|
ors 12
|
392
|
-
�ð 12
|
393
|
-
æ� 12
|
394
|
-
�m 12
|
395
354
|
ist 12
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
355
|
+
ig 12
|
356
|
+
ald 12
|
357
|
+
us 12
|
358
|
+
ld_ 12
|
359
|
+
kum 12
|
360
|
+
fna 12
|
400
361
|
rei 12
|
362
|
+
já 12
|
363
|
+
ðs 12
|
364
|
+
okk 11
|
365
|
+
vei 11
|
366
|
+
ds_ 11
|
367
|
+
ýs 11
|
368
|
+
nu_ 11
|
369
|
+
ndu 11
|
370
|
+
kip 11
|
371
|
+
ft 11
|
372
|
+
ekk 11
|
373
|
+
ne 11
|
374
|
+
má 11
|
375
|
+
mál 11
|
376
|
+
öld 11
|
377
|
+
_fl 11
|
378
|
+
an_ 11
|
379
|
+
_hl 11
|
380
|
+
un_ 11
|
381
|
+
im 11
|
382
|
+
han 11
|
383
|
+
da_ 11
|
384
|
+
kvæ 11
|
385
|
+
man 11
|
386
|
+
stu 11
|
387
|
+
_fj 11
|
388
|
+
hef 11
|
389
|
+
þar 11
|
390
|
+
_da 11
|
391
|
+
slu 11
|
392
|
+
eim 10
|
393
|
+
ög 10
|
394
|
+
afa 10
|
395
|
+
_mi 10
|
396
|
+
gi_ 10
|
397
|
+
sti 10
|
398
|
+
ðh 10
|
399
|
+
ða_ 10
|
400
|
+
yri 10
|