scylla 0.9.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/scylla/lms/arabic.lm +399 -399
- data/lib/scylla/lms/bulgarian.lm +400 -400
- data/lib/scylla/lms/catalan.lm +323 -323
- data/lib/scylla/lms/chinese.lm +389 -389
- data/lib/scylla/lms/czech.lm +377 -377
- data/lib/scylla/lms/danish.lm +383 -383
- data/lib/scylla/lms/dutch.lm +398 -398
- data/lib/scylla/lms/english.lm +355 -355
- data/lib/scylla/lms/finnish.lm +381 -381
- data/lib/scylla/lms/french.lm +379 -379
- data/lib/scylla/lms/german.lm +382 -382
- data/lib/scylla/lms/greek.lm +400 -400
- data/lib/scylla/lms/hebrew.lm +400 -400
- data/lib/scylla/lms/hindi.lm +400 -400
- data/lib/scylla/lms/icelandic.lm +219 -219
- data/lib/scylla/lms/indonesian.lm +364 -364
- data/lib/scylla/lms/italian.lm +381 -381
- data/lib/scylla/lms/japanese.lm +400 -400
- data/lib/scylla/lms/kannada.lm +392 -392
- data/lib/scylla/lms/korean.lm +389 -389
- data/lib/scylla/lms/marathi.lm +364 -364
- data/lib/scylla/lms/norwegian.lm +325 -325
- data/lib/scylla/lms/persian.lm +397 -397
- data/lib/scylla/lms/polish.lm +380 -380
- data/lib/scylla/lms/portuguese.lm +375 -375
- data/lib/scylla/lms/romanian.lm +318 -318
- data/lib/scylla/lms/russian.lm +398 -398
- data/lib/scylla/lms/slovak.lm +358 -358
- data/lib/scylla/lms/slovenian.lm +256 -256
- data/lib/scylla/lms/spanish.lm +353 -353
- data/lib/scylla/lms/swedish.lm +400 -400
- data/lib/scylla/lms/tagalog.lm +245 -245
- data/lib/scylla/lms/thai.lm +400 -400
- data/lib/scylla/lms/turkish.lm +379 -379
- data/lib/scylla/lms/vietnamese.lm +373 -373
- data/lib/scylla/lms/welsh.lm +293 -293
- data/test/classifier_test.rb +5 -3
- data/test/fixtures/lms/arabic.lm +400 -0
- data/test/fixtures/lms/bulgarian.lm +400 -0
- data/test/fixtures/lms/catalan.lm +400 -0
- data/test/fixtures/lms/chinese.lm +400 -0
- data/test/fixtures/lms/czech.lm +400 -0
- data/test/fixtures/lms/danish.lm +399 -399
- data/test/fixtures/lms/dutch.lm +400 -0
- data/test/fixtures/lms/english.lm +400 -400
- data/test/fixtures/lms/finnish.lm +400 -0
- data/test/fixtures/lms/french.lm +397 -397
- data/test/fixtures/lms/german.lm +400 -400
- data/test/fixtures/lms/greek.lm +400 -0
- data/test/fixtures/lms/hebrew.lm +400 -0
- data/test/fixtures/lms/hindi.lm +400 -400
- data/test/fixtures/lms/icelandic.lm +400 -0
- data/test/fixtures/lms/indonesian.lm +400 -0
- data/test/fixtures/lms/italian.lm +400 -400
- data/test/fixtures/lms/japanese.lm +400 -400
- data/test/fixtures/lms/kannada.lm +400 -0
- data/test/fixtures/lms/korean.lm +400 -0
- data/test/fixtures/lms/marathi.lm +400 -0
- data/test/fixtures/lms/norwegian.lm +399 -399
- data/test/fixtures/lms/persian.lm +400 -0
- data/test/fixtures/lms/polish.lm +400 -0
- data/test/fixtures/lms/portuguese.lm +400 -0
- data/test/fixtures/lms/romanian.lm +400 -0
- data/test/fixtures/lms/russian.lm +400 -0
- data/test/fixtures/lms/slovak.lm +400 -0
- data/test/fixtures/lms/slovenian.lm +400 -0
- data/test/fixtures/lms/spanish.lm +400 -400
- data/test/fixtures/lms/swedish.lm +400 -0
- data/test/fixtures/lms/tagalog.lm +400 -0
- data/test/fixtures/lms/thai.lm +400 -0
- data/test/fixtures/lms/turkish.lm +400 -0
- data/test/fixtures/lms/vietnamese.lm +400 -0
- data/test/fixtures/lms/welsh.lm +400 -0
- data/test/fixtures/test_languages/japanese +149 -67
- data/test/generator_test.rb +1 -43
- data/test/language_test.rb +5 -1
- data/test/loader_test.rb +1 -1
- data/test/scylla_test.rb +4 -4
- metadata +105 -63
data/lib/scylla/lms/slovenian.lm
CHANGED
@@ -1,400 +1,400 @@
|
|
1
1
|
_ 1600
|
2
|
-
a
|
3
|
-
e
|
4
|
-
i
|
2
|
+
a 446
|
3
|
+
e 430
|
4
|
+
i 393
|
5
5
|
o 380
|
6
|
-
n
|
6
|
+
n 325
|
7
7
|
v 261
|
8
|
-
s
|
8
|
+
s 260
|
9
9
|
r 232
|
10
|
-
l
|
10
|
+
l 212
|
11
11
|
j 199
|
12
|
-
a_
|
13
|
-
t
|
14
|
-
d
|
12
|
+
a_ 192
|
13
|
+
t 169
|
14
|
+
d 164
|
15
15
|
p 140
|
16
|
-
e_
|
17
|
-
|
18
|
-
|
19
|
-
k
|
20
|
-
m
|
16
|
+
e_ 132
|
17
|
+
i_ 122
|
18
|
+
_s 121
|
19
|
+
k 119
|
20
|
+
m 103
|
21
21
|
_p 98
|
22
22
|
z 95
|
23
23
|
je 92
|
24
|
-
u 81
|
25
24
|
o_ 81
|
26
|
-
|
25
|
+
u 81
|
27
26
|
na 75
|
28
|
-
|
27
|
+
ov 74
|
28
|
+
ni 73
|
29
29
|
_n 72
|
30
|
-
|
31
|
-
� 70
|
30
|
+
st 72
|
32
31
|
je_ 67
|
33
|
-
_v
|
32
|
+
_v 64
|
34
33
|
ve 63
|
35
34
|
en 63
|
35
|
+
g 60
|
36
36
|
ra 59
|
37
|
-
|
38
|
-
_i 56
|
37
|
+
_i 55
|
39
38
|
b 55
|
40
|
-
pr 54
|
41
39
|
_j 54
|
42
|
-
|
43
|
-
v_ 53
|
40
|
+
pr 54
|
44
41
|
_d 53
|
42
|
+
in 53
|
43
|
+
v_ 52
|
45
44
|
_na 50
|
46
45
|
lo 48
|
47
|
-
n_ 47
|
48
|
-
_o 46
|
49
46
|
_pr 46
|
47
|
+
n_ 46
|
48
|
+
_o 46
|
50
49
|
na_ 45
|
51
|
-
|
50
|
+
_je 44
|
52
51
|
sk 44
|
53
|
-
č 44
|
54
52
|
po 44
|
55
|
-
|
56
|
-
� 44
|
57
|
-
la 42
|
58
|
-
no 42
|
53
|
+
č 44
|
59
54
|
sl 42
|
55
|
+
la 42
|
60
56
|
ne 41
|
57
|
+
no 41
|
61
58
|
_v_ 40
|
62
59
|
lov 39
|
63
|
-
va 38
|
64
|
-
� 38
|
65
|
-
_po 38
|
66
60
|
š 38
|
67
|
-
|
68
|
-
|
69
|
-
_in 37
|
61
|
+
_po 38
|
62
|
+
va 38
|
70
63
|
os 37
|
64
|
+
ro 37
|
71
65
|
h 37
|
72
66
|
_sl 37
|
73
|
-
le
|
74
|
-
ja 36
|
67
|
+
le 36
|
75
68
|
li 36
|
76
|
-
|
69
|
+
ja 36
|
70
|
+
_in 36
|
77
71
|
slo 36
|
72
|
+
in_ 36
|
73
|
+
an 36
|
78
74
|
_z 35
|
79
75
|
ij 35
|
80
76
|
av 35
|
81
77
|
ven 33
|
82
|
-
ed 33
|
83
78
|
ove 33
|
84
|
-
|
85
|
-
� 32
|
86
|
-
el 32
|
79
|
+
od 32
|
87
80
|
vi 32
|
88
81
|
ta 31
|
82
|
+
el 31
|
83
|
+
ž 31
|
84
|
+
ed 31
|
89
85
|
re 31
|
90
|
-
de 31
|
91
|
-
od 31
|
92
86
|
ev 30
|
93
87
|
eni 30
|
94
88
|
t_ 30
|
89
|
+
de 29
|
95
90
|
lj 29
|
96
|
-
ki
|
97
|
-
ri 28
|
91
|
+
ki 29
|
98
92
|
al 28
|
93
|
+
ri 28
|
99
94
|
ja_ 28
|
100
|
-
_r 27
|
101
95
|
u_ 27
|
102
|
-
|
96
|
+
_r 27
|
97
|
+
ost 26
|
103
98
|
et 26
|
99
|
+
se 26
|
100
|
+
_k 26
|
104
101
|
ti 26
|
105
|
-
ost 26
|
106
|
-
em 25
|
107
|
-
h_ 25
|
108
|
-
dn 25
|
109
|
-
_k 25
|
110
102
|
nij 25
|
103
|
+
dn 25
|
104
|
+
ni_ 25
|
105
|
+
h_ 25
|
111
106
|
ar 24
|
112
|
-
|
113
|
-
|
114
|
-
it 23
|
115
|
-
ih_ 23
|
116
|
-
ad 23
|
107
|
+
vo 24
|
108
|
+
em 24
|
117
109
|
ih 23
|
118
110
|
me 23
|
119
|
-
vo 23
|
120
111
|
ko 23
|
112
|
+
ih_ 23
|
113
|
+
ad 23
|
114
|
+
it 23
|
121
115
|
ol 22
|
116
|
+
aj 22
|
122
117
|
ik 22
|
123
118
|
_l 22
|
124
|
-
|
125
|
-
c 21
|
126
|
-
az 21
|
119
|
+
m_ 22
|
127
120
|
la_ 21
|
128
|
-
za 21
|
129
|
-
raz 21
|
130
121
|
ka 21
|
122
|
+
za 21
|
131
123
|
ob 21
|
124
|
+
c 21
|
125
|
+
raz 21
|
126
|
+
az 21
|
127
|
+
di 20
|
132
128
|
del 20
|
133
129
|
dr 20
|
134
|
-
|
135
|
-
nj 19
|
136
|
-
_ra 19
|
130
|
+
ki_ 20
|
137
131
|
no_ 19
|
138
|
-
|
139
|
-
ki_ 19
|
140
|
-
r� 19
|
141
|
-
di 19
|
142
|
-
pre 19
|
132
|
+
nj 19
|
143
133
|
om 19
|
144
|
-
|
145
|
-
|
134
|
+
pre 19
|
135
|
+
_se 19
|
136
|
+
_ra 19
|
137
|
+
ma 18
|
146
138
|
to 18
|
147
|
-
|
139
|
+
ji 18
|
148
140
|
_le 18
|
149
|
-
ma 18
|
150
141
|
_za 18
|
142
|
+
_g 18
|
143
|
+
_m 18
|
144
|
+
_t 18
|
145
|
+
as 17
|
146
|
+
ga 17
|
151
147
|
let 17
|
152
|
-
ne_ 17
|
153
|
-
er 17
|
154
|
-
_g 17
|
155
148
|
_ob 17
|
156
|
-
|
157
|
-
as 17
|
149
|
+
er 17
|
158
150
|
da 17
|
159
|
-
|
160
|
-
|
151
|
+
ne_ 17
|
152
|
+
jo 16
|
161
153
|
vn 16
|
162
|
-
|
154
|
+
ija 16
|
163
155
|
so 16
|
164
|
-
|
165
|
-
ga 16
|
156
|
+
_dr 16
|
166
157
|
rs 16
|
167
|
-
|
168
|
-
|
169
|
-
|
158
|
+
vr 16
|
159
|
+
te 16
|
160
|
+
_de 15
|
161
|
+
ta_ 15
|
162
|
+
di_ 15
|
170
163
|
mo 15
|
171
|
-
|
172
|
-
�a 15
|
164
|
+
oč 15
|
173
165
|
pro 15
|
166
|
+
st_ 15
|
167
|
+
ga_ 15
|
168
|
+
ža 15
|
174
169
|
red 15
|
175
|
-
|
170
|
+
jo_ 15
|
176
171
|
or 15
|
177
|
-
ža 15
|
178
|
-
ta_ 15
|
179
172
|
tv 15
|
173
|
+
rž 14
|
180
174
|
do 14
|
175
|
+
žav 14
|
176
|
+
im 14
|
177
|
+
drž 14
|
181
178
|
ke 14
|
182
|
-
di_ 14
|
183
|
-
�av 14
|
184
179
|
iv 14
|
185
|
-
|
180
|
+
_so 14
|
186
181
|
bl 14
|
187
|
-
rž 14
|
188
|
-
im 14
|
189
182
|
_u 14
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
nos 13
|
183
|
+
rža 14
|
184
|
+
so_ 13
|
185
|
+
sta 13
|
194
186
|
s_ 13
|
195
187
|
ova 13
|
196
|
-
il 13
|
197
|
-
sta 13
|
198
188
|
avn 13
|
189
|
+
ns 13
|
190
|
+
nsk 13
|
199
191
|
ka_ 13
|
192
|
+
nos 13
|
193
|
+
oz 13
|
200
194
|
ju 13
|
201
|
-
so_ 13
|
202
|
-
ns 13
|
203
195
|
is 13
|
204
|
-
|
196
|
+
il 13
|
205
197
|
f 12
|
206
|
-
_š 12
|
207
|
-
go 12
|
208
|
-
eg 12
|
209
198
|
ru 12
|
210
|
-
|
199
|
+
va_ 12
|
211
200
|
lje 12
|
212
|
-
iz 12
|
213
|
-
sti 12
|
214
201
|
val 12
|
215
|
-
|
202
|
+
go 12
|
203
|
+
sti 12
|
204
|
+
eg 12
|
205
|
+
ot 12
|
216
206
|
ze 12
|
217
|
-
|
207
|
+
_š 12
|
218
208
|
pa 12
|
219
|
-
nje 12
|
220
209
|
ma_ 12
|
221
|
-
|
210
|
+
jen 12
|
211
|
+
nje 12
|
212
|
+
iz 12
|
222
213
|
iva 11
|
223
|
-
|
214
|
+
ti_ 11
|
224
215
|
d_ 11
|
225
|
-
sa 11
|
226
|
-
i� 11
|
227
216
|
ros 11
|
217
|
+
_st 11
|
228
218
|
kr 11
|
219
|
+
sa 11
|
220
|
+
bli 10
|
221
|
+
op 10
|
222
|
+
vi_ 10
|
223
|
+
elj 10
|
224
|
+
ud 10
|
225
|
+
pod 10
|
229
226
|
li_ 10
|
230
227
|
_ne 10
|
231
|
-
|
232
|
-
|
228
|
+
ske 10
|
229
|
+
tu 10
|
230
|
+
ir 10
|
231
|
+
ski 10
|
232
|
+
sto 10
|
233
|
+
zem 10
|
234
|
+
lja 10
|
235
|
+
ega 10
|
233
236
|
ej 10
|
237
|
+
tr 10
|
234
238
|
se_ 10
|
235
239
|
ic 10
|
236
|
-
lja 10
|
237
|
-
bli 10
|
238
|
-
i� 10
|
239
|
-
ske 10
|
240
|
-
bi 10
|
241
|
-
zem 10
|
242
|
-
ir 10
|
243
|
-
pod 10
|
244
|
-
op 10
|
245
|
-
ije 10
|
246
|
-
em_ 10
|
247
240
|
ič 10
|
248
|
-
|
249
|
-
ud 10
|
250
|
-
elj 10
|
251
|
-
tu 10
|
241
|
+
bi 10
|
252
242
|
am 10
|
253
|
-
|
254
|
-
|
255
|
-
|
243
|
+
ije 10
|
244
|
+
r_ 10
|
245
|
+
kih 9
|
246
|
+
či 9
|
247
|
+
ji_ 9
|
256
248
|
iš 9
|
257
|
-
|
258
|
-
_tu 9
|
259
|
-
men 9
|
260
|
-
lik 9
|
261
|
-
ča 9
|
262
|
-
�i 9
|
249
|
+
_iz 9
|
263
250
|
stv 9
|
264
|
-
|
265
|
-
|
266
|
-
et_ 9
|
267
|
-
edn 9
|
268
|
-
zn 9
|
269
|
-
dan 9
|
270
|
-
či 9
|
251
|
+
mi 9
|
252
|
+
ča 9
|
271
253
|
_oz 9
|
272
|
-
|
273
|
-
kih 9
|
274
|
-
�i 9
|
275
|
-
ovi 9
|
276
|
-
rad 9
|
277
|
-
ži 9
|
278
|
-
_e 9
|
254
|
+
pri 9
|
279
255
|
ve_ 9
|
280
|
-
|
256
|
+
zn 9
|
281
257
|
vl 9
|
282
|
-
mi 9
|
283
|
-
�a 9
|
284
258
|
_a 9
|
285
|
-
|
286
|
-
|
259
|
+
ge 9
|
260
|
+
dan 9
|
261
|
+
_e 9
|
262
|
+
lik 9
|
263
|
+
rad 9
|
264
|
+
_ki 9
|
265
|
+
_tu 9
|
266
|
+
et_ 9
|
267
|
+
ovi 9
|
268
|
+
_sk 9
|
269
|
+
udi 9
|
270
|
+
men 9
|
271
|
+
ži 9
|
272
|
+
em_ 9
|
273
|
+
og 9
|
274
|
+
tud 8
|
275
|
+
še 8
|
276
|
+
ala 8
|
277
|
+
ela 8
|
278
|
+
k_ 8
|
279
|
+
pol 8
|
280
|
+
ska 8
|
281
|
+
dne 8
|
282
|
+
nar 8
|
287
283
|
nik 8
|
284
|
+
edn 8
|
285
|
+
tav 8
|
288
286
|
ke_ 8
|
289
|
-
|
290
|
-
|
287
|
+
ajo 8
|
288
|
+
_pa 8
|
291
289
|
bo 8
|
290
|
+
up 8
|
291
|
+
dni 8
|
292
|
+
ep 8
|
292
293
|
eta 8
|
293
|
-
|
294
|
-
ajo 8
|
295
|
-
še 8
|
296
|
-
ska 8
|
297
|
-
es 8
|
298
|
-
gr 8
|
294
|
+
iti 8
|
299
295
|
ok 8
|
300
|
-
pol 8
|
301
|
-
ep 8
|
302
296
|
zv 8
|
303
|
-
|
304
|
-
_ki 8
|
305
|
-
dne 8
|
306
|
-
nar 8
|
307
|
-
�e 8
|
308
|
-
iti 8
|
309
|
-
ala 8
|
310
|
-
up 8
|
311
|
-
_pa 8
|
297
|
+
es 8
|
312
298
|
_s_ 8
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
299
|
+
_b 8
|
300
|
+
gr 8
|
301
|
+
pra 7
|
302
|
+
pi 7
|
317
303
|
rav 7
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
anj 7
|
304
|
+
van 7
|
305
|
+
pu 7
|
306
|
+
sv 7
|
322
307
|
odn 7
|
323
|
-
|
308
|
+
zg 7
|
309
|
+
gl 7
|
324
310
|
ši 7
|
325
|
-
|
326
|
-
�č 7
|
327
|
-
van 7
|
328
|
-
�i 7
|
329
|
-
�j 7
|
330
|
-
jih 7
|
331
|
-
sed 7
|
311
|
+
ur 7
|
332
312
|
ani 7
|
313
|
+
vni 7
|
314
|
+
jih 7
|
315
|
+
_me 7
|
316
|
+
ez 7
|
317
|
+
pa_ 7
|
318
|
+
eb 7
|
319
|
+
zi 7
|
333
320
|
čj 7
|
334
|
-
|
335
|
-
_da 7
|
336
|
-
dni 7
|
337
|
-
eo 7
|
338
|
-
a� 7
|
321
|
+
aš 7
|
339
322
|
iki 7
|
323
|
+
j_ 7
|
340
324
|
ko_ 7
|
341
|
-
|
342
|
-
|
325
|
+
ars 7
|
326
|
+
rst 7
|
327
|
+
rsk 7
|
328
|
+
anj 7
|
343
329
|
ci 7
|
344
330
|
pe 7
|
345
|
-
_me 7
|
346
|
-
ur 7
|
347
|
-
pa_ 7
|
348
|
-
ez 7
|
349
|
-
eb 7
|
350
|
-
nov 7
|
351
|
-
š� 7
|
352
|
-
�� 7
|
353
|
-
rst 7
|
354
331
|
ev_ 7
|
355
|
-
ars 7
|
356
|
-
pi 7
|
357
332
|
eno 7
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
ub 6
|
364
|
-
tn 6
|
365
|
-
om_ 6
|
366
|
-
_sv 6
|
367
|
-
�n 6
|
368
|
-
ov_ 6
|
369
|
-
ima 6
|
370
|
-
rit 6
|
333
|
+
zd 7
|
334
|
+
_da 7
|
335
|
+
_do 7
|
336
|
+
eo 7
|
337
|
+
šč 7
|
371
338
|
ogr 6
|
372
|
-
|
373
|
-
|
374
|
-
|
339
|
+
tn 6
|
340
|
+
ru_ 6
|
341
|
+
eve 6
|
342
|
+
sr 6
|
343
|
+
še_ 6
|
375
344
|
ava 6
|
376
|
-
|
377
|
-
vo_ 6
|
378
|
-
str 6
|
379
|
-
št 6
|
380
|
-
e� 6
|
345
|
+
ak 6
|
381
346
|
tvo 6
|
382
|
-
|
347
|
+
iji 6
|
383
348
|
eli 6
|
384
|
-
|
385
|
-
|
386
|
-
|
349
|
+
ug 6
|
350
|
+
_sv 6
|
351
|
+
_ž 6
|
352
|
+
jn 6
|
353
|
+
ho 6
|
354
|
+
ast 6
|
355
|
+
du 6
|
356
|
+
ave 6
|
357
|
+
kra 6
|
358
|
+
om_ 6
|
359
|
+
lad 6
|
360
|
+
eds 6
|
361
|
+
vo_ 6
|
362
|
+
ds 6
|
363
|
+
at 6
|
364
|
+
als 6
|
365
|
+
ls 6
|
387
366
|
_ka 6
|
388
|
-
ali 6
|
389
367
|
ver 6
|
368
|
+
z_ 6
|
369
|
+
str 6
|
370
|
+
rit 6
|
371
|
+
gla 6
|
372
|
+
nov 6
|
373
|
+
ubl 6
|
390
374
|
vro 6
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
375
|
+
_bi 6
|
376
|
+
ub 6
|
377
|
+
sn 6
|
378
|
+
sed 6
|
379
|
+
_al 6
|
380
|
+
eč 6
|
381
|
+
ura 6
|
382
|
+
sko 6
|
383
|
+
kov 6
|
384
|
+
obl 6
|
385
|
+
št 6
|
386
|
+
ml 6
|
387
|
+
oze 6
|
388
|
+
tev 6
|
397
389
|
pov 6
|
398
390
|
evi 6
|
391
|
+
_ur 6
|
392
|
+
ima 6
|
393
|
+
ali 6
|
394
|
+
oma 6
|
399
395
|
čn 6
|
400
|
-
|
396
|
+
gra 6
|
397
|
+
ine 6
|
398
|
+
_ni 6
|
399
|
+
ede 6
|
400
|
+
očj 5
|