scylla 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/scylla/generator.rb +0 -1
- data/lib/scylla/lms/{greek-iso8859-7.lm → greek.lm} +0 -0
- data/lib/scylla/lms/{serbian-ascii.lm → serbian.lm} +0 -0
- data/lib/scylla/lms/{slovak-ascii.lm → slovak.lm} +0 -0
- data/lib/scylla/lms/{slovenian-ascii.lm → slovenian.lm} +0 -0
- data/lib/scylla/lms/swedish.lm +400 -400
- data/lib/scylla/lms/{ukrainian-koi8_u.lm → ukrainian.lm} +0 -0
- data/lib/scylla/lms/{yiddish-utf.lm → yiddish.lm} +0 -0
- data/lib/scylla/resources.rb +53 -0
- data/lib/scylla/string.rb +18 -4
- data/lib/scylla.rb +2 -1
- data/scylla.gemspec +14 -13
- data/source_texts/{greek-iso8859-7.txt → greek.txt} +0 -0
- data/source_texts/{serbian-ascii.txt → serbian.txt} +0 -0
- data/source_texts/{slovak-ascii.txt → slovak.txt} +0 -0
- data/source_texts/{slovenian-ascii.txt → slovenian.txt} +0 -0
- data/source_texts/swedish.txt +479 -66
- data/source_texts/{ukrainian-koi8_u.txt → ukrainian.txt} +0 -0
- data/test/classifier_test.rb +1 -1
- data/test/loader_test.rb +1 -1
- data/test/scylla_test.rb +5 -1
- metadata +16 -15
data/lib/scylla/lms/swedish.lm
CHANGED
@@ -1,400 +1,400 @@
|
|
1
|
-
_
|
2
|
-
e
|
3
|
-
|
4
|
-
a
|
5
|
-
|
6
|
-
|
7
|
-
i
|
8
|
-
s
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
v
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
in
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
c
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
ch
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
_av_
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
r
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
�
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
1
|
+
_ 20224
|
2
|
+
e 4682
|
3
|
+
r 4185
|
4
|
+
a 4010
|
5
|
+
n 3825
|
6
|
+
t 3529
|
7
|
+
i 2832
|
8
|
+
s 2764
|
9
|
+
__ 2494
|
10
|
+
l 2337
|
11
|
+
� 2073
|
12
|
+
d 2021
|
13
|
+
o 1814
|
14
|
+
g 1572
|
15
|
+
k 1402
|
16
|
+
m 1359
|
17
|
+
v 1194
|
18
|
+
er 1164
|
19
|
+
r_ 1002
|
20
|
+
en 976
|
21
|
+
ä 888
|
22
|
+
� 888
|
23
|
+
de 842
|
24
|
+
n_ 833
|
25
|
+
t_ 812
|
26
|
+
f 745
|
27
|
+
a_ 744
|
28
|
+
ar 732
|
29
|
+
u 729
|
30
|
+
an 718
|
31
|
+
p 716
|
32
|
+
h 695
|
33
|
+
st 647
|
34
|
+
_s 643
|
35
|
+
in 614
|
36
|
+
ö 597
|
37
|
+
� 597
|
38
|
+
nd 582
|
39
|
+
e_ 576
|
40
|
+
ri 540
|
41
|
+
c 527
|
42
|
+
et 526
|
43
|
+
en_ 523
|
44
|
+
� 523
|
45
|
+
å 523
|
46
|
+
ge 519
|
47
|
+
ra 512
|
48
|
+
. 509
|
49
|
+
te 498
|
50
|
+
ti 471
|
51
|
+
ig 467
|
52
|
+
_i 461
|
53
|
+
la 437
|
54
|
+
s_ 415
|
55
|
+
b 412
|
56
|
+
ta 403
|
57
|
+
re 402
|
58
|
+
S 401
|
59
|
+
ve 397
|
60
|
+
_a 393
|
61
|
+
_o 391
|
62
|
+
_f 386
|
63
|
+
oc 379
|
64
|
+
_m 376
|
65
|
+
, 376
|
66
|
+
ll 375
|
67
|
+
,_ 370
|
68
|
+
_d 364
|
69
|
+
ng 362
|
70
|
+
._ 353
|
71
|
+
er_ 349
|
72
|
+
and 345
|
73
|
+
sk 343
|
74
|
+
na 342
|
75
|
+
om 341
|
76
|
+
at 339
|
77
|
+
_� 338
|
78
|
+
al 337
|
79
|
+
ka 334
|
80
|
+
_S 325
|
81
|
+
i_ 318
|
82
|
+
or 315
|
83
|
+
�r 311
|
84
|
+
är 311
|
85
|
+
ns 304
|
86
|
+
_e 303
|
87
|
+
tt 298
|
88
|
+
el 288
|
89
|
+
_oc 288
|
90
|
+
ch 287
|
91
|
+
ige 282
|
92
|
+
eri 281
|
93
|
+
ar_ 280
|
94
|
+
ver 277
|
95
|
+
h_ 273
|
96
|
+
�r 272
|
97
|
+
ör 272
|
98
|
+
ed 271
|
99
|
+
och 269
|
100
|
+
ch_ 269
|
101
|
+
_och 268
|
102
|
+
och_ 267
|
103
|
+
y 266
|
104
|
+
_och_ 266
|
105
|
+
_i_ 266
|
106
|
+
li 260
|
107
|
+
Sv 257
|
108
|
+
ing 257
|
109
|
+
Sve 256
|
110
|
+
rig 254
|
111
|
+
me 254
|
112
|
+
on 254
|
113
|
+
le 252
|
114
|
+
_t 250
|
115
|
+
d_ 247
|
116
|
+
_de 247
|
117
|
+
is 246
|
118
|
+
j 245
|
119
|
+
_v 245
|
120
|
+
es 243
|
121
|
+
et_ 243
|
122
|
+
m_ 241
|
123
|
+
rige 238
|
124
|
+
nde 237
|
125
|
+
_h 235
|
126
|
+
_k 232
|
127
|
+
_l 230
|
128
|
+
ni 229
|
129
|
+
_p 229
|
130
|
+
il 228
|
131
|
+
erige 226
|
132
|
+
verig 226
|
133
|
+
f� 226
|
134
|
+
erig 226
|
135
|
+
veri 226
|
136
|
+
Sver 224
|
137
|
+
Sveri 224
|
138
|
+
_Sv 220
|
139
|
+
de_ 219
|
140
|
+
_Sve 219
|
141
|
+
av 217
|
142
|
+
ter 217
|
143
|
+
v_ 212
|
144
|
+
va 212
|
145
|
+
da 209
|
146
|
+
nt 206
|
147
|
+
ne 205
|
148
|
+
ga 204
|
149
|
+
ik 199
|
150
|
+
lan 198
|
151
|
+
r� 196
|
152
|
+
_b 196
|
153
|
+
fö 195
|
154
|
+
_Sver 193
|
155
|
+
g_ 193
|
156
|
+
rn 191
|
157
|
+
l_ 190
|
158
|
+
om_ 190
|
159
|
+
_av 187
|
160
|
+
ha 187
|
161
|
+
se 187
|
162
|
+
av_ 186
|
163
|
+
�n 184
|
164
|
+
än 184
|
165
|
+
ad 179
|
166
|
+
_ä 178
|
167
|
+
ska 176
|
168
|
+
_me 174
|
169
|
+
_av_ 174
|
170
|
+
_in 173
|
171
|
+
_r 173
|
172
|
+
land 172
|
173
|
+
för 172
|
174
|
+
so 172
|
175
|
+
ol 171
|
176
|
+
it 167
|
177
|
+
sta 166
|
178
|
+
_u 164
|
179
|
+
�r_ 164
|
180
|
+
är_ 164
|
181
|
+
kt 163
|
182
|
+
to 163
|
183
|
+
der 161
|
184
|
+
ma 160
|
185
|
+
un 160
|
186
|
+
v� 160
|
187
|
+
_ha 159
|
188
|
+
l� 159
|
189
|
+
_f� 159
|
190
|
+
tr 158
|
191
|
+
rs 156
|
192
|
+
am 152
|
193
|
+
ag 152
|
194
|
+
_st 151
|
195
|
+
ka_ 151
|
196
|
+
_en 150
|
197
|
+
era 148
|
198
|
+
io 147
|
199
|
+
ro 146
|
200
|
+
�n 143
|
201
|
+
ån 143
|
202
|
+
�_ 143
|
203
|
+
å_ 143
|
204
|
+
- 142
|
205
|
+
den 142
|
206
|
+
ts 142
|
207
|
+
sa 142
|
208
|
+
_fö 141
|
209
|
+
tt_ 139
|
210
|
+
] 139
|
211
|
+
_är 139
|
212
|
+
[ 139
|
213
|
+
_ti 138
|
214
|
+
ut 138
|
215
|
+
_är_ 137
|
216
|
+
ion 136
|
217
|
+
ill 136
|
218
|
+
med 136
|
219
|
+
ge_ 131
|
220
|
+
gen 131
|
221
|
+
ra_ 129
|
222
|
+
som 129
|
223
|
+
nin 129
|
224
|
+
ning 129
|
225
|
+
_so 128
|
226
|
+
rd 128
|
227
|
+
rna 127
|
228
|
+
be 127
|
229
|
+
gs 126
|
230
|
+
vi 126
|
231
|
+
ko 125
|
232
|
+
ens 124
|
233
|
+
es_ 124
|
234
|
+
_n 124
|
235
|
+
t� 123
|
236
|
+
di 123
|
237
|
+
lä 123
|
238
|
+
til 122
|
239
|
+
ige_ 122
|
240
|
+
an_ 122
|
241
|
+
vä 122
|
242
|
+
rt 122
|
243
|
+
har 121
|
244
|
+
_för 121
|
245
|
+
rk 121
|
246
|
+
rige_ 121
|
247
|
+
till 119
|
248
|
+
som_ 119
|
249
|
+
as 119
|
250
|
+
_l� 119
|
251
|
+
_g 118
|
252
|
+
_med 118
|
253
|
+
_har 118
|
254
|
+
ck 118
|
255
|
+
ll_ 118
|
256
|
+
_- 117
|
257
|
+
ande 117
|
258
|
+
ska_ 116
|
259
|
+
har_ 116
|
260
|
+
no 115
|
261
|
+
_som 115
|
262
|
+
ds 115
|
263
|
+
dr 115
|
264
|
+
_har_ 114
|
265
|
+
_en_ 114
|
266
|
+
ade 114
|
267
|
+
ke 114
|
268
|
+
pe 113
|
269
|
+
na_ 113
|
270
|
+
nn 113
|
271
|
+
lt 112
|
272
|
+
del 112
|
273
|
+
_till 111
|
274
|
+
_til 111
|
275
|
+
k_ 110
|
276
|
+
fr 109
|
277
|
+
_som_ 109
|
278
|
+
mi 107
|
279
|
+
pr 107
|
280
|
+
ng_ 106
|
281
|
+
D 106
|
282
|
+
em 105
|
283
|
+
den_ 104
|
284
|
+
ent 104
|
285
|
+
var 104
|
286
|
+
gr 103
|
287
|
+
si 102
|
288
|
+
nsk 102
|
289
|
+
s� 101
|
290
|
+
att 101
|
291
|
+
m� 101
|
292
|
+
ger 101
|
293
|
+
tio 100
|
294
|
+
ste 100
|
295
|
+
län 100
|
296
|
+
_lä 99
|
297
|
+
ern 99
|
298
|
+
tal 97
|
299
|
+
det 97
|
300
|
+
re_ 96
|
301
|
+
ed_ 96
|
302
|
+
ta_ 96
|
303
|
+
tion 96
|
304
|
+
kr 96
|
305
|
+
_va 96
|
306
|
+
ten 96
|
307
|
+
isk 95
|
308
|
+
ill_ 94
|
309
|
+
id 94
|
310
|
+
[_ 93
|
311
|
+
ot 93
|
312
|
+
ks 93
|
313
|
+
_] 93
|
314
|
+
ur 92
|
315
|
+
are 92
|
316
|
+
ss 92
|
317
|
+
sv 92
|
318
|
+
ven 92
|
319
|
+
till_ 90
|
320
|
+
ell 89
|
321
|
+
_fr 89
|
322
|
+
ati 89
|
323
|
+
med_ 88
|
324
|
+
lig 88
|
325
|
+
lla 88
|
326
|
+
rå 88
|
327
|
+
ld 88
|
328
|
+
_D 87
|
329
|
+
ru 87
|
330
|
+
pp 86
|
331
|
+
�r 86
|
332
|
+
_län 86
|
333
|
+
år 86
|
334
|
+
gar 85
|
335
|
+
der_ 85
|
336
|
+
ing_ 85
|
337
|
+
he 83
|
338
|
+
N 83
|
339
|
+
ls 83
|
340
|
+
nder 82
|
341
|
+
p� 82
|
342
|
+
_med_ 82
|
343
|
+
_re 82
|
344
|
+
rl 82
|
345
|
+
up 81
|
346
|
+
one 81
|
347
|
+
ft 80
|
348
|
+
ns_ 80
|
349
|
+
st� 80
|
350
|
+
på 80
|
351
|
+
rin 80
|
352
|
+
t. 79
|
353
|
+
) 78
|
354
|
+
( 78
|
355
|
+
erna 78
|
356
|
+
ner 78
|
357
|
+
_p� 77
|
358
|
+
j� 77
|
359
|
+
eg 77
|
360
|
+
nte 77
|
361
|
+
_den 77
|
362
|
+
E 77
|
363
|
+
ät 77
|
364
|
+
�t 77
|
365
|
+
_på 77
|
366
|
+
_( 77
|
367
|
+
pa 77
|
368
|
+
r, 76
|
369
|
+
ie 76
|
370
|
+
på_ 76
|
371
|
+
-_ 76
|
372
|
+
rg 76
|
373
|
+
h� 75
|
374
|
+
und 75
|
375
|
+
tor 75
|
376
|
+
rna_ 75
|
377
|
+
det_ 75
|
378
|
+
n. 75
|
379
|
+
r,_ 75
|
380
|
+
nde_ 74
|
381
|
+
F 74
|
382
|
+
ensk 73
|
383
|
+
äl 73
|
384
|
+
_ut 73
|
385
|
+
�l 73
|
386
|
+
_på_ 73
|
387
|
+
ges 73
|
388
|
+
ist 73
|
389
|
+
rä 73
|
390
|
+
dra 73
|
391
|
+
ring 73
|
392
|
+
_vi 72
|
393
|
+
ett 72
|
394
|
+
r. 72
|
395
|
+
_-_ 71
|
396
|
+
ms 71
|
397
|
+
ter_ 71
|
398
|
+
gen_ 71
|
399
|
+
sta_ 71
|
400
|
+
_be 71
|