scylla 0.7.0 → 0.7.5
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/scylla/classifier.rb +3 -4
- data/lib/scylla/generator.rb +4 -7
- data/lib/scylla/lms/afrikaans.lm +280 -280
- data/lib/scylla/lms/arabic.lm +225 -225
- data/lib/scylla/lms/bulgarian.lm +208 -208
- data/lib/scylla/lms/catalan.lm +212 -212
- data/lib/scylla/lms/chinese.lm +201 -201
- data/lib/scylla/lms/danish.lm +155 -155
- data/lib/scylla/lms/english.lm +207 -207
- data/lib/scylla/lms/finnish.lm +259 -259
- data/lib/scylla/lms/french.lm +203 -203
- data/lib/scylla/lms/german.lm +280 -280
- data/lib/scylla/lms/greek.lm +276 -276
- data/lib/scylla/lms/hebrew.lm +170 -170
- data/lib/scylla/lms/hindi.lm +241 -241
- data/lib/scylla/lms/icelandic.lm +264 -264
- data/lib/scylla/lms/indonesian.lm +272 -272
- data/lib/scylla/lms/italian.lm +283 -283
- data/lib/scylla/lms/japanese.lm +105 -105
- data/lib/scylla/lms/korean.lm +400 -400
- data/lib/scylla/lms/norwegian.lm +235 -235
- data/lib/scylla/lms/polish.lm +264 -264
- data/lib/scylla/lms/portuguese.lm +269 -269
- data/lib/scylla/lms/romanian.lm +278 -278
- data/lib/scylla/lms/russian.lm +127 -127
- data/lib/scylla/lms/slovak.lm +281 -281
- data/lib/scylla/lms/slovenian.lm +276 -276
- data/lib/scylla/lms/spanish.lm +190 -190
- data/lib/scylla/lms/swedish.lm +195 -195
- data/lib/scylla/lms/tagalog.lm +282 -282
- data/lib/scylla/lms/thai.lm +257 -257
- data/lib/scylla/lms/turkish.lm +300 -300
- data/lib/scylla/lms/vietnamese.lm +277 -277
- data/lib/scylla/lms/welsh.lm +271 -271
- data/scylla.gemspec +3 -22
- data/source_texts/korean.txt +219 -134
- metadata +15 -14
- data/scylla-0.6.0.gem +0 -0
data/lib/scylla/lms/swedish.lm
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
_
|
1
|
+
_ 15574
|
2
2
|
e 4682
|
3
3
|
r 4185
|
4
4
|
a 4010
|
@@ -6,7 +6,6 @@ n 3825
|
|
6
6
|
t 3529
|
7
7
|
i 2832
|
8
8
|
s 2764
|
9
|
-
__ 2633
|
10
9
|
l 2337
|
11
10
|
� 2073
|
12
11
|
d 2021
|
@@ -16,385 +15,386 @@ k 1402
|
|
16
15
|
m 1359
|
17
16
|
v 1194
|
18
17
|
er 1164
|
19
|
-
r_
|
18
|
+
r_ 1035
|
20
19
|
en 976
|
21
|
-
ä 888
|
22
20
|
� 888
|
23
|
-
|
24
|
-
n_
|
25
|
-
t_
|
26
|
-
|
21
|
+
ä 888
|
22
|
+
n_ 884
|
23
|
+
t_ 848
|
24
|
+
de 841
|
25
|
+
a_ 799
|
27
26
|
f 745
|
28
27
|
ar 732
|
29
28
|
u 729
|
30
29
|
an 718
|
31
30
|
p 716
|
32
31
|
h 695
|
33
|
-
_s
|
32
|
+
_s 665
|
34
33
|
st 647
|
35
34
|
in 614
|
36
|
-
|
35
|
+
e_ 611
|
37
36
|
ö 597
|
38
|
-
|
37
|
+
� 597
|
39
38
|
nd 582
|
39
|
+
en_ 546
|
40
40
|
ri 540
|
41
41
|
c 527
|
42
42
|
et 526
|
43
|
-
en_ 525
|
44
|
-
å 523
|
45
43
|
� 523
|
44
|
+
å 523
|
46
45
|
ge 519
|
47
46
|
ra 512
|
48
47
|
. 509
|
49
|
-
|
48
|
+
._ 503
|
49
|
+
te 497
|
50
50
|
ti 471
|
51
51
|
ig 467
|
52
|
-
_i
|
52
|
+
_i 467
|
53
53
|
la 437
|
54
|
-
s_
|
55
|
-
._ 415
|
54
|
+
s_ 436
|
56
55
|
b 412
|
57
56
|
ta 403
|
58
|
-
re 402
|
59
57
|
S 401
|
58
|
+
re 400
|
60
59
|
ve 397
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
60
|
+
_S 397
|
61
|
+
_a 396
|
62
|
+
_o 395
|
63
|
+
_f 390
|
64
|
+
_m 386
|
65
65
|
oc 379
|
66
66
|
, 376
|
67
67
|
ll 375
|
68
|
-
,_
|
69
|
-
_d
|
68
|
+
,_ 374
|
69
|
+
_d 369
|
70
|
+
er_ 362
|
70
71
|
ng 362
|
71
|
-
|
72
|
+
_� 355
|
72
73
|
and 345
|
73
|
-
sk
|
74
|
-
_� 343
|
74
|
+
sk 343
|
75
75
|
na 342
|
76
76
|
om 341
|
77
77
|
at 339
|
78
78
|
al 337
|
79
79
|
ka 334
|
80
|
-
|
81
|
-
i_ 318
|
80
|
+
i_ 322
|
82
81
|
or 315
|
83
|
-
är 311
|
84
82
|
�r 311
|
85
|
-
_e
|
86
|
-
|
83
|
+
_e 311
|
84
|
+
är 311
|
85
|
+
ns 303
|
86
|
+
_t 303
|
87
87
|
tt 298
|
88
|
+
ar_ 292
|
89
|
+
_oc 289
|
88
90
|
el 288
|
89
|
-
_oc 288
|
90
91
|
ch 287
|
91
|
-
|
92
|
+
d_ 283
|
92
93
|
ige 282
|
93
94
|
eri 281
|
94
95
|
ver 277
|
95
|
-
h_
|
96
|
-
ör 272
|
96
|
+
h_ 274
|
97
97
|
�r 272
|
98
|
+
ör 272
|
98
99
|
ed 271
|
100
|
+
ch_ 270
|
99
101
|
och 269
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
och_
|
104
|
-
_och_ 266
|
102
|
+
_och 269
|
103
|
+
_i_ 268
|
104
|
+
_och_ 268
|
105
|
+
och_ 268
|
105
106
|
y 266
|
106
107
|
li 260
|
107
|
-
|
108
|
+
_Sv 257
|
108
109
|
Sv 257
|
110
|
+
ing 257
|
109
111
|
Sve 256
|
112
|
+
_Sve 256
|
110
113
|
on 254
|
111
114
|
rig 254
|
112
|
-
me
|
113
|
-
_t 253
|
115
|
+
me 253
|
114
116
|
le 252
|
115
|
-
|
117
|
+
_v 251
|
118
|
+
et_ 250
|
116
119
|
_de 249
|
117
|
-
|
118
|
-
is 246
|
119
|
-
et_ 246
|
120
|
+
m_ 249
|
120
121
|
j 245
|
122
|
+
is 245
|
123
|
+
_k 244
|
121
124
|
es 243
|
122
|
-
|
123
|
-
_h 240
|
125
|
+
_h 242
|
124
126
|
rige 238
|
125
127
|
nde 237
|
126
|
-
|
127
|
-
_l
|
128
|
-
|
128
|
+
_p 235
|
129
|
+
_l 233
|
130
|
+
g_ 230
|
129
131
|
ni 229
|
130
132
|
il 228
|
131
|
-
f� 226
|
132
133
|
erig 226
|
134
|
+
verig 226
|
133
135
|
erige 226
|
134
136
|
veri 226
|
135
|
-
|
137
|
+
f� 226
|
136
138
|
Sveri 224
|
139
|
+
_Sver 224
|
137
140
|
Sver 224
|
138
|
-
|
139
|
-
_Sve 220
|
140
|
-
g_ 220
|
141
|
-
de_ 220
|
142
|
-
av 217
|
141
|
+
de_ 223
|
143
142
|
ter 217
|
143
|
+
av 217
|
144
|
+
v_ 214
|
144
145
|
va 212
|
145
|
-
|
146
|
+
_r 209
|
146
147
|
da 209
|
147
|
-
nt
|
148
|
-
_r 206
|
148
|
+
nt 205
|
149
149
|
ne 205
|
150
|
+
_b 204
|
150
151
|
ga 204
|
151
|
-
_b 200
|
152
152
|
ik 199
|
153
153
|
lan 198
|
154
154
|
r� 196
|
155
155
|
fö 195
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
156
|
+
l_ 194
|
157
|
+
om_ 192
|
158
|
+
rn 190
|
159
|
+
ha 187
|
160
160
|
_av 187
|
161
161
|
se 187
|
162
|
-
|
163
|
-
av_ 186
|
164
|
-
än 184
|
162
|
+
av_ 187
|
165
163
|
�n 184
|
164
|
+
än 184
|
165
|
+
_me 180
|
166
166
|
ad 179
|
167
167
|
_ä 178
|
168
|
-
|
168
|
+
_in 177
|
169
169
|
ska 176
|
170
|
-
|
171
|
-
_av_ 174
|
172
|
-
so 172
|
170
|
+
_av_ 175
|
173
171
|
för 172
|
174
172
|
land 172
|
173
|
+
so 172
|
175
174
|
ol 171
|
175
|
+
_u 168
|
176
176
|
it 167
|
177
177
|
sta 166
|
178
|
-
_u 165
|
179
|
-
är_ 164
|
180
178
|
�r_ 164
|
181
179
|
_ha 164
|
180
|
+
är_ 164
|
182
181
|
to 163
|
183
182
|
kt 163
|
183
|
+
ra_ 162
|
184
|
+
_f� 161
|
184
185
|
der 161
|
185
|
-
ma 160
|
186
|
-
un 160
|
187
186
|
v� 160
|
188
|
-
|
187
|
+
un 160
|
188
|
+
ma 160
|
189
189
|
l� 159
|
190
190
|
tr 158
|
191
191
|
rs 156
|
192
|
+
_en 153
|
193
|
+
ka_ 152
|
192
194
|
ag 152
|
193
|
-
am 152
|
194
|
-
_en 151
|
195
195
|
_st 151
|
196
|
-
|
196
|
+
am 151
|
197
197
|
era 148
|
198
198
|
io 147
|
199
199
|
ro 146
|
200
|
-
å_
|
201
|
-
�_
|
202
|
-
|
200
|
+
å_ 146
|
201
|
+
�_ 146
|
202
|
+
ge_ 145
|
203
|
+
tt_ 144
|
203
204
|
ån 143
|
205
|
+
�n 143
|
204
206
|
sa 142
|
205
|
-
den 142
|
206
207
|
ts 142
|
207
|
-
|
208
|
-
|
208
|
+
_fö 142
|
209
|
+
_ti 142
|
210
|
+
den 141
|
209
211
|
_är 139
|
210
|
-
_ti 139
|
211
|
-
tt_ 139
|
212
212
|
ut 138
|
213
213
|
_är_ 137
|
214
|
-
|
214
|
+
ng_ 137
|
215
215
|
ill 136
|
216
216
|
ion 136
|
217
|
-
|
218
|
-
|
217
|
+
med 136
|
218
|
+
_so 133
|
219
|
+
ige_ 133
|
220
|
+
rige_ 132
|
219
221
|
gen 131
|
220
|
-
|
222
|
+
ning 129
|
221
223
|
som 129
|
222
|
-
|
224
|
+
_n 129
|
223
225
|
nin 129
|
224
|
-
ning 129
|
225
226
|
rd 128
|
226
|
-
be 127
|
227
227
|
rna 127
|
228
|
-
|
228
|
+
be 127
|
229
|
+
es_ 126
|
229
230
|
vi 126
|
230
231
|
gs 126
|
231
|
-
ige_ 126
|
232
|
-
rige_ 125
|
233
232
|
ko 125
|
234
|
-
es_ 124
|
235
|
-
ens 124
|
236
233
|
t� 123
|
237
|
-
lä 123
|
238
234
|
di 123
|
239
|
-
|
240
|
-
|
235
|
+
lä 123
|
236
|
+
an_ 123
|
237
|
+
ens 123
|
238
|
+
_g 122
|
239
|
+
_för 122
|
241
240
|
til 122
|
241
|
+
_med 122
|
242
|
+
rt 122
|
242
243
|
vä 122
|
243
|
-
|
244
|
+
_l� 121
|
244
245
|
har 121
|
245
246
|
rk 121
|
246
|
-
|
247
|
-
|
248
|
-
|
247
|
+
som_ 121
|
248
|
+
_en_ 120
|
249
|
+
ll_ 120
|
249
250
|
till 119
|
250
|
-
som_ 119
|
251
251
|
as 119
|
252
|
-
|
252
|
+
na_ 118
|
253
|
+
k_ 118
|
253
254
|
_har 118
|
254
|
-
ll_ 118
|
255
255
|
ck 118
|
256
|
+
har_ 118
|
257
|
+
_som 118
|
256
258
|
ande 117
|
257
|
-
ska_
|
258
|
-
|
259
|
+
ska_ 117
|
260
|
+
_har_ 116
|
259
261
|
dr 115
|
260
|
-
|
261
|
-
ds 115
|
262
|
-
_som 115
|
263
|
-
_re 114
|
264
|
-
ke 114
|
265
|
-
_en_ 114
|
266
|
-
_har_ 114
|
262
|
+
_re 115
|
267
263
|
ade 114
|
268
|
-
|
269
|
-
|
264
|
+
ds 114
|
265
|
+
ke 114
|
266
|
+
_som_ 114
|
267
|
+
no 114
|
268
|
+
ing_ 113
|
270
269
|
pe 113
|
271
|
-
|
270
|
+
nn 113
|
271
|
+
_til 112
|
272
272
|
del 112
|
273
|
-
|
274
|
-
|
275
|
-
_till 111
|
273
|
+
_till 112
|
274
|
+
lt 112
|
276
275
|
fr 109
|
277
|
-
ing_ 109
|
278
|
-
_som_ 109
|
279
|
-
mi 107
|
280
276
|
pr 107
|
277
|
+
mi 107
|
281
278
|
D 106
|
282
|
-
|
283
|
-
ent 104
|
284
|
-
den_ 104
|
279
|
+
re_ 104
|
285
280
|
var 104
|
281
|
+
den_ 103
|
282
|
+
em 103
|
286
283
|
gr 103
|
287
|
-
|
284
|
+
ent 103
|
285
|
+
_D 103
|
288
286
|
si 102
|
287
|
+
nsk 102
|
289
288
|
att 101
|
290
|
-
|
291
|
-
s� 101
|
289
|
+
_lä 101
|
292
290
|
ger 101
|
293
|
-
|
294
|
-
tio 100
|
291
|
+
m� 101
|
295
292
|
län 100
|
296
293
|
ste 100
|
297
|
-
|
294
|
+
tio 100
|
295
|
+
s� 100
|
298
296
|
ern 99
|
297
|
+
ta_ 98
|
298
|
+
ed_ 98
|
299
299
|
det 97
|
300
300
|
tal 97
|
301
|
-
|
302
|
-
|
303
|
-
_va 96
|
301
|
+
_va 97
|
302
|
+
ill_ 96
|
304
303
|
kr 96
|
305
304
|
ten 96
|
306
305
|
tion 96
|
307
306
|
isk 95
|
308
307
|
id 94
|
309
|
-
ill_ 94
|
310
308
|
ks 93
|
311
309
|
ot 93
|
312
|
-
_D 92
|
313
310
|
ven 92
|
314
311
|
ur 92
|
315
|
-
ss 92
|
316
312
|
sv 92
|
317
|
-
|
318
|
-
till_
|
313
|
+
ss 92
|
314
|
+
till_ 91
|
315
|
+
are 91
|
316
|
+
med_ 90
|
319
317
|
_fr 89
|
320
318
|
ell 89
|
321
|
-
med_ 89
|
322
319
|
ati 89
|
320
|
+
_län 88
|
323
321
|
ld 88
|
322
|
+
rå 88
|
324
323
|
lla 88
|
325
324
|
lig 88
|
326
|
-
rå 88
|
327
|
-
_län 87
|
328
325
|
ru 87
|
329
|
-
der_
|
330
|
-
|
326
|
+
der_ 87
|
327
|
+
_med_ 86
|
331
328
|
�r 86
|
329
|
+
år 86
|
332
330
|
pp 86
|
331
|
+
nd_ 86
|
333
332
|
gar 85
|
334
|
-
_med_ 84
|
335
|
-
N 83
|
336
333
|
he 83
|
337
|
-
|
334
|
+
N 83
|
335
|
+
ns_ 82
|
336
|
+
ls 82
|
337
|
+
p� 82
|
338
338
|
nder 82
|
339
339
|
rl 82
|
340
|
-
p� 82
|
341
|
-
t. 81
|
342
|
-
one 81
|
343
340
|
up 81
|
344
|
-
|
345
|
-
|
341
|
+
one 81
|
342
|
+
st� 80
|
346
343
|
på 80
|
344
|
+
rna_ 80
|
345
|
+
rin 80
|
347
346
|
ft 80
|
348
|
-
|
349
|
-
|
350
|
-
_p
|
347
|
+
_på 79
|
348
|
+
t. 79
|
349
|
+
_p� 79
|
351
350
|
ner 78
|
351
|
+
t._ 78
|
352
352
|
erna 78
|
353
|
-
|
354
|
-
eg 77
|
353
|
+
E 77
|
355
354
|
�t 77
|
355
|
+
eg 77
|
356
|
+
_vi 77
|
357
|
+
pa 77
|
356
358
|
nte 77
|
357
|
-
E 77
|
358
359
|
_den 77
|
359
|
-
pa 77
|
360
|
-
r, 77
|
361
360
|
j� 77
|
362
361
|
ät 77
|
363
|
-
rna_ 76
|
364
362
|
det_ 76
|
363
|
+
_. 76
|
364
|
+
r, 76
|
365
365
|
på_ 76
|
366
|
-
rg 76
|
367
|
-
-_ 76
|
368
|
-
ie 76
|
369
366
|
r,_ 76
|
370
|
-
|
371
|
-
|
367
|
+
nde_ 76
|
368
|
+
_ut 76
|
369
|
+
and_ 76
|
370
|
+
ter_ 76
|
371
|
+
ie 76
|
372
372
|
tor 75
|
373
|
+
rg 75
|
374
|
+
_på_ 75
|
373
375
|
h� 75
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
376
|
+
und 75
|
377
|
+
_._ 75
|
378
|
+
n. 75
|
379
|
+
n._ 75
|
380
|
+
gen_ 74
|
381
|
+
ning_ 74
|
382
|
+
_E 74
|
383
|
+
land_ 74
|
378
384
|
F 74
|
379
|
-
äl 73
|
380
|
-
ring 73
|
381
|
-
ges 73
|
382
385
|
�l 73
|
386
|
+
_F 73
|
383
387
|
ensk 73
|
384
388
|
rä 73
|
385
|
-
|
386
|
-
|
389
|
+
ring 73
|
390
|
+
äl 73
|
391
|
+
ges 73
|
387
392
|
ist 73
|
388
|
-
|
393
|
+
dra 73
|
389
394
|
ett 72
|
390
|
-
|
395
|
+
r._ 72
|
396
|
+
_,_ 72
|
397
|
+
_, 72
|
398
|
+
sta_ 72
|
399
|
+
r. 72
|
391
400
|
ms 71
|
392
|
-
_-_ 71
|
393
|
-
_be 71
|
394
|
-
sta_ 71
|
395
|
-
riges 70
|
396
|
-
dig 70
|
397
|
-
iges 70
|
398
|
-
ges_ 70
|
399
|
-
_v� 70
|
400
|
-
fi 70
|