scylla 0.7.0 → 0.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/scylla/classifier.rb +3 -4
- data/lib/scylla/generator.rb +4 -7
- data/lib/scylla/lms/afrikaans.lm +280 -280
- data/lib/scylla/lms/arabic.lm +225 -225
- data/lib/scylla/lms/bulgarian.lm +208 -208
- data/lib/scylla/lms/catalan.lm +212 -212
- data/lib/scylla/lms/chinese.lm +201 -201
- data/lib/scylla/lms/danish.lm +155 -155
- data/lib/scylla/lms/english.lm +207 -207
- data/lib/scylla/lms/finnish.lm +259 -259
- data/lib/scylla/lms/french.lm +203 -203
- data/lib/scylla/lms/german.lm +280 -280
- data/lib/scylla/lms/greek.lm +276 -276
- data/lib/scylla/lms/hebrew.lm +170 -170
- data/lib/scylla/lms/hindi.lm +241 -241
- data/lib/scylla/lms/icelandic.lm +264 -264
- data/lib/scylla/lms/indonesian.lm +272 -272
- data/lib/scylla/lms/italian.lm +283 -283
- data/lib/scylla/lms/japanese.lm +105 -105
- data/lib/scylla/lms/korean.lm +400 -400
- data/lib/scylla/lms/norwegian.lm +235 -235
- data/lib/scylla/lms/polish.lm +264 -264
- data/lib/scylla/lms/portuguese.lm +269 -269
- data/lib/scylla/lms/romanian.lm +278 -278
- data/lib/scylla/lms/russian.lm +127 -127
- data/lib/scylla/lms/slovak.lm +281 -281
- data/lib/scylla/lms/slovenian.lm +276 -276
- data/lib/scylla/lms/spanish.lm +190 -190
- data/lib/scylla/lms/swedish.lm +195 -195
- data/lib/scylla/lms/tagalog.lm +282 -282
- data/lib/scylla/lms/thai.lm +257 -257
- data/lib/scylla/lms/turkish.lm +300 -300
- data/lib/scylla/lms/vietnamese.lm +277 -277
- data/lib/scylla/lms/welsh.lm +271 -271
- data/scylla.gemspec +3 -22
- data/source_texts/korean.txt +219 -134
- metadata +15 -14
- data/scylla-0.6.0.gem +0 -0
data/lib/scylla/lms/swedish.lm
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
_
|
1
|
+
_ 15574
|
2
2
|
e 4682
|
3
3
|
r 4185
|
4
4
|
a 4010
|
@@ -6,7 +6,6 @@ n 3825
|
|
6
6
|
t 3529
|
7
7
|
i 2832
|
8
8
|
s 2764
|
9
|
-
__ 2633
|
10
9
|
l 2337
|
11
10
|
� 2073
|
12
11
|
d 2021
|
@@ -16,385 +15,386 @@ k 1402
|
|
16
15
|
m 1359
|
17
16
|
v 1194
|
18
17
|
er 1164
|
19
|
-
r_
|
18
|
+
r_ 1035
|
20
19
|
en 976
|
21
|
-
ä 888
|
22
20
|
� 888
|
23
|
-
|
24
|
-
n_
|
25
|
-
t_
|
26
|
-
|
21
|
+
ä 888
|
22
|
+
n_ 884
|
23
|
+
t_ 848
|
24
|
+
de 841
|
25
|
+
a_ 799
|
27
26
|
f 745
|
28
27
|
ar 732
|
29
28
|
u 729
|
30
29
|
an 718
|
31
30
|
p 716
|
32
31
|
h 695
|
33
|
-
_s
|
32
|
+
_s 665
|
34
33
|
st 647
|
35
34
|
in 614
|
36
|
-
|
35
|
+
e_ 611
|
37
36
|
ö 597
|
38
|
-
|
37
|
+
� 597
|
39
38
|
nd 582
|
39
|
+
en_ 546
|
40
40
|
ri 540
|
41
41
|
c 527
|
42
42
|
et 526
|
43
|
-
en_ 525
|
44
|
-
å 523
|
45
43
|
� 523
|
44
|
+
å 523
|
46
45
|
ge 519
|
47
46
|
ra 512
|
48
47
|
. 509
|
49
|
-
|
48
|
+
._ 503
|
49
|
+
te 497
|
50
50
|
ti 471
|
51
51
|
ig 467
|
52
|
-
_i
|
52
|
+
_i 467
|
53
53
|
la 437
|
54
|
-
s_
|
55
|
-
._ 415
|
54
|
+
s_ 436
|
56
55
|
b 412
|
57
56
|
ta 403
|
58
|
-
re 402
|
59
57
|
S 401
|
58
|
+
re 400
|
60
59
|
ve 397
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
60
|
+
_S 397
|
61
|
+
_a 396
|
62
|
+
_o 395
|
63
|
+
_f 390
|
64
|
+
_m 386
|
65
65
|
oc 379
|
66
66
|
, 376
|
67
67
|
ll 375
|
68
|
-
,_
|
69
|
-
_d
|
68
|
+
,_ 374
|
69
|
+
_d 369
|
70
|
+
er_ 362
|
70
71
|
ng 362
|
71
|
-
|
72
|
+
_� 355
|
72
73
|
and 345
|
73
|
-
sk
|
74
|
-
_� 343
|
74
|
+
sk 343
|
75
75
|
na 342
|
76
76
|
om 341
|
77
77
|
at 339
|
78
78
|
al 337
|
79
79
|
ka 334
|
80
|
-
|
81
|
-
i_ 318
|
80
|
+
i_ 322
|
82
81
|
or 315
|
83
|
-
är 311
|
84
82
|
�r 311
|
85
|
-
_e
|
86
|
-
|
83
|
+
_e 311
|
84
|
+
är 311
|
85
|
+
ns 303
|
86
|
+
_t 303
|
87
87
|
tt 298
|
88
|
+
ar_ 292
|
89
|
+
_oc 289
|
88
90
|
el 288
|
89
|
-
_oc 288
|
90
91
|
ch 287
|
91
|
-
|
92
|
+
d_ 283
|
92
93
|
ige 282
|
93
94
|
eri 281
|
94
95
|
ver 277
|
95
|
-
h_
|
96
|
-
ör 272
|
96
|
+
h_ 274
|
97
97
|
�r 272
|
98
|
+
ör 272
|
98
99
|
ed 271
|
100
|
+
ch_ 270
|
99
101
|
och 269
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
och_
|
104
|
-
_och_ 266
|
102
|
+
_och 269
|
103
|
+
_i_ 268
|
104
|
+
_och_ 268
|
105
|
+
och_ 268
|
105
106
|
y 266
|
106
107
|
li 260
|
107
|
-
|
108
|
+
_Sv 257
|
108
109
|
Sv 257
|
110
|
+
ing 257
|
109
111
|
Sve 256
|
112
|
+
_Sve 256
|
110
113
|
on 254
|
111
114
|
rig 254
|
112
|
-
me
|
113
|
-
_t 253
|
115
|
+
me 253
|
114
116
|
le 252
|
115
|
-
|
117
|
+
_v 251
|
118
|
+
et_ 250
|
116
119
|
_de 249
|
117
|
-
|
118
|
-
is 246
|
119
|
-
et_ 246
|
120
|
+
m_ 249
|
120
121
|
j 245
|
122
|
+
is 245
|
123
|
+
_k 244
|
121
124
|
es 243
|
122
|
-
|
123
|
-
_h 240
|
125
|
+
_h 242
|
124
126
|
rige 238
|
125
127
|
nde 237
|
126
|
-
|
127
|
-
_l
|
128
|
-
|
128
|
+
_p 235
|
129
|
+
_l 233
|
130
|
+
g_ 230
|
129
131
|
ni 229
|
130
132
|
il 228
|
131
|
-
f� 226
|
132
133
|
erig 226
|
134
|
+
verig 226
|
133
135
|
erige 226
|
134
136
|
veri 226
|
135
|
-
|
137
|
+
f� 226
|
136
138
|
Sveri 224
|
139
|
+
_Sver 224
|
137
140
|
Sver 224
|
138
|
-
|
139
|
-
_Sve 220
|
140
|
-
g_ 220
|
141
|
-
de_ 220
|
142
|
-
av 217
|
141
|
+
de_ 223
|
143
142
|
ter 217
|
143
|
+
av 217
|
144
|
+
v_ 214
|
144
145
|
va 212
|
145
|
-
|
146
|
+
_r 209
|
146
147
|
da 209
|
147
|
-
nt
|
148
|
-
_r 206
|
148
|
+
nt 205
|
149
149
|
ne 205
|
150
|
+
_b 204
|
150
151
|
ga 204
|
151
|
-
_b 200
|
152
152
|
ik 199
|
153
153
|
lan 198
|
154
154
|
r� 196
|
155
155
|
fö 195
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
156
|
+
l_ 194
|
157
|
+
om_ 192
|
158
|
+
rn 190
|
159
|
+
ha 187
|
160
160
|
_av 187
|
161
161
|
se 187
|
162
|
-
|
163
|
-
av_ 186
|
164
|
-
än 184
|
162
|
+
av_ 187
|
165
163
|
�n 184
|
164
|
+
än 184
|
165
|
+
_me 180
|
166
166
|
ad 179
|
167
167
|
_ä 178
|
168
|
-
|
168
|
+
_in 177
|
169
169
|
ska 176
|
170
|
-
|
171
|
-
_av_ 174
|
172
|
-
so 172
|
170
|
+
_av_ 175
|
173
171
|
för 172
|
174
172
|
land 172
|
173
|
+
so 172
|
175
174
|
ol 171
|
175
|
+
_u 168
|
176
176
|
it 167
|
177
177
|
sta 166
|
178
|
-
_u 165
|
179
|
-
är_ 164
|
180
178
|
�r_ 164
|
181
179
|
_ha 164
|
180
|
+
är_ 164
|
182
181
|
to 163
|
183
182
|
kt 163
|
183
|
+
ra_ 162
|
184
|
+
_f� 161
|
184
185
|
der 161
|
185
|
-
ma 160
|
186
|
-
un 160
|
187
186
|
v� 160
|
188
|
-
|
187
|
+
un 160
|
188
|
+
ma 160
|
189
189
|
l� 159
|
190
190
|
tr 158
|
191
191
|
rs 156
|
192
|
+
_en 153
|
193
|
+
ka_ 152
|
192
194
|
ag 152
|
193
|
-
am 152
|
194
|
-
_en 151
|
195
195
|
_st 151
|
196
|
-
|
196
|
+
am 151
|
197
197
|
era 148
|
198
198
|
io 147
|
199
199
|
ro 146
|
200
|
-
å_
|
201
|
-
�_
|
202
|
-
|
200
|
+
å_ 146
|
201
|
+
�_ 146
|
202
|
+
ge_ 145
|
203
|
+
tt_ 144
|
203
204
|
ån 143
|
205
|
+
�n 143
|
204
206
|
sa 142
|
205
|
-
den 142
|
206
207
|
ts 142
|
207
|
-
|
208
|
-
|
208
|
+
_fö 142
|
209
|
+
_ti 142
|
210
|
+
den 141
|
209
211
|
_är 139
|
210
|
-
_ti 139
|
211
|
-
tt_ 139
|
212
212
|
ut 138
|
213
213
|
_är_ 137
|
214
|
-
|
214
|
+
ng_ 137
|
215
215
|
ill 136
|
216
216
|
ion 136
|
217
|
-
|
218
|
-
|
217
|
+
med 136
|
218
|
+
_so 133
|
219
|
+
ige_ 133
|
220
|
+
rige_ 132
|
219
221
|
gen 131
|
220
|
-
|
222
|
+
ning 129
|
221
223
|
som 129
|
222
|
-
|
224
|
+
_n 129
|
223
225
|
nin 129
|
224
|
-
ning 129
|
225
226
|
rd 128
|
226
|
-
be 127
|
227
227
|
rna 127
|
228
|
-
|
228
|
+
be 127
|
229
|
+
es_ 126
|
229
230
|
vi 126
|
230
231
|
gs 126
|
231
|
-
ige_ 126
|
232
|
-
rige_ 125
|
233
232
|
ko 125
|
234
|
-
es_ 124
|
235
|
-
ens 124
|
236
233
|
t� 123
|
237
|
-
lä 123
|
238
234
|
di 123
|
239
|
-
|
240
|
-
|
235
|
+
lä 123
|
236
|
+
an_ 123
|
237
|
+
ens 123
|
238
|
+
_g 122
|
239
|
+
_för 122
|
241
240
|
til 122
|
241
|
+
_med 122
|
242
|
+
rt 122
|
242
243
|
vä 122
|
243
|
-
|
244
|
+
_l� 121
|
244
245
|
har 121
|
245
246
|
rk 121
|
246
|
-
|
247
|
-
|
248
|
-
|
247
|
+
som_ 121
|
248
|
+
_en_ 120
|
249
|
+
ll_ 120
|
249
250
|
till 119
|
250
|
-
som_ 119
|
251
251
|
as 119
|
252
|
-
|
252
|
+
na_ 118
|
253
|
+
k_ 118
|
253
254
|
_har 118
|
254
|
-
ll_ 118
|
255
255
|
ck 118
|
256
|
+
har_ 118
|
257
|
+
_som 118
|
256
258
|
ande 117
|
257
|
-
ska_
|
258
|
-
|
259
|
+
ska_ 117
|
260
|
+
_har_ 116
|
259
261
|
dr 115
|
260
|
-
|
261
|
-
ds 115
|
262
|
-
_som 115
|
263
|
-
_re 114
|
264
|
-
ke 114
|
265
|
-
_en_ 114
|
266
|
-
_har_ 114
|
262
|
+
_re 115
|
267
263
|
ade 114
|
268
|
-
|
269
|
-
|
264
|
+
ds 114
|
265
|
+
ke 114
|
266
|
+
_som_ 114
|
267
|
+
no 114
|
268
|
+
ing_ 113
|
270
269
|
pe 113
|
271
|
-
|
270
|
+
nn 113
|
271
|
+
_til 112
|
272
272
|
del 112
|
273
|
-
|
274
|
-
|
275
|
-
_till 111
|
273
|
+
_till 112
|
274
|
+
lt 112
|
276
275
|
fr 109
|
277
|
-
ing_ 109
|
278
|
-
_som_ 109
|
279
|
-
mi 107
|
280
276
|
pr 107
|
277
|
+
mi 107
|
281
278
|
D 106
|
282
|
-
|
283
|
-
ent 104
|
284
|
-
den_ 104
|
279
|
+
re_ 104
|
285
280
|
var 104
|
281
|
+
den_ 103
|
282
|
+
em 103
|
286
283
|
gr 103
|
287
|
-
|
284
|
+
ent 103
|
285
|
+
_D 103
|
288
286
|
si 102
|
287
|
+
nsk 102
|
289
288
|
att 101
|
290
|
-
|
291
|
-
s� 101
|
289
|
+
_lä 101
|
292
290
|
ger 101
|
293
|
-
|
294
|
-
tio 100
|
291
|
+
m� 101
|
295
292
|
län 100
|
296
293
|
ste 100
|
297
|
-
|
294
|
+
tio 100
|
295
|
+
s� 100
|
298
296
|
ern 99
|
297
|
+
ta_ 98
|
298
|
+
ed_ 98
|
299
299
|
det 97
|
300
300
|
tal 97
|
301
|
-
|
302
|
-
|
303
|
-
_va 96
|
301
|
+
_va 97
|
302
|
+
ill_ 96
|
304
303
|
kr 96
|
305
304
|
ten 96
|
306
305
|
tion 96
|
307
306
|
isk 95
|
308
307
|
id 94
|
309
|
-
ill_ 94
|
310
308
|
ks 93
|
311
309
|
ot 93
|
312
|
-
_D 92
|
313
310
|
ven 92
|
314
311
|
ur 92
|
315
|
-
ss 92
|
316
312
|
sv 92
|
317
|
-
|
318
|
-
till_
|
313
|
+
ss 92
|
314
|
+
till_ 91
|
315
|
+
are 91
|
316
|
+
med_ 90
|
319
317
|
_fr 89
|
320
318
|
ell 89
|
321
|
-
med_ 89
|
322
319
|
ati 89
|
320
|
+
_län 88
|
323
321
|
ld 88
|
322
|
+
rå 88
|
324
323
|
lla 88
|
325
324
|
lig 88
|
326
|
-
rå 88
|
327
|
-
_län 87
|
328
325
|
ru 87
|
329
|
-
der_
|
330
|
-
|
326
|
+
der_ 87
|
327
|
+
_med_ 86
|
331
328
|
�r 86
|
329
|
+
år 86
|
332
330
|
pp 86
|
331
|
+
nd_ 86
|
333
332
|
gar 85
|
334
|
-
_med_ 84
|
335
|
-
N 83
|
336
333
|
he 83
|
337
|
-
|
334
|
+
N 83
|
335
|
+
ns_ 82
|
336
|
+
ls 82
|
337
|
+
p� 82
|
338
338
|
nder 82
|
339
339
|
rl 82
|
340
|
-
p� 82
|
341
|
-
t. 81
|
342
|
-
one 81
|
343
340
|
up 81
|
344
|
-
|
345
|
-
|
341
|
+
one 81
|
342
|
+
st� 80
|
346
343
|
på 80
|
344
|
+
rna_ 80
|
345
|
+
rin 80
|
347
346
|
ft 80
|
348
|
-
|
349
|
-
|
350
|
-
_p
|
347
|
+
_på 79
|
348
|
+
t. 79
|
349
|
+
_p� 79
|
351
350
|
ner 78
|
351
|
+
t._ 78
|
352
352
|
erna 78
|
353
|
-
|
354
|
-
eg 77
|
353
|
+
E 77
|
355
354
|
�t 77
|
355
|
+
eg 77
|
356
|
+
_vi 77
|
357
|
+
pa 77
|
356
358
|
nte 77
|
357
|
-
E 77
|
358
359
|
_den 77
|
359
|
-
pa 77
|
360
|
-
r, 77
|
361
360
|
j� 77
|
362
361
|
ät 77
|
363
|
-
rna_ 76
|
364
362
|
det_ 76
|
363
|
+
_. 76
|
364
|
+
r, 76
|
365
365
|
på_ 76
|
366
|
-
rg 76
|
367
|
-
-_ 76
|
368
|
-
ie 76
|
369
366
|
r,_ 76
|
370
|
-
|
371
|
-
|
367
|
+
nde_ 76
|
368
|
+
_ut 76
|
369
|
+
and_ 76
|
370
|
+
ter_ 76
|
371
|
+
ie 76
|
372
372
|
tor 75
|
373
|
+
rg 75
|
374
|
+
_på_ 75
|
373
375
|
h� 75
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
376
|
+
und 75
|
377
|
+
_._ 75
|
378
|
+
n. 75
|
379
|
+
n._ 75
|
380
|
+
gen_ 74
|
381
|
+
ning_ 74
|
382
|
+
_E 74
|
383
|
+
land_ 74
|
378
384
|
F 74
|
379
|
-
äl 73
|
380
|
-
ring 73
|
381
|
-
ges 73
|
382
385
|
�l 73
|
386
|
+
_F 73
|
383
387
|
ensk 73
|
384
388
|
rä 73
|
385
|
-
|
386
|
-
|
389
|
+
ring 73
|
390
|
+
äl 73
|
391
|
+
ges 73
|
387
392
|
ist 73
|
388
|
-
|
393
|
+
dra 73
|
389
394
|
ett 72
|
390
|
-
|
395
|
+
r._ 72
|
396
|
+
_,_ 72
|
397
|
+
_, 72
|
398
|
+
sta_ 72
|
399
|
+
r. 72
|
391
400
|
ms 71
|
392
|
-
_-_ 71
|
393
|
-
_be 71
|
394
|
-
sta_ 71
|
395
|
-
riges 70
|
396
|
-
dig 70
|
397
|
-
iges 70
|
398
|
-
ges_ 70
|
399
|
-
_v� 70
|
400
|
-
fi 70
|