scylla 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -2
- data/Gemfile.lock +16 -1
- data/lib/scylla/classifier.rb +1 -1
- data/lib/scylla/generator.rb +16 -4
- data/lib/scylla/lms/afrikaans.lm +232 -232
- data/lib/scylla/lms/arabic.lm +175 -175
- data/lib/scylla/lms/bulgarian.lm +225 -225
- data/lib/scylla/lms/catalan.lm +309 -309
- data/lib/scylla/lms/danish.lm +167 -167
- data/lib/scylla/lms/english.lm +398 -398
- data/lib/scylla/lms/finnish.lm +237 -237
- data/lib/scylla/lms/french.lm +148 -148
- data/lib/scylla/lms/german.lm +258 -258
- data/lib/scylla/lms/greek.lm +236 -236
- data/lib/scylla/lms/hebrew.lm +154 -154
- data/lib/scylla/lms/hindi.lm +139 -139
- data/lib/scylla/lms/icelandic.lm +239 -239
- data/lib/scylla/lms/indonesian.lm +244 -244
- data/lib/scylla/lms/italian.lm +248 -248
- data/lib/scylla/lms/japanese.lm +90 -90
- data/lib/scylla/lms/korean.lm +306 -306
- data/lib/scylla/lms/norwegian.lm +193 -193
- data/lib/scylla/lms/polish.lm +241 -241
- data/lib/scylla/lms/portuguese.lm +232 -232
- data/lib/scylla/lms/romanian.lm +246 -246
- data/lib/scylla/lms/slovak.lm +242 -242
- data/lib/scylla/lms/slovenian.lm +229 -229
- data/lib/scylla/lms/spanish.lm +164 -164
- data/lib/scylla/lms/swedish.lm +157 -157
- data/lib/scylla/lms/tagalog.lm +247 -247
- data/lib/scylla/lms/thai.lm +252 -252
- data/lib/scylla/lms/turkish.lm +285 -285
- data/lib/scylla/lms/vietnamese.lm +250 -250
- data/lib/scylla/lms/welsh.lm +248 -248
- data/lib/scylla/resources.rb +1 -9
- data/lib/scylla.rb +4 -0
- data/scylla.gemspec +2 -120
- data/source_texts/english.txt +62 -27
- data/test/classifier_test.rb +1 -3
- data/test/fixtures/lms/danish.lm +173 -173
- data/test/fixtures/lms/english.lm +220 -220
- data/test/fixtures/lms/french.lm +175 -175
- data/test/fixtures/lms/german.lm +254 -254
- data/test/fixtures/lms/hindi.lm +139 -139
- data/test/fixtures/lms/italian.lm +236 -236
- data/test/fixtures/lms/japanese.lm +88 -88
- data/test/fixtures/lms/norwegian.lm +182 -182
- data/test/fixtures/lms/spanish.lm +164 -164
- data/test/fixtures/test_languages/spanish +0 -1
- data/test/generator_test.rb +13 -0
- data/test/helper.rb +2 -0
- metadata +18 -25
- data/.document +0 -5
- data/lib/scylla/lms/13375P33K.lm +0 -400
- data/scylla-0.1.0.gem +0 -0
- data/source_texts/13375P33K.txt +0 -199
- data/test/fixtures/lms/13375p33k.lm +0 -400
- data/test/fixtures/source_texts/13375P33K.txt +0 -199
data/lib/scylla/lms/swedish.lm
CHANGED
@@ -1,12 +1,12 @@
|
|
1
|
-
_
|
1
|
+
_ 20224
|
2
2
|
e 4682
|
3
3
|
r 4185
|
4
4
|
a 4010
|
5
5
|
n 3825
|
6
6
|
t 3529
|
7
7
|
i 2832
|
8
|
-
__ 2797
|
9
8
|
s 2764
|
9
|
+
__ 2633
|
10
10
|
l 2337
|
11
11
|
� 2073
|
12
12
|
d 2021
|
@@ -16,31 +16,31 @@ k 1402
|
|
16
16
|
m 1359
|
17
17
|
v 1194
|
18
18
|
er 1164
|
19
|
-
r_
|
19
|
+
r_ 1011
|
20
20
|
en 976
|
21
|
-
� 888
|
22
21
|
ä 888
|
22
|
+
� 888
|
23
23
|
de 842
|
24
|
-
n_
|
25
|
-
t_
|
26
|
-
a_
|
24
|
+
n_ 835
|
25
|
+
t_ 819
|
26
|
+
a_ 750
|
27
27
|
f 745
|
28
28
|
ar 732
|
29
29
|
u 729
|
30
30
|
an 718
|
31
31
|
p 716
|
32
32
|
h 695
|
33
|
+
_s 649
|
33
34
|
st 647
|
34
|
-
_s 643
|
35
35
|
in 614
|
36
36
|
� 597
|
37
37
|
ö 597
|
38
|
+
e_ 586
|
38
39
|
nd 582
|
39
|
-
e_ 579
|
40
40
|
ri 540
|
41
41
|
c 527
|
42
42
|
et 526
|
43
|
-
en_
|
43
|
+
en_ 525
|
44
44
|
å 523
|
45
45
|
� 523
|
46
46
|
ge 519
|
@@ -49,48 +49,48 @@ ra 512
|
|
49
49
|
te 498
|
50
50
|
ti 471
|
51
51
|
ig 467
|
52
|
-
_i
|
52
|
+
_i 464
|
53
53
|
la 437
|
54
|
-
s_
|
55
|
-
._
|
54
|
+
s_ 422
|
55
|
+
._ 415
|
56
56
|
b 412
|
57
57
|
ta 403
|
58
58
|
re 402
|
59
59
|
S 401
|
60
60
|
ve 397
|
61
|
-
_a
|
62
|
-
_o
|
63
|
-
_f
|
61
|
+
_a 395
|
62
|
+
_o 392
|
63
|
+
_f 387
|
64
|
+
_m 380
|
64
65
|
oc 379
|
65
|
-
_m 376
|
66
66
|
, 376
|
67
67
|
ll 375
|
68
68
|
,_ 372
|
69
|
-
_d
|
69
|
+
_d 366
|
70
70
|
ng 362
|
71
|
-
er_
|
71
|
+
er_ 352
|
72
72
|
and 345
|
73
|
-
sk
|
73
|
+
sk 344
|
74
|
+
_� 343
|
74
75
|
na 342
|
75
76
|
om 341
|
76
77
|
at 339
|
77
|
-
_� 339
|
78
78
|
al 337
|
79
|
-
_S 336
|
80
79
|
ka 334
|
80
|
+
_S 332
|
81
81
|
i_ 318
|
82
82
|
or 315
|
83
83
|
är 311
|
84
84
|
�r 311
|
85
|
+
_e 306
|
85
86
|
ns 304
|
86
|
-
_e 303
|
87
87
|
tt 298
|
88
88
|
el 288
|
89
89
|
_oc 288
|
90
90
|
ch 287
|
91
|
+
ar_ 283
|
91
92
|
ige 282
|
92
93
|
eri 281
|
93
|
-
ar_ 281
|
94
94
|
ver 277
|
95
95
|
h_ 273
|
96
96
|
ör 272
|
@@ -99,302 +99,302 @@ ed 271
|
|
99
99
|
och 269
|
100
100
|
ch_ 269
|
101
101
|
_och 268
|
102
|
+
_i_ 267
|
102
103
|
och_ 267
|
103
|
-
_i_ 266
|
104
104
|
_och_ 266
|
105
105
|
y 266
|
106
106
|
li 260
|
107
107
|
ing 257
|
108
108
|
Sv 257
|
109
109
|
Sve 256
|
110
|
-
me 254
|
111
|
-
rig 254
|
112
110
|
on 254
|
111
|
+
rig 254
|
112
|
+
me 254
|
113
|
+
_t 253
|
113
114
|
le 252
|
114
|
-
|
115
|
-
|
116
|
-
|
115
|
+
d_ 251
|
116
|
+
_de 249
|
117
|
+
_v 247
|
117
118
|
is 246
|
118
|
-
|
119
|
+
et_ 246
|
119
120
|
j 245
|
120
|
-
et_ 244
|
121
121
|
es 243
|
122
122
|
m_ 242
|
123
|
-
|
123
|
+
_h 240
|
124
124
|
rige 238
|
125
125
|
nde 237
|
126
|
-
|
127
|
-
_l
|
126
|
+
_k 233
|
127
|
+
_l 231
|
128
|
+
_p 230
|
128
129
|
ni 229
|
129
|
-
_p 229
|
130
130
|
il 228
|
131
|
+
f� 226
|
131
132
|
erig 226
|
132
|
-
veri 226
|
133
133
|
erige 226
|
134
|
+
veri 226
|
134
135
|
verig 226
|
135
|
-
f� 226
|
136
|
-
_Sv 225
|
137
136
|
Sveri 224
|
138
|
-
_Sve 224
|
139
137
|
Sver 224
|
140
|
-
|
141
|
-
|
138
|
+
_Sv 221
|
139
|
+
_Sve 220
|
140
|
+
g_ 220
|
141
|
+
de_ 220
|
142
142
|
av 217
|
143
|
+
ter 217
|
143
144
|
va 212
|
144
145
|
v_ 212
|
145
146
|
da 209
|
146
147
|
nt 206
|
147
|
-
_r
|
148
|
+
_r 206
|
148
149
|
ne 205
|
149
150
|
ga 204
|
151
|
+
_b 200
|
150
152
|
ik 199
|
151
|
-
_Sver 198
|
152
153
|
lan 198
|
153
154
|
r� 196
|
154
|
-
_b 196
|
155
155
|
fö 195
|
156
|
-
|
156
|
+
_Sver 194
|
157
157
|
rn 191
|
158
158
|
l_ 191
|
159
159
|
om_ 190
|
160
|
-
ha 187
|
161
160
|
_av 187
|
162
161
|
se 187
|
162
|
+
ha 187
|
163
163
|
av_ 186
|
164
164
|
än 184
|
165
165
|
�n 184
|
166
166
|
ad 179
|
167
167
|
_ä 178
|
168
|
+
_me 178
|
168
169
|
ska 176
|
169
|
-
|
170
|
+
_in 175
|
170
171
|
_av_ 174
|
171
|
-
_in 173
|
172
172
|
so 172
|
173
173
|
för 172
|
174
174
|
land 172
|
175
175
|
ol 171
|
176
176
|
it 167
|
177
177
|
sta 166
|
178
|
+
_u 165
|
178
179
|
är_ 164
|
179
|
-
_u 164
|
180
180
|
�r_ 164
|
181
|
+
_ha 164
|
181
182
|
to 163
|
182
183
|
kt 163
|
183
184
|
der 161
|
184
|
-
ra_ 161
|
185
|
-
v� 160
|
186
185
|
ma 160
|
187
186
|
un 160
|
188
|
-
|
189
|
-
_ha 159
|
187
|
+
v� 160
|
190
188
|
_f� 159
|
189
|
+
l� 159
|
191
190
|
tr 158
|
192
191
|
rs 156
|
193
192
|
ag 152
|
194
193
|
am 152
|
194
|
+
_en 151
|
195
195
|
_st 151
|
196
196
|
ka_ 151
|
197
|
-
_en 150
|
198
197
|
era 148
|
199
198
|
io 147
|
200
199
|
ro 146
|
200
|
+
å_ 145
|
201
|
+
�_ 145
|
201
202
|
�n 143
|
202
|
-
å_ 143
|
203
|
-
�_ 143
|
204
203
|
ån 143
|
205
|
-
den 142
|
206
|
-
- 142
|
207
204
|
sa 142
|
205
|
+
den 142
|
208
206
|
ts 142
|
207
|
+
- 142
|
209
208
|
_fö 141
|
210
|
-
tt_ 139
|
211
209
|
_är 139
|
210
|
+
_ti 139
|
211
|
+
tt_ 139
|
212
212
|
ut 138
|
213
|
-
_ti 138
|
214
213
|
_är_ 137
|
215
214
|
med 136
|
216
|
-
ion 136
|
217
215
|
ill 136
|
216
|
+
ion 136
|
217
|
+
ge_ 135
|
218
|
+
ng_ 131
|
218
219
|
gen 131
|
219
|
-
|
220
|
+
_so 129
|
221
|
+
som 129
|
222
|
+
ra_ 129
|
220
223
|
nin 129
|
221
224
|
ning 129
|
222
|
-
som 129
|
223
225
|
rd 128
|
224
|
-
_so 128
|
225
|
-
rna 127
|
226
226
|
be 127
|
227
|
-
|
227
|
+
rna 127
|
228
|
+
_n 126
|
228
229
|
vi 126
|
230
|
+
gs 126
|
231
|
+
ige_ 126
|
232
|
+
rige_ 125
|
229
233
|
ko 125
|
230
|
-
ens 124
|
231
|
-
_n 124
|
232
234
|
es_ 124
|
233
|
-
|
235
|
+
ens 124
|
234
236
|
t� 123
|
235
237
|
lä 123
|
238
|
+
di 123
|
236
239
|
an_ 122
|
237
|
-
til 122
|
238
240
|
rt 122
|
239
|
-
|
241
|
+
til 122
|
240
242
|
vä 122
|
241
|
-
rige_ 121
|
242
|
-
rk 121
|
243
|
-
har 121
|
244
243
|
_för 121
|
245
|
-
|
246
|
-
|
244
|
+
har 121
|
245
|
+
rk 121
|
246
|
+
_med 120
|
247
|
+
_l� 120
|
248
|
+
_g 120
|
247
249
|
till 119
|
248
|
-
|
250
|
+
som_ 119
|
249
251
|
as 119
|
250
|
-
|
252
|
+
_- 118
|
251
253
|
_har 118
|
252
|
-
_med 118
|
253
254
|
ll_ 118
|
254
|
-
|
255
|
+
ck 118
|
255
256
|
ande 117
|
256
|
-
har_ 116
|
257
257
|
ska_ 116
|
258
|
+
har_ 116
|
259
|
+
dr 115
|
258
260
|
no 115
|
259
261
|
ds 115
|
260
|
-
dr 115
|
261
262
|
_som 115
|
262
|
-
|
263
|
+
_re 114
|
263
264
|
ke 114
|
264
265
|
_en_ 114
|
265
266
|
_har_ 114
|
266
|
-
|
267
|
-
na_
|
267
|
+
ade 114
|
268
|
+
na_ 114
|
268
269
|
nn 113
|
269
270
|
pe 113
|
270
271
|
lt 112
|
271
272
|
del 112
|
272
|
-
|
273
|
+
k_ 111
|
273
274
|
_til 111
|
274
|
-
|
275
|
+
_till 111
|
275
276
|
fr 109
|
277
|
+
ing_ 109
|
276
278
|
_som_ 109
|
277
|
-
pr 107
|
278
279
|
mi 107
|
279
|
-
|
280
|
+
pr 107
|
280
281
|
D 106
|
281
282
|
em 105
|
282
283
|
ent 104
|
283
|
-
var 104
|
284
284
|
den_ 104
|
285
|
+
var 104
|
285
286
|
gr 103
|
286
287
|
nsk 102
|
287
288
|
si 102
|
288
289
|
att 101
|
289
|
-
s� 101
|
290
290
|
m� 101
|
291
|
+
s� 101
|
291
292
|
ger 101
|
292
|
-
|
293
|
-
län 100
|
293
|
+
_lä 100
|
294
294
|
tio 100
|
295
|
-
|
295
|
+
län 100
|
296
|
+
ste 100
|
296
297
|
re_ 99
|
297
298
|
ern 99
|
298
|
-
tal 97
|
299
299
|
det 97
|
300
|
-
|
301
|
-
|
300
|
+
tal 97
|
301
|
+
ed_ 97
|
302
|
+
ta_ 96
|
303
|
+
_va 96
|
302
304
|
kr 96
|
303
|
-
ed_ 96
|
304
305
|
ten 96
|
305
|
-
|
306
|
+
tion 96
|
306
307
|
isk 95
|
307
|
-
ill_ 94
|
308
308
|
id 94
|
309
|
-
|
309
|
+
ill_ 94
|
310
310
|
ks 93
|
311
|
-
|
312
|
-
|
313
|
-
sv 92
|
311
|
+
ot 93
|
312
|
+
_D 92
|
314
313
|
ven 92
|
315
314
|
ur 92
|
316
|
-
|
315
|
+
ss 92
|
316
|
+
sv 92
|
317
|
+
are 92
|
317
318
|
till_ 90
|
318
|
-
ati 89
|
319
319
|
_fr 89
|
320
320
|
ell 89
|
321
|
-
|
321
|
+
med_ 89
|
322
|
+
ati 89
|
323
|
+
ld 88
|
322
324
|
lla 88
|
323
325
|
lig 88
|
324
|
-
|
325
|
-
|
326
|
+
rå 88
|
327
|
+
_län 87
|
326
328
|
ru 87
|
327
|
-
|
328
|
-
_län 86
|
329
|
+
der_ 86
|
329
330
|
år 86
|
331
|
+
�r 86
|
330
332
|
pp 86
|
331
333
|
gar 85
|
332
|
-
|
333
|
-
der_ 85
|
334
|
+
_med_ 84
|
334
335
|
N 83
|
335
|
-
ls 83
|
336
336
|
he 83
|
337
|
-
|
337
|
+
ls 83
|
338
338
|
nder 82
|
339
|
-
p� 82
|
340
339
|
rl 82
|
341
|
-
|
340
|
+
p� 82
|
341
|
+
t. 81
|
342
342
|
one 81
|
343
|
+
up 81
|
343
344
|
ns_ 80
|
344
|
-
|
345
|
+
rin 80
|
345
346
|
på 80
|
347
|
+
ft 80
|
346
348
|
st� 80
|
347
|
-
|
348
|
-
|
349
|
-
) 78
|
350
|
-
erna 78
|
349
|
+
n. 78
|
350
|
+
_på 78
|
351
351
|
ner 78
|
352
|
-
|
353
|
-
|
354
|
-
pa 77
|
355
|
-
nte 77
|
356
|
-
_på 77
|
352
|
+
erna 78
|
353
|
+
_p� 78
|
357
354
|
eg 77
|
358
|
-
|
355
|
+
�t 77
|
356
|
+
nte 77
|
357
|
+
E 77
|
359
358
|
_den 77
|
360
|
-
|
359
|
+
pa 77
|
360
|
+
r, 77
|
361
361
|
j� 77
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
rg 76
|
362
|
+
ät 77
|
363
|
+
rna_ 76
|
364
|
+
det_ 76
|
366
365
|
på_ 76
|
366
|
+
rg 76
|
367
367
|
-_ 76
|
368
368
|
ie 76
|
369
|
-
r,_
|
370
|
-
h� 75
|
371
|
-
det_ 75
|
372
|
-
tor 75
|
373
|
-
rna_ 75
|
369
|
+
r,_ 76
|
374
370
|
und 75
|
375
|
-
|
376
|
-
|
371
|
+
r. 75
|
372
|
+
tor 75
|
373
|
+
h� 75
|
374
|
+
_ut 74
|
375
|
+
_på_ 74
|
377
376
|
nde_ 74
|
378
|
-
|
379
|
-
|
380
|
-
ges 73
|
381
|
-
ring 73
|
382
|
-
ist 73
|
377
|
+
_vi 74
|
378
|
+
F 74
|
383
379
|
äl 73
|
384
|
-
|
385
|
-
|
386
|
-
rä 73
|
380
|
+
ring 73
|
381
|
+
ges 73
|
387
382
|
�l 73
|
388
|
-
|
383
|
+
ensk 73
|
384
|
+
rä 73
|
385
|
+
ning_ 73
|
386
|
+
dra 73
|
387
|
+
ist 73
|
388
|
+
ter_ 72
|
389
389
|
ett 72
|
390
|
-
_vi 72
|
391
|
-
_be 71
|
392
|
-
ms 71
|
393
390
|
gen_ 71
|
394
|
-
|
391
|
+
ms 71
|
395
392
|
_-_ 71
|
396
|
-
|
397
|
-
|
393
|
+
_be 71
|
394
|
+
sta_ 71
|
398
395
|
riges 70
|
399
|
-
|
396
|
+
dig 70
|
400
397
|
iges 70
|
398
|
+
ges_ 70
|
399
|
+
_v� 70
|
400
|
+
fi 70
|