scylla 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -2
- data/Gemfile.lock +16 -1
- data/lib/scylla/classifier.rb +1 -1
- data/lib/scylla/generator.rb +16 -4
- data/lib/scylla/lms/afrikaans.lm +232 -232
- data/lib/scylla/lms/arabic.lm +175 -175
- data/lib/scylla/lms/bulgarian.lm +225 -225
- data/lib/scylla/lms/catalan.lm +309 -309
- data/lib/scylla/lms/danish.lm +167 -167
- data/lib/scylla/lms/english.lm +398 -398
- data/lib/scylla/lms/finnish.lm +237 -237
- data/lib/scylla/lms/french.lm +148 -148
- data/lib/scylla/lms/german.lm +258 -258
- data/lib/scylla/lms/greek.lm +236 -236
- data/lib/scylla/lms/hebrew.lm +154 -154
- data/lib/scylla/lms/hindi.lm +139 -139
- data/lib/scylla/lms/icelandic.lm +239 -239
- data/lib/scylla/lms/indonesian.lm +244 -244
- data/lib/scylla/lms/italian.lm +248 -248
- data/lib/scylla/lms/japanese.lm +90 -90
- data/lib/scylla/lms/korean.lm +306 -306
- data/lib/scylla/lms/norwegian.lm +193 -193
- data/lib/scylla/lms/polish.lm +241 -241
- data/lib/scylla/lms/portuguese.lm +232 -232
- data/lib/scylla/lms/romanian.lm +246 -246
- data/lib/scylla/lms/slovak.lm +242 -242
- data/lib/scylla/lms/slovenian.lm +229 -229
- data/lib/scylla/lms/spanish.lm +164 -164
- data/lib/scylla/lms/swedish.lm +157 -157
- data/lib/scylla/lms/tagalog.lm +247 -247
- data/lib/scylla/lms/thai.lm +252 -252
- data/lib/scylla/lms/turkish.lm +285 -285
- data/lib/scylla/lms/vietnamese.lm +250 -250
- data/lib/scylla/lms/welsh.lm +248 -248
- data/lib/scylla/resources.rb +1 -9
- data/lib/scylla.rb +4 -0
- data/scylla.gemspec +2 -120
- data/source_texts/english.txt +62 -27
- data/test/classifier_test.rb +1 -3
- data/test/fixtures/lms/danish.lm +173 -173
- data/test/fixtures/lms/english.lm +220 -220
- data/test/fixtures/lms/french.lm +175 -175
- data/test/fixtures/lms/german.lm +254 -254
- data/test/fixtures/lms/hindi.lm +139 -139
- data/test/fixtures/lms/italian.lm +236 -236
- data/test/fixtures/lms/japanese.lm +88 -88
- data/test/fixtures/lms/norwegian.lm +182 -182
- data/test/fixtures/lms/spanish.lm +164 -164
- data/test/fixtures/test_languages/spanish +0 -1
- data/test/generator_test.rb +13 -0
- data/test/helper.rb +2 -0
- metadata +18 -25
- data/.document +0 -5
- data/lib/scylla/lms/13375P33K.lm +0 -400
- data/scylla-0.1.0.gem +0 -0
- data/source_texts/13375P33K.txt +0 -199
- data/test/fixtures/lms/13375p33k.lm +0 -400
- data/test/fixtures/source_texts/13375P33K.txt +0 -199
data/lib/scylla/lms/swedish.lm
CHANGED
@@ -1,12 +1,12 @@
|
|
1
|
-
_
|
1
|
+
_ 20224
|
2
2
|
e 4682
|
3
3
|
r 4185
|
4
4
|
a 4010
|
5
5
|
n 3825
|
6
6
|
t 3529
|
7
7
|
i 2832
|
8
|
-
__ 2797
|
9
8
|
s 2764
|
9
|
+
__ 2633
|
10
10
|
l 2337
|
11
11
|
� 2073
|
12
12
|
d 2021
|
@@ -16,31 +16,31 @@ k 1402
|
|
16
16
|
m 1359
|
17
17
|
v 1194
|
18
18
|
er 1164
|
19
|
-
r_
|
19
|
+
r_ 1011
|
20
20
|
en 976
|
21
|
-
� 888
|
22
21
|
ä 888
|
22
|
+
� 888
|
23
23
|
de 842
|
24
|
-
n_
|
25
|
-
t_
|
26
|
-
a_
|
24
|
+
n_ 835
|
25
|
+
t_ 819
|
26
|
+
a_ 750
|
27
27
|
f 745
|
28
28
|
ar 732
|
29
29
|
u 729
|
30
30
|
an 718
|
31
31
|
p 716
|
32
32
|
h 695
|
33
|
+
_s 649
|
33
34
|
st 647
|
34
|
-
_s 643
|
35
35
|
in 614
|
36
36
|
� 597
|
37
37
|
ö 597
|
38
|
+
e_ 586
|
38
39
|
nd 582
|
39
|
-
e_ 579
|
40
40
|
ri 540
|
41
41
|
c 527
|
42
42
|
et 526
|
43
|
-
en_
|
43
|
+
en_ 525
|
44
44
|
å 523
|
45
45
|
� 523
|
46
46
|
ge 519
|
@@ -49,48 +49,48 @@ ra 512
|
|
49
49
|
te 498
|
50
50
|
ti 471
|
51
51
|
ig 467
|
52
|
-
_i
|
52
|
+
_i 464
|
53
53
|
la 437
|
54
|
-
s_
|
55
|
-
._
|
54
|
+
s_ 422
|
55
|
+
._ 415
|
56
56
|
b 412
|
57
57
|
ta 403
|
58
58
|
re 402
|
59
59
|
S 401
|
60
60
|
ve 397
|
61
|
-
_a
|
62
|
-
_o
|
63
|
-
_f
|
61
|
+
_a 395
|
62
|
+
_o 392
|
63
|
+
_f 387
|
64
|
+
_m 380
|
64
65
|
oc 379
|
65
|
-
_m 376
|
66
66
|
, 376
|
67
67
|
ll 375
|
68
68
|
,_ 372
|
69
|
-
_d
|
69
|
+
_d 366
|
70
70
|
ng 362
|
71
|
-
er_
|
71
|
+
er_ 352
|
72
72
|
and 345
|
73
|
-
sk
|
73
|
+
sk 344
|
74
|
+
_� 343
|
74
75
|
na 342
|
75
76
|
om 341
|
76
77
|
at 339
|
77
|
-
_� 339
|
78
78
|
al 337
|
79
|
-
_S 336
|
80
79
|
ka 334
|
80
|
+
_S 332
|
81
81
|
i_ 318
|
82
82
|
or 315
|
83
83
|
är 311
|
84
84
|
�r 311
|
85
|
+
_e 306
|
85
86
|
ns 304
|
86
|
-
_e 303
|
87
87
|
tt 298
|
88
88
|
el 288
|
89
89
|
_oc 288
|
90
90
|
ch 287
|
91
|
+
ar_ 283
|
91
92
|
ige 282
|
92
93
|
eri 281
|
93
|
-
ar_ 281
|
94
94
|
ver 277
|
95
95
|
h_ 273
|
96
96
|
ör 272
|
@@ -99,302 +99,302 @@ ed 271
|
|
99
99
|
och 269
|
100
100
|
ch_ 269
|
101
101
|
_och 268
|
102
|
+
_i_ 267
|
102
103
|
och_ 267
|
103
|
-
_i_ 266
|
104
104
|
_och_ 266
|
105
105
|
y 266
|
106
106
|
li 260
|
107
107
|
ing 257
|
108
108
|
Sv 257
|
109
109
|
Sve 256
|
110
|
-
me 254
|
111
|
-
rig 254
|
112
110
|
on 254
|
111
|
+
rig 254
|
112
|
+
me 254
|
113
|
+
_t 253
|
113
114
|
le 252
|
114
|
-
|
115
|
-
|
116
|
-
|
115
|
+
d_ 251
|
116
|
+
_de 249
|
117
|
+
_v 247
|
117
118
|
is 246
|
118
|
-
|
119
|
+
et_ 246
|
119
120
|
j 245
|
120
|
-
et_ 244
|
121
121
|
es 243
|
122
122
|
m_ 242
|
123
|
-
|
123
|
+
_h 240
|
124
124
|
rige 238
|
125
125
|
nde 237
|
126
|
-
|
127
|
-
_l
|
126
|
+
_k 233
|
127
|
+
_l 231
|
128
|
+
_p 230
|
128
129
|
ni 229
|
129
|
-
_p 229
|
130
130
|
il 228
|
131
|
+
f� 226
|
131
132
|
erig 226
|
132
|
-
veri 226
|
133
133
|
erige 226
|
134
|
+
veri 226
|
134
135
|
verig 226
|
135
|
-
f� 226
|
136
|
-
_Sv 225
|
137
136
|
Sveri 224
|
138
|
-
_Sve 224
|
139
137
|
Sver 224
|
140
|
-
|
141
|
-
|
138
|
+
_Sv 221
|
139
|
+
_Sve 220
|
140
|
+
g_ 220
|
141
|
+
de_ 220
|
142
142
|
av 217
|
143
|
+
ter 217
|
143
144
|
va 212
|
144
145
|
v_ 212
|
145
146
|
da 209
|
146
147
|
nt 206
|
147
|
-
_r
|
148
|
+
_r 206
|
148
149
|
ne 205
|
149
150
|
ga 204
|
151
|
+
_b 200
|
150
152
|
ik 199
|
151
|
-
_Sver 198
|
152
153
|
lan 198
|
153
154
|
r� 196
|
154
|
-
_b 196
|
155
155
|
fö 195
|
156
|
-
|
156
|
+
_Sver 194
|
157
157
|
rn 191
|
158
158
|
l_ 191
|
159
159
|
om_ 190
|
160
|
-
ha 187
|
161
160
|
_av 187
|
162
161
|
se 187
|
162
|
+
ha 187
|
163
163
|
av_ 186
|
164
164
|
än 184
|
165
165
|
�n 184
|
166
166
|
ad 179
|
167
167
|
_ä 178
|
168
|
+
_me 178
|
168
169
|
ska 176
|
169
|
-
|
170
|
+
_in 175
|
170
171
|
_av_ 174
|
171
|
-
_in 173
|
172
172
|
so 172
|
173
173
|
för 172
|
174
174
|
land 172
|
175
175
|
ol 171
|
176
176
|
it 167
|
177
177
|
sta 166
|
178
|
+
_u 165
|
178
179
|
är_ 164
|
179
|
-
_u 164
|
180
180
|
�r_ 164
|
181
|
+
_ha 164
|
181
182
|
to 163
|
182
183
|
kt 163
|
183
184
|
der 161
|
184
|
-
ra_ 161
|
185
|
-
v� 160
|
186
185
|
ma 160
|
187
186
|
un 160
|
188
|
-
|
189
|
-
_ha 159
|
187
|
+
v� 160
|
190
188
|
_f� 159
|
189
|
+
l� 159
|
191
190
|
tr 158
|
192
191
|
rs 156
|
193
192
|
ag 152
|
194
193
|
am 152
|
194
|
+
_en 151
|
195
195
|
_st 151
|
196
196
|
ka_ 151
|
197
|
-
_en 150
|
198
197
|
era 148
|
199
198
|
io 147
|
200
199
|
ro 146
|
200
|
+
å_ 145
|
201
|
+
�_ 145
|
201
202
|
�n 143
|
202
|
-
å_ 143
|
203
|
-
�_ 143
|
204
203
|
ån 143
|
205
|
-
den 142
|
206
|
-
- 142
|
207
204
|
sa 142
|
205
|
+
den 142
|
208
206
|
ts 142
|
207
|
+
- 142
|
209
208
|
_fö 141
|
210
|
-
tt_ 139
|
211
209
|
_är 139
|
210
|
+
_ti 139
|
211
|
+
tt_ 139
|
212
212
|
ut 138
|
213
|
-
_ti 138
|
214
213
|
_är_ 137
|
215
214
|
med 136
|
216
|
-
ion 136
|
217
215
|
ill 136
|
216
|
+
ion 136
|
217
|
+
ge_ 135
|
218
|
+
ng_ 131
|
218
219
|
gen 131
|
219
|
-
|
220
|
+
_so 129
|
221
|
+
som 129
|
222
|
+
ra_ 129
|
220
223
|
nin 129
|
221
224
|
ning 129
|
222
|
-
som 129
|
223
225
|
rd 128
|
224
|
-
_so 128
|
225
|
-
rna 127
|
226
226
|
be 127
|
227
|
-
|
227
|
+
rna 127
|
228
|
+
_n 126
|
228
229
|
vi 126
|
230
|
+
gs 126
|
231
|
+
ige_ 126
|
232
|
+
rige_ 125
|
229
233
|
ko 125
|
230
|
-
ens 124
|
231
|
-
_n 124
|
232
234
|
es_ 124
|
233
|
-
|
235
|
+
ens 124
|
234
236
|
t� 123
|
235
237
|
lä 123
|
238
|
+
di 123
|
236
239
|
an_ 122
|
237
|
-
til 122
|
238
240
|
rt 122
|
239
|
-
|
241
|
+
til 122
|
240
242
|
vä 122
|
241
|
-
rige_ 121
|
242
|
-
rk 121
|
243
|
-
har 121
|
244
243
|
_för 121
|
245
|
-
|
246
|
-
|
244
|
+
har 121
|
245
|
+
rk 121
|
246
|
+
_med 120
|
247
|
+
_l� 120
|
248
|
+
_g 120
|
247
249
|
till 119
|
248
|
-
|
250
|
+
som_ 119
|
249
251
|
as 119
|
250
|
-
|
252
|
+
_- 118
|
251
253
|
_har 118
|
252
|
-
_med 118
|
253
254
|
ll_ 118
|
254
|
-
|
255
|
+
ck 118
|
255
256
|
ande 117
|
256
|
-
har_ 116
|
257
257
|
ska_ 116
|
258
|
+
har_ 116
|
259
|
+
dr 115
|
258
260
|
no 115
|
259
261
|
ds 115
|
260
|
-
dr 115
|
261
262
|
_som 115
|
262
|
-
|
263
|
+
_re 114
|
263
264
|
ke 114
|
264
265
|
_en_ 114
|
265
266
|
_har_ 114
|
266
|
-
|
267
|
-
na_
|
267
|
+
ade 114
|
268
|
+
na_ 114
|
268
269
|
nn 113
|
269
270
|
pe 113
|
270
271
|
lt 112
|
271
272
|
del 112
|
272
|
-
|
273
|
+
k_ 111
|
273
274
|
_til 111
|
274
|
-
|
275
|
+
_till 111
|
275
276
|
fr 109
|
277
|
+
ing_ 109
|
276
278
|
_som_ 109
|
277
|
-
pr 107
|
278
279
|
mi 107
|
279
|
-
|
280
|
+
pr 107
|
280
281
|
D 106
|
281
282
|
em 105
|
282
283
|
ent 104
|
283
|
-
var 104
|
284
284
|
den_ 104
|
285
|
+
var 104
|
285
286
|
gr 103
|
286
287
|
nsk 102
|
287
288
|
si 102
|
288
289
|
att 101
|
289
|
-
s� 101
|
290
290
|
m� 101
|
291
|
+
s� 101
|
291
292
|
ger 101
|
292
|
-
|
293
|
-
län 100
|
293
|
+
_lä 100
|
294
294
|
tio 100
|
295
|
-
|
295
|
+
län 100
|
296
|
+
ste 100
|
296
297
|
re_ 99
|
297
298
|
ern 99
|
298
|
-
tal 97
|
299
299
|
det 97
|
300
|
-
|
301
|
-
|
300
|
+
tal 97
|
301
|
+
ed_ 97
|
302
|
+
ta_ 96
|
303
|
+
_va 96
|
302
304
|
kr 96
|
303
|
-
ed_ 96
|
304
305
|
ten 96
|
305
|
-
|
306
|
+
tion 96
|
306
307
|
isk 95
|
307
|
-
ill_ 94
|
308
308
|
id 94
|
309
|
-
|
309
|
+
ill_ 94
|
310
310
|
ks 93
|
311
|
-
|
312
|
-
|
313
|
-
sv 92
|
311
|
+
ot 93
|
312
|
+
_D 92
|
314
313
|
ven 92
|
315
314
|
ur 92
|
316
|
-
|
315
|
+
ss 92
|
316
|
+
sv 92
|
317
|
+
are 92
|
317
318
|
till_ 90
|
318
|
-
ati 89
|
319
319
|
_fr 89
|
320
320
|
ell 89
|
321
|
-
|
321
|
+
med_ 89
|
322
|
+
ati 89
|
323
|
+
ld 88
|
322
324
|
lla 88
|
323
325
|
lig 88
|
324
|
-
|
325
|
-
|
326
|
+
rå 88
|
327
|
+
_län 87
|
326
328
|
ru 87
|
327
|
-
|
328
|
-
_län 86
|
329
|
+
der_ 86
|
329
330
|
år 86
|
331
|
+
�r 86
|
330
332
|
pp 86
|
331
333
|
gar 85
|
332
|
-
|
333
|
-
der_ 85
|
334
|
+
_med_ 84
|
334
335
|
N 83
|
335
|
-
ls 83
|
336
336
|
he 83
|
337
|
-
|
337
|
+
ls 83
|
338
338
|
nder 82
|
339
|
-
p� 82
|
340
339
|
rl 82
|
341
|
-
|
340
|
+
p� 82
|
341
|
+
t. 81
|
342
342
|
one 81
|
343
|
+
up 81
|
343
344
|
ns_ 80
|
344
|
-
|
345
|
+
rin 80
|
345
346
|
på 80
|
347
|
+
ft 80
|
346
348
|
st� 80
|
347
|
-
|
348
|
-
|
349
|
-
) 78
|
350
|
-
erna 78
|
349
|
+
n. 78
|
350
|
+
_på 78
|
351
351
|
ner 78
|
352
|
-
|
353
|
-
|
354
|
-
pa 77
|
355
|
-
nte 77
|
356
|
-
_på 77
|
352
|
+
erna 78
|
353
|
+
_p� 78
|
357
354
|
eg 77
|
358
|
-
|
355
|
+
�t 77
|
356
|
+
nte 77
|
357
|
+
E 77
|
359
358
|
_den 77
|
360
|
-
|
359
|
+
pa 77
|
360
|
+
r, 77
|
361
361
|
j� 77
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
rg 76
|
362
|
+
ät 77
|
363
|
+
rna_ 76
|
364
|
+
det_ 76
|
366
365
|
på_ 76
|
366
|
+
rg 76
|
367
367
|
-_ 76
|
368
368
|
ie 76
|
369
|
-
r,_
|
370
|
-
h� 75
|
371
|
-
det_ 75
|
372
|
-
tor 75
|
373
|
-
rna_ 75
|
369
|
+
r,_ 76
|
374
370
|
und 75
|
375
|
-
|
376
|
-
|
371
|
+
r. 75
|
372
|
+
tor 75
|
373
|
+
h� 75
|
374
|
+
_ut 74
|
375
|
+
_på_ 74
|
377
376
|
nde_ 74
|
378
|
-
|
379
|
-
|
380
|
-
ges 73
|
381
|
-
ring 73
|
382
|
-
ist 73
|
377
|
+
_vi 74
|
378
|
+
F 74
|
383
379
|
äl 73
|
384
|
-
|
385
|
-
|
386
|
-
rä 73
|
380
|
+
ring 73
|
381
|
+
ges 73
|
387
382
|
�l 73
|
388
|
-
|
383
|
+
ensk 73
|
384
|
+
rä 73
|
385
|
+
ning_ 73
|
386
|
+
dra 73
|
387
|
+
ist 73
|
388
|
+
ter_ 72
|
389
389
|
ett 72
|
390
|
-
_vi 72
|
391
|
-
_be 71
|
392
|
-
ms 71
|
393
390
|
gen_ 71
|
394
|
-
|
391
|
+
ms 71
|
395
392
|
_-_ 71
|
396
|
-
|
397
|
-
|
393
|
+
_be 71
|
394
|
+
sta_ 71
|
398
395
|
riges 70
|
399
|
-
|
396
|
+
dig 70
|
400
397
|
iges 70
|
398
|
+
ges_ 70
|
399
|
+
_v� 70
|
400
|
+
fi 70
|