scylla 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -2
- data/Gemfile.lock +16 -1
- data/lib/scylla/classifier.rb +1 -1
- data/lib/scylla/generator.rb +16 -4
- data/lib/scylla/lms/afrikaans.lm +232 -232
- data/lib/scylla/lms/arabic.lm +175 -175
- data/lib/scylla/lms/bulgarian.lm +225 -225
- data/lib/scylla/lms/catalan.lm +309 -309
- data/lib/scylla/lms/danish.lm +167 -167
- data/lib/scylla/lms/english.lm +398 -398
- data/lib/scylla/lms/finnish.lm +237 -237
- data/lib/scylla/lms/french.lm +148 -148
- data/lib/scylla/lms/german.lm +258 -258
- data/lib/scylla/lms/greek.lm +236 -236
- data/lib/scylla/lms/hebrew.lm +154 -154
- data/lib/scylla/lms/hindi.lm +139 -139
- data/lib/scylla/lms/icelandic.lm +239 -239
- data/lib/scylla/lms/indonesian.lm +244 -244
- data/lib/scylla/lms/italian.lm +248 -248
- data/lib/scylla/lms/japanese.lm +90 -90
- data/lib/scylla/lms/korean.lm +306 -306
- data/lib/scylla/lms/norwegian.lm +193 -193
- data/lib/scylla/lms/polish.lm +241 -241
- data/lib/scylla/lms/portuguese.lm +232 -232
- data/lib/scylla/lms/romanian.lm +246 -246
- data/lib/scylla/lms/slovak.lm +242 -242
- data/lib/scylla/lms/slovenian.lm +229 -229
- data/lib/scylla/lms/spanish.lm +164 -164
- data/lib/scylla/lms/swedish.lm +157 -157
- data/lib/scylla/lms/tagalog.lm +247 -247
- data/lib/scylla/lms/thai.lm +252 -252
- data/lib/scylla/lms/turkish.lm +285 -285
- data/lib/scylla/lms/vietnamese.lm +250 -250
- data/lib/scylla/lms/welsh.lm +248 -248
- data/lib/scylla/resources.rb +1 -9
- data/lib/scylla.rb +4 -0
- data/scylla.gemspec +2 -120
- data/source_texts/english.txt +62 -27
- data/test/classifier_test.rb +1 -3
- data/test/fixtures/lms/danish.lm +173 -173
- data/test/fixtures/lms/english.lm +220 -220
- data/test/fixtures/lms/french.lm +175 -175
- data/test/fixtures/lms/german.lm +254 -254
- data/test/fixtures/lms/hindi.lm +139 -139
- data/test/fixtures/lms/italian.lm +236 -236
- data/test/fixtures/lms/japanese.lm +88 -88
- data/test/fixtures/lms/norwegian.lm +182 -182
- data/test/fixtures/lms/spanish.lm +164 -164
- data/test/fixtures/test_languages/spanish +0 -1
- data/test/generator_test.rb +13 -0
- data/test/helper.rb +2 -0
- metadata +18 -25
- data/.document +0 -5
- data/lib/scylla/lms/13375P33K.lm +0 -400
- data/scylla-0.1.0.gem +0 -0
- data/source_texts/13375P33K.txt +0 -199
- data/test/fixtures/lms/13375p33k.lm +0 -400
- data/test/fixtures/source_texts/13375P33K.txt +0 -199
data/lib/scylla/lms/danish.lm
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
_
|
1
|
+
_ 16370
|
2
2
|
e 5759
|
3
3
|
r 3334
|
4
4
|
n 3061
|
@@ -13,388 +13,388 @@ g 1617
|
|
13
13
|
k 1278
|
14
14
|
m 1273
|
15
15
|
er 1209
|
16
|
-
e_
|
17
|
-
__
|
16
|
+
e_ 1183
|
17
|
+
__ 1139
|
18
18
|
de 1045
|
19
19
|
en 993
|
20
20
|
� 940
|
21
21
|
f 939
|
22
|
-
r_
|
22
|
+
r_ 840
|
23
23
|
v 770
|
24
|
-
t_
|
24
|
+
t_ 732
|
25
25
|
an 724
|
26
|
-
n_
|
26
|
+
n_ 715
|
27
27
|
u 605
|
28
28
|
nd 598
|
29
29
|
b 585
|
30
30
|
et 574
|
31
|
-
_s
|
31
|
+
_s 552
|
32
32
|
. 546
|
33
33
|
re 543
|
34
|
+
er_ 541
|
34
35
|
te 540
|
35
36
|
st 530
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
_d 484
|
37
|
+
en_ 530
|
38
|
+
g_ 521
|
39
|
+
_o 503
|
40
|
+
_d 487
|
41
41
|
, 480
|
42
42
|
,_ 478
|
43
43
|
h 474
|
44
44
|
ge 474
|
45
|
-
_a
|
45
|
+
_a 463
|
46
46
|
in 440
|
47
47
|
p 436
|
48
|
+
_f 431
|
48
49
|
ar 430
|
49
|
-
_f 423
|
50
50
|
og 415
|
51
51
|
or 411
|
52
52
|
ti 406
|
53
53
|
._ 405
|
54
|
-
|
55
|
-
|
54
|
+
_e 404
|
55
|
+
et_ 399
|
56
56
|
ed 381
|
57
|
-
_m
|
58
|
-
_i
|
57
|
+
_m 381
|
58
|
+
_i 381
|
59
59
|
sk 365
|
60
60
|
ne 365
|
61
61
|
le 353
|
62
|
-
_og
|
62
|
+
_og 346
|
63
63
|
ke 343
|
64
64
|
el 342
|
65
65
|
og_ 338
|
66
66
|
� 332
|
67
67
|
ø 332
|
68
|
-
d_
|
68
|
+
d_ 329
|
69
69
|
me 324
|
70
70
|
ng 317
|
71
|
-
|
72
|
-
|
71
|
+
_og_ 315
|
72
|
+
_de 315
|
73
73
|
å 293
|
74
74
|
ig 293
|
75
75
|
� 293
|
76
|
-
_b
|
77
|
-
� 286
|
76
|
+
_b 290
|
78
77
|
æ 286
|
79
|
-
|
78
|
+
� 286
|
79
|
+
i_ 284
|
80
|
+
de_ 275
|
80
81
|
_h 274
|
81
|
-
|
82
|
+
s_ 273
|
82
83
|
ri 273
|
83
|
-
s_ 271
|
84
84
|
D 268
|
85
85
|
nde 263
|
86
|
-
om 262
|
87
86
|
li 262
|
87
|
+
om 262
|
88
88
|
ma 259
|
89
89
|
ve 257
|
90
90
|
y 256
|
91
|
+
_i_ 254
|
91
92
|
af 254
|
92
|
-
_i_ 252
|
93
93
|
_t 249
|
94
94
|
at 245
|
95
95
|
il 244
|
96
|
+
es 241
|
96
97
|
and 241
|
97
|
-
es 240
|
98
98
|
be 234
|
99
99
|
al 234
|
100
100
|
is 233
|
101
101
|
fo 232
|
102
102
|
se 232
|
103
103
|
ns 229
|
104
|
+
_D 226
|
104
105
|
la 224
|
105
|
-
_D 223
|
106
106
|
on 221
|
107
107
|
rk 219
|
108
108
|
_af 217
|
109
109
|
den 216
|
110
110
|
der 214
|
111
|
+
_k 212
|
111
112
|
_me 210
|
112
|
-
_k 210
|
113
113
|
m_ 209
|
114
114
|
ing 207
|
115
115
|
_v 203
|
116
|
-
k_
|
116
|
+
k_ 201
|
117
117
|
ra 191
|
118
118
|
f_ 188
|
119
119
|
af_ 186
|
120
|
+
_p 184
|
120
121
|
for 184
|
121
|
-
_p 181
|
122
122
|
_af_ 180
|
123
|
+
l_ 179
|
123
124
|
ol 174
|
124
|
-
ere 172
|
125
|
-
_fo 172
|
126
125
|
ark 172
|
127
|
-
|
126
|
+
_fo 172
|
127
|
+
ere 172
|
128
128
|
ste 169
|
129
|
+
lan 169
|
129
130
|
te_ 168
|
131
|
+
_l 166
|
130
132
|
mar 165
|
131
|
-
_l 165
|
132
|
-
l_ 164
|
133
133
|
ll 162
|
134
134
|
ter 161
|
135
135
|
j 159
|
136
136
|
ske 159
|
137
137
|
om_ 155
|
138
|
+
ke_ 154
|
139
|
+
den_ 153
|
138
140
|
land 153
|
141
|
+
ha 153
|
139
142
|
mark 153
|
140
143
|
Da 153
|
141
|
-
ke_ 153
|
142
|
-
den_ 153
|
143
|
-
ha 153
|
144
|
-
ni 151
|
145
144
|
ed_ 151
|
145
|
+
ni 151
|
146
146
|
_st 151
|
147
|
-
_for 149
|
148
147
|
so 149
|
148
|
+
_for 149
|
149
149
|
Dan 148
|
150
150
|
ta 148
|
151
151
|
ger 147
|
152
|
+
_er 145
|
152
153
|
nge 144
|
153
154
|
det 143
|
154
|
-
re_
|
155
|
+
re_ 141
|
155
156
|
ede 139
|
156
157
|
nma 138
|
157
|
-
nm 138
|
158
158
|
vi 138
|
159
|
-
|
159
|
+
nm 138
|
160
160
|
_en 137
|
161
|
+
nmark 137
|
161
162
|
nmar 137
|
162
|
-
anm 136
|
163
163
|
anma 136
|
164
|
+
_Da 136
|
164
165
|
anmar 136
|
166
|
+
anm 136
|
165
167
|
ev 135
|
166
168
|
rs 135
|
167
|
-
der_
|
168
|
-
|
169
|
+
der_ 134
|
170
|
+
un 133
|
169
171
|
Danm 133
|
172
|
+
Danma 133
|
170
173
|
S 133
|
171
|
-
_Da 133
|
172
|
-
un 133
|
173
174
|
ans 132
|
174
|
-
|
175
|
+
_Dan 132
|
175
176
|
da 130
|
176
177
|
med 130
|
177
|
-
_Dan 129
|
178
|
-
ro 127
|
179
178
|
io 127
|
179
|
+
ro 127
|
180
180
|
til 126
|
181
|
-
som 125
|
182
|
-
_er_ 125
|
183
181
|
ik 125
|
182
|
+
_er_ 125
|
183
|
+
som 125
|
184
184
|
rn 124
|
185
185
|
_ti 124
|
186
|
+
�_ 123
|
186
187
|
ds 123
|
188
|
+
å_ 123
|
187
189
|
_u 122
|
188
190
|
em 122
|
189
191
|
eg 121
|
190
|
-
|
191
|
-
å_ 121
|
192
|
-
_ha 120
|
193
|
-
_Danm 120
|
192
|
+
_Danm 121
|
194
193
|
rt 120
|
194
|
+
_ha 120
|
195
195
|
ld 119
|
196
196
|
_med 119
|
197
|
-
|
197
|
+
_so 119
|
198
|
+
_g 118
|
198
199
|
som_ 118
|
199
200
|
to 117
|
200
|
-
|
201
|
-
_g 116
|
201
|
+
_som 116
|
202
202
|
ske_ 116
|
203
203
|
det_ 115
|
204
|
-
|
205
|
-
end 114
|
204
|
+
_r 115
|
206
205
|
ar_ 114
|
206
|
+
end 114
|
207
207
|
tr 114
|
208
|
-
|
209
|
-
|
208
|
+
ern 114
|
209
|
+
_som_ 113
|
210
210
|
ko 111
|
211
211
|
ud 111
|
212
|
-
|
212
|
+
id 111
|
213
213
|
del 110
|
214
214
|
_til 109
|
215
215
|
si 108
|
216
|
-
|
216
|
+
nsk 108
|
217
217
|
va 108
|
218
218
|
mi 108
|
219
|
-
|
219
|
+
lig 108
|
220
220
|
ls 107
|
221
|
-
_be
|
222
|
-
|
223
|
-
|
221
|
+
_be 107
|
222
|
+
ør 105
|
223
|
+
ind 105
|
224
224
|
ka 105
|
225
|
+
bl 105
|
225
226
|
ion 105
|
226
|
-
|
227
|
-
|
227
|
+
�r 105
|
228
|
+
_da 105
|
228
229
|
gs 105
|
229
230
|
lle 104
|
230
|
-
|
231
|
+
_S 104
|
231
232
|
t� 103
|
232
|
-
|
233
|
-
ne_ 102
|
233
|
+
_� 103
|
234
234
|
med_ 102
|
235
|
+
ne_ 102
|
235
236
|
ag 101
|
236
|
-
_en_ 101
|
237
237
|
tt 101
|
238
238
|
dt 101
|
239
|
-
|
240
|
-
_� 100
|
239
|
+
_en_ 101
|
241
240
|
c 100
|
241
|
+
r� 100
|
242
242
|
ansk 99
|
243
|
+
ie 99
|
243
244
|
nt 99
|
244
245
|
dan 99
|
245
|
-
ie 99
|
246
246
|
_med_ 98
|
247
247
|
or_ 97
|
248
|
-
|
248
|
+
ær 95
|
249
|
+
il_ 95
|
249
250
|
- 95
|
250
251
|
De 95
|
251
|
-
|
252
|
-
ær 95
|
253
|
-
na 94
|
254
|
-
nin 94
|
252
|
+
�r 95
|
255
253
|
rne 94
|
256
|
-
ning 94
|
257
|
-
ner 94
|
258
254
|
lt 94
|
255
|
+
nin 94
|
256
|
+
na 94
|
257
|
+
ner 94
|
258
|
+
ning 94
|
259
259
|
I 92
|
260
|
-
fr 92
|
261
260
|
til_ 92
|
261
|
+
fr 92
|
262
262
|
at_ 92
|
263
263
|
op 91
|
264
264
|
ru 91
|
265
|
-
_dan
|
266
|
-
|
267
|
-
ige 89
|
265
|
+
_dan 90
|
266
|
+
ge_ 90
|
268
267
|
_bl 89
|
268
|
+
rd 89
|
269
|
+
ng_ 89
|
270
|
+
v_ 89
|
269
271
|
erne 89
|
270
|
-
|
271
|
-
ng_ 88
|
272
|
-
v_ 88
|
272
|
+
ige 89
|
273
273
|
gt 88
|
274
|
-
|
275
|
-
tte 87
|
276
|
-
a_ 87
|
277
|
-
_re 87
|
274
|
+
a_ 88
|
278
275
|
inge 87
|
279
|
-
|
276
|
+
_. 87
|
280
277
|
p� 87
|
278
|
+
_den 87
|
279
|
+
kr 87
|
280
|
+
tte 87
|
281
|
+
s� 86
|
281
282
|
dans 86
|
282
283
|
men 86
|
283
|
-
s� 86
|
284
284
|
dansk 86
|
285
|
-
it 85
|
286
|
-
isk 85
|
287
285
|
_til_ 85
|
286
|
+
isk 85
|
287
|
+
es_ 85
|
288
|
+
it 85
|
288
289
|
ver 85
|
289
|
-
am 84
|
290
|
-
f� 84
|
291
290
|
_dans 84
|
292
|
-
|
291
|
+
_re 84
|
292
|
+
f� 84
|
293
|
+
am 84
|
293
294
|
els 84
|
294
|
-
|
295
|
+
_at 84
|
295
296
|
est 83
|
296
|
-
|
297
|
-
gen 82
|
297
|
+
_n 83
|
298
298
|
_den_ 82
|
299
|
-
|
299
|
+
gen 82
|
300
|
+
ur 82
|
300
301
|
_ud 81
|
302
|
+
he 81
|
301
303
|
_at_ 81
|
302
|
-
|
303
|
-
ene 80
|
304
|
+
rk_ 80
|
304
305
|
ble 80
|
305
|
-
|
306
|
-
|
306
|
+
ene 80
|
307
|
+
e. 79
|
307
308
|
und 79
|
308
|
-
_. 79
|
309
|
-
ede_ 79
|
310
309
|
ande 79
|
310
|
+
od 79
|
311
|
+
ede_ 79
|
312
|
+
_I 79
|
313
|
+
sa 79
|
314
|
+
_la 78
|
311
315
|
eri 78
|
312
|
-
nde_ 78
|
313
316
|
_in 78
|
314
|
-
|
315
|
-
|
317
|
+
nde_ 78
|
318
|
+
_fr 78
|
319
|
+
r. 77
|
316
320
|
ende 77
|
317
|
-
|
318
|
-
|
319
|
-
|
321
|
+
ov 77
|
322
|
+
_si 76
|
323
|
+
ing_ 76
|
324
|
+
sk_ 76
|
325
|
+
_._ 76
|
320
326
|
tor 76
|
321
327
|
av 75
|
322
|
-
lev 75
|
323
328
|
lk 75
|
324
|
-
|
325
|
-
|
326
|
-
ing_ 75
|
327
|
-
_si 74
|
329
|
+
ark_ 75
|
330
|
+
lev 75
|
328
331
|
an_ 74
|
329
332
|
ft 74
|
330
|
-
mm 73
|
331
|
-
på 73
|
332
333
|
F 73
|
333
|
-
|
334
|
+
mm 73
|
334
335
|
us 73
|
335
|
-
|
336
|
-
|
337
|
-
di 72
|
338
|
-
rin 72
|
336
|
+
på 73
|
337
|
+
_De 73
|
339
338
|
e, 72
|
340
|
-
|
339
|
+
di 72
|
341
340
|
E 72
|
341
|
+
rin 72
|
342
|
+
e,_ 72
|
343
|
+
mark_ 71
|
344
|
+
le_ 71
|
345
|
+
nd_ 71
|
346
|
+
_p� 71
|
342
347
|
nske 71
|
348
|
+
_lan 71
|
343
349
|
_på 71
|
344
350
|
_der 71
|
345
|
-
_p� 71
|
346
|
-
_lan 71
|
347
|
-
get 70
|
348
|
-
le_ 70
|
349
|
-
ark_ 70
|
350
351
|
st� 70
|
352
|
+
get 70
|
351
353
|
gi 70
|
352
|
-
ist 69
|
353
354
|
pr 69
|
354
|
-
|
355
|
-
|
355
|
+
ist 69
|
356
|
+
ks 68
|
356
357
|
_blev 68
|
357
358
|
_ble 68
|
358
359
|
blev 68
|
359
|
-
|
360
|
-
|
360
|
+
var 68
|
361
|
+
_va 67
|
362
|
+
anske 67
|
361
363
|
på_ 67
|
362
364
|
ss 67
|
363
|
-
anske 67
|
364
|
-
_va 67
|
365
|
-
_( 66
|
366
|
-
_land 66
|
367
|
-
ati 66
|
368
|
-
tio 66
|
369
|
-
lse 66
|
370
|
-
år 66
|
371
|
-
fi 66
|
372
365
|
_på_ 66
|
373
|
-
|
366
|
+
gr 66
|
367
|
+
år 66
|
368
|
+
tio 66
|
369
|
+
ati 66
|
374
370
|
tion 66
|
371
|
+
fi 66
|
372
|
+
lse 66
|
373
|
+
_land 66
|
375
374
|
�r 66
|
376
|
-
( 66
|
377
|
-
gr 66
|
378
375
|
ef 65
|
376
|
+
one 65
|
379
377
|
sto 65
|
380
378
|
kt 65
|
381
|
-
one 65
|
382
379
|
sen 64
|
383
|
-
ev_ 64
|
384
380
|
else 64
|
381
|
+
ev_ 64
|
385
382
|
A 63
|
386
383
|
ende_ 63
|
387
|
-
ren 63
|
388
384
|
ring 63
|
385
|
+
ren 63
|
389
386
|
_ko 62
|
390
387
|
for_ 62
|
391
388
|
ho 62
|
389
|
+
e._ 62
|
390
|
+
ig_ 62
|
392
391
|
dr 61
|
393
|
-
ig_ 61
|
394
|
-
ste_ 61
|
395
392
|
rig 61
|
396
|
-
|
393
|
+
ste_ 61
|
397
394
|
ret 60
|
395
|
+
lev_ 60
|
396
|
+
n,_ 60
|
397
|
+
_E 60
|
398
|
+
_F 60
|
399
|
+
n, 60
|
398
400
|
blev_ 60
|
399
|
-
_E 59
|
400
|
-
fø 59
|