scylla 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -2
- data/Gemfile.lock +16 -1
- data/lib/scylla/classifier.rb +1 -1
- data/lib/scylla/generator.rb +16 -4
- data/lib/scylla/lms/afrikaans.lm +232 -232
- data/lib/scylla/lms/arabic.lm +175 -175
- data/lib/scylla/lms/bulgarian.lm +225 -225
- data/lib/scylla/lms/catalan.lm +309 -309
- data/lib/scylla/lms/danish.lm +167 -167
- data/lib/scylla/lms/english.lm +398 -398
- data/lib/scylla/lms/finnish.lm +237 -237
- data/lib/scylla/lms/french.lm +148 -148
- data/lib/scylla/lms/german.lm +258 -258
- data/lib/scylla/lms/greek.lm +236 -236
- data/lib/scylla/lms/hebrew.lm +154 -154
- data/lib/scylla/lms/hindi.lm +139 -139
- data/lib/scylla/lms/icelandic.lm +239 -239
- data/lib/scylla/lms/indonesian.lm +244 -244
- data/lib/scylla/lms/italian.lm +248 -248
- data/lib/scylla/lms/japanese.lm +90 -90
- data/lib/scylla/lms/korean.lm +306 -306
- data/lib/scylla/lms/norwegian.lm +193 -193
- data/lib/scylla/lms/polish.lm +241 -241
- data/lib/scylla/lms/portuguese.lm +232 -232
- data/lib/scylla/lms/romanian.lm +246 -246
- data/lib/scylla/lms/slovak.lm +242 -242
- data/lib/scylla/lms/slovenian.lm +229 -229
- data/lib/scylla/lms/spanish.lm +164 -164
- data/lib/scylla/lms/swedish.lm +157 -157
- data/lib/scylla/lms/tagalog.lm +247 -247
- data/lib/scylla/lms/thai.lm +252 -252
- data/lib/scylla/lms/turkish.lm +285 -285
- data/lib/scylla/lms/vietnamese.lm +250 -250
- data/lib/scylla/lms/welsh.lm +248 -248
- data/lib/scylla/resources.rb +1 -9
- data/lib/scylla.rb +4 -0
- data/scylla.gemspec +2 -120
- data/source_texts/english.txt +62 -27
- data/test/classifier_test.rb +1 -3
- data/test/fixtures/lms/danish.lm +173 -173
- data/test/fixtures/lms/english.lm +220 -220
- data/test/fixtures/lms/french.lm +175 -175
- data/test/fixtures/lms/german.lm +254 -254
- data/test/fixtures/lms/hindi.lm +139 -139
- data/test/fixtures/lms/italian.lm +236 -236
- data/test/fixtures/lms/japanese.lm +88 -88
- data/test/fixtures/lms/norwegian.lm +182 -182
- data/test/fixtures/lms/spanish.lm +164 -164
- data/test/fixtures/test_languages/spanish +0 -1
- data/test/generator_test.rb +13 -0
- data/test/helper.rb +2 -0
- metadata +18 -25
- data/.document +0 -5
- data/lib/scylla/lms/13375P33K.lm +0 -400
- data/scylla-0.1.0.gem +0 -0
- data/source_texts/13375P33K.txt +0 -199
- data/test/fixtures/lms/13375p33k.lm +0 -400
- data/test/fixtures/source_texts/13375P33K.txt +0 -199
data/lib/scylla/lms/danish.lm
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
_
|
1
|
+
_ 16370
|
2
2
|
e 5759
|
3
3
|
r 3334
|
4
4
|
n 3061
|
@@ -13,388 +13,388 @@ g 1617
|
|
13
13
|
k 1278
|
14
14
|
m 1273
|
15
15
|
er 1209
|
16
|
-
e_
|
17
|
-
__
|
16
|
+
e_ 1183
|
17
|
+
__ 1139
|
18
18
|
de 1045
|
19
19
|
en 993
|
20
20
|
� 940
|
21
21
|
f 939
|
22
|
-
r_
|
22
|
+
r_ 840
|
23
23
|
v 770
|
24
|
-
t_
|
24
|
+
t_ 732
|
25
25
|
an 724
|
26
|
-
n_
|
26
|
+
n_ 715
|
27
27
|
u 605
|
28
28
|
nd 598
|
29
29
|
b 585
|
30
30
|
et 574
|
31
|
-
_s
|
31
|
+
_s 552
|
32
32
|
. 546
|
33
33
|
re 543
|
34
|
+
er_ 541
|
34
35
|
te 540
|
35
36
|
st 530
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
_d 484
|
37
|
+
en_ 530
|
38
|
+
g_ 521
|
39
|
+
_o 503
|
40
|
+
_d 487
|
41
41
|
, 480
|
42
42
|
,_ 478
|
43
43
|
h 474
|
44
44
|
ge 474
|
45
|
-
_a
|
45
|
+
_a 463
|
46
46
|
in 440
|
47
47
|
p 436
|
48
|
+
_f 431
|
48
49
|
ar 430
|
49
|
-
_f 423
|
50
50
|
og 415
|
51
51
|
or 411
|
52
52
|
ti 406
|
53
53
|
._ 405
|
54
|
-
|
55
|
-
|
54
|
+
_e 404
|
55
|
+
et_ 399
|
56
56
|
ed 381
|
57
|
-
_m
|
58
|
-
_i
|
57
|
+
_m 381
|
58
|
+
_i 381
|
59
59
|
sk 365
|
60
60
|
ne 365
|
61
61
|
le 353
|
62
|
-
_og
|
62
|
+
_og 346
|
63
63
|
ke 343
|
64
64
|
el 342
|
65
65
|
og_ 338
|
66
66
|
� 332
|
67
67
|
ø 332
|
68
|
-
d_
|
68
|
+
d_ 329
|
69
69
|
me 324
|
70
70
|
ng 317
|
71
|
-
|
72
|
-
|
71
|
+
_og_ 315
|
72
|
+
_de 315
|
73
73
|
å 293
|
74
74
|
ig 293
|
75
75
|
� 293
|
76
|
-
_b
|
77
|
-
� 286
|
76
|
+
_b 290
|
78
77
|
æ 286
|
79
|
-
|
78
|
+
� 286
|
79
|
+
i_ 284
|
80
|
+
de_ 275
|
80
81
|
_h 274
|
81
|
-
|
82
|
+
s_ 273
|
82
83
|
ri 273
|
83
|
-
s_ 271
|
84
84
|
D 268
|
85
85
|
nde 263
|
86
|
-
om 262
|
87
86
|
li 262
|
87
|
+
om 262
|
88
88
|
ma 259
|
89
89
|
ve 257
|
90
90
|
y 256
|
91
|
+
_i_ 254
|
91
92
|
af 254
|
92
|
-
_i_ 252
|
93
93
|
_t 249
|
94
94
|
at 245
|
95
95
|
il 244
|
96
|
+
es 241
|
96
97
|
and 241
|
97
|
-
es 240
|
98
98
|
be 234
|
99
99
|
al 234
|
100
100
|
is 233
|
101
101
|
fo 232
|
102
102
|
se 232
|
103
103
|
ns 229
|
104
|
+
_D 226
|
104
105
|
la 224
|
105
|
-
_D 223
|
106
106
|
on 221
|
107
107
|
rk 219
|
108
108
|
_af 217
|
109
109
|
den 216
|
110
110
|
der 214
|
111
|
+
_k 212
|
111
112
|
_me 210
|
112
|
-
_k 210
|
113
113
|
m_ 209
|
114
114
|
ing 207
|
115
115
|
_v 203
|
116
|
-
k_
|
116
|
+
k_ 201
|
117
117
|
ra 191
|
118
118
|
f_ 188
|
119
119
|
af_ 186
|
120
|
+
_p 184
|
120
121
|
for 184
|
121
|
-
_p 181
|
122
122
|
_af_ 180
|
123
|
+
l_ 179
|
123
124
|
ol 174
|
124
|
-
ere 172
|
125
|
-
_fo 172
|
126
125
|
ark 172
|
127
|
-
|
126
|
+
_fo 172
|
127
|
+
ere 172
|
128
128
|
ste 169
|
129
|
+
lan 169
|
129
130
|
te_ 168
|
131
|
+
_l 166
|
130
132
|
mar 165
|
131
|
-
_l 165
|
132
|
-
l_ 164
|
133
133
|
ll 162
|
134
134
|
ter 161
|
135
135
|
j 159
|
136
136
|
ske 159
|
137
137
|
om_ 155
|
138
|
+
ke_ 154
|
139
|
+
den_ 153
|
138
140
|
land 153
|
141
|
+
ha 153
|
139
142
|
mark 153
|
140
143
|
Da 153
|
141
|
-
ke_ 153
|
142
|
-
den_ 153
|
143
|
-
ha 153
|
144
|
-
ni 151
|
145
144
|
ed_ 151
|
145
|
+
ni 151
|
146
146
|
_st 151
|
147
|
-
_for 149
|
148
147
|
so 149
|
148
|
+
_for 149
|
149
149
|
Dan 148
|
150
150
|
ta 148
|
151
151
|
ger 147
|
152
|
+
_er 145
|
152
153
|
nge 144
|
153
154
|
det 143
|
154
|
-
re_
|
155
|
+
re_ 141
|
155
156
|
ede 139
|
156
157
|
nma 138
|
157
|
-
nm 138
|
158
158
|
vi 138
|
159
|
-
|
159
|
+
nm 138
|
160
160
|
_en 137
|
161
|
+
nmark 137
|
161
162
|
nmar 137
|
162
|
-
anm 136
|
163
163
|
anma 136
|
164
|
+
_Da 136
|
164
165
|
anmar 136
|
166
|
+
anm 136
|
165
167
|
ev 135
|
166
168
|
rs 135
|
167
|
-
der_
|
168
|
-
|
169
|
+
der_ 134
|
170
|
+
un 133
|
169
171
|
Danm 133
|
172
|
+
Danma 133
|
170
173
|
S 133
|
171
|
-
_Da 133
|
172
|
-
un 133
|
173
174
|
ans 132
|
174
|
-
|
175
|
+
_Dan 132
|
175
176
|
da 130
|
176
177
|
med 130
|
177
|
-
_Dan 129
|
178
|
-
ro 127
|
179
178
|
io 127
|
179
|
+
ro 127
|
180
180
|
til 126
|
181
|
-
som 125
|
182
|
-
_er_ 125
|
183
181
|
ik 125
|
182
|
+
_er_ 125
|
183
|
+
som 125
|
184
184
|
rn 124
|
185
185
|
_ti 124
|
186
|
+
�_ 123
|
186
187
|
ds 123
|
188
|
+
å_ 123
|
187
189
|
_u 122
|
188
190
|
em 122
|
189
191
|
eg 121
|
190
|
-
|
191
|
-
å_ 121
|
192
|
-
_ha 120
|
193
|
-
_Danm 120
|
192
|
+
_Danm 121
|
194
193
|
rt 120
|
194
|
+
_ha 120
|
195
195
|
ld 119
|
196
196
|
_med 119
|
197
|
-
|
197
|
+
_so 119
|
198
|
+
_g 118
|
198
199
|
som_ 118
|
199
200
|
to 117
|
200
|
-
|
201
|
-
_g 116
|
201
|
+
_som 116
|
202
202
|
ske_ 116
|
203
203
|
det_ 115
|
204
|
-
|
205
|
-
end 114
|
204
|
+
_r 115
|
206
205
|
ar_ 114
|
206
|
+
end 114
|
207
207
|
tr 114
|
208
|
-
|
209
|
-
|
208
|
+
ern 114
|
209
|
+
_som_ 113
|
210
210
|
ko 111
|
211
211
|
ud 111
|
212
|
-
|
212
|
+
id 111
|
213
213
|
del 110
|
214
214
|
_til 109
|
215
215
|
si 108
|
216
|
-
|
216
|
+
nsk 108
|
217
217
|
va 108
|
218
218
|
mi 108
|
219
|
-
|
219
|
+
lig 108
|
220
220
|
ls 107
|
221
|
-
_be
|
222
|
-
|
223
|
-
|
221
|
+
_be 107
|
222
|
+
ør 105
|
223
|
+
ind 105
|
224
224
|
ka 105
|
225
|
+
bl 105
|
225
226
|
ion 105
|
226
|
-
|
227
|
-
|
227
|
+
�r 105
|
228
|
+
_da 105
|
228
229
|
gs 105
|
229
230
|
lle 104
|
230
|
-
|
231
|
+
_S 104
|
231
232
|
t� 103
|
232
|
-
|
233
|
-
ne_ 102
|
233
|
+
_� 103
|
234
234
|
med_ 102
|
235
|
+
ne_ 102
|
235
236
|
ag 101
|
236
|
-
_en_ 101
|
237
237
|
tt 101
|
238
238
|
dt 101
|
239
|
-
|
240
|
-
_� 100
|
239
|
+
_en_ 101
|
241
240
|
c 100
|
241
|
+
r� 100
|
242
242
|
ansk 99
|
243
|
+
ie 99
|
243
244
|
nt 99
|
244
245
|
dan 99
|
245
|
-
ie 99
|
246
246
|
_med_ 98
|
247
247
|
or_ 97
|
248
|
-
|
248
|
+
ær 95
|
249
|
+
il_ 95
|
249
250
|
- 95
|
250
251
|
De 95
|
251
|
-
|
252
|
-
ær 95
|
253
|
-
na 94
|
254
|
-
nin 94
|
252
|
+
�r 95
|
255
253
|
rne 94
|
256
|
-
ning 94
|
257
|
-
ner 94
|
258
254
|
lt 94
|
255
|
+
nin 94
|
256
|
+
na 94
|
257
|
+
ner 94
|
258
|
+
ning 94
|
259
259
|
I 92
|
260
|
-
fr 92
|
261
260
|
til_ 92
|
261
|
+
fr 92
|
262
262
|
at_ 92
|
263
263
|
op 91
|
264
264
|
ru 91
|
265
|
-
_dan
|
266
|
-
|
267
|
-
ige 89
|
265
|
+
_dan 90
|
266
|
+
ge_ 90
|
268
267
|
_bl 89
|
268
|
+
rd 89
|
269
|
+
ng_ 89
|
270
|
+
v_ 89
|
269
271
|
erne 89
|
270
|
-
|
271
|
-
ng_ 88
|
272
|
-
v_ 88
|
272
|
+
ige 89
|
273
273
|
gt 88
|
274
|
-
|
275
|
-
tte 87
|
276
|
-
a_ 87
|
277
|
-
_re 87
|
274
|
+
a_ 88
|
278
275
|
inge 87
|
279
|
-
|
276
|
+
_. 87
|
280
277
|
p� 87
|
278
|
+
_den 87
|
279
|
+
kr 87
|
280
|
+
tte 87
|
281
|
+
s� 86
|
281
282
|
dans 86
|
282
283
|
men 86
|
283
|
-
s� 86
|
284
284
|
dansk 86
|
285
|
-
it 85
|
286
|
-
isk 85
|
287
285
|
_til_ 85
|
286
|
+
isk 85
|
287
|
+
es_ 85
|
288
|
+
it 85
|
288
289
|
ver 85
|
289
|
-
am 84
|
290
|
-
f� 84
|
291
290
|
_dans 84
|
292
|
-
|
291
|
+
_re 84
|
292
|
+
f� 84
|
293
|
+
am 84
|
293
294
|
els 84
|
294
|
-
|
295
|
+
_at 84
|
295
296
|
est 83
|
296
|
-
|
297
|
-
gen 82
|
297
|
+
_n 83
|
298
298
|
_den_ 82
|
299
|
-
|
299
|
+
gen 82
|
300
|
+
ur 82
|
300
301
|
_ud 81
|
302
|
+
he 81
|
301
303
|
_at_ 81
|
302
|
-
|
303
|
-
ene 80
|
304
|
+
rk_ 80
|
304
305
|
ble 80
|
305
|
-
|
306
|
-
|
306
|
+
ene 80
|
307
|
+
e. 79
|
307
308
|
und 79
|
308
|
-
_. 79
|
309
|
-
ede_ 79
|
310
309
|
ande 79
|
310
|
+
od 79
|
311
|
+
ede_ 79
|
312
|
+
_I 79
|
313
|
+
sa 79
|
314
|
+
_la 78
|
311
315
|
eri 78
|
312
|
-
nde_ 78
|
313
316
|
_in 78
|
314
|
-
|
315
|
-
|
317
|
+
nde_ 78
|
318
|
+
_fr 78
|
319
|
+
r. 77
|
316
320
|
ende 77
|
317
|
-
|
318
|
-
|
319
|
-
|
321
|
+
ov 77
|
322
|
+
_si 76
|
323
|
+
ing_ 76
|
324
|
+
sk_ 76
|
325
|
+
_._ 76
|
320
326
|
tor 76
|
321
327
|
av 75
|
322
|
-
lev 75
|
323
328
|
lk 75
|
324
|
-
|
325
|
-
|
326
|
-
ing_ 75
|
327
|
-
_si 74
|
329
|
+
ark_ 75
|
330
|
+
lev 75
|
328
331
|
an_ 74
|
329
332
|
ft 74
|
330
|
-
mm 73
|
331
|
-
på 73
|
332
333
|
F 73
|
333
|
-
|
334
|
+
mm 73
|
334
335
|
us 73
|
335
|
-
|
336
|
-
|
337
|
-
di 72
|
338
|
-
rin 72
|
336
|
+
på 73
|
337
|
+
_De 73
|
339
338
|
e, 72
|
340
|
-
|
339
|
+
di 72
|
341
340
|
E 72
|
341
|
+
rin 72
|
342
|
+
e,_ 72
|
343
|
+
mark_ 71
|
344
|
+
le_ 71
|
345
|
+
nd_ 71
|
346
|
+
_p� 71
|
342
347
|
nske 71
|
348
|
+
_lan 71
|
343
349
|
_på 71
|
344
350
|
_der 71
|
345
|
-
_p� 71
|
346
|
-
_lan 71
|
347
|
-
get 70
|
348
|
-
le_ 70
|
349
|
-
ark_ 70
|
350
351
|
st� 70
|
352
|
+
get 70
|
351
353
|
gi 70
|
352
|
-
ist 69
|
353
354
|
pr 69
|
354
|
-
|
355
|
-
|
355
|
+
ist 69
|
356
|
+
ks 68
|
356
357
|
_blev 68
|
357
358
|
_ble 68
|
358
359
|
blev 68
|
359
|
-
|
360
|
-
|
360
|
+
var 68
|
361
|
+
_va 67
|
362
|
+
anske 67
|
361
363
|
på_ 67
|
362
364
|
ss 67
|
363
|
-
anske 67
|
364
|
-
_va 67
|
365
|
-
_( 66
|
366
|
-
_land 66
|
367
|
-
ati 66
|
368
|
-
tio 66
|
369
|
-
lse 66
|
370
|
-
år 66
|
371
|
-
fi 66
|
372
365
|
_på_ 66
|
373
|
-
|
366
|
+
gr 66
|
367
|
+
år 66
|
368
|
+
tio 66
|
369
|
+
ati 66
|
374
370
|
tion 66
|
371
|
+
fi 66
|
372
|
+
lse 66
|
373
|
+
_land 66
|
375
374
|
�r 66
|
376
|
-
( 66
|
377
|
-
gr 66
|
378
375
|
ef 65
|
376
|
+
one 65
|
379
377
|
sto 65
|
380
378
|
kt 65
|
381
|
-
one 65
|
382
379
|
sen 64
|
383
|
-
ev_ 64
|
384
380
|
else 64
|
381
|
+
ev_ 64
|
385
382
|
A 63
|
386
383
|
ende_ 63
|
387
|
-
ren 63
|
388
384
|
ring 63
|
385
|
+
ren 63
|
389
386
|
_ko 62
|
390
387
|
for_ 62
|
391
388
|
ho 62
|
389
|
+
e._ 62
|
390
|
+
ig_ 62
|
392
391
|
dr 61
|
393
|
-
ig_ 61
|
394
|
-
ste_ 61
|
395
392
|
rig 61
|
396
|
-
|
393
|
+
ste_ 61
|
397
394
|
ret 60
|
395
|
+
lev_ 60
|
396
|
+
n,_ 60
|
397
|
+
_E 60
|
398
|
+
_F 60
|
399
|
+
n, 60
|
398
400
|
blev_ 60
|
399
|
-
_E 59
|
400
|
-
fø 59
|