scylla 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -2
- data/Gemfile.lock +16 -1
- data/lib/scylla/classifier.rb +1 -1
- data/lib/scylla/generator.rb +16 -4
- data/lib/scylla/lms/afrikaans.lm +232 -232
- data/lib/scylla/lms/arabic.lm +175 -175
- data/lib/scylla/lms/bulgarian.lm +225 -225
- data/lib/scylla/lms/catalan.lm +309 -309
- data/lib/scylla/lms/danish.lm +167 -167
- data/lib/scylla/lms/english.lm +398 -398
- data/lib/scylla/lms/finnish.lm +237 -237
- data/lib/scylla/lms/french.lm +148 -148
- data/lib/scylla/lms/german.lm +258 -258
- data/lib/scylla/lms/greek.lm +236 -236
- data/lib/scylla/lms/hebrew.lm +154 -154
- data/lib/scylla/lms/hindi.lm +139 -139
- data/lib/scylla/lms/icelandic.lm +239 -239
- data/lib/scylla/lms/indonesian.lm +244 -244
- data/lib/scylla/lms/italian.lm +248 -248
- data/lib/scylla/lms/japanese.lm +90 -90
- data/lib/scylla/lms/korean.lm +306 -306
- data/lib/scylla/lms/norwegian.lm +193 -193
- data/lib/scylla/lms/polish.lm +241 -241
- data/lib/scylla/lms/portuguese.lm +232 -232
- data/lib/scylla/lms/romanian.lm +246 -246
- data/lib/scylla/lms/slovak.lm +242 -242
- data/lib/scylla/lms/slovenian.lm +229 -229
- data/lib/scylla/lms/spanish.lm +164 -164
- data/lib/scylla/lms/swedish.lm +157 -157
- data/lib/scylla/lms/tagalog.lm +247 -247
- data/lib/scylla/lms/thai.lm +252 -252
- data/lib/scylla/lms/turkish.lm +285 -285
- data/lib/scylla/lms/vietnamese.lm +250 -250
- data/lib/scylla/lms/welsh.lm +248 -248
- data/lib/scylla/resources.rb +1 -9
- data/lib/scylla.rb +4 -0
- data/scylla.gemspec +2 -120
- data/source_texts/english.txt +62 -27
- data/test/classifier_test.rb +1 -3
- data/test/fixtures/lms/danish.lm +173 -173
- data/test/fixtures/lms/english.lm +220 -220
- data/test/fixtures/lms/french.lm +175 -175
- data/test/fixtures/lms/german.lm +254 -254
- data/test/fixtures/lms/hindi.lm +139 -139
- data/test/fixtures/lms/italian.lm +236 -236
- data/test/fixtures/lms/japanese.lm +88 -88
- data/test/fixtures/lms/norwegian.lm +182 -182
- data/test/fixtures/lms/spanish.lm +164 -164
- data/test/fixtures/test_languages/spanish +0 -1
- data/test/generator_test.rb +13 -0
- data/test/helper.rb +2 -0
- metadata +18 -25
- data/.document +0 -5
- data/lib/scylla/lms/13375P33K.lm +0 -400
- data/scylla-0.1.0.gem +0 -0
- data/source_texts/13375P33K.txt +0 -199
- data/test/fixtures/lms/13375p33k.lm +0 -400
- data/test/fixtures/source_texts/13375P33K.txt +0 -199
data/test/fixtures/lms/danish.lm
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
_
|
1
|
+
_ 16370
|
2
2
|
e 5759
|
3
3
|
r 3334
|
4
4
|
n 3061
|
@@ -12,389 +12,389 @@ l 1884
|
|
12
12
|
g 1617
|
13
13
|
k 1278
|
14
14
|
m 1273
|
15
|
-
er
|
16
|
-
e_
|
17
|
-
__
|
15
|
+
er 1210
|
16
|
+
e_ 1186
|
17
|
+
__ 1181
|
18
18
|
de 1045
|
19
19
|
en 993
|
20
20
|
� 940
|
21
21
|
f 939
|
22
|
-
r_
|
22
|
+
r_ 840
|
23
23
|
v 770
|
24
|
-
t_
|
24
|
+
t_ 732
|
25
25
|
an 724
|
26
|
-
n_
|
26
|
+
n_ 716
|
27
27
|
u 605
|
28
28
|
nd 598
|
29
29
|
b 585
|
30
30
|
et 574
|
31
|
-
_s
|
31
|
+
_s 552
|
32
32
|
. 546
|
33
33
|
re 543
|
34
|
+
er_ 541
|
34
35
|
te 540
|
36
|
+
en_ 530
|
35
37
|
st 530
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
_o 501
|
40
|
-
_d 484
|
38
|
+
g_ 521
|
39
|
+
_o 503
|
40
|
+
_d 487
|
41
41
|
, 480
|
42
42
|
,_ 478
|
43
43
|
h 474
|
44
44
|
ge 474
|
45
|
-
_a
|
45
|
+
_a 463
|
46
46
|
in 440
|
47
47
|
p 436
|
48
|
+
_f 431
|
48
49
|
ar 430
|
49
|
-
_f 423
|
50
50
|
og 415
|
51
51
|
or 411
|
52
52
|
ti 406
|
53
53
|
._ 405
|
54
|
-
|
55
|
-
|
54
|
+
_e 404
|
55
|
+
et_ 399
|
56
|
+
_m 382
|
57
|
+
_i 381
|
56
58
|
ed 381
|
57
|
-
_m 378
|
58
|
-
_i 378
|
59
|
-
sk 365
|
60
59
|
ne 365
|
60
|
+
sk 365
|
61
61
|
le 353
|
62
|
-
_og
|
63
|
-
ke
|
62
|
+
_og 346
|
63
|
+
ke 344
|
64
64
|
el 342
|
65
65
|
og_ 338
|
66
|
-
� 332
|
67
66
|
ø 332
|
68
|
-
|
67
|
+
� 332
|
68
|
+
d_ 330
|
69
69
|
me 324
|
70
70
|
ng 317
|
71
|
-
|
72
|
-
|
73
|
-
å 293
|
74
|
-
ig 293
|
71
|
+
_og_ 315
|
72
|
+
_de 315
|
75
73
|
� 293
|
76
|
-
|
77
|
-
|
74
|
+
ig 293
|
75
|
+
å 293
|
76
|
+
_b 290
|
78
77
|
æ 286
|
79
|
-
|
78
|
+
� 286
|
79
|
+
i_ 285
|
80
|
+
s_ 276
|
81
|
+
de_ 275
|
80
82
|
_h 274
|
81
|
-
de_ 274
|
82
83
|
ri 273
|
83
|
-
s_ 271
|
84
84
|
D 268
|
85
85
|
nde 263
|
86
|
-
om 262
|
87
86
|
li 262
|
87
|
+
om 262
|
88
88
|
ma 259
|
89
89
|
ve 257
|
90
|
+
_t 256
|
90
91
|
y 256
|
91
92
|
af 254
|
92
|
-
_i_
|
93
|
-
_t 249
|
93
|
+
_i_ 254
|
94
94
|
at 245
|
95
95
|
il 244
|
96
|
+
es 241
|
96
97
|
and 241
|
97
|
-
es 240
|
98
98
|
be 234
|
99
99
|
al 234
|
100
100
|
is 233
|
101
101
|
fo 232
|
102
102
|
se 232
|
103
103
|
ns 229
|
104
|
+
_D 226
|
104
105
|
la 224
|
105
|
-
_D 223
|
106
106
|
on 221
|
107
107
|
rk 219
|
108
108
|
_af 217
|
109
109
|
den 216
|
110
110
|
der 214
|
111
|
+
_k 212
|
111
112
|
_me 210
|
112
|
-
_k 210
|
113
113
|
m_ 209
|
114
114
|
ing 207
|
115
115
|
_v 203
|
116
|
-
k_
|
116
|
+
k_ 202
|
117
117
|
ra 191
|
118
118
|
f_ 188
|
119
119
|
af_ 186
|
120
120
|
for 184
|
121
|
-
_p
|
121
|
+
_p 184
|
122
|
+
l_ 183
|
122
123
|
_af_ 180
|
123
124
|
ol 174
|
124
|
-
ere
|
125
|
-
_fo 172
|
125
|
+
ere 173
|
126
126
|
ark 172
|
127
|
+
_fo 172
|
127
128
|
lan 169
|
128
129
|
ste 169
|
129
130
|
te_ 168
|
131
|
+
_l 166
|
130
132
|
mar 165
|
131
|
-
_l 165
|
132
|
-
l_ 164
|
133
133
|
ll 162
|
134
134
|
ter 161
|
135
|
+
ske 160
|
135
136
|
j 159
|
136
|
-
|
137
|
+
ke_ 155
|
137
138
|
om_ 155
|
138
|
-
|
139
|
-
mark 153
|
139
|
+
ha 153
|
140
140
|
Da 153
|
141
|
-
|
141
|
+
mark 153
|
142
142
|
den_ 153
|
143
|
-
|
143
|
+
land 153
|
144
|
+
_st 151
|
144
145
|
ni 151
|
145
146
|
ed_ 151
|
146
|
-
_st 151
|
147
147
|
_for 149
|
148
148
|
so 149
|
149
|
-
Dan 148
|
150
149
|
ta 148
|
150
|
+
Dan 148
|
151
151
|
ger 147
|
152
|
+
_er 145
|
152
153
|
nge 144
|
153
154
|
det 143
|
154
|
-
re_
|
155
|
+
re_ 141
|
155
156
|
ede 139
|
156
157
|
nma 138
|
157
158
|
nm 138
|
158
159
|
vi 138
|
159
|
-
nmark 137
|
160
|
-
_en 137
|
161
160
|
nmar 137
|
161
|
+
_en 137
|
162
|
+
nmark 137
|
163
|
+
anmar 136
|
162
164
|
anm 136
|
165
|
+
_Da 136
|
163
166
|
anma 136
|
164
|
-
anmar 136
|
165
167
|
ev 135
|
166
168
|
rs 135
|
167
|
-
der_
|
168
|
-
Danma 133
|
169
|
+
der_ 134
|
169
170
|
Danm 133
|
170
171
|
S 133
|
171
|
-
|
172
|
+
Danma 133
|
172
173
|
un 133
|
174
|
+
_Dan 132
|
173
175
|
ans 132
|
174
|
-
_er 131
|
175
|
-
da 130
|
176
176
|
med 130
|
177
|
-
|
177
|
+
da 130
|
178
178
|
ro 127
|
179
179
|
io 127
|
180
180
|
til 126
|
181
|
-
som 125
|
182
181
|
_er_ 125
|
183
182
|
ik 125
|
184
|
-
|
183
|
+
som 125
|
185
184
|
_ti 124
|
185
|
+
rn 124
|
186
|
+
em 123
|
186
187
|
ds 123
|
188
|
+
�_ 123
|
189
|
+
å_ 123
|
187
190
|
_u 122
|
188
|
-
em 122
|
189
191
|
eg 121
|
190
|
-
|
191
|
-
å_ 121
|
192
|
+
_Danm 121
|
192
193
|
_ha 120
|
193
|
-
_Danm 120
|
194
194
|
rt 120
|
195
|
-
ld 119
|
196
195
|
_med 119
|
197
|
-
|
196
|
+
_so 119
|
197
|
+
ld 119
|
198
|
+
_g 118
|
198
199
|
som_ 118
|
199
200
|
to 117
|
200
|
-
_so 116
|
201
|
-
_g 116
|
202
201
|
ske_ 116
|
202
|
+
_som 116
|
203
203
|
det_ 115
|
204
|
-
|
204
|
+
_r 115
|
205
205
|
end 114
|
206
|
+
ern 114
|
206
207
|
ar_ 114
|
207
208
|
tr 114
|
208
|
-
|
209
|
+
_som_ 113
|
209
210
|
id 111
|
210
|
-
ko 111
|
211
211
|
ud 111
|
212
|
-
|
212
|
+
ko 111
|
213
213
|
del 110
|
214
214
|
_til 109
|
215
|
-
si 108
|
216
|
-
lig 108
|
217
215
|
va 108
|
218
|
-
mi 108
|
219
216
|
nsk 108
|
217
|
+
mi 108
|
218
|
+
si 108
|
219
|
+
lig 108
|
220
|
+
_be 107
|
220
221
|
ls 107
|
221
|
-
_be 105
|
222
|
-
�r 105
|
223
|
-
bl 105
|
224
|
-
ka 105
|
225
222
|
ion 105
|
226
223
|
ind 105
|
227
|
-
|
224
|
+
_da 105
|
225
|
+
�r 105
|
226
|
+
bl 105
|
228
227
|
gs 105
|
228
|
+
ør 105
|
229
|
+
ka 105
|
230
|
+
_S 104
|
229
231
|
lle 104
|
230
|
-
|
232
|
+
_� 104
|
233
|
+
dt 103
|
231
234
|
t� 103
|
232
|
-
_S 103
|
233
235
|
ne_ 102
|
234
236
|
med_ 102
|
235
|
-
|
237
|
+
tt 102
|
236
238
|
_en_ 101
|
237
|
-
|
238
|
-
dt 101
|
239
|
+
ag 101
|
239
240
|
r� 100
|
240
|
-
_� 100
|
241
241
|
c 100
|
242
242
|
ansk 99
|
243
|
-
nt 99
|
244
|
-
dan 99
|
245
243
|
ie 99
|
244
|
+
dan 99
|
245
|
+
nt 99
|
246
246
|
_med_ 98
|
247
247
|
or_ 97
|
248
|
-
|
249
|
-
- 95
|
248
|
+
il_ 96
|
250
249
|
De 95
|
251
|
-
|
250
|
+
�r 95
|
252
251
|
ær 95
|
253
|
-
na 94
|
254
|
-
nin 94
|
255
252
|
rne 94
|
253
|
+
lt 94
|
256
254
|
ning 94
|
255
|
+
na 94
|
257
256
|
ner 94
|
258
|
-
|
259
|
-
I 92
|
260
|
-
fr 92
|
257
|
+
nin 94
|
261
258
|
til_ 92
|
259
|
+
fr 92
|
260
|
+
I 92
|
262
261
|
at_ 92
|
263
262
|
op 91
|
264
263
|
ru 91
|
265
|
-
|
264
|
+
ge_ 91
|
265
|
+
_dan 90
|
266
|
+
erne 89
|
267
|
+
v_ 89
|
266
268
|
rd 89
|
269
|
+
ng_ 89
|
267
270
|
ige 89
|
268
271
|
_bl 89
|
269
|
-
|
270
|
-
|
271
|
-
ng_ 88
|
272
|
-
v_ 88
|
272
|
+
_. 88
|
273
|
+
a_ 88
|
273
274
|
gt 88
|
275
|
+
p� 87
|
274
276
|
kr 87
|
277
|
+
_den 87
|
275
278
|
tte 87
|
276
|
-
a_ 87
|
277
|
-
_re 87
|
278
279
|
inge 87
|
279
|
-
_den 87
|
280
|
-
p� 87
|
281
|
-
dans 86
|
282
|
-
men 86
|
283
280
|
s� 86
|
281
|
+
men 86
|
282
|
+
dans 86
|
284
283
|
dansk 86
|
285
|
-
it 85
|
286
|
-
isk 85
|
287
284
|
_til_ 85
|
285
|
+
isk 85
|
288
286
|
ver 85
|
289
|
-
|
290
|
-
|
287
|
+
es_ 85
|
288
|
+
it 85
|
289
|
+
_re 84
|
291
290
|
_dans 84
|
292
|
-
_at 84
|
293
291
|
els 84
|
294
|
-
|
292
|
+
_at 84
|
293
|
+
am 84
|
294
|
+
f� 84
|
295
|
+
_n 83
|
295
296
|
est 83
|
296
297
|
ur 82
|
297
|
-
gen 82
|
298
298
|
_den_ 82
|
299
|
+
gen 82
|
299
300
|
he 81
|
300
|
-
_ud 81
|
301
301
|
_at_ 81
|
302
|
-
|
303
|
-
ene 80
|
302
|
+
_ud 81
|
304
303
|
ble 80
|
305
|
-
|
306
|
-
|
307
|
-
und 79
|
308
|
-
_. 79
|
304
|
+
ene 80
|
305
|
+
rk_ 80
|
309
306
|
ede_ 79
|
307
|
+
e. 79
|
310
308
|
ande 79
|
311
|
-
|
312
|
-
|
309
|
+
und 79
|
310
|
+
od 79
|
311
|
+
sa 79
|
312
|
+
_I 79
|
313
313
|
_in 78
|
314
|
+
nde_ 78
|
315
|
+
_fr 78
|
314
316
|
_la 78
|
317
|
+
eri 78
|
318
|
+
sk_ 77
|
315
319
|
ov 77
|
316
320
|
ende 77
|
317
|
-
|
318
|
-
|
319
|
-
|
321
|
+
r. 77
|
322
|
+
_._ 76
|
323
|
+
ing_ 76
|
324
|
+
_si 76
|
320
325
|
tor 76
|
326
|
+
lk 75
|
321
327
|
av 75
|
322
328
|
lev 75
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
ing_ 75
|
327
|
-
_si 74
|
328
|
-
an_ 74
|
329
|
+
an_ 75
|
330
|
+
ark_ 75
|
331
|
+
mm 74
|
329
332
|
ft 74
|
330
|
-
|
333
|
+
_De 73
|
331
334
|
på 73
|
332
335
|
F 73
|
333
|
-
_._ 73
|
334
336
|
us 73
|
335
|
-
|
336
|
-
e,_ 72
|
337
|
-
di 72
|
337
|
+
le_ 72
|
338
338
|
rin 72
|
339
|
+
e,_ 72
|
339
340
|
e, 72
|
340
|
-
|
341
|
+
di 72
|
342
|
+
nd_ 72
|
341
343
|
E 72
|
342
|
-
nske 71
|
343
|
-
_på 71
|
344
344
|
_der 71
|
345
|
-
|
345
|
+
mark_ 71
|
346
|
+
_på 71
|
347
|
+
nske 71
|
346
348
|
_lan 71
|
347
|
-
|
348
|
-
le_ 70
|
349
|
-
ark_ 70
|
349
|
+
_p� 71
|
350
350
|
st� 70
|
351
|
+
get 70
|
351
352
|
gi 70
|
353
|
+
ks 69
|
352
354
|
ist 69
|
353
355
|
pr 69
|
354
|
-
nd_ 68
|
355
|
-
var 68
|
356
356
|
_blev 68
|
357
357
|
_ble 68
|
358
358
|
blev 68
|
359
|
-
|
360
|
-
mark_ 67
|
361
|
-
på_ 67
|
359
|
+
var 68
|
362
360
|
ss 67
|
361
|
+
på_ 67
|
363
362
|
anske 67
|
364
363
|
_va 67
|
365
|
-
|
366
|
-
_land 66
|
364
|
+
fi 66
|
367
365
|
ati 66
|
368
366
|
tio 66
|
369
367
|
lse 66
|
370
|
-
år 66
|
371
|
-
fi 66
|
372
|
-
_på_ 66
|
373
|
-
) 66
|
374
368
|
tion 66
|
375
|
-
|
376
|
-
( 66
|
369
|
+
_på_ 66
|
377
370
|
gr 66
|
378
|
-
|
371
|
+
�r 66
|
372
|
+
år 66
|
373
|
+
_land 66
|
379
374
|
sto 65
|
380
|
-
kt 65
|
381
375
|
one 65
|
382
|
-
|
376
|
+
ef 65
|
377
|
+
kt 65
|
383
378
|
ev_ 64
|
379
|
+
sen 64
|
384
380
|
else 64
|
385
381
|
A 63
|
386
|
-
ende_ 63
|
387
|
-
ren 63
|
388
382
|
ring 63
|
389
|
-
|
390
|
-
|
383
|
+
ren 63
|
384
|
+
ende_ 63
|
385
|
+
e._ 62
|
391
386
|
ho 62
|
392
|
-
|
393
|
-
|
387
|
+
for_ 62
|
388
|
+
_ko 62
|
389
|
+
ig_ 62
|
390
|
+
n, 61
|
394
391
|
ste_ 61
|
395
392
|
rig 61
|
396
|
-
|
393
|
+
n,_ 61
|
394
|
+
dr 61
|
397
395
|
ret 60
|
396
|
+
lev_ 60
|
398
397
|
blev_ 60
|
399
|
-
|
400
|
-
|
398
|
+
_F 60
|
399
|
+
_E 60
|
400
|
+
nger 59
|