scylla 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -2
- data/Gemfile.lock +16 -1
- data/lib/scylla/classifier.rb +1 -1
- data/lib/scylla/generator.rb +16 -4
- data/lib/scylla/lms/afrikaans.lm +232 -232
- data/lib/scylla/lms/arabic.lm +175 -175
- data/lib/scylla/lms/bulgarian.lm +225 -225
- data/lib/scylla/lms/catalan.lm +309 -309
- data/lib/scylla/lms/danish.lm +167 -167
- data/lib/scylla/lms/english.lm +398 -398
- data/lib/scylla/lms/finnish.lm +237 -237
- data/lib/scylla/lms/french.lm +148 -148
- data/lib/scylla/lms/german.lm +258 -258
- data/lib/scylla/lms/greek.lm +236 -236
- data/lib/scylla/lms/hebrew.lm +154 -154
- data/lib/scylla/lms/hindi.lm +139 -139
- data/lib/scylla/lms/icelandic.lm +239 -239
- data/lib/scylla/lms/indonesian.lm +244 -244
- data/lib/scylla/lms/italian.lm +248 -248
- data/lib/scylla/lms/japanese.lm +90 -90
- data/lib/scylla/lms/korean.lm +306 -306
- data/lib/scylla/lms/norwegian.lm +193 -193
- data/lib/scylla/lms/polish.lm +241 -241
- data/lib/scylla/lms/portuguese.lm +232 -232
- data/lib/scylla/lms/romanian.lm +246 -246
- data/lib/scylla/lms/slovak.lm +242 -242
- data/lib/scylla/lms/slovenian.lm +229 -229
- data/lib/scylla/lms/spanish.lm +164 -164
- data/lib/scylla/lms/swedish.lm +157 -157
- data/lib/scylla/lms/tagalog.lm +247 -247
- data/lib/scylla/lms/thai.lm +252 -252
- data/lib/scylla/lms/turkish.lm +285 -285
- data/lib/scylla/lms/vietnamese.lm +250 -250
- data/lib/scylla/lms/welsh.lm +248 -248
- data/lib/scylla/resources.rb +1 -9
- data/lib/scylla.rb +4 -0
- data/scylla.gemspec +2 -120
- data/source_texts/english.txt +62 -27
- data/test/classifier_test.rb +1 -3
- data/test/fixtures/lms/danish.lm +173 -173
- data/test/fixtures/lms/english.lm +220 -220
- data/test/fixtures/lms/french.lm +175 -175
- data/test/fixtures/lms/german.lm +254 -254
- data/test/fixtures/lms/hindi.lm +139 -139
- data/test/fixtures/lms/italian.lm +236 -236
- data/test/fixtures/lms/japanese.lm +88 -88
- data/test/fixtures/lms/norwegian.lm +182 -182
- data/test/fixtures/lms/spanish.lm +164 -164
- data/test/fixtures/test_languages/spanish +0 -1
- data/test/generator_test.rb +13 -0
- data/test/helper.rb +2 -0
- metadata +18 -25
- data/.document +0 -5
- data/lib/scylla/lms/13375P33K.lm +0 -400
- data/scylla-0.1.0.gem +0 -0
- data/source_texts/13375P33K.txt +0 -199
- data/test/fixtures/lms/13375p33k.lm +0 -400
- data/test/fixtures/source_texts/13375P33K.txt +0 -199
@@ -11,390 +11,390 @@ u 337
|
|
11
11
|
l 264
|
12
12
|
m 234
|
13
13
|
d 226
|
14
|
-
e_
|
14
|
+
e_ 209
|
15
15
|
c 198
|
16
16
|
h 188
|
17
17
|
p 178
|
18
|
-
s_
|
18
|
+
s_ 151
|
19
19
|
re 134
|
20
20
|
_a 127
|
21
|
-
t_
|
21
|
+
t_ 127
|
22
22
|
in 123
|
23
23
|
_t 123
|
24
24
|
or 103
|
25
|
+
m_ 101
|
25
26
|
f 99
|
26
|
-
m_ 98
|
27
27
|
b 91
|
28
28
|
g 87
|
29
29
|
_o 85
|
30
|
+
n_ 85
|
30
31
|
th 84
|
31
|
-
n_ 84
|
32
32
|
er 83
|
33
33
|
d_ 83
|
34
|
-
_i
|
34
|
+
_i 82
|
35
35
|
he 81
|
36
36
|
y 80
|
37
37
|
an 79
|
38
|
-
at 78
|
39
38
|
_e 78
|
40
|
-
|
39
|
+
at 78
|
41
40
|
,_ 75
|
41
|
+
, 75
|
42
42
|
on 73
|
43
|
-
te 72
|
44
|
-
_s 72
|
45
43
|
es 72
|
44
|
+
_s 72
|
45
|
+
te 72
|
46
46
|
_th 70
|
47
47
|
is 69
|
48
48
|
_p 65
|
49
49
|
. 64
|
50
|
-
um 63
|
51
50
|
v 63
|
52
51
|
o_ 63
|
52
|
+
um 63
|
53
53
|
se 62
|
54
54
|
en 60
|
55
55
|
w 60
|
56
|
+
r_ 59
|
56
57
|
ti 59
|
57
|
-
nd 58
|
58
58
|
it 58
|
59
|
-
|
59
|
+
nd 58
|
60
|
+
_d 56
|
61
|
+
nt 53
|
60
62
|
ur 52
|
61
63
|
as 52
|
62
|
-
_d 52
|
63
64
|
the 52
|
64
|
-
nt 51
|
65
65
|
_the 50
|
66
|
-
a_ 50
|
67
66
|
su 50
|
67
|
+
a_ 50
|
68
68
|
_c 48
|
69
|
+
q 48
|
69
70
|
he_ 48
|
70
71
|
qu 48
|
71
|
-
q 48
|
72
72
|
y_ 47
|
73
73
|
_w 47
|
74
|
-
_b 46
|
75
74
|
us 46
|
75
|
+
_b 46
|
76
|
+
um_ 45
|
77
|
+
._ 45
|
76
78
|
le 45
|
77
|
-
._ 44
|
78
|
-
em 44
|
79
79
|
of 44
|
80
|
+
em 44
|
80
81
|
ta 43
|
81
82
|
si 43
|
82
|
-
f_ 42
|
83
|
-
um_ 42
|
84
83
|
ed 42
|
85
|
-
|
84
|
+
f_ 42
|
86
85
|
ore 42
|
87
|
-
|
86
|
+
_of 42
|
88
87
|
k 41
|
89
|
-
|
90
|
-
et 40
|
88
|
+
to 41
|
91
89
|
ng 40
|
90
|
+
li 40
|
92
91
|
re_ 40
|
93
|
-
|
92
|
+
et 40
|
94
93
|
and 39
|
95
94
|
the_ 39
|
96
95
|
ea 39
|
96
|
+
_of_ 39
|
97
|
+
is_ 39
|
97
98
|
_the_ 39
|
98
99
|
of_ 39
|
99
|
-
_of_ 39
|
100
|
-
ol 38
|
101
|
-
in_ 38
|
102
100
|
st 38
|
101
|
+
in_ 38
|
102
|
+
ol 38
|
103
103
|
de 37
|
104
104
|
_m 37
|
105
|
-
_in
|
105
|
+
_in 37
|
106
106
|
_an 36
|
107
|
-
_r 35
|
108
107
|
io 35
|
109
108
|
_h 35
|
110
|
-
|
111
|
-
ri 34
|
112
|
-
ed_ 34
|
109
|
+
_r 35
|
113
110
|
la 34
|
111
|
+
ed_ 34
|
112
|
+
ri 34
|
114
113
|
ar 34
|
115
|
-
|
116
|
-
ou 33
|
114
|
+
ve 34
|
117
115
|
ing 33
|
116
|
+
ou 33
|
118
117
|
ce 33
|
118
|
+
_f 33
|
119
|
+
ro 32
|
119
120
|
lo 32
|
120
121
|
I 32
|
121
122
|
ec 32
|
122
|
-
ro 32
|
123
123
|
ng_ 31
|
124
|
+
_I 31
|
124
125
|
g_ 31
|
125
126
|
nd_ 31
|
126
|
-
_I 31
|
127
|
-
ing_ 30
|
128
|
-
em_ 30
|
129
|
-
al 30
|
130
|
-
co 30
|
131
|
-
ha 30
|
132
127
|
_to 30
|
133
|
-
|
128
|
+
_l 30
|
129
|
+
al 30
|
130
|
+
em_ 30
|
131
|
+
ing_ 30
|
134
132
|
el 30
|
135
|
-
_n 30
|
136
133
|
ni 30
|
137
|
-
|
134
|
+
to_ 30
|
135
|
+
ha 30
|
136
|
+
co 30
|
137
|
+
_n 30
|
138
138
|
no 29
|
139
139
|
ra 29
|
140
|
+
di 29
|
141
|
+
ut 29
|
140
142
|
es_ 29
|
141
143
|
om 29
|
142
|
-
_l 29
|
143
|
-
ut 29
|
144
|
-
ai 28
|
145
|
-
ic 28
|
146
|
-
ne 28
|
147
144
|
ct 28
|
145
|
+
ic 28
|
146
|
+
ai 28
|
148
147
|
am 28
|
149
148
|
_u 28
|
150
|
-
l_
|
149
|
+
l_ 28
|
150
|
+
ne 28
|
151
151
|
ion 27
|
152
|
-
|
152
|
+
rem 27
|
153
|
+
_re 27
|
154
|
+
do 27
|
155
|
+
_and 27
|
153
156
|
ll 27
|
154
|
-
_and_ 27
|
155
157
|
and_ 27
|
158
|
+
_and_ 27
|
159
|
+
ns 27
|
156
160
|
_v 27
|
157
|
-
_re 27
|
158
|
-
_and 27
|
159
|
-
rem 27
|
160
161
|
L 26
|
161
|
-
|
162
|
+
_to_ 26
|
162
163
|
sum 26
|
163
|
-
do 26
|
164
164
|
ia 26
|
165
|
-
|
166
|
-
tio 25
|
167
|
-
pa 25
|
165
|
+
_a_ 26
|
168
166
|
_q 25
|
169
|
-
il 25
|
170
167
|
ps 25
|
171
168
|
ci 25
|
172
169
|
ru 25
|
173
170
|
_qu 25
|
171
|
+
pa 25
|
172
|
+
il 25
|
173
|
+
tio 25
|
174
|
+
_L 24
|
174
175
|
me 24
|
175
|
-
x 24
|
176
|
-
un 24
|
177
|
-
ui 24
|
178
176
|
ati 24
|
179
|
-
|
177
|
+
ui 24
|
178
|
+
un 24
|
179
|
+
x 24
|
180
180
|
tion 23
|
181
|
-
hi 23
|
182
|
-
orem 23
|
183
|
-
psum 23
|
184
181
|
psu 23
|
182
|
+
hi 23
|
185
183
|
ss 23
|
186
184
|
_co 23
|
187
|
-
rem_ 23
|
188
185
|
pl 23
|
189
|
-
|
186
|
+
orem 23
|
187
|
+
rem_ 23
|
188
|
+
psum 23
|
189
|
+
_in_ 23
|
190
|
+
sum_ 22
|
191
|
+
orem_ 22
|
192
|
+
_do 22
|
193
|
+
id 22
|
194
|
+
se_ 22
|
195
|
+
nc 22
|
196
|
+
on_ 22
|
197
|
+
or_ 22
|
198
|
+
mo 22
|
190
199
|
im 22
|
191
|
-
_pa 22
|
192
200
|
ma 22
|
193
|
-
|
194
|
-
_do 22
|
201
|
+
_pa 22
|
195
202
|
tr 22
|
196
203
|
ure 22
|
197
|
-
orem_ 22
|
198
|
-
or_ 22
|
199
|
-
id 22
|
200
|
-
se_ 22
|
201
|
-
lor 21
|
202
|
-
sum_ 21
|
203
|
-
on_ 21
|
204
204
|
ac 21
|
205
|
-
|
205
|
+
it_ 21
|
206
|
+
lor 21
|
207
|
+
_si 20
|
206
208
|
ex 20
|
207
|
-
|
208
|
-
|
209
|
+
psum_ 20
|
210
|
+
at_ 20
|
209
211
|
tu 20
|
210
|
-
it_ 20
|
211
212
|
i_ 20
|
212
|
-
_si 20
|
213
213
|
ut_ 20
|
214
|
-
|
214
|
+
ge 20
|
215
215
|
qui 19
|
216
216
|
et_ 19
|
217
|
-
Lo 19
|
218
|
-
Lorem 19
|
219
|
-
us_ 19
|
220
217
|
Lor 19
|
221
218
|
Lore 19
|
222
219
|
ons 19
|
223
|
-
|
220
|
+
Lo 19
|
221
|
+
Lorem 19
|
222
|
+
us_ 19
|
224
223
|
pt 19
|
224
|
+
_no 18
|
225
|
+
ain 18
|
225
226
|
be 18
|
226
|
-
ect 18
|
227
227
|
ho 18
|
228
|
+
_Lore 18
|
229
|
+
_de 18
|
230
|
+
pe 18
|
231
|
+
ect 18
|
232
|
+
as_ 18
|
233
|
+
_Lor 18
|
228
234
|
ab 18
|
229
|
-
|
230
|
-
ain 18
|
235
|
+
_Lo 18
|
231
236
|
er_ 18
|
232
|
-
|
233
|
-
|
234
|
-
|
237
|
+
nt_ 18
|
238
|
+
T 18
|
239
|
+
_Ips 17
|
240
|
+
rum 17
|
241
|
+
vo 17
|
235
242
|
Ip 17
|
236
|
-
|
237
|
-
_Ipsu 17
|
238
|
-
_Lore 17
|
239
|
-
Ipsu 17
|
240
|
-
Ips 17
|
241
|
-
sa 17
|
242
|
-
Ipsum 17
|
243
|
-
up 17
|
243
|
+
eas 17
|
244
244
|
wh 17
|
245
|
+
up 17
|
246
|
+
Ipsum 17
|
247
|
+
pr 17
|
248
|
+
ple 17
|
245
249
|
_is 17
|
246
|
-
h_ 17
|
247
250
|
_et 17
|
248
|
-
|
251
|
+
ca 17
|
249
252
|
_wh 17
|
253
|
+
Ipsu 17
|
254
|
+
Ips 17
|
250
255
|
rs 17
|
251
|
-
|
252
|
-
_Ips 17
|
253
|
-
rum 17
|
254
|
-
ple 17
|
255
|
-
vo 17
|
256
|
-
_Lor 17
|
256
|
+
sa 17
|
257
257
|
_be 17
|
258
|
-
|
258
|
+
h_ 17
|
259
|
+
_Ipsu 17
|
259
260
|
_Ip 17
|
260
|
-
|
261
|
-
olo 16
|
262
|
-
lea 16
|
263
|
-
ere 16
|
264
|
-
ep 16
|
265
|
-
ul 16
|
266
|
-
cu 16
|
261
|
+
bl 17
|
267
262
|
po 16
|
268
|
-
|
269
|
-
_ma 16
|
263
|
+
cu 16
|
270
264
|
du 16
|
271
|
-
au 16
|
272
|
-
tur 16
|
273
|
-
bu 16
|
274
|
-
ate 16
|
275
265
|
ch 16
|
266
|
+
lu 16
|
267
|
+
ate 16
|
268
|
+
ent 16
|
276
269
|
ag 16
|
270
|
+
ep 16
|
271
|
+
ul 16
|
272
|
+
bu 16
|
273
|
+
tur 16
|
274
|
+
ere 16
|
275
|
+
au 16
|
276
|
+
ua 16
|
277
|
+
_ma 16
|
277
278
|
_qui 16
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
279
|
+
olo 16
|
280
|
+
lea 16
|
281
|
+
s, 15
|
282
|
+
na 15
|
282
283
|
_ex 15
|
283
|
-
|
284
|
+
dolor 15
|
285
|
+
os 15
|
286
|
+
_dolo 15
|
287
|
+
leas 15
|
284
288
|
C 15
|
285
|
-
_dol 15
|
286
|
-
s, 15
|
287
|
-
s,_ 15
|
288
289
|
ter 15
|
289
|
-
ver 15
|
290
|
-
leas 15
|
291
|
-
_dolo 15
|
292
|
-
_is_ 15
|
293
|
-
dolor 15
|
294
|
-
na 15
|
295
290
|
dolo 15
|
296
|
-
|
291
|
+
tat 15
|
297
292
|
dol 15
|
298
|
-
|
299
|
-
|
300
|
-
|
293
|
+
ver 15
|
294
|
+
_et_ 15
|
295
|
+
olor 15
|
296
|
+
_is_ 15
|
297
|
+
s,_ 15
|
298
|
+
_dol 15
|
301
299
|
ke 14
|
302
|
-
qua 14
|
303
|
-
sur 14
|
304
|
-
_de 14
|
305
300
|
con 14
|
306
301
|
so 14
|
307
|
-
|
308
|
-
|
302
|
+
rum_ 14
|
303
|
+
_pl 14
|
309
304
|
ip 14
|
305
|
+
qua 14
|
306
|
+
t,_ 14
|
310
307
|
t, 14
|
308
|
+
ue 14
|
309
|
+
wi 14
|
310
|
+
sur 14
|
311
|
+
te_ 14
|
311
312
|
_con 14
|
313
|
+
her 13
|
314
|
+
_ve 13
|
315
|
+
_ple 13
|
316
|
+
_plea 13
|
317
|
+
_by_ 13
|
318
|
+
by 13
|
312
319
|
ir 13
|
313
|
-
|
320
|
+
_mo 13
|
314
321
|
olu 13
|
315
|
-
|
316
|
-
atio 13
|
317
|
-
_plea 13
|
318
|
-
_ve 13
|
322
|
+
res 13
|
319
323
|
en_ 13
|
320
|
-
by_ 13
|
321
|
-
vol 13
|
322
|
-
_mo 13
|
323
|
-
plea 13
|
324
|
-
te_ 13
|
325
|
-
od 13
|
326
|
-
_ple 13
|
327
324
|
Th 13
|
328
|
-
|
329
|
-
|
325
|
+
pleas 13
|
326
|
+
od 13
|
330
327
|
_ha 13
|
331
328
|
ation 13
|
332
|
-
sure 13
|
333
|
-
pleas 13
|
334
|
-
her 13
|
335
329
|
ure_ 13
|
330
|
+
ur_ 13
|
336
331
|
_by 13
|
337
|
-
|
332
|
+
sure 13
|
333
|
+
vol 13
|
334
|
+
plea 13
|
335
|
+
atio 13
|
336
|
+
by_ 13
|
337
|
+
upt 13
|
338
|
+
sin 12
|
339
|
+
nte 12
|
340
|
+
da 12
|
341
|
+
_volu 12
|
338
342
|
volup 12
|
339
|
-
|
340
|
-
_fr 12
|
341
|
-
asu 12
|
342
|
-
_vo 12
|
343
|
-
ae 12
|
344
|
-
upta 12
|
343
|
+
ly_ 12
|
345
344
|
easur 12
|
346
|
-
|
347
|
-
|
348
|
-
|
345
|
+
easu 12
|
346
|
+
pta 12
|
347
|
+
_T 12
|
348
|
+
_pr 12
|
349
|
+
lup 12
|
350
|
+
ae 12
|
351
|
+
oo 12
|
352
|
+
lupta 12
|
353
|
+
m,_ 12
|
349
354
|
asur 12
|
350
355
|
lupt 12
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
_pr 12
|
356
|
+
mp 12
|
357
|
+
leasu 12
|
358
|
+
_fr 12
|
355
359
|
age 12
|
356
|
-
|
357
|
-
oo 12
|
358
|
-
est 12
|
359
|
-
ly 12
|
360
|
-
ur_ 12
|
361
|
-
ly_ 12
|
362
|
-
lup 12
|
360
|
+
olupt 12
|
363
361
|
unt 12
|
364
|
-
_volu 12
|
365
|
-
da 12
|
366
|
-
_wi 12
|
367
|
-
olup 12
|
368
362
|
bo 12
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
_al 11
|
382
|
-
fo 11
|
383
|
-
orum 11
|
384
|
-
equ 11
|
385
|
-
_se 11
|
363
|
+
est 12
|
364
|
+
_vol 12
|
365
|
+
volu 12
|
366
|
+
asu 12
|
367
|
+
olup 12
|
368
|
+
upta 12
|
369
|
+
_vo 12
|
370
|
+
m, 12
|
371
|
+
fr 12
|
372
|
+
_wi 12
|
373
|
+
ly 12
|
374
|
+
asure 12
|
386
375
|
The 11
|
387
|
-
|
388
|
-
|
376
|
+
tion_ 11
|
377
|
+
_se 11
|
389
378
|
nce 11
|
379
|
+
B 11
|
380
|
+
ain_ 11
|
381
|
+
cons 11
|
382
|
+
oi 11
|
383
|
+
_pain 11
|
384
|
+
_te 11
|
385
|
+
e, 11
|
386
|
+
oru 11
|
387
|
+
e,_ 11
|
388
|
+
pai 11
|
390
389
|
_y 11
|
391
|
-
ib 11
|
392
|
-
_us 11
|
393
390
|
pain 11
|
394
|
-
|
395
|
-
|
391
|
+
_pai 11
|
392
|
+
_al 11
|
393
|
+
ib 11
|
396
394
|
_cons 11
|
397
|
-
|
398
|
-
|
395
|
+
equ 11
|
396
|
+
fo 11
|
397
|
+
here 11
|
398
|
+
_g 11
|
399
|
+
_us 11
|
399
400
|
eq 11
|
400
|
-
oi 11
|