scylla 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -2
- data/Gemfile.lock +16 -1
- data/lib/scylla/classifier.rb +1 -1
- data/lib/scylla/generator.rb +16 -4
- data/lib/scylla/lms/afrikaans.lm +232 -232
- data/lib/scylla/lms/arabic.lm +175 -175
- data/lib/scylla/lms/bulgarian.lm +225 -225
- data/lib/scylla/lms/catalan.lm +309 -309
- data/lib/scylla/lms/danish.lm +167 -167
- data/lib/scylla/lms/english.lm +398 -398
- data/lib/scylla/lms/finnish.lm +237 -237
- data/lib/scylla/lms/french.lm +148 -148
- data/lib/scylla/lms/german.lm +258 -258
- data/lib/scylla/lms/greek.lm +236 -236
- data/lib/scylla/lms/hebrew.lm +154 -154
- data/lib/scylla/lms/hindi.lm +139 -139
- data/lib/scylla/lms/icelandic.lm +239 -239
- data/lib/scylla/lms/indonesian.lm +244 -244
- data/lib/scylla/lms/italian.lm +248 -248
- data/lib/scylla/lms/japanese.lm +90 -90
- data/lib/scylla/lms/korean.lm +306 -306
- data/lib/scylla/lms/norwegian.lm +193 -193
- data/lib/scylla/lms/polish.lm +241 -241
- data/lib/scylla/lms/portuguese.lm +232 -232
- data/lib/scylla/lms/romanian.lm +246 -246
- data/lib/scylla/lms/slovak.lm +242 -242
- data/lib/scylla/lms/slovenian.lm +229 -229
- data/lib/scylla/lms/spanish.lm +164 -164
- data/lib/scylla/lms/swedish.lm +157 -157
- data/lib/scylla/lms/tagalog.lm +247 -247
- data/lib/scylla/lms/thai.lm +252 -252
- data/lib/scylla/lms/turkish.lm +285 -285
- data/lib/scylla/lms/vietnamese.lm +250 -250
- data/lib/scylla/lms/welsh.lm +248 -248
- data/lib/scylla/resources.rb +1 -9
- data/lib/scylla.rb +4 -0
- data/scylla.gemspec +2 -120
- data/source_texts/english.txt +62 -27
- data/test/classifier_test.rb +1 -3
- data/test/fixtures/lms/danish.lm +173 -173
- data/test/fixtures/lms/english.lm +220 -220
- data/test/fixtures/lms/french.lm +175 -175
- data/test/fixtures/lms/german.lm +254 -254
- data/test/fixtures/lms/hindi.lm +139 -139
- data/test/fixtures/lms/italian.lm +236 -236
- data/test/fixtures/lms/japanese.lm +88 -88
- data/test/fixtures/lms/norwegian.lm +182 -182
- data/test/fixtures/lms/spanish.lm +164 -164
- data/test/fixtures/test_languages/spanish +0 -1
- data/test/generator_test.rb +13 -0
- data/test/helper.rb +2 -0
- metadata +18 -25
- data/.document +0 -5
- data/lib/scylla/lms/13375P33K.lm +0 -400
- data/scylla-0.1.0.gem +0 -0
- data/source_texts/13375P33K.txt +0 -199
- data/test/fixtures/lms/13375p33k.lm +0 -400
- data/test/fixtures/source_texts/13375P33K.txt +0 -199
@@ -1,5 +1,5 @@
|
|
1
1
|
_ 1548
|
2
|
-
a
|
2
|
+
a 542
|
3
3
|
e 410
|
4
4
|
i 389
|
5
5
|
o 341
|
@@ -7,394 +7,394 @@ l 278
|
|
7
7
|
n 244
|
8
8
|
r 243
|
9
9
|
c 205
|
10
|
-
s
|
11
|
-
a_
|
10
|
+
s 200
|
11
|
+
a_ 186
|
12
12
|
t 176
|
13
|
-
d
|
14
|
-
e_
|
13
|
+
d 168
|
14
|
+
e_ 146
|
15
15
|
u 123
|
16
16
|
v 115
|
17
|
-
o_
|
18
|
-
i_
|
19
|
-
m 100
|
17
|
+
o_ 114
|
18
|
+
i_ 104
|
20
19
|
p 99
|
20
|
+
m 98
|
21
|
+
_c 85
|
21
22
|
_d 84
|
22
|
-
_c 84
|
23
23
|
_s 79
|
24
24
|
, 74
|
25
|
+
la 73
|
25
26
|
,_ 69
|
26
27
|
_a 63
|
27
|
-
la 62
|
28
|
-
g 60
|
29
28
|
ar 60
|
29
|
+
g 60
|
30
30
|
an 59
|
31
31
|
er 59
|
32
|
-
h
|
33
|
-
ri 52
|
34
|
-
co 52
|
32
|
+
h 54
|
35
33
|
ll 52
|
34
|
+
co 52
|
35
|
+
ri 52
|
36
36
|
_p 51
|
37
37
|
re 49
|
38
38
|
ch 46
|
39
39
|
ra 46
|
40
|
-
|
40
|
+
to 45
|
41
41
|
_i 45
|
42
|
+
el 45
|
42
43
|
_m 44
|
43
44
|
di 44
|
44
45
|
no 44
|
45
|
-
|
46
|
-
b 43
|
46
|
+
se 44
|
47
47
|
va 43
|
48
|
-
|
48
|
+
b 43
|
49
49
|
_l 42
|
50
|
+
l_ 42
|
50
51
|
ia 42
|
51
|
-
se 41
|
52
52
|
in 40
|
53
53
|
n_ 40
|
54
|
+
la_ 40
|
55
|
+
av 39
|
54
56
|
f 39
|
55
|
-
av 38
|
56
|
-
la_ 38
|
57
57
|
do 37
|
58
58
|
_di 37
|
59
59
|
on 36
|
60
|
-
al
|
60
|
+
al 36
|
61
|
+
ta 35
|
61
62
|
ca 34
|
62
|
-
ta 34
|
63
|
-
na 34
|
64
|
-
_e 34
|
65
|
-
' 34
|
66
63
|
li 34
|
67
64
|
en 34
|
65
|
+
na 34
|
66
|
+
_e 34
|
67
|
+
da 33
|
68
|
+
re_ 33
|
68
69
|
or 33
|
69
|
-
|
70
|
+
lla 33
|
70
71
|
si 32
|
71
72
|
_n 32
|
72
|
-
|
73
|
-
|
73
|
+
_co 32
|
74
|
+
le 32
|
74
75
|
to_ 31
|
75
76
|
ol 30
|
76
|
-
le 30
|
77
77
|
de 30
|
78
|
-
|
78
|
+
as 30
|
79
79
|
pe 29
|
80
80
|
cc 29
|
81
|
-
re_ 29
|
82
|
-
ve 28
|
83
81
|
il 28
|
84
82
|
ma 28
|
85
|
-
|
83
|
+
ve 28
|
84
|
+
va_ 27
|
86
85
|
io 27
|
87
86
|
o, 27
|
88
|
-
va_ 27
|
89
87
|
_v 27
|
88
|
+
. 27
|
90
89
|
nd 26
|
91
90
|
ne 26
|
92
91
|
tt 26
|
93
|
-
gl 25
|
94
92
|
nt 25
|
93
|
+
gl 25
|
95
94
|
st 25
|
96
95
|
gli 25
|
96
|
+
ell 24
|
97
97
|
o,_ 24
|
98
|
+
_se 24
|
99
|
+
che 24
|
100
|
+
ti 24
|
98
101
|
sa 24
|
99
102
|
_il 24
|
100
|
-
|
103
|
+
he 24
|
104
|
+
_de 23
|
105
|
+
at 23
|
101
106
|
me 23
|
102
|
-
_f 23
|
103
|
-
he 23
|
104
107
|
om 23
|
105
|
-
|
106
|
-
|
107
|
-
ti 23
|
108
|
-
_de 23
|
108
|
+
_f 23
|
109
|
+
q 22
|
109
110
|
_ch 22
|
110
|
-
|
111
|
-
_qu 22
|
111
|
+
_la 22
|
112
112
|
a, 22
|
113
|
-
ci 22
|
114
113
|
_q 22
|
115
|
-
|
116
|
-
|
114
|
+
ci 22
|
115
|
+
_qu 22
|
116
|
+
qu 22
|
117
117
|
il_ 22
|
118
|
-
|
118
|
+
_b 22
|
119
119
|
a,_ 21
|
120
|
-
te 21
|
121
120
|
_ca 21
|
122
|
-
|
123
|
-
_il_ 21
|
121
|
+
te 21
|
124
122
|
vi 21
|
123
|
+
_pe 21
|
124
|
+
lla_ 21
|
125
|
+
_il_ 21
|
125
126
|
un 21
|
127
|
+
_e_ 20
|
128
|
+
z 20
|
129
|
+
_che 20
|
126
130
|
di_ 20
|
127
131
|
he_ 20
|
128
|
-
ava 20
|
129
|
-
ra_ 20
|
130
|
-
_che 20
|
131
132
|
pa 20
|
132
|
-
|
133
|
-
del 20
|
134
|
-
_e_ 20
|
133
|
+
ra_ 20
|
135
134
|
_di_ 20
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
._ 19
|
135
|
+
del 20
|
136
|
+
ava 20
|
137
|
+
che_ 20
|
140
138
|
_del 19
|
141
|
-
|
139
|
+
lo 19
|
142
140
|
no_ 19
|
143
|
-
|
141
|
+
ss 19
|
142
|
+
._ 19
|
144
143
|
es 19
|
145
|
-
|
144
|
+
ev 19
|
145
|
+
_u 19
|
146
146
|
_che_ 18
|
147
|
-
|
147
|
+
ic 18
|
148
|
+
vo 18
|
148
149
|
_ma 18
|
149
150
|
is 18
|
150
|
-
|
151
|
-
|
151
|
+
et 18
|
152
|
+
and 18
|
153
|
+
os 17
|
152
154
|
ia_ 17
|
153
|
-
|
154
|
-
na_ 17
|
155
|
-
era 17
|
155
|
+
hi 17
|
156
156
|
_un 17
|
157
|
-
|
158
|
-
ic 17
|
157
|
+
era 17
|
159
158
|
si_ 17
|
160
|
-
|
159
|
+
na_ 17
|
161
160
|
le_ 17
|
162
|
-
|
163
|
-
|
164
|
-
|
161
|
+
sc 16
|
162
|
+
po 16
|
163
|
+
chi 16
|
164
|
+
io_ 16
|
165
165
|
ie 16
|
166
|
+
ano 16
|
166
167
|
ua 16
|
167
168
|
_g 16
|
168
|
-
|
169
|
-
chi 16
|
170
|
-
sc 16
|
171
|
-
ad 15
|
172
|
-
it 15
|
173
|
-
io_ 15
|
174
|
-
are 15
|
175
|
-
ava_ 15
|
176
|
-
_si 15
|
169
|
+
per 16
|
177
170
|
tr 15
|
178
|
-
ac 15
|
179
171
|
eva 15
|
180
|
-
|
181
|
-
com 15
|
182
|
-
_in 15
|
172
|
+
_si 15
|
183
173
|
su 15
|
174
|
+
com 15
|
175
|
+
are 15
|
176
|
+
tto 15
|
184
177
|
se_ 15
|
185
|
-
|
178
|
+
ac 15
|
179
|
+
ava_ 15
|
186
180
|
_per 15
|
187
|
-
|
188
|
-
|
181
|
+
ad 15
|
182
|
+
it 15
|
183
|
+
_in 15
|
184
|
+
e, 14
|
189
185
|
are_ 14
|
186
|
+
e,_ 14
|
190
187
|
_la_ 14
|
191
|
-
|
192
|
-
|
188
|
+
_da 14
|
189
|
+
li_ 14
|
190
|
+
oc 14
|
191
|
+
mo 14
|
193
192
|
ndo 14
|
194
193
|
ari 14
|
195
|
-
e,_ 14
|
196
194
|
_su 14
|
197
|
-
|
198
|
-
|
199
|
-
|
195
|
+
_r 14
|
196
|
+
_a_ 14
|
197
|
+
fa 14
|
200
198
|
ro 14
|
199
|
+
ce 13
|
200
|
+
all 13
|
201
201
|
in_ 13
|
202
|
-
id 13
|
203
|
-
ella 13
|
204
202
|
_in_ 13
|
205
203
|
on_ 13
|
206
|
-
|
204
|
+
ella 13
|
207
205
|
gli_ 13
|
206
|
+
ur 13
|
207
|
+
so 13
|
208
|
+
id 13
|
208
209
|
el_ 13
|
209
|
-
|
210
|
+
un_ 13
|
210
211
|
mp 13
|
211
|
-
|
212
|
-
ur 13
|
213
|
-
a. 12
|
212
|
+
era_ 12
|
214
213
|
� 12
|
215
|
-
|
214
|
+
a. 12
|
215
|
+
col 12
|
216
|
+
ta_ 12
|
216
217
|
r_ 12
|
218
|
+
pr 12
|
219
|
+
ig 12
|
220
|
+
ue 12
|
217
221
|
ni 12
|
218
222
|
ut 12
|
219
223
|
_qua 12
|
220
|
-
col 12
|
221
|
-
pr 12
|
222
|
-
sse 12
|
223
|
-
mi 12
|
224
224
|
qua 12
|
225
|
-
era_ 12
|
226
|
-
ig 12
|
227
225
|
be 12
|
226
|
+
ave 12
|
227
|
+
sse 12
|
228
228
|
uo 12
|
229
|
+
mi 12
|
229
230
|
tto_ 12
|
230
|
-
|
231
|
-
|
231
|
+
ul 11
|
232
|
+
iv 11
|
233
|
+
_vi 11
|
232
234
|
asa 11
|
233
|
-
|
234
|
-
|
235
|
+
_gli 11
|
236
|
+
_t 11
|
237
|
+
_col 11
|
238
|
+
acc 11
|
239
|
+
ne_ 11
|
240
|
+
_al 11
|
241
|
+
�_ 11
|
235
242
|
_gl 11
|
243
|
+
pi 11
|
236
244
|
me_ 11
|
237
|
-
_com 11
|
238
|
-
_cas 11
|
239
245
|
_no 11
|
240
|
-
ano_ 11
|
241
|
-
_al 11
|
242
|
-
_vi 11
|
243
|
-
ave 11
|
244
|
-
_da 11
|
245
246
|
cch 11
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
pi 11
|
250
|
-
�_ 11
|
251
|
-
ul 11
|
252
|
-
_un_ 11
|
247
|
+
ano_ 11
|
248
|
+
cas 11
|
249
|
+
_cas 11
|
253
250
|
ome 11
|
251
|
+
_un_ 11
|
252
|
+
gn 11
|
254
253
|
dd 11
|
255
|
-
|
256
|
-
|
257
|
-
_av 10
|
258
|
-
_que 10
|
259
|
-
van 10
|
260
|
-
man 10
|
261
|
-
ale 10
|
262
|
-
_fa 10
|
263
|
-
dell 10
|
264
|
-
zi 10
|
254
|
+
_le 11
|
255
|
+
_com 11
|
265
256
|
cchi 10
|
266
|
-
|
267
|
-
|
268
|
-
_t 10
|
257
|
+
non 10
|
258
|
+
_av 10
|
269
259
|
casa 10
|
260
|
+
ato 10
|
261
|
+
ser 10
|
262
|
+
ti_ 10
|
263
|
+
zi 10
|
270
264
|
og 10
|
271
|
-
|
265
|
+
_ne 10
|
266
|
+
dell 10
|
272
267
|
_gli_ 10
|
273
|
-
|
268
|
+
_do 10
|
269
|
+
_casa 10
|
274
270
|
ent 10
|
275
|
-
|
271
|
+
_que 10
|
272
|
+
ale 10
|
273
|
+
_ri 10
|
274
|
+
_dell 10
|
275
|
+
_fa 10
|
276
|
+
van 10
|
277
|
+
da_ 10
|
278
|
+
man 10
|
279
|
+
rr 10
|
276
280
|
que 10
|
277
281
|
am 10
|
278
|
-
_casa 10
|
279
|
-
rr 10
|
280
|
-
_ne 10
|
281
|
-
far 9
|
282
|
-
del_ 9
|
283
282
|
_mo 9
|
284
283
|
rid 9
|
285
|
-
er_ 9
|
286
|
-
_an 9
|
287
|
-
anda 9
|
288
|
-
�_ 9
|
289
|
-
ella_ 9
|
290
|
-
ne_ 9
|
291
|
-
bi 9
|
292
284
|
fi 9
|
285
|
+
_be 9
|
293
286
|
vano 9
|
287
|
+
far 9
|
294
288
|
_me 9
|
289
|
+
ot 9
|
295
290
|
ando 9
|
296
|
-
uri 9
|
297
|
-
ti_ 9
|
298
|
-
da_ 9
|
299
291
|
ess 9
|
300
292
|
oi 9
|
293
|
+
�_ 9
|
301
294
|
par 9
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
295
|
+
_an 9
|
296
|
+
er_ 9
|
297
|
+
uri 9
|
298
|
+
alla 9
|
306
299
|
ome_ 9
|
307
|
-
|
308
|
-
_be 9
|
300
|
+
anda 9
|
309
301
|
eva_ 9
|
302
|
+
ella_ 9
|
303
|
+
� 9
|
304
|
+
del_ 9
|
305
|
+
bi 9
|
310
306
|
sta 9
|
311
|
-
|
312
|
-
|
313
|
-
i, 8
|
307
|
+
come 9
|
308
|
+
nda 9
|
314
309
|
della 8
|
315
|
-
|
316
|
-
come_ 8
|
317
|
-
idd 8
|
310
|
+
ene 8
|
318
311
|
una_ 8
|
319
|
-
|
312
|
+
occ 8
|
313
|
+
idd 8
|
314
|
+
llo 8
|
315
|
+
_st 8
|
316
|
+
sa_ 8
|
317
|
+
cco 8
|
318
|
+
do_ 8
|
319
|
+
uel 8
|
320
|
+
lo_ 8
|
321
|
+
ed 8
|
320
322
|
L 8
|
321
323
|
ba 8
|
322
|
-
|
323
|
-
uel 8
|
324
|
-
_l' 8
|
325
|
-
una 8
|
326
|
-
! 8
|
327
|
-
_ave 8
|
328
|
-
ene 8
|
324
|
+
ridd 8
|
329
325
|
con 8
|
330
|
-
non_ 8
|
331
326
|
ato_ 8
|
327
|
+
i, 8
|
332
328
|
ant 8
|
333
|
-
|
334
|
-
|
335
|
-
ser 8
|
336
|
-
Tu 8
|
337
|
-
T 8
|
338
|
-
do_ 8
|
339
|
-
occ 8
|
340
|
-
S 8
|
341
|
-
; 8
|
329
|
+
nz 8
|
330
|
+
una 8
|
342
331
|
ir 8
|
332
|
+
come_ 8
|
333
|
+
ina 8
|
343
334
|
_all 8
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
335
|
+
_ave 8
|
336
|
+
_del_ 8
|
337
|
+
_come 8
|
338
|
+
S 8
|
339
|
+
non_ 8
|
340
|
+
T 8
|
341
|
+
Tu 8
|
342
|
+
Tur 7
|
343
|
+
ere 7
|
344
|
+
_le_ 7
|
345
|
+
nto 7
|
346
|
+
utt 7
|
347
|
+
oll 7
|
348
|
+
rc 7
|
352
349
|
rv 7
|
353
|
-
|
350
|
+
hi_ 7
|
351
|
+
_era 7
|
352
|
+
_quel 7
|
354
353
|
cia 7
|
355
|
-
ino 7
|
356
354
|
per_ 7
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
355
|
+
ett 7
|
356
|
+
ec 7
|
357
|
+
ndo_ 7
|
358
|
+
vano_ 7
|
359
|
+
dava 7
|
360
|
+
ai 7
|
361
|
+
coll 7
|
361
362
|
_non 7
|
362
|
-
ap 7
|
363
363
|
pre 7
|
364
|
-
|
365
|
-
|
364
|
+
ap 7
|
365
|
+
_ad 7
|
366
|
+
dav 7
|
367
|
+
lav 7
|
366
368
|
_pr 7
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
369
|
+
_coll 7
|
370
|
+
i,_ 7
|
371
|
+
zia 7
|
372
|
+
gi 7
|
373
|
+
ino 7
|
374
|
+
mpa 7
|
375
|
+
_er 7
|
372
376
|
_pa 7
|
373
|
-
|
374
|
-
sp 7
|
375
|
-
ai 7
|
376
|
-
_le_ 7
|
377
|
+
_si_ 7
|
377
378
|
ogl 7
|
378
|
-
|
379
|
-
ola 7
|
380
|
-
_quel 7
|
379
|
+
ogli 7
|
381
380
|
_vo 7
|
382
|
-
|
383
|
-
|
381
|
+
sp 7
|
382
|
+
Turi 7
|
384
383
|
za 7
|
385
|
-
|
386
|
-
|
384
|
+
ola 7
|
385
|
+
sse_ 7
|
387
386
|
_pi 7
|
388
387
|
tra 7
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
gi 7
|
394
|
-
_er 7
|
395
|
-
ere 7
|
388
|
+
quel 7
|
389
|
+
Sa 6
|
390
|
+
P 6
|
391
|
+
veva 6
|
396
392
|
nu 6
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
393
|
+
_sc 6
|
394
|
+
�_ 6
|
395
|
+
_con 6
|
396
|
+
esse 6
|
397
|
+
h�_ 6
|
398
|
+
uridd 6
|
399
|
+
ch�_ 6
|
400
|
+
uand 6
|