scylla 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -2
- data/Gemfile.lock +16 -1
- data/lib/scylla/classifier.rb +1 -1
- data/lib/scylla/generator.rb +16 -4
- data/lib/scylla/lms/afrikaans.lm +232 -232
- data/lib/scylla/lms/arabic.lm +175 -175
- data/lib/scylla/lms/bulgarian.lm +225 -225
- data/lib/scylla/lms/catalan.lm +309 -309
- data/lib/scylla/lms/danish.lm +167 -167
- data/lib/scylla/lms/english.lm +398 -398
- data/lib/scylla/lms/finnish.lm +237 -237
- data/lib/scylla/lms/french.lm +148 -148
- data/lib/scylla/lms/german.lm +258 -258
- data/lib/scylla/lms/greek.lm +236 -236
- data/lib/scylla/lms/hebrew.lm +154 -154
- data/lib/scylla/lms/hindi.lm +139 -139
- data/lib/scylla/lms/icelandic.lm +239 -239
- data/lib/scylla/lms/indonesian.lm +244 -244
- data/lib/scylla/lms/italian.lm +248 -248
- data/lib/scylla/lms/japanese.lm +90 -90
- data/lib/scylla/lms/korean.lm +306 -306
- data/lib/scylla/lms/norwegian.lm +193 -193
- data/lib/scylla/lms/polish.lm +241 -241
- data/lib/scylla/lms/portuguese.lm +232 -232
- data/lib/scylla/lms/romanian.lm +246 -246
- data/lib/scylla/lms/slovak.lm +242 -242
- data/lib/scylla/lms/slovenian.lm +229 -229
- data/lib/scylla/lms/spanish.lm +164 -164
- data/lib/scylla/lms/swedish.lm +157 -157
- data/lib/scylla/lms/tagalog.lm +247 -247
- data/lib/scylla/lms/thai.lm +252 -252
- data/lib/scylla/lms/turkish.lm +285 -285
- data/lib/scylla/lms/vietnamese.lm +250 -250
- data/lib/scylla/lms/welsh.lm +248 -248
- data/lib/scylla/resources.rb +1 -9
- data/lib/scylla.rb +4 -0
- data/scylla.gemspec +2 -120
- data/source_texts/english.txt +62 -27
- data/test/classifier_test.rb +1 -3
- data/test/fixtures/lms/danish.lm +173 -173
- data/test/fixtures/lms/english.lm +220 -220
- data/test/fixtures/lms/french.lm +175 -175
- data/test/fixtures/lms/german.lm +254 -254
- data/test/fixtures/lms/hindi.lm +139 -139
- data/test/fixtures/lms/italian.lm +236 -236
- data/test/fixtures/lms/japanese.lm +88 -88
- data/test/fixtures/lms/norwegian.lm +182 -182
- data/test/fixtures/lms/spanish.lm +164 -164
- data/test/fixtures/test_languages/spanish +0 -1
- data/test/generator_test.rb +13 -0
- data/test/helper.rb +2 -0
- metadata +18 -25
- data/.document +0 -5
- data/lib/scylla/lms/13375P33K.lm +0 -400
- data/scylla-0.1.0.gem +0 -0
- data/source_texts/13375P33K.txt +0 -199
- data/test/fixtures/lms/13375p33k.lm +0 -400
- data/test/fixtures/source_texts/13375P33K.txt +0 -199
@@ -1,5 +1,5 @@
|
|
1
1
|
_ 1548
|
2
|
-
a
|
2
|
+
a 542
|
3
3
|
e 410
|
4
4
|
i 389
|
5
5
|
o 341
|
@@ -7,394 +7,394 @@ l 278
|
|
7
7
|
n 244
|
8
8
|
r 243
|
9
9
|
c 205
|
10
|
-
s
|
11
|
-
a_
|
10
|
+
s 200
|
11
|
+
a_ 186
|
12
12
|
t 176
|
13
|
-
d
|
14
|
-
e_
|
13
|
+
d 168
|
14
|
+
e_ 146
|
15
15
|
u 123
|
16
16
|
v 115
|
17
|
-
o_
|
18
|
-
i_
|
19
|
-
m 100
|
17
|
+
o_ 114
|
18
|
+
i_ 104
|
20
19
|
p 99
|
20
|
+
m 98
|
21
|
+
_c 85
|
21
22
|
_d 84
|
22
|
-
_c 84
|
23
23
|
_s 79
|
24
24
|
, 74
|
25
|
+
la 73
|
25
26
|
,_ 69
|
26
27
|
_a 63
|
27
|
-
la 62
|
28
|
-
g 60
|
29
28
|
ar 60
|
29
|
+
g 60
|
30
30
|
an 59
|
31
31
|
er 59
|
32
|
-
h
|
33
|
-
ri 52
|
34
|
-
co 52
|
32
|
+
h 54
|
35
33
|
ll 52
|
34
|
+
co 52
|
35
|
+
ri 52
|
36
36
|
_p 51
|
37
37
|
re 49
|
38
38
|
ch 46
|
39
39
|
ra 46
|
40
|
-
|
40
|
+
to 45
|
41
41
|
_i 45
|
42
|
+
el 45
|
42
43
|
_m 44
|
43
44
|
di 44
|
44
45
|
no 44
|
45
|
-
|
46
|
-
b 43
|
46
|
+
se 44
|
47
47
|
va 43
|
48
|
-
|
48
|
+
b 43
|
49
49
|
_l 42
|
50
|
+
l_ 42
|
50
51
|
ia 42
|
51
|
-
se 41
|
52
52
|
in 40
|
53
53
|
n_ 40
|
54
|
+
la_ 40
|
55
|
+
av 39
|
54
56
|
f 39
|
55
|
-
av 38
|
56
|
-
la_ 38
|
57
57
|
do 37
|
58
58
|
_di 37
|
59
59
|
on 36
|
60
|
-
al
|
60
|
+
al 36
|
61
|
+
ta 35
|
61
62
|
ca 34
|
62
|
-
ta 34
|
63
|
-
na 34
|
64
|
-
_e 34
|
65
|
-
' 34
|
66
63
|
li 34
|
67
64
|
en 34
|
65
|
+
na 34
|
66
|
+
_e 34
|
67
|
+
da 33
|
68
|
+
re_ 33
|
68
69
|
or 33
|
69
|
-
|
70
|
+
lla 33
|
70
71
|
si 32
|
71
72
|
_n 32
|
72
|
-
|
73
|
-
|
73
|
+
_co 32
|
74
|
+
le 32
|
74
75
|
to_ 31
|
75
76
|
ol 30
|
76
|
-
le 30
|
77
77
|
de 30
|
78
|
-
|
78
|
+
as 30
|
79
79
|
pe 29
|
80
80
|
cc 29
|
81
|
-
re_ 29
|
82
|
-
ve 28
|
83
81
|
il 28
|
84
82
|
ma 28
|
85
|
-
|
83
|
+
ve 28
|
84
|
+
va_ 27
|
86
85
|
io 27
|
87
86
|
o, 27
|
88
|
-
va_ 27
|
89
87
|
_v 27
|
88
|
+
. 27
|
90
89
|
nd 26
|
91
90
|
ne 26
|
92
91
|
tt 26
|
93
|
-
gl 25
|
94
92
|
nt 25
|
93
|
+
gl 25
|
95
94
|
st 25
|
96
95
|
gli 25
|
96
|
+
ell 24
|
97
97
|
o,_ 24
|
98
|
+
_se 24
|
99
|
+
che 24
|
100
|
+
ti 24
|
98
101
|
sa 24
|
99
102
|
_il 24
|
100
|
-
|
103
|
+
he 24
|
104
|
+
_de 23
|
105
|
+
at 23
|
101
106
|
me 23
|
102
|
-
_f 23
|
103
|
-
he 23
|
104
107
|
om 23
|
105
|
-
|
106
|
-
|
107
|
-
ti 23
|
108
|
-
_de 23
|
108
|
+
_f 23
|
109
|
+
q 22
|
109
110
|
_ch 22
|
110
|
-
|
111
|
-
_qu 22
|
111
|
+
_la 22
|
112
112
|
a, 22
|
113
|
-
ci 22
|
114
113
|
_q 22
|
115
|
-
|
116
|
-
|
114
|
+
ci 22
|
115
|
+
_qu 22
|
116
|
+
qu 22
|
117
117
|
il_ 22
|
118
|
-
|
118
|
+
_b 22
|
119
119
|
a,_ 21
|
120
|
-
te 21
|
121
120
|
_ca 21
|
122
|
-
|
123
|
-
_il_ 21
|
121
|
+
te 21
|
124
122
|
vi 21
|
123
|
+
_pe 21
|
124
|
+
lla_ 21
|
125
|
+
_il_ 21
|
125
126
|
un 21
|
127
|
+
_e_ 20
|
128
|
+
z 20
|
129
|
+
_che 20
|
126
130
|
di_ 20
|
127
131
|
he_ 20
|
128
|
-
ava 20
|
129
|
-
ra_ 20
|
130
|
-
_che 20
|
131
132
|
pa 20
|
132
|
-
|
133
|
-
del 20
|
134
|
-
_e_ 20
|
133
|
+
ra_ 20
|
135
134
|
_di_ 20
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
._ 19
|
135
|
+
del 20
|
136
|
+
ava 20
|
137
|
+
che_ 20
|
140
138
|
_del 19
|
141
|
-
|
139
|
+
lo 19
|
142
140
|
no_ 19
|
143
|
-
|
141
|
+
ss 19
|
142
|
+
._ 19
|
144
143
|
es 19
|
145
|
-
|
144
|
+
ev 19
|
145
|
+
_u 19
|
146
146
|
_che_ 18
|
147
|
-
|
147
|
+
ic 18
|
148
|
+
vo 18
|
148
149
|
_ma 18
|
149
150
|
is 18
|
150
|
-
|
151
|
-
|
151
|
+
et 18
|
152
|
+
and 18
|
153
|
+
os 17
|
152
154
|
ia_ 17
|
153
|
-
|
154
|
-
na_ 17
|
155
|
-
era 17
|
155
|
+
hi 17
|
156
156
|
_un 17
|
157
|
-
|
158
|
-
ic 17
|
157
|
+
era 17
|
159
158
|
si_ 17
|
160
|
-
|
159
|
+
na_ 17
|
161
160
|
le_ 17
|
162
|
-
|
163
|
-
|
164
|
-
|
161
|
+
sc 16
|
162
|
+
po 16
|
163
|
+
chi 16
|
164
|
+
io_ 16
|
165
165
|
ie 16
|
166
|
+
ano 16
|
166
167
|
ua 16
|
167
168
|
_g 16
|
168
|
-
|
169
|
-
chi 16
|
170
|
-
sc 16
|
171
|
-
ad 15
|
172
|
-
it 15
|
173
|
-
io_ 15
|
174
|
-
are 15
|
175
|
-
ava_ 15
|
176
|
-
_si 15
|
169
|
+
per 16
|
177
170
|
tr 15
|
178
|
-
ac 15
|
179
171
|
eva 15
|
180
|
-
|
181
|
-
com 15
|
182
|
-
_in 15
|
172
|
+
_si 15
|
183
173
|
su 15
|
174
|
+
com 15
|
175
|
+
are 15
|
176
|
+
tto 15
|
184
177
|
se_ 15
|
185
|
-
|
178
|
+
ac 15
|
179
|
+
ava_ 15
|
186
180
|
_per 15
|
187
|
-
|
188
|
-
|
181
|
+
ad 15
|
182
|
+
it 15
|
183
|
+
_in 15
|
184
|
+
e, 14
|
189
185
|
are_ 14
|
186
|
+
e,_ 14
|
190
187
|
_la_ 14
|
191
|
-
|
192
|
-
|
188
|
+
_da 14
|
189
|
+
li_ 14
|
190
|
+
oc 14
|
191
|
+
mo 14
|
193
192
|
ndo 14
|
194
193
|
ari 14
|
195
|
-
e,_ 14
|
196
194
|
_su 14
|
197
|
-
|
198
|
-
|
199
|
-
|
195
|
+
_r 14
|
196
|
+
_a_ 14
|
197
|
+
fa 14
|
200
198
|
ro 14
|
199
|
+
ce 13
|
200
|
+
all 13
|
201
201
|
in_ 13
|
202
|
-
id 13
|
203
|
-
ella 13
|
204
202
|
_in_ 13
|
205
203
|
on_ 13
|
206
|
-
|
204
|
+
ella 13
|
207
205
|
gli_ 13
|
206
|
+
ur 13
|
207
|
+
so 13
|
208
|
+
id 13
|
208
209
|
el_ 13
|
209
|
-
|
210
|
+
un_ 13
|
210
211
|
mp 13
|
211
|
-
|
212
|
-
ur 13
|
213
|
-
a. 12
|
212
|
+
era_ 12
|
214
213
|
� 12
|
215
|
-
|
214
|
+
a. 12
|
215
|
+
col 12
|
216
|
+
ta_ 12
|
216
217
|
r_ 12
|
218
|
+
pr 12
|
219
|
+
ig 12
|
220
|
+
ue 12
|
217
221
|
ni 12
|
218
222
|
ut 12
|
219
223
|
_qua 12
|
220
|
-
col 12
|
221
|
-
pr 12
|
222
|
-
sse 12
|
223
|
-
mi 12
|
224
224
|
qua 12
|
225
|
-
era_ 12
|
226
|
-
ig 12
|
227
225
|
be 12
|
226
|
+
ave 12
|
227
|
+
sse 12
|
228
228
|
uo 12
|
229
|
+
mi 12
|
229
230
|
tto_ 12
|
230
|
-
|
231
|
-
|
231
|
+
ul 11
|
232
|
+
iv 11
|
233
|
+
_vi 11
|
232
234
|
asa 11
|
233
|
-
|
234
|
-
|
235
|
+
_gli 11
|
236
|
+
_t 11
|
237
|
+
_col 11
|
238
|
+
acc 11
|
239
|
+
ne_ 11
|
240
|
+
_al 11
|
241
|
+
�_ 11
|
235
242
|
_gl 11
|
243
|
+
pi 11
|
236
244
|
me_ 11
|
237
|
-
_com 11
|
238
|
-
_cas 11
|
239
245
|
_no 11
|
240
|
-
ano_ 11
|
241
|
-
_al 11
|
242
|
-
_vi 11
|
243
|
-
ave 11
|
244
|
-
_da 11
|
245
246
|
cch 11
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
pi 11
|
250
|
-
�_ 11
|
251
|
-
ul 11
|
252
|
-
_un_ 11
|
247
|
+
ano_ 11
|
248
|
+
cas 11
|
249
|
+
_cas 11
|
253
250
|
ome 11
|
251
|
+
_un_ 11
|
252
|
+
gn 11
|
254
253
|
dd 11
|
255
|
-
|
256
|
-
|
257
|
-
_av 10
|
258
|
-
_que 10
|
259
|
-
van 10
|
260
|
-
man 10
|
261
|
-
ale 10
|
262
|
-
_fa 10
|
263
|
-
dell 10
|
264
|
-
zi 10
|
254
|
+
_le 11
|
255
|
+
_com 11
|
265
256
|
cchi 10
|
266
|
-
|
267
|
-
|
268
|
-
_t 10
|
257
|
+
non 10
|
258
|
+
_av 10
|
269
259
|
casa 10
|
260
|
+
ato 10
|
261
|
+
ser 10
|
262
|
+
ti_ 10
|
263
|
+
zi 10
|
270
264
|
og 10
|
271
|
-
|
265
|
+
_ne 10
|
266
|
+
dell 10
|
272
267
|
_gli_ 10
|
273
|
-
|
268
|
+
_do 10
|
269
|
+
_casa 10
|
274
270
|
ent 10
|
275
|
-
|
271
|
+
_que 10
|
272
|
+
ale 10
|
273
|
+
_ri 10
|
274
|
+
_dell 10
|
275
|
+
_fa 10
|
276
|
+
van 10
|
277
|
+
da_ 10
|
278
|
+
man 10
|
279
|
+
rr 10
|
276
280
|
que 10
|
277
281
|
am 10
|
278
|
-
_casa 10
|
279
|
-
rr 10
|
280
|
-
_ne 10
|
281
|
-
far 9
|
282
|
-
del_ 9
|
283
282
|
_mo 9
|
284
283
|
rid 9
|
285
|
-
er_ 9
|
286
|
-
_an 9
|
287
|
-
anda 9
|
288
|
-
�_ 9
|
289
|
-
ella_ 9
|
290
|
-
ne_ 9
|
291
|
-
bi 9
|
292
284
|
fi 9
|
285
|
+
_be 9
|
293
286
|
vano 9
|
287
|
+
far 9
|
294
288
|
_me 9
|
289
|
+
ot 9
|
295
290
|
ando 9
|
296
|
-
uri 9
|
297
|
-
ti_ 9
|
298
|
-
da_ 9
|
299
291
|
ess 9
|
300
292
|
oi 9
|
293
|
+
�_ 9
|
301
294
|
par 9
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
295
|
+
_an 9
|
296
|
+
er_ 9
|
297
|
+
uri 9
|
298
|
+
alla 9
|
306
299
|
ome_ 9
|
307
|
-
|
308
|
-
_be 9
|
300
|
+
anda 9
|
309
301
|
eva_ 9
|
302
|
+
ella_ 9
|
303
|
+
� 9
|
304
|
+
del_ 9
|
305
|
+
bi 9
|
310
306
|
sta 9
|
311
|
-
|
312
|
-
|
313
|
-
i, 8
|
307
|
+
come 9
|
308
|
+
nda 9
|
314
309
|
della 8
|
315
|
-
|
316
|
-
come_ 8
|
317
|
-
idd 8
|
310
|
+
ene 8
|
318
311
|
una_ 8
|
319
|
-
|
312
|
+
occ 8
|
313
|
+
idd 8
|
314
|
+
llo 8
|
315
|
+
_st 8
|
316
|
+
sa_ 8
|
317
|
+
cco 8
|
318
|
+
do_ 8
|
319
|
+
uel 8
|
320
|
+
lo_ 8
|
321
|
+
ed 8
|
320
322
|
L 8
|
321
323
|
ba 8
|
322
|
-
|
323
|
-
uel 8
|
324
|
-
_l' 8
|
325
|
-
una 8
|
326
|
-
! 8
|
327
|
-
_ave 8
|
328
|
-
ene 8
|
324
|
+
ridd 8
|
329
325
|
con 8
|
330
|
-
non_ 8
|
331
326
|
ato_ 8
|
327
|
+
i, 8
|
332
328
|
ant 8
|
333
|
-
|
334
|
-
|
335
|
-
ser 8
|
336
|
-
Tu 8
|
337
|
-
T 8
|
338
|
-
do_ 8
|
339
|
-
occ 8
|
340
|
-
S 8
|
341
|
-
; 8
|
329
|
+
nz 8
|
330
|
+
una 8
|
342
331
|
ir 8
|
332
|
+
come_ 8
|
333
|
+
ina 8
|
343
334
|
_all 8
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
335
|
+
_ave 8
|
336
|
+
_del_ 8
|
337
|
+
_come 8
|
338
|
+
S 8
|
339
|
+
non_ 8
|
340
|
+
T 8
|
341
|
+
Tu 8
|
342
|
+
Tur 7
|
343
|
+
ere 7
|
344
|
+
_le_ 7
|
345
|
+
nto 7
|
346
|
+
utt 7
|
347
|
+
oll 7
|
348
|
+
rc 7
|
352
349
|
rv 7
|
353
|
-
|
350
|
+
hi_ 7
|
351
|
+
_era 7
|
352
|
+
_quel 7
|
354
353
|
cia 7
|
355
|
-
ino 7
|
356
354
|
per_ 7
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
355
|
+
ett 7
|
356
|
+
ec 7
|
357
|
+
ndo_ 7
|
358
|
+
vano_ 7
|
359
|
+
dava 7
|
360
|
+
ai 7
|
361
|
+
coll 7
|
361
362
|
_non 7
|
362
|
-
ap 7
|
363
363
|
pre 7
|
364
|
-
|
365
|
-
|
364
|
+
ap 7
|
365
|
+
_ad 7
|
366
|
+
dav 7
|
367
|
+
lav 7
|
366
368
|
_pr 7
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
369
|
+
_coll 7
|
370
|
+
i,_ 7
|
371
|
+
zia 7
|
372
|
+
gi 7
|
373
|
+
ino 7
|
374
|
+
mpa 7
|
375
|
+
_er 7
|
372
376
|
_pa 7
|
373
|
-
|
374
|
-
sp 7
|
375
|
-
ai 7
|
376
|
-
_le_ 7
|
377
|
+
_si_ 7
|
377
378
|
ogl 7
|
378
|
-
|
379
|
-
ola 7
|
380
|
-
_quel 7
|
379
|
+
ogli 7
|
381
380
|
_vo 7
|
382
|
-
|
383
|
-
|
381
|
+
sp 7
|
382
|
+
Turi 7
|
384
383
|
za 7
|
385
|
-
|
386
|
-
|
384
|
+
ola 7
|
385
|
+
sse_ 7
|
387
386
|
_pi 7
|
388
387
|
tra 7
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
gi 7
|
394
|
-
_er 7
|
395
|
-
ere 7
|
388
|
+
quel 7
|
389
|
+
Sa 6
|
390
|
+
P 6
|
391
|
+
veva 6
|
396
392
|
nu 6
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
393
|
+
_sc 6
|
394
|
+
�_ 6
|
395
|
+
_con 6
|
396
|
+
esse 6
|
397
|
+
h�_ 6
|
398
|
+
uridd 6
|
399
|
+
ch�_ 6
|
400
|
+
uand 6
|