scylla 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -2
- data/Gemfile.lock +16 -1
- data/lib/scylla/classifier.rb +1 -1
- data/lib/scylla/generator.rb +16 -4
- data/lib/scylla/lms/afrikaans.lm +232 -232
- data/lib/scylla/lms/arabic.lm +175 -175
- data/lib/scylla/lms/bulgarian.lm +225 -225
- data/lib/scylla/lms/catalan.lm +309 -309
- data/lib/scylla/lms/danish.lm +167 -167
- data/lib/scylla/lms/english.lm +398 -398
- data/lib/scylla/lms/finnish.lm +237 -237
- data/lib/scylla/lms/french.lm +148 -148
- data/lib/scylla/lms/german.lm +258 -258
- data/lib/scylla/lms/greek.lm +236 -236
- data/lib/scylla/lms/hebrew.lm +154 -154
- data/lib/scylla/lms/hindi.lm +139 -139
- data/lib/scylla/lms/icelandic.lm +239 -239
- data/lib/scylla/lms/indonesian.lm +244 -244
- data/lib/scylla/lms/italian.lm +248 -248
- data/lib/scylla/lms/japanese.lm +90 -90
- data/lib/scylla/lms/korean.lm +306 -306
- data/lib/scylla/lms/norwegian.lm +193 -193
- data/lib/scylla/lms/polish.lm +241 -241
- data/lib/scylla/lms/portuguese.lm +232 -232
- data/lib/scylla/lms/romanian.lm +246 -246
- data/lib/scylla/lms/slovak.lm +242 -242
- data/lib/scylla/lms/slovenian.lm +229 -229
- data/lib/scylla/lms/spanish.lm +164 -164
- data/lib/scylla/lms/swedish.lm +157 -157
- data/lib/scylla/lms/tagalog.lm +247 -247
- data/lib/scylla/lms/thai.lm +252 -252
- data/lib/scylla/lms/turkish.lm +285 -285
- data/lib/scylla/lms/vietnamese.lm +250 -250
- data/lib/scylla/lms/welsh.lm +248 -248
- data/lib/scylla/resources.rb +1 -9
- data/lib/scylla.rb +4 -0
- data/scylla.gemspec +2 -120
- data/source_texts/english.txt +62 -27
- data/test/classifier_test.rb +1 -3
- data/test/fixtures/lms/danish.lm +173 -173
- data/test/fixtures/lms/english.lm +220 -220
- data/test/fixtures/lms/french.lm +175 -175
- data/test/fixtures/lms/german.lm +254 -254
- data/test/fixtures/lms/hindi.lm +139 -139
- data/test/fixtures/lms/italian.lm +236 -236
- data/test/fixtures/lms/japanese.lm +88 -88
- data/test/fixtures/lms/norwegian.lm +182 -182
- data/test/fixtures/lms/spanish.lm +164 -164
- data/test/fixtures/test_languages/spanish +0 -1
- data/test/generator_test.rb +13 -0
- data/test/helper.rb +2 -0
- metadata +18 -25
- data/.document +0 -5
- data/lib/scylla/lms/13375P33K.lm +0 -400
- data/scylla-0.1.0.gem +0 -0
- data/source_texts/13375P33K.txt +0 -199
- data/test/fixtures/lms/13375p33k.lm +0 -400
- data/test/fixtures/source_texts/13375P33K.txt +0 -199
data/lib/scylla/lms/italian.lm
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
_ 1548
|
2
|
-
a
|
2
|
+
a 542
|
3
3
|
e 410
|
4
4
|
i 389
|
5
5
|
o 341
|
@@ -7,29 +7,29 @@ l 278
|
|
7
7
|
n 244
|
8
8
|
r 243
|
9
9
|
c 205
|
10
|
-
s
|
11
|
-
a_
|
10
|
+
s 200
|
11
|
+
a_ 186
|
12
12
|
t 176
|
13
|
-
d
|
14
|
-
e_
|
13
|
+
d 168
|
14
|
+
e_ 146
|
15
15
|
u 123
|
16
16
|
v 115
|
17
|
-
o_
|
18
|
-
i_
|
19
|
-
m 100
|
17
|
+
o_ 114
|
18
|
+
i_ 104
|
20
19
|
p 99
|
20
|
+
m 98
|
21
|
+
_c 85
|
21
22
|
_d 84
|
22
|
-
_c 84
|
23
23
|
_s 79
|
24
24
|
, 74
|
25
|
+
la 73
|
25
26
|
,_ 69
|
26
27
|
_a 63
|
27
|
-
la 62
|
28
28
|
g 60
|
29
29
|
ar 60
|
30
|
-
an 59
|
31
30
|
er 59
|
32
|
-
|
31
|
+
an 59
|
32
|
+
h 54
|
33
33
|
ri 52
|
34
34
|
co 52
|
35
35
|
ll 52
|
@@ -37,364 +37,364 @@ _p 51
|
|
37
37
|
re 49
|
38
38
|
ch 46
|
39
39
|
ra 46
|
40
|
-
|
40
|
+
to 45
|
41
41
|
_i 45
|
42
|
-
|
43
|
-
di 44
|
42
|
+
el 45
|
44
43
|
no 44
|
45
|
-
|
44
|
+
di 44
|
45
|
+
se 44
|
46
|
+
_m 44
|
46
47
|
b 43
|
47
48
|
va 43
|
48
|
-
l_ 42
|
49
49
|
_l 42
|
50
|
+
l_ 42
|
50
51
|
ia 42
|
51
|
-
se 41
|
52
|
-
in 40
|
53
52
|
n_ 40
|
53
|
+
la_ 40
|
54
|
+
in 40
|
55
|
+
av 39
|
54
56
|
f 39
|
55
|
-
av 38
|
56
|
-
la_ 38
|
57
|
-
do 37
|
58
57
|
_di 37
|
58
|
+
do 37
|
59
59
|
on 36
|
60
|
-
al
|
60
|
+
al 36
|
61
|
+
ta 35
|
61
62
|
ca 34
|
62
|
-
ta 34
|
63
63
|
na 34
|
64
|
+
en 34
|
64
65
|
_e 34
|
65
|
-
' 34
|
66
66
|
li 34
|
67
|
-
|
67
|
+
da 33
|
68
|
+
re_ 33
|
69
|
+
lla 33
|
68
70
|
or 33
|
69
|
-
|
71
|
+
le 32
|
70
72
|
si 32
|
71
73
|
_n 32
|
72
|
-
|
73
|
-
_co 31
|
74
|
+
_co 32
|
74
75
|
to_ 31
|
75
76
|
ol 30
|
76
|
-
le 30
|
77
77
|
de 30
|
78
|
-
|
78
|
+
as 30
|
79
79
|
pe 29
|
80
80
|
cc 29
|
81
|
-
re_ 29
|
82
|
-
ve 28
|
83
|
-
il 28
|
84
81
|
ma 28
|
85
|
-
|
86
|
-
|
82
|
+
il 28
|
83
|
+
ve 28
|
87
84
|
o, 27
|
88
|
-
va_ 27
|
89
85
|
_v 27
|
86
|
+
. 27
|
87
|
+
va_ 27
|
88
|
+
io 27
|
90
89
|
nd 26
|
91
|
-
ne 26
|
92
90
|
tt 26
|
93
|
-
|
94
|
-
nt 25
|
91
|
+
ne 26
|
95
92
|
st 25
|
96
93
|
gli 25
|
94
|
+
gl 25
|
95
|
+
nt 25
|
96
|
+
he 24
|
97
|
+
ell 24
|
97
98
|
o,_ 24
|
99
|
+
che 24
|
98
100
|
sa 24
|
101
|
+
ti 24
|
102
|
+
_se 24
|
99
103
|
_il 24
|
100
|
-
|
101
|
-
me 23
|
102
|
-
_f 23
|
103
|
-
he 23
|
104
|
+
_de 23
|
104
105
|
om 23
|
105
106
|
at 23
|
106
|
-
|
107
|
-
|
108
|
-
|
107
|
+
_f 23
|
108
|
+
me 23
|
109
|
+
_b 22
|
110
|
+
il_ 22
|
109
111
|
_ch 22
|
110
|
-
|
111
|
-
_qu 22
|
112
|
+
q 22
|
112
113
|
a, 22
|
113
|
-
|
114
|
+
_qu 22
|
115
|
+
qu 22
|
116
|
+
_la 22
|
114
117
|
_q 22
|
115
|
-
|
116
|
-
_b 22
|
117
|
-
il_ 22
|
118
|
-
_pe 21
|
119
|
-
a,_ 21
|
118
|
+
ci 22
|
120
119
|
te 21
|
121
|
-
_ca 21
|
122
|
-
_se 21
|
123
|
-
_il_ 21
|
124
120
|
vi 21
|
121
|
+
lla_ 21
|
122
|
+
a,_ 21
|
123
|
+
_ca 21
|
125
124
|
un 21
|
126
|
-
|
127
|
-
|
128
|
-
ava 20
|
129
|
-
ra_ 20
|
125
|
+
_pe 21
|
126
|
+
_il_ 21
|
130
127
|
_che 20
|
131
|
-
|
128
|
+
ava 20
|
132
129
|
che_ 20
|
133
|
-
del 20
|
134
130
|
_e_ 20
|
135
|
-
|
136
|
-
lla_ 20
|
131
|
+
del 20
|
137
132
|
z 20
|
138
|
-
|
139
|
-
|
140
|
-
|
133
|
+
_di_ 20
|
134
|
+
he_ 20
|
135
|
+
di_ 20
|
136
|
+
pa 20
|
137
|
+
ra_ 20
|
141
138
|
ss 19
|
139
|
+
_u 19
|
142
140
|
no_ 19
|
143
|
-
|
141
|
+
lo 19
|
144
142
|
es 19
|
143
|
+
ev 19
|
144
|
+
._ 19
|
145
|
+
_del 19
|
145
146
|
et 18
|
146
|
-
|
147
|
+
vo 18
|
147
148
|
and 18
|
148
|
-
_ma 18
|
149
149
|
is 18
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
l' 17
|
154
|
-
na_ 17
|
155
|
-
era 17
|
150
|
+
_ma 18
|
151
|
+
ic 18
|
152
|
+
_che_ 18
|
156
153
|
_un 17
|
157
|
-
os 17
|
158
|
-
ic 17
|
159
154
|
si_ 17
|
160
155
|
hi 17
|
161
156
|
le_ 17
|
162
|
-
|
157
|
+
na_ 17
|
158
|
+
ia_ 17
|
159
|
+
os 17
|
160
|
+
era 17
|
161
|
+
sc 16
|
163
162
|
per 16
|
164
163
|
ano 16
|
165
|
-
ie 16
|
166
164
|
ua 16
|
167
|
-
|
165
|
+
io_ 16
|
166
|
+
ie 16
|
168
167
|
po 16
|
169
168
|
chi 16
|
170
|
-
|
169
|
+
_g 16
|
170
|
+
_in 15
|
171
|
+
_per 15
|
171
172
|
ad 15
|
172
|
-
it 15
|
173
|
-
io_ 15
|
174
|
-
are 15
|
175
|
-
ava_ 15
|
176
|
-
_si 15
|
177
173
|
tr 15
|
178
|
-
ac 15
|
179
|
-
eva 15
|
180
|
-
tto 15
|
181
174
|
com 15
|
182
|
-
|
183
|
-
|
175
|
+
tto 15
|
176
|
+
eva 15
|
177
|
+
are 15
|
178
|
+
it 15
|
184
179
|
se_ 15
|
185
|
-
|
186
|
-
|
187
|
-
|
180
|
+
ac 15
|
181
|
+
su 15
|
182
|
+
_si 15
|
183
|
+
ava_ 15
|
188
184
|
fa 14
|
189
|
-
|
190
|
-
|
191
|
-
_a_ 14
|
185
|
+
_su 14
|
186
|
+
li_ 14
|
192
187
|
_r 14
|
193
|
-
|
188
|
+
_da 14
|
194
189
|
ari 14
|
195
190
|
e,_ 14
|
196
|
-
|
197
|
-
|
191
|
+
_a_ 14
|
192
|
+
_la_ 14
|
198
193
|
e, 14
|
199
|
-
li_ 14
|
200
194
|
ro 14
|
195
|
+
mo 14
|
196
|
+
ndo 14
|
197
|
+
are_ 14
|
198
|
+
oc 14
|
199
|
+
on_ 13
|
201
200
|
in_ 13
|
202
201
|
id 13
|
203
|
-
ella 13
|
204
202
|
_in_ 13
|
205
|
-
on_ 13
|
206
203
|
un_ 13
|
207
204
|
gli_ 13
|
208
|
-
el_ 13
|
209
205
|
all 13
|
210
206
|
mp 13
|
207
|
+
el_ 13
|
208
|
+
ella 13
|
209
|
+
ce 13
|
211
210
|
so 13
|
212
211
|
ur 13
|
213
|
-
|
214
|
-
� 12
|
215
|
-
ue 12
|
216
|
-
r_ 12
|
212
|
+
uo 12
|
217
213
|
ni 12
|
218
|
-
|
219
|
-
|
214
|
+
ta_ 12
|
215
|
+
ig 12
|
216
|
+
era_ 12
|
220
217
|
col 12
|
221
|
-
pr 12
|
222
|
-
sse 12
|
223
|
-
mi 12
|
224
218
|
qua 12
|
225
|
-
|
226
|
-
ig 12
|
227
|
-
be 12
|
228
|
-
uo 12
|
219
|
+
ave 12
|
229
220
|
tto_ 12
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
221
|
+
be 12
|
222
|
+
ut 12
|
223
|
+
ue 12
|
224
|
+
sse 12
|
225
|
+
mi 12
|
226
|
+
� 12
|
227
|
+
r_ 12
|
228
|
+
pr 12
|
229
|
+
_qua 12
|
230
|
+
a. 12
|
235
231
|
_gl 11
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
_no 11
|
240
|
-
ano_ 11
|
241
|
-
_al 11
|
232
|
+
_t 11
|
233
|
+
ne_ 11
|
234
|
+
asa 11
|
242
235
|
_vi 11
|
243
|
-
ave 11
|
244
|
-
_da 11
|
245
236
|
cch 11
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
237
|
+
_al 11
|
238
|
+
_col 11
|
239
|
+
_no 11
|
240
|
+
_com 11
|
241
|
+
iv 11
|
242
|
+
cas 11
|
250
243
|
�_ 11
|
251
|
-
|
252
|
-
|
244
|
+
ano_ 11
|
245
|
+
_cas 11
|
253
246
|
ome 11
|
247
|
+
me_ 11
|
248
|
+
acc 11
|
249
|
+
_le 11
|
250
|
+
_gli 11
|
251
|
+
pi 11
|
254
252
|
dd 11
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
_que 10
|
259
|
-
van 10
|
260
|
-
man 10
|
261
|
-
ale 10
|
262
|
-
_fa 10
|
263
|
-
dell 10
|
264
|
-
zi 10
|
253
|
+
_un_ 11
|
254
|
+
ul 11
|
255
|
+
gn 11
|
265
256
|
cchi 10
|
266
|
-
|
257
|
+
dell 10
|
258
|
+
da_ 10
|
259
|
+
rr 10
|
260
|
+
_ri 10
|
261
|
+
ent 10
|
267
262
|
_dell 10
|
268
|
-
|
263
|
+
zi 10
|
264
|
+
ato 10
|
265
|
+
van 10
|
266
|
+
_casa 10
|
267
|
+
_ne 10
|
268
|
+
_av 10
|
269
|
+
ti_ 10
|
269
270
|
casa 10
|
271
|
+
non 10
|
272
|
+
ale 10
|
273
|
+
ser 10
|
274
|
+
_do 10
|
275
|
+
_fa 10
|
276
|
+
man 10
|
277
|
+
am 10
|
278
|
+
_que 10
|
270
279
|
og 10
|
271
|
-
_ri 10
|
272
280
|
_gli_ 10
|
273
|
-
ato 10
|
274
|
-
ent 10
|
275
|
-
non 10
|
276
281
|
que 10
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
_ne 10
|
281
|
-
far 9
|
282
|
-
del_ 9
|
282
|
+
eva_ 9
|
283
|
+
ome_ 9
|
284
|
+
fi 9
|
283
285
|
_mo 9
|
284
|
-
|
286
|
+
par 9
|
287
|
+
ando 9
|
288
|
+
uri 9
|
285
289
|
er_ 9
|
286
|
-
_an 9
|
287
290
|
anda 9
|
288
|
-
�_ 9
|
289
|
-
ella_ 9
|
290
|
-
ne_ 9
|
291
291
|
bi 9
|
292
|
-
|
292
|
+
sta 9
|
293
|
+
del_ 9
|
294
|
+
far 9
|
293
295
|
vano 9
|
294
|
-
|
295
|
-
ando 9
|
296
|
-
uri 9
|
297
|
-
ti_ 9
|
298
|
-
da_ 9
|
296
|
+
ella_ 9
|
299
297
|
ess 9
|
298
|
+
rid 9
|
299
|
+
alla 9
|
300
|
+
_be 9
|
300
301
|
oi 9
|
301
|
-
|
302
|
-
_le 9
|
303
|
-
come 9
|
304
|
-
� 9
|
302
|
+
_an 9
|
305
303
|
nda 9
|
306
|
-
|
304
|
+
_me 9
|
307
305
|
ot 9
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
_del_ 8
|
312
|
-
_come 8
|
306
|
+
� 9
|
307
|
+
come 9
|
308
|
+
�_ 9
|
313
309
|
i, 8
|
314
|
-
della 8
|
315
310
|
ridd 8
|
316
311
|
come_ 8
|
317
|
-
|
318
|
-
una_ 8
|
312
|
+
non_ 8
|
319
313
|
ina 8
|
320
|
-
|
321
|
-
ba 8
|
322
|
-
nz 8
|
314
|
+
cco 8
|
323
315
|
uel 8
|
324
|
-
|
325
|
-
una 8
|
326
|
-
! 8
|
327
|
-
_ave 8
|
328
|
-
ene 8
|
329
|
-
con 8
|
330
|
-
non_ 8
|
316
|
+
llo 8
|
331
317
|
ato_ 8
|
332
|
-
|
333
|
-
_st 8
|
334
|
-
cco 8
|
335
|
-
ser 8
|
336
|
-
Tu 8
|
337
|
-
T 8
|
318
|
+
nz 8
|
338
319
|
do_ 8
|
320
|
+
_come 8
|
321
|
+
con 8
|
322
|
+
_all 8
|
323
|
+
ed 8
|
324
|
+
Tu 8
|
339
325
|
occ 8
|
326
|
+
idd 8
|
327
|
+
lo_ 8
|
328
|
+
sa_ 8
|
340
329
|
S 8
|
341
|
-
|
330
|
+
ant 8
|
331
|
+
_del_ 8
|
332
|
+
ba 8
|
333
|
+
una 8
|
334
|
+
L 8
|
335
|
+
_st 8
|
336
|
+
ene 8
|
342
337
|
ir 8
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
338
|
+
_ave 8
|
339
|
+
della 8
|
340
|
+
una_ 8
|
341
|
+
T 8
|
342
|
+
quel 7
|
343
|
+
oll 7
|
344
|
+
ere 7
|
345
|
+
ola 7
|
346
|
+
nto 7
|
347
|
+
rc 7
|
348
|
+
per_ 7
|
352
349
|
rv 7
|
353
|
-
|
350
|
+
gi 7
|
351
|
+
_era 7
|
352
|
+
dav 7
|
354
353
|
cia 7
|
355
|
-
|
356
|
-
|
357
|
-
quel 7
|
358
|
-
i,_ 7
|
359
|
-
_coll 7
|
354
|
+
ett 7
|
355
|
+
ec 7
|
360
356
|
Tur 7
|
361
|
-
|
362
|
-
ap 7
|
363
|
-
pre 7
|
364
|
-
hi_ 7
|
365
|
-
nto 7
|
366
|
-
_pr 7
|
357
|
+
_le_ 7
|
367
358
|
utt 7
|
368
|
-
|
369
|
-
llo 7
|
359
|
+
hi_ 7
|
370
360
|
coll 7
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
361
|
+
_quel 7
|
362
|
+
dava 7
|
363
|
+
pre 7
|
364
|
+
tra 7
|
365
|
+
_ad 7
|
366
|
+
vano_ 7
|
367
|
+
_pr 7
|
375
368
|
ai 7
|
376
|
-
|
369
|
+
ap 7
|
370
|
+
lav 7
|
371
|
+
_coll 7
|
372
|
+
i,_ 7
|
373
|
+
_si_ 7
|
374
|
+
zia 7
|
375
|
+
_er 7
|
376
|
+
ino 7
|
377
377
|
ogl 7
|
378
|
-
dav 7
|
379
|
-
ola 7
|
380
|
-
_quel 7
|
381
378
|
_vo 7
|
382
|
-
|
383
|
-
|
379
|
+
_pa 7
|
380
|
+
mpa 7
|
381
|
+
ogli 7
|
382
|
+
sp 7
|
384
383
|
za 7
|
385
|
-
|
386
|
-
|
384
|
+
sse_ 7
|
385
|
+
Turi 7
|
387
386
|
_pi 7
|
388
|
-
|
389
|
-
ogli 7
|
387
|
+
_non 7
|
390
388
|
ndo_ 7
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
ere 7
|
389
|
+
Sa 6
|
390
|
+
veva 6
|
391
|
+
_sc 6
|
392
|
+
P 6
|
396
393
|
nu 6
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
394
|
+
uridd 6
|
395
|
+
rm 6
|
396
|
+
_con 6
|
397
|
+
h�_ 6
|
398
|
+
esse 6
|
399
|
+
i� 6
|
400
|
+
_ve 6
|