scylla 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -2
- data/Gemfile.lock +16 -1
- data/lib/scylla/classifier.rb +1 -1
- data/lib/scylla/generator.rb +16 -4
- data/lib/scylla/lms/afrikaans.lm +232 -232
- data/lib/scylla/lms/arabic.lm +175 -175
- data/lib/scylla/lms/bulgarian.lm +225 -225
- data/lib/scylla/lms/catalan.lm +309 -309
- data/lib/scylla/lms/danish.lm +167 -167
- data/lib/scylla/lms/english.lm +398 -398
- data/lib/scylla/lms/finnish.lm +237 -237
- data/lib/scylla/lms/french.lm +148 -148
- data/lib/scylla/lms/german.lm +258 -258
- data/lib/scylla/lms/greek.lm +236 -236
- data/lib/scylla/lms/hebrew.lm +154 -154
- data/lib/scylla/lms/hindi.lm +139 -139
- data/lib/scylla/lms/icelandic.lm +239 -239
- data/lib/scylla/lms/indonesian.lm +244 -244
- data/lib/scylla/lms/italian.lm +248 -248
- data/lib/scylla/lms/japanese.lm +90 -90
- data/lib/scylla/lms/korean.lm +306 -306
- data/lib/scylla/lms/norwegian.lm +193 -193
- data/lib/scylla/lms/polish.lm +241 -241
- data/lib/scylla/lms/portuguese.lm +232 -232
- data/lib/scylla/lms/romanian.lm +246 -246
- data/lib/scylla/lms/slovak.lm +242 -242
- data/lib/scylla/lms/slovenian.lm +229 -229
- data/lib/scylla/lms/spanish.lm +164 -164
- data/lib/scylla/lms/swedish.lm +157 -157
- data/lib/scylla/lms/tagalog.lm +247 -247
- data/lib/scylla/lms/thai.lm +252 -252
- data/lib/scylla/lms/turkish.lm +285 -285
- data/lib/scylla/lms/vietnamese.lm +250 -250
- data/lib/scylla/lms/welsh.lm +248 -248
- data/lib/scylla/resources.rb +1 -9
- data/lib/scylla.rb +4 -0
- data/scylla.gemspec +2 -120
- data/source_texts/english.txt +62 -27
- data/test/classifier_test.rb +1 -3
- data/test/fixtures/lms/danish.lm +173 -173
- data/test/fixtures/lms/english.lm +220 -220
- data/test/fixtures/lms/french.lm +175 -175
- data/test/fixtures/lms/german.lm +254 -254
- data/test/fixtures/lms/hindi.lm +139 -139
- data/test/fixtures/lms/italian.lm +236 -236
- data/test/fixtures/lms/japanese.lm +88 -88
- data/test/fixtures/lms/norwegian.lm +182 -182
- data/test/fixtures/lms/spanish.lm +164 -164
- data/test/fixtures/test_languages/spanish +0 -1
- data/test/generator_test.rb +13 -0
- data/test/helper.rb +2 -0
- metadata +18 -25
- data/.document +0 -5
- data/lib/scylla/lms/13375P33K.lm +0 -400
- data/scylla-0.1.0.gem +0 -0
- data/source_texts/13375P33K.txt +0 -199
- data/test/fixtures/lms/13375p33k.lm +0 -400
- data/test/fixtures/source_texts/13375P33K.txt +0 -199
data/lib/scylla/lms/italian.lm
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
_ 1548
|
2
|
-
a
|
2
|
+
a 542
|
3
3
|
e 410
|
4
4
|
i 389
|
5
5
|
o 341
|
@@ -7,29 +7,29 @@ l 278
|
|
7
7
|
n 244
|
8
8
|
r 243
|
9
9
|
c 205
|
10
|
-
s
|
11
|
-
a_
|
10
|
+
s 200
|
11
|
+
a_ 186
|
12
12
|
t 176
|
13
|
-
d
|
14
|
-
e_
|
13
|
+
d 168
|
14
|
+
e_ 146
|
15
15
|
u 123
|
16
16
|
v 115
|
17
|
-
o_
|
18
|
-
i_
|
19
|
-
m 100
|
17
|
+
o_ 114
|
18
|
+
i_ 104
|
20
19
|
p 99
|
20
|
+
m 98
|
21
|
+
_c 85
|
21
22
|
_d 84
|
22
|
-
_c 84
|
23
23
|
_s 79
|
24
24
|
, 74
|
25
|
+
la 73
|
25
26
|
,_ 69
|
26
27
|
_a 63
|
27
|
-
la 62
|
28
28
|
g 60
|
29
29
|
ar 60
|
30
|
-
an 59
|
31
30
|
er 59
|
32
|
-
|
31
|
+
an 59
|
32
|
+
h 54
|
33
33
|
ri 52
|
34
34
|
co 52
|
35
35
|
ll 52
|
@@ -37,364 +37,364 @@ _p 51
|
|
37
37
|
re 49
|
38
38
|
ch 46
|
39
39
|
ra 46
|
40
|
-
|
40
|
+
to 45
|
41
41
|
_i 45
|
42
|
-
|
43
|
-
di 44
|
42
|
+
el 45
|
44
43
|
no 44
|
45
|
-
|
44
|
+
di 44
|
45
|
+
se 44
|
46
|
+
_m 44
|
46
47
|
b 43
|
47
48
|
va 43
|
48
|
-
l_ 42
|
49
49
|
_l 42
|
50
|
+
l_ 42
|
50
51
|
ia 42
|
51
|
-
se 41
|
52
|
-
in 40
|
53
52
|
n_ 40
|
53
|
+
la_ 40
|
54
|
+
in 40
|
55
|
+
av 39
|
54
56
|
f 39
|
55
|
-
av 38
|
56
|
-
la_ 38
|
57
|
-
do 37
|
58
57
|
_di 37
|
58
|
+
do 37
|
59
59
|
on 36
|
60
|
-
al
|
60
|
+
al 36
|
61
|
+
ta 35
|
61
62
|
ca 34
|
62
|
-
ta 34
|
63
63
|
na 34
|
64
|
+
en 34
|
64
65
|
_e 34
|
65
|
-
' 34
|
66
66
|
li 34
|
67
|
-
|
67
|
+
da 33
|
68
|
+
re_ 33
|
69
|
+
lla 33
|
68
70
|
or 33
|
69
|
-
|
71
|
+
le 32
|
70
72
|
si 32
|
71
73
|
_n 32
|
72
|
-
|
73
|
-
_co 31
|
74
|
+
_co 32
|
74
75
|
to_ 31
|
75
76
|
ol 30
|
76
|
-
le 30
|
77
77
|
de 30
|
78
|
-
|
78
|
+
as 30
|
79
79
|
pe 29
|
80
80
|
cc 29
|
81
|
-
re_ 29
|
82
|
-
ve 28
|
83
|
-
il 28
|
84
81
|
ma 28
|
85
|
-
|
86
|
-
|
82
|
+
il 28
|
83
|
+
ve 28
|
87
84
|
o, 27
|
88
|
-
va_ 27
|
89
85
|
_v 27
|
86
|
+
. 27
|
87
|
+
va_ 27
|
88
|
+
io 27
|
90
89
|
nd 26
|
91
|
-
ne 26
|
92
90
|
tt 26
|
93
|
-
|
94
|
-
nt 25
|
91
|
+
ne 26
|
95
92
|
st 25
|
96
93
|
gli 25
|
94
|
+
gl 25
|
95
|
+
nt 25
|
96
|
+
he 24
|
97
|
+
ell 24
|
97
98
|
o,_ 24
|
99
|
+
che 24
|
98
100
|
sa 24
|
101
|
+
ti 24
|
102
|
+
_se 24
|
99
103
|
_il 24
|
100
|
-
|
101
|
-
me 23
|
102
|
-
_f 23
|
103
|
-
he 23
|
104
|
+
_de 23
|
104
105
|
om 23
|
105
106
|
at 23
|
106
|
-
|
107
|
-
|
108
|
-
|
107
|
+
_f 23
|
108
|
+
me 23
|
109
|
+
_b 22
|
110
|
+
il_ 22
|
109
111
|
_ch 22
|
110
|
-
|
111
|
-
_qu 22
|
112
|
+
q 22
|
112
113
|
a, 22
|
113
|
-
|
114
|
+
_qu 22
|
115
|
+
qu 22
|
116
|
+
_la 22
|
114
117
|
_q 22
|
115
|
-
|
116
|
-
_b 22
|
117
|
-
il_ 22
|
118
|
-
_pe 21
|
119
|
-
a,_ 21
|
118
|
+
ci 22
|
120
119
|
te 21
|
121
|
-
_ca 21
|
122
|
-
_se 21
|
123
|
-
_il_ 21
|
124
120
|
vi 21
|
121
|
+
lla_ 21
|
122
|
+
a,_ 21
|
123
|
+
_ca 21
|
125
124
|
un 21
|
126
|
-
|
127
|
-
|
128
|
-
ava 20
|
129
|
-
ra_ 20
|
125
|
+
_pe 21
|
126
|
+
_il_ 21
|
130
127
|
_che 20
|
131
|
-
|
128
|
+
ava 20
|
132
129
|
che_ 20
|
133
|
-
del 20
|
134
130
|
_e_ 20
|
135
|
-
|
136
|
-
lla_ 20
|
131
|
+
del 20
|
137
132
|
z 20
|
138
|
-
|
139
|
-
|
140
|
-
|
133
|
+
_di_ 20
|
134
|
+
he_ 20
|
135
|
+
di_ 20
|
136
|
+
pa 20
|
137
|
+
ra_ 20
|
141
138
|
ss 19
|
139
|
+
_u 19
|
142
140
|
no_ 19
|
143
|
-
|
141
|
+
lo 19
|
144
142
|
es 19
|
143
|
+
ev 19
|
144
|
+
._ 19
|
145
|
+
_del 19
|
145
146
|
et 18
|
146
|
-
|
147
|
+
vo 18
|
147
148
|
and 18
|
148
|
-
_ma 18
|
149
149
|
is 18
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
l' 17
|
154
|
-
na_ 17
|
155
|
-
era 17
|
150
|
+
_ma 18
|
151
|
+
ic 18
|
152
|
+
_che_ 18
|
156
153
|
_un 17
|
157
|
-
os 17
|
158
|
-
ic 17
|
159
154
|
si_ 17
|
160
155
|
hi 17
|
161
156
|
le_ 17
|
162
|
-
|
157
|
+
na_ 17
|
158
|
+
ia_ 17
|
159
|
+
os 17
|
160
|
+
era 17
|
161
|
+
sc 16
|
163
162
|
per 16
|
164
163
|
ano 16
|
165
|
-
ie 16
|
166
164
|
ua 16
|
167
|
-
|
165
|
+
io_ 16
|
166
|
+
ie 16
|
168
167
|
po 16
|
169
168
|
chi 16
|
170
|
-
|
169
|
+
_g 16
|
170
|
+
_in 15
|
171
|
+
_per 15
|
171
172
|
ad 15
|
172
|
-
it 15
|
173
|
-
io_ 15
|
174
|
-
are 15
|
175
|
-
ava_ 15
|
176
|
-
_si 15
|
177
173
|
tr 15
|
178
|
-
ac 15
|
179
|
-
eva 15
|
180
|
-
tto 15
|
181
174
|
com 15
|
182
|
-
|
183
|
-
|
175
|
+
tto 15
|
176
|
+
eva 15
|
177
|
+
are 15
|
178
|
+
it 15
|
184
179
|
se_ 15
|
185
|
-
|
186
|
-
|
187
|
-
|
180
|
+
ac 15
|
181
|
+
su 15
|
182
|
+
_si 15
|
183
|
+
ava_ 15
|
188
184
|
fa 14
|
189
|
-
|
190
|
-
|
191
|
-
_a_ 14
|
185
|
+
_su 14
|
186
|
+
li_ 14
|
192
187
|
_r 14
|
193
|
-
|
188
|
+
_da 14
|
194
189
|
ari 14
|
195
190
|
e,_ 14
|
196
|
-
|
197
|
-
|
191
|
+
_a_ 14
|
192
|
+
_la_ 14
|
198
193
|
e, 14
|
199
|
-
li_ 14
|
200
194
|
ro 14
|
195
|
+
mo 14
|
196
|
+
ndo 14
|
197
|
+
are_ 14
|
198
|
+
oc 14
|
199
|
+
on_ 13
|
201
200
|
in_ 13
|
202
201
|
id 13
|
203
|
-
ella 13
|
204
202
|
_in_ 13
|
205
|
-
on_ 13
|
206
203
|
un_ 13
|
207
204
|
gli_ 13
|
208
|
-
el_ 13
|
209
205
|
all 13
|
210
206
|
mp 13
|
207
|
+
el_ 13
|
208
|
+
ella 13
|
209
|
+
ce 13
|
211
210
|
so 13
|
212
211
|
ur 13
|
213
|
-
|
214
|
-
� 12
|
215
|
-
ue 12
|
216
|
-
r_ 12
|
212
|
+
uo 12
|
217
213
|
ni 12
|
218
|
-
|
219
|
-
|
214
|
+
ta_ 12
|
215
|
+
ig 12
|
216
|
+
era_ 12
|
220
217
|
col 12
|
221
|
-
pr 12
|
222
|
-
sse 12
|
223
|
-
mi 12
|
224
218
|
qua 12
|
225
|
-
|
226
|
-
ig 12
|
227
|
-
be 12
|
228
|
-
uo 12
|
219
|
+
ave 12
|
229
220
|
tto_ 12
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
221
|
+
be 12
|
222
|
+
ut 12
|
223
|
+
ue 12
|
224
|
+
sse 12
|
225
|
+
mi 12
|
226
|
+
� 12
|
227
|
+
r_ 12
|
228
|
+
pr 12
|
229
|
+
_qua 12
|
230
|
+
a. 12
|
235
231
|
_gl 11
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
_no 11
|
240
|
-
ano_ 11
|
241
|
-
_al 11
|
232
|
+
_t 11
|
233
|
+
ne_ 11
|
234
|
+
asa 11
|
242
235
|
_vi 11
|
243
|
-
ave 11
|
244
|
-
_da 11
|
245
236
|
cch 11
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
237
|
+
_al 11
|
238
|
+
_col 11
|
239
|
+
_no 11
|
240
|
+
_com 11
|
241
|
+
iv 11
|
242
|
+
cas 11
|
250
243
|
�_ 11
|
251
|
-
|
252
|
-
|
244
|
+
ano_ 11
|
245
|
+
_cas 11
|
253
246
|
ome 11
|
247
|
+
me_ 11
|
248
|
+
acc 11
|
249
|
+
_le 11
|
250
|
+
_gli 11
|
251
|
+
pi 11
|
254
252
|
dd 11
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
_que 10
|
259
|
-
van 10
|
260
|
-
man 10
|
261
|
-
ale 10
|
262
|
-
_fa 10
|
263
|
-
dell 10
|
264
|
-
zi 10
|
253
|
+
_un_ 11
|
254
|
+
ul 11
|
255
|
+
gn 11
|
265
256
|
cchi 10
|
266
|
-
|
257
|
+
dell 10
|
258
|
+
da_ 10
|
259
|
+
rr 10
|
260
|
+
_ri 10
|
261
|
+
ent 10
|
267
262
|
_dell 10
|
268
|
-
|
263
|
+
zi 10
|
264
|
+
ato 10
|
265
|
+
van 10
|
266
|
+
_casa 10
|
267
|
+
_ne 10
|
268
|
+
_av 10
|
269
|
+
ti_ 10
|
269
270
|
casa 10
|
271
|
+
non 10
|
272
|
+
ale 10
|
273
|
+
ser 10
|
274
|
+
_do 10
|
275
|
+
_fa 10
|
276
|
+
man 10
|
277
|
+
am 10
|
278
|
+
_que 10
|
270
279
|
og 10
|
271
|
-
_ri 10
|
272
280
|
_gli_ 10
|
273
|
-
ato 10
|
274
|
-
ent 10
|
275
|
-
non 10
|
276
281
|
que 10
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
_ne 10
|
281
|
-
far 9
|
282
|
-
del_ 9
|
282
|
+
eva_ 9
|
283
|
+
ome_ 9
|
284
|
+
fi 9
|
283
285
|
_mo 9
|
284
|
-
|
286
|
+
par 9
|
287
|
+
ando 9
|
288
|
+
uri 9
|
285
289
|
er_ 9
|
286
|
-
_an 9
|
287
290
|
anda 9
|
288
|
-
�_ 9
|
289
|
-
ella_ 9
|
290
|
-
ne_ 9
|
291
291
|
bi 9
|
292
|
-
|
292
|
+
sta 9
|
293
|
+
del_ 9
|
294
|
+
far 9
|
293
295
|
vano 9
|
294
|
-
|
295
|
-
ando 9
|
296
|
-
uri 9
|
297
|
-
ti_ 9
|
298
|
-
da_ 9
|
296
|
+
ella_ 9
|
299
297
|
ess 9
|
298
|
+
rid 9
|
299
|
+
alla 9
|
300
|
+
_be 9
|
300
301
|
oi 9
|
301
|
-
|
302
|
-
_le 9
|
303
|
-
come 9
|
304
|
-
� 9
|
302
|
+
_an 9
|
305
303
|
nda 9
|
306
|
-
|
304
|
+
_me 9
|
307
305
|
ot 9
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
_del_ 8
|
312
|
-
_come 8
|
306
|
+
� 9
|
307
|
+
come 9
|
308
|
+
�_ 9
|
313
309
|
i, 8
|
314
|
-
della 8
|
315
310
|
ridd 8
|
316
311
|
come_ 8
|
317
|
-
|
318
|
-
una_ 8
|
312
|
+
non_ 8
|
319
313
|
ina 8
|
320
|
-
|
321
|
-
ba 8
|
322
|
-
nz 8
|
314
|
+
cco 8
|
323
315
|
uel 8
|
324
|
-
|
325
|
-
una 8
|
326
|
-
! 8
|
327
|
-
_ave 8
|
328
|
-
ene 8
|
329
|
-
con 8
|
330
|
-
non_ 8
|
316
|
+
llo 8
|
331
317
|
ato_ 8
|
332
|
-
|
333
|
-
_st 8
|
334
|
-
cco 8
|
335
|
-
ser 8
|
336
|
-
Tu 8
|
337
|
-
T 8
|
318
|
+
nz 8
|
338
319
|
do_ 8
|
320
|
+
_come 8
|
321
|
+
con 8
|
322
|
+
_all 8
|
323
|
+
ed 8
|
324
|
+
Tu 8
|
339
325
|
occ 8
|
326
|
+
idd 8
|
327
|
+
lo_ 8
|
328
|
+
sa_ 8
|
340
329
|
S 8
|
341
|
-
|
330
|
+
ant 8
|
331
|
+
_del_ 8
|
332
|
+
ba 8
|
333
|
+
una 8
|
334
|
+
L 8
|
335
|
+
_st 8
|
336
|
+
ene 8
|
342
337
|
ir 8
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
338
|
+
_ave 8
|
339
|
+
della 8
|
340
|
+
una_ 8
|
341
|
+
T 8
|
342
|
+
quel 7
|
343
|
+
oll 7
|
344
|
+
ere 7
|
345
|
+
ola 7
|
346
|
+
nto 7
|
347
|
+
rc 7
|
348
|
+
per_ 7
|
352
349
|
rv 7
|
353
|
-
|
350
|
+
gi 7
|
351
|
+
_era 7
|
352
|
+
dav 7
|
354
353
|
cia 7
|
355
|
-
|
356
|
-
|
357
|
-
quel 7
|
358
|
-
i,_ 7
|
359
|
-
_coll 7
|
354
|
+
ett 7
|
355
|
+
ec 7
|
360
356
|
Tur 7
|
361
|
-
|
362
|
-
ap 7
|
363
|
-
pre 7
|
364
|
-
hi_ 7
|
365
|
-
nto 7
|
366
|
-
_pr 7
|
357
|
+
_le_ 7
|
367
358
|
utt 7
|
368
|
-
|
369
|
-
llo 7
|
359
|
+
hi_ 7
|
370
360
|
coll 7
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
361
|
+
_quel 7
|
362
|
+
dava 7
|
363
|
+
pre 7
|
364
|
+
tra 7
|
365
|
+
_ad 7
|
366
|
+
vano_ 7
|
367
|
+
_pr 7
|
375
368
|
ai 7
|
376
|
-
|
369
|
+
ap 7
|
370
|
+
lav 7
|
371
|
+
_coll 7
|
372
|
+
i,_ 7
|
373
|
+
_si_ 7
|
374
|
+
zia 7
|
375
|
+
_er 7
|
376
|
+
ino 7
|
377
377
|
ogl 7
|
378
|
-
dav 7
|
379
|
-
ola 7
|
380
|
-
_quel 7
|
381
378
|
_vo 7
|
382
|
-
|
383
|
-
|
379
|
+
_pa 7
|
380
|
+
mpa 7
|
381
|
+
ogli 7
|
382
|
+
sp 7
|
384
383
|
za 7
|
385
|
-
|
386
|
-
|
384
|
+
sse_ 7
|
385
|
+
Turi 7
|
387
386
|
_pi 7
|
388
|
-
|
389
|
-
ogli 7
|
387
|
+
_non 7
|
390
388
|
ndo_ 7
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
ere 7
|
389
|
+
Sa 6
|
390
|
+
veva 6
|
391
|
+
_sc 6
|
392
|
+
P 6
|
396
393
|
nu 6
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
394
|
+
uridd 6
|
395
|
+
rm 6
|
396
|
+
_con 6
|
397
|
+
h�_ 6
|
398
|
+
esse 6
|
399
|
+
i� 6
|
400
|
+
_ve 6
|