scylla 0.7.0 → 0.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/scylla/classifier.rb +3 -4
- data/lib/scylla/generator.rb +4 -7
- data/lib/scylla/lms/afrikaans.lm +280 -280
- data/lib/scylla/lms/arabic.lm +225 -225
- data/lib/scylla/lms/bulgarian.lm +208 -208
- data/lib/scylla/lms/catalan.lm +212 -212
- data/lib/scylla/lms/chinese.lm +201 -201
- data/lib/scylla/lms/danish.lm +155 -155
- data/lib/scylla/lms/english.lm +207 -207
- data/lib/scylla/lms/finnish.lm +259 -259
- data/lib/scylla/lms/french.lm +203 -203
- data/lib/scylla/lms/german.lm +280 -280
- data/lib/scylla/lms/greek.lm +276 -276
- data/lib/scylla/lms/hebrew.lm +170 -170
- data/lib/scylla/lms/hindi.lm +241 -241
- data/lib/scylla/lms/icelandic.lm +264 -264
- data/lib/scylla/lms/indonesian.lm +272 -272
- data/lib/scylla/lms/italian.lm +283 -283
- data/lib/scylla/lms/japanese.lm +105 -105
- data/lib/scylla/lms/korean.lm +400 -400
- data/lib/scylla/lms/norwegian.lm +235 -235
- data/lib/scylla/lms/polish.lm +264 -264
- data/lib/scylla/lms/portuguese.lm +269 -269
- data/lib/scylla/lms/romanian.lm +278 -278
- data/lib/scylla/lms/russian.lm +127 -127
- data/lib/scylla/lms/slovak.lm +281 -281
- data/lib/scylla/lms/slovenian.lm +276 -276
- data/lib/scylla/lms/spanish.lm +190 -190
- data/lib/scylla/lms/swedish.lm +195 -195
- data/lib/scylla/lms/tagalog.lm +282 -282
- data/lib/scylla/lms/thai.lm +257 -257
- data/lib/scylla/lms/turkish.lm +300 -300
- data/lib/scylla/lms/vietnamese.lm +277 -277
- data/lib/scylla/lms/welsh.lm +271 -271
- data/scylla.gemspec +3 -22
- data/source_texts/korean.txt +219 -134
- metadata +15 -14
- data/scylla-0.6.0.gem +0 -0
data/lib/scylla/lms/italian.lm
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
_
|
1
|
+
_ 1798
|
2
2
|
a 542
|
3
3
|
e 410
|
4
4
|
i 389
|
@@ -6,395 +6,395 @@ o 341
|
|
6
6
|
l 278
|
7
7
|
n 244
|
8
8
|
r 243
|
9
|
+
a_ 206
|
9
10
|
c 205
|
10
11
|
s 200
|
11
|
-
a_ 186
|
12
12
|
t 176
|
13
13
|
d 168
|
14
|
-
e_
|
14
|
+
e_ 165
|
15
15
|
u 123
|
16
|
+
o_ 120
|
17
|
+
i_ 119
|
16
18
|
v 115
|
17
|
-
o_ 114
|
18
|
-
i_ 104
|
19
19
|
p 99
|
20
20
|
m 98
|
21
|
-
_c
|
22
|
-
_d
|
23
|
-
_s
|
21
|
+
_c 96
|
22
|
+
_d 91
|
23
|
+
_s 88
|
24
|
+
_a 79
|
24
25
|
, 74
|
25
|
-
|
26
|
-
|
27
|
-
|
26
|
+
,_ 74
|
27
|
+
l_ 67
|
28
|
+
la 62
|
28
29
|
g 60
|
29
|
-
ar
|
30
|
+
ar 59
|
30
31
|
er 59
|
31
|
-
an
|
32
|
+
an 57
|
33
|
+
_p 56
|
32
34
|
h 54
|
33
35
|
ri 52
|
34
36
|
co 52
|
35
|
-
ll
|
36
|
-
_p 51
|
37
|
+
ll 51
|
37
38
|
re 49
|
39
|
+
_l 47
|
40
|
+
_i 47
|
38
41
|
ch 46
|
39
42
|
ra 46
|
40
|
-
to 45
|
41
|
-
_i 45
|
42
43
|
el 45
|
43
|
-
|
44
|
+
_m 45
|
44
45
|
di 44
|
45
|
-
|
46
|
-
|
46
|
+
to 44
|
47
|
+
no 44
|
48
|
+
la_ 44
|
49
|
+
_e 43
|
47
50
|
b 43
|
51
|
+
n_ 43
|
48
52
|
va 43
|
49
|
-
_l 42
|
50
|
-
l_ 42
|
51
53
|
ia 42
|
52
|
-
|
53
|
-
|
54
|
-
in
|
55
|
-
av 39
|
54
|
+
se 41
|
55
|
+
_di 40
|
56
|
+
in 39
|
56
57
|
f 39
|
57
|
-
|
58
|
+
av 38
|
59
|
+
_n 37
|
60
|
+
_co 37
|
58
61
|
do 37
|
62
|
+
_v 36
|
59
63
|
on 36
|
60
|
-
al
|
61
|
-
|
62
|
-
|
63
|
-
na 34
|
64
|
+
al 34
|
65
|
+
re_ 34
|
66
|
+
ta 34
|
64
67
|
en 34
|
65
|
-
_e 34
|
66
68
|
li 34
|
67
|
-
|
68
|
-
|
69
|
-
lla 33
|
69
|
+
na 34
|
70
|
+
ca 34
|
70
71
|
or 33
|
71
|
-
le 32
|
72
72
|
si 32
|
73
|
-
_n 32
|
74
|
-
_co 32
|
75
73
|
to_ 31
|
74
|
+
da 30
|
76
75
|
ol 30
|
77
76
|
de 30
|
78
|
-
|
77
|
+
le 30
|
78
|
+
lla 30
|
79
|
+
va_ 29
|
79
80
|
pe 29
|
80
81
|
cc 29
|
81
|
-
ma 28
|
82
|
-
il 28
|
83
82
|
ve 28
|
83
|
+
ma 28
|
84
|
+
as 28
|
85
|
+
il 27
|
86
|
+
._ 27
|
84
87
|
o, 27
|
85
|
-
_v 27
|
86
|
-
. 27
|
87
|
-
va_ 27
|
88
88
|
io 27
|
89
|
+
. 27
|
90
|
+
o,_ 27
|
91
|
+
_e_ 26
|
92
|
+
ne 26
|
89
93
|
nd 26
|
94
|
+
_u 26
|
90
95
|
tt 26
|
91
|
-
|
92
|
-
|
96
|
+
_f 26
|
97
|
+
_il 25
|
98
|
+
_ch 25
|
99
|
+
nt 25
|
93
100
|
gli 25
|
94
101
|
gl 25
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
che 24
|
102
|
+
il_ 25
|
103
|
+
lla_ 25
|
104
|
+
_il_ 25
|
105
|
+
st 25
|
100
106
|
sa 24
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
om 23
|
107
|
+
ell 24
|
108
|
+
_de 24
|
109
|
+
che 23
|
110
|
+
_b 23
|
106
111
|
at 23
|
107
|
-
|
112
|
+
om 23
|
113
|
+
he 23
|
108
114
|
me 23
|
109
|
-
|
110
|
-
|
111
|
-
_ch 22
|
112
|
-
q 22
|
115
|
+
_ca 23
|
116
|
+
ti 23
|
113
117
|
a, 22
|
114
|
-
_qu 22
|
115
118
|
qu 22
|
116
|
-
|
119
|
+
q 22
|
117
120
|
_q 22
|
121
|
+
_qu 22
|
122
|
+
che_ 22
|
123
|
+
ra_ 22
|
124
|
+
_che 22
|
125
|
+
_che_ 22
|
126
|
+
a,_ 22
|
127
|
+
_pe 22
|
128
|
+
_di_ 22
|
129
|
+
_se 22
|
118
130
|
ci 22
|
119
|
-
|
131
|
+
he_ 22
|
132
|
+
di_ 22
|
120
133
|
vi 21
|
121
|
-
|
122
|
-
a,_ 21
|
123
|
-
_ca 21
|
134
|
+
_un 21
|
124
135
|
un 21
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
ava 20
|
129
|
-
che_ 20
|
130
|
-
_e_ 20
|
136
|
+
te 21
|
137
|
+
pa 20
|
138
|
+
si_ 20
|
131
139
|
del 20
|
132
140
|
z 20
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
pa 20
|
137
|
-
ra_ 20
|
141
|
+
ava 20
|
142
|
+
_del 20
|
143
|
+
ia_ 19
|
138
144
|
ss 19
|
139
|
-
|
145
|
+
r_ 19
|
146
|
+
se_ 19
|
140
147
|
no_ 19
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
_del 19
|
148
|
+
_ma 19
|
149
|
+
_g 19
|
150
|
+
_si 19
|
151
|
+
le_ 19
|
146
152
|
et 18
|
153
|
+
ev 18
|
154
|
+
is 18
|
155
|
+
es 18
|
147
156
|
vo 18
|
157
|
+
lo 18
|
148
158
|
and 18
|
149
|
-
|
150
|
-
_ma 18
|
151
|
-
ic 18
|
152
|
-
_che_ 18
|
153
|
-
_un 17
|
154
|
-
si_ 17
|
159
|
+
_la 18
|
155
160
|
hi 17
|
156
|
-
le_ 17
|
157
161
|
na_ 17
|
158
|
-
ia_ 17
|
159
|
-
os 17
|
160
162
|
era 17
|
161
|
-
|
162
|
-
|
163
|
-
ano 16
|
164
|
-
ua 16
|
163
|
+
chi 16
|
164
|
+
_a_ 16
|
165
165
|
io_ 16
|
166
|
+
el_ 16
|
167
|
+
os 16
|
168
|
+
_in 16
|
169
|
+
_per 16
|
170
|
+
ua 16
|
171
|
+
li_ 16
|
166
172
|
ie 16
|
173
|
+
sc 16
|
174
|
+
per 16
|
167
175
|
po 16
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
ad 15
|
176
|
+
_no 15
|
177
|
+
it 15
|
178
|
+
ic 15
|
179
|
+
ava_ 15
|
173
180
|
tr 15
|
174
|
-
com 15
|
175
181
|
tto 15
|
182
|
+
su 15
|
176
183
|
eva 15
|
184
|
+
com 15
|
185
|
+
_vi 15
|
186
|
+
_com 15
|
187
|
+
_su 15
|
177
188
|
are 15
|
178
|
-
|
179
|
-
se_ 15
|
180
|
-
ac 15
|
181
|
-
su 15
|
182
|
-
_si 15
|
183
|
-
ava_ 15
|
184
|
-
fa 14
|
185
|
-
_su 14
|
186
|
-
li_ 14
|
187
|
-
_r 14
|
188
|
-
_da 14
|
189
|
-
ari 14
|
189
|
+
_r 15
|
190
190
|
e,_ 14
|
191
|
-
|
192
|
-
_la_ 14
|
191
|
+
ac 14
|
193
192
|
e, 14
|
194
|
-
|
193
|
+
fa 14
|
194
|
+
_la_ 14
|
195
|
+
ad 14
|
196
|
+
er_ 14
|
197
|
+
gli_ 14
|
198
|
+
are_ 14
|
195
199
|
mo 14
|
200
|
+
ano 14
|
196
201
|
ndo 14
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
id 13
|
202
|
+
ro 14
|
203
|
+
on_ 14
|
204
|
+
_si_ 13
|
205
|
+
ella 13
|
202
206
|
_in_ 13
|
203
|
-
un_ 13
|
204
|
-
gli_ 13
|
205
|
-
all 13
|
206
207
|
mp 13
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
208
|
+
_un_ 13
|
209
|
+
oc 13
|
210
|
+
era_ 13
|
211
|
+
_do 13
|
212
|
+
ari 13
|
213
|
+
_av 13
|
211
214
|
ur 13
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
era_ 12
|
217
|
-
col 12
|
218
|
-
qua 12
|
219
|
-
ave 12
|
215
|
+
so 13
|
216
|
+
un_ 13
|
217
|
+
in_ 13
|
218
|
+
all 13
|
220
219
|
tto_ 12
|
220
|
+
_gli_ 12
|
221
|
+
_per_ 12
|
222
|
+
ce 12
|
223
|
+
ella_ 12
|
224
|
+
a. 12
|
225
|
+
_gl 12
|
226
|
+
col 12
|
221
227
|
be 12
|
222
|
-
|
223
|
-
|
224
|
-
sse 12
|
225
|
-
mi 12
|
228
|
+
uo 12
|
229
|
+
qua 12
|
226
230
|
� 12
|
227
|
-
|
231
|
+
a._ 12
|
232
|
+
�_ 12
|
233
|
+
mi 12
|
234
|
+
sse 12
|
235
|
+
ue 12
|
236
|
+
_l_ 12
|
237
|
+
me_ 12
|
238
|
+
ni 12
|
239
|
+
id 12
|
228
240
|
pr 12
|
241
|
+
ut 12
|
242
|
+
sa_ 12
|
243
|
+
_gli 12
|
244
|
+
per_ 12
|
245
|
+
_al 12
|
246
|
+
_er 12
|
229
247
|
_qua 12
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
asa 11
|
235
|
-
_vi 11
|
236
|
-
cch 11
|
237
|
-
_al 11
|
238
|
-
_col 11
|
239
|
-
_no 11
|
240
|
-
_com 11
|
241
|
-
iv 11
|
242
|
-
cas 11
|
243
|
-
�_ 11
|
248
|
+
pi 11
|
249
|
+
ta_ 11
|
250
|
+
ti_ 11
|
251
|
+
gn 11
|
244
252
|
ano_ 11
|
245
|
-
_cas 11
|
246
|
-
ome 11
|
247
|
-
me_ 11
|
248
253
|
acc 11
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
+
_fa 11
|
255
|
+
_col 11
|
256
|
+
_t 11
|
257
|
+
eva_ 11
|
258
|
+
_ri 11
|
259
|
+
_cas 11
|
260
|
+
da_ 11
|
261
|
+
_era 11
|
254
262
|
ul 11
|
255
|
-
|
263
|
+
ne_ 11
|
264
|
+
dd 11
|
265
|
+
_da 11
|
266
|
+
_ave 11
|
267
|
+
do_ 11
|
268
|
+
ave 11
|
269
|
+
ome 11
|
270
|
+
_ne 11
|
271
|
+
cas 11
|
272
|
+
cch 11
|
273
|
+
ig 11
|
274
|
+
_st 10
|
275
|
+
_casa 10
|
276
|
+
am 10
|
277
|
+
zi 10
|
278
|
+
man 10
|
279
|
+
ato 10
|
280
|
+
og 10
|
256
281
|
cchi 10
|
257
|
-
|
258
|
-
|
282
|
+
_era_ 10
|
283
|
+
que 10
|
284
|
+
_que 10
|
285
|
+
_non 10
|
259
286
|
rr 10
|
260
|
-
_ri 10
|
261
287
|
ent 10
|
262
|
-
|
263
|
-
zi 10
|
264
|
-
ato 10
|
265
|
-
van 10
|
266
|
-
_casa 10
|
267
|
-
_ne 10
|
268
|
-
_av 10
|
269
|
-
ti_ 10
|
288
|
+
ale 10
|
270
289
|
casa 10
|
290
|
+
dell 10
|
271
291
|
non 10
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
og 10
|
280
|
-
_gli_ 10
|
281
|
-
que 10
|
282
|
-
eva_ 9
|
283
|
-
ome_ 9
|
284
|
-
fi 9
|
285
|
-
_mo 9
|
286
|
-
par 9
|
287
|
-
ando 9
|
288
|
-
uri 9
|
289
|
-
er_ 9
|
290
|
-
anda 9
|
291
|
-
bi 9
|
292
|
+
ome_ 10
|
293
|
+
asa 10
|
294
|
+
_dell 10
|
295
|
+
come_ 9
|
296
|
+
iv 9
|
297
|
+
_ve 9
|
298
|
+
_come 9
|
292
299
|
sta 9
|
300
|
+
ndo_ 9
|
301
|
+
�_ 9
|
302
|
+
anda 9
|
293
303
|
del_ 9
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
304
|
+
_non_ 9
|
305
|
+
� 9
|
306
|
+
nda 9
|
307
|
+
par 9
|
298
308
|
rid 9
|
299
|
-
|
309
|
+
fi 9
|
310
|
+
lo_ 9
|
300
311
|
_be 9
|
312
|
+
_mo 9
|
313
|
+
_le 9
|
301
314
|
oi 9
|
315
|
+
_o 9
|
316
|
+
ot 9
|
317
|
+
non_ 9
|
318
|
+
bi 9
|
302
319
|
_an 9
|
303
|
-
nda 9
|
304
320
|
_me 9
|
305
|
-
|
306
|
-
|
321
|
+
_del_ 9
|
322
|
+
van 9
|
323
|
+
ando 9
|
324
|
+
far 9
|
325
|
+
ess 9
|
307
326
|
come 9
|
308
|
-
|
309
|
-
|
310
|
-
ridd 8
|
311
|
-
come_ 8
|
312
|
-
non_ 8
|
327
|
+
_pa 9
|
328
|
+
uri 9
|
313
329
|
ina 8
|
314
|
-
|
315
|
-
|
316
|
-
|
330
|
+
_pr 8
|
331
|
+
_T 8
|
332
|
+
_S 8
|
333
|
+
L 8
|
317
334
|
ato_ 8
|
335
|
+
ser 8
|
318
336
|
nz 8
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
_all 8
|
323
|
-
ed 8
|
324
|
-
Tu 8
|
325
|
-
occ 8
|
337
|
+
una_ 8
|
338
|
+
vano 8
|
339
|
+
i, 8
|
326
340
|
idd 8
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
_del_ 8
|
341
|
+
_vo 8
|
342
|
+
_una_ 8
|
343
|
+
_le_ 8
|
344
|
+
cco 8
|
332
345
|
ba 8
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
ene 8
|
346
|
+
sse_ 8
|
347
|
+
i,_ 8
|
348
|
+
uel 8
|
337
349
|
ir 8
|
338
|
-
_ave 8
|
339
350
|
della 8
|
340
|
-
|
351
|
+
te_ 8
|
352
|
+
una 8
|
353
|
+
Tu 8
|
354
|
+
_all 8
|
355
|
+
ant 8
|
356
|
+
ridd 8
|
357
|
+
S 8
|
358
|
+
_Tu 8
|
359
|
+
asa_ 8
|
360
|
+
_una 8
|
361
|
+
_pi 8
|
362
|
+
casa_ 8
|
363
|
+
con 8
|
364
|
+
occ 8
|
365
|
+
ene 8
|
366
|
+
_L 8
|
341
367
|
T 8
|
342
|
-
|
343
|
-
oll 7
|
344
|
-
ere 7
|
345
|
-
ola 7
|
346
|
-
nto 7
|
347
|
-
rc 7
|
348
|
-
per_ 7
|
349
|
-
rv 7
|
350
|
-
gi 7
|
351
|
-
_era 7
|
352
|
-
dav 7
|
353
|
-
cia 7
|
354
|
-
ett 7
|
355
|
-
ec 7
|
368
|
+
tra 7
|
356
369
|
Tur 7
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
coll 7
|
370
|
+
ett 7
|
371
|
+
sp 7
|
372
|
+
rc 7
|
361
373
|
_quel 7
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
374
|
+
coll 7
|
375
|
+
gi 7
|
376
|
+
nto 7
|
377
|
+
ere 7
|
378
|
+
_ar 7
|
379
|
+
mpa 7
|
380
|
+
za 7
|
369
381
|
ap 7
|
370
|
-
lav 7
|
371
382
|
_coll 7
|
372
|
-
|
373
|
-
|
383
|
+
vano_ 7
|
384
|
+
oll 7
|
385
|
+
quel 7
|
386
|
+
_con 7
|
387
|
+
ando_ 7
|
374
388
|
zia 7
|
375
|
-
|
376
|
-
|
389
|
+
rv 7
|
390
|
+
dav 7
|
391
|
+
ola 7
|
392
|
+
oi_ 7
|
393
|
+
cia 7
|
394
|
+
llo 7
|
377
395
|
ogl 7
|
378
|
-
|
379
|
-
_pa 7
|
380
|
-
mpa 7
|
381
|
-
ogli 7
|
382
|
-
sp 7
|
383
|
-
za 7
|
384
|
-
sse_ 7
|
396
|
+
_far 7
|
385
397
|
Turi 7
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
Sa 6
|
390
|
-
veva 6
|
391
|
-
_sc 6
|
392
|
-
P 6
|
393
|
-
nu 6
|
394
|
-
uridd 6
|
395
|
-
rm 6
|
396
|
-
_con 6
|
397
|
-
h�_ 6
|
398
|
-
esse 6
|
399
|
-
i� 6
|
400
|
-
_ve 6
|
398
|
+
alla 7
|
399
|
+
ogli 7
|
400
|
+
utt 7
|