scylla 0.7.0 → 0.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/scylla/classifier.rb +3 -4
- data/lib/scylla/generator.rb +4 -7
- data/lib/scylla/lms/afrikaans.lm +280 -280
- data/lib/scylla/lms/arabic.lm +225 -225
- data/lib/scylla/lms/bulgarian.lm +208 -208
- data/lib/scylla/lms/catalan.lm +212 -212
- data/lib/scylla/lms/chinese.lm +201 -201
- data/lib/scylla/lms/danish.lm +155 -155
- data/lib/scylla/lms/english.lm +207 -207
- data/lib/scylla/lms/finnish.lm +259 -259
- data/lib/scylla/lms/french.lm +203 -203
- data/lib/scylla/lms/german.lm +280 -280
- data/lib/scylla/lms/greek.lm +276 -276
- data/lib/scylla/lms/hebrew.lm +170 -170
- data/lib/scylla/lms/hindi.lm +241 -241
- data/lib/scylla/lms/icelandic.lm +264 -264
- data/lib/scylla/lms/indonesian.lm +272 -272
- data/lib/scylla/lms/italian.lm +283 -283
- data/lib/scylla/lms/japanese.lm +105 -105
- data/lib/scylla/lms/korean.lm +400 -400
- data/lib/scylla/lms/norwegian.lm +235 -235
- data/lib/scylla/lms/polish.lm +264 -264
- data/lib/scylla/lms/portuguese.lm +269 -269
- data/lib/scylla/lms/romanian.lm +278 -278
- data/lib/scylla/lms/russian.lm +127 -127
- data/lib/scylla/lms/slovak.lm +281 -281
- data/lib/scylla/lms/slovenian.lm +276 -276
- data/lib/scylla/lms/spanish.lm +190 -190
- data/lib/scylla/lms/swedish.lm +195 -195
- data/lib/scylla/lms/tagalog.lm +282 -282
- data/lib/scylla/lms/thai.lm +257 -257
- data/lib/scylla/lms/turkish.lm +300 -300
- data/lib/scylla/lms/vietnamese.lm +277 -277
- data/lib/scylla/lms/welsh.lm +271 -271
- data/scylla.gemspec +3 -22
- data/source_texts/korean.txt +219 -134
- metadata +15 -14
- data/scylla-0.6.0.gem +0 -0
data/lib/scylla/lms/catalan.lm
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
_
|
1
|
+
_ 14386
|
2
2
|
a 4010
|
3
3
|
e 3751
|
4
4
|
l 2647
|
@@ -9,76 +9,76 @@ t 2145
|
|
9
9
|
r 1821
|
10
10
|
o 1472
|
11
11
|
c 1470
|
12
|
+
a_ 1256
|
13
|
+
s_ 1211
|
12
14
|
� 1190
|
13
15
|
d 1181
|
14
|
-
a_ 1172
|
15
|
-
s_ 1146
|
16
16
|
u 1070
|
17
|
-
__ 872
|
18
17
|
m 828
|
19
|
-
_d
|
18
|
+
_d 784
|
20
19
|
en 755
|
21
20
|
es 750
|
21
|
+
_l 706
|
22
22
|
p 696
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
e_
|
27
|
-
|
28
|
-
ta
|
29
|
-
_de
|
30
|
-
|
23
|
+
l_ 691
|
24
|
+
_e 677
|
25
|
+
al 671
|
26
|
+
e_ 648
|
27
|
+
de 629
|
28
|
+
ta 553
|
29
|
+
_de 551
|
30
|
+
_a 510
|
31
31
|
g 496
|
32
|
-
|
32
|
+
,_ 483
|
33
33
|
, 483
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
es_ 427
|
34
|
+
_c 473
|
35
|
+
la 464
|
36
|
+
at 457
|
37
|
+
el 447
|
38
|
+
le 443
|
39
|
+
es_ 442
|
41
40
|
ca 424
|
41
|
+
t_ 415
|
42
|
+
_p 403
|
42
43
|
er 400
|
43
|
-
ci
|
44
|
-
_p 396
|
44
|
+
ci 396
|
45
45
|
. 391
|
46
|
-
t_ 390
|
47
46
|
an 380
|
47
|
+
._ 377
|
48
48
|
nt 376
|
49
49
|
� 360
|
50
50
|
à 360
|
51
|
+
n_ 359
|
51
52
|
re 356
|
52
53
|
st 351
|
53
|
-
n_ 347
|
54
54
|
b 341
|
55
|
+
_de_ 340
|
56
|
+
de_ 340
|
55
57
|
ar 339
|
56
|
-
de_ 335
|
57
|
-
_de_ 331
|
58
58
|
ra 315
|
59
59
|
f 301
|
60
|
+
_i 300
|
61
|
+
el_ 296
|
60
62
|
on 294
|
61
|
-
|
62
|
-
la_ 272
|
63
|
-
_la 270
|
63
|
+
la_ 277
|
64
64
|
v 265
|
65
|
-
|
66
|
-
|
67
|
-
._ 259
|
65
|
+
i_ 264
|
66
|
+
ll 263
|
68
67
|
ic 254
|
69
|
-
|
68
|
+
_la 254
|
70
69
|
ue 248
|
71
70
|
or 245
|
71
|
+
_la_ 245
|
72
72
|
q 243
|
73
|
-
_la_ 239
|
74
73
|
qu 239
|
74
|
+
_m 238
|
75
75
|
_ca 236
|
76
76
|
in 234
|
77
77
|
te 233
|
78
|
+
_s 232
|
78
79
|
tal 231
|
79
80
|
it 227
|
80
81
|
ia 225
|
81
|
-
_s 223
|
82
82
|
ti 215
|
83
83
|
co 212
|
84
84
|
en_ 211
|
@@ -86,315 +86,315 @@ om 211
|
|
86
86
|
na 210
|
87
87
|
ent 209
|
88
88
|
que 208
|
89
|
-
|
89
|
+
r_ 204
|
90
90
|
_el 201
|
91
|
-
se 199
|
92
91
|
ri 198
|
93
92
|
i� 197
|
94
93
|
un 196
|
95
|
-
|
94
|
+
_es 193
|
95
|
+
se 193
|
96
|
+
ata 192
|
96
97
|
x 191
|
97
98
|
ns 191
|
99
|
+
�_ 190
|
100
|
+
à_ 190
|
101
|
+
_i_ 188
|
102
|
+
_a_ 187
|
98
103
|
atal 186
|
99
104
|
E 185
|
100
|
-
l� 185
|
101
|
-
len 184
|
102
105
|
ng 183
|
103
|
-
|
104
|
-
|
105
|
-
_i_ 180
|
106
|
+
len 181
|
107
|
+
_en 179
|
106
108
|
� 178
|
107
109
|
ó 178
|
110
|
+
_co 177
|
108
111
|
A 177
|
109
|
-
di 177
|
110
|
-
�_ 175
|
111
|
-
ci� 175
|
112
|
-
à_ 175
|
113
112
|
nc 175
|
113
|
+
ci� 175
|
114
|
+
� 173
|
115
|
+
_. 173
|
114
116
|
é 173
|
115
117
|
gu 173
|
116
|
-
�
|
117
|
-
|
118
|
-
|
119
|
-
li 168
|
118
|
+
l� 172
|
119
|
+
_o 170
|
120
|
+
di 170
|
120
121
|
pe 167
|
122
|
+
_._ 167
|
121
123
|
C 162
|
122
|
-
|
123
|
-
|
124
|
-
|
124
|
+
_t 162
|
125
|
+
ac 160
|
126
|
+
li 158
|
127
|
+
al_ 157
|
128
|
+
_del 153
|
129
|
+
_l_ 153
|
125
130
|
del 153
|
126
|
-
_del 152
|
127
131
|
lle 151
|
128
|
-
|
132
|
+
_el_ 151
|
133
|
+
í 150
|
129
134
|
cat 150
|
135
|
+
� 150
|
130
136
|
me 150
|
131
|
-
í 150
|
132
|
-
_el_ 149
|
133
|
-
- 148
|
134
137
|
tr 148
|
135
|
-
al_ 145
|
136
138
|
pa 145
|
139
|
+
_ll 145
|
137
140
|
ts 145
|
138
|
-
|
139
|
-
|
141
|
+
ls 143
|
142
|
+
là 143
|
140
143
|
est 142
|
141
|
-
|
144
|
+
_f 142
|
145
|
+
nt_ 141
|
142
146
|
ua 141
|
143
|
-
|
147
|
+
ts_ 137
|
144
148
|
ne 136
|
145
149
|
cata 136
|
146
150
|
catal 136
|
147
151
|
pr 136
|
148
|
-
les 135
|
149
152
|
_cat 134
|
150
153
|
_cata 134
|
151
|
-
|
154
|
+
d_ 133
|
152
155
|
ió 133
|
153
156
|
ro 133
|
154
|
-
|
155
|
-
|
157
|
+
_en_ 133
|
158
|
+
ls_ 131
|
156
159
|
h 130
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
+
ó_ 129
|
161
|
+
_E 129
|
162
|
+
�_ 129
|
163
|
+
_C 128
|
164
|
+
_q 128
|
160
165
|
ma 128
|
161
|
-
|
162
|
-
|
163
|
-
|
166
|
+
oc 128
|
167
|
+
_del_ 127
|
168
|
+
del_ 127
|
169
|
+
_qu 126
|
164
170
|
sta 126
|
165
|
-
|
166
|
-
ls_ 125
|
167
|
-
_qu 125
|
168
|
-
�_ 124
|
169
|
-
ó_ 124
|
170
|
-
L 122
|
171
|
+
_lle 122
|
171
172
|
am 122
|
172
|
-
|
173
|
+
L 122
|
174
|
+
nci 121
|
175
|
+
_A 121
|
173
176
|
ica 121
|
174
|
-
|
177
|
+
_v 120
|
175
178
|
ció 120
|
176
|
-
et 119
|
177
179
|
no 119
|
178
|
-
_
|
180
|
+
ió_ 118
|
181
|
+
et 118
|
179
182
|
�s 117
|
180
|
-
_v 117
|
181
|
-
és 117
|
182
183
|
ni 117
|
183
|
-
|
184
|
+
là_ 117
|
185
|
+
és 117
|
186
|
+
_, 116
|
187
|
+
_,_ 116
|
188
|
+
ale 115
|
184
189
|
leng 115
|
185
190
|
al� 115
|
186
191
|
eng 115
|
192
|
+
o_ 114
|
187
193
|
�s_ 114
|
194
|
+
at_ 114
|
195
|
+
_pe 114
|
188
196
|
és_ 114
|
189
|
-
|
197
|
+
atal� 113
|
198
|
+
alà 113
|
190
199
|
tal� 113
|
191
200
|
talà 113
|
192
|
-
atal� 113
|
193
201
|
y 113
|
194
|
-
|
195
|
-
_pe 113
|
196
|
-
_le 112
|
202
|
+
les 112
|
197
203
|
fi 111
|
198
204
|
llen 111
|
199
|
-
|
205
|
+
_h 111
|
206
|
+
na_ 110
|
200
207
|
per 109
|
201
|
-
tat 109
|
202
208
|
_que 109
|
203
209
|
aci 109
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
210
|
+
tat 109
|
211
|
+
_al 109
|
212
|
+
as 109
|
213
|
+
_llen 108
|
208
214
|
va 108
|
209
|
-
|
210
|
-
|
215
|
+
lleng 108
|
216
|
+
_pr 108
|
217
|
+
les_ 107
|
211
218
|
po 106
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
219
|
+
ad 106
|
220
|
+
tu 106
|
221
|
+
ue_ 105
|
222
|
+
ció_ 105
|
223
|
+
_que_ 105
|
224
|
+
que_ 105
|
225
|
+
_u 104
|
226
|
+
_� 104
|
227
|
+
m� 102
|
217
228
|
g� 102
|
218
229
|
ant 102
|
219
|
-
|
220
|
-
|
221
|
-
ció_ 101
|
222
|
-
ec 101
|
230
|
+
_r 102
|
231
|
+
ia_ 102
|
223
232
|
com 101
|
233
|
+
t� 101
|
234
|
+
da 100
|
224
235
|
è 100
|
225
|
-
que_ 100
|
226
|
-
_que_ 100
|
227
236
|
� 100
|
228
|
-
|
229
|
-
a, 100
|
230
|
-
a,_ 99
|
231
|
-
_r 99
|
237
|
+
_com 99
|
232
238
|
cia 98
|
239
|
+
ec 98
|
233
240
|
ny 97
|
234
|
-
_com 97
|
235
241
|
ues 97
|
236
|
-
|
242
|
+
alà_ 97
|
243
|
+
_se 95
|
244
|
+
a, 95
|
237
245
|
ngu 95
|
238
|
-
|
246
|
+
ns_ 95
|
239
247
|
is 95
|
240
|
-
|
241
|
-
|
242
|
-
_al 94
|
243
|
-
ns_ 94
|
248
|
+
a,_ 95
|
249
|
+
ca_ 94
|
244
250
|
mi 93
|
245
|
-
ix 93
|
246
251
|
sti 93
|
247
|
-
|
252
|
+
ix 93
|
253
|
+
_d_ 92
|
254
|
+
er_ 92
|
248
255
|
gua 90
|
249
|
-
enc 89
|
250
|
-
to 89
|
251
256
|
os 89
|
252
|
-
|
257
|
+
to 89
|
253
258
|
aci� 89
|
254
259
|
ació 89
|
255
|
-
em 89
|
256
260
|
ques 89
|
257
|
-
|
261
|
+
em 89
|
262
|
+
enc 89
|
263
|
+
_per 88
|
264
|
+
men 87
|
258
265
|
� 87
|
259
|
-
|
260
|
-
mb 87
|
266
|
+
ct 87
|
261
267
|
r� 87
|
262
268
|
ü 87
|
263
|
-
|
264
|
-
ct 87
|
269
|
+
mb 87
|
265
270
|
rt 86
|
266
271
|
j 86
|
267
|
-
|
268
|
-
lengu 85
|
269
|
-
ol 85
|
272
|
+
_n 86
|
270
273
|
engu 85
|
274
|
+
engua 85
|
271
275
|
ngua 85
|
276
|
+
lengu 85
|
277
|
+
_L 84
|
278
|
+
ol 84
|
272
279
|
P 84
|
273
|
-
|
280
|
+
ent_ 83
|
274
281
|
tre 83
|
275
|
-
|
276
|
-
|
282
|
+
_le 83
|
283
|
+
ons 82
|
284
|
+
res 82
|
285
|
+
_P 82
|
277
286
|
I 82
|
278
287
|
gü 82
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
288
|
+
io 82
|
289
|
+
ica_ 82
|
290
|
+
� 82
|
291
|
+
_un 81
|
292
|
+
si 81
|
283
293
|
ei 81
|
284
|
-
_� 80
|
285
|
-
_n 79
|
286
294
|
ut 79
|
287
|
-
so
|
288
|
-
|
295
|
+
so 78
|
296
|
+
_di 78
|
289
297
|
par 78
|
290
|
-
|
298
|
+
_re 78
|
299
|
+
ng� 78
|
300
|
+
_g 77
|
291
301
|
esta 77
|
292
|
-
ngü 77
|
293
302
|
ur 77
|
303
|
+
_pa 77
|
304
|
+
ngü 77
|
294
305
|
�s 76
|
295
|
-
bl 76
|
296
306
|
ís 76
|
307
|
+
bl 76
|
308
|
+
_po 75
|
309
|
+
_est 75
|
297
310
|
eg 75
|
298
|
-
_pa 75
|
299
|
-
_re 75
|
300
|
-
_g 74
|
301
311
|
_les_ 74
|
302
|
-
|
303
|
-
_un 74
|
312
|
+
_les 74
|
304
313
|
ran 73
|
305
|
-
con 73
|
306
314
|
ha 73
|
307
|
-
|
315
|
+
con 73
|
316
|
+
mp 72
|
308
317
|
mo 72
|
309
|
-
|
318
|
+
ua_ 72
|
319
|
+
ar_ 72
|
320
|
+
m_ 72
|
321
|
+
_ha 71
|
310
322
|
tic 70
|
311
|
-
|
323
|
+
lu 70
|
324
|
+
ta_ 69
|
325
|
+
re_ 69
|
312
326
|
Ca 69
|
313
|
-
|
314
|
-
fic 68
|
315
|
-
enci 68
|
327
|
+
els 68
|
316
328
|
T 68
|
317
329
|
els_ 68
|
318
|
-
|
330
|
+
_b 68
|
331
|
+
ra_ 68
|
332
|
+
fic 68
|
333
|
+
_Ca 68
|
319
334
|
rs 67
|
320
335
|
amb 67
|
321
336
|
ie 67
|
322
|
-
|
337
|
+
gua_ 67
|
338
|
+
enci 67
|
339
|
+
ngua_ 67
|
323
340
|
ot 66
|
324
|
-
ta_ 66
|
325
|
-
_P 66
|
326
341
|
rd 65
|
327
|
-
|
342
|
+
_no 65
|
328
343
|
ial 65
|
329
|
-
do 64
|
330
344
|
ter 64
|
331
|
-
_, 64
|
332
|
-
_no 64
|
333
|
-
ngua_ 64
|
334
|
-
ià 64
|
335
345
|
nta 64
|
336
|
-
|
337
|
-
|
346
|
+
des 64
|
347
|
+
ià 64
|
338
348
|
eix 63
|
339
|
-
|
340
|
-
_,_ 63
|
341
|
-
ra_ 63
|
349
|
+
ment 63
|
342
350
|
vi 62
|
343
|
-
|
351
|
+
àn 61
|
352
|
+
lenc 61
|
344
353
|
V 61
|
345
|
-
ar_ 61
|
346
354
|
�n 61
|
347
|
-
|
348
|
-
àn 61
|
355
|
+
do 61
|
349
356
|
if 61
|
350
|
-
|
357
|
+
_va 60
|
351
358
|
lenci 60
|
352
|
-
|
353
|
-
|
359
|
+
_mo 60
|
360
|
+
_con 60
|
361
|
+
alen 60
|
354
362
|
nd 60
|
363
|
+
alenc 60
|
355
364
|
M 60
|
356
|
-
ell 59
|
357
|
-
lan 59
|
358
|
-
fe 59
|
359
|
-
als 59
|
360
365
|
� 59
|
361
|
-
_va 59
|
362
|
-
s. 59
|
363
366
|
ing 59
|
364
367
|
us 59
|
365
|
-
|
366
|
-
|
368
|
+
ell 59
|
369
|
+
fe 59
|
370
|
+
�st 58
|
367
371
|
�sti 58
|
368
372
|
íst 58
|
369
|
-
|
370
|
-
|
373
|
+
ísti 58
|
374
|
+
als 58
|
371
375
|
ce 57
|
372
|
-
_con 57
|
373
376
|
� 57
|
374
377
|
ita 57
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
378
|
+
_M 57
|
379
|
+
_é 57
|
380
|
+
lo 57
|
381
|
+
_és 57
|
379
382
|
D 56
|
383
|
+
_� 56
|
380
384
|
_ma 56
|
381
|
-
|
382
|
-
_é 56
|
383
|
-
|
384
|
-
_és_ 55
|
385
|
-
sa 55
|
385
|
+
sp 56
|
386
|
+
_és_ 56
|
387
|
+
ya 56
|
386
388
|
cià 55
|
389
|
+
� 55
|
387
390
|
om_ 55
|
388
|
-
|
391
|
+
S 54
|
389
392
|
ul 54
|
390
|
-
ca_ 54
|
391
393
|
ret 54
|
392
|
-
S 54
|
393
394
|
era 54
|
395
|
+
lt 54
|
394
396
|
ncia 54
|
395
|
-
il 53
|
396
397
|
ion 53
|
397
|
-
|
398
|
-
s,_ 53
|
399
|
-
nya 53
|
398
|
+
tat_ 53
|
400
399
|
s, 53
|
400
|
+
_par 53
|