scylla 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -2
- data/Gemfile.lock +16 -1
- data/lib/scylla/classifier.rb +1 -1
- data/lib/scylla/generator.rb +16 -4
- data/lib/scylla/lms/afrikaans.lm +232 -232
- data/lib/scylla/lms/arabic.lm +175 -175
- data/lib/scylla/lms/bulgarian.lm +225 -225
- data/lib/scylla/lms/catalan.lm +309 -309
- data/lib/scylla/lms/danish.lm +167 -167
- data/lib/scylla/lms/english.lm +398 -398
- data/lib/scylla/lms/finnish.lm +237 -237
- data/lib/scylla/lms/french.lm +148 -148
- data/lib/scylla/lms/german.lm +258 -258
- data/lib/scylla/lms/greek.lm +236 -236
- data/lib/scylla/lms/hebrew.lm +154 -154
- data/lib/scylla/lms/hindi.lm +139 -139
- data/lib/scylla/lms/icelandic.lm +239 -239
- data/lib/scylla/lms/indonesian.lm +244 -244
- data/lib/scylla/lms/italian.lm +248 -248
- data/lib/scylla/lms/japanese.lm +90 -90
- data/lib/scylla/lms/korean.lm +306 -306
- data/lib/scylla/lms/norwegian.lm +193 -193
- data/lib/scylla/lms/polish.lm +241 -241
- data/lib/scylla/lms/portuguese.lm +232 -232
- data/lib/scylla/lms/romanian.lm +246 -246
- data/lib/scylla/lms/slovak.lm +242 -242
- data/lib/scylla/lms/slovenian.lm +229 -229
- data/lib/scylla/lms/spanish.lm +164 -164
- data/lib/scylla/lms/swedish.lm +157 -157
- data/lib/scylla/lms/tagalog.lm +247 -247
- data/lib/scylla/lms/thai.lm +252 -252
- data/lib/scylla/lms/turkish.lm +285 -285
- data/lib/scylla/lms/vietnamese.lm +250 -250
- data/lib/scylla/lms/welsh.lm +248 -248
- data/lib/scylla/resources.rb +1 -9
- data/lib/scylla.rb +4 -0
- data/scylla.gemspec +2 -120
- data/source_texts/english.txt +62 -27
- data/test/classifier_test.rb +1 -3
- data/test/fixtures/lms/danish.lm +173 -173
- data/test/fixtures/lms/english.lm +220 -220
- data/test/fixtures/lms/french.lm +175 -175
- data/test/fixtures/lms/german.lm +254 -254
- data/test/fixtures/lms/hindi.lm +139 -139
- data/test/fixtures/lms/italian.lm +236 -236
- data/test/fixtures/lms/japanese.lm +88 -88
- data/test/fixtures/lms/norwegian.lm +182 -182
- data/test/fixtures/lms/spanish.lm +164 -164
- data/test/fixtures/test_languages/spanish +0 -1
- data/test/generator_test.rb +13 -0
- data/test/helper.rb +2 -0
- metadata +18 -25
- data/.document +0 -5
- data/lib/scylla/lms/13375P33K.lm +0 -400
- data/scylla-0.1.0.gem +0 -0
- data/source_texts/13375P33K.txt +0 -199
- data/test/fixtures/lms/13375p33k.lm +0 -400
- data/test/fixtures/source_texts/13375P33K.txt +0 -199
data/lib/scylla/lms/spanish.lm
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
_
|
1
|
+
_ 34431
|
2
2
|
a 10584
|
3
3
|
e 10526
|
4
4
|
o 7129
|
@@ -10,128 +10,128 @@ l 5112
|
|
10
10
|
d 4622
|
11
11
|
t 3867
|
12
12
|
c 3674
|
13
|
-
a_
|
14
|
-
e_
|
13
|
+
a_ 3001
|
14
|
+
e_ 2946
|
15
15
|
u 2914
|
16
16
|
� 2572
|
17
|
-
s_
|
17
|
+
s_ 2533
|
18
18
|
de 2468
|
19
|
-
_d
|
19
|
+
_d 2335
|
20
20
|
p 2303
|
21
21
|
m 2195
|
22
|
-
_de
|
23
|
-
o_
|
24
|
-
_e
|
25
|
-
n_
|
22
|
+
_de 2093
|
23
|
+
o_ 1885
|
24
|
+
_e 1787
|
25
|
+
n_ 1774
|
26
26
|
en 1746
|
27
27
|
de_ 1679
|
28
28
|
_de_ 1591
|
29
29
|
la 1459
|
30
30
|
es 1454
|
31
|
-
_l
|
31
|
+
_l 1380
|
32
32
|
, 1276
|
33
|
-
,_
|
34
|
-
l_
|
33
|
+
,_ 1261
|
34
|
+
l_ 1259
|
35
35
|
os 1221
|
36
36
|
er 1194
|
37
37
|
on 1122
|
38
38
|
as 1103
|
39
39
|
ci 1095
|
40
|
-
_p
|
40
|
+
_p 1077
|
41
|
+
_c 1054
|
41
42
|
el 1049
|
42
|
-
_c 1047
|
43
43
|
an 1032
|
44
44
|
ra 1018
|
45
45
|
al 997
|
46
|
+
os_ 941
|
46
47
|
g 941
|
47
|
-
_la
|
48
|
-
os_ 930
|
48
|
+
_la 935
|
49
49
|
nt 926
|
50
50
|
te 922
|
51
51
|
co 901
|
52
|
+
_a 898
|
52
53
|
b 896
|
53
|
-
_a 893
|
54
54
|
re 888
|
55
55
|
ta 858
|
56
56
|
ad 827
|
57
57
|
ri 825
|
58
|
+
la_ 816
|
58
59
|
ar 816
|
59
60
|
or 812
|
60
|
-
|
61
|
+
_s 800
|
61
62
|
el_ 796
|
62
|
-
_s 795
|
63
63
|
. 766
|
64
64
|
pa 757
|
65
65
|
� 756
|
66
66
|
ó 755
|
67
67
|
do 751
|
68
68
|
st 737
|
69
|
+
as_ 726
|
69
70
|
ro 725
|
70
71
|
y 717
|
71
|
-
|
72
|
-
_la_ 712
|
72
|
+
_la_ 714
|
73
73
|
na 691
|
74
74
|
ue 688
|
75
75
|
v 663
|
76
76
|
in 658
|
77
|
-
_en
|
77
|
+
_en 651
|
78
78
|
ca 643
|
79
|
+
es_ 636
|
79
80
|
ic 635
|
80
81
|
en_ 631
|
81
82
|
da 621
|
82
83
|
ia 620
|
83
|
-
es_ 620
|
84
84
|
E 615
|
85
85
|
to 609
|
86
86
|
lo 605
|
87
|
-
_m
|
87
|
+
_m 604
|
88
88
|
f 596
|
89
|
-
_co
|
89
|
+
_co 595
|
90
90
|
y_ 562
|
91
|
+
_el 557
|
91
92
|
� 557
|
92
93
|
í 557
|
93
|
-
|
94
|
+
_en_ 535
|
94
95
|
ti 535
|
95
96
|
no 532
|
96
|
-
|
97
|
-
_y 523
|
98
|
-
un 513
|
97
|
+
_y 525
|
99
98
|
ent 513
|
99
|
+
un 513
|
100
|
+
r_ 510
|
101
|
+
_el_ 509
|
100
102
|
le 507
|
101
|
-
_el_ 505
|
102
|
-
r_ 499
|
103
103
|
io 495
|
104
|
-
_y_
|
104
|
+
_y_ 494
|
105
105
|
ón 488
|
106
106
|
�n 488
|
107
107
|
i� 487
|
108
108
|
po 475
|
109
109
|
a� 472
|
110
110
|
se 468
|
111
|
+
_E 464
|
111
112
|
sp 460
|
112
|
-
_E 459
|
113
113
|
q 456
|
114
114
|
qu 455
|
115
|
-
� 453
|
116
115
|
ñ 453
|
116
|
+
� 453
|
117
117
|
C 451
|
118
118
|
tr 450
|
119
|
-
_t
|
119
|
+
_t 449
|
120
120
|
ma 440
|
121
|
-
._
|
121
|
+
._ 438
|
122
122
|
id 432
|
123
123
|
ac 431
|
124
124
|
ió 428
|
125
125
|
is 427
|
126
126
|
ni 426
|
127
127
|
om 425
|
128
|
-
ne 414
|
129
128
|
nte 414
|
129
|
+
ne 414
|
130
130
|
con 410
|
131
|
-
do_
|
131
|
+
do_ 408
|
132
132
|
nc 406
|
133
|
+
_r 404
|
133
134
|
nd 404
|
134
|
-
_r 403
|
135
135
|
li 403
|
136
136
|
ie 401
|
137
137
|
si 399
|
@@ -141,260 +141,260 @@ añ 395
|
|
141
141
|
á 391
|
142
142
|
h 383
|
143
143
|
pr 382
|
144
|
+
�n_ 379
|
145
|
+
ón_ 379
|
146
|
+
_C 376
|
144
147
|
spa 375
|
145
|
-
�n_ 375
|
146
|
-
ón_ 375
|
147
148
|
que 374
|
148
149
|
ión 373
|
149
|
-
ue_
|
150
|
+
ue_ 371
|
150
151
|
pa� 363
|
151
|
-
|
152
|
+
_con 341
|
153
|
+
ión_ 341
|
152
154
|
ec 341
|
153
155
|
mi 340
|
154
|
-
|
155
|
-
_con 338
|
156
|
-
_re 336
|
157
|
-
rt 336
|
156
|
+
_re 337
|
158
157
|
di 336
|
158
|
+
rt 336
|
159
159
|
pe 333
|
160
|
-
|
160
|
+
al_ 332
|
161
|
+
te_ 332
|
162
|
+
_pr 330
|
161
163
|
A 327
|
162
|
-
|
163
|
-
|
164
|
-
_qu
|
165
|
-
_q 319
|
164
|
+
que_ 325
|
165
|
+
_q 320
|
166
|
+
_qu 320
|
166
167
|
it 317
|
167
168
|
pañ 315
|
168
169
|
Es 313
|
169
|
-
|
170
|
+
_es 313
|
171
|
+
_se 311
|
170
172
|
mo 310
|
171
|
-
spa� 309
|
172
173
|
_lo 309
|
173
174
|
spañ 309
|
174
|
-
|
175
|
-
|
175
|
+
spa� 309
|
176
|
+
_que 309
|
177
|
+
_po 308
|
176
178
|
los 305
|
177
|
-
|
178
|
-
|
179
|
+
a, 305
|
180
|
+
a,_ 303
|
179
181
|
ol 302
|
180
|
-
ci� 300
|
181
182
|
aci 300
|
183
|
+
ci� 300
|
182
184
|
ció 299
|
183
185
|
los_ 296
|
184
|
-
|
186
|
+
_i 295
|
185
187
|
I 295
|
186
188
|
ado 294
|
187
|
-
|
189
|
+
_que_ 293
|
188
190
|
ur 293
|
189
|
-
|
190
|
-
|
191
|
+
__ 292
|
192
|
+
_Es 287
|
191
193
|
ción 285
|
192
|
-
_Es 282
|
193
194
|
su 282
|
194
|
-
|
195
|
+
_f 279
|
195
196
|
ña 278
|
197
|
+
�a 278
|
196
198
|
z 275
|
197
|
-
|
199
|
+
_u 275
|
200
|
+
on_ 273
|
198
201
|
ica 271
|
199
|
-
_u 271
|
200
202
|
del 270
|
201
|
-
on_ 270
|
202
|
-
cia 267
|
203
203
|
ce 267
|
204
|
+
cia 267
|
204
205
|
_del 266
|
205
206
|
del_ 261
|
206
207
|
é 261
|
207
208
|
� 261
|
209
|
+
_h 260
|
210
|
+
or_ 260
|
208
211
|
nci 260
|
209
212
|
_del_ 259
|
213
|
+
_o 259
|
214
|
+
s, 259
|
215
|
+
s,_ 258
|
210
216
|
tu 258
|
211
|
-
_o 258
|
212
|
-
_h 257
|
213
217
|
_los 257
|
214
|
-
|
218
|
+
_un 256
|
215
219
|
am 255
|
216
220
|
_los_ 254
|
217
221
|
sta 253
|
218
|
-
|
219
|
-
Espa� 250
|
222
|
+
ra_ 251
|
220
223
|
Esp 250
|
224
|
+
Espa� 250
|
221
225
|
Espa 250
|
222
|
-
|
223
|
-
s, 249
|
224
|
-
ra_ 248
|
226
|
+
ll 245
|
225
227
|
est 245
|
226
228
|
ab 245
|
227
|
-
ll 245
|
228
|
-
las 244
|
229
229
|
por 244
|
230
|
+
las 244
|
231
|
+
ía 243
|
230
232
|
�a 243
|
231
|
-
__ 243
|
232
233
|
aña 243
|
233
|
-
ía 243
|
234
234
|
r� 239
|
235
235
|
at 239
|
236
|
-
|
236
|
+
_A 236
|
237
237
|
paña 234
|
238
|
-
|
238
|
+
so 234
|
239
|
+
_a_ 232
|
239
240
|
im 231
|
240
241
|
las_ 230
|
241
|
-
|
242
|
+
_Esp 230
|
243
|
+
_Espa 230
|
242
244
|
ns 228
|
245
|
+
na_ 228
|
243
246
|
em 226
|
244
|
-
_Espa 226
|
245
|
-
_Esp 226
|
246
247
|
cu 226
|
247
248
|
j 224
|
248
|
-
|
249
|
+
_n 220
|
249
250
|
ul 220
|
251
|
+
ia_ 220
|
252
|
+
nte_ 220
|
250
253
|
P 219
|
251
254
|
ant 219
|
252
|
-
|
255
|
+
to_ 219
|
253
256
|
rr 218
|
254
|
-
|
255
|
-
nte_ 217
|
256
|
-
) 216
|
257
|
-
_n 216
|
258
|
-
( 216
|
259
|
-
dad 215
|
257
|
+
ente 218
|
260
258
|
se_ 215
|
261
|
-
|
262
|
-
_( 214
|
263
|
-
vi 213
|
259
|
+
dad 215
|
264
260
|
il 213
|
261
|
+
vi 213
|
265
262
|
L 211
|
266
263
|
ter 209
|
267
|
-
_pa
|
264
|
+
_pa 208
|
268
265
|
ada 205
|
266
|
+
o, 204
|
269
267
|
men 203
|
268
|
+
o,_ 202
|
269
|
+
da_ 202
|
270
270
|
era 202
|
271
271
|
ran 201
|
272
272
|
les 201
|
273
|
-
|
273
|
+
_su 200
|
274
274
|
ig 198
|
275
|
-
_su 198
|
276
|
-
o, 197
|
277
|
-
o,_ 195
|
278
275
|
ien 195
|
279
276
|
tra 193
|
280
277
|
res 192
|
281
|
-
|
278
|
+
_ca 191
|
282
279
|
com 190
|
283
|
-
|
280
|
+
cio 190
|
284
281
|
one 189
|
285
|
-
|
286
|
-
ed 188
|
282
|
+
ida 189
|
287
283
|
M 188
|
284
|
+
ed 188
|
288
285
|
S 187
|
289
|
-
|
286
|
+
_las_ 186
|
287
|
+
_las 186
|
290
288
|
m� 185
|
291
|
-
_las_ 185
|
292
|
-
ona 185
|
293
289
|
t� 185
|
294
|
-
|
290
|
+
ona 185
|
291
|
+
no_ 184
|
295
292
|
ha 184
|
293
|
+
ion 184
|
296
294
|
od 184
|
297
295
|
nes 183
|
298
|
-
no_ 182
|
299
296
|
ale 180
|
300
|
-
|
301
|
-
|
297
|
+
_com 179
|
298
|
+
_in 178
|
299
|
+
_por 178
|
302
300
|
br 178
|
303
|
-
|
301
|
+
sa 178
|
304
302
|
mp 177
|
305
303
|
bi 176
|
306
|
-
_in 176
|
307
|
-
ist 175
|
308
304
|
pro 175
|
309
|
-
|
305
|
+
ist 175
|
310
306
|
ació 173
|
311
307
|
dos 173
|
308
|
+
aci� 173
|
312
309
|
ct 172
|
313
|
-
des 172
|
314
310
|
oc 172
|
311
|
+
des 172
|
312
|
+
_I 171
|
313
|
+
_al 168
|
315
314
|
eg 167
|
316
|
-
|
317
|
-
an_ 166
|
318
|
-
_I 166
|
315
|
+
_pro 166
|
319
316
|
por_ 166
|
317
|
+
an_ 166
|
318
|
+
a. 165
|
319
|
+
_P 165
|
320
|
+
_se_ 165
|
321
|
+
_por_ 165
|
322
|
+
_v 165
|
320
323
|
ero 165
|
321
|
-
_pro 164
|
322
|
-
_se_ 164
|
323
|
-
_por_ 164
|
324
|
-
ño 163
|
325
324
|
�o 163
|
326
|
-
|
327
|
-
|
328
|
-
|
325
|
+
�a_ 163
|
326
|
+
ño 163
|
327
|
+
ía_ 163
|
329
328
|
va 161
|
330
329
|
ment 161
|
331
|
-
lo_
|
330
|
+
lo_ 161
|
332
331
|
iv 160
|
333
|
-
_P 160
|
334
332
|
gu 159
|
335
333
|
ndo 159
|
336
334
|
mu 158
|
337
|
-
|
338
|
-
|
335
|
+
_si 157
|
336
|
+
et 157
|
337
|
+
d_ 156
|
339
338
|
ici 155
|
340
|
-
d_ 155
|
341
339
|
fi 155
|
340
|
+
_L 154
|
342
341
|
ria 154
|
343
342
|
go 154
|
344
|
-
|
345
|
-
mo_
|
343
|
+
_S 153
|
344
|
+
mo_ 153
|
345
|
+
s. 152
|
346
346
|
ron 152
|
347
|
-
op 151
|
348
|
-
_ha 151
|
349
|
-
ones 151
|
350
347
|
ga 151
|
351
348
|
za 151
|
349
|
+
_ha 151
|
350
|
+
op 151
|
351
|
+
ones 151
|
352
352
|
us 150
|
353
353
|
mb 149
|
354
|
-
año 148
|
355
354
|
ba 148
|
356
355
|
Ca 148
|
356
|
+
año 148
|
357
|
+
ado_ 147
|
357
358
|
_pe 147
|
359
|
+
nes_ 147
|
358
360
|
sti 147
|
359
361
|
ncia 147
|
360
|
-
|
362
|
+
ña_ 146
|
361
363
|
ua 146
|
362
|
-
uc 146
|
363
364
|
ico 146
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
365
|
+
�a_ 146
|
366
|
+
uc 146
|
367
|
+
ente_ 145
|
368
|
+
_M 145
|
368
369
|
ve 144
|
369
|
-
ña_ 144
|
370
|
-
rio 143
|
371
370
|
cion 143
|
372
|
-
|
373
|
-
_M 142
|
371
|
+
rio 143
|
374
372
|
_con_ 142
|
375
373
|
con_ 142
|
376
|
-
ente_ 142
|
377
|
-
ip 141
|
378
374
|
rc 141
|
375
|
+
io_ 141
|
376
|
+
ip 141
|
377
|
+
ca_ 140
|
379
378
|
ntr 140
|
380
|
-
|
381
|
-
tor 139
|
382
|
-
ca_ 139
|
379
|
+
_g 139
|
383
380
|
nto 139
|
384
|
-
|
385
|
-
|
381
|
+
tor 139
|
382
|
+
ta_ 139
|
383
|
+
aña_ 138
|
386
384
|
par 138
|
387
|
-
|
385
|
+
ob 138
|
386
|
+
dos_ 137
|
388
387
|
ir 137
|
389
388
|
G 137
|
390
|
-
|
389
|
+
_. 137
|
390
|
+
ante 136
|
391
|
+
_, 136
|
391
392
|
bl 136
|
392
393
|
n� 136
|
393
|
-
|
394
|
-
dos_ 135
|
395
|
-
err 134
|
396
|
-
�_ 134
|
397
|
-
rm 134
|
398
|
-
ó_ 134
|
394
|
+
les_ 135
|
399
395
|
eri 134
|
400
396
|
ori 134
|
397
|
+
rm 134
|
398
|
+
�_ 134
|
399
|
+
ó_ 134
|
400
|
+
err 134
|