scylla 0.7.0 → 0.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/scylla/classifier.rb +3 -4
- data/lib/scylla/generator.rb +4 -7
- data/lib/scylla/lms/afrikaans.lm +280 -280
- data/lib/scylla/lms/arabic.lm +225 -225
- data/lib/scylla/lms/bulgarian.lm +208 -208
- data/lib/scylla/lms/catalan.lm +212 -212
- data/lib/scylla/lms/chinese.lm +201 -201
- data/lib/scylla/lms/danish.lm +155 -155
- data/lib/scylla/lms/english.lm +207 -207
- data/lib/scylla/lms/finnish.lm +259 -259
- data/lib/scylla/lms/french.lm +203 -203
- data/lib/scylla/lms/german.lm +280 -280
- data/lib/scylla/lms/greek.lm +276 -276
- data/lib/scylla/lms/hebrew.lm +170 -170
- data/lib/scylla/lms/hindi.lm +241 -241
- data/lib/scylla/lms/icelandic.lm +264 -264
- data/lib/scylla/lms/indonesian.lm +272 -272
- data/lib/scylla/lms/italian.lm +283 -283
- data/lib/scylla/lms/japanese.lm +105 -105
- data/lib/scylla/lms/korean.lm +400 -400
- data/lib/scylla/lms/norwegian.lm +235 -235
- data/lib/scylla/lms/polish.lm +264 -264
- data/lib/scylla/lms/portuguese.lm +269 -269
- data/lib/scylla/lms/romanian.lm +278 -278
- data/lib/scylla/lms/russian.lm +127 -127
- data/lib/scylla/lms/slovak.lm +281 -281
- data/lib/scylla/lms/slovenian.lm +276 -276
- data/lib/scylla/lms/spanish.lm +190 -190
- data/lib/scylla/lms/swedish.lm +195 -195
- data/lib/scylla/lms/tagalog.lm +282 -282
- data/lib/scylla/lms/thai.lm +257 -257
- data/lib/scylla/lms/turkish.lm +300 -300
- data/lib/scylla/lms/vietnamese.lm +277 -277
- data/lib/scylla/lms/welsh.lm +271 -271
- data/scylla.gemspec +3 -22
- data/source_texts/korean.txt +219 -134
- metadata +15 -14
- data/scylla-0.6.0.gem +0 -0
data/lib/scylla/lms/english.lm
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
_
|
1
|
+
_ 3692
|
2
2
|
e 1020
|
3
3
|
n 845
|
4
4
|
a 787
|
@@ -10,7 +10,7 @@ r 545
|
|
10
10
|
l 453
|
11
11
|
h 416
|
12
12
|
d 352
|
13
|
-
e_
|
13
|
+
e_ 343
|
14
14
|
g 317
|
15
15
|
c 303
|
16
16
|
an 270
|
@@ -21,380 +21,380 @@ m 213
|
|
21
21
|
f 206
|
22
22
|
ng 201
|
23
23
|
in 195
|
24
|
-
s_
|
24
|
+
s_ 194
|
25
25
|
_th 193
|
26
26
|
he 191
|
27
|
-
_a
|
28
|
-
d_
|
27
|
+
_a 190
|
28
|
+
d_ 188
|
29
29
|
the 168
|
30
|
-
n_
|
30
|
+
n_ 166
|
31
31
|
he_ 157
|
32
32
|
_the 152
|
33
33
|
er 146
|
34
|
-
the_ 143
|
35
34
|
_the_ 143
|
35
|
+
the_ 143
|
36
36
|
w 134
|
37
|
-
, 132
|
38
37
|
,_ 132
|
38
|
+
, 132
|
39
39
|
is 131
|
40
|
-
_o
|
40
|
+
_o 130
|
41
41
|
nd 126
|
42
42
|
p 126
|
43
|
-
|
44
|
-
|
45
|
-
es
|
43
|
+
_i 114
|
44
|
+
h_ 114
|
45
|
+
es 110
|
46
46
|
on 105
|
47
|
-
y 102
|
48
47
|
ti 102
|
48
|
+
y 102
|
49
49
|
v 99
|
50
50
|
and 98
|
51
51
|
nd_ 97
|
52
|
-
re 96
|
53
52
|
en 96
|
54
|
-
|
53
|
+
re 96
|
55
54
|
la 93
|
55
|
+
of 93
|
56
|
+
t_ 93
|
56
57
|
f_ 93
|
57
58
|
or 92
|
58
59
|
at 92
|
59
60
|
_of 91
|
60
|
-
t_ 90
|
61
61
|
of_ 89
|
62
62
|
_of_ 89
|
63
63
|
li 89
|
64
64
|
and_ 88
|
65
|
-
|
66
|
-
_an
|
65
|
+
_s 87
|
66
|
+
_an 85
|
67
|
+
_l 85
|
67
68
|
b 83
|
68
69
|
gl 81
|
69
|
-
_in
|
70
|
+
_in 81
|
71
|
+
_w 80
|
70
72
|
ed 79
|
71
|
-
|
72
|
-
_and 78
|
73
|
-
_s 78
|
73
|
+
al 78
|
74
74
|
_and_ 78
|
75
75
|
ngl 78
|
76
|
-
|
76
|
+
_and 78
|
77
|
+
_E 77
|
77
78
|
E 77
|
79
|
+
y_ 76
|
78
80
|
. 74
|
79
|
-
_E 74
|
80
81
|
sh 73
|
81
82
|
om 72
|
82
|
-
y_ 72
|
83
83
|
ro 71
|
84
84
|
ic 70
|
85
|
+
_c 70
|
85
86
|
ish 69
|
86
|
-
|
87
|
-
_c 69
|
87
|
+
o_ 69
|
88
88
|
ri 69
|
89
|
-
|
90
|
-
|
91
|
-
nt 67
|
89
|
+
._ 69
|
90
|
+
te 69
|
92
91
|
ma 67
|
92
|
+
nt 67
|
93
|
+
as 67
|
94
|
+
ce 67
|
93
95
|
ve 66
|
94
|
-
Engl 65
|
95
96
|
Eng 65
|
97
|
+
_Eng 65
|
98
|
+
_En 65
|
96
99
|
En 65
|
97
|
-
|
98
|
-
|
99
|
-
_Engl 64
|
100
|
+
_Engl 65
|
101
|
+
Engl 65
|
100
102
|
st 64
|
101
103
|
ing 63
|
102
104
|
ar 62
|
103
105
|
lan 62
|
104
106
|
ge 61
|
107
|
+
gli 60
|
105
108
|
lis 60
|
106
109
|
ed_ 60
|
107
110
|
ngli 60
|
108
|
-
|
109
|
-
nglis 59
|
111
|
+
le 59
|
110
112
|
Engli 59
|
113
|
+
nglis 59
|
111
114
|
glis 59
|
112
|
-
le 59
|
113
|
-
lish 58
|
114
115
|
glish 58
|
116
|
+
lish 58
|
115
117
|
co 58
|
116
|
-
|
118
|
+
in_ 58
|
117
119
|
_la 57
|
118
|
-
|
120
|
+
ua 57
|
119
121
|
it 56
|
120
|
-
o_ 56
|
121
122
|
nc 55
|
123
|
+
r_ 55
|
122
124
|
gu 54
|
123
|
-
r_ 54
|
124
125
|
to 54
|
125
|
-
|
126
|
+
sh_ 53
|
126
127
|
_b 53
|
127
|
-
|
128
|
-
|
129
|
-
|
128
|
+
ish_ 53
|
129
|
+
ngu 53
|
130
|
+
g_ 52
|
131
|
+
ngua 51
|
130
132
|
gua 51
|
133
|
+
_d 51
|
131
134
|
ni 51
|
132
|
-
- 51
|
133
135
|
es_ 51
|
134
|
-
|
135
|
-
_d 51
|
136
|
-
ang 50
|
136
|
+
ng_ 51
|
137
137
|
io 50
|
138
138
|
rm 50
|
139
|
+
ang 50
|
139
140
|
man 50
|
141
|
+
to_ 49
|
140
142
|
se 49
|
141
143
|
ag 49
|
142
|
-
ng_ 48
|
143
|
-
g_ 48
|
144
|
-
_f 47
|
145
144
|
age 47
|
145
|
+
_f 47
|
146
146
|
l_ 47
|
147
147
|
_lan 46
|
148
|
-
to_ 46
|
149
148
|
lang 46
|
150
|
-
|
149
|
+
lish_ 45
|
151
150
|
ati 45
|
152
|
-
|
153
|
-
|
151
|
+
angua 45
|
152
|
+
_e 45
|
153
|
+
nguag 45
|
154
154
|
_lang 45
|
155
|
+
uage 45
|
155
156
|
guag 45
|
156
|
-
guage 45
|
157
157
|
uag 45
|
158
|
+
langu 45
|
159
|
+
guage 45
|
158
160
|
angu 45
|
159
|
-
nguag 45
|
160
|
-
lish_ 44
|
161
161
|
ion 44
|
162
|
-
ur 44
|
163
162
|
a_ 44
|
164
|
-
|
163
|
+
ur 44
|
165
164
|
di 43
|
166
|
-
|
165
|
+
el 43
|
167
166
|
rma 42
|
168
167
|
_p 42
|
168
|
+
ea 42
|
169
|
+
ing_ 42
|
170
|
+
as_ 41
|
169
171
|
_to 41
|
170
172
|
_to_ 41
|
171
|
-
as_ 41
|
172
|
-
ca 40
|
173
173
|
_in_ 40
|
174
|
-
th_
|
174
|
+
th_ 40
|
175
|
+
ca 40
|
176
|
+
an_ 40
|
175
177
|
_r 39
|
176
|
-
__ 39
|
177
|
-
ing_ 39
|
178
|
-
ia 38
|
179
|
-
an_ 38
|
180
|
-
ra 38
|
181
178
|
rman 38
|
179
|
+
ra 38
|
180
|
+
ia 38
|
181
|
+
_co 38
|
182
182
|
ha 37
|
183
|
-
_co 37
|
184
|
-
_e 37
|
185
183
|
me 37
|
184
|
+
m_ 36
|
185
|
+
lo 36
|
186
186
|
G 36
|
187
187
|
si 36
|
188
|
-
|
189
|
-
m_ 35
|
188
|
+
_G 36
|
190
189
|
ll 35
|
191
|
-
ec 35
|
192
|
-
fr 35
|
193
190
|
de 35
|
194
|
-
|
195
|
-
|
191
|
+
fr 35
|
192
|
+
ec 35
|
193
|
+
erm 34
|
194
|
+
on_ 34
|
196
195
|
ow 34
|
196
|
+
s, 34
|
197
|
+
er_ 34
|
197
198
|
s,_ 34
|
198
|
-
erm 34
|
199
199
|
ly 34
|
200
200
|
k 33
|
201
|
-
er_ 33
|
202
201
|
al_ 33
|
203
|
-
|
204
|
-
tion 32
|
205
|
-
ou 32
|
202
|
+
_fr 32
|
206
203
|
tio 32
|
207
204
|
nce 32
|
208
|
-
|
209
|
-
|
210
|
-
|
205
|
+
tion 32
|
206
|
+
_n 32
|
207
|
+
ou 32
|
211
208
|
Ge 31
|
212
|
-
|
213
|
-
|
214
|
-
|
209
|
+
ge_ 31
|
210
|
+
S 31
|
211
|
+
pe 31
|
215
212
|
na 31
|
216
|
-
|
217
|
-
|
213
|
+
_Ge 31
|
214
|
+
_re 31
|
215
|
+
ent 31
|
218
216
|
erman 30
|
217
|
+
be 30
|
218
|
+
_Ger 30
|
219
219
|
Germa 30
|
220
|
+
erma 30
|
221
|
+
ch 30
|
220
222
|
Ger 30
|
221
|
-
|
223
|
+
_S 30
|
222
224
|
ts 30
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
ge_ 29
|
225
|
+
_Germ 30
|
226
|
+
Germ 30
|
227
|
+
_m 29
|
227
228
|
x 29
|
228
229
|
ly_ 29
|
229
|
-
_m 29
|
230
|
-
om_ 28
|
231
|
-
_Germ 28
|
232
230
|
iv 28
|
233
|
-
_Ger 28
|
234
231
|
no 28
|
232
|
+
ns 28
|
233
|
+
om_ 28
|
235
234
|
I 27
|
235
|
+
c_ 27
|
236
236
|
wo 27
|
237
|
+
st_ 27
|
238
|
+
ce_ 27
|
239
|
+
_as 27
|
240
|
+
age_ 27
|
237
241
|
wi 27
|
238
|
-
|
239
|
-
ic_ 26
|
240
|
-
ta 26
|
241
|
-
_as 26
|
242
|
+
uage_ 26
|
242
243
|
ne 26
|
244
|
+
ic_ 26
|
243
245
|
ive 26
|
244
|
-
|
245
|
-
|
246
|
+
_I 26
|
247
|
+
ta 26
|
248
|
+
A 25
|
249
|
+
rom 25
|
246
250
|
is_ 25
|
247
251
|
atio 25
|
248
|
-
|
249
|
-
|
250
|
-
ani 25
|
251
|
-
st_ 25
|
252
|
+
_wo 25
|
253
|
+
ct 25
|
252
254
|
_wor 25
|
253
|
-
|
255
|
+
we 25
|
256
|
+
ani 25
|
257
|
+
wor 25
|
254
258
|
nic 25
|
259
|
+
_A 25
|
260
|
+
ation 25
|
255
261
|
_a_ 25
|
256
|
-
|
257
|
-
A 25
|
258
|
-
_wo 25
|
259
|
-
we 25
|
260
|
-
ct 25
|
261
|
-
from_ 24
|
262
|
-
rom_ 24
|
263
|
-
_from 24
|
264
|
-
enc 24
|
262
|
+
ot 24
|
265
263
|
_fro 24
|
266
264
|
rt 24
|
267
|
-
fro 24
|
268
|
-
uage_ 24
|
269
265
|
_as_ 24
|
270
|
-
|
271
|
-
|
266
|
+
enc 24
|
267
|
+
fro 24
|
272
268
|
from 24
|
273
|
-
|
269
|
+
from_ 24
|
270
|
+
ts_ 24
|
271
|
+
rom_ 24
|
272
|
+
_from 24
|
274
273
|
pa 23
|
275
|
-
|
276
|
-
|
277
|
-
|
274
|
+
_h 23
|
275
|
+
nce_ 23
|
276
|
+
re_ 23
|
277
|
+
N 22
|
278
|
+
mani 22
|
278
279
|
_B 22
|
280
|
+
ion_ 22
|
281
|
+
ted 22
|
282
|
+
_F 22
|
283
|
+
ol 22
|
284
|
+
_N 22
|
279
285
|
ate 22
|
280
286
|
ted_ 22
|
281
|
-
_be 22
|
282
|
-
ted 22
|
283
|
-
ut 22
|
284
287
|
F 22
|
285
|
-
ion_ 22
|
286
|
-
_A 22
|
287
|
-
N 22
|
288
288
|
_is 22
|
289
289
|
tu 22
|
290
|
-
|
291
|
-
|
290
|
+
ut 22
|
291
|
+
B 22
|
292
|
+
_be 22
|
293
|
+
manic 21
|
294
|
+
_di 21
|
292
295
|
anic 21
|
296
|
+
ve_ 21
|
293
297
|
rmani 21
|
298
|
+
ie 21
|
294
299
|
ry 21
|
295
|
-
_di 21
|
296
|
-
hi 21
|
297
|
-
nce_ 21
|
298
300
|
rd 21
|
299
|
-
|
301
|
+
hi 21
|
300
302
|
sp 21
|
301
|
-
_N 21
|
302
|
-
ve_ 21
|
303
|
-
manic 21
|
304
|
-
ie 21
|
305
|
-
her 20
|
306
303
|
at_ 20
|
304
|
+
se_ 20
|
307
305
|
ter 20
|
306
|
+
tin 20
|
308
307
|
_is_ 20
|
309
308
|
so 20
|
309
|
+
her 20
|
310
310
|
us 20
|
311
|
-
ges 20
|
312
|
-
tin 20
|
313
|
-
fi 20
|
314
311
|
ver 20
|
312
|
+
fi 20
|
313
|
+
ica 19
|
314
|
+
com 19
|
315
315
|
nic_ 19
|
316
|
+
_v 19
|
316
317
|
all 19
|
317
318
|
ld 19
|
318
|
-
ica 19
|
319
319
|
su 19
|
320
|
-
|
321
|
-
ages 19
|
322
|
-
se_ 19
|
323
|
-
No 18
|
324
|
-
h, 18
|
325
|
-
rs 18
|
320
|
+
ges 19
|
326
321
|
anic_ 18
|
327
|
-
|
328
|
-
uages 18
|
329
|
-
if 18
|
330
|
-
tr 18
|
322
|
+
No 18
|
331
323
|
anc 18
|
324
|
+
_No 18
|
332
325
|
ord 18
|
333
|
-
|
326
|
+
if 18
|
334
327
|
po 18
|
328
|
+
ages 18
|
329
|
+
h,_ 18
|
330
|
+
_g 18
|
335
331
|
ss 18
|
332
|
+
tr 18
|
333
|
+
h, 18
|
334
|
+
ir 17
|
335
|
+
rs 17
|
336
|
+
tur 17
|
337
|
+
ence 17
|
336
338
|
_li 17
|
339
|
+
uages 17
|
340
|
+
rit 17
|
341
|
+
cen 17
|
337
342
|
ist 17
|
338
|
-
|
343
|
+
mp 17
|
344
|
+
con 17
|
339
345
|
ch_ 17
|
340
346
|
ther 17
|
341
|
-
con 17
|
342
|
-
_No 17
|
343
|
-
_F 17
|
344
|
-
tur 17
|
345
|
-
rit 17
|
346
|
-
ir 17
|
347
347
|
ue 17
|
348
348
|
mo 17
|
349
|
-
n, 17
|
350
|
-
mp 17
|
351
|
-
n,_ 17
|
352
|
-
cen 17
|
353
|
-
orm 16
|
354
|
-
word 16
|
355
|
-
ial 16
|
356
349
|
_Br 16
|
357
350
|
ect 16
|
358
|
-
|
359
|
-
|
351
|
+
tion_ 16
|
352
|
+
orm 16
|
360
353
|
_L 16
|
361
|
-
|
354
|
+
_T 16
|
355
|
+
L 16
|
362
356
|
rn 16
|
363
|
-
|
364
|
-
|
357
|
+
T 16
|
358
|
+
pl 16
|
359
|
+
ial 16
|
360
|
+
_word 16
|
365
361
|
_th_ 16
|
366
362
|
am 16
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
hat 15
|
371
|
-
ds 15
|
372
|
-
_Brit 15
|
373
|
-
or_ 15
|
374
|
-
ran 15
|
375
|
-
Brit 15
|
376
|
-
en_ 15
|
377
|
-
ev 15
|
378
|
-
iti 15
|
379
|
-
wh 15
|
363
|
+
Br 16
|
364
|
+
nt_ 16
|
365
|
+
word 16
|
380
366
|
O 15
|
381
|
-
|
382
|
-
ant 15
|
383
|
-
lat 15
|
384
|
-
Bri 15
|
385
|
-
hat_ 15
|
386
|
-
do 15
|
387
|
-
land 15
|
388
|
-
_Bri 15
|
367
|
+
or_ 15
|
389
368
|
ges_ 15
|
369
|
+
ac 15
|
370
|
+
ant 15
|
371
|
+
me_ 15
|
372
|
+
_Brit 15
|
373
|
+
mi 15
|
374
|
+
hat 15
|
375
|
+
_O 15
|
376
|
+
ds 15
|
390
377
|
ad 15
|
391
|
-
|
392
|
-
_wh 15
|
378
|
+
lat 15
|
393
379
|
wa 15
|
394
|
-
ac 15
|
395
380
|
Nor 15
|
381
|
+
_Nor 15
|
382
|
+
land 15
|
383
|
+
en_ 15
|
396
384
|
op 15
|
397
|
-
|
398
|
-
|
399
|
-
|
385
|
+
iti 15
|
386
|
+
_Bri 15
|
387
|
+
ran 15
|
388
|
+
do 15
|
400
389
|
_ha 15
|
390
|
+
Bri 15
|
391
|
+
Brit 15
|
392
|
+
_no 15
|
393
|
+
_wa 15
|
394
|
+
ex 15
|
395
|
+
ev 15
|
396
|
+
_wi 15
|
397
|
+
_wh 15
|
398
|
+
wh 15
|
399
|
+
hat_ 15
|
400
|
+
pr 14
|