scylla 0.7.0 → 0.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/scylla/classifier.rb +3 -4
- data/lib/scylla/generator.rb +4 -7
- data/lib/scylla/lms/afrikaans.lm +280 -280
- data/lib/scylla/lms/arabic.lm +225 -225
- data/lib/scylla/lms/bulgarian.lm +208 -208
- data/lib/scylla/lms/catalan.lm +212 -212
- data/lib/scylla/lms/chinese.lm +201 -201
- data/lib/scylla/lms/danish.lm +155 -155
- data/lib/scylla/lms/english.lm +207 -207
- data/lib/scylla/lms/finnish.lm +259 -259
- data/lib/scylla/lms/french.lm +203 -203
- data/lib/scylla/lms/german.lm +280 -280
- data/lib/scylla/lms/greek.lm +276 -276
- data/lib/scylla/lms/hebrew.lm +170 -170
- data/lib/scylla/lms/hindi.lm +241 -241
- data/lib/scylla/lms/icelandic.lm +264 -264
- data/lib/scylla/lms/indonesian.lm +272 -272
- data/lib/scylla/lms/italian.lm +283 -283
- data/lib/scylla/lms/japanese.lm +105 -105
- data/lib/scylla/lms/korean.lm +400 -400
- data/lib/scylla/lms/norwegian.lm +235 -235
- data/lib/scylla/lms/polish.lm +264 -264
- data/lib/scylla/lms/portuguese.lm +269 -269
- data/lib/scylla/lms/romanian.lm +278 -278
- data/lib/scylla/lms/russian.lm +127 -127
- data/lib/scylla/lms/slovak.lm +281 -281
- data/lib/scylla/lms/slovenian.lm +276 -276
- data/lib/scylla/lms/spanish.lm +190 -190
- data/lib/scylla/lms/swedish.lm +195 -195
- data/lib/scylla/lms/tagalog.lm +282 -282
- data/lib/scylla/lms/thai.lm +257 -257
- data/lib/scylla/lms/turkish.lm +300 -300
- data/lib/scylla/lms/vietnamese.lm +277 -277
- data/lib/scylla/lms/welsh.lm +271 -271
- data/scylla.gemspec +3 -22
- data/source_texts/korean.txt +219 -134
- metadata +15 -14
- data/scylla-0.6.0.gem +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
_
|
1
|
+
_ 1362
|
2
2
|
a 715
|
3
3
|
n 366
|
4
4
|
e 308
|
@@ -8,393 +8,393 @@ r 208
|
|
8
8
|
k 205
|
9
9
|
t 205
|
10
10
|
an 185
|
11
|
-
g 152
|
12
11
|
s 152
|
13
|
-
|
12
|
+
g 152
|
14
13
|
d 149
|
15
|
-
|
14
|
+
m 149
|
16
15
|
l 108
|
16
|
+
ng 108
|
17
|
+
a_ 105
|
17
18
|
p 99
|
18
|
-
|
19
|
+
n_ 96
|
19
20
|
o 91
|
21
|
+
an_ 87
|
20
22
|
er 85
|
21
23
|
b 85
|
22
|
-
|
24
|
+
_d 80
|
23
25
|
, 77
|
26
|
+
,_ 77
|
27
|
+
i_ 76
|
24
28
|
ka 76
|
25
|
-
an_ 75
|
26
|
-
_d 74
|
27
29
|
h 73
|
28
|
-
|
29
|
-
,_ 67
|
30
|
-
_m 65
|
30
|
+
_m 68
|
31
31
|
ar 65
|
32
32
|
en 65
|
33
|
+
_k 62
|
33
34
|
ta 59
|
34
35
|
di 59
|
35
36
|
me 58
|
36
|
-
_k 57
|
37
37
|
ang 56
|
38
38
|
ra 55
|
39
39
|
at 54
|
40
|
-
ak
|
40
|
+
ak 53
|
41
41
|
y 53
|
42
|
-
_me
|
42
|
+
_me 52
|
43
43
|
da 49
|
44
44
|
ga 48
|
45
45
|
in 46
|
46
|
+
_di 44
|
46
47
|
ya 43
|
48
|
+
_s 42
|
49
|
+
_p 42
|
50
|
+
._ 42
|
47
51
|
. 42
|
48
|
-
_di 41
|
49
52
|
tu 41
|
53
|
+
_b 40
|
54
|
+
_t 40
|
50
55
|
un 40
|
51
|
-
|
52
|
-
la 38
|
56
|
+
g_ 38
|
53
57
|
j 38
|
58
|
+
ng_ 38
|
59
|
+
la 38
|
60
|
+
k_ 37
|
54
61
|
ma 37
|
55
|
-
_s 37
|
56
|
-
k_ 36
|
57
62
|
na 36
|
63
|
+
h_ 36
|
58
64
|
ah 35
|
59
65
|
ri 34
|
60
|
-
ng_ 33
|
61
|
-
g_ 33
|
62
|
-
_t 33
|
63
|
-
se 33
|
64
|
-
be 33
|
65
66
|
ke 33
|
66
|
-
|
67
|
-
|
67
|
+
be 33
|
68
|
+
se 33
|
68
69
|
al 32
|
69
|
-
pe 32
|
70
70
|
as 32
|
71
|
-
|
71
|
+
pe 32
|
72
72
|
ia 31
|
73
|
-
|
74
|
-
_ke
|
73
|
+
men 31
|
74
|
+
_ke 30
|
75
75
|
_men 29
|
76
|
+
_a 29
|
76
77
|
si 29
|
77
78
|
P 29
|
78
|
-
ny 28
|
79
|
-
ti 28
|
80
79
|
pa 28
|
81
80
|
it 28
|
81
|
+
ti 28
|
82
|
+
u_ 27
|
83
|
+
_P 27
|
82
84
|
em 27
|
83
|
-
|
84
|
-
|
85
|
-
|
85
|
+
s_ 27
|
86
|
+
ny 27
|
87
|
+
ang_ 27
|
86
88
|
kan 26
|
87
|
-
|
89
|
+
sa 26
|
90
|
+
_pe 26
|
91
|
+
am 26
|
92
|
+
_se 26
|
93
|
+
t_ 25
|
88
94
|
c 24
|
89
|
-
|
95
|
+
_ka 24
|
90
96
|
eng 24
|
91
|
-
_pe 24
|
92
97
|
te 24
|
93
|
-
|
94
|
-
|
98
|
+
_i 24
|
99
|
+
_da 23
|
100
|
+
ja 23
|
95
101
|
nga 23
|
102
|
+
r_ 23
|
103
|
+
S 23
|
96
104
|
ba 23
|
97
|
-
ja 23
|
98
|
-
_se 23
|
99
|
-
r_ 22
|
100
|
-
li 22
|
101
|
-
ni 22
|
102
105
|
el 22
|
103
|
-
ber 22
|
104
106
|
di_ 22
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
ak_
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
nya 20
|
114
|
-
bu 20
|
107
|
+
li 22
|
108
|
+
ber 22
|
109
|
+
ni 21
|
110
|
+
nt 21
|
111
|
+
ak_ 21
|
112
|
+
_be 21
|
113
|
+
ah_ 21
|
114
|
+
kan_ 20
|
115
115
|
ata 20
|
116
|
-
|
117
|
-
|
116
|
+
ad 19
|
117
|
+
nya 19
|
118
|
+
ik 19
|
119
|
+
ap 19
|
118
120
|
ran 19
|
119
|
-
|
120
|
-
is 19
|
121
|
+
bu 19
|
121
122
|
us 19
|
122
|
-
|
123
|
+
gan 19
|
124
|
+
is 19
|
123
125
|
ara 19
|
124
|
-
ru 18
|
125
126
|
dan 18
|
126
|
-
|
127
|
+
ru 18
|
127
128
|
era 18
|
128
|
-
|
129
|
-
|
129
|
+
_ber 18
|
130
|
+
ur 17
|
131
|
+
_S 17
|
132
|
+
uk 17
|
130
133
|
a, 17
|
134
|
+
at_ 17
|
131
135
|
ari 17
|
136
|
+
a,_ 17
|
132
137
|
es 17
|
133
|
-
ur 17
|
134
|
-
uk 17
|
135
|
-
- 17
|
136
138
|
to 17
|
137
|
-
|
138
|
-
|
139
|
-
|
139
|
+
nd 17
|
140
|
+
_te 17
|
141
|
+
_di_ 16
|
140
142
|
ter 16
|
141
143
|
I 16
|
142
144
|
ung 16
|
143
|
-
|
144
|
-
|
145
|
+
ala 16
|
146
|
+
su 16
|
145
147
|
yang 16
|
148
|
+
_ya 16
|
149
|
+
ol 16
|
146
150
|
ngan 16
|
151
|
+
_y 16
|
152
|
+
yan 16
|
153
|
+
ha 16
|
147
154
|
M 15
|
148
|
-
_ber 15
|
149
|
-
_di_ 15
|
150
|
-
_ya 15
|
151
|
-
kan_ 15
|
152
155
|
J 15
|
153
|
-
|
156
|
+
dan_ 14
|
154
157
|
lu 14
|
158
|
+
itu 14
|
159
|
+
_dan_ 14
|
160
|
+
l_ 14
|
161
|
+
yang_ 14
|
155
162
|
du 14
|
156
163
|
ek 14
|
157
|
-
|
164
|
+
ya_ 14
|
165
|
+
_yan 14
|
166
|
+
_dan 14
|
158
167
|
A 14
|
159
168
|
ok 14
|
160
|
-
de 14
|
161
|
-
_yan 14
|
162
|
-
itu 14
|
163
169
|
enga 14
|
164
|
-
|
165
|
-
|
170
|
+
_yang 14
|
171
|
+
_ter 14
|
172
|
+
de 14
|
173
|
+
R 13
|
174
|
+
ina 13
|
175
|
+
D 13
|
176
|
+
da_ 13
|
166
177
|
ku 13
|
167
|
-
|
178
|
+
kar 13
|
179
|
+
na_ 13
|
168
180
|
ko 13
|
169
|
-
|
170
|
-
ai 13
|
171
|
-
da_ 13
|
181
|
+
_c 13
|
172
182
|
rang 13
|
173
|
-
at_ 13
|
174
|
-
l_ 13
|
175
|
-
ina 13
|
176
|
-
eri 13
|
177
183
|
ge 13
|
178
|
-
|
184
|
+
ai 13
|
179
185
|
per 13
|
180
|
-
|
181
|
-
|
182
|
-
R 13
|
183
|
-
dan_ 13
|
184
|
-
ua 12
|
185
|
-
ia_ 12
|
186
|
-
ya_ 12
|
187
|
-
gi 12
|
186
|
+
eri 13
|
187
|
+
ia_ 13
|
188
188
|
po 12
|
189
|
-
ul 12
|
190
|
-
et 12
|
191
|
-
_dan_ 12
|
192
|
-
e_ 12
|
193
|
-
ju 12
|
194
189
|
rin 12
|
195
|
-
pen 12
|
196
|
-
aka 12
|
197
190
|
ut 12
|
191
|
+
gi 12
|
192
|
+
e_ 12
|
193
|
+
_J 12
|
194
|
+
ul 12
|
195
|
+
ju 12
|
198
196
|
w 12
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
gg 11
|
206
|
-
na_ 11
|
197
|
+
et 12
|
198
|
+
gan_ 12
|
199
|
+
ua 12
|
200
|
+
ngan_ 12
|
201
|
+
pen 12
|
202
|
+
_ba 11
|
207
203
|
_pen 11
|
208
|
-
|
204
|
+
ama 11
|
205
|
+
ena 11
|
206
|
+
B 11
|
209
207
|
or 11
|
210
|
-
|
208
|
+
nya_ 11
|
211
209
|
re 11
|
210
|
+
ni_ 11
|
211
|
+
gg 11
|
212
212
|
ca 11
|
213
|
-
|
214
|
-
|
213
|
+
mp 11
|
214
|
+
ed 11
|
215
|
+
aka 11
|
216
|
+
ngg 11
|
217
|
+
us_ 11
|
215
218
|
lan 11
|
216
|
-
uh 10
|
217
|
-
arin 10
|
218
|
-
wa 10
|
219
|
-
ela 10
|
220
|
-
_T 10
|
221
|
-
ni_ 10
|
222
|
-
Z 10
|
223
219
|
im 10
|
220
|
+
ini 10
|
221
|
+
uh 10
|
222
|
+
T 10
|
224
223
|
adi 10
|
225
|
-
|
226
|
-
|
227
|
-
Zar 10
|
228
|
-
us_ 10
|
229
|
-
Zarin 10
|
230
|
-
rt 10
|
224
|
+
Z 10
|
225
|
+
_M 10
|
231
226
|
rina 10
|
227
|
+
si_ 10
|
228
|
+
_Zari 10
|
229
|
+
_Zar 10
|
230
|
+
Zar 10
|
231
|
+
_T 10
|
232
|
+
_B 10
|
233
|
+
ngk 10
|
234
|
+
ta_ 10
|
235
|
+
ika 10
|
236
|
+
_in 10
|
237
|
+
arin 10
|
232
238
|
mi 10
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
239
|
+
_Za 10
|
240
|
+
_Z 10
|
241
|
+
wa 10
|
242
|
+
rt 10
|
243
|
+
ab 10
|
237
244
|
eru 10
|
238
|
-
Za 10
|
239
245
|
Zari 10
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
i,_ 9
|
251
|
-
mu 9
|
252
|
-
K 9
|
253
|
-
gan_ 9
|
246
|
+
Za 10
|
247
|
+
arina 10
|
248
|
+
Zarin 10
|
249
|
+
ela 10
|
250
|
+
gk 10
|
251
|
+
uk_ 10
|
252
|
+
il 9
|
253
|
+
jadi 9
|
254
|
+
i._ 9
|
255
|
+
mem 9
|
254
256
|
asi 9
|
257
|
+
engan 9
|
258
|
+
ntu 9
|
259
|
+
_I 9
|
255
260
|
i. 9
|
256
|
-
|
261
|
+
_per 9
|
262
|
+
_ma 9
|
257
263
|
_mem 9
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
ant 9
|
264
|
+
tu_ 9
|
265
|
+
K 9
|
266
|
+
_ta 9
|
267
|
+
_ini 9
|
263
268
|
apa 9
|
264
|
-
|
269
|
+
ag 9
|
265
270
|
_de 9
|
266
|
-
|
271
|
+
_itu 9
|
267
272
|
eb 9
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
nj 8
|
277
|
-
u, 8
|
278
|
-
a. 8
|
279
|
-
eny 8
|
280
|
-
_Za 8
|
281
|
-
_Z 8
|
273
|
+
jad 9
|
274
|
+
ing 9
|
275
|
+
i,_ 9
|
276
|
+
_it 9
|
277
|
+
os 9
|
278
|
+
as_ 9
|
279
|
+
i, 9
|
280
|
+
mu 9
|
282
281
|
dia 8
|
282
|
+
le 8
|
283
283
|
ki 8
|
284
|
-
_I 8
|
285
|
-
oko 8
|
286
|
-
n. 8
|
287
|
-
pat 8
|
288
|
-
_meng 8
|
289
|
-
st 8
|
290
|
-
_Zar 8
|
291
|
-
ian 8
|
292
|
-
ri_ 8
|
293
|
-
_ma 8
|
294
|
-
_M 8
|
295
|
-
meng 8
|
296
|
-
mb 8
|
297
284
|
rk 8
|
285
|
+
_K 8
|
286
|
+
st 8
|
287
|
+
isi 8
|
288
|
+
al_ 8
|
289
|
+
eny 8
|
290
|
+
ip 8
|
291
|
+
p_ 8
|
292
|
+
_j 8
|
293
|
+
ant 8
|
294
|
+
rang_ 8
|
295
|
+
pat 8
|
298
296
|
L 8
|
299
|
-
|
297
|
+
emb 8
|
298
|
+
ada 8
|
299
|
+
meng 8
|
300
|
+
_meng 8
|
301
|
+
man 8
|
302
|
+
_A 8
|
303
|
+
ar_ 8
|
304
|
+
au 8
|
300
305
|
itu_ 8
|
301
|
-
|
306
|
+
oko 8
|
307
|
+
oh 8
|
302
308
|
aga 8
|
303
|
-
|
304
|
-
|
309
|
+
ian 8
|
310
|
+
ri_ 8
|
305
311
|
ep 8
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
312
|
+
ma_ 8
|
313
|
+
nj 8
|
314
|
+
mb 8
|
315
|
+
ga_ 7
|
316
|
+
u, 7
|
317
|
+
ngka 7
|
318
|
+
um 7
|
319
|
+
uga 7
|
320
|
+
pat_ 7
|
321
|
+
ina_ 7
|
322
|
+
a._ 7
|
323
|
+
ada_ 7
|
324
|
+
a. 7
|
325
|
+
ntuk_ 7
|
326
|
+
rina_ 7
|
311
327
|
ay 7
|
312
|
-
|
313
|
-
|
328
|
+
toko 7
|
329
|
+
aya 7
|
330
|
+
ot 7
|
331
|
+
meny 7
|
332
|
+
_u 7
|
333
|
+
kal 7
|
334
|
+
art 7
|
335
|
+
alan 7
|
336
|
+
_peng 7
|
337
|
+
n._ 7
|
338
|
+
n. 7
|
339
|
+
g, 7
|
340
|
+
ngga 7
|
314
341
|
gga 7
|
342
|
+
aran 7
|
343
|
+
ib 7
|
344
|
+
ce 7
|
315
345
|
atan 7
|
346
|
+
_L 7
|
347
|
+
ej 7
|
348
|
+
ug 7
|
349
|
+
_meny 7
|
350
|
+
ng,_ 7
|
316
351
|
tak 7
|
352
|
+
and 7
|
353
|
+
tuk_ 7
|
354
|
+
_itu_ 7
|
355
|
+
rl 7
|
317
356
|
ger 7
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
357
|
+
rah 7
|
358
|
+
nu 7
|
359
|
+
lu_ 7
|
360
|
+
u,_ 7
|
361
|
+
ita 7
|
323
362
|
arang 7
|
363
|
+
gka 7
|
364
|
+
tan 7
|
365
|
+
ntuk 7
|
366
|
+
sia 7
|
324
367
|
Ja 7
|
325
|
-
pu 7
|
326
|
-
rah 7
|
327
|
-
ngka 7
|
328
|
-
ntuk_ 7
|
329
|
-
p_ 7
|
330
|
-
Ta 7
|
331
|
-
erl 7
|
332
|
-
ngga 7
|
333
|
-
ma_ 7
|
334
|
-
uga 7
|
335
368
|
peng 7
|
336
|
-
aran 7
|
337
|
-
_per 7
|
338
|
-
alan 7
|
339
|
-
tuk_ 7
|
340
369
|
_Ja 7
|
341
|
-
|
342
|
-
ug 7
|
343
|
-
ce 7
|
344
|
-
nda 7
|
345
|
-
ng, 7
|
346
|
-
g, 7
|
347
|
-
and 7
|
348
|
-
sia 7
|
349
|
-
_itu 7
|
350
|
-
_ba 7
|
351
|
-
rl 7
|
352
|
-
as_ 7
|
353
|
-
lu_ 7
|
354
|
-
ada_ 7
|
370
|
+
g,_ 7
|
355
371
|
ro 7
|
356
|
-
|
357
|
-
|
372
|
+
bur 7
|
373
|
+
nda 7
|
374
|
+
Ta 7
|
375
|
+
erl 7
|
376
|
+
ih 7
|
358
377
|
tok 7
|
359
|
-
|
360
|
-
|
361
|
-
|
378
|
+
ng, 7
|
379
|
+
_Ta 7
|
380
|
+
m_ 7
|
381
|
+
pu 7
|
382
|
+
eka 7
|
383
|
+
oh_ 7
|
362
384
|
tuk 7
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
ngi 6
|
372
|
-
up 6
|
373
|
-
aj 6
|
374
|
-
rang_ 6
|
375
|
-
an, 6
|
376
|
-
rta 6
|
385
|
+
deng 6
|
386
|
+
mun 6
|
387
|
+
sus 6
|
388
|
+
_an 6
|
389
|
+
tar 6
|
390
|
+
ub 6
|
391
|
+
_tok 6
|
392
|
+
rus 6
|
377
393
|
_tak 6
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
_po 6
|
382
|
-
_U 6
|
383
|
-
nta 6
|
384
|
-
Pol 6
|
385
|
-
akar 6
|
386
|
-
on 6
|
387
|
-
ga_ 6
|
394
|
+
an._ 6
|
395
|
+
N 6
|
396
|
+
_mel 6
|
388
397
|
f 6
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
kat 6
|
393
|
-
n, 6
|
394
|
-
n,_ 6
|
395
|
-
koh 6
|
396
|
-
arta 6
|
397
|
-
aha 6
|
398
|
-
mun 6
|
399
|
-
oli 6
|
400
|
-
tin 6
|
398
|
+
ang, 6
|
399
|
+
aj 6
|
400
|
+
nta 6
|