scylla 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -2
- data/Gemfile.lock +16 -1
- data/lib/scylla/classifier.rb +1 -1
- data/lib/scylla/generator.rb +16 -4
- data/lib/scylla/lms/afrikaans.lm +232 -232
- data/lib/scylla/lms/arabic.lm +175 -175
- data/lib/scylla/lms/bulgarian.lm +225 -225
- data/lib/scylla/lms/catalan.lm +309 -309
- data/lib/scylla/lms/danish.lm +167 -167
- data/lib/scylla/lms/english.lm +398 -398
- data/lib/scylla/lms/finnish.lm +237 -237
- data/lib/scylla/lms/french.lm +148 -148
- data/lib/scylla/lms/german.lm +258 -258
- data/lib/scylla/lms/greek.lm +236 -236
- data/lib/scylla/lms/hebrew.lm +154 -154
- data/lib/scylla/lms/hindi.lm +139 -139
- data/lib/scylla/lms/icelandic.lm +239 -239
- data/lib/scylla/lms/indonesian.lm +244 -244
- data/lib/scylla/lms/italian.lm +248 -248
- data/lib/scylla/lms/japanese.lm +90 -90
- data/lib/scylla/lms/korean.lm +306 -306
- data/lib/scylla/lms/norwegian.lm +193 -193
- data/lib/scylla/lms/polish.lm +241 -241
- data/lib/scylla/lms/portuguese.lm +232 -232
- data/lib/scylla/lms/romanian.lm +246 -246
- data/lib/scylla/lms/slovak.lm +242 -242
- data/lib/scylla/lms/slovenian.lm +229 -229
- data/lib/scylla/lms/spanish.lm +164 -164
- data/lib/scylla/lms/swedish.lm +157 -157
- data/lib/scylla/lms/tagalog.lm +247 -247
- data/lib/scylla/lms/thai.lm +252 -252
- data/lib/scylla/lms/turkish.lm +285 -285
- data/lib/scylla/lms/vietnamese.lm +250 -250
- data/lib/scylla/lms/welsh.lm +248 -248
- data/lib/scylla/resources.rb +1 -9
- data/lib/scylla.rb +4 -0
- data/scylla.gemspec +2 -120
- data/source_texts/english.txt +62 -27
- data/test/classifier_test.rb +1 -3
- data/test/fixtures/lms/danish.lm +173 -173
- data/test/fixtures/lms/english.lm +220 -220
- data/test/fixtures/lms/french.lm +175 -175
- data/test/fixtures/lms/german.lm +254 -254
- data/test/fixtures/lms/hindi.lm +139 -139
- data/test/fixtures/lms/italian.lm +236 -236
- data/test/fixtures/lms/japanese.lm +88 -88
- data/test/fixtures/lms/norwegian.lm +182 -182
- data/test/fixtures/lms/spanish.lm +164 -164
- data/test/fixtures/test_languages/spanish +0 -1
- data/test/generator_test.rb +13 -0
- data/test/helper.rb +2 -0
- metadata +18 -25
- data/.document +0 -5
- data/lib/scylla/lms/13375P33K.lm +0 -400
- data/scylla-0.1.0.gem +0 -0
- data/source_texts/13375P33K.txt +0 -199
- data/test/fixtures/lms/13375p33k.lm +0 -400
- data/test/fixtures/source_texts/13375P33K.txt +0 -199
data/lib/scylla/lms/polish.lm
CHANGED
@@ -21,380 +21,380 @@ ni 91
|
|
21
21
|
l 90
|
22
22
|
j 86
|
23
23
|
� 75
|
24
|
-
o_
|
25
|
-
_p
|
24
|
+
o_ 74
|
25
|
+
_p 71
|
26
26
|
g 65
|
27
|
+
a_ 65
|
27
28
|
, 64
|
28
|
-
|
29
|
-
e_ 61
|
29
|
+
e_ 62
|
30
30
|
b 58
|
31
31
|
,_ 56
|
32
32
|
_w 51
|
33
33
|
_n 49
|
34
34
|
ze 48
|
35
|
-
i_
|
35
|
+
i_ 47
|
36
|
+
__ 46
|
36
37
|
� 45
|
37
|
-
__ 42
|
38
38
|
. 42
|
39
39
|
� 41
|
40
|
-
pr 40
|
41
40
|
na 40
|
41
|
+
pr 40
|
42
42
|
sz 39
|
43
43
|
cz 39
|
44
44
|
nie 37
|
45
45
|
wi 37
|
46
|
-
|
46
|
+
_d 36
|
47
47
|
_s 36
|
48
|
+
_z 36
|
49
|
+
y_ 36
|
48
50
|
rz 35
|
49
|
-
_z 35
|
50
51
|
an 34
|
51
|
-
|
52
|
-
en 32
|
52
|
+
ie_ 33
|
53
53
|
st 32
|
54
|
-
ie_ 32
|
55
|
-
za 32
|
56
54
|
ra 32
|
57
|
-
|
55
|
+
za 32
|
56
|
+
en 32
|
58
57
|
ej 31
|
58
|
+
ow 31
|
59
59
|
h 31
|
60
|
-
_pr 30
|
61
|
-
zy 30
|
62
60
|
ki 30
|
61
|
+
zy 30
|
62
|
+
_pr 30
|
63
63
|
po 29
|
64
64
|
ro 28
|
65
|
-
|
66
|
-
� 28
|
65
|
+
ch 28
|
67
66
|
eg 28
|
68
67
|
._ 28
|
69
|
-
|
70
|
-
|
68
|
+
� 28
|
69
|
+
do 28
|
71
70
|
� 27
|
71
|
+
m_ 27
|
72
72
|
ne 27
|
73
|
-
|
74
|
-
m_ 26
|
73
|
+
_t 27
|
75
74
|
_na 26
|
76
|
-
|
77
|
-
|
75
|
+
_po 26
|
76
|
+
ia 26
|
78
77
|
em 25
|
79
78
|
wa 25
|
80
|
-
|
79
|
+
ko 25
|
81
80
|
li 24
|
82
|
-
dz 24
|
83
|
-
_k 24
|
84
81
|
_b 24
|
82
|
+
_k 24
|
83
|
+
dz 24
|
84
|
+
u_ 24
|
85
85
|
to 23
|
86
|
-
|
86
|
+
ci 21
|
87
|
+
w_ 21
|
88
|
+
� 21
|
87
89
|
ta 21
|
88
90
|
i� 21
|
89
|
-
|
90
|
-
� 21
|
91
|
-
w_ 21
|
91
|
+
_ni 21
|
92
92
|
_i 21
|
93
|
-
ci 21
|
94
93
|
yc 21
|
95
|
-
|
94
|
+
go 21
|
96
95
|
or 20
|
97
|
-
te 20
|
98
96
|
le 20
|
97
|
+
nie_ 20
|
99
98
|
_j 20
|
100
|
-
|
99
|
+
te 20
|
100
|
+
al 19
|
101
101
|
j_ 19
|
102
102
|
P 19
|
103
|
-
nie_ 19
|
104
|
-
re 19
|
105
|
-
wy 19
|
106
|
-
_o 19
|
107
|
-
al 19
|
108
103
|
ego 19
|
109
|
-
|
104
|
+
_o 19
|
105
|
+
wy 19
|
106
|
+
a� 19
|
107
|
+
re 19
|
110
108
|
_nie 18
|
111
|
-
|
112
|
-
�o 18
|
109
|
+
cze 18
|
113
110
|
ac 18
|
114
|
-
ej_ 18
|
115
|
-
ani 18
|
116
111
|
sk 18
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
112
|
+
ani 18
|
113
|
+
ej_ 18
|
114
|
+
�o 18
|
115
|
+
aw 18
|
116
|
+
ego_ 17
|
122
117
|
by 17
|
123
118
|
ak 17
|
119
|
+
�_ 17
|
120
|
+
_w_ 17
|
121
|
+
zi 17
|
122
|
+
go_ 17
|
123
|
+
ar 17
|
124
|
+
_do 17
|
125
|
+
_i_ 17
|
126
|
+
rze 17
|
124
127
|
on 16
|
125
|
-
ego_ 16
|
126
|
-
z_ 16
|
127
|
-
ka 16
|
128
128
|
ty 16
|
129
|
-
|
129
|
+
je 16
|
130
130
|
es 16
|
131
131
|
mi 16
|
132
|
+
ka 16
|
133
|
+
kie 16
|
132
134
|
dzi 16
|
133
|
-
_do 16
|
134
135
|
_m 16
|
135
|
-
|
136
|
-
je 16
|
137
|
-
kie 16
|
138
|
-
prz 15
|
139
|
-
� 15
|
140
|
-
zie 15
|
141
|
-
_r 15
|
142
|
-
S 15
|
136
|
+
z_ 16
|
143
137
|
_za 15
|
144
|
-
do_ 15
|
145
|
-
om 15
|
146
138
|
dzie 15
|
147
139
|
wie 15
|
140
|
+
� 15
|
141
|
+
prz 15
|
142
|
+
do_ 15
|
143
|
+
S 15
|
144
|
+
_r 15
|
148
145
|
�w 15
|
149
|
-
|
150
|
-
|
151
|
-
|
146
|
+
zie 15
|
147
|
+
om 15
|
148
|
+
�e 14
|
149
|
+
_prz 14
|
152
150
|
ny 14
|
151
|
+
em_ 14
|
153
152
|
W 14
|
154
|
-
- 14
|
155
|
-
_prz 14
|
156
|
-
�a 14
|
157
153
|
ych 14
|
154
|
+
�a 14
|
158
155
|
el 14
|
159
|
-
|
160
|
-
em_ 14
|
161
|
-
ic 14
|
156
|
+
_do_ 14
|
162
157
|
na_ 14
|
158
|
+
ic 14
|
159
|
+
ec 14
|
160
|
+
eni 14
|
161
|
+
am 14
|
162
|
+
- 14
|
163
|
+
ym 13
|
163
164
|
de 13
|
164
|
-
_do_ 13
|
165
165
|
f 13
|
166
|
-
ez 13
|
167
|
-
er 13
|
168
|
-
pra 13
|
169
|
-
ws 13
|
170
|
-
zc 13
|
171
|
-
ed 13
|
172
|
-
�_ 13
|
173
166
|
� 13
|
174
|
-
|
167
|
+
zc 13
|
175
168
|
ek 13
|
176
|
-
|
177
|
-
|
178
|
-
|
169
|
+
er 13
|
170
|
+
�_ 13
|
171
|
+
ez 13
|
172
|
+
ed 13
|
173
|
+
ws 13
|
174
|
+
pra 13
|
175
|
+
owa 12
|
176
|
+
ja 12
|
179
177
|
dn 12
|
180
|
-
|
181
|
-
�e_ 12
|
178
|
+
pi 12
|
182
179
|
_c 12
|
180
|
+
_wy 12
|
181
|
+
�e_ 12
|
182
|
+
h_ 12
|
183
|
+
_P 12
|
184
|
+
ch_ 12
|
183
185
|
as 12
|
184
186
|
zn 12
|
185
|
-
|
186
|
-
|
187
|
-
ol 12
|
188
|
-
ja 12
|
187
|
+
to_ 12
|
188
|
+
pa 12
|
189
189
|
od 12
|
190
|
-
|
190
|
+
ob 12
|
191
|
+
ol 12
|
191
192
|
ce 12
|
192
|
-
|
193
|
+
nej 12
|
194
|
+
tr 11
|
195
|
+
A 11
|
193
196
|
wo 11
|
194
197
|
ok 11
|
198
|
+
no 11
|
195
199
|
_wi 11
|
196
200
|
prze 11
|
197
|
-
|
198
|
-
|
201
|
+
dy 11
|
202
|
+
i, 11
|
199
203
|
ys 11
|
200
|
-
A 11
|
201
|
-
h_ 11
|
202
|
-
no 11
|
203
204
|
-_ 11
|
204
|
-
i, 11
|
205
|
-
ch_ 11
|
206
205
|
i,_ 11
|
207
|
-
|
206
|
+
zcz 11
|
208
207
|
aj 11
|
209
|
-
|
208
|
+
in 11
|
209
|
+
ku 10
|
210
|
+
Po 10
|
211
|
+
_prze 10
|
212
|
+
szc 10
|
210
213
|
T 10
|
211
|
-
|
212
|
-
ad 10
|
213
|
-
we 10
|
214
|
-
cj 10
|
214
|
+
�_ 10
|
215
215
|
e, 10
|
216
|
-
_u 10
|
217
|
-
ent 10
|
218
|
-
szc 10
|
219
216
|
is 10
|
220
|
-
|
217
|
+
nia 10
|
218
|
+
at 10
|
219
|
+
ad 10
|
220
|
+
ma 10
|
221
|
+
_z_ 10
|
222
|
+
_� 10
|
221
223
|
ski 10
|
222
224
|
iej 10
|
223
|
-
_P 10
|
224
|
-
_prze 10
|
225
225
|
og 10
|
226
|
+
nt 10
|
227
|
+
_l 10
|
226
228
|
ln 10
|
227
229
|
rzy 10
|
228
|
-
|
229
|
-
Po 10
|
230
|
-
nia 10
|
231
|
-
_� 10
|
232
|
-
_l 10
|
230
|
+
cj 10
|
233
231
|
k_ 10
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
232
|
+
ent 10
|
233
|
+
ud 10
|
234
|
+
we 10
|
235
|
+
_u 10
|
236
|
+
�_ 9
|
238
237
|
_nie_ 9
|
239
|
-
da 9
|
240
|
-
tw 9
|
241
238
|
_je 9
|
242
|
-
|
243
|
-
_�e 9
|
239
|
+
gr 9
|
244
240
|
szcz 9
|
245
241
|
ot 9
|
246
|
-
|
247
|
-
|
242
|
+
_na_ 9
|
243
|
+
_to 9
|
244
|
+
_g 9
|
245
|
+
us 9
|
248
246
|
si 9
|
249
|
-
gr 9
|
250
|
-
_z_ 9
|
251
247
|
_to_ 9
|
252
|
-
|
253
|
-
my 9
|
248
|
+
y� 9
|
254
249
|
e,_ 9
|
255
|
-
|
256
|
-
|
257
|
-
_na_ 9
|
250
|
+
_a 9
|
251
|
+
_�e 9
|
258
252
|
ru 9
|
259
|
-
|
260
|
-
|
253
|
+
my 9
|
254
|
+
da 9
|
255
|
+
_S 9
|
256
|
+
_�e_ 9
|
257
|
+
tw 9
|
261
258
|
�s 8
|
262
|
-
|
259
|
+
ym_ 8
|
263
260
|
ga 8
|
261
|
+
N 8
|
262
|
+
by_ 8
|
264
263
|
nej_ 8
|
265
|
-
|
266
|
-
|
264
|
+
neg 8
|
265
|
+
_si 8
|
266
|
+
la 8
|
267
|
+
wsk 8
|
268
|
+
�y 8
|
267
269
|
ap 8
|
270
|
+
esz 8
|
268
271
|
ia_ 8
|
269
|
-
|
270
|
-
|
272
|
+
raw 8
|
273
|
+
praw 8
|
271
274
|
kt 8
|
272
275
|
zcze 8
|
273
|
-
by_ 8
|
274
|
-
neg 8
|
275
276
|
_by 8
|
276
|
-
nego 8
|
277
|
-
wsk 8
|
278
277
|
�o_ 8
|
279
|
-
praw 8
|
280
|
-
la 8
|
281
|
-
pre 8
|
282
278
|
a� 8
|
283
|
-
|
279
|
+
pre 8
|
280
|
+
os 8
|
281
|
+
nego 8
|
282
|
+
pan 8
|
284
283
|
t� 8
|
285
|
-
raw 8
|
286
|
-
gi 7
|
287
|
-
ieg 7
|
288
|
-
ry 7
|
289
|
-
iu 7
|
290
284
|
_te 7
|
291
|
-
edn 7
|
292
|
-
_W 7
|
293
|
-
si� 7
|
294
|
-
skie 7
|
295
|
-
u� 7
|
296
285
|
G 7
|
297
|
-
|
298
|
-
rac 7
|
299
|
-
Z 7
|
286
|
+
lu 7
|
300
287
|
_-_ 7
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
_ro 7
|
307
|
-
li_ 7
|
308
|
-
I 7
|
309
|
-
icz 7
|
310
|
-
tem 7
|
288
|
+
Z 7
|
289
|
+
_W 7
|
290
|
+
_sz 7
|
291
|
+
_- 7
|
292
|
+
ni_ 7
|
311
293
|
_ja 7
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
sp 7
|
294
|
+
edn 7
|
295
|
+
tu 7
|
296
|
+
icz 7
|
297
|
+
I 7
|
317
298
|
_ty 7
|
299
|
+
et 7
|
300
|
+
_si� 7
|
301
|
+
si� 7
|
302
|
+
ych_ 7
|
303
|
+
_kt 7
|
304
|
+
a� 7
|
305
|
+
_Po 7
|
306
|
+
iu 7
|
318
307
|
K 7
|
308
|
+
gi 7
|
309
|
+
u� 7
|
310
|
+
sp 7
|
311
|
+
bi 7
|
312
|
+
iel 7
|
313
|
+
zy_ 7
|
314
|
+
ko_ 7
|
319
315
|
cie 7
|
320
|
-
|
316
|
+
li_ 7
|
317
|
+
rac 7
|
318
|
+
ieg 7
|
321
319
|
R 7
|
320
|
+
�_ 7
|
321
|
+
o� 7
|
322
|
+
skie 7
|
323
|
+
�d 7
|
324
|
+
nia_ 7
|
322
325
|
_pra 7
|
326
|
+
�u 7
|
327
|
+
ry 7
|
328
|
+
tem 7
|
323
329
|
bo 7
|
324
|
-
? 7
|
325
|
-
_si� 7
|
326
|
-
_kt 7
|
327
|
-
tu 7
|
328
|
-
iel 7
|
329
330
|
O 7
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
331
|
+
_ro 7
|
332
|
+
szcze 6
|
333
|
+
_pre 6
|
334
|
+
W_ 6
|
335
|
+
iego 6
|
336
|
+
t�r 6
|
337
|
+
oc 6
|
338
|
+
m� 6
|
339
|
+
_kt�r 6
|
340
|
+
un 6
|
341
|
+
yt 6
|
335
342
|
�dz 6
|
336
|
-
czy 6
|
337
|
-
no_ 6
|
338
|
-
_ob 6
|
339
|
-
ni_ 6
|
340
|
-
usz 6
|
341
343
|
s� 6
|
342
|
-
|
343
|
-
|
344
|
+
stw 6
|
345
|
+
nic 6
|
346
|
+
uk 6
|
347
|
+
owi 6
|
348
|
+
i�c 6
|
349
|
+
mie 6
|
350
|
+
yd 6
|
351
|
+
dni 6
|
352
|
+
iego_ 6
|
353
|
+
den 6
|
354
|
+
ze_ 6
|
355
|
+
�a 6
|
356
|
+
�c 6
|
357
|
+
�r 6
|
358
|
+
czn 6
|
359
|
+
y. 6
|
360
|
+
yb 6
|
361
|
+
dent 6
|
362
|
+
wski 6
|
363
|
+
J 6
|
364
|
+
iem 6
|
365
|
+
oz 6
|
344
366
|
lne 6
|
345
|
-
|
367
|
+
ji 6
|
368
|
+
a�_ 6
|
369
|
+
usz 6
|
346
370
|
ba 6
|
347
|
-
yt 6
|
348
371
|
il 6
|
349
|
-
|
350
|
-
ze_ 6
|
372
|
+
g� 6
|
351
373
|
ien 6
|
352
|
-
|
353
|
-
op 6
|
354
|
-
un 6
|
355
|
-
dr 6
|
356
|
-
�c 6
|
357
|
-
naj 6
|
374
|
+
io 6
|
358
375
|
_ko 6
|
359
|
-
m� 6
|
360
|
-
kt� 6
|
361
|
-
i�_ 6
|
362
|
-
iem 6
|
363
|
-
uk 6
|
364
|
-
nic 6
|
365
|
-
ku_ 6
|
366
|
-
iego 6
|
367
|
-
_kt�r 6
|
368
|
-
�a 6
|
369
|
-
J 6
|
370
|
-
be 6
|
371
|
-
mo 6
|
372
|
-
yd 6
|
373
376
|
zen 6
|
374
|
-
|
375
|
-
|
377
|
+
e� 6
|
378
|
+
_ob 6
|
379
|
+
dr 6
|
376
380
|
�d 6
|
377
|
-
|
378
|
-
|
379
|
-
|
381
|
+
by� 6
|
382
|
+
niu 6
|
383
|
+
be 6
|
384
|
+
kt�r 6
|
380
385
|
jak 6
|
381
|
-
stw 6
|
382
|
-
ji 6
|
383
|
-
wski 6
|
384
|
-
zu 6
|
385
|
-
oc 6
|
386
|
-
B 6
|
387
|
-
den 6
|
388
|
-
e� 6
|
389
|
-
dent 6
|
390
386
|
ali 6
|
391
|
-
|
392
|
-
|
393
|
-
|
387
|
+
op 6
|
388
|
+
zu 6
|
389
|
+
ku_ 6
|
394
390
|
_J 6
|
395
|
-
W_ 6
|
396
|
-
t�r 6
|
397
391
|
nd 6
|
398
|
-
|
399
|
-
|
392
|
+
_kt� 6
|
393
|
+
kr 6
|
394
|
+
i�_ 6
|
400
395
|
a, 6
|
396
|
+
B 6
|
397
|
+
czy 6
|
398
|
+
est 6
|
399
|
+
_sp 6
|
400
|
+
nego_ 6
|