scylla 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -2
- data/Gemfile.lock +16 -1
- data/lib/scylla/classifier.rb +1 -1
- data/lib/scylla/generator.rb +16 -4
- data/lib/scylla/lms/afrikaans.lm +232 -232
- data/lib/scylla/lms/arabic.lm +175 -175
- data/lib/scylla/lms/bulgarian.lm +225 -225
- data/lib/scylla/lms/catalan.lm +309 -309
- data/lib/scylla/lms/danish.lm +167 -167
- data/lib/scylla/lms/english.lm +398 -398
- data/lib/scylla/lms/finnish.lm +237 -237
- data/lib/scylla/lms/french.lm +148 -148
- data/lib/scylla/lms/german.lm +258 -258
- data/lib/scylla/lms/greek.lm +236 -236
- data/lib/scylla/lms/hebrew.lm +154 -154
- data/lib/scylla/lms/hindi.lm +139 -139
- data/lib/scylla/lms/icelandic.lm +239 -239
- data/lib/scylla/lms/indonesian.lm +244 -244
- data/lib/scylla/lms/italian.lm +248 -248
- data/lib/scylla/lms/japanese.lm +90 -90
- data/lib/scylla/lms/korean.lm +306 -306
- data/lib/scylla/lms/norwegian.lm +193 -193
- data/lib/scylla/lms/polish.lm +241 -241
- data/lib/scylla/lms/portuguese.lm +232 -232
- data/lib/scylla/lms/romanian.lm +246 -246
- data/lib/scylla/lms/slovak.lm +242 -242
- data/lib/scylla/lms/slovenian.lm +229 -229
- data/lib/scylla/lms/spanish.lm +164 -164
- data/lib/scylla/lms/swedish.lm +157 -157
- data/lib/scylla/lms/tagalog.lm +247 -247
- data/lib/scylla/lms/thai.lm +252 -252
- data/lib/scylla/lms/turkish.lm +285 -285
- data/lib/scylla/lms/vietnamese.lm +250 -250
- data/lib/scylla/lms/welsh.lm +248 -248
- data/lib/scylla/resources.rb +1 -9
- data/lib/scylla.rb +4 -0
- data/scylla.gemspec +2 -120
- data/source_texts/english.txt +62 -27
- data/test/classifier_test.rb +1 -3
- data/test/fixtures/lms/danish.lm +173 -173
- data/test/fixtures/lms/english.lm +220 -220
- data/test/fixtures/lms/french.lm +175 -175
- data/test/fixtures/lms/german.lm +254 -254
- data/test/fixtures/lms/hindi.lm +139 -139
- data/test/fixtures/lms/italian.lm +236 -236
- data/test/fixtures/lms/japanese.lm +88 -88
- data/test/fixtures/lms/norwegian.lm +182 -182
- data/test/fixtures/lms/spanish.lm +164 -164
- data/test/fixtures/test_languages/spanish +0 -1
- data/test/generator_test.rb +13 -0
- data/test/helper.rb +2 -0
- metadata +18 -25
- data/.document +0 -5
- data/lib/scylla/lms/13375P33K.lm +0 -400
- data/scylla-0.1.0.gem +0 -0
- data/source_texts/13375P33K.txt +0 -199
- data/test/fixtures/lms/13375p33k.lm +0 -400
- data/test/fixtures/source_texts/13375P33K.txt +0 -199
@@ -1,4 +1,4 @@
|
|
1
|
-
_
|
1
|
+
_ 4704
|
2
2
|
e 1600
|
3
3
|
r 1018
|
4
4
|
n 1012
|
@@ -19,8 +19,8 @@ t_ 291
|
|
19
19
|
er 290
|
20
20
|
en 287
|
21
21
|
v 284
|
22
|
-
n_
|
23
|
-
_s
|
22
|
+
n_ 274
|
23
|
+
_s 216
|
24
24
|
et 214
|
25
25
|
an 213
|
26
26
|
g_ 205
|
@@ -39,362 +39,362 @@ re 160
|
|
39
39
|
en_ 158
|
40
40
|
_o 156
|
41
41
|
ar 145
|
42
|
-
_e
|
42
|
+
_e 145
|
43
43
|
st 143
|
44
|
-
_h
|
44
|
+
_h 143
|
45
45
|
et_ 138
|
46
|
+
_d 135
|
46
47
|
, 134
|
47
|
-
_d 132
|
48
48
|
in 130
|
49
49
|
,_ 130
|
50
50
|
sk 129
|
51
51
|
i_ 122
|
52
52
|
ng 117
|
53
|
-
ke 117
|
54
53
|
_i 117
|
55
54
|
ne 117
|
56
|
-
|
55
|
+
ke 117
|
57
56
|
le 110
|
57
|
+
ti 110
|
58
58
|
_f 109
|
59
59
|
om 109
|
60
60
|
og 108
|
61
|
-
_m 105
|
62
61
|
me 105
|
62
|
+
_m 105
|
63
63
|
j 104
|
64
64
|
b 103
|
65
65
|
_og 101
|
66
66
|
og_ 100
|
67
67
|
la 99
|
68
68
|
_og_ 97
|
69
|
-
_de
|
70
|
-
_a 94
|
69
|
+
_de 95
|
71
70
|
m_ 94
|
72
71
|
ha 94
|
73
|
-
ei
|
72
|
+
ei 94
|
73
|
+
_t 94
|
74
|
+
_a 94
|
74
75
|
ø 92
|
75
76
|
� 92
|
76
|
-
|
77
|
+
å_ 90
|
78
|
+
�_ 90
|
77
79
|
a_ 88
|
78
|
-
|
79
|
-
|
80
|
-
_t 87
|
80
|
+
se 88
|
81
|
+
_ha 85
|
81
82
|
nd 84
|
82
|
-
_v 84
|
83
83
|
el 84
|
84
84
|
ri 84
|
85
|
-
ge 84
|
86
85
|
eg 84
|
87
|
-
|
86
|
+
_v 84
|
87
|
+
ge 84
|
88
88
|
tt 83
|
89
|
+
nn 81
|
89
90
|
is 80
|
90
|
-
nn 80
|
91
91
|
y 79
|
92
92
|
ta 79
|
93
93
|
av 78
|
94
|
-
_i_ 77
|
95
94
|
_n 77
|
96
|
-
|
95
|
+
_i_ 77
|
97
96
|
al 75
|
98
|
-
om_ 74
|
99
|
-
rs 74
|
100
97
|
il 74
|
101
|
-
|
98
|
+
rs 74
|
99
|
+
om_ 74
|
100
|
+
._ 73
|
102
101
|
_k 73
|
102
|
+
no 73
|
103
|
+
l_ 72
|
103
104
|
on 71
|
105
|
+
rt 70
|
104
106
|
_b 70
|
105
107
|
v_ 70
|
106
|
-
l_ 70
|
107
|
-
rt 69
|
108
108
|
_me 69
|
109
109
|
d_ 68
|
110
|
+
_. 68
|
110
111
|
ve 67
|
111
112
|
ar_ 66
|
112
|
-
_. 66
|
113
113
|
va 65
|
114
114
|
ing 63
|
115
115
|
_, 62
|
116
|
-
|
117
|
-
_p 59
|
116
|
+
_p 60
|
118
117
|
at 59
|
118
|
+
det 59
|
119
119
|
_g 59
|
120
|
-
fo 58
|
121
|
-
_,_ 58
|
122
120
|
ll 58
|
121
|
+
_,_ 58
|
122
|
+
fo 58
|
123
|
+
un 57
|
123
124
|
ke_ 57
|
124
125
|
av_ 57
|
125
|
-
un 57
|
126
126
|
an_ 56
|
127
127
|
k_ 56
|
128
128
|
ik 55
|
129
129
|
_av 55
|
130
130
|
_er 54
|
131
|
-
|
131
|
+
_l 54
|
132
132
|
so 54
|
133
|
+
N 54
|
133
134
|
li 53
|
134
|
-
_l 53
|
135
|
-
am 52
|
136
135
|
nor 52
|
137
136
|
je 52
|
137
|
+
am 52
|
138
|
+
ns 51
|
138
139
|
lan 51
|
139
|
-
ed 50
|
140
|
-
ors 50
|
141
|
-
ns 50
|
142
|
-
_er_ 50
|
143
140
|
r� 50
|
144
|
-
|
145
|
-
_ti
|
141
|
+
_er_ 50
|
142
|
+
_ti 50
|
143
|
+
ors 50
|
144
|
+
ed 50
|
146
145
|
es 49
|
147
146
|
ter 49
|
148
|
-
ne_
|
147
|
+
ne_ 49
|
148
|
+
eg_ 49
|
149
149
|
D 48
|
150
|
+
rsk 48
|
150
151
|
for 48
|
151
152
|
til 48
|
152
|
-
rsk 48
|
153
153
|
_av_ 47
|
154
|
-
and 47
|
155
154
|
nde 47
|
156
|
-
|
157
|
-
H 46
|
158
|
-
it 46
|
155
|
+
and 47
|
159
156
|
_r 46
|
160
157
|
ka 46
|
158
|
+
ra 46
|
159
|
+
it 46
|
160
|
+
H 46
|
161
|
+
_til 45
|
161
162
|
_N 45
|
162
163
|
ske 45
|
163
|
-
_til 45
|
164
164
|
ga 44
|
165
|
-
te_ 44
|
166
165
|
orsk 44
|
166
|
+
te_ 44
|
167
|
+
No 43
|
167
168
|
_so 43
|
168
|
-
S 43
|
169
169
|
som 43
|
170
|
-
No 43
|
171
170
|
ko 43
|
172
171
|
na 43
|
173
|
-
|
172
|
+
S 43
|
174
173
|
det_ 42
|
175
174
|
Nor 42
|
176
175
|
_fo 42
|
176
|
+
nge 42
|
177
177
|
ste 42
|
178
|
-
_som 41
|
179
|
-
_no 41
|
180
178
|
il_ 41
|
181
|
-
|
179
|
+
_no 41
|
180
|
+
_som 41
|
182
181
|
_No 40
|
183
|
-
|
184
|
-
som_ 40
|
182
|
+
p� 40
|
185
183
|
rå 40
|
186
|
-
|
187
|
-
|
184
|
+
som_ 40
|
185
|
+
tte 40
|
188
186
|
ett 39
|
187
|
+
nt 39
|
189
188
|
_som_ 39
|
190
|
-
|
191
|
-
rd 38
|
192
|
-
kk 38
|
193
|
-
da 38
|
194
|
-
em 38
|
195
|
-
_� 38
|
189
|
+
_Nor 39
|
196
190
|
ma 38
|
191
|
+
_ei 38
|
192
|
+
em 38
|
193
|
+
da 38
|
197
194
|
ig 38
|
198
|
-
|
199
|
-
�
|
200
|
-
|
195
|
+
kk 38
|
196
|
+
_� 38
|
197
|
+
rd 38
|
198
|
+
på 38
|
199
|
+
_u 37
|
201
200
|
ør 37
|
201
|
+
_han 37
|
202
|
+
De 37
|
202
203
|
han 37
|
203
|
-
|
204
|
-
_u 37
|
204
|
+
�r 37
|
205
205
|
ten 37
|
206
|
-
|
206
|
+
sa 37
|
207
|
+
gj 36
|
208
|
+
vi 36
|
207
209
|
ed_ 36
|
208
|
-
_han 36
|
209
210
|
nors 36
|
210
211
|
_p� 36
|
211
212
|
_på 36
|
212
|
-
|
213
|
-
|
214
|
-
|
213
|
+
norsk 36
|
214
|
+
ske_ 35
|
215
|
+
_for 35
|
215
216
|
kt 35
|
216
217
|
til_ 35
|
217
218
|
reg 35
|
218
|
-
|
219
|
-
|
219
|
+
den 35
|
220
|
+
__ 35
|
220
221
|
på_ 35
|
221
|
-
|
222
|
+
med 34
|
223
|
+
_til_ 34
|
222
224
|
rt_ 34
|
223
225
|
_va 34
|
224
|
-
med 34
|
225
226
|
_på_ 34
|
226
|
-
_til_ 34
|
227
|
-
ore 33
|
228
|
-
lle 33
|
229
227
|
_nor 33
|
228
|
+
ore 33
|
230
229
|
var 33
|
231
|
-
|
232
|
-
m� 32
|
230
|
+
lle 33
|
233
231
|
_det 32
|
234
|
-
|
235
|
-
|
232
|
+
m� 32
|
233
|
+
_se 32
|
234
|
+
de_ 32
|
236
235
|
_med 31
|
237
|
-
|
236
|
+
den_ 31
|
237
|
+
re_ 31
|
238
|
+
_S 31
|
238
239
|
pe 31
|
239
240
|
or_ 31
|
241
|
+
_st 31
|
240
242
|
tr 31
|
241
|
-
to 31
|
242
|
-
den_ 31
|
243
243
|
sp 31
|
244
|
-
ag 31
|
245
244
|
land 31
|
246
|
-
|
247
|
-
|
245
|
+
to 31
|
246
|
+
kr 31
|
247
|
+
ag 31
|
248
248
|
sk_ 30
|
249
|
+
ol 30
|
249
250
|
ell 30
|
250
251
|
fr 29
|
251
|
-
le_ 29
|
252
|
-
pr 29
|
253
|
-
s_ 29
|
254
|
-
må 29
|
255
252
|
har 29
|
256
253
|
ak 29
|
257
254
|
ni 29
|
258
255
|
med_ 29
|
259
|
-
|
256
|
+
pr 29
|
257
|
+
må 29
|
258
|
+
s_ 29
|
259
|
+
le_ 29
|
260
|
+
ks 28
|
260
261
|
_har 28
|
262
|
+
har_ 28
|
261
263
|
der 28
|
264
|
+
rk 28
|
262
265
|
si 28
|
263
266
|
ss 28
|
267
|
+
_la 28
|
264
268
|
ek 28
|
265
|
-
har_ 28
|
266
269
|
mi 28
|
267
|
-
|
268
|
-
|
269
|
-
_re 27
|
270
|
-
�l 27
|
270
|
+
� 27
|
271
|
+
enn 27
|
271
272
|
_ko 27
|
272
|
-
æ 27
|
273
|
-
�r 27
|
274
|
-
år 27
|
275
|
-
ål 27
|
276
|
-
as 27
|
277
273
|
_fr 27
|
278
|
-
|
274
|
+
�r 27
|
275
|
+
_re 27
|
276
|
+
æ 27
|
279
277
|
ie 27
|
280
|
-
|
281
|
-
� 27
|
278
|
+
�l 27
|
282
279
|
ei_ 27
|
280
|
+
_med_ 27
|
281
|
+
år 27
|
282
|
+
ål 27
|
283
283
|
_har_ 27
|
284
|
+
as 27
|
285
|
+
_den 27
|
286
|
+
ord 26
|
287
|
+
ans 26
|
288
|
+
ande 26
|
284
289
|
men 26
|
285
|
-
|
290
|
+
ng_ 26
|
286
291
|
tin 26
|
287
|
-
|
292
|
+
_D 26
|
288
293
|
_var 26
|
294
|
+
_den_ 26
|
295
|
+
ut 26
|
289
296
|
nne 26
|
290
|
-
enn 26
|
291
|
-
ng_ 26
|
292
|
-
ord 26
|
293
|
-
ande 26
|
294
|
-
ans 26
|
295
|
-
- 26
|
296
297
|
_han_ 25
|
297
|
-
|
298
|
-
mål 25
|
299
|
-
_den_ 25
|
298
|
+
_ve 25
|
300
299
|
Noreg 25
|
301
300
|
oreg 25
|
302
|
-
_ve 25
|
303
301
|
han_ 25
|
304
|
-
|
302
|
+
mål 25
|
305
303
|
ru 25
|
304
|
+
ere 25
|
306
305
|
ver 25
|
307
|
-
|
306
|
+
Nore 25
|
308
307
|
eri 25
|
308
|
+
ette 25
|
309
309
|
gje 25
|
310
|
+
E 24
|
310
311
|
A 24
|
312
|
+
gen 24
|
311
313
|
ts 24
|
312
314
|
sj 24
|
313
|
-
|
314
|
-
|
315
|
-
ting 23
|
315
|
+
he 23
|
316
|
+
ene 23
|
316
317
|
tt_ 23
|
317
|
-
|
318
|
+
lo 23
|
318
319
|
ter_ 23
|
319
|
-
|
320
|
+
bl 23
|
321
|
+
Ha 23
|
320
322
|
_å 23
|
323
|
+
ting 23
|
321
324
|
ten_ 23
|
322
|
-
lo 23
|
323
|
-
Ha 23
|
324
|
-
bl 23
|
325
325
|
kke 23
|
326
|
-
|
327
|
-
ene 23
|
328
|
-
isk 22
|
326
|
+
nga 23
|
329
327
|
_det_ 22
|
330
328
|
_om 22
|
331
|
-
s� 22
|
332
|
-
kon 22
|
333
|
-
ds 22
|
334
329
|
ong 22
|
330
|
+
ds 22
|
335
331
|
sta 22
|
332
|
+
s� 22
|
333
|
+
isk 22
|
336
334
|
_Nore 22
|
337
|
-
|
338
|
-
_H 21
|
339
|
-
ok 21
|
340
|
-
ane 21
|
335
|
+
kon 22
|
341
336
|
øy 21
|
342
|
-
rm 21
|
343
|
-
�r 21
|
344
337
|
gr 21
|
345
|
-
|
346
|
-
|
338
|
+
�r 21
|
339
|
+
rm 21
|
347
340
|
us 21
|
341
|
+
kri 21
|
348
342
|
�y 21
|
343
|
+
ok 21
|
349
344
|
ær 21
|
345
|
+
ir 21
|
346
|
+
ane 21
|
350
347
|
art 20
|
351
|
-
|
348
|
+
ld 20
|
352
349
|
_A 20
|
353
|
-
|
350
|
+
ist 20
|
351
|
+
for_ 20
|
352
|
+
sl 20
|
354
353
|
_bl 20
|
355
354
|
ad 20
|
356
|
-
|
357
|
-
ld 20
|
358
|
-
ist 20
|
355
|
+
ert 20
|
359
356
|
ndet 20
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
rsk_ 19
|
364
|
-
sam 19
|
365
|
-
oreg_ 19
|
366
|
-
inga 19
|
367
|
-
ret 19
|
368
|
-
_ka 19
|
369
|
-
_ut 19
|
357
|
+
_lan 20
|
358
|
+
_nors 20
|
359
|
+
andet 19
|
370
360
|
f� 19
|
371
|
-
|
372
|
-
|
373
|
-
lt 19
|
374
|
-
hu 19
|
361
|
+
ein 19
|
362
|
+
_ka 19
|
375
363
|
jo 19
|
364
|
+
inga 19
|
365
|
+
ale 19
|
376
366
|
seg 19
|
377
|
-
|
378
|
-
|
367
|
+
_E 19
|
368
|
+
reg_ 19
|
369
|
+
oreg_ 19
|
379
370
|
fa 19
|
371
|
+
rske 19
|
372
|
+
hu 19
|
373
|
+
rsk_ 19
|
380
374
|
id 19
|
381
|
-
|
382
|
-
|
375
|
+
ret 19
|
376
|
+
_H 19
|
377
|
+
sam 19
|
378
|
+
_s� 19
|
379
|
+
lt 19
|
383
380
|
un_ 19
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
381
|
+
_ut 19
|
382
|
+
orsk_ 19
|
383
|
+
orske 19
|
384
|
+
ort 19
|
385
|
+
rin 18
|
386
|
+
ov 18
|
387
|
+
_vi 18
|
389
388
|
tor 18
|
389
|
+
_et 18
|
390
|
+
ro 18
|
390
391
|
gs 18
|
391
|
-
|
392
|
+
_en 18
|
392
393
|
tter 18
|
393
|
-
|
394
|
+
mn 18
|
395
|
+
ikk 18
|
396
|
+
ren 18
|
397
|
+
elle 18
|
394
398
|
_sa 18
|
395
399
|
inge 18
|
396
|
-
|
397
|
-
ov 18
|
398
|
-
mn 18
|
399
|
-
ro 18
|
400
|
-
rin 18
|
400
|
+
est 18
|