scylla 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -2
- data/Gemfile.lock +16 -1
- data/lib/scylla/classifier.rb +1 -1
- data/lib/scylla/generator.rb +16 -4
- data/lib/scylla/lms/afrikaans.lm +232 -232
- data/lib/scylla/lms/arabic.lm +175 -175
- data/lib/scylla/lms/bulgarian.lm +225 -225
- data/lib/scylla/lms/catalan.lm +309 -309
- data/lib/scylla/lms/danish.lm +167 -167
- data/lib/scylla/lms/english.lm +398 -398
- data/lib/scylla/lms/finnish.lm +237 -237
- data/lib/scylla/lms/french.lm +148 -148
- data/lib/scylla/lms/german.lm +258 -258
- data/lib/scylla/lms/greek.lm +236 -236
- data/lib/scylla/lms/hebrew.lm +154 -154
- data/lib/scylla/lms/hindi.lm +139 -139
- data/lib/scylla/lms/icelandic.lm +239 -239
- data/lib/scylla/lms/indonesian.lm +244 -244
- data/lib/scylla/lms/italian.lm +248 -248
- data/lib/scylla/lms/japanese.lm +90 -90
- data/lib/scylla/lms/korean.lm +306 -306
- data/lib/scylla/lms/norwegian.lm +193 -193
- data/lib/scylla/lms/polish.lm +241 -241
- data/lib/scylla/lms/portuguese.lm +232 -232
- data/lib/scylla/lms/romanian.lm +246 -246
- data/lib/scylla/lms/slovak.lm +242 -242
- data/lib/scylla/lms/slovenian.lm +229 -229
- data/lib/scylla/lms/spanish.lm +164 -164
- data/lib/scylla/lms/swedish.lm +157 -157
- data/lib/scylla/lms/tagalog.lm +247 -247
- data/lib/scylla/lms/thai.lm +252 -252
- data/lib/scylla/lms/turkish.lm +285 -285
- data/lib/scylla/lms/vietnamese.lm +250 -250
- data/lib/scylla/lms/welsh.lm +248 -248
- data/lib/scylla/resources.rb +1 -9
- data/lib/scylla.rb +4 -0
- data/scylla.gemspec +2 -120
- data/source_texts/english.txt +62 -27
- data/test/classifier_test.rb +1 -3
- data/test/fixtures/lms/danish.lm +173 -173
- data/test/fixtures/lms/english.lm +220 -220
- data/test/fixtures/lms/french.lm +175 -175
- data/test/fixtures/lms/german.lm +254 -254
- data/test/fixtures/lms/hindi.lm +139 -139
- data/test/fixtures/lms/italian.lm +236 -236
- data/test/fixtures/lms/japanese.lm +88 -88
- data/test/fixtures/lms/norwegian.lm +182 -182
- data/test/fixtures/lms/spanish.lm +164 -164
- data/test/fixtures/test_languages/spanish +0 -1
- data/test/generator_test.rb +13 -0
- data/test/helper.rb +2 -0
- metadata +18 -25
- data/.document +0 -5
- data/lib/scylla/lms/13375P33K.lm +0 -400
- data/scylla-0.1.0.gem +0 -0
- data/source_texts/13375P33K.txt +0 -199
- data/test/fixtures/lms/13375p33k.lm +0 -400
- data/test/fixtures/source_texts/13375P33K.txt +0 -199
@@ -1,4 +1,4 @@
|
|
1
|
-
_
|
1
|
+
_ 4704
|
2
2
|
e 1600
|
3
3
|
r 1018
|
4
4
|
n 1012
|
@@ -19,8 +19,8 @@ t_ 291
|
|
19
19
|
er 290
|
20
20
|
en 287
|
21
21
|
v 284
|
22
|
-
n_
|
23
|
-
_s
|
22
|
+
n_ 274
|
23
|
+
_s 216
|
24
24
|
et 214
|
25
25
|
an 213
|
26
26
|
g_ 205
|
@@ -39,362 +39,362 @@ re 160
|
|
39
39
|
en_ 158
|
40
40
|
_o 156
|
41
41
|
ar 145
|
42
|
-
_e
|
42
|
+
_e 145
|
43
43
|
st 143
|
44
|
-
_h
|
44
|
+
_h 143
|
45
45
|
et_ 138
|
46
|
+
_d 135
|
46
47
|
, 134
|
47
|
-
_d 132
|
48
48
|
in 130
|
49
49
|
,_ 130
|
50
50
|
sk 129
|
51
51
|
i_ 122
|
52
52
|
ng 117
|
53
|
-
ke 117
|
54
53
|
_i 117
|
55
54
|
ne 117
|
56
|
-
|
55
|
+
ke 117
|
57
56
|
le 110
|
57
|
+
ti 110
|
58
58
|
_f 109
|
59
59
|
om 109
|
60
60
|
og 108
|
61
|
-
_m 105
|
62
61
|
me 105
|
62
|
+
_m 105
|
63
63
|
j 104
|
64
64
|
b 103
|
65
65
|
_og 101
|
66
66
|
og_ 100
|
67
67
|
la 99
|
68
68
|
_og_ 97
|
69
|
-
_de
|
70
|
-
_a 94
|
69
|
+
_de 95
|
71
70
|
m_ 94
|
72
71
|
ha 94
|
73
|
-
ei
|
72
|
+
ei 94
|
73
|
+
_t 94
|
74
|
+
_a 94
|
74
75
|
ø 92
|
75
76
|
� 92
|
76
|
-
|
77
|
+
å_ 90
|
78
|
+
�_ 90
|
77
79
|
a_ 88
|
78
|
-
|
79
|
-
|
80
|
-
_t 87
|
80
|
+
se 88
|
81
|
+
_ha 85
|
81
82
|
nd 84
|
82
|
-
_v 84
|
83
83
|
el 84
|
84
84
|
ri 84
|
85
|
-
ge 84
|
86
85
|
eg 84
|
87
|
-
|
86
|
+
_v 84
|
87
|
+
ge 84
|
88
88
|
tt 83
|
89
|
+
nn 81
|
89
90
|
is 80
|
90
|
-
nn 80
|
91
91
|
y 79
|
92
92
|
ta 79
|
93
93
|
av 78
|
94
|
-
_i_ 77
|
95
94
|
_n 77
|
96
|
-
|
95
|
+
_i_ 77
|
97
96
|
al 75
|
98
|
-
om_ 74
|
99
|
-
rs 74
|
100
97
|
il 74
|
101
|
-
|
98
|
+
rs 74
|
99
|
+
om_ 74
|
100
|
+
._ 73
|
102
101
|
_k 73
|
102
|
+
no 73
|
103
|
+
l_ 72
|
103
104
|
on 71
|
105
|
+
rt 70
|
104
106
|
_b 70
|
105
107
|
v_ 70
|
106
|
-
l_ 70
|
107
|
-
rt 69
|
108
108
|
_me 69
|
109
109
|
d_ 68
|
110
|
+
_. 68
|
110
111
|
ve 67
|
111
112
|
ar_ 66
|
112
|
-
_. 66
|
113
113
|
va 65
|
114
114
|
ing 63
|
115
115
|
_, 62
|
116
|
-
|
117
|
-
_p 59
|
116
|
+
_p 60
|
118
117
|
at 59
|
118
|
+
det 59
|
119
119
|
_g 59
|
120
|
-
fo 58
|
121
|
-
_,_ 58
|
122
120
|
ll 58
|
121
|
+
_,_ 58
|
122
|
+
fo 58
|
123
|
+
un 57
|
123
124
|
ke_ 57
|
124
125
|
av_ 57
|
125
|
-
un 57
|
126
126
|
an_ 56
|
127
127
|
k_ 56
|
128
128
|
ik 55
|
129
129
|
_av 55
|
130
130
|
_er 54
|
131
|
-
|
131
|
+
_l 54
|
132
132
|
so 54
|
133
|
+
N 54
|
133
134
|
li 53
|
134
|
-
_l 53
|
135
|
-
am 52
|
136
135
|
nor 52
|
137
136
|
je 52
|
137
|
+
am 52
|
138
|
+
ns 51
|
138
139
|
lan 51
|
139
|
-
ed 50
|
140
|
-
ors 50
|
141
|
-
ns 50
|
142
|
-
_er_ 50
|
143
140
|
r� 50
|
144
|
-
|
145
|
-
_ti
|
141
|
+
_er_ 50
|
142
|
+
_ti 50
|
143
|
+
ors 50
|
144
|
+
ed 50
|
146
145
|
es 49
|
147
146
|
ter 49
|
148
|
-
ne_
|
147
|
+
ne_ 49
|
148
|
+
eg_ 49
|
149
149
|
D 48
|
150
|
+
rsk 48
|
150
151
|
for 48
|
151
152
|
til 48
|
152
|
-
rsk 48
|
153
153
|
_av_ 47
|
154
|
-
and 47
|
155
154
|
nde 47
|
156
|
-
|
157
|
-
H 46
|
158
|
-
it 46
|
155
|
+
and 47
|
159
156
|
_r 46
|
160
157
|
ka 46
|
158
|
+
ra 46
|
159
|
+
it 46
|
160
|
+
H 46
|
161
|
+
_til 45
|
161
162
|
_N 45
|
162
163
|
ske 45
|
163
|
-
_til 45
|
164
164
|
ga 44
|
165
|
-
te_ 44
|
166
165
|
orsk 44
|
166
|
+
te_ 44
|
167
|
+
No 43
|
167
168
|
_so 43
|
168
|
-
S 43
|
169
169
|
som 43
|
170
|
-
No 43
|
171
170
|
ko 43
|
172
171
|
na 43
|
173
|
-
|
172
|
+
S 43
|
174
173
|
det_ 42
|
175
174
|
Nor 42
|
176
175
|
_fo 42
|
176
|
+
nge 42
|
177
177
|
ste 42
|
178
|
-
_som 41
|
179
|
-
_no 41
|
180
178
|
il_ 41
|
181
|
-
|
179
|
+
_no 41
|
180
|
+
_som 41
|
182
181
|
_No 40
|
183
|
-
|
184
|
-
som_ 40
|
182
|
+
p� 40
|
185
183
|
rå 40
|
186
|
-
|
187
|
-
|
184
|
+
som_ 40
|
185
|
+
tte 40
|
188
186
|
ett 39
|
187
|
+
nt 39
|
189
188
|
_som_ 39
|
190
|
-
|
191
|
-
rd 38
|
192
|
-
kk 38
|
193
|
-
da 38
|
194
|
-
em 38
|
195
|
-
_� 38
|
189
|
+
_Nor 39
|
196
190
|
ma 38
|
191
|
+
_ei 38
|
192
|
+
em 38
|
193
|
+
da 38
|
197
194
|
ig 38
|
198
|
-
|
199
|
-
�
|
200
|
-
|
195
|
+
kk 38
|
196
|
+
_� 38
|
197
|
+
rd 38
|
198
|
+
på 38
|
199
|
+
_u 37
|
201
200
|
ør 37
|
201
|
+
_han 37
|
202
|
+
De 37
|
202
203
|
han 37
|
203
|
-
|
204
|
-
_u 37
|
204
|
+
�r 37
|
205
205
|
ten 37
|
206
|
-
|
206
|
+
sa 37
|
207
|
+
gj 36
|
208
|
+
vi 36
|
207
209
|
ed_ 36
|
208
|
-
_han 36
|
209
210
|
nors 36
|
210
211
|
_p� 36
|
211
212
|
_på 36
|
212
|
-
|
213
|
-
|
214
|
-
|
213
|
+
norsk 36
|
214
|
+
ske_ 35
|
215
|
+
_for 35
|
215
216
|
kt 35
|
216
217
|
til_ 35
|
217
218
|
reg 35
|
218
|
-
|
219
|
-
|
219
|
+
den 35
|
220
|
+
__ 35
|
220
221
|
på_ 35
|
221
|
-
|
222
|
+
med 34
|
223
|
+
_til_ 34
|
222
224
|
rt_ 34
|
223
225
|
_va 34
|
224
|
-
med 34
|
225
226
|
_på_ 34
|
226
|
-
_til_ 34
|
227
|
-
ore 33
|
228
|
-
lle 33
|
229
227
|
_nor 33
|
228
|
+
ore 33
|
230
229
|
var 33
|
231
|
-
|
232
|
-
m� 32
|
230
|
+
lle 33
|
233
231
|
_det 32
|
234
|
-
|
235
|
-
|
232
|
+
m� 32
|
233
|
+
_se 32
|
234
|
+
de_ 32
|
236
235
|
_med 31
|
237
|
-
|
236
|
+
den_ 31
|
237
|
+
re_ 31
|
238
|
+
_S 31
|
238
239
|
pe 31
|
239
240
|
or_ 31
|
241
|
+
_st 31
|
240
242
|
tr 31
|
241
|
-
to 31
|
242
|
-
den_ 31
|
243
243
|
sp 31
|
244
|
-
ag 31
|
245
244
|
land 31
|
246
|
-
|
247
|
-
|
245
|
+
to 31
|
246
|
+
kr 31
|
247
|
+
ag 31
|
248
248
|
sk_ 30
|
249
|
+
ol 30
|
249
250
|
ell 30
|
250
251
|
fr 29
|
251
|
-
le_ 29
|
252
|
-
pr 29
|
253
|
-
s_ 29
|
254
|
-
må 29
|
255
252
|
har 29
|
256
253
|
ak 29
|
257
254
|
ni 29
|
258
255
|
med_ 29
|
259
|
-
|
256
|
+
pr 29
|
257
|
+
må 29
|
258
|
+
s_ 29
|
259
|
+
le_ 29
|
260
|
+
ks 28
|
260
261
|
_har 28
|
262
|
+
har_ 28
|
261
263
|
der 28
|
264
|
+
rk 28
|
262
265
|
si 28
|
263
266
|
ss 28
|
267
|
+
_la 28
|
264
268
|
ek 28
|
265
|
-
har_ 28
|
266
269
|
mi 28
|
267
|
-
|
268
|
-
|
269
|
-
_re 27
|
270
|
-
�l 27
|
270
|
+
� 27
|
271
|
+
enn 27
|
271
272
|
_ko 27
|
272
|
-
æ 27
|
273
|
-
�r 27
|
274
|
-
år 27
|
275
|
-
ål 27
|
276
|
-
as 27
|
277
273
|
_fr 27
|
278
|
-
|
274
|
+
�r 27
|
275
|
+
_re 27
|
276
|
+
æ 27
|
279
277
|
ie 27
|
280
|
-
|
281
|
-
� 27
|
278
|
+
�l 27
|
282
279
|
ei_ 27
|
280
|
+
_med_ 27
|
281
|
+
år 27
|
282
|
+
ål 27
|
283
283
|
_har_ 27
|
284
|
+
as 27
|
285
|
+
_den 27
|
286
|
+
ord 26
|
287
|
+
ans 26
|
288
|
+
ande 26
|
284
289
|
men 26
|
285
|
-
|
290
|
+
ng_ 26
|
286
291
|
tin 26
|
287
|
-
|
292
|
+
_D 26
|
288
293
|
_var 26
|
294
|
+
_den_ 26
|
295
|
+
ut 26
|
289
296
|
nne 26
|
290
|
-
enn 26
|
291
|
-
ng_ 26
|
292
|
-
ord 26
|
293
|
-
ande 26
|
294
|
-
ans 26
|
295
|
-
- 26
|
296
297
|
_han_ 25
|
297
|
-
|
298
|
-
mål 25
|
299
|
-
_den_ 25
|
298
|
+
_ve 25
|
300
299
|
Noreg 25
|
301
300
|
oreg 25
|
302
|
-
_ve 25
|
303
301
|
han_ 25
|
304
|
-
|
302
|
+
mål 25
|
305
303
|
ru 25
|
304
|
+
ere 25
|
306
305
|
ver 25
|
307
|
-
|
306
|
+
Nore 25
|
308
307
|
eri 25
|
308
|
+
ette 25
|
309
309
|
gje 25
|
310
|
+
E 24
|
310
311
|
A 24
|
312
|
+
gen 24
|
311
313
|
ts 24
|
312
314
|
sj 24
|
313
|
-
|
314
|
-
|
315
|
-
ting 23
|
315
|
+
he 23
|
316
|
+
ene 23
|
316
317
|
tt_ 23
|
317
|
-
|
318
|
+
lo 23
|
318
319
|
ter_ 23
|
319
|
-
|
320
|
+
bl 23
|
321
|
+
Ha 23
|
320
322
|
_å 23
|
323
|
+
ting 23
|
321
324
|
ten_ 23
|
322
|
-
lo 23
|
323
|
-
Ha 23
|
324
|
-
bl 23
|
325
325
|
kke 23
|
326
|
-
|
327
|
-
ene 23
|
328
|
-
isk 22
|
326
|
+
nga 23
|
329
327
|
_det_ 22
|
330
328
|
_om 22
|
331
|
-
s� 22
|
332
|
-
kon 22
|
333
|
-
ds 22
|
334
329
|
ong 22
|
330
|
+
ds 22
|
335
331
|
sta 22
|
332
|
+
s� 22
|
333
|
+
isk 22
|
336
334
|
_Nore 22
|
337
|
-
|
338
|
-
_H 21
|
339
|
-
ok 21
|
340
|
-
ane 21
|
335
|
+
kon 22
|
341
336
|
øy 21
|
342
|
-
rm 21
|
343
|
-
�r 21
|
344
337
|
gr 21
|
345
|
-
|
346
|
-
|
338
|
+
�r 21
|
339
|
+
rm 21
|
347
340
|
us 21
|
341
|
+
kri 21
|
348
342
|
�y 21
|
343
|
+
ok 21
|
349
344
|
ær 21
|
345
|
+
ir 21
|
346
|
+
ane 21
|
350
347
|
art 20
|
351
|
-
|
348
|
+
ld 20
|
352
349
|
_A 20
|
353
|
-
|
350
|
+
ist 20
|
351
|
+
for_ 20
|
352
|
+
sl 20
|
354
353
|
_bl 20
|
355
354
|
ad 20
|
356
|
-
|
357
|
-
ld 20
|
358
|
-
ist 20
|
355
|
+
ert 20
|
359
356
|
ndet 20
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
rsk_ 19
|
364
|
-
sam 19
|
365
|
-
oreg_ 19
|
366
|
-
inga 19
|
367
|
-
ret 19
|
368
|
-
_ka 19
|
369
|
-
_ut 19
|
357
|
+
_lan 20
|
358
|
+
_nors 20
|
359
|
+
andet 19
|
370
360
|
f� 19
|
371
|
-
|
372
|
-
|
373
|
-
lt 19
|
374
|
-
hu 19
|
361
|
+
ein 19
|
362
|
+
_ka 19
|
375
363
|
jo 19
|
364
|
+
inga 19
|
365
|
+
ale 19
|
376
366
|
seg 19
|
377
|
-
|
378
|
-
|
367
|
+
_E 19
|
368
|
+
reg_ 19
|
369
|
+
oreg_ 19
|
379
370
|
fa 19
|
371
|
+
rske 19
|
372
|
+
hu 19
|
373
|
+
rsk_ 19
|
380
374
|
id 19
|
381
|
-
|
382
|
-
|
375
|
+
ret 19
|
376
|
+
_H 19
|
377
|
+
sam 19
|
378
|
+
_s� 19
|
379
|
+
lt 19
|
383
380
|
un_ 19
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
381
|
+
_ut 19
|
382
|
+
orsk_ 19
|
383
|
+
orske 19
|
384
|
+
ort 19
|
385
|
+
rin 18
|
386
|
+
ov 18
|
387
|
+
_vi 18
|
389
388
|
tor 18
|
389
|
+
_et 18
|
390
|
+
ro 18
|
390
391
|
gs 18
|
391
|
-
|
392
|
+
_en 18
|
392
393
|
tter 18
|
393
|
-
|
394
|
+
mn 18
|
395
|
+
ikk 18
|
396
|
+
ren 18
|
397
|
+
elle 18
|
394
398
|
_sa 18
|
395
399
|
inge 18
|
396
|
-
|
397
|
-
ov 18
|
398
|
-
mn 18
|
399
|
-
ro 18
|
400
|
-
rin 18
|
400
|
+
est 18
|