scylla 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -2
- data/Gemfile.lock +16 -1
- data/lib/scylla/classifier.rb +1 -1
- data/lib/scylla/generator.rb +16 -4
- data/lib/scylla/lms/afrikaans.lm +232 -232
- data/lib/scylla/lms/arabic.lm +175 -175
- data/lib/scylla/lms/bulgarian.lm +225 -225
- data/lib/scylla/lms/catalan.lm +309 -309
- data/lib/scylla/lms/danish.lm +167 -167
- data/lib/scylla/lms/english.lm +398 -398
- data/lib/scylla/lms/finnish.lm +237 -237
- data/lib/scylla/lms/french.lm +148 -148
- data/lib/scylla/lms/german.lm +258 -258
- data/lib/scylla/lms/greek.lm +236 -236
- data/lib/scylla/lms/hebrew.lm +154 -154
- data/lib/scylla/lms/hindi.lm +139 -139
- data/lib/scylla/lms/icelandic.lm +239 -239
- data/lib/scylla/lms/indonesian.lm +244 -244
- data/lib/scylla/lms/italian.lm +248 -248
- data/lib/scylla/lms/japanese.lm +90 -90
- data/lib/scylla/lms/korean.lm +306 -306
- data/lib/scylla/lms/norwegian.lm +193 -193
- data/lib/scylla/lms/polish.lm +241 -241
- data/lib/scylla/lms/portuguese.lm +232 -232
- data/lib/scylla/lms/romanian.lm +246 -246
- data/lib/scylla/lms/slovak.lm +242 -242
- data/lib/scylla/lms/slovenian.lm +229 -229
- data/lib/scylla/lms/spanish.lm +164 -164
- data/lib/scylla/lms/swedish.lm +157 -157
- data/lib/scylla/lms/tagalog.lm +247 -247
- data/lib/scylla/lms/thai.lm +252 -252
- data/lib/scylla/lms/turkish.lm +285 -285
- data/lib/scylla/lms/vietnamese.lm +250 -250
- data/lib/scylla/lms/welsh.lm +248 -248
- data/lib/scylla/resources.rb +1 -9
- data/lib/scylla.rb +4 -0
- data/scylla.gemspec +2 -120
- data/source_texts/english.txt +62 -27
- data/test/classifier_test.rb +1 -3
- data/test/fixtures/lms/danish.lm +173 -173
- data/test/fixtures/lms/english.lm +220 -220
- data/test/fixtures/lms/french.lm +175 -175
- data/test/fixtures/lms/german.lm +254 -254
- data/test/fixtures/lms/hindi.lm +139 -139
- data/test/fixtures/lms/italian.lm +236 -236
- data/test/fixtures/lms/japanese.lm +88 -88
- data/test/fixtures/lms/norwegian.lm +182 -182
- data/test/fixtures/lms/spanish.lm +164 -164
- data/test/fixtures/test_languages/spanish +0 -1
- data/test/generator_test.rb +13 -0
- data/test/helper.rb +2 -0
- metadata +18 -25
- data/.document +0 -5
- data/lib/scylla/lms/13375P33K.lm +0 -400
- data/scylla-0.1.0.gem +0 -0
- data/source_texts/13375P33K.txt +0 -199
- data/test/fixtures/lms/13375p33k.lm +0 -400
- data/test/fixtures/source_texts/13375P33K.txt +0 -199
data/lib/scylla/lms/norwegian.lm
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
_
|
1
|
+
_ 4738
|
2
2
|
e 1600
|
3
3
|
r 1018
|
4
4
|
n 1012
|
@@ -13,14 +13,14 @@ k 479
|
|
13
13
|
d 416
|
14
14
|
m 398
|
15
15
|
� 323
|
16
|
+
r_ 306
|
16
17
|
e_ 305
|
17
|
-
|
18
|
-
t_ 291
|
18
|
+
t_ 294
|
19
19
|
er 290
|
20
20
|
en 287
|
21
21
|
v 284
|
22
|
-
n_
|
23
|
-
_s
|
22
|
+
n_ 277
|
23
|
+
_s 216
|
24
24
|
et 214
|
25
25
|
an 213
|
26
26
|
g_ 205
|
@@ -28,35 +28,35 @@ or 203
|
|
28
28
|
� 194
|
29
29
|
å 194
|
30
30
|
de 191
|
31
|
-
f 188
|
32
31
|
u 188
|
32
|
+
f 188
|
33
33
|
h 172
|
34
34
|
p 166
|
35
35
|
er_ 164
|
36
36
|
te 162
|
37
37
|
. 161
|
38
38
|
re 160
|
39
|
-
en_
|
39
|
+
en_ 159
|
40
40
|
_o 156
|
41
|
+
_e 145
|
41
42
|
ar 145
|
42
|
-
|
43
|
+
_h 143
|
43
44
|
st 143
|
44
|
-
_h 142
|
45
45
|
et_ 138
|
46
|
+
_d 135
|
46
47
|
, 134
|
47
|
-
_d 132
|
48
|
-
in 130
|
49
48
|
,_ 130
|
49
|
+
in 130
|
50
50
|
sk 129
|
51
51
|
i_ 122
|
52
|
-
|
52
|
+
_i 118
|
53
53
|
ke 117
|
54
|
-
_i 117
|
55
54
|
ne 117
|
55
|
+
ng 117
|
56
56
|
ti 110
|
57
57
|
le 110
|
58
|
-
_f 109
|
59
58
|
om 109
|
59
|
+
_f 109
|
60
60
|
og 108
|
61
61
|
_m 105
|
62
62
|
me 105
|
@@ -66,335 +66,335 @@ _og 101
|
|
66
66
|
og_ 100
|
67
67
|
la 99
|
68
68
|
_og_ 97
|
69
|
-
_de
|
70
|
-
_a 94
|
69
|
+
_de 95
|
71
70
|
m_ 94
|
71
|
+
_a 94
|
72
72
|
ha 94
|
73
73
|
ei 93
|
74
74
|
ø 92
|
75
75
|
� 92
|
76
|
-
|
76
|
+
�_ 91
|
77
|
+
å_ 91
|
77
78
|
a_ 88
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
nd 84
|
82
|
-
_v 84
|
79
|
+
_t 88
|
80
|
+
se 88
|
81
|
+
_ha 85
|
83
82
|
el 84
|
84
|
-
|
83
|
+
nd 84
|
85
84
|
ge 84
|
86
85
|
eg 84
|
87
|
-
|
86
|
+
ri 84
|
87
|
+
_v 84
|
88
88
|
tt 83
|
89
|
+
._ 82
|
89
90
|
is 80
|
90
91
|
nn 80
|
91
92
|
y 79
|
92
93
|
ta 79
|
93
94
|
av 78
|
94
|
-
_i_
|
95
|
+
_i_ 78
|
95
96
|
_n 77
|
96
|
-
._ 77
|
97
97
|
al 75
|
98
98
|
om_ 74
|
99
|
-
rs 74
|
100
99
|
il 74
|
100
|
+
rs 74
|
101
|
+
_k 74
|
101
102
|
no 73
|
102
|
-
|
103
|
+
l_ 72
|
103
104
|
on 71
|
104
105
|
_b 70
|
105
106
|
v_ 70
|
106
|
-
l_ 70
|
107
|
-
rt 69
|
108
107
|
_me 69
|
108
|
+
rt 69
|
109
|
+
_. 68
|
109
110
|
d_ 68
|
110
111
|
ve 67
|
111
112
|
ar_ 66
|
112
|
-
_. 66
|
113
113
|
va 65
|
114
114
|
ing 63
|
115
115
|
_, 62
|
116
|
-
|
117
|
-
_p 59
|
116
|
+
_p 60
|
118
117
|
at 59
|
119
118
|
_g 59
|
119
|
+
det 59
|
120
|
+
ll 58
|
120
121
|
fo 58
|
121
122
|
_,_ 58
|
122
|
-
ll 58
|
123
123
|
ke_ 57
|
124
124
|
av_ 57
|
125
125
|
un 57
|
126
|
-
an_ 56
|
127
126
|
k_ 56
|
127
|
+
an_ 56
|
128
128
|
ik 55
|
129
129
|
_av 55
|
130
130
|
_er 54
|
131
|
-
N 54
|
132
131
|
so 54
|
132
|
+
_l 54
|
133
|
+
N 54
|
133
134
|
li 53
|
134
|
-
_l 53
|
135
|
-
am 52
|
136
135
|
nor 52
|
137
136
|
je 52
|
137
|
+
am 52
|
138
138
|
lan 51
|
139
|
-
ed 50
|
140
|
-
ors 50
|
141
139
|
ns 50
|
142
|
-
_er_ 50
|
143
140
|
r� 50
|
141
|
+
ors 50
|
142
|
+
_ti 50
|
143
|
+
_er_ 50
|
144
|
+
ed 50
|
145
|
+
ne_ 49
|
146
|
+
ter 49
|
144
147
|
eg_ 49
|
145
|
-
_ti 49
|
146
148
|
es 49
|
147
|
-
ter 49
|
148
|
-
ne_ 48
|
149
149
|
D 48
|
150
|
-
for 48
|
151
150
|
til 48
|
152
151
|
rsk 48
|
153
|
-
|
154
|
-
and 47
|
152
|
+
for 48
|
155
153
|
nde 47
|
156
|
-
|
154
|
+
and 47
|
155
|
+
_av_ 47
|
157
156
|
H 46
|
158
|
-
it 46
|
159
157
|
_r 46
|
158
|
+
ra 46
|
159
|
+
it 46
|
160
160
|
ka 46
|
161
|
-
_N 45
|
162
|
-
ske 45
|
163
161
|
_til 45
|
164
|
-
|
162
|
+
ske 45
|
163
|
+
_N 45
|
165
164
|
te_ 44
|
165
|
+
ga 44
|
166
166
|
orsk 44
|
167
|
-
_so 43
|
168
167
|
S 43
|
169
168
|
som 43
|
170
|
-
|
169
|
+
_so 43
|
171
170
|
ko 43
|
172
171
|
na 43
|
173
|
-
|
174
|
-
det_ 42
|
172
|
+
No 43
|
175
173
|
Nor 42
|
174
|
+
det_ 42
|
176
175
|
_fo 42
|
176
|
+
nge 42
|
177
177
|
ste 42
|
178
178
|
_som 41
|
179
179
|
_no 41
|
180
180
|
il_ 41
|
181
|
-
p� 40
|
182
|
-
_No 40
|
183
|
-
tte 40
|
184
181
|
som_ 40
|
182
|
+
tte 40
|
183
|
+
p� 40
|
185
184
|
rå 40
|
186
|
-
|
185
|
+
_No 40
|
187
186
|
_Nor 39
|
188
187
|
ett 39
|
189
188
|
_som_ 39
|
190
|
-
|
191
|
-
rd 38
|
192
|
-
kk 38
|
193
|
-
da 38
|
194
|
-
em 38
|
195
|
-
_� 38
|
189
|
+
nt 39
|
196
190
|
ma 38
|
191
|
+
rd 38
|
197
192
|
ig 38
|
193
|
+
_� 38
|
194
|
+
em 38
|
195
|
+
på 38
|
196
|
+
da 38
|
197
|
+
kk 38
|
198
|
+
_ei 38
|
198
199
|
De 37
|
199
|
-
�r 37
|
200
|
-
_ei 37
|
201
|
-
ør 37
|
202
200
|
han 37
|
203
201
|
sa 37
|
204
|
-
|
202
|
+
_han 37
|
203
|
+
ør 37
|
205
204
|
ten 37
|
206
|
-
|
205
|
+
�r 37
|
206
|
+
_u 37
|
207
207
|
ed_ 36
|
208
|
-
_han 36
|
209
|
-
nors 36
|
210
208
|
_p� 36
|
211
|
-
_på 36
|
212
209
|
gj 36
|
210
|
+
norsk 36
|
211
|
+
nors 36
|
213
212
|
vi 36
|
213
|
+
_på 36
|
214
|
+
_for 35
|
214
215
|
den 35
|
215
|
-
|
216
|
-
til_ 35
|
216
|
+
på_ 35
|
217
217
|
reg 35
|
218
218
|
ske_ 35
|
219
|
-
|
220
|
-
|
221
|
-
re_ 34
|
222
|
-
rt_ 34
|
223
|
-
_va 34
|
224
|
-
med 34
|
219
|
+
til_ 35
|
220
|
+
kt 35
|
225
221
|
_på_ 34
|
222
|
+
__ 34
|
226
223
|
_til_ 34
|
227
|
-
|
228
|
-
|
229
|
-
|
224
|
+
med 34
|
225
|
+
rt_ 34
|
226
|
+
_va 34
|
230
227
|
var 33
|
228
|
+
_nor 33
|
229
|
+
lle 33
|
230
|
+
ore 33
|
231
231
|
de_ 32
|
232
|
-
|
233
|
-
|
232
|
+
or_ 32
|
233
|
+
_se 32
|
234
234
|
_S 32
|
235
|
-
|
236
|
-
|
237
|
-
_se 31
|
238
|
-
pe 31
|
239
|
-
or_ 31
|
235
|
+
_det 32
|
236
|
+
m� 32
|
240
237
|
tr 31
|
241
|
-
to 31
|
242
238
|
den_ 31
|
243
|
-
|
239
|
+
to 31
|
244
240
|
ag 31
|
245
|
-
land 31
|
246
241
|
_st 31
|
247
|
-
|
248
|
-
|
242
|
+
land 31
|
243
|
+
sp 31
|
244
|
+
kr 31
|
245
|
+
_med 31
|
246
|
+
re_ 31
|
247
|
+
pe 31
|
249
248
|
ell 30
|
250
|
-
|
251
|
-
|
249
|
+
sk_ 30
|
250
|
+
ol 30
|
252
251
|
pr 29
|
253
|
-
|
254
|
-
må 29
|
252
|
+
le_ 29
|
255
253
|
har 29
|
254
|
+
s_ 29
|
256
255
|
ak 29
|
257
|
-
ni 29
|
258
256
|
med_ 29
|
257
|
+
må 29
|
258
|
+
fr 29
|
259
|
+
ni 29
|
259
260
|
rk 28
|
260
|
-
_har 28
|
261
261
|
der 28
|
262
|
-
|
262
|
+
_la 28
|
263
|
+
_D 28
|
263
264
|
ss 28
|
264
|
-
ek 28
|
265
265
|
har_ 28
|
266
|
+
ek 28
|
266
267
|
mi 28
|
267
|
-
|
268
|
-
|
269
|
-
_re 27
|
270
|
-
�l 27
|
271
|
-
_ko 27
|
268
|
+
_har 28
|
269
|
+
si 28
|
272
270
|
æ 27
|
273
|
-
�r 27
|
274
|
-
år 27
|
275
|
-
ål 27
|
276
|
-
as 27
|
277
|
-
_fr 27
|
278
271
|
ks 27
|
279
|
-
|
280
|
-
|
272
|
+
_ko 27
|
273
|
+
ål 27
|
274
|
+
�l 27
|
281
275
|
� 27
|
282
|
-
ei_ 27
|
283
276
|
_har_ 27
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
277
|
+
_re 27
|
278
|
+
�r 27
|
279
|
+
ei_ 27
|
280
|
+
ie 27
|
281
|
+
as 27
|
282
|
+
_fr 27
|
283
|
+
_den 27
|
284
|
+
år 27
|
285
|
+
_med_ 27
|
288
286
|
_var 26
|
289
|
-
|
287
|
+
men 26
|
288
|
+
ande 26
|
290
289
|
enn 26
|
291
290
|
ng_ 26
|
292
|
-
|
293
|
-
ande 26
|
291
|
+
ut 26
|
294
292
|
ans 26
|
293
|
+
ord 26
|
294
|
+
tin 26
|
295
295
|
- 26
|
296
|
-
|
296
|
+
nne 26
|
297
|
+
_den_ 26
|
298
|
+
ver 25
|
299
|
+
eri 25
|
300
|
+
ru 25
|
301
|
+
gje 25
|
297
302
|
Nore 25
|
298
303
|
mål 25
|
299
|
-
|
300
|
-
Noreg 25
|
304
|
+
ette 25
|
301
305
|
oreg 25
|
302
306
|
_ve 25
|
303
|
-
han_ 25
|
304
307
|
ere 25
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
eri 25
|
309
|
-
gje 25
|
310
|
-
A 24
|
311
|
-
ts 24
|
308
|
+
_han_ 25
|
309
|
+
han_ 25
|
310
|
+
Noreg 25
|
312
311
|
sj 24
|
312
|
+
ts 24
|
313
|
+
tt_ 24
|
313
314
|
gen 24
|
315
|
+
A 24
|
314
316
|
E 24
|
317
|
+
ene 23
|
315
318
|
ting 23
|
316
|
-
|
317
|
-
__ 23
|
318
|
-
ter_ 23
|
319
|
-
nga 23
|
319
|
+
_H 23
|
320
320
|
_å 23
|
321
|
-
ten_ 23
|
322
|
-
lo 23
|
323
|
-
Ha 23
|
324
|
-
bl 23
|
325
321
|
kke 23
|
326
322
|
he 23
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
323
|
+
lo 23
|
324
|
+
ter_ 23
|
325
|
+
bl 23
|
326
|
+
ten_ 23
|
327
|
+
nga 23
|
328
|
+
Ha 23
|
333
329
|
ds 22
|
334
|
-
|
335
|
-
sta 22
|
330
|
+
s� 22
|
336
331
|
_Nore 22
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
332
|
+
sta 22
|
333
|
+
ong 22
|
334
|
+
_det_ 22
|
335
|
+
kon 22
|
336
|
+
isk 22
|
337
|
+
_om 22
|
341
338
|
øy 21
|
342
339
|
rm 21
|
340
|
+
ir 21
|
343
341
|
�r 21
|
344
342
|
gr 21
|
345
|
-
kri 21
|
346
|
-
ir 21
|
347
343
|
us 21
|
348
|
-
|
344
|
+
kri 21
|
345
|
+
ok 21
|
349
346
|
ær 21
|
347
|
+
for_ 21
|
348
|
+
�y 21
|
349
|
+
ane 21
|
350
|
+
_bl 20
|
350
351
|
art 20
|
351
|
-
|
352
|
-
_A 20
|
352
|
+
_lan 20
|
353
353
|
_nors 20
|
354
|
-
|
355
|
-
ad 20
|
354
|
+
ndet 20
|
356
355
|
sl 20
|
356
|
+
ad 20
|
357
357
|
ld 20
|
358
358
|
ist 20
|
359
|
-
|
360
|
-
_E 19
|
361
|
-
orske 19
|
362
|
-
_lan 19
|
363
|
-
rsk_ 19
|
364
|
-
sam 19
|
365
|
-
oreg_ 19
|
366
|
-
inga 19
|
367
|
-
ret 19
|
368
|
-
_ka 19
|
369
|
-
_ut 19
|
370
|
-
f� 19
|
371
|
-
orsk_ 19
|
372
|
-
_s� 19
|
359
|
+
_A 20
|
373
360
|
lt 19
|
361
|
+
ale 19
|
362
|
+
id 19
|
363
|
+
sam 19
|
374
364
|
hu 19
|
375
365
|
jo 19
|
376
|
-
seg 19
|
377
366
|
andet 19
|
367
|
+
reg_ 19
|
368
|
+
ert 19
|
369
|
+
un_ 19
|
370
|
+
_De 19
|
371
|
+
rsk_ 19
|
378
372
|
ort 19
|
373
|
+
_ut 19
|
374
|
+
orsk_ 19
|
375
|
+
_ka 19
|
376
|
+
f� 19
|
377
|
+
inga 19
|
378
|
+
oreg_ 19
|
379
|
+
orske 19
|
380
|
+
_s� 19
|
381
|
+
_E 19
|
379
382
|
fa 19
|
380
|
-
id 19
|
381
|
-
ale 19
|
382
|
-
_De 19
|
383
|
-
un_ 19
|
384
383
|
rske 19
|
385
|
-
|
386
|
-
|
384
|
+
ret 19
|
385
|
+
seg 19
|
386
|
+
ro 18
|
387
387
|
_et 18
|
388
|
-
|
389
|
-
tor 18
|
390
|
-
gs 18
|
388
|
+
mn 18
|
391
389
|
n� 18
|
390
|
+
ang 18
|
391
|
+
rin 18
|
392
392
|
tter 18
|
393
|
-
|
393
|
+
ikk 18
|
394
|
+
ren 18
|
395
|
+
tor 18
|
396
|
+
ov 18
|
394
397
|
_sa 18
|
398
|
+
sm 18
|
395
399
|
inge 18
|
396
|
-
|
397
|
-
ov 18
|
398
|
-
mn 18
|
399
|
-
ro 18
|
400
|
-
rin 18
|
400
|
+
gs 18
|