scylla 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -2
- data/Gemfile.lock +16 -1
- data/lib/scylla/classifier.rb +1 -1
- data/lib/scylla/generator.rb +16 -4
- data/lib/scylla/lms/afrikaans.lm +232 -232
- data/lib/scylla/lms/arabic.lm +175 -175
- data/lib/scylla/lms/bulgarian.lm +225 -225
- data/lib/scylla/lms/catalan.lm +309 -309
- data/lib/scylla/lms/danish.lm +167 -167
- data/lib/scylla/lms/english.lm +398 -398
- data/lib/scylla/lms/finnish.lm +237 -237
- data/lib/scylla/lms/french.lm +148 -148
- data/lib/scylla/lms/german.lm +258 -258
- data/lib/scylla/lms/greek.lm +236 -236
- data/lib/scylla/lms/hebrew.lm +154 -154
- data/lib/scylla/lms/hindi.lm +139 -139
- data/lib/scylla/lms/icelandic.lm +239 -239
- data/lib/scylla/lms/indonesian.lm +244 -244
- data/lib/scylla/lms/italian.lm +248 -248
- data/lib/scylla/lms/japanese.lm +90 -90
- data/lib/scylla/lms/korean.lm +306 -306
- data/lib/scylla/lms/norwegian.lm +193 -193
- data/lib/scylla/lms/polish.lm +241 -241
- data/lib/scylla/lms/portuguese.lm +232 -232
- data/lib/scylla/lms/romanian.lm +246 -246
- data/lib/scylla/lms/slovak.lm +242 -242
- data/lib/scylla/lms/slovenian.lm +229 -229
- data/lib/scylla/lms/spanish.lm +164 -164
- data/lib/scylla/lms/swedish.lm +157 -157
- data/lib/scylla/lms/tagalog.lm +247 -247
- data/lib/scylla/lms/thai.lm +252 -252
- data/lib/scylla/lms/turkish.lm +285 -285
- data/lib/scylla/lms/vietnamese.lm +250 -250
- data/lib/scylla/lms/welsh.lm +248 -248
- data/lib/scylla/resources.rb +1 -9
- data/lib/scylla.rb +4 -0
- data/scylla.gemspec +2 -120
- data/source_texts/english.txt +62 -27
- data/test/classifier_test.rb +1 -3
- data/test/fixtures/lms/danish.lm +173 -173
- data/test/fixtures/lms/english.lm +220 -220
- data/test/fixtures/lms/french.lm +175 -175
- data/test/fixtures/lms/german.lm +254 -254
- data/test/fixtures/lms/hindi.lm +139 -139
- data/test/fixtures/lms/italian.lm +236 -236
- data/test/fixtures/lms/japanese.lm +88 -88
- data/test/fixtures/lms/norwegian.lm +182 -182
- data/test/fixtures/lms/spanish.lm +164 -164
- data/test/fixtures/test_languages/spanish +0 -1
- data/test/generator_test.rb +13 -0
- data/test/helper.rb +2 -0
- metadata +18 -25
- data/.document +0 -5
- data/lib/scylla/lms/13375P33K.lm +0 -400
- data/scylla-0.1.0.gem +0 -0
- data/source_texts/13375P33K.txt +0 -199
- data/test/fixtures/lms/13375p33k.lm +0 -400
- data/test/fixtures/source_texts/13375P33K.txt +0 -199
data/lib/scylla/lms/norwegian.lm
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
_
|
1
|
+
_ 4738
|
2
2
|
e 1600
|
3
3
|
r 1018
|
4
4
|
n 1012
|
@@ -13,14 +13,14 @@ k 479
|
|
13
13
|
d 416
|
14
14
|
m 398
|
15
15
|
� 323
|
16
|
+
r_ 306
|
16
17
|
e_ 305
|
17
|
-
|
18
|
-
t_ 291
|
18
|
+
t_ 294
|
19
19
|
er 290
|
20
20
|
en 287
|
21
21
|
v 284
|
22
|
-
n_
|
23
|
-
_s
|
22
|
+
n_ 277
|
23
|
+
_s 216
|
24
24
|
et 214
|
25
25
|
an 213
|
26
26
|
g_ 205
|
@@ -28,35 +28,35 @@ or 203
|
|
28
28
|
� 194
|
29
29
|
å 194
|
30
30
|
de 191
|
31
|
-
f 188
|
32
31
|
u 188
|
32
|
+
f 188
|
33
33
|
h 172
|
34
34
|
p 166
|
35
35
|
er_ 164
|
36
36
|
te 162
|
37
37
|
. 161
|
38
38
|
re 160
|
39
|
-
en_
|
39
|
+
en_ 159
|
40
40
|
_o 156
|
41
|
+
_e 145
|
41
42
|
ar 145
|
42
|
-
|
43
|
+
_h 143
|
43
44
|
st 143
|
44
|
-
_h 142
|
45
45
|
et_ 138
|
46
|
+
_d 135
|
46
47
|
, 134
|
47
|
-
_d 132
|
48
|
-
in 130
|
49
48
|
,_ 130
|
49
|
+
in 130
|
50
50
|
sk 129
|
51
51
|
i_ 122
|
52
|
-
|
52
|
+
_i 118
|
53
53
|
ke 117
|
54
|
-
_i 117
|
55
54
|
ne 117
|
55
|
+
ng 117
|
56
56
|
ti 110
|
57
57
|
le 110
|
58
|
-
_f 109
|
59
58
|
om 109
|
59
|
+
_f 109
|
60
60
|
og 108
|
61
61
|
_m 105
|
62
62
|
me 105
|
@@ -66,335 +66,335 @@ _og 101
|
|
66
66
|
og_ 100
|
67
67
|
la 99
|
68
68
|
_og_ 97
|
69
|
-
_de
|
70
|
-
_a 94
|
69
|
+
_de 95
|
71
70
|
m_ 94
|
71
|
+
_a 94
|
72
72
|
ha 94
|
73
73
|
ei 93
|
74
74
|
ø 92
|
75
75
|
� 92
|
76
|
-
|
76
|
+
�_ 91
|
77
|
+
å_ 91
|
77
78
|
a_ 88
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
nd 84
|
82
|
-
_v 84
|
79
|
+
_t 88
|
80
|
+
se 88
|
81
|
+
_ha 85
|
83
82
|
el 84
|
84
|
-
|
83
|
+
nd 84
|
85
84
|
ge 84
|
86
85
|
eg 84
|
87
|
-
|
86
|
+
ri 84
|
87
|
+
_v 84
|
88
88
|
tt 83
|
89
|
+
._ 82
|
89
90
|
is 80
|
90
91
|
nn 80
|
91
92
|
y 79
|
92
93
|
ta 79
|
93
94
|
av 78
|
94
|
-
_i_
|
95
|
+
_i_ 78
|
95
96
|
_n 77
|
96
|
-
._ 77
|
97
97
|
al 75
|
98
98
|
om_ 74
|
99
|
-
rs 74
|
100
99
|
il 74
|
100
|
+
rs 74
|
101
|
+
_k 74
|
101
102
|
no 73
|
102
|
-
|
103
|
+
l_ 72
|
103
104
|
on 71
|
104
105
|
_b 70
|
105
106
|
v_ 70
|
106
|
-
l_ 70
|
107
|
-
rt 69
|
108
107
|
_me 69
|
108
|
+
rt 69
|
109
|
+
_. 68
|
109
110
|
d_ 68
|
110
111
|
ve 67
|
111
112
|
ar_ 66
|
112
|
-
_. 66
|
113
113
|
va 65
|
114
114
|
ing 63
|
115
115
|
_, 62
|
116
|
-
|
117
|
-
_p 59
|
116
|
+
_p 60
|
118
117
|
at 59
|
119
118
|
_g 59
|
119
|
+
det 59
|
120
|
+
ll 58
|
120
121
|
fo 58
|
121
122
|
_,_ 58
|
122
|
-
ll 58
|
123
123
|
ke_ 57
|
124
124
|
av_ 57
|
125
125
|
un 57
|
126
|
-
an_ 56
|
127
126
|
k_ 56
|
127
|
+
an_ 56
|
128
128
|
ik 55
|
129
129
|
_av 55
|
130
130
|
_er 54
|
131
|
-
N 54
|
132
131
|
so 54
|
132
|
+
_l 54
|
133
|
+
N 54
|
133
134
|
li 53
|
134
|
-
_l 53
|
135
|
-
am 52
|
136
135
|
nor 52
|
137
136
|
je 52
|
137
|
+
am 52
|
138
138
|
lan 51
|
139
|
-
ed 50
|
140
|
-
ors 50
|
141
139
|
ns 50
|
142
|
-
_er_ 50
|
143
140
|
r� 50
|
141
|
+
ors 50
|
142
|
+
_ti 50
|
143
|
+
_er_ 50
|
144
|
+
ed 50
|
145
|
+
ne_ 49
|
146
|
+
ter 49
|
144
147
|
eg_ 49
|
145
|
-
_ti 49
|
146
148
|
es 49
|
147
|
-
ter 49
|
148
|
-
ne_ 48
|
149
149
|
D 48
|
150
|
-
for 48
|
151
150
|
til 48
|
152
151
|
rsk 48
|
153
|
-
|
154
|
-
and 47
|
152
|
+
for 48
|
155
153
|
nde 47
|
156
|
-
|
154
|
+
and 47
|
155
|
+
_av_ 47
|
157
156
|
H 46
|
158
|
-
it 46
|
159
157
|
_r 46
|
158
|
+
ra 46
|
159
|
+
it 46
|
160
160
|
ka 46
|
161
|
-
_N 45
|
162
|
-
ske 45
|
163
161
|
_til 45
|
164
|
-
|
162
|
+
ske 45
|
163
|
+
_N 45
|
165
164
|
te_ 44
|
165
|
+
ga 44
|
166
166
|
orsk 44
|
167
|
-
_so 43
|
168
167
|
S 43
|
169
168
|
som 43
|
170
|
-
|
169
|
+
_so 43
|
171
170
|
ko 43
|
172
171
|
na 43
|
173
|
-
|
174
|
-
det_ 42
|
172
|
+
No 43
|
175
173
|
Nor 42
|
174
|
+
det_ 42
|
176
175
|
_fo 42
|
176
|
+
nge 42
|
177
177
|
ste 42
|
178
178
|
_som 41
|
179
179
|
_no 41
|
180
180
|
il_ 41
|
181
|
-
p� 40
|
182
|
-
_No 40
|
183
|
-
tte 40
|
184
181
|
som_ 40
|
182
|
+
tte 40
|
183
|
+
p� 40
|
185
184
|
rå 40
|
186
|
-
|
185
|
+
_No 40
|
187
186
|
_Nor 39
|
188
187
|
ett 39
|
189
188
|
_som_ 39
|
190
|
-
|
191
|
-
rd 38
|
192
|
-
kk 38
|
193
|
-
da 38
|
194
|
-
em 38
|
195
|
-
_� 38
|
189
|
+
nt 39
|
196
190
|
ma 38
|
191
|
+
rd 38
|
197
192
|
ig 38
|
193
|
+
_� 38
|
194
|
+
em 38
|
195
|
+
på 38
|
196
|
+
da 38
|
197
|
+
kk 38
|
198
|
+
_ei 38
|
198
199
|
De 37
|
199
|
-
�r 37
|
200
|
-
_ei 37
|
201
|
-
ør 37
|
202
200
|
han 37
|
203
201
|
sa 37
|
204
|
-
|
202
|
+
_han 37
|
203
|
+
ør 37
|
205
204
|
ten 37
|
206
|
-
|
205
|
+
�r 37
|
206
|
+
_u 37
|
207
207
|
ed_ 36
|
208
|
-
_han 36
|
209
|
-
nors 36
|
210
208
|
_p� 36
|
211
|
-
_på 36
|
212
209
|
gj 36
|
210
|
+
norsk 36
|
211
|
+
nors 36
|
213
212
|
vi 36
|
213
|
+
_på 36
|
214
|
+
_for 35
|
214
215
|
den 35
|
215
|
-
|
216
|
-
til_ 35
|
216
|
+
på_ 35
|
217
217
|
reg 35
|
218
218
|
ske_ 35
|
219
|
-
|
220
|
-
|
221
|
-
re_ 34
|
222
|
-
rt_ 34
|
223
|
-
_va 34
|
224
|
-
med 34
|
219
|
+
til_ 35
|
220
|
+
kt 35
|
225
221
|
_på_ 34
|
222
|
+
__ 34
|
226
223
|
_til_ 34
|
227
|
-
|
228
|
-
|
229
|
-
|
224
|
+
med 34
|
225
|
+
rt_ 34
|
226
|
+
_va 34
|
230
227
|
var 33
|
228
|
+
_nor 33
|
229
|
+
lle 33
|
230
|
+
ore 33
|
231
231
|
de_ 32
|
232
|
-
|
233
|
-
|
232
|
+
or_ 32
|
233
|
+
_se 32
|
234
234
|
_S 32
|
235
|
-
|
236
|
-
|
237
|
-
_se 31
|
238
|
-
pe 31
|
239
|
-
or_ 31
|
235
|
+
_det 32
|
236
|
+
m� 32
|
240
237
|
tr 31
|
241
|
-
to 31
|
242
238
|
den_ 31
|
243
|
-
|
239
|
+
to 31
|
244
240
|
ag 31
|
245
|
-
land 31
|
246
241
|
_st 31
|
247
|
-
|
248
|
-
|
242
|
+
land 31
|
243
|
+
sp 31
|
244
|
+
kr 31
|
245
|
+
_med 31
|
246
|
+
re_ 31
|
247
|
+
pe 31
|
249
248
|
ell 30
|
250
|
-
|
251
|
-
|
249
|
+
sk_ 30
|
250
|
+
ol 30
|
252
251
|
pr 29
|
253
|
-
|
254
|
-
må 29
|
252
|
+
le_ 29
|
255
253
|
har 29
|
254
|
+
s_ 29
|
256
255
|
ak 29
|
257
|
-
ni 29
|
258
256
|
med_ 29
|
257
|
+
må 29
|
258
|
+
fr 29
|
259
|
+
ni 29
|
259
260
|
rk 28
|
260
|
-
_har 28
|
261
261
|
der 28
|
262
|
-
|
262
|
+
_la 28
|
263
|
+
_D 28
|
263
264
|
ss 28
|
264
|
-
ek 28
|
265
265
|
har_ 28
|
266
|
+
ek 28
|
266
267
|
mi 28
|
267
|
-
|
268
|
-
|
269
|
-
_re 27
|
270
|
-
�l 27
|
271
|
-
_ko 27
|
268
|
+
_har 28
|
269
|
+
si 28
|
272
270
|
æ 27
|
273
|
-
�r 27
|
274
|
-
år 27
|
275
|
-
ål 27
|
276
|
-
as 27
|
277
|
-
_fr 27
|
278
271
|
ks 27
|
279
|
-
|
280
|
-
|
272
|
+
_ko 27
|
273
|
+
ål 27
|
274
|
+
�l 27
|
281
275
|
� 27
|
282
|
-
ei_ 27
|
283
276
|
_har_ 27
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
277
|
+
_re 27
|
278
|
+
�r 27
|
279
|
+
ei_ 27
|
280
|
+
ie 27
|
281
|
+
as 27
|
282
|
+
_fr 27
|
283
|
+
_den 27
|
284
|
+
år 27
|
285
|
+
_med_ 27
|
288
286
|
_var 26
|
289
|
-
|
287
|
+
men 26
|
288
|
+
ande 26
|
290
289
|
enn 26
|
291
290
|
ng_ 26
|
292
|
-
|
293
|
-
ande 26
|
291
|
+
ut 26
|
294
292
|
ans 26
|
293
|
+
ord 26
|
294
|
+
tin 26
|
295
295
|
- 26
|
296
|
-
|
296
|
+
nne 26
|
297
|
+
_den_ 26
|
298
|
+
ver 25
|
299
|
+
eri 25
|
300
|
+
ru 25
|
301
|
+
gje 25
|
297
302
|
Nore 25
|
298
303
|
mål 25
|
299
|
-
|
300
|
-
Noreg 25
|
304
|
+
ette 25
|
301
305
|
oreg 25
|
302
306
|
_ve 25
|
303
|
-
han_ 25
|
304
307
|
ere 25
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
eri 25
|
309
|
-
gje 25
|
310
|
-
A 24
|
311
|
-
ts 24
|
308
|
+
_han_ 25
|
309
|
+
han_ 25
|
310
|
+
Noreg 25
|
312
311
|
sj 24
|
312
|
+
ts 24
|
313
|
+
tt_ 24
|
313
314
|
gen 24
|
315
|
+
A 24
|
314
316
|
E 24
|
317
|
+
ene 23
|
315
318
|
ting 23
|
316
|
-
|
317
|
-
__ 23
|
318
|
-
ter_ 23
|
319
|
-
nga 23
|
319
|
+
_H 23
|
320
320
|
_å 23
|
321
|
-
ten_ 23
|
322
|
-
lo 23
|
323
|
-
Ha 23
|
324
|
-
bl 23
|
325
321
|
kke 23
|
326
322
|
he 23
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
323
|
+
lo 23
|
324
|
+
ter_ 23
|
325
|
+
bl 23
|
326
|
+
ten_ 23
|
327
|
+
nga 23
|
328
|
+
Ha 23
|
333
329
|
ds 22
|
334
|
-
|
335
|
-
sta 22
|
330
|
+
s� 22
|
336
331
|
_Nore 22
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
332
|
+
sta 22
|
333
|
+
ong 22
|
334
|
+
_det_ 22
|
335
|
+
kon 22
|
336
|
+
isk 22
|
337
|
+
_om 22
|
341
338
|
øy 21
|
342
339
|
rm 21
|
340
|
+
ir 21
|
343
341
|
�r 21
|
344
342
|
gr 21
|
345
|
-
kri 21
|
346
|
-
ir 21
|
347
343
|
us 21
|
348
|
-
|
344
|
+
kri 21
|
345
|
+
ok 21
|
349
346
|
ær 21
|
347
|
+
for_ 21
|
348
|
+
�y 21
|
349
|
+
ane 21
|
350
|
+
_bl 20
|
350
351
|
art 20
|
351
|
-
|
352
|
-
_A 20
|
352
|
+
_lan 20
|
353
353
|
_nors 20
|
354
|
-
|
355
|
-
ad 20
|
354
|
+
ndet 20
|
356
355
|
sl 20
|
356
|
+
ad 20
|
357
357
|
ld 20
|
358
358
|
ist 20
|
359
|
-
|
360
|
-
_E 19
|
361
|
-
orske 19
|
362
|
-
_lan 19
|
363
|
-
rsk_ 19
|
364
|
-
sam 19
|
365
|
-
oreg_ 19
|
366
|
-
inga 19
|
367
|
-
ret 19
|
368
|
-
_ka 19
|
369
|
-
_ut 19
|
370
|
-
f� 19
|
371
|
-
orsk_ 19
|
372
|
-
_s� 19
|
359
|
+
_A 20
|
373
360
|
lt 19
|
361
|
+
ale 19
|
362
|
+
id 19
|
363
|
+
sam 19
|
374
364
|
hu 19
|
375
365
|
jo 19
|
376
|
-
seg 19
|
377
366
|
andet 19
|
367
|
+
reg_ 19
|
368
|
+
ert 19
|
369
|
+
un_ 19
|
370
|
+
_De 19
|
371
|
+
rsk_ 19
|
378
372
|
ort 19
|
373
|
+
_ut 19
|
374
|
+
orsk_ 19
|
375
|
+
_ka 19
|
376
|
+
f� 19
|
377
|
+
inga 19
|
378
|
+
oreg_ 19
|
379
|
+
orske 19
|
380
|
+
_s� 19
|
381
|
+
_E 19
|
379
382
|
fa 19
|
380
|
-
id 19
|
381
|
-
ale 19
|
382
|
-
_De 19
|
383
|
-
un_ 19
|
384
383
|
rske 19
|
385
|
-
|
386
|
-
|
384
|
+
ret 19
|
385
|
+
seg 19
|
386
|
+
ro 18
|
387
387
|
_et 18
|
388
|
-
|
389
|
-
tor 18
|
390
|
-
gs 18
|
388
|
+
mn 18
|
391
389
|
n� 18
|
390
|
+
ang 18
|
391
|
+
rin 18
|
392
392
|
tter 18
|
393
|
-
|
393
|
+
ikk 18
|
394
|
+
ren 18
|
395
|
+
tor 18
|
396
|
+
ov 18
|
394
397
|
_sa 18
|
398
|
+
sm 18
|
395
399
|
inge 18
|
396
|
-
|
397
|
-
ov 18
|
398
|
-
mn 18
|
399
|
-
ro 18
|
400
|
-
rin 18
|
400
|
+
gs 18
|