scylla 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -2
- data/Gemfile.lock +16 -1
- data/lib/scylla/classifier.rb +1 -1
- data/lib/scylla/generator.rb +16 -4
- data/lib/scylla/lms/afrikaans.lm +232 -232
- data/lib/scylla/lms/arabic.lm +175 -175
- data/lib/scylla/lms/bulgarian.lm +225 -225
- data/lib/scylla/lms/catalan.lm +309 -309
- data/lib/scylla/lms/danish.lm +167 -167
- data/lib/scylla/lms/english.lm +398 -398
- data/lib/scylla/lms/finnish.lm +237 -237
- data/lib/scylla/lms/french.lm +148 -148
- data/lib/scylla/lms/german.lm +258 -258
- data/lib/scylla/lms/greek.lm +236 -236
- data/lib/scylla/lms/hebrew.lm +154 -154
- data/lib/scylla/lms/hindi.lm +139 -139
- data/lib/scylla/lms/icelandic.lm +239 -239
- data/lib/scylla/lms/indonesian.lm +244 -244
- data/lib/scylla/lms/italian.lm +248 -248
- data/lib/scylla/lms/japanese.lm +90 -90
- data/lib/scylla/lms/korean.lm +306 -306
- data/lib/scylla/lms/norwegian.lm +193 -193
- data/lib/scylla/lms/polish.lm +241 -241
- data/lib/scylla/lms/portuguese.lm +232 -232
- data/lib/scylla/lms/romanian.lm +246 -246
- data/lib/scylla/lms/slovak.lm +242 -242
- data/lib/scylla/lms/slovenian.lm +229 -229
- data/lib/scylla/lms/spanish.lm +164 -164
- data/lib/scylla/lms/swedish.lm +157 -157
- data/lib/scylla/lms/tagalog.lm +247 -247
- data/lib/scylla/lms/thai.lm +252 -252
- data/lib/scylla/lms/turkish.lm +285 -285
- data/lib/scylla/lms/vietnamese.lm +250 -250
- data/lib/scylla/lms/welsh.lm +248 -248
- data/lib/scylla/resources.rb +1 -9
- data/lib/scylla.rb +4 -0
- data/scylla.gemspec +2 -120
- data/source_texts/english.txt +62 -27
- data/test/classifier_test.rb +1 -3
- data/test/fixtures/lms/danish.lm +173 -173
- data/test/fixtures/lms/english.lm +220 -220
- data/test/fixtures/lms/french.lm +175 -175
- data/test/fixtures/lms/german.lm +254 -254
- data/test/fixtures/lms/hindi.lm +139 -139
- data/test/fixtures/lms/italian.lm +236 -236
- data/test/fixtures/lms/japanese.lm +88 -88
- data/test/fixtures/lms/norwegian.lm +182 -182
- data/test/fixtures/lms/spanish.lm +164 -164
- data/test/fixtures/test_languages/spanish +0 -1
- data/test/generator_test.rb +13 -0
- data/test/helper.rb +2 -0
- metadata +18 -25
- data/.document +0 -5
- data/lib/scylla/lms/13375P33K.lm +0 -400
- data/scylla-0.1.0.gem +0 -0
- data/source_texts/13375P33K.txt +0 -199
- data/test/fixtures/lms/13375p33k.lm +0 -400
- data/test/fixtures/source_texts/13375P33K.txt +0 -199
data/lib/scylla/lms/german.lm
CHANGED
@@ -9,392 +9,392 @@ a 197
|
|
9
9
|
h 190
|
10
10
|
d 187
|
11
11
|
u 161
|
12
|
-
en 137
|
13
12
|
er 137
|
13
|
+
en 137
|
14
14
|
l 131
|
15
15
|
c 125
|
16
16
|
o 116
|
17
17
|
ch 112
|
18
18
|
g 105
|
19
19
|
m 101
|
20
|
-
e_
|
21
|
-
n_
|
20
|
+
e_ 99
|
21
|
+
n_ 92
|
22
22
|
te 80
|
23
|
+
_d 78
|
23
24
|
in 77
|
24
|
-
_d 76
|
25
25
|
de 76
|
26
|
+
r_ 75
|
26
27
|
ei 73
|
27
|
-
r_ 72
|
28
28
|
k 68
|
29
29
|
b 66
|
30
30
|
f 58
|
31
|
+
er_ 56
|
31
32
|
un 56
|
32
33
|
ie 54
|
33
|
-
er_ 53
|
34
34
|
nd 52
|
35
|
+
en_ 52
|
35
36
|
w 50
|
36
|
-
|
37
|
-
_e 48
|
37
|
+
_e 49
|
38
38
|
ge 48
|
39
|
-
__
|
39
|
+
__ 47
|
40
40
|
ng 45
|
41
41
|
t_ 44
|
42
42
|
, 42
|
43
43
|
der 41
|
44
|
-
ne 39
|
45
44
|
st 39
|
46
|
-
|
45
|
+
ne 39
|
46
|
+
_s 37
|
47
47
|
,_ 36
|
48
|
-
|
48
|
+
. 36
|
49
49
|
he 36
|
50
|
-
|
50
|
+
z 36
|
51
|
+
es 34
|
51
52
|
re 34
|
52
|
-
s_ 34
|
53
53
|
m_ 34
|
54
|
+
s_ 34
|
54
55
|
di 34
|
55
|
-
es 33
|
56
56
|
it 33
|
57
|
+
_de 33
|
57
58
|
v 33
|
58
|
-
_de 32
|
59
59
|
ie_ 32
|
60
60
|
_i 32
|
61
|
-
be 31
|
62
|
-
au 31
|
63
61
|
ein 31
|
62
|
+
be 31
|
64
63
|
S 31
|
64
|
+
au 31
|
65
65
|
ti 30
|
66
|
-
|
66
|
+
ic 29
|
67
67
|
� 29
|
68
68
|
_a 29
|
69
|
-
|
69
|
+
d_ 29
|
70
|
+
le 28
|
70
71
|
_w 28
|
71
72
|
� 28
|
72
|
-
d_ 28
|
73
|
-
le 28
|
74
|
-
sch 27
|
75
73
|
sc 27
|
74
|
+
sch 27
|
76
75
|
nt 26
|
77
76
|
ung 26
|
78
|
-
|
77
|
+
an 25
|
79
78
|
ich 25
|
80
|
-
|
79
|
+
si 25
|
81
80
|
is 25
|
82
|
-
|
83
|
-
p 24
|
81
|
+
die 25
|
84
82
|
che 24
|
85
83
|
or 24
|
84
|
+
p 24
|
85
|
+
h_ 24
|
86
86
|
el 24
|
87
|
-
|
88
|
-
der_ 23
|
89
|
-
_die 23
|
87
|
+
der_ 24
|
90
88
|
_di 23
|
91
|
-
|
89
|
+
_die 23
|
92
90
|
on 23
|
93
|
-
|
91
|
+
nd_ 23
|
92
|
+
al 23
|
94
93
|
rt 22
|
95
|
-
|
94
|
+
_ein 22
|
95
|
+
_ei 22
|
96
|
+
ch_ 22
|
97
|
+
ht 21
|
98
|
+
und 21
|
99
|
+
em 21
|
100
|
+
ten 21
|
96
101
|
li 21
|
97
102
|
_S 21
|
98
|
-
ten 21
|
99
103
|
ra 21
|
100
|
-
und 21
|
101
|
-
em 21
|
102
|
-
ht 21
|
103
|
-
_ein 21
|
104
104
|
die_ 21
|
105
|
-
ch_ 20
|
106
105
|
at 20
|
107
106
|
hr 20
|
108
|
-
ke 20
|
109
107
|
in_ 20
|
108
|
+
ke 20
|
110
109
|
ur 19
|
111
|
-
_die_ 19
|
112
|
-
A 19
|
113
110
|
_v 19
|
111
|
+
A 19
|
114
112
|
me 19
|
115
|
-
|
116
|
-
|
117
|
-
_der 18
|
113
|
+
_der 19
|
114
|
+
_die_ 19
|
118
115
|
ine 18
|
119
|
-
|
116
|
+
ns 18
|
120
117
|
se 18
|
121
|
-
|
122
|
-
|
123
|
-
nge 17
|
124
|
-
ri 17
|
118
|
+
_in 18
|
119
|
+
ll 18
|
125
120
|
eine 17
|
126
|
-
eit 17
|
127
121
|
im 17
|
122
|
+
ri 17
|
123
|
+
._ 17
|
124
|
+
nge 17
|
128
125
|
ar 17
|
129
|
-
|
130
|
-
|
126
|
+
ter 17
|
127
|
+
L 17
|
128
|
+
eit 17
|
129
|
+
we 16
|
131
130
|
D 16
|
131
|
+
gen 16
|
132
|
+
_h 16
|
133
|
+
den 16
|
132
134
|
da 16
|
133
|
-
we 16
|
134
|
-
_h 15
|
135
|
-
ig 15
|
136
|
-
eu 15
|
137
|
-
K 15
|
138
|
-
cht 15
|
139
|
-
._ 15
|
140
135
|
zu 15
|
136
|
+
eu 15
|
137
|
+
ig 15
|
141
138
|
_er 15
|
139
|
+
K 15
|
142
140
|
F 15
|
141
|
+
cht 15
|
142
|
+
te_ 15
|
143
|
+
E 14
|
144
|
+
eh 14
|
145
|
+
us 14
|
146
|
+
und_ 14
|
147
|
+
_A 14
|
143
148
|
_au 14
|
144
149
|
ac 14
|
145
|
-
und_ 14
|
146
|
-
te_ 14
|
147
150
|
mm 14
|
151
|
+
vo 14
|
152
|
+
nde 14
|
148
153
|
rs 14
|
149
|
-
E 14
|
150
154
|
ach 14
|
151
|
-
eh 14
|
152
|
-
vo 14
|
153
155
|
ha 14
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
156
|
+
�r 13
|
157
|
+
ab 13
|
158
|
+
B 13
|
159
|
+
_da 13
|
160
|
+
� 13
|
161
|
+
n. 13
|
158
162
|
_L 13
|
159
163
|
_si 13
|
160
|
-
ls 13
|
161
164
|
fe 13
|
162
|
-
|
163
|
-
|
164
|
-
� 13
|
165
|
-
|
166
|
-
B 13
|
167
|
-
et 12
|
168
|
-
lic 12
|
165
|
+
ls 13
|
166
|
+
_u 13
|
167
|
+
� 13
|
168
|
+
_z 12
|
169
169
|
em_ 12
|
170
|
-
|
171
|
-
nte 12
|
172
|
-
_da 12
|
173
|
-
am 12
|
170
|
+
et 12
|
174
171
|
ste 12
|
175
|
-
|
176
|
-
lle 12
|
172
|
+
rk 12
|
177
173
|
as 12
|
178
174
|
R 12
|
179
|
-
_z 12
|
180
|
-
rk 12
|
181
|
-
_u 12
|
182
175
|
rd 12
|
183
176
|
T 12
|
177
|
+
lic 12
|
184
178
|
_b 12
|
179
|
+
_eine 12
|
180
|
+
lle 12
|
181
|
+
nte 12
|
182
|
+
g_ 12
|
183
|
+
rn 12
|
184
|
+
_der_ 12
|
185
|
+
wi 12
|
186
|
+
_D 12
|
187
|
+
am 12
|
188
|
+
hl 11
|
189
|
+
on_ 11
|
185
190
|
uf 11
|
186
|
-
|
187
|
-
|
188
|
-
rte 11
|
189
|
-
g_ 11
|
190
|
-
ge_ 11
|
191
|
+
ni 11
|
192
|
+
_B 11
|
191
193
|
- 11
|
192
|
-
_eine 11
|
193
|
-
hl 11
|
194
|
-
Sc 11
|
195
|
-
ck 11
|
196
|
-
ts 11
|
197
194
|
lich 11
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
Sch 11
|
195
|
+
_in_ 11
|
196
|
+
ck 11
|
197
|
+
ru 11
|
198
|
+
Sc 11
|
203
199
|
_n 11
|
204
|
-
|
200
|
+
men 11
|
205
201
|
ent 11
|
206
|
-
|
207
|
-
|
202
|
+
ve 11
|
203
|
+
nn 11
|
204
|
+
G 11
|
205
|
+
U 11
|
206
|
+
rte 11
|
207
|
+
ers 11
|
208
|
+
Sch 11
|
209
|
+
n, 11
|
210
|
+
ren 11
|
208
211
|
tt 11
|
209
|
-
_in_ 11
|
210
212
|
f� 11
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
_g
|
215
|
-
f�r 10
|
213
|
+
ge_ 11
|
214
|
+
tr 11
|
215
|
+
ts 11
|
216
|
+
_g 11
|
216
217
|
na 10
|
218
|
+
_E 10
|
219
|
+
ten_ 10
|
217
220
|
rc 10
|
218
|
-
|
219
|
-
nder 10
|
220
|
-
�r_ 10
|
221
|
-
hi 10
|
222
|
-
sin 10
|
223
|
-
on_ 10
|
224
|
-
_B 10
|
225
|
-
isc 10
|
226
|
-
um 10
|
227
|
-
_" 10
|
228
|
-
ut 10
|
229
|
-
isch 10
|
221
|
+
sa 10
|
230
222
|
auf 10
|
231
|
-
|
223
|
+
den_ 10
|
224
|
+
f�r 10
|
225
|
+
hi 10
|
226
|
+
_F 10
|
227
|
+
�r_ 10
|
232
228
|
gs 10
|
233
|
-
|
229
|
+
ut 10
|
230
|
+
ht_ 10
|
234
231
|
en. 10
|
235
|
-
|
232
|
+
isc 10
|
236
233
|
_vo 10
|
237
|
-
|
238
|
-
|
234
|
+
_K 10
|
235
|
+
um 10
|
239
236
|
_zu 10
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
_Sch 9
|
247
|
-
den_ 9
|
248
|
-
it_ 9
|
249
|
-
_Sc 9
|
250
|
-
hre 9
|
251
|
-
O 9
|
252
|
-
ma 9
|
253
|
-
_T 9
|
254
|
-
ngs 9
|
237
|
+
als 10
|
238
|
+
e, 10
|
239
|
+
ma 10
|
240
|
+
nder 10
|
241
|
+
sin 10
|
242
|
+
isch 10
|
255
243
|
ere 9
|
256
244
|
f�r_ 9
|
257
|
-
k_ 9
|
258
245
|
rch 9
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
il 9
|
246
|
+
it_ 9
|
247
|
+
_Sch 9
|
248
|
+
e,_ 9
|
249
|
+
k_ 9
|
250
|
+
om 9
|
251
|
+
ng_ 9
|
266
252
|
erk 9
|
267
|
-
Un 9
|
268
253
|
no 9
|
254
|
+
hre 9
|
255
|
+
il 9
|
256
|
+
ngs 9
|
257
|
+
ik 9
|
258
|
+
nen 9
|
259
|
+
Un 9
|
260
|
+
lt 9
|
261
|
+
ze 9
|
262
|
+
ungs 9
|
269
263
|
M 9
|
264
|
+
ver 9
|
265
|
+
_T 9
|
266
|
+
chen 9
|
267
|
+
hen 9
|
268
|
+
O 9
|
269
|
+
_be 9
|
270
270
|
so 9
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
271
|
+
n,_ 9
|
272
|
+
_Sc 9
|
273
|
+
_un 9
|
274
|
+
ner 8
|
275
|
+
sic 8
|
276
|
+
wo 8
|
275
277
|
le_ 8
|
276
|
-
sp 8
|
277
|
-
_k 8
|
278
278
|
od 8
|
279
|
-
|
280
|
-
urc 8
|
279
|
+
_G 8
|
281
280
|
_ha 8
|
282
|
-
|
283
|
-
run 8
|
284
|
-
tra 8
|
285
|
-
ber 8
|
286
|
-
end 8
|
287
|
-
ir 8
|
288
|
-
du 8
|
289
|
-
ind 8
|
290
|
-
ng_ 8
|
291
|
-
bei 8
|
292
|
-
ner 8
|
293
|
-
sic 8
|
281
|
+
mme 8
|
294
282
|
_Un 8
|
295
|
-
|
296
|
-
tig 8
|
297
|
-
"_ 8
|
298
|
-
es_ 8
|
299
|
-
e,_ 8
|
283
|
+
rde 8
|
300
284
|
ute 8
|
301
|
-
|
302
|
-
|
303
|
-
wo 8
|
304
|
-
ls_ 8
|
305
|
-
kei 8
|
306
|
-
_G 8
|
307
|
-
Le 8
|
285
|
+
uc 8
|
286
|
+
ol 8
|
308
287
|
das 8
|
309
288
|
_Le 8
|
310
|
-
|
289
|
+
tra 8
|
290
|
+
du 8
|
291
|
+
_das 8
|
292
|
+
urc 8
|
293
|
+
Le 8
|
294
|
+
sp 8
|
295
|
+
ft 8
|
311
296
|
ens 8
|
312
|
-
|
313
|
-
|
297
|
+
_U 8
|
298
|
+
kei 8
|
299
|
+
tig 8
|
300
|
+
bei 8
|
314
301
|
_f 8
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
302
|
+
ir 8
|
303
|
+
la 8
|
304
|
+
ls_ 8
|
305
|
+
end 8
|
306
|
+
_k 8
|
307
|
+
sich 8
|
308
|
+
V 8
|
309
|
+
ind 8
|
310
|
+
run 8
|
311
|
+
ber 8
|
312
|
+
es_ 8
|
313
|
+
_und 7
|
314
|
+
St 7
|
315
|
+
des 7
|
316
|
+
l_ 7
|
317
|
+
ich_ 7
|
318
|
+
sti 7
|
324
319
|
ngen 7
|
320
|
+
io 7
|
321
|
+
H 7
|
322
|
+
durch 7
|
325
323
|
ho 7
|
326
|
-
|
327
|
-
eb 7
|
328
|
-
des 7
|
329
|
-
ne_ 7
|
330
|
-
y 7
|
331
|
-
eut 7
|
332
|
-
ein_ 7
|
333
|
-
mi 7
|
334
|
-
das_ 7
|
335
|
-
dur 7
|
336
|
-
St 7
|
324
|
+
keit 7
|
337
325
|
ert 7
|
338
|
-
|
339
|
-
|
340
|
-
_o 7
|
341
|
-
_al 7
|
342
|
-
urch 7
|
343
|
-
Leu 7
|
344
|
-
ich_ 7
|
326
|
+
ein_ 7
|
327
|
+
ih 7
|
345
328
|
tte 7
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
329
|
+
_ve 7
|
330
|
+
urch 7
|
331
|
+
_al 7
|
332
|
+
ko 7
|
333
|
+
as_ 7
|
334
|
+
von 7
|
335
|
+
rei 7
|
336
|
+
en, 7
|
350
337
|
durc 7
|
351
|
-
_das 7
|
352
|
-
ba 7
|
353
|
-
durch 7
|
354
338
|
ode 7
|
355
|
-
|
356
|
-
_ver 7
|
357
|
-
_Leu 7
|
358
|
-
im_ 7
|
339
|
+
icht 7
|
359
340
|
als_ 7
|
360
|
-
|
361
|
-
unge 7
|
341
|
+
ba 7
|
362
342
|
P 7
|
343
|
+
nh 7
|
363
344
|
f_ 7
|
345
|
+
uch 7
|
346
|
+
e. 7
|
347
|
+
_und_ 7
|
348
|
+
_ge 7
|
349
|
+
n._ 7
|
350
|
+
hat 7
|
351
|
+
das_ 7
|
352
|
+
Leu 7
|
353
|
+
_das_ 7
|
354
|
+
r� 7
|
355
|
+
_Leu 7
|
356
|
+
ne_ 7
|
357
|
+
eute 7
|
358
|
+
_wi 7
|
359
|
+
do 7
|
360
|
+
and 7
|
361
|
+
im_ 7
|
362
|
+
y 7
|
363
|
+
_ver 7
|
364
|
+
dur 7
|
365
|
+
_o 7
|
366
|
+
ing 7
|
367
|
+
mi 7
|
368
|
+
eb 7
|
364
369
|
len 7
|
365
|
-
|
366
|
-
|
367
|
-
io 7
|
368
|
-
_und 7
|
369
|
-
keit 7
|
370
|
-
eg 6
|
371
|
-
eute_ 6
|
372
|
-
ren_ 6
|
373
|
-
auch 6
|
374
|
-
_von 6
|
375
|
-
erst 6
|
376
|
-
_ih 6
|
377
|
-
nge_ 6
|
378
|
-
gu 6
|
379
|
-
nsi 6
|
380
|
-
wa 6
|
381
|
-
ss 6
|
382
|
-
chl 6
|
370
|
+
unge 7
|
371
|
+
eut 7
|
383
372
|
dor 6
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
373
|
+
ta 6
|
374
|
+
bi 6
|
375
|
+
hw 6
|
376
|
+
hu 6
|
377
|
+
ung_ 6
|
378
|
+
sind 6
|
379
|
+
von_ 6
|
380
|
+
nge_ 6
|
381
|
+
ren_ 6
|
382
|
+
_f� 6
|
383
|
+
nter 6
|
384
|
+
Z 6
|
385
|
+
ter_ 6
|
386
|
+
ische 6
|
387
|
+
rst 6
|
388
|
+
_St 6
|
391
389
|
he_ 6
|
392
390
|
_we 6
|
393
391
|
_sich 6
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
392
|
+
mmen 6
|
393
|
+
th 6
|
394
|
+
x 6
|
395
|
+
_hat 6
|
396
|
+
ute_ 6
|
397
|
+
_ih 6
|
398
|
+
gu 6
|
399
|
+
t. 6
|
400
|
+
vor 6
|