scylla 0.4.3 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -0
- data/Gemfile.lock +10 -0
- data/VERSION +1 -1
- data/lib/scylla/generator.rb +1 -1
- data/lib/scylla/lms/13375P33K.lm +156 -156
- data/lib/scylla/lms/arabic.lm +133 -133
- data/lib/scylla/lms/bulgarian.lm +122 -122
- data/lib/scylla/lms/catalan.lm +151 -151
- data/lib/scylla/lms/danish.lm +137 -137
- data/lib/scylla/lms/english.lm +207 -207
- data/lib/scylla/lms/french.lm +400 -400
- data/lib/scylla/lms/japanese.lm +400 -400
- data/lib/scylla/lms/korean.lm +233 -233
- data/lib/scylla/lms/norwegian.lm +398 -398
- data/lib/scylla/lms/spanish.lm +98 -98
- data/lib/scylla/lms/swedish.lm +123 -123
- data/lib/scylla/lms/tagalog.lm +223 -223
- data/lib/scylla/lms/welsh.lm +234 -234
- data/lib/scylla/resources.rb +10 -10
- data/scylla.gemspec +17 -40
- data/source_texts/catalan.txt +28 -28
- data/source_texts/danish.txt +62 -62
- data/source_texts/english.txt +10 -10
- data/source_texts/french.txt +470 -77
- data/source_texts/japanese.txt +453 -199
- data/source_texts/norwegian.txt +96 -63
- data/source_texts/spanish.txt +269 -269
- data/test/classifier_test.rb +2 -2
- data/test/fixtures/lms/13375p33k.lm +156 -156
- data/test/fixtures/lms/danish.lm +137 -137
- data/test/fixtures/lms/english.lm +207 -207
- data/test/fixtures/lms/french.lm +400 -400
- data/test/fixtures/lms/hindi.lm +400 -0
- data/test/fixtures/lms/italian.lm +400 -0
- data/test/fixtures/lms/japanese.lm +400 -400
- data/test/fixtures/lms/norwegian.lm +400 -0
- data/test/fixtures/lms/spanish.lm +98 -98
- data/test/fixtures/source_texts/danish.txt +62 -62
- data/test/fixtures/source_texts/english.txt +10 -10
- data/test/fixtures/source_texts/french.txt +470 -77
- data/test/fixtures/source_texts/hindi.txt +199 -0
- data/test/fixtures/source_texts/italian.txt +120 -0
- data/test/fixtures/source_texts/japanese.txt +453 -199
- data/test/fixtures/source_texts/norwegian.txt +190 -0
- data/test/fixtures/source_texts/spanish.txt +269 -269
- data/test/fixtures/test_languages/english +61 -0
- data/test/fixtures/test_languages/french +0 -0
- data/test/fixtures/test_languages/german +29 -0
- data/test/fixtures/test_languages/hindi +3 -0
- data/test/fixtures/test_languages/italian +6 -0
- data/test/fixtures/test_languages/japanese +79 -0
- data/test/fixtures/test_languages/norwegian +14 -0
- data/test/fixtures/test_languages/spanish +22 -0
- data/test/generator_test.rb +0 -1
- data/test/language_test.rb +28 -0
- metadata +20 -43
- data/lib/scylla/lms/esperanto.lm +0 -400
- data/lib/scylla/lms/hungarian.lm +0 -400
- data/lib/scylla/lms/irish.lm +0 -400
- data/lib/scylla/lms/kannada.lm +0 -400
- data/lib/scylla/lms/latin.lm +0 -400
- data/lib/scylla/lms/malay.lm +0 -400
- data/lib/scylla/lms/marathi.lm +0 -400
- data/lib/scylla/lms/mingo.lm +0 -400
- data/lib/scylla/lms/nepali.lm +0 -400
- data/lib/scylla/lms/quechua.lm +0 -400
- data/lib/scylla/lms/rumantsch.lm +0 -400
- data/lib/scylla/lms/sanskrit.lm +0 -400
- data/lib/scylla/lms/scots_gaelic.lm +0 -400
- data/lib/scylla/lms/serbian.lm +0 -400
- data/lib/scylla/lms/swahili.lm +0 -400
- data/lib/scylla/lms/tamil.lm +0 -400
- data/lib/scylla/lms/ukrainian.lm +0 -400
- data/lib/scylla/lms/yiddish.lm +0 -400
- data/source_texts/esperanto.txt +0 -199
- data/source_texts/hungarian.txt +0 -102
- data/source_texts/irish.txt +0 -209
- data/source_texts/kannada.txt +0 -283
- data/source_texts/latin.txt +0 -120
- data/source_texts/malay.txt +0 -108
- data/source_texts/marathi.txt +0 -100
- data/source_texts/mingo.txt +0 -146
- data/source_texts/nepali.txt +0 -131
- data/source_texts/quechua.txt +0 -108
- data/source_texts/rumantsch.txt +0 -110
- data/source_texts/sanskrit.txt +0 -135
- data/source_texts/scots_gaelic.txt +0 -93
- data/source_texts/serbian.txt +0 -121
- data/source_texts/swahili.txt +0 -120
- data/source_texts/tamil.txt +0 -167
- data/source_texts/ukrainian.txt +0 -214
- data/source_texts/yiddish-utf.txt +0 -83
- data/test/fixtures/lms/kannada.lm +0 -400
- data/test/fixtures/source_texts/kannada.txt +0 -283
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,13 +1,22 @@
|
|
1
1
|
GEM
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
|
+
columnize (0.3.4)
|
4
5
|
git (1.2.5)
|
5
6
|
jeweler (1.6.4)
|
6
7
|
bundler (~> 1.0)
|
7
8
|
git (>= 1.2.5)
|
8
9
|
rake
|
10
|
+
linecache (0.46)
|
11
|
+
rbx-require-relative (> 0.0.4)
|
9
12
|
mocha (0.9.12)
|
10
13
|
rake (0.9.2)
|
14
|
+
rbx-require-relative (0.0.5)
|
15
|
+
ruby-debug (0.10.4)
|
16
|
+
columnize (>= 0.1)
|
17
|
+
ruby-debug-base (~> 0.10.4.0)
|
18
|
+
ruby-debug-base (0.10.4)
|
19
|
+
linecache (>= 0.3)
|
11
20
|
shoulda (2.11.3)
|
12
21
|
|
13
22
|
PLATFORMS
|
@@ -17,4 +26,5 @@ DEPENDENCIES
|
|
17
26
|
bundler (~> 1.0.0)
|
18
27
|
jeweler (~> 1.6.4)
|
19
28
|
mocha (~> 0.9.12)
|
29
|
+
ruby-debug (~> 0.10.4)
|
20
30
|
shoulda
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.5.0
|
data/lib/scylla/generator.rb
CHANGED
data/lib/scylla/lms/13375P33K.lm
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
_
|
2
|
-
__
|
1
|
+
_ 23082
|
2
|
+
__ 3963
|
3
3
|
| 1114
|
4
4
|
|_ 748
|
5
5
|
n 708
|
@@ -21,14 +21,14 @@ p 425
|
|
21
21
|
_|_ 395
|
22
22
|
h_ 365
|
23
23
|
_d 363
|
24
|
-
z_ 350
|
25
24
|
n_ 350
|
25
|
+
z_ 350
|
26
26
|
c 345
|
27
27
|
v 339
|
28
28
|
N 331
|
29
29
|
_r_ 326
|
30
|
-
d_ 319
|
31
30
|
_p 319
|
31
|
+
d_ 319
|
32
32
|
f 302
|
33
33
|
l 297
|
34
34
|
u 295
|
@@ -47,12 +47,12 @@ k 229
|
|
47
47
|
f_ 221
|
48
48
|
_c 220
|
49
49
|
H 208
|
50
|
-
U 203
|
51
50
|
_h 203
|
52
|
-
|
51
|
+
U 203
|
53
52
|
_w 202
|
53
|
+
w 202
|
54
54
|
m_ 199
|
55
|
-
_,
|
55
|
+
_, 196
|
56
56
|
_u 193
|
57
57
|
\ 190
|
58
58
|
_f_ 190
|
@@ -62,9 +62,9 @@ T 187
|
|
62
62
|
L 187
|
63
63
|
v_ 184
|
64
64
|
_N 179
|
65
|
-
_,_
|
66
|
-
D 174
|
65
|
+
_,_ 176
|
67
66
|
I 174
|
67
|
+
D 174
|
68
68
|
_m 173
|
69
69
|
p_ 170
|
70
70
|
_g 170
|
@@ -80,25 +80,25 @@ _l 153
|
|
80
80
|
g_ 153
|
81
81
|
_b 152
|
82
82
|
_t_ 151
|
83
|
+
|\ 149
|
83
84
|
_|\ 149
|
84
85
|
_. 149
|
85
|
-
|\ 149
|
86
86
|
_( 148
|
87
87
|
R_ 143
|
88
88
|
l_ 140
|
89
89
|
_k 138
|
90
90
|
k_ 136
|
91
|
-
c_ 134
|
92
91
|
_._ 134
|
93
|
-
|
92
|
+
c_ 134
|
94
93
|
w_ 133
|
94
|
+
_w_ 133
|
95
95
|
T_ 130
|
96
96
|
N_ 125
|
97
97
|
- 121
|
98
98
|
_m_ 120
|
99
99
|
+_ 119
|
100
|
-
E 118
|
101
100
|
_y_ 118
|
101
|
+
E 118
|
102
102
|
O 117
|
103
103
|
_g_ 114
|
104
104
|
_p_ 112
|
@@ -107,294 +107,294 @@ x 107
|
|
107
107
|
o 107
|
108
108
|
W 106
|
109
109
|
i 105
|
110
|
-
|
110
|
+
_c_ 104
|
111
111
|
_|\| 104
|
112
|
+
|\| 104
|
112
113
|
\| 104
|
113
|
-
_c_ 104
|
114
|
-
M 102
|
115
114
|
u_ 102
|
116
115
|
$ 102
|
116
|
+
M 102
|
117
117
|
vv 101
|
118
118
|
H_ 97
|
119
119
|
L_ 94
|
120
120
|
D_ 91
|
121
121
|
_I 90
|
122
|
+
(_ 89
|
122
123
|
F 88
|
123
|
-
/ 86
|
124
|
-
\/ 86
|
125
124
|
_R 86
|
125
|
+
\/ 86
|
126
|
+
/ 86
|
126
127
|
_T 84
|
127
128
|
s 84
|
128
129
|
_W 84
|
129
|
-
_U 83
|
130
130
|
b_ 83
|
131
131
|
_b_ 83
|
132
|
+
_U 83
|
132
133
|
_l_ 81
|
133
134
|
x_ 81
|
134
135
|
_v_ 79
|
136
|
+
_(_ 79
|
135
137
|
_D 78
|
136
138
|
_u_ 78
|
137
|
-
(_ 77
|
138
139
|
_vv 77
|
139
140
|
_s 76
|
140
141
|
$_ 72
|
141
142
|
P 70
|
142
143
|
_L 69
|
143
144
|
_M 68
|
144
|
-
_(_ 67
|
145
145
|
e_ 67
|
146
|
-
@R 66
|
147
146
|
\|_ 66
|
148
|
-
_k_ 66
|
149
|
-
_|\|_ 66
|
150
147
|
|\|_ 66
|
148
|
+
@R 66
|
151
149
|
Y 66
|
152
|
-
|
150
|
+
_|\|_ 66
|
151
|
+
_k_ 66
|
153
152
|
|- 65
|
153
|
+
-| 65
|
154
154
|
_T_ 64
|
155
155
|
_x 62
|
156
|
-
_+_ 62
|
157
|
-
vv_ 62
|
158
156
|
@n 62
|
157
|
+
vv_ 62
|
158
|
+
_+_ 62
|
159
159
|
_F 61
|
160
|
-
F_ 60
|
161
160
|
IN 60
|
162
161
|
_N_ 60
|
162
|
+
F_ 60
|
163
163
|
_H 59
|
164
164
|
_P 59
|
165
165
|
@_ 57
|
166
|
-
_i 56
|
167
166
|
_a 56
|
168
|
-
|
167
|
+
_i 56
|
169
168
|
_@R 54
|
169
|
+
C 54
|
170
|
+
gh 50
|
170
171
|
s_ 50
|
171
172
|
tz 50
|
172
|
-
gh 50
|
173
|
-
_R_ 49
|
174
173
|
_vv_ 49
|
175
|
-
|
174
|
+
_R_ 49
|
176
175
|
! 48
|
177
176
|
_F_ 48
|
178
|
-
|
177
|
+
_C 48
|
179
178
|
_O 47
|
180
179
|
+h 47
|
180
|
+
_+h 47
|
181
181
|
cH 47
|
182
182
|
|-| 47
|
183
183
|
_x_ 46
|
184
|
-
_|\/ 45
|
185
|
-
\/| 45
|
186
184
|
/| 45
|
187
185
|
|\/| 45
|
188
|
-
_|\/| 45
|
189
186
|
|\/ 45
|
187
|
+
_|\/| 45
|
188
|
+
\/| 45
|
189
|
+
_|\/ 45
|
190
|
+
tz_ 44
|
190
191
|
u|_ 44
|
191
192
|
u| 44
|
192
|
-
tz_ 44
|
193
193
|
E_ 44
|
194
|
+
Wh 43
|
194
195
|
_IN 43
|
195
|
-
Ul 43
|
196
196
|
_gh 43
|
197
|
-
|
197
|
+
Ul 43
|
198
198
|
) 43
|
199
|
-
_s_ 42
|
200
199
|
|| 42
|
200
|
+
_s_ 42
|
201
201
|
A 41
|
202
202
|
Wh_ 41
|
203
|
-
@R_ 40
|
204
|
-
Or 40
|
205
|
-
_L_ 40
|
206
203
|
p| 40
|
204
|
+
Or 40
|
205
|
+
@R_ 40
|
207
206
|
nd 40
|
208
|
-
|
209
|
-
_@R_ 39
|
210
|
-
p@ 39
|
211
|
-
_Wh 39
|
207
|
+
_L_ 40
|
212
208
|
G 39
|
209
|
+
z, 39
|
210
|
+
_Wh 39
|
213
211
|
_p@ 39
|
214
212
|
_wh 39
|
213
|
+
z,_ 39
|
215
214
|
wh 39
|
216
|
-
|
215
|
+
p@ 39
|
216
|
+
_@R_ 39
|
217
217
|
d, 38
|
218
|
+
nD 38
|
218
219
|
rz 38
|
219
220
|
_rz 38
|
220
|
-
nD 38
|
221
221
|
aR 37
|
222
|
+
(h 37
|
222
223
|
df 37
|
224
|
+
c| 37
|
223
225
|
_D_ 37
|
224
|
-
(h 37
|
225
226
|
M_ 37
|
226
227
|
_Wh_ 37
|
227
|
-
c| 37
|
228
228
|
HE 36
|
229
|
-
(h_ 36
|
230
229
|
_d, 36
|
230
|
+
(h_ 36
|
231
231
|
Up 36
|
232
|
-
_nT 35
|
233
|
-
+H 35
|
234
|
-
_@n 35
|
235
232
|
nT 35
|
236
233
|
LL 35
|
234
|
+
_@n 35
|
237
235
|
_d@ 35
|
238
|
-
cH_ 35
|
239
236
|
d@ 35
|
240
|
-
|
237
|
+
_nT 35
|
238
|
+
cH_ 35
|
239
|
+
+H 35
|
240
|
+
pdf 34
|
241
241
|
_u| 34
|
242
|
-
|
243
|
-
|
242
|
+
B 34
|
243
|
+
_Up 34
|
244
|
+
pd 34
|
244
245
|
_j 34
|
245
246
|
n. 34
|
246
|
-
_Up 34
|
247
247
|
y, 34
|
248
|
-
|
249
|
-
|
250
|
-
|
248
|
+
_u|_ 34
|
249
|
+
j 34
|
250
|
+
lt 33
|
251
251
|
_|| 33
|
252
|
-
d,_ 33
|
253
252
|
@$ 33
|
254
|
-
|
255
|
-
|
256
|
-
_aR 32
|
257
|
-
_zUl 32
|
258
|
-
Rc 32
|
259
|
-
q 32
|
260
|
-
p|_ 32
|
261
|
-
_$ 32
|
262
|
-
_) 32
|
263
|
-
_p| 32
|
253
|
+
d,_ 33
|
254
|
+
_@$ 33
|
264
255
|
_zU 32
|
265
|
-
|
266
|
-
zUl 32
|
256
|
+
Rc 32
|
267
257
|
_p|_ 32
|
268
|
-
|
269
|
-
|
258
|
+
_) 32
|
259
|
+
_$ 32
|
260
|
+
p|_ 32
|
270
261
|
aRc 32
|
262
|
+
Y_ 32
|
263
|
+
@r 32
|
264
|
+
_aRc 32
|
265
|
+
zUl 32
|
266
|
+
_p| 32
|
267
|
+
_zUl 32
|
271
268
|
M@ 32
|
272
269
|
zU 32
|
273
|
-
|
270
|
+
_aR 32
|
271
|
+
z. 32
|
272
|
+
y,_ 32
|
273
|
+
q 32
|
274
274
|
_B 31
|
275
|
+
-|_ 31
|
276
|
+
o_ 31
|
275
277
|
_d,_ 31
|
276
278
|
|-|_ 31
|
277
|
-
o_ 31
|
278
279
|
nd_ 31
|
279
|
-
-|_ 31
|
280
280
|
|\/|_ 30
|
281
|
+
\/|_ 30
|
281
282
|
_M_ 30
|
282
283
|
/|_ 30
|
283
|
-
\/|_ 30
|
284
|
-
@g 29
|
285
284
|
_b|_ 29
|
286
|
-
_+H 29
|
287
285
|
|__ 29
|
288
|
-
b| 29
|
289
|
-
gh_ 29
|
290
|
-
r. 29
|
291
286
|
_b| 29
|
292
|
-
|
293
|
-
|
287
|
+
r. 29
|
288
|
+
@g 29
|
294
289
|
b|_ 29
|
290
|
+
O_ 29
|
291
|
+
_+H 29
|
292
|
+
-_ 29
|
295
293
|
)_ 29
|
296
|
-
|
297
|
-
|
298
|
-
|
294
|
+
gh_ 29
|
295
|
+
b| 29
|
296
|
+
h@ 29
|
299
297
|
G_ 28
|
300
|
-
|
298
|
+
||_ 28
|
299
|
+
ve 28
|
300
|
+
_nd 28
|
301
|
+
z._ 28
|
301
302
|
nT_ 28
|
302
|
-
_H_ 28
|
303
303
|
ND 28
|
304
|
+
PH 28
|
304
305
|
n._ 28
|
305
|
-
|
306
|
-
|
306
|
+
_nT_ 28
|
307
|
+
De 28
|
307
308
|
K 28
|
308
|
-
|
309
|
-
D, 27
|
309
|
+
_H_ 28
|
310
310
|
W_ 27
|
311
311
|
I_ 27
|
312
|
-
_PH 27
|
313
312
|
_K 27
|
313
|
+
D, 27
|
314
314
|
_n. 27
|
315
315
|
ve_ 27
|
316
|
-
|
316
|
+
_PH 27
|
317
|
+
r._ 26
|
317
318
|
rE 26
|
319
|
+
n,_ 26
|
320
|
+
Up_ 26
|
321
|
+
_rE 26
|
318
322
|
_+hO 26
|
319
323
|
_tz 26
|
320
|
-
|
324
|
+
_(h 26
|
325
|
+
LL_ 26
|
321
326
|
@n_ 26
|
327
|
+
+hO 26
|
322
328
|
hO 26
|
323
|
-
Up_ 26
|
324
329
|
n, 26
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
r._ 26
|
329
|
-
n,_ 26
|
330
|
+
ph 26
|
331
|
+
fOr 25
|
332
|
+
_tz_ 25
|
330
333
|
_(h_ 25
|
334
|
+
rz_ 25
|
331
335
|
= 25
|
336
|
+
_rz_ 25
|
337
|
+
+HE 25
|
332
338
|
fO 25
|
333
|
-
rz_ 25
|
334
|
-
!= 25
|
335
|
-
_r. 25
|
336
|
-
+o 25
|
337
339
|
_+HE 25
|
338
|
-
|
340
|
+
_r. 25
|
341
|
+
!= 25
|
339
342
|
P_ 25
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
Ult 24
|
343
|
+
+o 25
|
344
|
+
pR_ 24
|
345
|
+
_!= 24
|
346
|
+
_zUlt 24
|
347
|
+
$. 24
|
346
348
|
_pR 24
|
349
|
+
ltz 24
|
347
350
|
_Up_ 24
|
348
|
-
|
351
|
+
pR 24
|
352
|
+
zUlt 24
|
353
|
+
_iN 24
|
354
|
+
_wh_ 24
|
355
|
+
_+o 24
|
356
|
+
_nt 24
|
349
357
|
wh_ 24
|
350
|
-
|
358
|
+
iN 24
|
351
359
|
\/_ 24
|
352
|
-
' 24
|
353
|
-
_nt 24
|
354
360
|
zUltz 24
|
355
|
-
iN 24
|
356
|
-
_! 24
|
357
|
-
_zUlt 24
|
358
361
|
@g_ 24
|
359
|
-
|
362
|
+
' 24
|
363
|
+
_pR_ 24
|
360
364
|
/_ 24
|
361
|
-
|
362
|
-
|
363
|
-
zUlt 24
|
364
|
-
_+o 24
|
365
|
-
_!= 24
|
366
|
-
$. 24
|
367
|
-
ltz 24
|
365
|
+
nt 24
|
366
|
+
Ult 24
|
368
367
|
In 24
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
rv 23
|
368
|
+
Ultz 24
|
369
|
+
Or_ 24
|
370
|
+
_! 24
|
373
371
|
_|\|d 23
|
374
|
-
|
375
|
-
_pd 23
|
376
|
-
|\|d 23
|
377
|
-
aRcH 23
|
378
|
-
_rv 23
|
372
|
+
_n,_ 23
|
379
373
|
\|d 23
|
380
|
-
g|_ 23
|
381
374
|
d. 23
|
375
|
+
_rv 23
|
376
|
+
aRcH 23
|
377
|
+
RcH 23
|
378
|
+
be 23
|
379
|
+
rv_ 23
|
382
380
|
!_ 23
|
383
|
-
|
384
|
-
_n,
|
381
|
+
_rv_ 23
|
382
|
+
_n, 23
|
383
|
+
rv 23
|
384
|
+
_be 23
|
385
385
|
_pdf 23
|
386
386
|
rc 23
|
387
|
+
_aRcH 23
|
387
388
|
,. 23
|
389
|
+
|d 23
|
390
|
+
g|_ 23
|
391
|
+
|\|d 23
|
388
392
|
D,_ 23
|
389
|
-
|
390
|
-
_rv_ 23
|
393
|
+
_pd 23
|
391
394
|
g| 23
|
392
|
-
be 23
|
393
|
-
_n, 23
|
394
|
-
|_| 22
|
395
|
-
\|d_ 22
|
396
|
-
BuT 22
|
397
395
|
,,_ 22
|
398
|
-
|
399
|
-
|
400
|
-
|
396
|
+
|_| 22
|
397
|
+
_- 22
|
398
|
+
|d_ 22
|
399
|
+
d._ 22
|
400
|
+
_r._ 22
|