scylla 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -2
- data/Gemfile.lock +16 -1
- data/lib/scylla/classifier.rb +1 -1
- data/lib/scylla/generator.rb +16 -4
- data/lib/scylla/lms/afrikaans.lm +232 -232
- data/lib/scylla/lms/arabic.lm +175 -175
- data/lib/scylla/lms/bulgarian.lm +225 -225
- data/lib/scylla/lms/catalan.lm +309 -309
- data/lib/scylla/lms/danish.lm +167 -167
- data/lib/scylla/lms/english.lm +398 -398
- data/lib/scylla/lms/finnish.lm +237 -237
- data/lib/scylla/lms/french.lm +148 -148
- data/lib/scylla/lms/german.lm +258 -258
- data/lib/scylla/lms/greek.lm +236 -236
- data/lib/scylla/lms/hebrew.lm +154 -154
- data/lib/scylla/lms/hindi.lm +139 -139
- data/lib/scylla/lms/icelandic.lm +239 -239
- data/lib/scylla/lms/indonesian.lm +244 -244
- data/lib/scylla/lms/italian.lm +248 -248
- data/lib/scylla/lms/japanese.lm +90 -90
- data/lib/scylla/lms/korean.lm +306 -306
- data/lib/scylla/lms/norwegian.lm +193 -193
- data/lib/scylla/lms/polish.lm +241 -241
- data/lib/scylla/lms/portuguese.lm +232 -232
- data/lib/scylla/lms/romanian.lm +246 -246
- data/lib/scylla/lms/slovak.lm +242 -242
- data/lib/scylla/lms/slovenian.lm +229 -229
- data/lib/scylla/lms/spanish.lm +164 -164
- data/lib/scylla/lms/swedish.lm +157 -157
- data/lib/scylla/lms/tagalog.lm +247 -247
- data/lib/scylla/lms/thai.lm +252 -252
- data/lib/scylla/lms/turkish.lm +285 -285
- data/lib/scylla/lms/vietnamese.lm +250 -250
- data/lib/scylla/lms/welsh.lm +248 -248
- data/lib/scylla/resources.rb +1 -9
- data/lib/scylla.rb +4 -0
- data/scylla.gemspec +2 -120
- data/source_texts/english.txt +62 -27
- data/test/classifier_test.rb +1 -3
- data/test/fixtures/lms/danish.lm +173 -173
- data/test/fixtures/lms/english.lm +220 -220
- data/test/fixtures/lms/french.lm +175 -175
- data/test/fixtures/lms/german.lm +254 -254
- data/test/fixtures/lms/hindi.lm +139 -139
- data/test/fixtures/lms/italian.lm +236 -236
- data/test/fixtures/lms/japanese.lm +88 -88
- data/test/fixtures/lms/norwegian.lm +182 -182
- data/test/fixtures/lms/spanish.lm +164 -164
- data/test/fixtures/test_languages/spanish +0 -1
- data/test/generator_test.rb +13 -0
- data/test/helper.rb +2 -0
- metadata +18 -25
- data/.document +0 -5
- data/lib/scylla/lms/13375P33K.lm +0 -400
- data/scylla-0.1.0.gem +0 -0
- data/source_texts/13375P33K.txt +0 -199
- data/test/fixtures/lms/13375p33k.lm +0 -400
- data/test/fixtures/source_texts/13375P33K.txt +0 -199
data/lib/scylla/lms/hindi.lm
CHANGED
@@ -6,364 +6,364 @@ _ 9054
|
|
6
6
|
� 5427
|
7
7
|
_� 4527
|
8
8
|
_� 4495
|
9
|
-
�� 2729
|
10
9
|
ा 2729
|
11
10
|
� 2729
|
12
|
-
|
11
|
+
�� 2729
|
13
12
|
् 2281
|
13
|
+
� 2281
|
14
14
|
�� 2281
|
15
|
+
��� 2265
|
15
16
|
�� 2265
|
16
17
|
्� 2265
|
17
|
-
��� 2265
|
18
18
|
�� 2251
|
19
|
-
��� 2251
|
20
19
|
्� 2251
|
20
|
+
��� 2251
|
21
21
|
�� 2023
|
22
22
|
ा� 2023
|
23
23
|
��� 2023
|
24
|
-
र 2005
|
25
24
|
�� 2005
|
25
|
+
र 2005
|
26
26
|
� 2005
|
27
|
-
�� 1970
|
28
|
-
ा� 1970
|
29
27
|
��� 1970
|
30
|
-
|
28
|
+
ा� 1970
|
29
|
+
�� 1970
|
31
30
|
ि 1764
|
32
31
|
� 1764
|
32
|
+
�� 1764
|
33
|
+
�� 1706
|
33
34
|
र� 1706
|
34
35
|
��� 1706
|
35
|
-
|
36
|
+
� 1665
|
36
37
|
�� 1665
|
37
38
|
क 1665
|
38
|
-
� 1665
|
39
39
|
�� 1596
|
40
40
|
त 1596
|
41
|
-
�� 1516
|
42
|
-
��� 1516
|
43
|
-
�� 1516
|
44
|
-
ि� 1516
|
45
41
|
��� 1516
|
42
|
+
�� 1516
|
46
43
|
ि� 1516
|
44
|
+
��� 1516
|
45
|
+
ि� 1516
|
46
|
+
�� 1516
|
47
47
|
� 1391
|
48
|
-
��� 1390
|
49
48
|
�� 1390
|
49
|
+
��� 1390
|
50
50
|
क� 1390
|
51
|
-
�� 1381
|
52
51
|
न 1381
|
53
|
-
��
|
54
|
-
स 1345
|
52
|
+
�� 1381
|
55
53
|
� 1345
|
56
|
-
|
57
|
-
��
|
54
|
+
स 1345
|
55
|
+
�� 1345
|
58
56
|
स� 1264
|
57
|
+
�� 1264
|
58
|
+
��� 1264
|
59
59
|
�� 1226
|
60
60
|
त� 1200
|
61
61
|
��� 1200
|
62
62
|
� 1181
|
63
63
|
े 1172
|
64
64
|
�� 1172
|
65
|
-
� 1144
|
66
65
|
व 1144
|
66
|
+
� 1144
|
67
67
|
�� 1144
|
68
|
-
व� 1083
|
69
|
-
�� 1083
|
70
68
|
��� 1083
|
69
|
+
�� 1083
|
70
|
+
व� 1083
|
71
71
|
�� 1075
|
72
|
-
��� 1065
|
73
72
|
न� 1065
|
73
|
+
��� 1065
|
74
74
|
�� 963
|
75
75
|
��� 963
|
76
76
|
र� 963
|
77
77
|
�� 920
|
78
|
-
व� 908
|
79
78
|
��� 908
|
80
79
|
�� 908
|
80
|
+
व� 908
|
81
81
|
��� 894
|
82
82
|
त� 894
|
83
|
-
म 858
|
84
|
-
�� 858
|
85
83
|
� 858
|
84
|
+
�� 858
|
85
|
+
म 858
|
86
86
|
� 853
|
87
|
-
य 836
|
88
|
-
�� 836
|
89
87
|
� 836
|
90
|
-
��
|
88
|
+
�� 836
|
89
|
+
य 836
|
91
90
|
प 829
|
91
|
+
�� 829
|
92
92
|
�� 809
|
93
93
|
�� 792
|
94
|
-
म� 792
|
95
94
|
��� 792
|
96
|
-
|
95
|
+
म� 792
|
97
96
|
स� 786
|
98
97
|
�� 786
|
98
|
+
��� 786
|
99
99
|
प� 785
|
100
100
|
��� 785
|
101
|
-
�� 752
|
102
|
-
क� 752
|
103
101
|
��� 752
|
102
|
+
क� 752
|
103
|
+
�� 752
|
104
104
|
र� 743
|
105
105
|
��� 743
|
106
106
|
�� 743
|
107
|
+
��_ 706
|
107
108
|
�_ 706
|
108
109
|
ा_ 706
|
109
|
-
��_ 706
|
110
110
|
� 705
|
111
|
-
�� 683
|
112
111
|
द 683
|
112
|
+
�� 683
|
113
113
|
�� 671
|
114
|
-
��� 663
|
115
114
|
द� 663
|
115
|
+
��� 663
|
116
|
+
ु 655
|
116
117
|
� 655
|
117
|
-
|
118
|
-
ु� 655
|
119
|
-
�� 655
|
118
|
+
��� 655
|
120
119
|
�� 655
|
120
|
+
ु� 655
|
121
|
+
ु� 655
|
121
122
|
��� 655
|
122
|
-
ु 655
|
123
|
-
��� 655
|
124
123
|
�� 655
|
125
|
-
��
|
124
|
+
�� 655
|
125
|
+
�� 649
|
126
|
+
े� 640
|
127
|
+
��� 640
|
126
128
|
क� 638
|
129
|
+
�� 638
|
127
130
|
��� 638
|
128
131
|
_स 634
|
129
132
|
_स� 634
|
130
|
-
��
|
131
|
-
य� 621
|
133
|
+
�� 626
|
132
134
|
�� 621
|
133
135
|
��� 621
|
134
|
-
|
135
|
-
|
136
|
-
|
136
|
+
य� 621
|
137
|
+
��� 617
|
138
|
+
े� 617
|
137
139
|
�� 613
|
140
|
+
��� 613
|
138
141
|
न� 613
|
139
|
-
�� 604
|
140
|
-
े� 595
|
141
|
-
��� 595
|
142
142
|
�� 568
|
143
|
-
� 568
|
144
143
|
ह 568
|
145
|
-
|
144
|
+
� 568
|
146
145
|
म� 538
|
147
146
|
��� 538
|
147
|
+
�� 538
|
148
148
|
े_ 532
|
149
|
-
��_ 532
|
150
149
|
�_ 532
|
151
|
-
��
|
150
|
+
��_ 532
|
152
151
|
ल 527
|
152
|
+
�� 527
|
153
153
|
� 527
|
154
154
|
_व� 526
|
155
155
|
_व 526
|
156
|
+
��� 524
|
156
157
|
�� 524
|
157
158
|
ह� 524
|
158
|
-
��� 524
|
159
159
|
�_ 522
|
160
160
|
� 485
|
161
|
+
��� 478
|
161
162
|
�� 478
|
162
163
|
स� 478
|
163
|
-
��� 478
|
164
|
-
�् 477
|
165
|
-
��् 477
|
166
164
|
�्� 477
|
165
|
+
��् 477
|
166
|
+
�् 477
|
167
167
|
�� 473
|
168
168
|
�र 472
|
169
169
|
��र 472
|
170
|
-
�� 462
|
171
170
|
�� 462
|
172
|
-
|
171
|
+
�� 462
|
173
172
|
_प� 461
|
174
|
-
|
173
|
+
_प 461
|
174
|
+
� 454
|
175
|
+
��� 454
|
175
176
|
श 454
|
176
177
|
�� 454
|
177
|
-
|
178
|
+
�� 454
|
178
179
|
य� 454
|
179
|
-
|
180
|
+
ी 453
|
180
181
|
�� 453
|
181
182
|
� 453
|
182
|
-
ी 453
|
183
|
-
�ि� 452
|
184
|
-
�ि 452
|
185
183
|
न� 452
|
186
|
-
��ि 452
|
187
184
|
��� 452
|
185
|
+
�ि� 452
|
186
|
+
��ि 452
|
187
|
+
�ि 452
|
188
188
|
ब 442
|
189
|
-
|
189
|
+
� 442
|
190
190
|
ब� 442
|
191
|
+
�� 442
|
191
192
|
��� 442
|
192
|
-
� 442
|
193
193
|
�� 442
|
194
|
-
�� 435
|
195
194
|
� 435
|
195
|
+
�� 435
|
196
196
|
ो 435
|
197
197
|
�� 432
|
198
|
-
ल� 432
|
199
198
|
��� 432
|
199
|
+
ल� 432
|
200
200
|
��य 427
|
201
201
|
�य 427
|
202
202
|
�र� 424
|
203
|
-
�� 424
|
204
203
|
ं 424
|
204
|
+
�� 424
|
205
205
|
��ा 413
|
206
206
|
�ा 413
|
207
|
-
ं� 412
|
208
|
-
ज 412
|
209
|
-
�� 412
|
210
207
|
� 412
|
208
|
+
�� 412
|
209
|
+
ं� 412
|
211
210
|
��� 412
|
211
|
+
ज 412
|
212
212
|
�� 407
|
213
213
|
�� 402
|
214
|
-
��� 401
|
215
214
|
ं� 401
|
216
|
-
|
215
|
+
��� 401
|
217
216
|
�र 399
|
218
|
-
|
217
|
+
��र 399
|
219
218
|
प� 397
|
219
|
+
��� 397
|
220
220
|
��_ 396
|
221
221
|
त_ 396
|
222
|
-
��� 388
|
223
222
|
प� 388
|
223
|
+
��� 388
|
224
224
|
_क� 382
|
225
225
|
_क 382
|
226
|
-
ज� 377
|
227
226
|
��� 377
|
228
227
|
�� 377
|
228
|
+
ज� 377
|
229
|
+
द� 371
|
229
230
|
�� 371
|
230
231
|
��� 371
|
231
|
-
द� 371
|
232
|
-
�क 362
|
233
232
|
��क 362
|
234
|
-
|
235
|
-
ष 361
|
233
|
+
�क 362
|
236
234
|
�� 361
|
237
|
-
|
235
|
+
ष 361
|
236
|
+
� 361
|
238
237
|
�� 354
|
239
238
|
श� 354
|
239
|
+
��� 354
|
240
240
|
�र� 349
|
241
|
+
�� 348
|
241
242
|
��� 348
|
242
243
|
ब� 348
|
243
|
-
�� 348
|
244
244
|
��� 344
|
245
|
-
�� 344
|
246
245
|
ो� 344
|
246
|
+
�� 344
|
247
247
|
�य� 343
|
248
248
|
�� 336
|
249
|
-
ध 336
|
250
249
|
� 336
|
250
|
+
ध 336
|
251
251
|
�ा_ 323
|
252
252
|
�� 322
|
253
253
|
��� 322
|
254
254
|
ो� 322
|
255
255
|
��_ 316
|
256
|
-
न_ 316
|
257
256
|
�_ 316
|
257
|
+
न_ 316
|
258
258
|
�� 315
|
259
259
|
ग 315
|
260
260
|
� 315
|
261
261
|
ल� 313
|
262
262
|
�� 313
|
263
263
|
��� 313
|
264
|
-
�� 306
|
265
264
|
��� 306
|
266
265
|
त� 306
|
266
|
+
�� 306
|
267
267
|
�� 300
|
268
|
-
�_ 299
|
269
268
|
��_ 299
|
270
269
|
र_ 299
|
271
|
-
|
270
|
+
�_ 299
|
272
271
|
� 298
|
272
|
+
�� 298
|
273
273
|
ष� 296
|
274
|
-
�� 296
|
275
274
|
��� 296
|
276
|
-
|
275
|
+
�� 296
|
277
276
|
�न 294
|
277
|
+
��न 294
|
278
278
|
द� 292
|
279
279
|
��� 292
|
280
|
-
ह� 287
|
281
|
-
�� 287
|
282
280
|
��� 287
|
283
|
-
|
281
|
+
�� 287
|
282
|
+
ह� 287
|
284
283
|
_ब� 286
|
284
|
+
_ब 286
|
285
|
+
भ� 283
|
285
286
|
�� 283
|
286
287
|
��� 283
|
287
|
-
भ� 283
|
288
288
|
भ 283
|
289
|
-
अ� 281
|
290
289
|
�� 281
|
291
|
-
��� 281
|
292
|
-
अ� 281
|
293
|
-
�� 281
|
294
290
|
�� 281
|
295
|
-
|
291
|
+
अ� 281
|
296
292
|
� 281
|
293
|
+
अ� 281
|
297
294
|
��� 281
|
298
|
-
|
295
|
+
��� 281
|
296
|
+
अ 281
|
297
|
+
�� 281
|
299
298
|
��� 277
|
300
299
|
�� 277
|
300
|
+
ध� 277
|
301
301
|
क_ 275
|
302
302
|
�_ 275
|
303
303
|
��_ 275
|
304
|
-
��र 274
|
305
304
|
�र 274
|
305
|
+
��र 274
|
306
306
|
�त 267
|
307
307
|
��त 267
|
308
|
+
ज� 266
|
308
309
|
�� 266
|
309
|
-
ी� 266
|
310
310
|
�� 266
|
311
|
-
��� 266
|
312
311
|
ी� 266
|
313
312
|
��� 266
|
314
|
-
ज� 266
|
315
313
|
�� 266
|
314
|
+
��� 266
|
316
315
|
��� 266
|
317
|
-
|
316
|
+
ी� 266
|
317
|
+
_अ� 263
|
318
318
|
ष� 263
|
319
|
+
��� 263
|
319
320
|
�� 263
|
320
321
|
_अ 263
|
321
|
-
_अ� 263
|
322
|
-
��् 261
|
323
322
|
�्� 261
|
324
323
|
�् 261
|
324
|
+
��् 261
|
325
325
|
�्� 256
|
326
|
-
��् 256
|
327
326
|
�् 256
|
327
|
+
��् 256
|
328
|
+
� 255
|
328
329
|
�� 255
|
329
330
|
च 255
|
330
|
-
� 255
|
331
|
-
ग� 254
|
332
|
-
�� 254
|
333
331
|
��� 254
|
334
|
-
|
332
|
+
ग� 254
|
335
333
|
�� 254
|
334
|
+
म� 254
|
335
|
+
�� 254
|
336
336
|
��� 254
|
337
|
-
�_ 248
|
338
337
|
ि_ 248
|
338
|
+
�_ 248
|
339
339
|
��_ 248
|
340
|
-
�ा 246
|
341
340
|
��ा 246
|
341
|
+
�ा 246
|
342
|
+
��� 245
|
342
343
|
च� 245
|
343
344
|
�� 245
|
344
|
-
|
345
|
+
ह� 237
|
345
346
|
��� 237
|
346
|
-
��त 237
|
347
347
|
�� 237
|
348
|
-
ह� 237
|
349
348
|
�त 237
|
350
|
-
|
351
|
-
�र� 233
|
349
|
+
��त 237
|
352
350
|
��� 233
|
353
351
|
श� 233
|
352
|
+
�र� 233
|
353
|
+
�� 233
|
354
354
|
ण 230
|
355
355
|
�� 230
|
356
356
|
� 230
|
357
357
|
��ा 225
|
358
358
|
�ा 225
|
359
359
|
�ा� 225
|
360
|
-
|
360
|
+
�् 220
|
361
361
|
��् 220
|
362
|
+
�ा 220
|
362
363
|
��ा 220
|
363
|
-
�् 220
|
364
364
|
�् 219
|
365
|
-
��् 219
|
366
365
|
�्� 219
|
366
|
+
��् 219
|
367
367
|
�_ 215
|
368
368
|
��_ 215
|
369
369
|
य_ 215
|
@@ -372,29 +372,29 @@ _अ
|
|
372
372
|
��ा 206
|
373
373
|
�्� 206
|
374
374
|
ग� 205
|
375
|
-
�� 205
|
376
375
|
��� 205
|
377
|
-
|
378
|
-
भ� 204
|
379
|
-
��� 204
|
380
|
-
�� 204
|
381
|
-
��ि 204
|
376
|
+
�� 205
|
382
377
|
_म 204
|
383
378
|
� 204
|
379
|
+
��� 204
|
380
|
+
�� 204
|
384
381
|
�ि 204
|
382
|
+
भ� 204
|
383
|
+
�क� 204
|
384
|
+
��ि 204
|
385
385
|
ए 204
|
386
386
|
_म� 204
|
387
|
-
�े 200
|
388
387
|
��े 200
|
388
|
+
�े 200
|
389
389
|
�त� 199
|
390
|
-
��� 198
|
391
|
-
�� 198
|
392
390
|
च� 198
|
391
|
+
�� 198
|
392
|
+
��� 198
|
393
393
|
�त_ 195
|
394
|
+
��� 194
|
394
395
|
�� 194
|
395
396
|
�� 194
|
397
|
+
आ� 194
|
396
398
|
आ� 194
|
397
|
-
�� 194
|
398
399
|
� 194
|
399
|
-
|
400
|
-
��� 194
|
400
|
+
�� 194
|