scylla 0.8.0 → 0.8.29

Sign up to get free protection for your applications and to get access to all the features.
Files changed (93) hide show
  1. data/Gemfile +4 -0
  2. data/Gemfile.lock +9 -1
  3. data/lib/scylla/generator.rb +46 -13
  4. data/lib/scylla/lms/afrikaans.lm +400 -400
  5. data/lib/scylla/lms/arabic.lm +400 -400
  6. data/lib/scylla/lms/bulgarian.lm +400 -400
  7. data/lib/scylla/lms/catalan.lm +399 -399
  8. data/lib/scylla/lms/chinese.lm +400 -400
  9. data/lib/scylla/lms/czech.lm +400 -0
  10. data/lib/scylla/lms/danish.lm +396 -396
  11. data/lib/scylla/lms/dutch.lm +400 -0
  12. data/lib/scylla/lms/english.lm +400 -400
  13. data/lib/scylla/lms/finnish.lm +400 -400
  14. data/lib/scylla/lms/french.lm +398 -398
  15. data/lib/scylla/lms/german.lm +400 -400
  16. data/lib/scylla/lms/greek.lm +400 -400
  17. data/lib/scylla/lms/hebrew.lm +399 -399
  18. data/lib/scylla/lms/hindi.lm +400 -400
  19. data/lib/scylla/lms/icelandic.lm +399 -399
  20. data/lib/scylla/lms/indonesian.lm +400 -400
  21. data/lib/scylla/lms/italian.lm +400 -400
  22. data/lib/scylla/lms/japanese.lm +399 -399
  23. data/lib/scylla/lms/kannada.lm +400 -0
  24. data/lib/scylla/lms/korean.lm +400 -400
  25. data/lib/scylla/lms/marathi.lm +400 -0
  26. data/lib/scylla/lms/norwegian.lm +400 -400
  27. data/lib/scylla/lms/persian.lm +400 -0
  28. data/lib/scylla/lms/polish.lm +400 -400
  29. data/lib/scylla/lms/portuguese.lm +400 -400
  30. data/lib/scylla/lms/romanian.lm +400 -400
  31. data/lib/scylla/lms/russian.lm +400 -400
  32. data/lib/scylla/lms/slovak.lm +400 -400
  33. data/lib/scylla/lms/slovenian.lm +387 -387
  34. data/lib/scylla/lms/spanish.lm +400 -400
  35. data/lib/scylla/lms/swedish.lm +399 -399
  36. data/lib/scylla/lms/tagalog.lm +400 -400
  37. data/lib/scylla/lms/thai.lm +400 -400
  38. data/lib/scylla/lms/turkish.lm +400 -400
  39. data/lib/scylla/lms/vietnamese.lm +400 -400
  40. data/lib/scylla/lms/welsh.lm +398 -398
  41. data/lib/scylla/resources.rb +43 -33
  42. data/lib/scylla/string.rb +2 -2
  43. data/lib/scylla.rb +0 -4
  44. data/pkg/scylla-0.5.0.gem +0 -0
  45. data/scylla.gemspec +1 -1
  46. data/source_texts/afrikaans.txt +330 -81
  47. data/source_texts/arabic.txt +590 -448
  48. data/source_texts/bulgarian.txt +588 -821
  49. data/source_texts/catalan.txt +435 -413
  50. data/source_texts/chinese.txt +526 -100
  51. data/source_texts/czech.txt +237 -0
  52. data/source_texts/danish.txt +233 -184
  53. data/source_texts/dutch.txt +503 -0
  54. data/source_texts/english.txt +673 -70
  55. data/source_texts/finnish.txt +939 -71
  56. data/source_texts/french.txt +879 -465
  57. data/source_texts/german.txt +1236 -137
  58. data/source_texts/greek.txt +488 -139
  59. data/source_texts/hebrew.txt +539 -100
  60. data/source_texts/hindi.txt +254 -100
  61. data/source_texts/icelandic.txt +301 -90
  62. data/source_texts/indonesian.txt +509 -93
  63. data/source_texts/italian.txt +1066 -120
  64. data/source_texts/japanese.txt +1217 -450
  65. data/source_texts/kannada.txt +340 -0
  66. data/source_texts/korean.txt +343 -219
  67. data/source_texts/marathi.txt +237 -0
  68. data/source_texts/norwegian.txt +555 -190
  69. data/source_texts/persian.txt +886 -0
  70. data/source_texts/polish.txt +1013 -90
  71. data/source_texts/portuguese.txt +690 -88
  72. data/source_texts/romanian.txt +436 -103
  73. data/source_texts/russian.txt +1029 -100
  74. data/source_texts/slovak.txt +575 -102
  75. data/source_texts/slovenian.txt +353 -99
  76. data/source_texts/spanish.txt +858 -675
  77. data/source_texts/swedish.txt +558 -488
  78. data/source_texts/tagalog.txt +391 -100
  79. data/source_texts/thai.txt +286 -60
  80. data/source_texts/turkish.txt +635 -87
  81. data/source_texts/vietnamese.txt +300 -92
  82. data/source_texts/welsh.txt +288 -104
  83. data/test/fixtures/lms/danish.lm +314 -314
  84. data/test/fixtures/lms/english.lm +301 -301
  85. data/test/fixtures/lms/french.lm +326 -326
  86. data/test/fixtures/lms/german.lm +331 -331
  87. data/test/fixtures/lms/hindi.lm +191 -191
  88. data/test/fixtures/lms/italian.lm +299 -299
  89. data/test/fixtures/lms/japanese.lm +103 -103
  90. data/test/fixtures/lms/norwegian.lm +309 -309
  91. data/test/fixtures/lms/spanish.lm +331 -331
  92. data/test/generator_test.rb +2 -2
  93. metadata +14 -3
@@ -1,400 +1,400 @@
1
- _ 1362
2
- a 715
3
- n 366
4
- e 308
5
- i 298
6
- u 211
7
- r 208
8
- k 205
9
- t 205
10
- an 185
11
- s 152
12
- g 152
13
- d 149
14
- m 149
15
- l 108
16
- ng 108
17
- a_ 105
18
- p 99
19
- n_ 96
20
- o 91
21
- an_ 87
22
- er 85
23
- b 85
24
- _d 80
25
- , 77
26
- ,_ 77
27
- i_ 76
28
- ka 76
29
- h 73
30
- _m 68
31
- ar 65
32
- en 65
33
- _k 62
34
- ta 59
35
- di 59
36
- me 58
37
- ang 56
38
- ra 55
39
- at 54
40
- ak 53
41
- y 53
42
- _me 52
43
- da 49
44
- ga 48
45
- in 46
46
- _di 44
47
- ya 43
48
- _s 42
49
- _p 42
50
- ._ 42
51
- . 42
52
- tu 41
53
- _b 40
54
- _t 40
55
- un 40
56
- g_ 38
57
- j 38
58
- ng_ 38
59
- la 38
60
- k_ 37
61
- ma 37
62
- na 36
63
- h_ 36
64
- ah 35
65
- ri 34
66
- ke 33
67
- be 33
68
- se 33
69
- al 32
70
- as 32
71
- pe 32
72
- ia 31
73
- men 31
74
- _ke 30
75
- _men 29
76
- _a 29
77
- si 29
78
- P 29
79
- pa 28
80
- it 28
81
- ti 28
82
- u_ 27
83
- _P 27
84
- em 27
85
- s_ 27
86
- ny 27
87
- ang_ 27
88
- kan 26
89
- sa 26
90
- _pe 26
91
- am 26
92
- _se 26
93
- t_ 25
94
- c 24
95
- _ka 24
96
- eng 24
97
- te 24
98
- _i 24
99
- _da 23
100
- ja 23
101
- nga 23
102
- r_ 23
103
- S 23
104
- ba 23
105
- el 22
106
- di_ 22
107
- li 22
108
- ber 22
109
- ni 21
110
- nt 21
111
- ak_ 21
112
- _be 21
113
- ah_ 21
114
- kan_ 20
115
- ata 20
116
- ad 19
117
- nya 19
118
- ik 19
119
- ap 19
120
- ran 19
121
- bu 19
122
- us 19
123
- gan 19
124
- is 19
125
- ara 19
126
- dan 18
127
- ru 18
128
- era 18
129
- _ber 18
130
- ur 17
131
- _S 17
132
- uk 17
133
- a, 17
134
- at_ 17
135
- ari 17
136
- a,_ 17
137
- es 17
138
- to 17
139
- nd 17
140
- _te 17
141
- _di_ 16
142
- ter 16
143
- I 16
144
- ung 16
145
- ala 16
146
- su 16
147
- yang 16
148
- _ya 16
149
- ol 16
150
- ngan 16
151
- _y 16
152
- yan 16
153
- ha 16
154
- M 15
155
- J 15
156
- dan_ 14
157
- lu 14
158
- itu 14
159
- _dan_ 14
160
- l_ 14
161
- yang_ 14
162
- du 14
163
- ek 14
164
- ya_ 14
165
- _yan 14
166
- _dan 14
167
- A 14
168
- ok 14
169
- enga 14
170
- _yang 14
171
- _ter 14
172
- de 14
173
- R 13
174
- ina 13
175
- D 13
176
- da_ 13
177
- ku 13
178
- kar 13
179
- na_ 13
180
- ko 13
181
- _c 13
182
- rang 13
183
- ge 13
184
- ai 13
185
- per 13
186
- eri 13
187
- ia_ 13
188
- po 12
189
- rin 12
190
- ut 12
191
- gi 12
192
- e_ 12
193
- _J 12
194
- ul 12
195
- ju 12
196
- w 12
197
- et 12
198
- gan_ 12
199
- ua 12
200
- ngan_ 12
201
- pen 12
202
- _ba 11
203
- _pen 11
204
- ama 11
205
- ena 11
206
- B 11
207
- or 11
208
- nya_ 11
209
- re 11
210
- ni_ 11
211
- gg 11
212
- ca 11
213
- mp 11
214
- ed 11
215
- aka 11
216
- ngg 11
217
- us_ 11
218
- lan 11
219
- im 10
220
- ini 10
221
- uh 10
222
- T 10
223
- adi 10
224
- Z 10
225
- _M 10
226
- rina 10
227
- si_ 10
228
- _Zari 10
229
- _Zar 10
230
- Zar 10
231
- _T 10
232
- _B 10
233
- ngk 10
234
- ta_ 10
235
- ika 10
236
- _in 10
237
- arin 10
238
- mi 10
239
- _Za 10
240
- _Z 10
241
- wa 10
242
- rt 10
243
- ab 10
244
- eru 10
245
- Zari 10
246
- Za 10
247
- arina 10
248
- Zarin 10
249
- ela 10
250
- gk 10
251
- uk_ 10
252
- il 9
253
- jadi 9
254
- i._ 9
255
- mem 9
256
- asi 9
257
- engan 9
258
- ntu 9
259
- _I 9
260
- i. 9
261
- _per 9
262
- _ma 9
263
- _mem 9
264
- tu_ 9
265
- K 9
266
- _ta 9
267
- _ini 9
268
- apa 9
269
- ag 9
270
- _de 9
271
- _itu 9
272
- eb 9
273
- jad 9
274
- ing 9
275
- i,_ 9
276
- _it 9
277
- os 9
278
- as_ 9
279
- i, 9
280
- mu 9
281
- dia 8
282
- le 8
283
- ki 8
284
- rk 8
285
- _K 8
286
- st 8
287
- isi 8
288
- al_ 8
289
- eny 8
290
- ip 8
291
- p_ 8
292
- _j 8
293
- ant 8
294
- rang_ 8
295
- pat 8
296
- L 8
297
- emb 8
298
- ada 8
299
- meng 8
300
- _meng 8
301
- man 8
302
- _A 8
303
- ar_ 8
304
- au 8
305
- itu_ 8
306
- oko 8
307
- oh 8
308
- aga 8
309
- ian 8
310
- ri_ 8
311
- ep 8
312
- ma_ 8
313
- nj 8
314
- mb 8
315
- ga_ 7
316
- u, 7
317
- ngka 7
318
- um 7
319
- uga 7
320
- pat_ 7
321
- ina_ 7
322
- a._ 7
323
- ada_ 7
324
- a. 7
325
- ntuk_ 7
326
- rina_ 7
327
- ay 7
328
- toko 7
329
- aya 7
330
- ot 7
331
- meny 7
332
- _u 7
333
- kal 7
334
- art 7
335
- alan 7
336
- _peng 7
337
- n._ 7
338
- n. 7
339
- g, 7
340
- ngga 7
341
- gga 7
342
- aran 7
343
- ib 7
344
- ce 7
345
- atan 7
346
- _L 7
347
- ej 7
348
- ug 7
349
- _meny 7
350
- ng,_ 7
351
- tak 7
352
- and 7
353
- tuk_ 7
354
- _itu_ 7
355
- rl 7
356
- ger 7
357
- rah 7
358
- nu 7
359
- lu_ 7
360
- u,_ 7
361
- ita 7
362
- arang 7
363
- gka 7
364
- tan 7
365
- ntuk 7
366
- sia 7
367
- Ja 7
368
- peng 7
369
- _Ja 7
370
- g,_ 7
371
- ro 7
372
- bur 7
373
- nda 7
374
- Ta 7
375
- erl 7
376
- ih 7
377
- tok 7
378
- ng, 7
379
- _Ta 7
380
- m_ 7
381
- pu 7
382
- eka 7
383
- oh_ 7
384
- tuk 7
385
- deng 6
386
- mun 6
387
- sus 6
388
- _an 6
389
- tar 6
390
- ub 6
391
- _tok 6
392
- rus 6
393
- _tak 6
394
- an._ 6
395
- N 6
396
- _mel 6
397
- f 6
398
- ang, 6
399
- aj 6
400
- nta 6
1
+ _ 8136
2
+ a 4662
3
+ n 2659
4
+ i 2220
5
+ e 2125
6
+ d 1389
7
+ r 1214
8
+ s 1194
9
+ an 1176
10
+ u 1091
11
+ t 1070
12
+ k 966
13
+ m 924
14
+ a_ 868
15
+ g 868
16
+ l 847
17
+ n_ 820
18
+ p 700
19
+ _d 674
20
+ an_ 664
21
+ i_ 633
22
+ b 621
23
+ o 601
24
+ h 578
25
+ da 559
26
+ ng 548
27
+ er 486
28
+ in 439
29
+ y 422
30
+ _m 411
31
+ en 398
32
+ _p 397
33
+ di 390
34
+ ya 373
35
+ si 368
36
+ ar 364
37
+ la 356
38
+ _s 349
39
+ ta 341
40
+ me 328
41
+ ga 328
42
+ ra 323
43
+ _b 311
44
+ ang 299
45
+ _da 298
46
+ g_ 290
47
+ ah 289
48
+ ng_ 288
49
+ ka 286
50
+ nd 282
51
+ ia 280
52
+ _me 279
53
+ se 273
54
+ _t 272
55
+ h_ 270
56
+ _i 269
57
+ _di 264
58
+ es 263
59
+ _k 254
60
+ on 250
61
+ al 239
62
+ be 236
63
+ _se 232
64
+ as 232
65
+ pe 230
66
+ ne 229
67
+ _a 229
68
+ j 228
69
+ sa 228
70
+ ba 224
71
+ ad 222
72
+ _in 218
73
+ at 213
74
+ dan 207
75
+ ak 207
76
+ pa 205
77
+ ia_ 197
78
+ k_ 195
79
+ ma 194
80
+ te 194
81
+ ri 191
82
+ un 188
83
+ ind 185
84
+ ke 184
85
+ _pe 183
86
+ _y 178
87
+ ah_ 177
88
+ _be 175
89
+ esi 174
90
+ _ya 174
91
+ r_ 168
92
+ na 167
93
+ kan 167
94
+ em 167
95
+ do 165
96
+ di_ 162
97
+ men 161
98
+ ndo 160
99
+ el 160
100
+ sia 159
101
+ nes 159
102
+ one 157
103
+ don 157
104
+ am 156
105
+ ny 156
106
+ yan 155
107
+ _ke 154
108
+ ai 150
109
+ t_ 145
110
+ ti 144
111
+ is 144
112
+ nya 138
113
+ ada 137
114
+ ja 135
115
+ nt 135
116
+ ara 133
117
+ ber 131
118
+ ha 131
119
+ tu 130
120
+ _te 130
121
+ eng 130
122
+ li 129
123
+ ya_ 128
124
+ s_ 125
125
+ u_ 123
126
+ ik 122
127
+ ag 121
128
+ de 121
129
+ nga 120
130
+ ek 118
131
+ l_ 117
132
+ il 115
133
+ da_ 114
134
+ eb 113
135
+ gan 112
136
+ _pa 111
137
+ ua 111
138
+ uk 110
139
+ ri_ 108
140
+ _l 107
141
+ w 106
142
+ c 106
143
+ ter 106
144
+ it 104
145
+ ni 104
146
+ era 103
147
+ ela 101
148
+ du 98
149
+ ra_ 96
150
+ per 96
151
+ at_ 95
152
+ aga 93
153
+ us 93
154
+ _n 92
155
+ ul 91
156
+ ala 91
157
+ mi 88
158
+ le 87
159
+ _ba 87
160
+ m_ 86
161
+ lah 86
162
+ asi 82
163
+ _h 82
164
+ bu 82
165
+ ing 81
166
+ si_ 80
167
+ pad 79
168
+ pu 79
169
+ ut 79
170
+ ari 79
171
+ ru 79
172
+ pen 78
173
+ al_ 77
174
+ mu 76
175
+ _de 76
176
+ rt 76
177
+ un_ 75
178
+ ap 75
179
+ _o 75
180
+ ki 74
181
+ _j 74
182
+ ai_ 74
183
+ ama 74
184
+ et 73
185
+ ol 73
186
+ asa 72
187
+ ar_ 72
188
+ lu 71
189
+ wa 71
190
+ aka 70
191
+ e_ 70
192
+ ju 69
193
+ ir 68
194
+ ak_ 68
195
+ _ta 68
196
+ _ma 68
197
+ nta 68
198
+ ay 67
199
+ au 67
200
+ re 66
201
+ dar 66
202
+ den 66
203
+ hu 65
204
+ seb 65
205
+ or 64
206
+ ga_ 64
207
+ uk_ 64
208
+ st 64
209
+ tah 64
210
+ bag 62
211
+ mb 62
212
+ ur 61
213
+ ku 61
214
+ nj 60
215
+ tan 59
216
+ mer 59
217
+ ngg 58
218
+ gg 58
219
+ end 57
220
+ f 57
221
+ aa 57
222
+ ata 57
223
+ ta_ 56
224
+ hi 56
225
+ ko 56
226
+ mp 55
227
+ su 55
228
+ eg 54
229
+ ma_ 53
230
+ ud 53
231
+ pr 53
232
+ adi 52
233
+ _la 51
234
+ _r 51
235
+ tar 51
236
+ ert 51
237
+ id 50
238
+ mas 50
239
+ gi 50
240
+ eh 50
241
+ gar 50
242
+ _e 49
243
+ mem 48
244
+ ni_ 48
245
+ ik_ 48
246
+ ula 47
247
+ eba 47
248
+ rd 47
249
+ ab 47
250
+ dal 47
251
+ ban 47
252
+ lan 46
253
+ hun 46
254
+ sar 46
255
+ eri 46
256
+ lam 46
257
+ _an 46
258
+ aya 46
259
+ aan 46
260
+ ahu 46
261
+ ih 45
262
+ um 45
263
+ as_ 45
264
+ lai 44
265
+ bi 44
266
+ _sa 44
267
+ eka 44
268
+ ant 44
269
+ dia 44
270
+ san 43
271
+ esa 43
272
+ po 43
273
+ ep 43
274
+ ran 43
275
+ _ad 42
276
+ _w 42
277
+ om 42
278
+ _u 42
279
+ no 41
280
+ bes 41
281
+ tu_ 41
282
+ pul 41
283
+ ain 41
284
+ nda 41
285
+ erd 41
286
+ aw 41
287
+ _ol 41
288
+ awa 40
289
+ ndi 40
290
+ ega 40
291
+ _ne 40
292
+ nja 40
293
+ in_ 40
294
+ io 39
295
+ aha 39
296
+ _ti 39
297
+ gai 39
298
+ jad 39
299
+ ca 39
300
+ han 39
301
+ any 39
302
+ nal 39
303
+ ian 39
304
+ enj 39
305
+ eh_ 38
306
+ v 38
307
+ apa 38
308
+ ali 38
309
+ eme 38
310
+ _pr 38
311
+ _ha 38
312
+ ip 37
313
+ ila 37
314
+ neg 37
315
+ aj 37
316
+ _ju 37
317
+ p_ 37
318
+ rb 37
319
+ emb 37
320
+ ini 37
321
+ ent 36
322
+ erb 36
323
+ isi 36
324
+ ntu 36
325
+ tr 36
326
+ im 36
327
+ man 35
328
+ dis 35
329
+ ole 35
330
+ itu 35
331
+ rta 35
332
+ _c 35
333
+ leh 35
334
+ d_ 35
335
+ gu 35
336
+ au_ 35
337
+ ngk 34
338
+ rk 34
339
+ pi 34
340
+ iri 34
341
+ up 34
342
+ ed 34
343
+ gk 34
344
+ ana 34
345
+ mel 34
346
+ am_ 34
347
+ lay 34
348
+ rn 34
349
+ ion 33
350
+ eru 33
351
+ ro 33
352
+ rs 33
353
+ _ka 33
354
+ lau 33
355
+ rah 33
356
+ _mu 33
357
+ mba 33
358
+ uh 33
359
+ rin 32
360
+ sel 32
361
+ lo 32
362
+ ndu 32
363
+ ena 32
364
+ mi_ 32
365
+ _na 31
366
+ uas 31
367
+ emi 31
368
+ sa_ 31
369
+ to 31
370
+ tel 30
371
+ usa 30
372
+ _bu 30
373
+ ut_ 30
374
+ gga 30
375
+ and 29
376
+ nny 29
377
+ _pu 29
378
+ nn 29
379
+ tas 29
380
+ ika 29
381
+ ili 29
382
+ _ja 29
383
+ ih_ 29
384
+ is_ 28
385
+ _ek 28
386
+ ke_ 28
387
+ ej 28
388
+ kat 28
389
+ kh 28
390
+ ung 28
391
+ pat 28
392
+ _re 28
393
+ dir 28
394
+ ita 28
395
+ duk 28
396
+ ebe 28
397
+ ers 28
398
+ uta 27
399
+ yak 27
400
+ int 27