scylla 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. data/Gemfile +4 -2
  2. data/Gemfile.lock +16 -1
  3. data/lib/scylla/classifier.rb +1 -1
  4. data/lib/scylla/generator.rb +16 -4
  5. data/lib/scylla/lms/afrikaans.lm +232 -232
  6. data/lib/scylla/lms/arabic.lm +175 -175
  7. data/lib/scylla/lms/bulgarian.lm +225 -225
  8. data/lib/scylla/lms/catalan.lm +309 -309
  9. data/lib/scylla/lms/danish.lm +167 -167
  10. data/lib/scylla/lms/english.lm +398 -398
  11. data/lib/scylla/lms/finnish.lm +237 -237
  12. data/lib/scylla/lms/french.lm +148 -148
  13. data/lib/scylla/lms/german.lm +258 -258
  14. data/lib/scylla/lms/greek.lm +236 -236
  15. data/lib/scylla/lms/hebrew.lm +154 -154
  16. data/lib/scylla/lms/hindi.lm +139 -139
  17. data/lib/scylla/lms/icelandic.lm +239 -239
  18. data/lib/scylla/lms/indonesian.lm +244 -244
  19. data/lib/scylla/lms/italian.lm +248 -248
  20. data/lib/scylla/lms/japanese.lm +90 -90
  21. data/lib/scylla/lms/korean.lm +306 -306
  22. data/lib/scylla/lms/norwegian.lm +193 -193
  23. data/lib/scylla/lms/polish.lm +241 -241
  24. data/lib/scylla/lms/portuguese.lm +232 -232
  25. data/lib/scylla/lms/romanian.lm +246 -246
  26. data/lib/scylla/lms/slovak.lm +242 -242
  27. data/lib/scylla/lms/slovenian.lm +229 -229
  28. data/lib/scylla/lms/spanish.lm +164 -164
  29. data/lib/scylla/lms/swedish.lm +157 -157
  30. data/lib/scylla/lms/tagalog.lm +247 -247
  31. data/lib/scylla/lms/thai.lm +252 -252
  32. data/lib/scylla/lms/turkish.lm +285 -285
  33. data/lib/scylla/lms/vietnamese.lm +250 -250
  34. data/lib/scylla/lms/welsh.lm +248 -248
  35. data/lib/scylla/resources.rb +1 -9
  36. data/lib/scylla.rb +4 -0
  37. data/scylla.gemspec +2 -120
  38. data/source_texts/english.txt +62 -27
  39. data/test/classifier_test.rb +1 -3
  40. data/test/fixtures/lms/danish.lm +173 -173
  41. data/test/fixtures/lms/english.lm +220 -220
  42. data/test/fixtures/lms/french.lm +175 -175
  43. data/test/fixtures/lms/german.lm +254 -254
  44. data/test/fixtures/lms/hindi.lm +139 -139
  45. data/test/fixtures/lms/italian.lm +236 -236
  46. data/test/fixtures/lms/japanese.lm +88 -88
  47. data/test/fixtures/lms/norwegian.lm +182 -182
  48. data/test/fixtures/lms/spanish.lm +164 -164
  49. data/test/fixtures/test_languages/spanish +0 -1
  50. data/test/generator_test.rb +13 -0
  51. data/test/helper.rb +2 -0
  52. metadata +18 -25
  53. data/.document +0 -5
  54. data/lib/scylla/lms/13375P33K.lm +0 -400
  55. data/scylla-0.1.0.gem +0 -0
  56. data/source_texts/13375P33K.txt +0 -199
  57. data/test/fixtures/lms/13375p33k.lm +0 -400
  58. data/test/fixtures/source_texts/13375P33K.txt +0 -199
@@ -1,14 +1,14 @@
1
- _ 1358
1
+ _ 1354
2
2
  a 856
3
3
  n 460
4
4
  i 336
5
5
  g 302
6
6
  ng 210
7
7
  t 183
8
- g_ 175
8
+ g_ 176
9
9
  o 173
10
- ng_ 169
11
- a_ 156
10
+ ng_ 170
11
+ a_ 159
12
12
  k 155
13
13
  an 155
14
14
  s 150
@@ -21,380 +21,380 @@ u 103
21
21
  na 101
22
22
  ang 98
23
23
  . 85
24
- ang_ 83
24
+ ang_ 84
25
25
  la 80
26
26
  in 79
27
27
  _a 77
28
28
  r 77
29
29
  b 72
30
30
  ka 72
31
- sa 67
32
31
  ak 67
32
+ sa 67
33
33
  pa 65
34
34
  _m 65
35
- ma 64
36
35
  _s 64
36
+ ma 64
37
37
  h 63
38
38
  _na 61
39
+ at 57
39
40
  d 57
40
41
  al 57
41
- at 54
42
- _p 52
43
42
  ag 52
43
+ _p 52
44
44
  ta 49
45
- _k 45
45
+ ay 46
46
46
  ni 45
47
+ _k 45
47
48
  t_ 44
48
49
  _sa 43
49
- " 42
50
50
  ya 42
51
- _ng 42
52
51
  n_ 42
53
- ay 41
52
+ _ng 42
54
53
  it 40
55
54
  _pa 40
56
- ga 39
57
- e 39
55
+ o_ 39
58
56
  on 39
57
+ e 39
58
+ ga 39
59
+ y_ 38
59
60
  sa_ 38
60
- y_ 37
61
61
  iy 36
62
62
  il 36
63
- o_ 35
64
- ala 35
65
63
  w 35
66
- _ng_ 34
64
+ ala 35
67
65
  am 34
66
+ _ng_ 34
68
67
  ._ 33
69
68
  _sa_ 33
70
- i_ 32
71
- _ma 32
72
69
  na_ 32
70
+ _ma 32
71
+ i_ 32
72
+ , 31
73
+ ,_ 31
73
74
  _an 31
74
75
  ra 31
75
- , 31
76
76
  ba 30
77
- _t 29
78
- iya 29
79
77
  _ang_ 29
78
+ ar 29
79
+ ay_ 29
80
80
  _ni 29
81
81
  _ang 29
82
- ar 29
82
+ iya 29
83
+ _t 29
83
84
  _i 28
84
85
  _ka 28
85
86
  _l 27
86
- ,_ 27
87
- as 27
87
+ li 27
88
+ is 27
88
89
  hi 27
89
- ong 27
90
90
  ha 27
91
- is 27
91
+ ong 27
92
92
  aka 27
93
- li 27
94
- ko 26
95
- gi 26
93
+ as 27
96
94
  _na_ 26
97
- to 25
95
+ gi 26
96
+ ko 26
98
97
  ap 25
99
98
  ri 25
100
- lan 24
101
- ay_ 24
99
+ to 25
100
+ ah 24
102
101
  ong_ 24
103
102
  un 24
104
- ah 24
103
+ lan 24
104
+ di 23
105
+ wa 23
105
106
  ata 23
106
- ing 23
107
+ at_ 23
107
108
  um 23
108
- o. 23
109
- di 23
110
109
  si 23
111
- wa 23
110
+ o. 23
111
+ s_ 23
112
+ ing 23
112
113
  ti 22
113
- s_ 22
114
114
  ki 22
115
- niy 21
116
- ab 21
117
- _d 21
118
- niya 21
119
- _niy 21
120
115
  mo 21
121
- an_ 21
122
116
  _niya 21
123
- a. 20
124
- at_ 20
117
+ _niy 21
118
+ niya 21
119
+ _d 21
120
+ an_ 21
121
+ niy 21
122
+ ab 21
125
123
  N 20
124
+ a. 20
125
+ aw 19
126
126
  yo 19
127
127
  ila 19
128
- - 19
129
128
  ot 19
130
- aw 19
131
- _mo 18
132
- nga 18
133
- _ak 18
129
+ - 19
134
130
  ig 18
131
+ _mo 18
135
132
  A 18
133
+ _ak 18
134
+ nga 18
136
135
  lang 18
137
- nag 17
138
- ama 17
139
136
  tu 17
137
+ ama 17
138
+ nag 17
139
+ _b 17
140
+ in_ 16
140
141
  ina 16
141
142
  P 16
142
- in_ 16
143
143
  ali 16
144
- "_ 16
145
- ara 16
146
- _b 16
147
144
  aki 16
148
- ' 15
145
+ ara 16
146
+ ing_ 15
147
+ ya_ 15
148
+ _si 15
149
149
  lang_ 15
150
+ o._ 15
150
151
  _at 15
151
- _si 15
152
- ing_ 15
153
- o._ 14
152
+ ga_ 14
154
153
  ai 14
155
- ib 14
156
- nd 14
157
- aa 14
158
- _r 14
159
154
  da 14
155
+ _h 14
156
+ _r 14
160
157
  bi 14
161
- ga_ 14
158
+ iya_ 14
159
+ aa 14
160
+ nd 14
161
+ ib 14
162
+ ik 13
162
163
  ro 13
164
+ ako 13
165
+ lo 13
163
166
  _at_ 13
164
- hin 13
165
167
  ito 13
166
- lo 13
167
- ik 13
168
- ako 13
169
- ya_ 13
170
- _h 13
171
- iya_ 13
172
- la_ 12
173
- rin 12
168
+ hin 13
169
+ _ta 12
174
170
  Na 12
171
+ rin 12
172
+ mag 12
173
+ a, 12
175
174
  K 12
175
+ _la 12
176
+ ahi 12
176
177
  ul 12
177
178
  S 12
178
- ahi 12
179
- _ta 12
180
- _la 12
181
- a, 12
182
- mag 12
183
- k_ 11
184
- pan 11
179
+ a,_ 12
180
+ la_ 12
181
+ man 11
182
+ go 11
185
183
  _tu 11
186
- mi 11
187
- a,_ 11
188
184
  asa 11
189
- ilan 11
185
+ _di 11
190
186
  apa 11
191
- kin 11
192
- _mag 11
193
- 'y 11
194
- 'y_ 11
187
+ M 11
195
188
  gk 11
196
189
  lu 11
190
+ k_ 11
191
+ kin 11
197
192
  _ri 11
198
- man 11
199
- go 11
200
- M 11
193
+ _mag 11
194
+ ilan 11
195
+ mi 11
196
+ pan 11
201
197
  ung 11
202
- ." 11
203
- _di 11
198
+ agk 10
199
+ akin 10
200
+ mg 10
201
+ Ma 10
204
202
  mga 10
205
- is_ 10
206
- wal 10
207
- nak 10
208
- _N 10
209
- awa 10
203
+ _K 10
210
204
  ot_ 10
211
- yon 10
212
- _is 10
205
+ awa 10
206
+ ir 10
213
207
  pa_ 10
214
- su 10
215
- _K 10
216
- agk 10
208
+ uma 10
209
+ ilang 10
210
+ wal 10
217
211
  mga_ 10
218
- mu 10
212
+ is_ 10
213
+ yon 10
219
214
  yan 10
220
- .. 10
221
- ir 10
222
- Ka 10
223
- akin 10
224
215
  wala 10
225
216
  ari 10
226
- ilang 10
227
- pi 10
228
- uma 10
229
217
  I 10
230
- mg 10
231
- Ma 10
218
+ pi 10
219
+ Ka 10
220
+ mu 10
221
+ _is 10
222
+ nak 10
223
+ .. 10
224
+ su 10
225
+ ini 9
232
226
  it_ 9
233
227
  n, 9
234
- _akin 9
235
- gka 9
236
- _aki 9
237
- agka 9
228
+ n,_ 9
229
+ _N 9
238
230
  n. 9
239
- pag 9
240
231
  san 9
241
232
  no 9
242
- isa 9
243
- nan 9
244
- _mga 9
245
233
  _mg 9
234
+ _Ka 9
235
+ _mga_ 9
236
+ _ba 9
246
237
  pu 9
247
- alan 9
238
+ nan 9
239
+ isa 9
240
+ pag 9
241
+ aba 9
248
242
  _A 9
249
- _mga_ 9
250
- ini 9
243
+ _P 9
244
+ _aki 9
245
+ _akin 9
246
+ agka 9
247
+ gka 9
248
+ alan 9
251
249
  An 9
252
- aba 9
253
- _Ka 9
254
- aha 8
255
- aman 8
256
- p_ 8
257
- ob 8
258
- _lu 8
259
- _ba 8
250
+ _mga 9
260
251
  ayo 8
252
+ igi 8
261
253
  si_ 8
262
- _w 8
263
- ili 8
264
- king 8
265
- _P 8
266
- di_ 8
267
- iti 8
254
+ Pa 8
255
+ p_ 8
256
+ naka 8
257
+ .n 8
258
+ _wala 8
259
+ aha 8
260
+ _nag 8
268
261
  niya_ 8
269
262
  yang 8
270
- as_ 8
271
263
  mo_ 8
272
- aga 8
273
- _nag 8
274
- aking 8
275
- tan 8
276
- tay 8
277
- dib 8
278
- a' 8
279
- abi 8
280
- l_ 8
281
- _wala 8
282
- naka 8
283
- _wal 8
284
- alang 8
285
264
  t. 8
286
- umi 8
287
- Pa 8
265
+ aman 8
266
+ abi 8
267
+ _wa 8
288
268
  ngi 8
289
269
  _ako 8
290
- _wa 8
291
- igi 8
270
+ l_ 8
271
+ king 8
272
+ di_ 8
273
+ tay 8
274
+ aking 8
275
+ dib 8
276
+ umi 8
277
+ _w 8
278
+ alang 8
292
279
  mat 8
293
- e_ 7
294
- _pag 7
295
- tin 7
296
- _M 7
297
- ra_ 7
280
+ _wal 8
281
+ ili 8
282
+ aga 8
283
+ _lu 8
284
+ oy 8
285
+ tan 8
286
+ ob 8
287
+ as_ 8
288
+ iti 8
289
+ ut 7
290
+ tak 7
298
291
  B 7
292
+ tat 7
293
+ tang 7
299
294
  kat 7
300
- _Na 7
301
- ip 7
302
- _isa 7
295
+ o,_ 7
296
+ bo 7
297
+ oy_ 7
303
298
  king_ 7
304
- g- 7
305
- .n 7
306
- ban 7
307
- oo 7
308
- gin 7
299
+ kan 7
300
+ anga 7
301
+ mata 7
302
+ ana 7
303
+ ala_ 7
304
+ _M 7
305
+ lis 7
306
+ e_ 7
309
307
  er 7
310
- iyo 7
311
- ut 7
312
- ram 7
313
- tak 7
308
+ _pag 7
314
309
  _ko 7
315
- ail 7
316
- n,_ 7
317
- o' 7
318
- _An 7
319
- _pa_ 7
320
- lis 7
321
- o, 7
322
- ala_ 7
323
- ka_ 7
310
+ ip 7
324
311
  ku 7
325
- a._ 7
326
- tat 7
327
- kan 7
328
- _mo_ 7
329
- _si_ 7
330
- bo 7
312
+ g- 7
331
313
  to_ 7
314
+ _hi 7
315
+ o, 7
316
+ _mo_ 7
317
+ tum 7
332
318
  pak 7
319
+ ban 7
320
+ a._ 7
321
+ _si_ 7
322
+ gin 7
323
+ tin 7
324
+ ra_ 7
325
+ _An 7
326
+ ka_ 7
327
+ ram 7
328
+ iyo 7
329
+ _isa 7
330
+ oo 7
331
+ _pa_ 7
332
+ ail 7
333
333
  _rin 7
334
- tum 7
335
- tang 7
336
- anga 7
337
- ana 7
338
- mata 7
339
- ita 6
340
- o'y_ 6
341
- pal 6
342
- _iyo 6
343
- ?" 6
344
- lis_ 6
345
- _ku 6
346
- nit 6
347
- up 6
348
- st 6
349
334
  par 6
350
- _hi 6
351
- _" 6
352
- man_ 6
353
- nang 6
354
- ung_ 6
355
- ak_ 6
356
- walan 6
357
- os 6
358
335
  aila 6
359
- ig_ 6
336
+ _ha 6
337
+ ha_ 6
338
+ siy 6
339
+ _iy 6
340
+ _S 6
341
+ pal 6
342
+ kun 6
343
+ isan 6
344
+ agi 6
345
+ Sa 6
346
+ mba 6
347
+ isang 6
348
+ tang_ 6
349
+ kah 6
350
+ st 6
360
351
  ag- 6
361
- yang_ 6
362
- ? 6
352
+ c 6
353
+ lis_ 6
363
354
  sang 6
364
- d_ 6
365
- _mu 6
366
- mb 6
367
- o'y 6
368
- _tum 6
355
+ _nga 6
356
+ nang 6
369
357
  ap_ 6
370
- bang 6
358
+ nt 6
359
+ os 6
360
+ ago 6
361
+ ig_ 6
362
+ man_ 6
371
363
  gay 6
364
+ _mu 6
372
365
  _par 6
373
- im 6
374
- ago 6
375
- _S 6
376
- kaka 6
366
+ bang 6
367
+ up 6
368
+ kak 6
369
+ ak_ 6
377
370
  siya 6
378
- _ha 6
379
- c 6
380
- _iy 6
381
- siy 6
382
- tang_ 6
383
- _nga 6
384
- kun 6
385
- Sa 6
386
- isan 6
387
- kah 6
371
+ ita 6
372
+ ung_ 6
373
+ yang_ 6
374
+ gan 6
375
+ d_ 6
376
+ _iyo 6
388
377
  una 6
389
- agi 6
390
- .na 6
391
- kak 6
392
378
  _Ma 6
393
- o." 6
394
- o,_ 6
379
+ ula 6
380
+ im 6
381
+ _ku 6
382
+ .na 6
383
+ _tum 6
384
+ walan 6
385
+ _Na 6
386
+ kaka 6
387
+ nit 6
388
+ .N 6
389
+ mb 6
395
390
  _I 6
396
391
  tata 6
397
- nt 6
398
- ula 6
399
- mba 6
400
- isang 6
392
+ han 5
393
+ nig_ 5
394
+ uh 5
395
+ bat 5
396
+ ito. 5
397
+ siya_ 5
398
+ hawak 5
399
+ go_ 5
400
+ ot. 5