scylla 0.8.32 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. data/lib/scylla/generator.rb +6 -2
  2. data/lib/scylla/lms/arabic.lm +318 -318
  3. data/lib/scylla/lms/bulgarian.lm +326 -326
  4. data/lib/scylla/lms/chinese.lm +399 -399
  5. data/lib/scylla/lms/french.lm +302 -302
  6. data/lib/scylla/lms/greek.lm +119 -119
  7. data/lib/scylla/lms/hebrew.lm +168 -168
  8. data/lib/scylla/lms/hindi.lm +108 -108
  9. data/lib/scylla/lms/japanese.lm +65 -65
  10. data/lib/scylla/lms/kannada.lm +147 -147
  11. data/lib/scylla/lms/korean.lm +151 -151
  12. data/lib/scylla/lms/marathi.lm +133 -133
  13. data/lib/scylla/lms/persian.lm +107 -107
  14. data/lib/scylla/lms/polish.lm +108 -108
  15. data/lib/scylla/lms/portuguese.lm +221 -221
  16. data/lib/scylla/lms/romanian.lm +132 -132
  17. data/lib/scylla/lms/russian.lm +82 -82
  18. data/lib/scylla/lms/thai.lm +119 -119
  19. data/lib/scylla/resources.rb +0 -1
  20. data/test/helper.rb +0 -1
  21. metadata +40 -55
  22. data/Gemfile +0 -23
  23. data/Gemfile.lock +0 -53
  24. data/Rakefile +0 -52
  25. data/VERSION +0 -1
  26. data/lib/scylla/lms/afrikaans.lm +0 -400
  27. data/pkg/scylla-0.5.0.gem +0 -0
  28. data/scylla-0.8.29.gem +0 -0
  29. data/scylla-0.8.31.gem +0 -0
  30. data/scylla.gemspec +0 -24
  31. data/source_texts/afrikaans.txt +0 -363
  32. data/source_texts/arabic.txt +0 -718
  33. data/source_texts/bulgarian.txt +0 -601
  34. data/source_texts/catalan.txt +0 -435
  35. data/source_texts/chinese.txt +0 -625
  36. data/source_texts/czech.txt +0 -237
  37. data/source_texts/danish.txt +0 -268
  38. data/source_texts/dutch.txt +0 -503
  39. data/source_texts/english.txt +0 -673
  40. data/source_texts/finnish.txt +0 -939
  41. data/source_texts/french.txt +0 -896
  42. data/source_texts/german.txt +0 -1236
  43. data/source_texts/greek.txt +0 -488
  44. data/source_texts/hebrew.txt +0 -638
  45. data/source_texts/hindi.txt +0 -353
  46. data/source_texts/icelandic.txt +0 -342
  47. data/source_texts/indonesian.txt +0 -509
  48. data/source_texts/italian.txt +0 -1066
  49. data/source_texts/japanese.txt +0 -1220
  50. data/source_texts/kannada.txt +0 -340
  51. data/source_texts/korean.txt +0 -343
  52. data/source_texts/marathi.txt +0 -237
  53. data/source_texts/norwegian.txt +0 -555
  54. data/source_texts/persian.txt +0 -886
  55. data/source_texts/polish.txt +0 -1014
  56. data/source_texts/portuguese.txt +0 -690
  57. data/source_texts/romanian.txt +0 -436
  58. data/source_texts/russian.txt +0 -1128
  59. data/source_texts/slovak.txt +0 -575
  60. data/source_texts/slovenian.txt +0 -354
  61. data/source_texts/spanish.txt +0 -1017
  62. data/source_texts/swedish.txt +0 -558
  63. data/source_texts/tagalog.txt +0 -426
  64. data/source_texts/thai.txt +0 -312
  65. data/source_texts/turkish.txt +0 -665
  66. data/source_texts/vietnamese.txt +0 -300
  67. data/source_texts/welsh.txt +0 -332
@@ -3,10 +3,10 @@
3
3
  � 23123
4
4
  � 4980
5
5
  � 4799
6
- _ 3024
7
- �� 2068
8
- า 2068
6
+ _ 2804
9
7
  � 2068
8
+ า 2068
9
+ �� 2068
10
10
  ��� 1939
11
11
  �� 1939
12
12
  �� 1842
@@ -15,8 +15,8 @@ _ 3024
15
15
  � 1666
16
16
  � 1644
17
17
  ร 1595
18
- � 1595
19
18
  �� 1595
19
+ � 1595
20
20
  �� 1586
21
21
  �� 1552
22
22
  ��� 1552
@@ -44,15 +44,15 @@ _
44
44
  �� 1054
45
45
  ��� 1025
46
46
  � 1025
47
- �� 1025
48
- �� 1025
49
- เ 1025
50
47
  �� 1025
48
+ เ 1025
49
+ �� 1025
50
+ �� 1025
51
51
  ��� 1016
52
52
  �� 1016
53
- ั 1003
54
- ��� 1003
55
53
  �� 1003
54
+ ��� 1003
55
+ ั 1003
56
56
  �� 1003
57
57
  � 1003
58
58
  � 991
@@ -62,14 +62,14 @@ _
62
62
  �� 956
63
63
  _� 945
64
64
  �� 928
65
+ � 928
65
66
  ย 928
66
67
  �� 928
67
- � 928
68
68
  ��� 920
69
69
  �� 920
70
+ � 906
70
71
  ท 906
71
72
  �� 906
72
- � 906
73
73
  �� 887
74
74
  ��� 887
75
75
  � 881
@@ -80,39 +80,39 @@ _
80
80
  �� 809
81
81
  ��� 809
82
82
  �� 805
83
- �� 795
84
83
  ้ 795
84
+ �� 795
85
85
  �� 795
86
86
  �� 789
87
87
  ��� 774
88
88
  �� 716
89
+ ล 686
89
90
  �� 686
90
91
  � 686
91
- ล 686
92
92
  � 668
93
- ��� 668
93
+ �� 668
94
94
  �� 668
95
95
  ป 668
96
- �� 668
97
- � 667
98
- ะ 667
96
+ ��� 668
99
97
  �� 667
100
- �� 661
98
+ 667
99
+ � 667
101
100
  ��� 661
101
+ �� 661
102
102
  � 642
103
- ี 642
104
103
  �� 642
104
+ ี 642
105
105
  �� 620
106
- �� 612
107
106
  ��� 612
107
+ �� 612
108
108
  � 611
109
109
  �� 609
110
- ต 609
111
110
  �� 609
111
+ ต 609
112
112
  ��� 607
113
113
  � 589
114
- �� 589
115
114
  ว 589
115
+ �� 589
116
116
  �� 565
117
117
  � 565
118
118
  ส 565
@@ -120,46 +120,46 @@ _
120
120
  �� 563
121
121
  ��� 562
122
122
  �� 562
123
- �� 561
124
123
  ��� 561
124
+ �� 561
125
125
  �� 561
126
126
  � 558
127
127
  ด 556
128
128
  �� 556
129
- ��� 532
130
129
  �� 532
130
+ ��� 532
131
131
  �� 491
132
- � 485
133
132
  �� 485
134
- บ 485
135
- �� 485
136
133
  � 485
137
- 484
138
- 484
134
+ �� 485
135
+ 485
136
+ � 485
139
137
  �� 484
140
- �� 465
138
+ 484
139
+ � 484
141
140
  ��� 465
142
- ��� 464
141
+ �� 465
143
142
  �� 464
143
+ ��� 464
144
144
  _� 455
145
145
  �� 448
146
- ��� 445
147
146
  แ 445
147
+ ��� 445
148
148
  �� 445
149
149
  ค 442
150
150
  �� 442
151
151
  ��� 439
152
+ ��� 439
152
153
  �� 439
153
154
  ไ 439
154
- ��� 439
155
155
  �� 425
156
156
  �� 421
157
157
  �� 420
158
158
  ห 403
159
- � 403
160
- �� 403
161
159
  ��� 403
162
160
  �� 403
161
+ � 403
162
+ �� 403
163
163
  �� 387
164
164
  �� 385
165
165
  �� 383
@@ -167,55 +167,55 @@ _
167
167
  จ 377
168
168
  ��� 363
169
169
  � 343
170
- �� 339
171
- �� 339
172
- � 339
173
170
  ศ 339
171
+ �� 339
174
172
  ช 339
173
+ �� 339
174
+ � 339
175
175
  �� 337
176
+ ��� 336
176
177
  ใ 336
177
- �� 336
178
178
  �� 336
179
- � 336
180
- ��� 336
181
179
  �� 336
180
+ �� 336
181
+ � 336
182
182
  ��� 335
183
183
  �� 328
184
184
  �� 322
185
185
  ข 322
186
186
  ��� 322
187
187
  �� 318
188
- �� 317
189
188
  ��� 317
190
- ื 317
191
189
  � 317
190
+ �� 317
191
+ ื 317
192
192
  �� 317
193
193
  � 307
194
- พ 307
195
194
  �� 307
195
+ พ 307
196
196
  �� 292
197
197
  �� 282
198
198
  �� 279
199
199
  ��� 279
200
200
  �� 277
201
201
  �� 269
202
- ��� 257
203
202
  �� 257
203
+ ��� 257
204
204
  �� 250
205
- ็ 246
206
205
  �� 246
206
+ ็ 246
207
207
  ��� 245
208
208
  �� 240
209
209
  �� 225
210
+ �� 224
210
211
  �� 224
211
- ��� 224
212
212
  ุ 224
213
- �� 224
214
- ำ 219
213
+ ��� 224
215
214
  � 219
216
215
  ��_ 219
217
- �_ 219
216
+ 219
218
217
  �� 219
218
+ �_ 219
219
219
  �� 214
220
220
  �� 213
221
221
  ��� 210
@@ -223,59 +223,59 @@ _
223
223
  �� 205
224
224
  �� 189
225
225
  �� 184
226
- �� 181
227
226
  �� 181
228
227
  ู 181
229
- ��� 179
228
+ �� 181
230
229
  �� 179
231
- 175
230
+ ��� 179
232
231
  �� 175
233
232
  ์ 175
233
+ � 175
234
+ ึ 170
234
235
  � 170
235
236
  ��� 170
236
- �� 170
237
237
  �� 170
238
- 170
238
+ �� 170
239
239
  �� 169
240
- โ 163
241
240
  ��� 163
241
+ โ 163
242
242
  �� 163
243
243
  �� 162
244
244
  �� 151
245
245
  �� 148
246
246
  �� 142
247
- � 140
248
247
  ษ 140
249
248
  �� 140
250
- 136
251
- �� 136
252
- �� 136
249
+ 140
253
250
  �� 136
254
251
  ภ 136
255
252
  ��� 136
256
- �� 135
253
+ 136
254
+ �� 136
255
+ �� 136
257
256
  �� 135
257
+ �� 135
258
258
  ��� 135
259
259
  �� 131
260
- �� 131
261
260
  ��� 131
261
+ �� 131
262
262
  �� 131
263
- �_ 129
264
263
  ��_ 129
264
+ �_ 129
265
265
  �� 127
266
- ��_ 118
267
- �_ 118
266
+ ��_ 119
267
+ �_ 119
268
268
  �� 117
269
269
  �_ 115
270
270
  ��_ 114
271
- �� 112
272
271
  ถ 112
272
+ �� 112
273
273
  � 112
274
274
  �� 111
275
275
  ��� 111
276
276
  � 109
277
- ��� 108
278
277
  �� 108
278
+ ��� 108
279
279
  ธ 108
280
280
  �� 108
281
281
  �� 107
@@ -285,116 +285,116 @@ _
285
285
  �� 93
286
286
  �� 91
287
287
  �� 90
288
- �� 89
289
288
  �� 89
289
+ �� 89
290
290
  � 89
291
- ณ 88
292
291
  �� 88
292
+ ณ 88
293
293
  �_ 88
294
294
  �� 87
295
295
  �� 84
296
- ซ 84
297
296
  � 84
298
- �� 84
297
+ 84
299
298
  ��� 84
299
+ �� 84
300
300
  �� 83
301
301
  �� 82
302
302
  ��_ 81
303
- �� 81
304
- �_ 81
305
303
  �� 81
304
+ �_ 81
305
+ �� 81
306
306
  ��� 80
307
307
  �� 80
308
- �� 79
309
- ญ 79
310
- �� 79
311
308
  � 79
309
+ �� 79
310
+ ญ 79
311
+ �� 79
312
312
  �� 76
313
313
  �� 75
314
314
  ��_ 74
315
315
  ��� 73
316
316
  �� 73
317
+ � 71
317
318
  ��_ 71
318
319
  ผ 71
319
- �_ 71
320
- ��� 71
320
+ �� 71
321
321
  �� 71
322
- a 71
322
+ �_ 71
323
323
  �� 71
324
- 71
325
- �� 71
326
- � 68
327
- �� 68
328
- ฐ 68
324
+ ��� 71
329
325
  �� 68
330
- �� 67
326
+ 68
327
+ �� 68
328
+ � 68
331
329
  ��� 67
332
- i 66
330
+ �� 67
333
331
  �� 65
334
332
  �� 63
335
333
  �� 61
336
- o 61
337
- �� 60
338
334
  ��_ 60
339
335
  �_ 60
336
+ �� 60
340
337
  �� 59
341
- n 59
342
- ��_ 57
343
- _ 57
344
- t 56
345
- �_ 53
346
- ��_ 53
338
+ ��_ 58
339
+ _ 58
340
+ ��_ 55
341
+ �_ 55
347
342
  �_ 50
348
343
  ��_ 50
349
344
  �� 45
350
- �_ 43
345
+ �_ 44
346
+ ��_ 44
351
347
  �_ 43
352
- e 43
353
- ��_ 43
354
- �� 43
355
348
  ��_ 43
356
- r 42
349
+ �� 43
357
350
  �� 42
358
- �_ 38
359
351
  ��_ 38
352
+ �_ 38
360
353
  �� 36
361
- l 32
362
- s 31
363
- �� 31
364
354
  ��� 31
355
+ �� 31
365
356
  ฉ 31
366
- h 30
367
357
  � 29
358
+ ��� 29
359
+ �� 29
360
+ �� 29
361
+ � 29
368
362
  �� 29
369
363
  ฤ 29
370
364
  �� 29
371
- � 29
372
- ��� 29
373
- �� 29
374
- �� 29
375
365
  �� 28
376
- �� 27
366
+ ��� 27
377
367
  �� 27
378
368
  ฒ 27
379
- ��� 27
369
+ �� 27
380
370
  ��_ 26
381
371
  �_ 26
382
372
  �_ 26
383
- th 26
384
- d 26
385
- c 25
386
- m 25
387
373
  ��_ 24
388
- _t 23
389
374
  �_ 21
390
375
  �_ 21
391
- ��_ 21
392
376
  ��_ 21
377
+ ��_ 21
393
378
  ๆ 20
379
+ ��_ 20
380
+ �_ 20
394
381
  �� 20
395
382
  � 20
396
- ��_ 19
383
+ _ 19
397
384
  �_ 19
398
385
  ��_ 19
399
- _ 19
400
- �_ 19
386
+ ��_ 19
387
+ �� 18
388
+ �_ 18
389
+ ��_ 18
390
+ �� 17
391
+ � 17
392
+ ฟ 17
393
+ �� 17
394
+ �� 17
395
+ ��� 16
396
+ �� 16
397
+ ��_ 14
398
+ �� 14
399
+ ��� 13
400
+ �� 13
@@ -33,7 +33,6 @@ module Scylla
33
33
  "welsh" => ["cy","cymru"],
34
34
  "catalan" => ["ca","Catalunya"],
35
35
  "thai" => ["th","ประเทศไทย"],
36
- "afrikaans" => ["af","Afrikaanse"],
37
36
  "czech" => ["cs", "Československo"],
38
37
  "hindi" => ["hi", "भारत"],
39
38
  "kannada" => ["kn","ಕರ್ನಾಟಕ"],
data/test/helper.rb CHANGED
@@ -7,7 +7,6 @@ require 'scylla'
7
7
  require 'sanitize'
8
8
  require 'mocha'
9
9
 
10
-
11
10
  begin
12
11
  Bundler.setup(:default, :development)
13
12
  rescue Bundler::BundlerError => e
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scylla
3
3
  version: !ruby/object:Gem::Version
4
- hash: 127
4
+ hash: 59
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 8
9
- - 32
10
- version: 0.8.32
8
+ - 9
9
+ - 0
10
+ version: 0.9.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Ashwin Hegde
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-01-26 00:00:00 -08:00
18
+ date: 2012-01-27 00:00:00 -08:00
19
19
  default_executable: scylla
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -60,12 +60,8 @@ extra_rdoc_files:
60
60
  - LICENSE.txt
61
61
  - README.rdoc
62
62
  files:
63
- - bin/scylla
64
- - Gemfile
65
- - Gemfile.lock
66
63
  - lib/scylla/classifier.rb
67
64
  - lib/scylla/generator.rb
68
- - lib/scylla/lms/afrikaans.lm
69
65
  - lib/scylla/lms/arabic.lm
70
66
  - lib/scylla/lms/bulgarian.lm
71
67
  - lib/scylla/lms/catalan.lm
@@ -107,50 +103,8 @@ files:
107
103
  - lib/scylla/string.rb
108
104
  - lib/scylla/tasks.rb
109
105
  - lib/scylla.rb
110
- - LICENSE.txt
111
- - pkg/scylla-0.5.0.gem
112
- - Rakefile
113
106
  - README.rdoc
114
- - scylla-0.8.29.gem
115
- - scylla-0.8.31.gem
116
- - scylla.gemspec
117
- - source_texts/afrikaans.txt
118
- - source_texts/arabic.txt
119
- - source_texts/bulgarian.txt
120
- - source_texts/catalan.txt
121
- - source_texts/chinese.txt
122
- - source_texts/czech.txt
123
- - source_texts/danish.txt
124
- - source_texts/dutch.txt
125
- - source_texts/english.txt
126
- - source_texts/finnish.txt
127
- - source_texts/french.txt
128
- - source_texts/german.txt
129
- - source_texts/greek.txt
130
- - source_texts/hebrew.txt
131
- - source_texts/hindi.txt
132
- - source_texts/icelandic.txt
133
- - source_texts/indonesian.txt
134
- - source_texts/italian.txt
135
- - source_texts/japanese.txt
136
- - source_texts/kannada.txt
137
- - source_texts/korean.txt
138
- - source_texts/marathi.txt
139
- - source_texts/norwegian.txt
140
- - source_texts/persian.txt
141
- - source_texts/polish.txt
142
- - source_texts/portuguese.txt
143
- - source_texts/romanian.txt
144
- - source_texts/russian.txt
145
- - source_texts/slovak.txt
146
- - source_texts/slovenian.txt
147
- - source_texts/spanish.txt
148
- - source_texts/swedish.txt
149
- - source_texts/tagalog.txt
150
- - source_texts/thai.txt
151
- - source_texts/turkish.txt
152
- - source_texts/vietnamese.txt
153
- - source_texts/welsh.txt
107
+ - LICENSE.txt
154
108
  - test/classifier_test.rb
155
109
  - test/fixtures/lms/danish.lm
156
110
  - test/fixtures/lms/english.lm
@@ -183,7 +137,7 @@ files:
183
137
  - test/language_test.rb
184
138
  - test/loader_test.rb
185
139
  - test/scylla_test.rb
186
- - VERSION
140
+ - bin/scylla
187
141
  has_rdoc: true
188
142
  homepage: http://github.com/hashwin/scylla
189
143
  licenses:
@@ -218,5 +172,36 @@ rubygems_version: 1.5.3
218
172
  signing_key:
219
173
  specification_version: 3
220
174
  summary: Ruby port of Textcat language guesser
221
- test_files: []
222
-
175
+ test_files:
176
+ - test/classifier_test.rb
177
+ - test/fixtures/lms/danish.lm
178
+ - test/fixtures/lms/english.lm
179
+ - test/fixtures/lms/french.lm
180
+ - test/fixtures/lms/german.lm
181
+ - test/fixtures/lms/hindi.lm
182
+ - test/fixtures/lms/italian.lm
183
+ - test/fixtures/lms/japanese.lm
184
+ - test/fixtures/lms/norwegian.lm
185
+ - test/fixtures/lms/spanish.lm
186
+ - test/fixtures/source_texts/danish.txt
187
+ - test/fixtures/source_texts/english.txt
188
+ - test/fixtures/source_texts/french.txt
189
+ - test/fixtures/source_texts/german.txt
190
+ - test/fixtures/source_texts/hindi.txt
191
+ - test/fixtures/source_texts/italian.txt
192
+ - test/fixtures/source_texts/japanese.txt
193
+ - test/fixtures/source_texts/norwegian.txt
194
+ - test/fixtures/source_texts/spanish.txt
195
+ - test/fixtures/test_languages/english
196
+ - test/fixtures/test_languages/french
197
+ - test/fixtures/test_languages/german
198
+ - test/fixtures/test_languages/hindi
199
+ - test/fixtures/test_languages/italian
200
+ - test/fixtures/test_languages/japanese
201
+ - test/fixtures/test_languages/norwegian
202
+ - test/fixtures/test_languages/spanish
203
+ - test/generator_test.rb
204
+ - test/helper.rb
205
+ - test/language_test.rb
206
+ - test/loader_test.rb
207
+ - test/scylla_test.rb
data/Gemfile DELETED
@@ -1,23 +0,0 @@
1
- source "http://rubygems.org"
2
- # Add dependencies required to use your gem here.
3
- # Example:
4
- # gem "activesupport", ">= 2.3.5"
5
-
6
- # Add dependencies to develop your gem here.
7
- # Include everything needed to run rake, tests, features, etc.
8
- group :development do
9
- gem "bundler", "~> 1.0.0"
10
- gem "jeweler", "~> 1.6.4"
11
- gem "mail"
12
- gem "sanitize"
13
- gem "character-encodings"
14
- gem "wikipedia-client"
15
- gem "json"
16
- gem "unicode"
17
- end
18
-
19
- group :test do
20
- gem "shoulda", ">= 0"
21
- gem "mocha"
22
- gem "ruby-debug", "~> 0.10.4"
23
- end
data/Gemfile.lock DELETED
@@ -1,53 +0,0 @@
1
- GEM
2
- remote: http://rubygems.org/
3
- specs:
4
- character-encodings (0.4.1)
5
- columnize (0.3.4)
6
- git (1.2.5)
7
- i18n (0.6.0)
8
- jeweler (1.6.4)
9
- bundler (~> 1.0)
10
- git (>= 1.2.5)
11
- rake
12
- json (1.6.3)
13
- linecache (0.46)
14
- rbx-require-relative (> 0.0.4)
15
- mail (2.3.0)
16
- i18n (>= 0.4.0)
17
- mime-types (~> 1.16)
18
- treetop (~> 1.4.8)
19
- mime-types (1.16)
20
- mocha (0.9.12)
21
- nokogiri (1.4.7)
22
- polyglot (0.3.2)
23
- rake (0.9.2.2)
24
- rbx-require-relative (0.0.5)
25
- ruby-debug (0.10.4)
26
- columnize (>= 0.1)
27
- ruby-debug-base (~> 0.10.4.0)
28
- ruby-debug-base (0.10.4)
29
- linecache (>= 0.3)
30
- sanitize (2.0.3)
31
- nokogiri (>= 1.4.4, < 1.6)
32
- shoulda (2.11.3)
33
- treetop (1.4.10)
34
- polyglot
35
- polyglot (>= 0.3.1)
36
- unicode (0.4.0)
37
- wikipedia-client (1.0.0)
38
-
39
- PLATFORMS
40
- ruby
41
-
42
- DEPENDENCIES
43
- bundler (~> 1.0.0)
44
- character-encodings
45
- jeweler (~> 1.6.4)
46
- json
47
- mail
48
- mocha
49
- ruby-debug (~> 0.10.4)
50
- sanitize
51
- shoulda
52
- unicode
53
- wikipedia-client