scylla 0.8.32 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. data/lib/scylla/generator.rb +6 -2
  2. data/lib/scylla/lms/arabic.lm +318 -318
  3. data/lib/scylla/lms/bulgarian.lm +326 -326
  4. data/lib/scylla/lms/chinese.lm +399 -399
  5. data/lib/scylla/lms/french.lm +302 -302
  6. data/lib/scylla/lms/greek.lm +119 -119
  7. data/lib/scylla/lms/hebrew.lm +168 -168
  8. data/lib/scylla/lms/hindi.lm +108 -108
  9. data/lib/scylla/lms/japanese.lm +65 -65
  10. data/lib/scylla/lms/kannada.lm +147 -147
  11. data/lib/scylla/lms/korean.lm +151 -151
  12. data/lib/scylla/lms/marathi.lm +133 -133
  13. data/lib/scylla/lms/persian.lm +107 -107
  14. data/lib/scylla/lms/polish.lm +108 -108
  15. data/lib/scylla/lms/portuguese.lm +221 -221
  16. data/lib/scylla/lms/romanian.lm +132 -132
  17. data/lib/scylla/lms/russian.lm +82 -82
  18. data/lib/scylla/lms/thai.lm +119 -119
  19. data/lib/scylla/resources.rb +0 -1
  20. data/test/helper.rb +0 -1
  21. metadata +40 -55
  22. data/Gemfile +0 -23
  23. data/Gemfile.lock +0 -53
  24. data/Rakefile +0 -52
  25. data/VERSION +0 -1
  26. data/lib/scylla/lms/afrikaans.lm +0 -400
  27. data/pkg/scylla-0.5.0.gem +0 -0
  28. data/scylla-0.8.29.gem +0 -0
  29. data/scylla-0.8.31.gem +0 -0
  30. data/scylla.gemspec +0 -24
  31. data/source_texts/afrikaans.txt +0 -363
  32. data/source_texts/arabic.txt +0 -718
  33. data/source_texts/bulgarian.txt +0 -601
  34. data/source_texts/catalan.txt +0 -435
  35. data/source_texts/chinese.txt +0 -625
  36. data/source_texts/czech.txt +0 -237
  37. data/source_texts/danish.txt +0 -268
  38. data/source_texts/dutch.txt +0 -503
  39. data/source_texts/english.txt +0 -673
  40. data/source_texts/finnish.txt +0 -939
  41. data/source_texts/french.txt +0 -896
  42. data/source_texts/german.txt +0 -1236
  43. data/source_texts/greek.txt +0 -488
  44. data/source_texts/hebrew.txt +0 -638
  45. data/source_texts/hindi.txt +0 -353
  46. data/source_texts/icelandic.txt +0 -342
  47. data/source_texts/indonesian.txt +0 -509
  48. data/source_texts/italian.txt +0 -1066
  49. data/source_texts/japanese.txt +0 -1220
  50. data/source_texts/kannada.txt +0 -340
  51. data/source_texts/korean.txt +0 -343
  52. data/source_texts/marathi.txt +0 -237
  53. data/source_texts/norwegian.txt +0 -555
  54. data/source_texts/persian.txt +0 -886
  55. data/source_texts/polish.txt +0 -1014
  56. data/source_texts/portuguese.txt +0 -690
  57. data/source_texts/romanian.txt +0 -436
  58. data/source_texts/russian.txt +0 -1128
  59. data/source_texts/slovak.txt +0 -575
  60. data/source_texts/slovenian.txt +0 -354
  61. data/source_texts/spanish.txt +0 -1017
  62. data/source_texts/swedish.txt +0 -558
  63. data/source_texts/tagalog.txt +0 -426
  64. data/source_texts/thai.txt +0 -312
  65. data/source_texts/turkish.txt +0 -665
  66. data/source_texts/vietnamese.txt +0 -300
  67. data/source_texts/welsh.txt +0 -332
@@ -3,10 +3,10 @@
3
3
  � 23123
4
4
  � 4980
5
5
  � 4799
6
- _ 3024
7
- �� 2068
8
- า 2068
6
+ _ 2804
9
7
  � 2068
8
+ า 2068
9
+ �� 2068
10
10
  ��� 1939
11
11
  �� 1939
12
12
  �� 1842
@@ -15,8 +15,8 @@ _ 3024
15
15
  � 1666
16
16
  � 1644
17
17
  ร 1595
18
- � 1595
19
18
  �� 1595
19
+ � 1595
20
20
  �� 1586
21
21
  �� 1552
22
22
  ��� 1552
@@ -44,15 +44,15 @@ _
44
44
  �� 1054
45
45
  ��� 1025
46
46
  � 1025
47
- �� 1025
48
- �� 1025
49
- เ 1025
50
47
  �� 1025
48
+ เ 1025
49
+ �� 1025
50
+ �� 1025
51
51
  ��� 1016
52
52
  �� 1016
53
- ั 1003
54
- ��� 1003
55
53
  �� 1003
54
+ ��� 1003
55
+ ั 1003
56
56
  �� 1003
57
57
  � 1003
58
58
  � 991
@@ -62,14 +62,14 @@ _
62
62
  �� 956
63
63
  _� 945
64
64
  �� 928
65
+ � 928
65
66
  ย 928
66
67
  �� 928
67
- � 928
68
68
  ��� 920
69
69
  �� 920
70
+ � 906
70
71
  ท 906
71
72
  �� 906
72
- � 906
73
73
  �� 887
74
74
  ��� 887
75
75
  � 881
@@ -80,39 +80,39 @@ _
80
80
  �� 809
81
81
  ��� 809
82
82
  �� 805
83
- �� 795
84
83
  ้ 795
84
+ �� 795
85
85
  �� 795
86
86
  �� 789
87
87
  ��� 774
88
88
  �� 716
89
+ ล 686
89
90
  �� 686
90
91
  � 686
91
- ล 686
92
92
  � 668
93
- ��� 668
93
+ �� 668
94
94
  �� 668
95
95
  ป 668
96
- �� 668
97
- � 667
98
- ะ 667
96
+ ��� 668
99
97
  �� 667
100
- �� 661
98
+ 667
99
+ � 667
101
100
  ��� 661
101
+ �� 661
102
102
  � 642
103
- ี 642
104
103
  �� 642
104
+ ี 642
105
105
  �� 620
106
- �� 612
107
106
  ��� 612
107
+ �� 612
108
108
  � 611
109
109
  �� 609
110
- ต 609
111
110
  �� 609
111
+ ต 609
112
112
  ��� 607
113
113
  � 589
114
- �� 589
115
114
  ว 589
115
+ �� 589
116
116
  �� 565
117
117
  � 565
118
118
  ส 565
@@ -120,46 +120,46 @@ _
120
120
  �� 563
121
121
  ��� 562
122
122
  �� 562
123
- �� 561
124
123
  ��� 561
124
+ �� 561
125
125
  �� 561
126
126
  � 558
127
127
  ด 556
128
128
  �� 556
129
- ��� 532
130
129
  �� 532
130
+ ��� 532
131
131
  �� 491
132
- � 485
133
132
  �� 485
134
- บ 485
135
- �� 485
136
133
  � 485
137
- 484
138
- 484
134
+ �� 485
135
+ 485
136
+ � 485
139
137
  �� 484
140
- �� 465
138
+ 484
139
+ � 484
141
140
  ��� 465
142
- ��� 464
141
+ �� 465
143
142
  �� 464
143
+ ��� 464
144
144
  _� 455
145
145
  �� 448
146
- ��� 445
147
146
  แ 445
147
+ ��� 445
148
148
  �� 445
149
149
  ค 442
150
150
  �� 442
151
151
  ��� 439
152
+ ��� 439
152
153
  �� 439
153
154
  ไ 439
154
- ��� 439
155
155
  �� 425
156
156
  �� 421
157
157
  �� 420
158
158
  ห 403
159
- � 403
160
- �� 403
161
159
  ��� 403
162
160
  �� 403
161
+ � 403
162
+ �� 403
163
163
  �� 387
164
164
  �� 385
165
165
  �� 383
@@ -167,55 +167,55 @@ _
167
167
  จ 377
168
168
  ��� 363
169
169
  � 343
170
- �� 339
171
- �� 339
172
- � 339
173
170
  ศ 339
171
+ �� 339
174
172
  ช 339
173
+ �� 339
174
+ � 339
175
175
  �� 337
176
+ ��� 336
176
177
  ใ 336
177
- �� 336
178
178
  �� 336
179
- � 336
180
- ��� 336
181
179
  �� 336
180
+ �� 336
181
+ � 336
182
182
  ��� 335
183
183
  �� 328
184
184
  �� 322
185
185
  ข 322
186
186
  ��� 322
187
187
  �� 318
188
- �� 317
189
188
  ��� 317
190
- ื 317
191
189
  � 317
190
+ �� 317
191
+ ื 317
192
192
  �� 317
193
193
  � 307
194
- พ 307
195
194
  �� 307
195
+ พ 307
196
196
  �� 292
197
197
  �� 282
198
198
  �� 279
199
199
  ��� 279
200
200
  �� 277
201
201
  �� 269
202
- ��� 257
203
202
  �� 257
203
+ ��� 257
204
204
  �� 250
205
- ็ 246
206
205
  �� 246
206
+ ็ 246
207
207
  ��� 245
208
208
  �� 240
209
209
  �� 225
210
+ �� 224
210
211
  �� 224
211
- ��� 224
212
212
  ุ 224
213
- �� 224
214
- ำ 219
213
+ ��� 224
215
214
  � 219
216
215
  ��_ 219
217
- �_ 219
216
+ 219
218
217
  �� 219
218
+ �_ 219
219
219
  �� 214
220
220
  �� 213
221
221
  ��� 210
@@ -223,59 +223,59 @@ _
223
223
  �� 205
224
224
  �� 189
225
225
  �� 184
226
- �� 181
227
226
  �� 181
228
227
  ู 181
229
- ��� 179
228
+ �� 181
230
229
  �� 179
231
- 175
230
+ ��� 179
232
231
  �� 175
233
232
  ์ 175
233
+ � 175
234
+ ึ 170
234
235
  � 170
235
236
  ��� 170
236
- �� 170
237
237
  �� 170
238
- 170
238
+ �� 170
239
239
  �� 169
240
- โ 163
241
240
  ��� 163
241
+ โ 163
242
242
  �� 163
243
243
  �� 162
244
244
  �� 151
245
245
  �� 148
246
246
  �� 142
247
- � 140
248
247
  ษ 140
249
248
  �� 140
250
- 136
251
- �� 136
252
- �� 136
249
+ 140
253
250
  �� 136
254
251
  ภ 136
255
252
  ��� 136
256
- �� 135
253
+ 136
254
+ �� 136
255
+ �� 136
257
256
  �� 135
257
+ �� 135
258
258
  ��� 135
259
259
  �� 131
260
- �� 131
261
260
  ��� 131
261
+ �� 131
262
262
  �� 131
263
- �_ 129
264
263
  ��_ 129
264
+ �_ 129
265
265
  �� 127
266
- ��_ 118
267
- �_ 118
266
+ ��_ 119
267
+ �_ 119
268
268
  �� 117
269
269
  �_ 115
270
270
  ��_ 114
271
- �� 112
272
271
  ถ 112
272
+ �� 112
273
273
  � 112
274
274
  �� 111
275
275
  ��� 111
276
276
  � 109
277
- ��� 108
278
277
  �� 108
278
+ ��� 108
279
279
  ธ 108
280
280
  �� 108
281
281
  �� 107
@@ -285,116 +285,116 @@ _
285
285
  �� 93
286
286
  �� 91
287
287
  �� 90
288
- �� 89
289
288
  �� 89
289
+ �� 89
290
290
  � 89
291
- ณ 88
292
291
  �� 88
292
+ ณ 88
293
293
  �_ 88
294
294
  �� 87
295
295
  �� 84
296
- ซ 84
297
296
  � 84
298
- �� 84
297
+ 84
299
298
  ��� 84
299
+ �� 84
300
300
  �� 83
301
301
  �� 82
302
302
  ��_ 81
303
- �� 81
304
- �_ 81
305
303
  �� 81
304
+ �_ 81
305
+ �� 81
306
306
  ��� 80
307
307
  �� 80
308
- �� 79
309
- ญ 79
310
- �� 79
311
308
  � 79
309
+ �� 79
310
+ ญ 79
311
+ �� 79
312
312
  �� 76
313
313
  �� 75
314
314
  ��_ 74
315
315
  ��� 73
316
316
  �� 73
317
+ � 71
317
318
  ��_ 71
318
319
  ผ 71
319
- �_ 71
320
- ��� 71
320
+ �� 71
321
321
  �� 71
322
- a 71
322
+ �_ 71
323
323
  �� 71
324
- 71
325
- �� 71
326
- � 68
327
- �� 68
328
- ฐ 68
324
+ ��� 71
329
325
  �� 68
330
- �� 67
326
+ 68
327
+ �� 68
328
+ � 68
331
329
  ��� 67
332
- i 66
330
+ �� 67
333
331
  �� 65
334
332
  �� 63
335
333
  �� 61
336
- o 61
337
- �� 60
338
334
  ��_ 60
339
335
  �_ 60
336
+ �� 60
340
337
  �� 59
341
- n 59
342
- ��_ 57
343
- _ 57
344
- t 56
345
- �_ 53
346
- ��_ 53
338
+ ��_ 58
339
+ _ 58
340
+ ��_ 55
341
+ �_ 55
347
342
  �_ 50
348
343
  ��_ 50
349
344
  �� 45
350
- �_ 43
345
+ �_ 44
346
+ ��_ 44
351
347
  �_ 43
352
- e 43
353
- ��_ 43
354
- �� 43
355
348
  ��_ 43
356
- r 42
349
+ �� 43
357
350
  �� 42
358
- �_ 38
359
351
  ��_ 38
352
+ �_ 38
360
353
  �� 36
361
- l 32
362
- s 31
363
- �� 31
364
354
  ��� 31
355
+ �� 31
365
356
  ฉ 31
366
- h 30
367
357
  � 29
358
+ ��� 29
359
+ �� 29
360
+ �� 29
361
+ � 29
368
362
  �� 29
369
363
  ฤ 29
370
364
  �� 29
371
- � 29
372
- ��� 29
373
- �� 29
374
- �� 29
375
365
  �� 28
376
- �� 27
366
+ ��� 27
377
367
  �� 27
378
368
  ฒ 27
379
- ��� 27
369
+ �� 27
380
370
  ��_ 26
381
371
  �_ 26
382
372
  �_ 26
383
- th 26
384
- d 26
385
- c 25
386
- m 25
387
373
  ��_ 24
388
- _t 23
389
374
  �_ 21
390
375
  �_ 21
391
- ��_ 21
392
376
  ��_ 21
377
+ ��_ 21
393
378
  ๆ 20
379
+ ��_ 20
380
+ �_ 20
394
381
  �� 20
395
382
  � 20
396
- ��_ 19
383
+ _ 19
397
384
  �_ 19
398
385
  ��_ 19
399
- _ 19
400
- �_ 19
386
+ ��_ 19
387
+ �� 18
388
+ �_ 18
389
+ ��_ 18
390
+ �� 17
391
+ � 17
392
+ ฟ 17
393
+ �� 17
394
+ �� 17
395
+ ��� 16
396
+ �� 16
397
+ ��_ 14
398
+ �� 14
399
+ ��� 13
400
+ �� 13
@@ -33,7 +33,6 @@ module Scylla
33
33
  "welsh" => ["cy","cymru"],
34
34
  "catalan" => ["ca","Catalunya"],
35
35
  "thai" => ["th","ประเทศไทย"],
36
- "afrikaans" => ["af","Afrikaanse"],
37
36
  "czech" => ["cs", "Československo"],
38
37
  "hindi" => ["hi", "भारत"],
39
38
  "kannada" => ["kn","ಕರ್ನಾಟಕ"],
data/test/helper.rb CHANGED
@@ -7,7 +7,6 @@ require 'scylla'
7
7
  require 'sanitize'
8
8
  require 'mocha'
9
9
 
10
-
11
10
  begin
12
11
  Bundler.setup(:default, :development)
13
12
  rescue Bundler::BundlerError => e
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scylla
3
3
  version: !ruby/object:Gem::Version
4
- hash: 127
4
+ hash: 59
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 8
9
- - 32
10
- version: 0.8.32
8
+ - 9
9
+ - 0
10
+ version: 0.9.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Ashwin Hegde
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-01-26 00:00:00 -08:00
18
+ date: 2012-01-27 00:00:00 -08:00
19
19
  default_executable: scylla
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -60,12 +60,8 @@ extra_rdoc_files:
60
60
  - LICENSE.txt
61
61
  - README.rdoc
62
62
  files:
63
- - bin/scylla
64
- - Gemfile
65
- - Gemfile.lock
66
63
  - lib/scylla/classifier.rb
67
64
  - lib/scylla/generator.rb
68
- - lib/scylla/lms/afrikaans.lm
69
65
  - lib/scylla/lms/arabic.lm
70
66
  - lib/scylla/lms/bulgarian.lm
71
67
  - lib/scylla/lms/catalan.lm
@@ -107,50 +103,8 @@ files:
107
103
  - lib/scylla/string.rb
108
104
  - lib/scylla/tasks.rb
109
105
  - lib/scylla.rb
110
- - LICENSE.txt
111
- - pkg/scylla-0.5.0.gem
112
- - Rakefile
113
106
  - README.rdoc
114
- - scylla-0.8.29.gem
115
- - scylla-0.8.31.gem
116
- - scylla.gemspec
117
- - source_texts/afrikaans.txt
118
- - source_texts/arabic.txt
119
- - source_texts/bulgarian.txt
120
- - source_texts/catalan.txt
121
- - source_texts/chinese.txt
122
- - source_texts/czech.txt
123
- - source_texts/danish.txt
124
- - source_texts/dutch.txt
125
- - source_texts/english.txt
126
- - source_texts/finnish.txt
127
- - source_texts/french.txt
128
- - source_texts/german.txt
129
- - source_texts/greek.txt
130
- - source_texts/hebrew.txt
131
- - source_texts/hindi.txt
132
- - source_texts/icelandic.txt
133
- - source_texts/indonesian.txt
134
- - source_texts/italian.txt
135
- - source_texts/japanese.txt
136
- - source_texts/kannada.txt
137
- - source_texts/korean.txt
138
- - source_texts/marathi.txt
139
- - source_texts/norwegian.txt
140
- - source_texts/persian.txt
141
- - source_texts/polish.txt
142
- - source_texts/portuguese.txt
143
- - source_texts/romanian.txt
144
- - source_texts/russian.txt
145
- - source_texts/slovak.txt
146
- - source_texts/slovenian.txt
147
- - source_texts/spanish.txt
148
- - source_texts/swedish.txt
149
- - source_texts/tagalog.txt
150
- - source_texts/thai.txt
151
- - source_texts/turkish.txt
152
- - source_texts/vietnamese.txt
153
- - source_texts/welsh.txt
107
+ - LICENSE.txt
154
108
  - test/classifier_test.rb
155
109
  - test/fixtures/lms/danish.lm
156
110
  - test/fixtures/lms/english.lm
@@ -183,7 +137,7 @@ files:
183
137
  - test/language_test.rb
184
138
  - test/loader_test.rb
185
139
  - test/scylla_test.rb
186
- - VERSION
140
+ - bin/scylla
187
141
  has_rdoc: true
188
142
  homepage: http://github.com/hashwin/scylla
189
143
  licenses:
@@ -218,5 +172,36 @@ rubygems_version: 1.5.3
218
172
  signing_key:
219
173
  specification_version: 3
220
174
  summary: Ruby port of Textcat language guesser
221
- test_files: []
222
-
175
+ test_files:
176
+ - test/classifier_test.rb
177
+ - test/fixtures/lms/danish.lm
178
+ - test/fixtures/lms/english.lm
179
+ - test/fixtures/lms/french.lm
180
+ - test/fixtures/lms/german.lm
181
+ - test/fixtures/lms/hindi.lm
182
+ - test/fixtures/lms/italian.lm
183
+ - test/fixtures/lms/japanese.lm
184
+ - test/fixtures/lms/norwegian.lm
185
+ - test/fixtures/lms/spanish.lm
186
+ - test/fixtures/source_texts/danish.txt
187
+ - test/fixtures/source_texts/english.txt
188
+ - test/fixtures/source_texts/french.txt
189
+ - test/fixtures/source_texts/german.txt
190
+ - test/fixtures/source_texts/hindi.txt
191
+ - test/fixtures/source_texts/italian.txt
192
+ - test/fixtures/source_texts/japanese.txt
193
+ - test/fixtures/source_texts/norwegian.txt
194
+ - test/fixtures/source_texts/spanish.txt
195
+ - test/fixtures/test_languages/english
196
+ - test/fixtures/test_languages/french
197
+ - test/fixtures/test_languages/german
198
+ - test/fixtures/test_languages/hindi
199
+ - test/fixtures/test_languages/italian
200
+ - test/fixtures/test_languages/japanese
201
+ - test/fixtures/test_languages/norwegian
202
+ - test/fixtures/test_languages/spanish
203
+ - test/generator_test.rb
204
+ - test/helper.rb
205
+ - test/language_test.rb
206
+ - test/loader_test.rb
207
+ - test/scylla_test.rb
data/Gemfile DELETED
@@ -1,23 +0,0 @@
1
- source "http://rubygems.org"
2
- # Add dependencies required to use your gem here.
3
- # Example:
4
- # gem "activesupport", ">= 2.3.5"
5
-
6
- # Add dependencies to develop your gem here.
7
- # Include everything needed to run rake, tests, features, etc.
8
- group :development do
9
- gem "bundler", "~> 1.0.0"
10
- gem "jeweler", "~> 1.6.4"
11
- gem "mail"
12
- gem "sanitize"
13
- gem "character-encodings"
14
- gem "wikipedia-client"
15
- gem "json"
16
- gem "unicode"
17
- end
18
-
19
- group :test do
20
- gem "shoulda", ">= 0"
21
- gem "mocha"
22
- gem "ruby-debug", "~> 0.10.4"
23
- end
data/Gemfile.lock DELETED
@@ -1,53 +0,0 @@
1
- GEM
2
- remote: http://rubygems.org/
3
- specs:
4
- character-encodings (0.4.1)
5
- columnize (0.3.4)
6
- git (1.2.5)
7
- i18n (0.6.0)
8
- jeweler (1.6.4)
9
- bundler (~> 1.0)
10
- git (>= 1.2.5)
11
- rake
12
- json (1.6.3)
13
- linecache (0.46)
14
- rbx-require-relative (> 0.0.4)
15
- mail (2.3.0)
16
- i18n (>= 0.4.0)
17
- mime-types (~> 1.16)
18
- treetop (~> 1.4.8)
19
- mime-types (1.16)
20
- mocha (0.9.12)
21
- nokogiri (1.4.7)
22
- polyglot (0.3.2)
23
- rake (0.9.2.2)
24
- rbx-require-relative (0.0.5)
25
- ruby-debug (0.10.4)
26
- columnize (>= 0.1)
27
- ruby-debug-base (~> 0.10.4.0)
28
- ruby-debug-base (0.10.4)
29
- linecache (>= 0.3)
30
- sanitize (2.0.3)
31
- nokogiri (>= 1.4.4, < 1.6)
32
- shoulda (2.11.3)
33
- treetop (1.4.10)
34
- polyglot
35
- polyglot (>= 0.3.1)
36
- unicode (0.4.0)
37
- wikipedia-client (1.0.0)
38
-
39
- PLATFORMS
40
- ruby
41
-
42
- DEPENDENCIES
43
- bundler (~> 1.0.0)
44
- character-encodings
45
- jeweler (~> 1.6.4)
46
- json
47
- mail
48
- mocha
49
- ruby-debug (~> 0.10.4)
50
- sanitize
51
- shoulda
52
- unicode
53
- wikipedia-client