scylla 0.8.32 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. data/lib/scylla/generator.rb +6 -2
  2. data/lib/scylla/lms/arabic.lm +318 -318
  3. data/lib/scylla/lms/bulgarian.lm +326 -326
  4. data/lib/scylla/lms/chinese.lm +399 -399
  5. data/lib/scylla/lms/french.lm +302 -302
  6. data/lib/scylla/lms/greek.lm +119 -119
  7. data/lib/scylla/lms/hebrew.lm +168 -168
  8. data/lib/scylla/lms/hindi.lm +108 -108
  9. data/lib/scylla/lms/japanese.lm +65 -65
  10. data/lib/scylla/lms/kannada.lm +147 -147
  11. data/lib/scylla/lms/korean.lm +151 -151
  12. data/lib/scylla/lms/marathi.lm +133 -133
  13. data/lib/scylla/lms/persian.lm +107 -107
  14. data/lib/scylla/lms/polish.lm +108 -108
  15. data/lib/scylla/lms/portuguese.lm +221 -221
  16. data/lib/scylla/lms/romanian.lm +132 -132
  17. data/lib/scylla/lms/russian.lm +82 -82
  18. data/lib/scylla/lms/thai.lm +119 -119
  19. data/lib/scylla/resources.rb +0 -1
  20. data/test/helper.rb +0 -1
  21. metadata +40 -55
  22. data/Gemfile +0 -23
  23. data/Gemfile.lock +0 -53
  24. data/Rakefile +0 -52
  25. data/VERSION +0 -1
  26. data/lib/scylla/lms/afrikaans.lm +0 -400
  27. data/pkg/scylla-0.5.0.gem +0 -0
  28. data/scylla-0.8.29.gem +0 -0
  29. data/scylla-0.8.31.gem +0 -0
  30. data/scylla.gemspec +0 -24
  31. data/source_texts/afrikaans.txt +0 -363
  32. data/source_texts/arabic.txt +0 -718
  33. data/source_texts/bulgarian.txt +0 -601
  34. data/source_texts/catalan.txt +0 -435
  35. data/source_texts/chinese.txt +0 -625
  36. data/source_texts/czech.txt +0 -237
  37. data/source_texts/danish.txt +0 -268
  38. data/source_texts/dutch.txt +0 -503
  39. data/source_texts/english.txt +0 -673
  40. data/source_texts/finnish.txt +0 -939
  41. data/source_texts/french.txt +0 -896
  42. data/source_texts/german.txt +0 -1236
  43. data/source_texts/greek.txt +0 -488
  44. data/source_texts/hebrew.txt +0 -638
  45. data/source_texts/hindi.txt +0 -353
  46. data/source_texts/icelandic.txt +0 -342
  47. data/source_texts/indonesian.txt +0 -509
  48. data/source_texts/italian.txt +0 -1066
  49. data/source_texts/japanese.txt +0 -1220
  50. data/source_texts/kannada.txt +0 -340
  51. data/source_texts/korean.txt +0 -343
  52. data/source_texts/marathi.txt +0 -237
  53. data/source_texts/norwegian.txt +0 -555
  54. data/source_texts/persian.txt +0 -886
  55. data/source_texts/polish.txt +0 -1014
  56. data/source_texts/portuguese.txt +0 -690
  57. data/source_texts/romanian.txt +0 -436
  58. data/source_texts/russian.txt +0 -1128
  59. data/source_texts/slovak.txt +0 -575
  60. data/source_texts/slovenian.txt +0 -354
  61. data/source_texts/spanish.txt +0 -1017
  62. data/source_texts/swedish.txt +0 -558
  63. data/source_texts/tagalog.txt +0 -426
  64. data/source_texts/thai.txt +0 -312
  65. data/source_texts/turkish.txt +0 -665
  66. data/source_texts/vietnamese.txt +0 -300
  67. data/source_texts/welsh.txt +0 -332
data/Rakefile DELETED
@@ -1,52 +0,0 @@
1
- # encoding: utf-8
2
-
3
- $LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__), "lib")))
4
-
5
- require 'rubygems'
6
- require 'bundler'
7
- require 'scylla'
8
- require 'scylla/tasks'
9
-
10
- begin
11
- Bundler.setup(:default, :development)
12
- rescue Bundler::BundlerError => e
13
- $stderr.puts e.message
14
- $stderr.puts "Run `bundle install` to install missing gems"
15
- exit e.status_code
16
- end
17
- require 'rake'
18
-
19
- require 'jeweler'
20
- Jeweler::Tasks.new do |gem|
21
- # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
22
- gem.name = "scylla"
23
- gem.homepage = "http://github.com/hashwin/scylla"
24
- gem.license = "MIT"
25
- gem.summary = "Ruby port of Textcat language guesser"
26
- gem.description = "Allows for text categorization by guessing the language of a given text using n-grams"
27
- gem.email = "ahegde@zendesk.com"
28
- gem.authors = ["Ashwin Hegde"]
29
- # dependencies defined in Gemfile
30
- end
31
- Jeweler::RubygemsDotOrgTasks.new
32
-
33
- require 'rake/testtask'
34
- Rake::TestTask.new(:test) do |test|
35
- test.libs << 'lib' << 'test'
36
- test.pattern = 'test/**/*_test.rb'
37
- test.verbose = true
38
- end
39
-
40
- task :default => :test
41
-
42
- require 'rake/rdoctask'
43
- Rake::RDocTask.new do |rdoc|
44
- version = File.exist?('VERSION') ? File.read('VERSION') : ""
45
-
46
- rdoc.rdoc_dir = 'rdoc'
47
- rdoc.title = "scylla #{version}"
48
- rdoc.rdoc_files.include('README*')
49
- rdoc.rdoc_files.include('lib/**/*.rb')
50
- end
51
-
52
- Scylla::Tasks.new
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.5.0
data/lib/scylla/lms/afrikaans.lm DELETED
@@ -1,400 +0,0 @@
1
- _ 8334
2
- e 3489
3
- a 2570
4
- n 1985
5
- i 1910
6
- s 1565
7
- r 1462
8
- d 1244
9
- t 1191
10
- e_ 1057
11
- o 1019
12
- l 972
13
- k 865
14
- n_ 739
15
- an 684
16
- s_ 670
17
- g 662
18
- aa 563
19
- ie 562
20
- er 526
21
- _d 507
22
- v 456
23
- m 453
24
- u 447
25
- _a 440
26
- en 437
27
- di 432
28
- ie_ 429
29
- de 380
30
- w 376
31
- t_ 371
32
- in 368
33
- _di 367
34
- die 358
35
- ik 347
36
- _v 347
37
- ka 330
38
- p 330
39
- b 324
40
- f 320
41
- _n 298
42
- _i 298
43
- ri 291
44
- ge 290
45
- _e 272
46
- te 262
47
- nd 261
48
- al 261
49
- ns 255
50
- h 254
51
- aan 253
52
- el 252
53
- _s 251
54
- af 232
55
- ta 230
56
- r_ 228
57
- ika 225
58
- _o 224
59
- _af 224
60
- fr 223
61
- an_ 220
62
- se 219
63
- va 218
64
- kaa 218
65
- rik 218
66
- _w 216
67
- l_ 215
68
- en_ 215
69
- afr 213
70
- fri 213
71
- ng 206
72
- _t 196
73
- _in 194
74
- _h 193
75
- ans 191
76
- _b 184
77
- es 181
78
- _en 177
79
- in_ 177
80
- oo 177
81
- ee 176
82
- et 176
83
- st 176
84
- _g 175
85
- la 174
86
- van 174
87
- _va 173
88
- as 171
89
- d_ 168
90
- at 165
91
- der 165
92
- is 163
93
- _m 160
94
- g_ 154
95
- ed 153
96
- and 152
97
- or 151
98
- se_ 148
99
- ui 145
100
- ns_ 144
101
- ke 143
102
- ar 142
103
- li 142
104
- ne 140
105
- le 139
106
- wa 137
107
- k_ 136
108
- et_ 135
109
- al_ 134
110
- on 133
111
- taa 132
112
- aal 132
113
- re 130
114
- lan 127
115
- _k 127
116
- de_ 127
117
- _ge 123
118
- y 123
119
- rd 122
120
- rs 121
121
- it 121
122
- nde 120
123
- er_ 120
124
- oe 120
125
- is_ 119
126
- ing 118
127
- be 118
128
- as_ 117
129
- he 117
130
- at_ 116
131
- ede 115
132
- me 114
133
- _wa 110
134
- _n_ 109
135
- we 106
136
- ve 105
137
- ng_ 103
138
- _he 102
139
- _ta 102
140
- ra 99
141
- ek 97
142
- sk 97
143
- si 96
144
- ers 96
145
- _is 96
146
- a_ 95
147
- ver 94
148
- oor 92
149
- te_ 92
150
- ei 91
151
- het 90
152
- ds 90
153
- _ve 89
154
- nt 88
155
- rl 87
156
- _ne 86
157
- ro 84
158
- da 83
159
- _be 83
160
- erl 80
161
- ig 79
162
- aar 78
163
- ni 77
164
- ned 76
165
- am 75
166
- ur 74
167
- om 74
168
- ord 73
169
- rla 73
170
- id 73
171
- nds 73
172
- vo 73
173
- na 73
174
- _p 72
175
- pe 71
176
- wo 70
177
- _me 69
178
- _as 68
179
- sta 67
180
- ste 67
181
- ti 66
182
- m_ 65
183
- lik 65
184
- � 65
185
- _on 64
186
- op 64
187
- _ka 64
188
- le_ 63
189
- ll 63
190
- nse 61
191
- _da 61
192
- pr 60
193
- wat 60
194
- ma 59
195
- p_ 59
196
- uit 58
197
- _wo 58
198
- _oo 57
199
- em 57
200
- nge 56
201
- rt 56
202
- rs_ 56
203
- ap 56
204
- eli 55
205
- j 55
206
- gr 55
207
- ho 55
208
- ot 54
209
- ls 53
210
- _r 53
211
- mi 53
212
- ik_ 53
213
- eu 53
214
- es_ 53
215
- ko 53
216
- _aa 52
217
- eb 52
218
- ou 52
219
- sp 52
220
- rde 52
221
- eg 51
222
- so 51
223
- ar_ 51
224
- end 50
225
- tal 50
226
- tu 49
227
- els 48
228
- ke_ 48
229
- eer 48
230
- ol 47
231
- _om 47
232
- it_ 46
233
- ë 46
234
- � 46
235
- br 46
236
- gel 46
237
- ds_ 46
238
- _na 46
239
- tel 46
240
- gs 45
241
- _de 45
242
- _so 45
243
- el_ 44
244
- ale 44
245
- ike 43
246
- _l 42
247
- ki 42
248
- u_ 42
249
- _u 42
250
- ru 41
251
- bl 41
252
- ter 41
253
- ond 40
254
- eng 40
255
- ew 40
256
- eke 39
257
- nd_ 39
258
- eur 39
259
- _vo 39
260
- c 39
261
- su 39
262
- ken 39
263
- ang 39
264
- aap 39
265
- mo 38
266
- nie 38
267
- ss 38
268
- was 38
269
- ls_ 38
270
- _ui 38
271
- sie 38
272
- ngs 38
273
- to 37
274
- est 37
275
- ok 37
276
- _op 37
277
- ul 37
278
- _su 37
279
- ens 36
280
- _te 36
281
- f_ 36
282
- rd_ 36
283
- _ho 36
284
- _gr 36
285
- y_ 36
286
- een 36
287
- uid 36
288
- geb 36
289
- ts 36
290
- _we 35
291
- erd 35
292
- ese 35
293
- id_ 35
294
- _st 35
295
- rk 35
296
- ies 35
297
- wor 34
298
- woo 34
299
- ge_ 34
300
- ges 34
301
- ga 34
302
- ska 34
303
- lin 34
304
- nk 34
305
- esk 34
306
- mp 33
307
- vi 33
308
- ige 33
309
- il 33
310
- ad 33
311
- _ma 33
312
- sui 32
313
- ier 32
314
- ep 32
315
- ite 32
316
- i� 32
317
- ou_ 32
318
- lle 31
319
- ok_ 31
320
- ook 31
321
- dse 31
322
- rui 30
323
- ps 30
324
- bo 30
325
- _bl 30
326
- _mo 30
327
- spr 30
328
- ië 30
329
- daa 30
330
- _am 30
331
- gro 30
332
- ent 30
333
- os 30
334
- kl 30
335
- erk 30
336
- tr 29
337
- op_ 29
338
- lg 29
339
- ka_ 29
340
- wy 28
341
- mee 28
342
- re_ 28
343
- ot_ 28
344
- _vi 28
345
- met 28
346
- us 28
347
- amp 28
348
- vol 27
349
- pa 27
350
- nv 27
351
- gt 27
352
- rw 27
353
- _ee 27
354
- bru 27
355
- dat 27
356
- eni 26
357
- hu 26
358
- ak 26
359
- ten 26
360
- eel 26
361
- men 26
362
- ai 26
363
- gen 26
364
- bi 26
365
- ont 26
366
- _mi 26
367
- rg 26
368
- _re 26
369
- asi 26
370
- wi 25
371
- gi 25
372
- erw 25
373
- ev 25
374
- kan 25
375
- ig_ 25
376
- om_ 25
377
- ir 25
378
- ebr 25
379
- kr 24
380
- _sp 24
381
- sen 24
382
- _hu 24
383
- tw 24
384
- ks 24
385
- _j 24
386
- ku 24
387
- maa 24
388
- aps 24
389
- sa 24
390
- ys 24
391
- rm 24
392
- ir_ 24
393
- rsk 24
394
- moe 24
395
- ran 24
396
- del 23
397
- waa 23
398
- kel 23
399
- soo 23
400
- pt 23
data/pkg/scylla-0.5.0.gem DELETED
Binary file
data/scylla-0.8.29.gem DELETED
Binary file
data/scylla-0.8.31.gem DELETED
Binary file
data/scylla.gemspec DELETED
@@ -1,24 +0,0 @@
1
- Gem::Specification.new do |s|
2
- s.name = %q{scylla}
3
- s.version = "0.8.32"
4
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
5
- s.authors = ["Ashwin Hegde"]
6
- s.date = %q{2012-01-26}
7
- s.default_executable = %q{scylla}
8
- s.description = %q{Allows for text categorization by guessing the language of a given text using n-grams}
9
- s.email = %q{ahegde@zendesk.com}
10
- s.executables = ["scylla"]
11
- s.extra_rdoc_files = [
12
- "LICENSE.txt",
13
- "README.rdoc"
14
- ]
15
- s.files = Dir.glob("**/**")
16
- s.homepage = %q{http://github.com/hashwin/scylla}
17
- s.licenses = ["MIT"]
18
- s.require_paths = ["lib"]
19
- s.rubygems_version = %q{1.5.3}
20
- s.summary = %q{Ruby port of Textcat language guesser}
21
- s.add_dependency(%q<bundler>, ["~> 1.0.0"])
22
- s.add_dependency(%q<sanitize>, ["~> 2.0.0"])
23
- end
24
-