scylla 0.8.32 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. data/lib/scylla/generator.rb +6 -2
  2. data/lib/scylla/lms/arabic.lm +318 -318
  3. data/lib/scylla/lms/bulgarian.lm +326 -326
  4. data/lib/scylla/lms/chinese.lm +399 -399
  5. data/lib/scylla/lms/french.lm +302 -302
  6. data/lib/scylla/lms/greek.lm +119 -119
  7. data/lib/scylla/lms/hebrew.lm +168 -168
  8. data/lib/scylla/lms/hindi.lm +108 -108
  9. data/lib/scylla/lms/japanese.lm +65 -65
  10. data/lib/scylla/lms/kannada.lm +147 -147
  11. data/lib/scylla/lms/korean.lm +151 -151
  12. data/lib/scylla/lms/marathi.lm +133 -133
  13. data/lib/scylla/lms/persian.lm +107 -107
  14. data/lib/scylla/lms/polish.lm +108 -108
  15. data/lib/scylla/lms/portuguese.lm +221 -221
  16. data/lib/scylla/lms/romanian.lm +132 -132
  17. data/lib/scylla/lms/russian.lm +82 -82
  18. data/lib/scylla/lms/thai.lm +119 -119
  19. data/lib/scylla/resources.rb +0 -1
  20. data/test/helper.rb +0 -1
  21. metadata +40 -55
  22. data/Gemfile +0 -23
  23. data/Gemfile.lock +0 -53
  24. data/Rakefile +0 -52
  25. data/VERSION +0 -1
  26. data/lib/scylla/lms/afrikaans.lm +0 -400
  27. data/pkg/scylla-0.5.0.gem +0 -0
  28. data/scylla-0.8.29.gem +0 -0
  29. data/scylla-0.8.31.gem +0 -0
  30. data/scylla.gemspec +0 -24
  31. data/source_texts/afrikaans.txt +0 -363
  32. data/source_texts/arabic.txt +0 -718
  33. data/source_texts/bulgarian.txt +0 -601
  34. data/source_texts/catalan.txt +0 -435
  35. data/source_texts/chinese.txt +0 -625
  36. data/source_texts/czech.txt +0 -237
  37. data/source_texts/danish.txt +0 -268
  38. data/source_texts/dutch.txt +0 -503
  39. data/source_texts/english.txt +0 -673
  40. data/source_texts/finnish.txt +0 -939
  41. data/source_texts/french.txt +0 -896
  42. data/source_texts/german.txt +0 -1236
  43. data/source_texts/greek.txt +0 -488
  44. data/source_texts/hebrew.txt +0 -638
  45. data/source_texts/hindi.txt +0 -353
  46. data/source_texts/icelandic.txt +0 -342
  47. data/source_texts/indonesian.txt +0 -509
  48. data/source_texts/italian.txt +0 -1066
  49. data/source_texts/japanese.txt +0 -1220
  50. data/source_texts/kannada.txt +0 -340
  51. data/source_texts/korean.txt +0 -343
  52. data/source_texts/marathi.txt +0 -237
  53. data/source_texts/norwegian.txt +0 -555
  54. data/source_texts/persian.txt +0 -886
  55. data/source_texts/polish.txt +0 -1014
  56. data/source_texts/portuguese.txt +0 -690
  57. data/source_texts/romanian.txt +0 -436
  58. data/source_texts/russian.txt +0 -1128
  59. data/source_texts/slovak.txt +0 -575
  60. data/source_texts/slovenian.txt +0 -354
  61. data/source_texts/spanish.txt +0 -1017
  62. data/source_texts/swedish.txt +0 -558
  63. data/source_texts/tagalog.txt +0 -426
  64. data/source_texts/thai.txt +0 -312
  65. data/source_texts/turkish.txt +0 -665
  66. data/source_texts/vietnamese.txt +0 -300
  67. data/source_texts/welsh.txt +0 -332
data/Rakefile DELETED
@@ -1,52 +0,0 @@
1
- # encoding: utf-8
2
-
3
- $LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__), "lib")))
4
-
5
- require 'rubygems'
6
- require 'bundler'
7
- require 'scylla'
8
- require 'scylla/tasks'
9
-
10
- begin
11
- Bundler.setup(:default, :development)
12
- rescue Bundler::BundlerError => e
13
- $stderr.puts e.message
14
- $stderr.puts "Run `bundle install` to install missing gems"
15
- exit e.status_code
16
- end
17
- require 'rake'
18
-
19
- require 'jeweler'
20
- Jeweler::Tasks.new do |gem|
21
- # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
22
- gem.name = "scylla"
23
- gem.homepage = "http://github.com/hashwin/scylla"
24
- gem.license = "MIT"
25
- gem.summary = "Ruby port of Textcat language guesser"
26
- gem.description = "Allows for text categorization by guessing the language of a given text using n-grams"
27
- gem.email = "ahegde@zendesk.com"
28
- gem.authors = ["Ashwin Hegde"]
29
- # dependencies defined in Gemfile
30
- end
31
- Jeweler::RubygemsDotOrgTasks.new
32
-
33
- require 'rake/testtask'
34
- Rake::TestTask.new(:test) do |test|
35
- test.libs << 'lib' << 'test'
36
- test.pattern = 'test/**/*_test.rb'
37
- test.verbose = true
38
- end
39
-
40
- task :default => :test
41
-
42
- require 'rake/rdoctask'
43
- Rake::RDocTask.new do |rdoc|
44
- version = File.exist?('VERSION') ? File.read('VERSION') : ""
45
-
46
- rdoc.rdoc_dir = 'rdoc'
47
- rdoc.title = "scylla #{version}"
48
- rdoc.rdoc_files.include('README*')
49
- rdoc.rdoc_files.include('lib/**/*.rb')
50
- end
51
-
52
- Scylla::Tasks.new
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.5.0
data/lib/scylla/lms/afrikaans.lm DELETED
@@ -1,400 +0,0 @@
1
- _ 8334
2
- e 3489
3
- a 2570
4
- n 1985
5
- i 1910
6
- s 1565
7
- r 1462
8
- d 1244
9
- t 1191
10
- e_ 1057
11
- o 1019
12
- l 972
13
- k 865
14
- n_ 739
15
- an 684
16
- s_ 670
17
- g 662
18
- aa 563
19
- ie 562
20
- er 526
21
- _d 507
22
- v 456
23
- m 453
24
- u 447
25
- _a 440
26
- en 437
27
- di 432
28
- ie_ 429
29
- de 380
30
- w 376
31
- t_ 371
32
- in 368
33
- _di 367
34
- die 358
35
- ik 347
36
- _v 347
37
- ka 330
38
- p 330
39
- b 324
40
- f 320
41
- _n 298
42
- _i 298
43
- ri 291
44
- ge 290
45
- _e 272
46
- te 262
47
- nd 261
48
- al 261
49
- ns 255
50
- h 254
51
- aan 253
52
- el 252
53
- _s 251
54
- af 232
55
- ta 230
56
- r_ 228
57
- ika 225
58
- _o 224
59
- _af 224
60
- fr 223
61
- an_ 220
62
- se 219
63
- va 218
64
- kaa 218
65
- rik 218
66
- _w 216
67
- l_ 215
68
- en_ 215
69
- afr 213
70
- fri 213
71
- ng 206
72
- _t 196
73
- _in 194
74
- _h 193
75
- ans 191
76
- _b 184
77
- es 181
78
- _en 177
79
- in_ 177
80
- oo 177
81
- ee 176
82
- et 176
83
- st 176
84
- _g 175
85
- la 174
86
- van 174
87
- _va 173
88
- as 171
89
- d_ 168
90
- at 165
91
- der 165
92
- is 163
93
- _m 160
94
- g_ 154
95
- ed 153
96
- and 152
97
- or 151
98
- se_ 148
99
- ui 145
100
- ns_ 144
101
- ke 143
102
- ar 142
103
- li 142
104
- ne 140
105
- le 139
106
- wa 137
107
- k_ 136
108
- et_ 135
109
- al_ 134
110
- on 133
111
- taa 132
112
- aal 132
113
- re 130
114
- lan 127
115
- _k 127
116
- de_ 127
117
- _ge 123
118
- y 123
119
- rd 122
120
- rs 121
121
- it 121
122
- nde 120
123
- er_ 120
124
- oe 120
125
- is_ 119
126
- ing 118
127
- be 118
128
- as_ 117
129
- he 117
130
- at_ 116
131
- ede 115
132
- me 114
133
- _wa 110
134
- _n_ 109
135
- we 106
136
- ve 105
137
- ng_ 103
138
- _he 102
139
- _ta 102
140
- ra 99
141
- ek 97
142
- sk 97
143
- si 96
144
- ers 96
145
- _is 96
146
- a_ 95
147
- ver 94
148
- oor 92
149
- te_ 92
150
- ei 91
151
- het 90
152
- ds 90
153
- _ve 89
154
- nt 88
155
- rl 87
156
- _ne 86
157
- ro 84
158
- da 83
159
- _be 83
160
- erl 80
161
- ig 79
162
- aar 78
163
- ni 77
164
- ned 76
165
- am 75
166
- ur 74
167
- om 74
168
- ord 73
169
- rla 73
170
- id 73
171
- nds 73
172
- vo 73
173
- na 73
174
- _p 72
175
- pe 71
176
- wo 70
177
- _me 69
178
- _as 68
179
- sta 67
180
- ste 67
181
- ti 66
182
- m_ 65
183
- lik 65
184
- � 65
185
- _on 64
186
- op 64
187
- _ka 64
188
- le_ 63
189
- ll 63
190
- nse 61
191
- _da 61
192
- pr 60
193
- wat 60
194
- ma 59
195
- p_ 59
196
- uit 58
197
- _wo 58
198
- _oo 57
199
- em 57
200
- nge 56
201
- rt 56
202
- rs_ 56
203
- ap 56
204
- eli 55
205
- j 55
206
- gr 55
207
- ho 55
208
- ot 54
209
- ls 53
210
- _r 53
211
- mi 53
212
- ik_ 53
213
- eu 53
214
- es_ 53
215
- ko 53
216
- _aa 52
217
- eb 52
218
- ou 52
219
- sp 52
220
- rde 52
221
- eg 51
222
- so 51
223
- ar_ 51
224
- end 50
225
- tal 50
226
- tu 49
227
- els 48
228
- ke_ 48
229
- eer 48
230
- ol 47
231
- _om 47
232
- it_ 46
233
- ë 46
234
- � 46
235
- br 46
236
- gel 46
237
- ds_ 46
238
- _na 46
239
- tel 46
240
- gs 45
241
- _de 45
242
- _so 45
243
- el_ 44
244
- ale 44
245
- ike 43
246
- _l 42
247
- ki 42
248
- u_ 42
249
- _u 42
250
- ru 41
251
- bl 41
252
- ter 41
253
- ond 40
254
- eng 40
255
- ew 40
256
- eke 39
257
- nd_ 39
258
- eur 39
259
- _vo 39
260
- c 39
261
- su 39
262
- ken 39
263
- ang 39
264
- aap 39
265
- mo 38
266
- nie 38
267
- ss 38
268
- was 38
269
- ls_ 38
270
- _ui 38
271
- sie 38
272
- ngs 38
273
- to 37
274
- est 37
275
- ok 37
276
- _op 37
277
- ul 37
278
- _su 37
279
- ens 36
280
- _te 36
281
- f_ 36
282
- rd_ 36
283
- _ho 36
284
- _gr 36
285
- y_ 36
286
- een 36
287
- uid 36
288
- geb 36
289
- ts 36
290
- _we 35
291
- erd 35
292
- ese 35
293
- id_ 35
294
- _st 35
295
- rk 35
296
- ies 35
297
- wor 34
298
- woo 34
299
- ge_ 34
300
- ges 34
301
- ga 34
302
- ska 34
303
- lin 34
304
- nk 34
305
- esk 34
306
- mp 33
307
- vi 33
308
- ige 33
309
- il 33
310
- ad 33
311
- _ma 33
312
- sui 32
313
- ier 32
314
- ep 32
315
- ite 32
316
- i� 32
317
- ou_ 32
318
- lle 31
319
- ok_ 31
320
- ook 31
321
- dse 31
322
- rui 30
323
- ps 30
324
- bo 30
325
- _bl 30
326
- _mo 30
327
- spr 30
328
- ië 30
329
- daa 30
330
- _am 30
331
- gro 30
332
- ent 30
333
- os 30
334
- kl 30
335
- erk 30
336
- tr 29
337
- op_ 29
338
- lg 29
339
- ka_ 29
340
- wy 28
341
- mee 28
342
- re_ 28
343
- ot_ 28
344
- _vi 28
345
- met 28
346
- us 28
347
- amp 28
348
- vol 27
349
- pa 27
350
- nv 27
351
- gt 27
352
- rw 27
353
- _ee 27
354
- bru 27
355
- dat 27
356
- eni 26
357
- hu 26
358
- ak 26
359
- ten 26
360
- eel 26
361
- men 26
362
- ai 26
363
- gen 26
364
- bi 26
365
- ont 26
366
- _mi 26
367
- rg 26
368
- _re 26
369
- asi 26
370
- wi 25
371
- gi 25
372
- erw 25
373
- ev 25
374
- kan 25
375
- ig_ 25
376
- om_ 25
377
- ir 25
378
- ebr 25
379
- kr 24
380
- _sp 24
381
- sen 24
382
- _hu 24
383
- tw 24
384
- ks 24
385
- _j 24
386
- ku 24
387
- maa 24
388
- aps 24
389
- sa 24
390
- ys 24
391
- rm 24
392
- ir_ 24
393
- rsk 24
394
- moe 24
395
- ran 24
396
- del 23
397
- waa 23
398
- kel 23
399
- soo 23
400
- pt 23
data/pkg/scylla-0.5.0.gem DELETED
Binary file
data/scylla-0.8.29.gem DELETED
Binary file
data/scylla-0.8.31.gem DELETED
Binary file
data/scylla.gemspec DELETED
@@ -1,24 +0,0 @@
1
- Gem::Specification.new do |s|
2
- s.name = %q{scylla}
3
- s.version = "0.8.32"
4
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
5
- s.authors = ["Ashwin Hegde"]
6
- s.date = %q{2012-01-26}
7
- s.default_executable = %q{scylla}
8
- s.description = %q{Allows for text categorization by guessing the language of a given text using n-grams}
9
- s.email = %q{ahegde@zendesk.com}
10
- s.executables = ["scylla"]
11
- s.extra_rdoc_files = [
12
- "LICENSE.txt",
13
- "README.rdoc"
14
- ]
15
- s.files = Dir.glob("**/**")
16
- s.homepage = %q{http://github.com/hashwin/scylla}
17
- s.licenses = ["MIT"]
18
- s.require_paths = ["lib"]
19
- s.rubygems_version = %q{1.5.3}
20
- s.summary = %q{Ruby port of Textcat language guesser}
21
- s.add_dependency(%q<bundler>, ["~> 1.0.0"])
22
- s.add_dependency(%q<sanitize>, ["~> 2.0.0"])
23
- end
24
-