scylla 0.8.32 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/scylla/generator.rb +6 -2
- data/lib/scylla/lms/arabic.lm +318 -318
- data/lib/scylla/lms/bulgarian.lm +326 -326
- data/lib/scylla/lms/chinese.lm +399 -399
- data/lib/scylla/lms/french.lm +302 -302
- data/lib/scylla/lms/greek.lm +119 -119
- data/lib/scylla/lms/hebrew.lm +168 -168
- data/lib/scylla/lms/hindi.lm +108 -108
- data/lib/scylla/lms/japanese.lm +65 -65
- data/lib/scylla/lms/kannada.lm +147 -147
- data/lib/scylla/lms/korean.lm +151 -151
- data/lib/scylla/lms/marathi.lm +133 -133
- data/lib/scylla/lms/persian.lm +107 -107
- data/lib/scylla/lms/polish.lm +108 -108
- data/lib/scylla/lms/portuguese.lm +221 -221
- data/lib/scylla/lms/romanian.lm +132 -132
- data/lib/scylla/lms/russian.lm +82 -82
- data/lib/scylla/lms/thai.lm +119 -119
- data/lib/scylla/resources.rb +0 -1
- data/test/helper.rb +0 -1
- metadata +40 -55
- data/Gemfile +0 -23
- data/Gemfile.lock +0 -53
- data/Rakefile +0 -52
- data/VERSION +0 -1
- data/lib/scylla/lms/afrikaans.lm +0 -400
- data/pkg/scylla-0.5.0.gem +0 -0
- data/scylla-0.8.29.gem +0 -0
- data/scylla-0.8.31.gem +0 -0
- data/scylla.gemspec +0 -24
- data/source_texts/afrikaans.txt +0 -363
- data/source_texts/arabic.txt +0 -718
- data/source_texts/bulgarian.txt +0 -601
- data/source_texts/catalan.txt +0 -435
- data/source_texts/chinese.txt +0 -625
- data/source_texts/czech.txt +0 -237
- data/source_texts/danish.txt +0 -268
- data/source_texts/dutch.txt +0 -503
- data/source_texts/english.txt +0 -673
- data/source_texts/finnish.txt +0 -939
- data/source_texts/french.txt +0 -896
- data/source_texts/german.txt +0 -1236
- data/source_texts/greek.txt +0 -488
- data/source_texts/hebrew.txt +0 -638
- data/source_texts/hindi.txt +0 -353
- data/source_texts/icelandic.txt +0 -342
- data/source_texts/indonesian.txt +0 -509
- data/source_texts/italian.txt +0 -1066
- data/source_texts/japanese.txt +0 -1220
- data/source_texts/kannada.txt +0 -340
- data/source_texts/korean.txt +0 -343
- data/source_texts/marathi.txt +0 -237
- data/source_texts/norwegian.txt +0 -555
- data/source_texts/persian.txt +0 -886
- data/source_texts/polish.txt +0 -1014
- data/source_texts/portuguese.txt +0 -690
- data/source_texts/romanian.txt +0 -436
- data/source_texts/russian.txt +0 -1128
- data/source_texts/slovak.txt +0 -575
- data/source_texts/slovenian.txt +0 -354
- data/source_texts/spanish.txt +0 -1017
- data/source_texts/swedish.txt +0 -558
- data/source_texts/tagalog.txt +0 -426
- data/source_texts/thai.txt +0 -312
- data/source_texts/turkish.txt +0 -665
- data/source_texts/vietnamese.txt +0 -300
- data/source_texts/welsh.txt +0 -332
data/Rakefile
DELETED
@@ -1,52 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
$LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__), "lib")))
|
4
|
-
|
5
|
-
require 'rubygems'
|
6
|
-
require 'bundler'
|
7
|
-
require 'scylla'
|
8
|
-
require 'scylla/tasks'
|
9
|
-
|
10
|
-
begin
|
11
|
-
Bundler.setup(:default, :development)
|
12
|
-
rescue Bundler::BundlerError => e
|
13
|
-
$stderr.puts e.message
|
14
|
-
$stderr.puts "Run `bundle install` to install missing gems"
|
15
|
-
exit e.status_code
|
16
|
-
end
|
17
|
-
require 'rake'
|
18
|
-
|
19
|
-
require 'jeweler'
|
20
|
-
Jeweler::Tasks.new do |gem|
|
21
|
-
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
22
|
-
gem.name = "scylla"
|
23
|
-
gem.homepage = "http://github.com/hashwin/scylla"
|
24
|
-
gem.license = "MIT"
|
25
|
-
gem.summary = "Ruby port of Textcat language guesser"
|
26
|
-
gem.description = "Allows for text categorization by guessing the language of a given text using n-grams"
|
27
|
-
gem.email = "ahegde@zendesk.com"
|
28
|
-
gem.authors = ["Ashwin Hegde"]
|
29
|
-
# dependencies defined in Gemfile
|
30
|
-
end
|
31
|
-
Jeweler::RubygemsDotOrgTasks.new
|
32
|
-
|
33
|
-
require 'rake/testtask'
|
34
|
-
Rake::TestTask.new(:test) do |test|
|
35
|
-
test.libs << 'lib' << 'test'
|
36
|
-
test.pattern = 'test/**/*_test.rb'
|
37
|
-
test.verbose = true
|
38
|
-
end
|
39
|
-
|
40
|
-
task :default => :test
|
41
|
-
|
42
|
-
require 'rake/rdoctask'
|
43
|
-
Rake::RDocTask.new do |rdoc|
|
44
|
-
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
45
|
-
|
46
|
-
rdoc.rdoc_dir = 'rdoc'
|
47
|
-
rdoc.title = "scylla #{version}"
|
48
|
-
rdoc.rdoc_files.include('README*')
|
49
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
50
|
-
end
|
51
|
-
|
52
|
-
Scylla::Tasks.new
|
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
0.5.0
|
data/lib/scylla/lms/afrikaans.lm
DELETED
@@ -1,400 +0,0 @@
|
|
1
|
-
_ 8334
|
2
|
-
e 3489
|
3
|
-
a 2570
|
4
|
-
n 1985
|
5
|
-
i 1910
|
6
|
-
s 1565
|
7
|
-
r 1462
|
8
|
-
d 1244
|
9
|
-
t 1191
|
10
|
-
e_ 1057
|
11
|
-
o 1019
|
12
|
-
l 972
|
13
|
-
k 865
|
14
|
-
n_ 739
|
15
|
-
an 684
|
16
|
-
s_ 670
|
17
|
-
g 662
|
18
|
-
aa 563
|
19
|
-
ie 562
|
20
|
-
er 526
|
21
|
-
_d 507
|
22
|
-
v 456
|
23
|
-
m 453
|
24
|
-
u 447
|
25
|
-
_a 440
|
26
|
-
en 437
|
27
|
-
di 432
|
28
|
-
ie_ 429
|
29
|
-
de 380
|
30
|
-
w 376
|
31
|
-
t_ 371
|
32
|
-
in 368
|
33
|
-
_di 367
|
34
|
-
die 358
|
35
|
-
ik 347
|
36
|
-
_v 347
|
37
|
-
ka 330
|
38
|
-
p 330
|
39
|
-
b 324
|
40
|
-
f 320
|
41
|
-
_n 298
|
42
|
-
_i 298
|
43
|
-
ri 291
|
44
|
-
ge 290
|
45
|
-
_e 272
|
46
|
-
te 262
|
47
|
-
nd 261
|
48
|
-
al 261
|
49
|
-
ns 255
|
50
|
-
h 254
|
51
|
-
aan 253
|
52
|
-
el 252
|
53
|
-
_s 251
|
54
|
-
af 232
|
55
|
-
ta 230
|
56
|
-
r_ 228
|
57
|
-
ika 225
|
58
|
-
_o 224
|
59
|
-
_af 224
|
60
|
-
fr 223
|
61
|
-
an_ 220
|
62
|
-
se 219
|
63
|
-
va 218
|
64
|
-
kaa 218
|
65
|
-
rik 218
|
66
|
-
_w 216
|
67
|
-
l_ 215
|
68
|
-
en_ 215
|
69
|
-
afr 213
|
70
|
-
fri 213
|
71
|
-
ng 206
|
72
|
-
_t 196
|
73
|
-
_in 194
|
74
|
-
_h 193
|
75
|
-
ans 191
|
76
|
-
_b 184
|
77
|
-
es 181
|
78
|
-
_en 177
|
79
|
-
in_ 177
|
80
|
-
oo 177
|
81
|
-
ee 176
|
82
|
-
et 176
|
83
|
-
st 176
|
84
|
-
_g 175
|
85
|
-
la 174
|
86
|
-
van 174
|
87
|
-
_va 173
|
88
|
-
as 171
|
89
|
-
d_ 168
|
90
|
-
at 165
|
91
|
-
der 165
|
92
|
-
is 163
|
93
|
-
_m 160
|
94
|
-
g_ 154
|
95
|
-
ed 153
|
96
|
-
and 152
|
97
|
-
or 151
|
98
|
-
se_ 148
|
99
|
-
ui 145
|
100
|
-
ns_ 144
|
101
|
-
ke 143
|
102
|
-
ar 142
|
103
|
-
li 142
|
104
|
-
ne 140
|
105
|
-
le 139
|
106
|
-
wa 137
|
107
|
-
k_ 136
|
108
|
-
et_ 135
|
109
|
-
al_ 134
|
110
|
-
on 133
|
111
|
-
taa 132
|
112
|
-
aal 132
|
113
|
-
re 130
|
114
|
-
lan 127
|
115
|
-
_k 127
|
116
|
-
de_ 127
|
117
|
-
_ge 123
|
118
|
-
y 123
|
119
|
-
rd 122
|
120
|
-
rs 121
|
121
|
-
it 121
|
122
|
-
nde 120
|
123
|
-
er_ 120
|
124
|
-
oe 120
|
125
|
-
is_ 119
|
126
|
-
ing 118
|
127
|
-
be 118
|
128
|
-
as_ 117
|
129
|
-
he 117
|
130
|
-
at_ 116
|
131
|
-
ede 115
|
132
|
-
me 114
|
133
|
-
_wa 110
|
134
|
-
_n_ 109
|
135
|
-
we 106
|
136
|
-
ve 105
|
137
|
-
ng_ 103
|
138
|
-
_he 102
|
139
|
-
_ta 102
|
140
|
-
ra 99
|
141
|
-
ek 97
|
142
|
-
sk 97
|
143
|
-
si 96
|
144
|
-
ers 96
|
145
|
-
_is 96
|
146
|
-
a_ 95
|
147
|
-
ver 94
|
148
|
-
oor 92
|
149
|
-
te_ 92
|
150
|
-
ei 91
|
151
|
-
het 90
|
152
|
-
ds 90
|
153
|
-
_ve 89
|
154
|
-
nt 88
|
155
|
-
rl 87
|
156
|
-
_ne 86
|
157
|
-
ro 84
|
158
|
-
da 83
|
159
|
-
_be 83
|
160
|
-
erl 80
|
161
|
-
ig 79
|
162
|
-
aar 78
|
163
|
-
ni 77
|
164
|
-
ned 76
|
165
|
-
am 75
|
166
|
-
ur 74
|
167
|
-
om 74
|
168
|
-
ord 73
|
169
|
-
rla 73
|
170
|
-
id 73
|
171
|
-
nds 73
|
172
|
-
vo 73
|
173
|
-
na 73
|
174
|
-
_p 72
|
175
|
-
pe 71
|
176
|
-
wo 70
|
177
|
-
_me 69
|
178
|
-
_as 68
|
179
|
-
sta 67
|
180
|
-
ste 67
|
181
|
-
ti 66
|
182
|
-
m_ 65
|
183
|
-
lik 65
|
184
|
-
� 65
|
185
|
-
_on 64
|
186
|
-
op 64
|
187
|
-
_ka 64
|
188
|
-
le_ 63
|
189
|
-
ll 63
|
190
|
-
nse 61
|
191
|
-
_da 61
|
192
|
-
pr 60
|
193
|
-
wat 60
|
194
|
-
ma 59
|
195
|
-
p_ 59
|
196
|
-
uit 58
|
197
|
-
_wo 58
|
198
|
-
_oo 57
|
199
|
-
em 57
|
200
|
-
nge 56
|
201
|
-
rt 56
|
202
|
-
rs_ 56
|
203
|
-
ap 56
|
204
|
-
eli 55
|
205
|
-
j 55
|
206
|
-
gr 55
|
207
|
-
ho 55
|
208
|
-
ot 54
|
209
|
-
ls 53
|
210
|
-
_r 53
|
211
|
-
mi 53
|
212
|
-
ik_ 53
|
213
|
-
eu 53
|
214
|
-
es_ 53
|
215
|
-
ko 53
|
216
|
-
_aa 52
|
217
|
-
eb 52
|
218
|
-
ou 52
|
219
|
-
sp 52
|
220
|
-
rde 52
|
221
|
-
eg 51
|
222
|
-
so 51
|
223
|
-
ar_ 51
|
224
|
-
end 50
|
225
|
-
tal 50
|
226
|
-
tu 49
|
227
|
-
els 48
|
228
|
-
ke_ 48
|
229
|
-
eer 48
|
230
|
-
ol 47
|
231
|
-
_om 47
|
232
|
-
it_ 46
|
233
|
-
ë 46
|
234
|
-
� 46
|
235
|
-
br 46
|
236
|
-
gel 46
|
237
|
-
ds_ 46
|
238
|
-
_na 46
|
239
|
-
tel 46
|
240
|
-
gs 45
|
241
|
-
_de 45
|
242
|
-
_so 45
|
243
|
-
el_ 44
|
244
|
-
ale 44
|
245
|
-
ike 43
|
246
|
-
_l 42
|
247
|
-
ki 42
|
248
|
-
u_ 42
|
249
|
-
_u 42
|
250
|
-
ru 41
|
251
|
-
bl 41
|
252
|
-
ter 41
|
253
|
-
ond 40
|
254
|
-
eng 40
|
255
|
-
ew 40
|
256
|
-
eke 39
|
257
|
-
nd_ 39
|
258
|
-
eur 39
|
259
|
-
_vo 39
|
260
|
-
c 39
|
261
|
-
su 39
|
262
|
-
ken 39
|
263
|
-
ang 39
|
264
|
-
aap 39
|
265
|
-
mo 38
|
266
|
-
nie 38
|
267
|
-
ss 38
|
268
|
-
was 38
|
269
|
-
ls_ 38
|
270
|
-
_ui 38
|
271
|
-
sie 38
|
272
|
-
ngs 38
|
273
|
-
to 37
|
274
|
-
est 37
|
275
|
-
ok 37
|
276
|
-
_op 37
|
277
|
-
ul 37
|
278
|
-
_su 37
|
279
|
-
ens 36
|
280
|
-
_te 36
|
281
|
-
f_ 36
|
282
|
-
rd_ 36
|
283
|
-
_ho 36
|
284
|
-
_gr 36
|
285
|
-
y_ 36
|
286
|
-
een 36
|
287
|
-
uid 36
|
288
|
-
geb 36
|
289
|
-
ts 36
|
290
|
-
_we 35
|
291
|
-
erd 35
|
292
|
-
ese 35
|
293
|
-
id_ 35
|
294
|
-
_st 35
|
295
|
-
rk 35
|
296
|
-
ies 35
|
297
|
-
wor 34
|
298
|
-
woo 34
|
299
|
-
ge_ 34
|
300
|
-
ges 34
|
301
|
-
ga 34
|
302
|
-
ska 34
|
303
|
-
lin 34
|
304
|
-
nk 34
|
305
|
-
esk 34
|
306
|
-
mp 33
|
307
|
-
vi 33
|
308
|
-
ige 33
|
309
|
-
il 33
|
310
|
-
ad 33
|
311
|
-
_ma 33
|
312
|
-
sui 32
|
313
|
-
ier 32
|
314
|
-
ep 32
|
315
|
-
ite 32
|
316
|
-
i� 32
|
317
|
-
ou_ 32
|
318
|
-
lle 31
|
319
|
-
ok_ 31
|
320
|
-
ook 31
|
321
|
-
dse 31
|
322
|
-
rui 30
|
323
|
-
ps 30
|
324
|
-
bo 30
|
325
|
-
_bl 30
|
326
|
-
_mo 30
|
327
|
-
spr 30
|
328
|
-
ië 30
|
329
|
-
daa 30
|
330
|
-
_am 30
|
331
|
-
gro 30
|
332
|
-
ent 30
|
333
|
-
os 30
|
334
|
-
kl 30
|
335
|
-
erk 30
|
336
|
-
tr 29
|
337
|
-
op_ 29
|
338
|
-
lg 29
|
339
|
-
ka_ 29
|
340
|
-
wy 28
|
341
|
-
mee 28
|
342
|
-
re_ 28
|
343
|
-
ot_ 28
|
344
|
-
_vi 28
|
345
|
-
met 28
|
346
|
-
us 28
|
347
|
-
amp 28
|
348
|
-
vol 27
|
349
|
-
pa 27
|
350
|
-
nv 27
|
351
|
-
gt 27
|
352
|
-
rw 27
|
353
|
-
_ee 27
|
354
|
-
bru 27
|
355
|
-
dat 27
|
356
|
-
eni 26
|
357
|
-
hu 26
|
358
|
-
ak 26
|
359
|
-
ten 26
|
360
|
-
eel 26
|
361
|
-
men 26
|
362
|
-
ai 26
|
363
|
-
gen 26
|
364
|
-
bi 26
|
365
|
-
ont 26
|
366
|
-
_mi 26
|
367
|
-
rg 26
|
368
|
-
_re 26
|
369
|
-
asi 26
|
370
|
-
wi 25
|
371
|
-
gi 25
|
372
|
-
erw 25
|
373
|
-
ev 25
|
374
|
-
kan 25
|
375
|
-
ig_ 25
|
376
|
-
om_ 25
|
377
|
-
ir 25
|
378
|
-
ebr 25
|
379
|
-
kr 24
|
380
|
-
_sp 24
|
381
|
-
sen 24
|
382
|
-
_hu 24
|
383
|
-
tw 24
|
384
|
-
ks 24
|
385
|
-
_j 24
|
386
|
-
ku 24
|
387
|
-
maa 24
|
388
|
-
aps 24
|
389
|
-
sa 24
|
390
|
-
ys 24
|
391
|
-
rm 24
|
392
|
-
ir_ 24
|
393
|
-
rsk 24
|
394
|
-
moe 24
|
395
|
-
ran 24
|
396
|
-
del 23
|
397
|
-
waa 23
|
398
|
-
kel 23
|
399
|
-
soo 23
|
400
|
-
pt 23
|
data/pkg/scylla-0.5.0.gem
DELETED
Binary file
|
data/scylla-0.8.29.gem
DELETED
Binary file
|
data/scylla-0.8.31.gem
DELETED
Binary file
|
data/scylla.gemspec
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
Gem::Specification.new do |s|
|
2
|
-
s.name = %q{scylla}
|
3
|
-
s.version = "0.8.32"
|
4
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
5
|
-
s.authors = ["Ashwin Hegde"]
|
6
|
-
s.date = %q{2012-01-26}
|
7
|
-
s.default_executable = %q{scylla}
|
8
|
-
s.description = %q{Allows for text categorization by guessing the language of a given text using n-grams}
|
9
|
-
s.email = %q{ahegde@zendesk.com}
|
10
|
-
s.executables = ["scylla"]
|
11
|
-
s.extra_rdoc_files = [
|
12
|
-
"LICENSE.txt",
|
13
|
-
"README.rdoc"
|
14
|
-
]
|
15
|
-
s.files = Dir.glob("**/**")
|
16
|
-
s.homepage = %q{http://github.com/hashwin/scylla}
|
17
|
-
s.licenses = ["MIT"]
|
18
|
-
s.require_paths = ["lib"]
|
19
|
-
s.rubygems_version = %q{1.5.3}
|
20
|
-
s.summary = %q{Ruby port of Textcat language guesser}
|
21
|
-
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
22
|
-
s.add_dependency(%q<sanitize>, ["~> 2.0.0"])
|
23
|
-
end
|
24
|
-
|