scylla 0.8.32 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/scylla/generator.rb +6 -2
- data/lib/scylla/lms/arabic.lm +318 -318
- data/lib/scylla/lms/bulgarian.lm +326 -326
- data/lib/scylla/lms/chinese.lm +399 -399
- data/lib/scylla/lms/french.lm +302 -302
- data/lib/scylla/lms/greek.lm +119 -119
- data/lib/scylla/lms/hebrew.lm +168 -168
- data/lib/scylla/lms/hindi.lm +108 -108
- data/lib/scylla/lms/japanese.lm +65 -65
- data/lib/scylla/lms/kannada.lm +147 -147
- data/lib/scylla/lms/korean.lm +151 -151
- data/lib/scylla/lms/marathi.lm +133 -133
- data/lib/scylla/lms/persian.lm +107 -107
- data/lib/scylla/lms/polish.lm +108 -108
- data/lib/scylla/lms/portuguese.lm +221 -221
- data/lib/scylla/lms/romanian.lm +132 -132
- data/lib/scylla/lms/russian.lm +82 -82
- data/lib/scylla/lms/thai.lm +119 -119
- data/lib/scylla/resources.rb +0 -1
- data/test/helper.rb +0 -1
- metadata +40 -55
- data/Gemfile +0 -23
- data/Gemfile.lock +0 -53
- data/Rakefile +0 -52
- data/VERSION +0 -1
- data/lib/scylla/lms/afrikaans.lm +0 -400
- data/pkg/scylla-0.5.0.gem +0 -0
- data/scylla-0.8.29.gem +0 -0
- data/scylla-0.8.31.gem +0 -0
- data/scylla.gemspec +0 -24
- data/source_texts/afrikaans.txt +0 -363
- data/source_texts/arabic.txt +0 -718
- data/source_texts/bulgarian.txt +0 -601
- data/source_texts/catalan.txt +0 -435
- data/source_texts/chinese.txt +0 -625
- data/source_texts/czech.txt +0 -237
- data/source_texts/danish.txt +0 -268
- data/source_texts/dutch.txt +0 -503
- data/source_texts/english.txt +0 -673
- data/source_texts/finnish.txt +0 -939
- data/source_texts/french.txt +0 -896
- data/source_texts/german.txt +0 -1236
- data/source_texts/greek.txt +0 -488
- data/source_texts/hebrew.txt +0 -638
- data/source_texts/hindi.txt +0 -353
- data/source_texts/icelandic.txt +0 -342
- data/source_texts/indonesian.txt +0 -509
- data/source_texts/italian.txt +0 -1066
- data/source_texts/japanese.txt +0 -1220
- data/source_texts/kannada.txt +0 -340
- data/source_texts/korean.txt +0 -343
- data/source_texts/marathi.txt +0 -237
- data/source_texts/norwegian.txt +0 -555
- data/source_texts/persian.txt +0 -886
- data/source_texts/polish.txt +0 -1014
- data/source_texts/portuguese.txt +0 -690
- data/source_texts/romanian.txt +0 -436
- data/source_texts/russian.txt +0 -1128
- data/source_texts/slovak.txt +0 -575
- data/source_texts/slovenian.txt +0 -354
- data/source_texts/spanish.txt +0 -1017
- data/source_texts/swedish.txt +0 -558
- data/source_texts/tagalog.txt +0 -426
- data/source_texts/thai.txt +0 -312
- data/source_texts/turkish.txt +0 -665
- data/source_texts/vietnamese.txt +0 -300
- data/source_texts/welsh.txt +0 -332
data/Rakefile
DELETED
@@ -1,52 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
$LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__), "lib")))
|
4
|
-
|
5
|
-
require 'rubygems'
|
6
|
-
require 'bundler'
|
7
|
-
require 'scylla'
|
8
|
-
require 'scylla/tasks'
|
9
|
-
|
10
|
-
begin
|
11
|
-
Bundler.setup(:default, :development)
|
12
|
-
rescue Bundler::BundlerError => e
|
13
|
-
$stderr.puts e.message
|
14
|
-
$stderr.puts "Run `bundle install` to install missing gems"
|
15
|
-
exit e.status_code
|
16
|
-
end
|
17
|
-
require 'rake'
|
18
|
-
|
19
|
-
require 'jeweler'
|
20
|
-
Jeweler::Tasks.new do |gem|
|
21
|
-
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
22
|
-
gem.name = "scylla"
|
23
|
-
gem.homepage = "http://github.com/hashwin/scylla"
|
24
|
-
gem.license = "MIT"
|
25
|
-
gem.summary = "Ruby port of Textcat language guesser"
|
26
|
-
gem.description = "Allows for text categorization by guessing the language of a given text using n-grams"
|
27
|
-
gem.email = "ahegde@zendesk.com"
|
28
|
-
gem.authors = ["Ashwin Hegde"]
|
29
|
-
# dependencies defined in Gemfile
|
30
|
-
end
|
31
|
-
Jeweler::RubygemsDotOrgTasks.new
|
32
|
-
|
33
|
-
require 'rake/testtask'
|
34
|
-
Rake::TestTask.new(:test) do |test|
|
35
|
-
test.libs << 'lib' << 'test'
|
36
|
-
test.pattern = 'test/**/*_test.rb'
|
37
|
-
test.verbose = true
|
38
|
-
end
|
39
|
-
|
40
|
-
task :default => :test
|
41
|
-
|
42
|
-
require 'rake/rdoctask'
|
43
|
-
Rake::RDocTask.new do |rdoc|
|
44
|
-
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
45
|
-
|
46
|
-
rdoc.rdoc_dir = 'rdoc'
|
47
|
-
rdoc.title = "scylla #{version}"
|
48
|
-
rdoc.rdoc_files.include('README*')
|
49
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
50
|
-
end
|
51
|
-
|
52
|
-
Scylla::Tasks.new
|
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
0.5.0
|
data/lib/scylla/lms/afrikaans.lm
DELETED
@@ -1,400 +0,0 @@
|
|
1
|
-
_ 8334
|
2
|
-
e 3489
|
3
|
-
a 2570
|
4
|
-
n 1985
|
5
|
-
i 1910
|
6
|
-
s 1565
|
7
|
-
r 1462
|
8
|
-
d 1244
|
9
|
-
t 1191
|
10
|
-
e_ 1057
|
11
|
-
o 1019
|
12
|
-
l 972
|
13
|
-
k 865
|
14
|
-
n_ 739
|
15
|
-
an 684
|
16
|
-
s_ 670
|
17
|
-
g 662
|
18
|
-
aa 563
|
19
|
-
ie 562
|
20
|
-
er 526
|
21
|
-
_d 507
|
22
|
-
v 456
|
23
|
-
m 453
|
24
|
-
u 447
|
25
|
-
_a 440
|
26
|
-
en 437
|
27
|
-
di 432
|
28
|
-
ie_ 429
|
29
|
-
de 380
|
30
|
-
w 376
|
31
|
-
t_ 371
|
32
|
-
in 368
|
33
|
-
_di 367
|
34
|
-
die 358
|
35
|
-
ik 347
|
36
|
-
_v 347
|
37
|
-
ka 330
|
38
|
-
p 330
|
39
|
-
b 324
|
40
|
-
f 320
|
41
|
-
_n 298
|
42
|
-
_i 298
|
43
|
-
ri 291
|
44
|
-
ge 290
|
45
|
-
_e 272
|
46
|
-
te 262
|
47
|
-
nd 261
|
48
|
-
al 261
|
49
|
-
ns 255
|
50
|
-
h 254
|
51
|
-
aan 253
|
52
|
-
el 252
|
53
|
-
_s 251
|
54
|
-
af 232
|
55
|
-
ta 230
|
56
|
-
r_ 228
|
57
|
-
ika 225
|
58
|
-
_o 224
|
59
|
-
_af 224
|
60
|
-
fr 223
|
61
|
-
an_ 220
|
62
|
-
se 219
|
63
|
-
va 218
|
64
|
-
kaa 218
|
65
|
-
rik 218
|
66
|
-
_w 216
|
67
|
-
l_ 215
|
68
|
-
en_ 215
|
69
|
-
afr 213
|
70
|
-
fri 213
|
71
|
-
ng 206
|
72
|
-
_t 196
|
73
|
-
_in 194
|
74
|
-
_h 193
|
75
|
-
ans 191
|
76
|
-
_b 184
|
77
|
-
es 181
|
78
|
-
_en 177
|
79
|
-
in_ 177
|
80
|
-
oo 177
|
81
|
-
ee 176
|
82
|
-
et 176
|
83
|
-
st 176
|
84
|
-
_g 175
|
85
|
-
la 174
|
86
|
-
van 174
|
87
|
-
_va 173
|
88
|
-
as 171
|
89
|
-
d_ 168
|
90
|
-
at 165
|
91
|
-
der 165
|
92
|
-
is 163
|
93
|
-
_m 160
|
94
|
-
g_ 154
|
95
|
-
ed 153
|
96
|
-
and 152
|
97
|
-
or 151
|
98
|
-
se_ 148
|
99
|
-
ui 145
|
100
|
-
ns_ 144
|
101
|
-
ke 143
|
102
|
-
ar 142
|
103
|
-
li 142
|
104
|
-
ne 140
|
105
|
-
le 139
|
106
|
-
wa 137
|
107
|
-
k_ 136
|
108
|
-
et_ 135
|
109
|
-
al_ 134
|
110
|
-
on 133
|
111
|
-
taa 132
|
112
|
-
aal 132
|
113
|
-
re 130
|
114
|
-
lan 127
|
115
|
-
_k 127
|
116
|
-
de_ 127
|
117
|
-
_ge 123
|
118
|
-
y 123
|
119
|
-
rd 122
|
120
|
-
rs 121
|
121
|
-
it 121
|
122
|
-
nde 120
|
123
|
-
er_ 120
|
124
|
-
oe 120
|
125
|
-
is_ 119
|
126
|
-
ing 118
|
127
|
-
be 118
|
128
|
-
as_ 117
|
129
|
-
he 117
|
130
|
-
at_ 116
|
131
|
-
ede 115
|
132
|
-
me 114
|
133
|
-
_wa 110
|
134
|
-
_n_ 109
|
135
|
-
we 106
|
136
|
-
ve 105
|
137
|
-
ng_ 103
|
138
|
-
_he 102
|
139
|
-
_ta 102
|
140
|
-
ra 99
|
141
|
-
ek 97
|
142
|
-
sk 97
|
143
|
-
si 96
|
144
|
-
ers 96
|
145
|
-
_is 96
|
146
|
-
a_ 95
|
147
|
-
ver 94
|
148
|
-
oor 92
|
149
|
-
te_ 92
|
150
|
-
ei 91
|
151
|
-
het 90
|
152
|
-
ds 90
|
153
|
-
_ve 89
|
154
|
-
nt 88
|
155
|
-
rl 87
|
156
|
-
_ne 86
|
157
|
-
ro 84
|
158
|
-
da 83
|
159
|
-
_be 83
|
160
|
-
erl 80
|
161
|
-
ig 79
|
162
|
-
aar 78
|
163
|
-
ni 77
|
164
|
-
ned 76
|
165
|
-
am 75
|
166
|
-
ur 74
|
167
|
-
om 74
|
168
|
-
ord 73
|
169
|
-
rla 73
|
170
|
-
id 73
|
171
|
-
nds 73
|
172
|
-
vo 73
|
173
|
-
na 73
|
174
|
-
_p 72
|
175
|
-
pe 71
|
176
|
-
wo 70
|
177
|
-
_me 69
|
178
|
-
_as 68
|
179
|
-
sta 67
|
180
|
-
ste 67
|
181
|
-
ti 66
|
182
|
-
m_ 65
|
183
|
-
lik 65
|
184
|
-
� 65
|
185
|
-
_on 64
|
186
|
-
op 64
|
187
|
-
_ka 64
|
188
|
-
le_ 63
|
189
|
-
ll 63
|
190
|
-
nse 61
|
191
|
-
_da 61
|
192
|
-
pr 60
|
193
|
-
wat 60
|
194
|
-
ma 59
|
195
|
-
p_ 59
|
196
|
-
uit 58
|
197
|
-
_wo 58
|
198
|
-
_oo 57
|
199
|
-
em 57
|
200
|
-
nge 56
|
201
|
-
rt 56
|
202
|
-
rs_ 56
|
203
|
-
ap 56
|
204
|
-
eli 55
|
205
|
-
j 55
|
206
|
-
gr 55
|
207
|
-
ho 55
|
208
|
-
ot 54
|
209
|
-
ls 53
|
210
|
-
_r 53
|
211
|
-
mi 53
|
212
|
-
ik_ 53
|
213
|
-
eu 53
|
214
|
-
es_ 53
|
215
|
-
ko 53
|
216
|
-
_aa 52
|
217
|
-
eb 52
|
218
|
-
ou 52
|
219
|
-
sp 52
|
220
|
-
rde 52
|
221
|
-
eg 51
|
222
|
-
so 51
|
223
|
-
ar_ 51
|
224
|
-
end 50
|
225
|
-
tal 50
|
226
|
-
tu 49
|
227
|
-
els 48
|
228
|
-
ke_ 48
|
229
|
-
eer 48
|
230
|
-
ol 47
|
231
|
-
_om 47
|
232
|
-
it_ 46
|
233
|
-
ë 46
|
234
|
-
� 46
|
235
|
-
br 46
|
236
|
-
gel 46
|
237
|
-
ds_ 46
|
238
|
-
_na 46
|
239
|
-
tel 46
|
240
|
-
gs 45
|
241
|
-
_de 45
|
242
|
-
_so 45
|
243
|
-
el_ 44
|
244
|
-
ale 44
|
245
|
-
ike 43
|
246
|
-
_l 42
|
247
|
-
ki 42
|
248
|
-
u_ 42
|
249
|
-
_u 42
|
250
|
-
ru 41
|
251
|
-
bl 41
|
252
|
-
ter 41
|
253
|
-
ond 40
|
254
|
-
eng 40
|
255
|
-
ew 40
|
256
|
-
eke 39
|
257
|
-
nd_ 39
|
258
|
-
eur 39
|
259
|
-
_vo 39
|
260
|
-
c 39
|
261
|
-
su 39
|
262
|
-
ken 39
|
263
|
-
ang 39
|
264
|
-
aap 39
|
265
|
-
mo 38
|
266
|
-
nie 38
|
267
|
-
ss 38
|
268
|
-
was 38
|
269
|
-
ls_ 38
|
270
|
-
_ui 38
|
271
|
-
sie 38
|
272
|
-
ngs 38
|
273
|
-
to 37
|
274
|
-
est 37
|
275
|
-
ok 37
|
276
|
-
_op 37
|
277
|
-
ul 37
|
278
|
-
_su 37
|
279
|
-
ens 36
|
280
|
-
_te 36
|
281
|
-
f_ 36
|
282
|
-
rd_ 36
|
283
|
-
_ho 36
|
284
|
-
_gr 36
|
285
|
-
y_ 36
|
286
|
-
een 36
|
287
|
-
uid 36
|
288
|
-
geb 36
|
289
|
-
ts 36
|
290
|
-
_we 35
|
291
|
-
erd 35
|
292
|
-
ese 35
|
293
|
-
id_ 35
|
294
|
-
_st 35
|
295
|
-
rk 35
|
296
|
-
ies 35
|
297
|
-
wor 34
|
298
|
-
woo 34
|
299
|
-
ge_ 34
|
300
|
-
ges 34
|
301
|
-
ga 34
|
302
|
-
ska 34
|
303
|
-
lin 34
|
304
|
-
nk 34
|
305
|
-
esk 34
|
306
|
-
mp 33
|
307
|
-
vi 33
|
308
|
-
ige 33
|
309
|
-
il 33
|
310
|
-
ad 33
|
311
|
-
_ma 33
|
312
|
-
sui 32
|
313
|
-
ier 32
|
314
|
-
ep 32
|
315
|
-
ite 32
|
316
|
-
i� 32
|
317
|
-
ou_ 32
|
318
|
-
lle 31
|
319
|
-
ok_ 31
|
320
|
-
ook 31
|
321
|
-
dse 31
|
322
|
-
rui 30
|
323
|
-
ps 30
|
324
|
-
bo 30
|
325
|
-
_bl 30
|
326
|
-
_mo 30
|
327
|
-
spr 30
|
328
|
-
ië 30
|
329
|
-
daa 30
|
330
|
-
_am 30
|
331
|
-
gro 30
|
332
|
-
ent 30
|
333
|
-
os 30
|
334
|
-
kl 30
|
335
|
-
erk 30
|
336
|
-
tr 29
|
337
|
-
op_ 29
|
338
|
-
lg 29
|
339
|
-
ka_ 29
|
340
|
-
wy 28
|
341
|
-
mee 28
|
342
|
-
re_ 28
|
343
|
-
ot_ 28
|
344
|
-
_vi 28
|
345
|
-
met 28
|
346
|
-
us 28
|
347
|
-
amp 28
|
348
|
-
vol 27
|
349
|
-
pa 27
|
350
|
-
nv 27
|
351
|
-
gt 27
|
352
|
-
rw 27
|
353
|
-
_ee 27
|
354
|
-
bru 27
|
355
|
-
dat 27
|
356
|
-
eni 26
|
357
|
-
hu 26
|
358
|
-
ak 26
|
359
|
-
ten 26
|
360
|
-
eel 26
|
361
|
-
men 26
|
362
|
-
ai 26
|
363
|
-
gen 26
|
364
|
-
bi 26
|
365
|
-
ont 26
|
366
|
-
_mi 26
|
367
|
-
rg 26
|
368
|
-
_re 26
|
369
|
-
asi 26
|
370
|
-
wi 25
|
371
|
-
gi 25
|
372
|
-
erw 25
|
373
|
-
ev 25
|
374
|
-
kan 25
|
375
|
-
ig_ 25
|
376
|
-
om_ 25
|
377
|
-
ir 25
|
378
|
-
ebr 25
|
379
|
-
kr 24
|
380
|
-
_sp 24
|
381
|
-
sen 24
|
382
|
-
_hu 24
|
383
|
-
tw 24
|
384
|
-
ks 24
|
385
|
-
_j 24
|
386
|
-
ku 24
|
387
|
-
maa 24
|
388
|
-
aps 24
|
389
|
-
sa 24
|
390
|
-
ys 24
|
391
|
-
rm 24
|
392
|
-
ir_ 24
|
393
|
-
rsk 24
|
394
|
-
moe 24
|
395
|
-
ran 24
|
396
|
-
del 23
|
397
|
-
waa 23
|
398
|
-
kel 23
|
399
|
-
soo 23
|
400
|
-
pt 23
|
data/pkg/scylla-0.5.0.gem
DELETED
Binary file
|
data/scylla-0.8.29.gem
DELETED
Binary file
|
data/scylla-0.8.31.gem
DELETED
Binary file
|
data/scylla.gemspec
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
Gem::Specification.new do |s|
|
2
|
-
s.name = %q{scylla}
|
3
|
-
s.version = "0.8.32"
|
4
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
5
|
-
s.authors = ["Ashwin Hegde"]
|
6
|
-
s.date = %q{2012-01-26}
|
7
|
-
s.default_executable = %q{scylla}
|
8
|
-
s.description = %q{Allows for text categorization by guessing the language of a given text using n-grams}
|
9
|
-
s.email = %q{ahegde@zendesk.com}
|
10
|
-
s.executables = ["scylla"]
|
11
|
-
s.extra_rdoc_files = [
|
12
|
-
"LICENSE.txt",
|
13
|
-
"README.rdoc"
|
14
|
-
]
|
15
|
-
s.files = Dir.glob("**/**")
|
16
|
-
s.homepage = %q{http://github.com/hashwin/scylla}
|
17
|
-
s.licenses = ["MIT"]
|
18
|
-
s.require_paths = ["lib"]
|
19
|
-
s.rubygems_version = %q{1.5.3}
|
20
|
-
s.summary = %q{Ruby port of Textcat language guesser}
|
21
|
-
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
22
|
-
s.add_dependency(%q<sanitize>, ["~> 2.0.0"])
|
23
|
-
end
|
24
|
-
|