scylla 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/scylla/generator.rb +0 -1
- data/lib/scylla/lms/{greek-iso8859-7.lm → greek.lm} +0 -0
- data/lib/scylla/lms/{serbian-ascii.lm → serbian.lm} +0 -0
- data/lib/scylla/lms/{slovak-ascii.lm → slovak.lm} +0 -0
- data/lib/scylla/lms/{slovenian-ascii.lm → slovenian.lm} +0 -0
- data/lib/scylla/lms/swedish.lm +400 -400
- data/lib/scylla/lms/{ukrainian-koi8_u.lm → ukrainian.lm} +0 -0
- data/lib/scylla/lms/{yiddish-utf.lm → yiddish.lm} +0 -0
- data/lib/scylla/resources.rb +53 -0
- data/lib/scylla/string.rb +18 -4
- data/lib/scylla.rb +2 -1
- data/scylla.gemspec +14 -13
- data/source_texts/{greek-iso8859-7.txt → greek.txt} +0 -0
- data/source_texts/{serbian-ascii.txt → serbian.txt} +0 -0
- data/source_texts/{slovak-ascii.txt → slovak.txt} +0 -0
- data/source_texts/{slovenian-ascii.txt → slovenian.txt} +0 -0
- data/source_texts/swedish.txt +479 -66
- data/source_texts/{ukrainian-koi8_u.txt → ukrainian.txt} +0 -0
- data/test/classifier_test.rb +1 -1
- data/test/loader_test.rb +1 -1
- data/test/scylla_test.rb +5 -1
- metadata +16 -15
File without changes
|
File without changes
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module Scylla
|
2
|
+
class Resources
|
3
|
+
@locales = {
|
4
|
+
"english" => "en",
|
5
|
+
"spanish" => "es",
|
6
|
+
"german" => "de",
|
7
|
+
"chinese" => "zh",
|
8
|
+
"dutch" => "nl",
|
9
|
+
"polish" => "pl",
|
10
|
+
"russian" => "ru",
|
11
|
+
"italian" => "it",
|
12
|
+
"icelandic" => "is",
|
13
|
+
"vietnamese" => "vi",
|
14
|
+
"turkish" => "tr",
|
15
|
+
"french" => "fr",
|
16
|
+
"norwegian" => "no",
|
17
|
+
"tagalog" => "fil",
|
18
|
+
"japanese" => "ja",
|
19
|
+
"arabic" => "ar",
|
20
|
+
"slovenian" => "sl",
|
21
|
+
"swedish" => "sv",
|
22
|
+
"croatian" => "hr",
|
23
|
+
"indonesian" => "id",
|
24
|
+
"czech" => "cs",
|
25
|
+
"portugese" => "pt",
|
26
|
+
"finnish" => "fi",
|
27
|
+
"korean" => "ko",
|
28
|
+
"greek" => "el",
|
29
|
+
"bulgarian" => "bg",
|
30
|
+
"romanian" => "ro",
|
31
|
+
"estonian" => "et",
|
32
|
+
"danish" => "da",
|
33
|
+
"hebrew" => "he",
|
34
|
+
"slovak" => "sk",
|
35
|
+
"bosnian" => "bs",
|
36
|
+
"magyar" => "hu",
|
37
|
+
"farsi" => "fa",
|
38
|
+
"welsh" => "cy",
|
39
|
+
"lithuanian" => "lt",
|
40
|
+
"catalan" => "ca",
|
41
|
+
"thai" => "th",
|
42
|
+
"afrikaans" => "nl",
|
43
|
+
"latvian" => "lv"}
|
44
|
+
|
45
|
+
def self.locales
|
46
|
+
return @locales
|
47
|
+
end
|
48
|
+
|
49
|
+
def self.get_locale(name)
|
50
|
+
return @locales[name]
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
data/lib/scylla/string.rb
CHANGED
@@ -1,11 +1,25 @@
|
|
1
1
|
class String
|
2
|
-
def guess
|
3
|
-
sc = Scylla::Classifier.new
|
4
|
-
sc.classify_string(self)
|
5
|
-
end
|
6
2
|
|
7
3
|
def language
|
8
4
|
sc = Scylla::Classifier.new
|
9
5
|
sc.classify_string(self).first
|
10
6
|
end
|
7
|
+
|
8
|
+
def locale
|
9
|
+
sc = Scylla::Classifier.new
|
10
|
+
Scylla::Resources.locales[sc.classify_string(self).first]
|
11
|
+
end
|
12
|
+
|
13
|
+
def guess_locale
|
14
|
+
sc = Scylla::Classifier.new
|
15
|
+
languages = sc.classify_string(self)
|
16
|
+
locales = []
|
17
|
+
languages.each {|lan| locales << Scylla::Resources.locales[lan]}
|
18
|
+
return locales
|
19
|
+
end
|
20
|
+
|
21
|
+
def guess_language
|
22
|
+
sc = Scylla::Classifier.new
|
23
|
+
sc.classify_string(self)
|
24
|
+
end
|
11
25
|
end
|
data/lib/scylla.rb
CHANGED
data/scylla.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{scylla}
|
8
|
-
s.version = "0.3.0"
|
8
|
+
s.version = "0.4.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Ashwin Hegde"]
|
12
|
-
s.date = %q{2011-08-
|
12
|
+
s.date = %q{2011-08-29}
|
13
13
|
s.default_executable = %q{scylla}
|
14
14
|
s.description = %q{Allows for text categorization by guessing the language of a given text using n-grams}
|
15
15
|
s.email = %q{ahegde@zendesk.com}
|
@@ -42,7 +42,7 @@ Gem::Specification.new do |s|
|
|
42
42
|
"lib/scylla/lms/finnish.lm",
|
43
43
|
"lib/scylla/lms/french.lm",
|
44
44
|
"lib/scylla/lms/german.lm",
|
45
|
-
"lib/scylla/lms/greek-iso8859-7.lm",
|
45
|
+
"lib/scylla/lms/greek.lm",
|
46
46
|
"lib/scylla/lms/hebrew.lm",
|
47
47
|
"lib/scylla/lms/hindi.lm",
|
48
48
|
"lib/scylla/lms/hungarian.lm",
|
@@ -67,9 +67,9 @@ Gem::Specification.new do |s|
|
|
67
67
|
"lib/scylla/lms/russian.lm",
|
68
68
|
"lib/scylla/lms/sanskrit.lm",
|
69
69
|
"lib/scylla/lms/scots_gaelic.lm",
|
70
|
-
"lib/scylla/lms/serbian-ascii.lm",
|
71
|
-
"lib/scylla/lms/slovak-ascii.lm",
|
72
|
-
"lib/scylla/lms/slovenian-ascii.lm",
|
70
|
+
"lib/scylla/lms/serbian.lm",
|
71
|
+
"lib/scylla/lms/slovak.lm",
|
72
|
+
"lib/scylla/lms/slovenian.lm",
|
73
73
|
"lib/scylla/lms/spanish.lm",
|
74
74
|
"lib/scylla/lms/swahili.lm",
|
75
75
|
"lib/scylla/lms/swedish.lm",
|
@@ -77,11 +77,12 @@ Gem::Specification.new do |s|
|
|
77
77
|
"lib/scylla/lms/tamil.lm",
|
78
78
|
"lib/scylla/lms/thai.lm",
|
79
79
|
"lib/scylla/lms/turkish.lm",
|
80
|
-
"lib/scylla/lms/ukrainian-koi8_u.lm",
|
80
|
+
"lib/scylla/lms/ukrainian.lm",
|
81
81
|
"lib/scylla/lms/vietnamese.lm",
|
82
82
|
"lib/scylla/lms/welsh.lm",
|
83
|
-
"lib/scylla/lms/yiddish-utf.lm",
|
83
|
+
"lib/scylla/lms/yiddish.lm",
|
84
84
|
"lib/scylla/loader.rb",
|
85
|
+
"lib/scylla/resources.rb",
|
85
86
|
"lib/scylla/string.rb",
|
86
87
|
"lib/scylla/tasks.rb",
|
87
88
|
"scylla-0.1.0.gem",
|
@@ -98,7 +99,7 @@ Gem::Specification.new do |s|
|
|
98
99
|
"source_texts/finnish.txt",
|
99
100
|
"source_texts/french.txt",
|
100
101
|
"source_texts/german.txt",
|
101
|
-
"source_texts/greek-iso8859-7.txt",
|
102
|
+
"source_texts/greek.txt",
|
102
103
|
"source_texts/hebrew.txt",
|
103
104
|
"source_texts/hindi.txt",
|
104
105
|
"source_texts/hungarian.txt",
|
@@ -123,9 +124,9 @@ Gem::Specification.new do |s|
|
|
123
124
|
"source_texts/russian.txt",
|
124
125
|
"source_texts/sanskrit.txt",
|
125
126
|
"source_texts/scots_gaelic.txt",
|
126
|
-
"source_texts/serbian-ascii.txt",
|
127
|
-
"source_texts/slovak-ascii.txt",
|
128
|
-
"source_texts/slovenian-ascii.txt",
|
127
|
+
"source_texts/serbian.txt",
|
128
|
+
"source_texts/slovak.txt",
|
129
|
+
"source_texts/slovenian.txt",
|
129
130
|
"source_texts/spanish.txt",
|
130
131
|
"source_texts/swahili.txt",
|
131
132
|
"source_texts/swedish.txt",
|
@@ -133,7 +134,7 @@ Gem::Specification.new do |s|
|
|
133
134
|
"source_texts/tamil.txt",
|
134
135
|
"source_texts/thai.txt",
|
135
136
|
"source_texts/turkish.txt",
|
136
|
-
"source_texts/ukrainian-koi8_u.txt",
|
137
|
+
"source_texts/ukrainian.txt",
|
137
138
|
"source_texts/vietnamese.txt",
|
138
139
|
"source_texts/welsh.txt",
|
139
140
|
"source_texts/yiddish-utf.txt",
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|