scylla 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/scylla/generator.rb +0 -1
- data/lib/scylla/lms/{greek-iso8859-7.lm → greek.lm} +0 -0
- data/lib/scylla/lms/{serbian-ascii.lm → serbian.lm} +0 -0
- data/lib/scylla/lms/{slovak-ascii.lm → slovak.lm} +0 -0
- data/lib/scylla/lms/{slovenian-ascii.lm → slovenian.lm} +0 -0
- data/lib/scylla/lms/swedish.lm +400 -400
- data/lib/scylla/lms/{ukrainian-koi8_u.lm → ukrainian.lm} +0 -0
- data/lib/scylla/lms/{yiddish-utf.lm → yiddish.lm} +0 -0
- data/lib/scylla/resources.rb +53 -0
- data/lib/scylla/string.rb +18 -4
- data/lib/scylla.rb +2 -1
- data/scylla.gemspec +14 -13
- data/source_texts/{greek-iso8859-7.txt → greek.txt} +0 -0
- data/source_texts/{serbian-ascii.txt → serbian.txt} +0 -0
- data/source_texts/{slovak-ascii.txt → slovak.txt} +0 -0
- data/source_texts/{slovenian-ascii.txt → slovenian.txt} +0 -0
- data/source_texts/swedish.txt +479 -66
- data/source_texts/{ukrainian-koi8_u.txt → ukrainian.txt} +0 -0
- data/test/classifier_test.rb +1 -1
- data/test/loader_test.rb +1 -1
- data/test/scylla_test.rb +5 -1
- metadata +16 -15
File without changes
|
File without changes
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module Scylla
  # Lookup table from language names to locale codes.
  #
  # String#locale and String#guess_locale index this table with the
  # language names produced by Scylla::Classifier#classify_string.
  class Resources
    @locales = {
      "english"    => "en",
      "spanish"    => "es",
      "german"     => "de",
      "chinese"    => "zh",
      "dutch"      => "nl",
      "polish"     => "pl",
      "russian"    => "ru",
      "italian"    => "it",
      "icelandic"  => "is",
      "vietnamese" => "vi",
      "turkish"    => "tr",
      "french"     => "fr",
      "norwegian"  => "no",
      "tagalog"    => "fil",
      "japanese"   => "ja",
      "arabic"     => "ar",
      "slovenian"  => "sl",
      "swedish"    => "sv",
      "croatian"   => "hr",
      "indonesian" => "id",
      "czech"      => "cs",
      # Misspelled key kept for backward compatibility; the correctly
      # spelled variant is added next to it.
      # NOTE(review): presumably the classifier reports "portuguese" --
      # verify against the file names under lib/scylla/lms.
      "portugese"  => "pt",
      "portuguese" => "pt",
      "finnish"    => "fi",
      "korean"     => "ko",
      "greek"      => "el",
      "bulgarian"  => "bg",
      "romanian"   => "ro",
      "estonian"   => "et",
      "danish"     => "da",
      "hebrew"     => "he",
      "slovak"     => "sk",
      "bosnian"    => "bs",
      # The gemspec ships lib/scylla/lms/hungarian.lm, so "hungarian" is
      # mapped as well; "magyar" is kept for existing callers.
      "magyar"     => "hu",
      "hungarian"  => "hu",
      "farsi"      => "fa",
      "welsh"      => "cy",
      "lithuanian" => "lt",
      "catalan"    => "ca",
      "thai"       => "th",
      # NOTE(review): "nl" is the Dutch code; ISO 639-1 for Afrikaans is
      # "af". Left unchanged in case the Dutch fallback is deliberate --
      # confirm with the maintainers.
      "afrikaans"  => "nl",
      "latvian"    => "lv"
    }

    class << self
      # @return [Hash{String => String}] the language-name => locale-code
      #   table. This is the live table, not a copy.
      attr_reader :locales

      # Looks up the locale code for a language name.
      #
      # The exact key is tried first; if it is missing, the lookup is
      # retried with the lower-cased string form of +name+, so symbols and
      # mixed-case names resolve too.
      #
      # @param name [String, Symbol, nil] language name, e.g. "english"
      # @return [String, nil] the locale code, or nil when unmapped
      def get_locale(name)
        @locales.fetch(name) { @locales[name.to_s.downcase] }
      end
    end
  end
end
|
data/lib/scylla/string.rb
CHANGED
@@ -1,11 +1,25 @@
|
|
1
1
|
# Language-detection helpers added to String.
#
# All of them classify the receiver with a new Scylla::Classifier; locale
# variants translate the resulting language names through
# Scylla::Resources.locales.
class String
  # @deprecated Use {#guess_language}. This was the 0.3.0 name of that
  #   method and is kept as a thin delegate so existing callers keep working.
  # @return the result of Scylla::Classifier#classify_string for this string
  def guess
    guess_language
  end

  # @return the first entry of {#guess_language} (nil when there is none)
  def language
    guess_language.first
  end

  # @return [String, nil] locale code mapped to {#language}, or nil when the
  #   language has no entry in Scylla::Resources.locales
  def locale
    Scylla::Resources.locales[language]
  end

  # Locale codes for every candidate language, in the order given by
  # {#guess_language}. Candidates without a mapping yield nil, so the result
  # lines up index-for-index with {#guess_language}.
  #
  # @return [Array<String, nil>]
  def guess_locale
    guess_language.map { |candidate| Scylla::Resources.locales[candidate] }
  end

  # Classifies this string with a new Scylla::Classifier.
  #
  # @return the candidate languages returned by
  #   Scylla::Classifier#classify_string
  def guess_language
    Scylla::Classifier.new.classify_string(self)
  end
end
|
data/lib/scylla.rb
CHANGED
data/scylla.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{scylla}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.4.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Ashwin Hegde"]
|
12
|
-
s.date = %q{2011-08-
|
12
|
+
s.date = %q{2011-08-29}
|
13
13
|
s.default_executable = %q{scylla}
|
14
14
|
s.description = %q{Allows for text categorization by guessing the language of a given text using n-grams}
|
15
15
|
s.email = %q{ahegde@zendesk.com}
|
@@ -42,7 +42,7 @@ Gem::Specification.new do |s|
|
|
42
42
|
"lib/scylla/lms/finnish.lm",
|
43
43
|
"lib/scylla/lms/french.lm",
|
44
44
|
"lib/scylla/lms/german.lm",
|
45
|
-
"lib/scylla/lms/greek
|
45
|
+
"lib/scylla/lms/greek.lm",
|
46
46
|
"lib/scylla/lms/hebrew.lm",
|
47
47
|
"lib/scylla/lms/hindi.lm",
|
48
48
|
"lib/scylla/lms/hungarian.lm",
|
@@ -67,9 +67,9 @@ Gem::Specification.new do |s|
|
|
67
67
|
"lib/scylla/lms/russian.lm",
|
68
68
|
"lib/scylla/lms/sanskrit.lm",
|
69
69
|
"lib/scylla/lms/scots_gaelic.lm",
|
70
|
-
"lib/scylla/lms/serbian
|
71
|
-
"lib/scylla/lms/slovak
|
72
|
-
"lib/scylla/lms/slovenian
|
70
|
+
"lib/scylla/lms/serbian.lm",
|
71
|
+
"lib/scylla/lms/slovak.lm",
|
72
|
+
"lib/scylla/lms/slovenian.lm",
|
73
73
|
"lib/scylla/lms/spanish.lm",
|
74
74
|
"lib/scylla/lms/swahili.lm",
|
75
75
|
"lib/scylla/lms/swedish.lm",
|
@@ -77,11 +77,12 @@ Gem::Specification.new do |s|
|
|
77
77
|
"lib/scylla/lms/tamil.lm",
|
78
78
|
"lib/scylla/lms/thai.lm",
|
79
79
|
"lib/scylla/lms/turkish.lm",
|
80
|
-
"lib/scylla/lms/ukrainian
|
80
|
+
"lib/scylla/lms/ukrainian.lm",
|
81
81
|
"lib/scylla/lms/vietnamese.lm",
|
82
82
|
"lib/scylla/lms/welsh.lm",
|
83
|
-
"lib/scylla/lms/yiddish
|
83
|
+
"lib/scylla/lms/yiddish.lm",
|
84
84
|
"lib/scylla/loader.rb",
|
85
|
+
"lib/scylla/resources.rb",
|
85
86
|
"lib/scylla/string.rb",
|
86
87
|
"lib/scylla/tasks.rb",
|
87
88
|
"scylla-0.1.0.gem",
|
@@ -98,7 +99,7 @@ Gem::Specification.new do |s|
|
|
98
99
|
"source_texts/finnish.txt",
|
99
100
|
"source_texts/french.txt",
|
100
101
|
"source_texts/german.txt",
|
101
|
-
"source_texts/greek
|
102
|
+
"source_texts/greek.txt",
|
102
103
|
"source_texts/hebrew.txt",
|
103
104
|
"source_texts/hindi.txt",
|
104
105
|
"source_texts/hungarian.txt",
|
@@ -123,9 +124,9 @@ Gem::Specification.new do |s|
|
|
123
124
|
"source_texts/russian.txt",
|
124
125
|
"source_texts/sanskrit.txt",
|
125
126
|
"source_texts/scots_gaelic.txt",
|
126
|
-
"source_texts/serbian
|
127
|
-
"source_texts/slovak
|
128
|
-
"source_texts/slovenian
|
127
|
+
"source_texts/serbian.txt",
|
128
|
+
"source_texts/slovak.txt",
|
129
|
+
"source_texts/slovenian.txt",
|
129
130
|
"source_texts/spanish.txt",
|
130
131
|
"source_texts/swahili.txt",
|
131
132
|
"source_texts/swedish.txt",
|
@@ -133,7 +134,7 @@ Gem::Specification.new do |s|
|
|
133
134
|
"source_texts/tamil.txt",
|
134
135
|
"source_texts/thai.txt",
|
135
136
|
"source_texts/turkish.txt",
|
136
|
-
"source_texts/ukrainian
|
137
|
+
"source_texts/ukrainian.txt",
|
137
138
|
"source_texts/vietnamese.txt",
|
138
139
|
"source_texts/welsh.txt",
|
139
140
|
"source_texts/yiddish-utf.txt",
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|