scylla 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,53 @@
1
module Scylla
  # Static lookup table that translates a language name (a lowercase English
  # word such as "german") into a short locale code (such as "de").
  class Resources
    # Language name => locale code.
    #
    # NOTE(review): keys presumably have to match the language names reported
    # by Scylla::Classifier exactly -- confirm against the classifier. Two keys
    # look suspicious under that assumption:
    #   * "portugese" is spelled without the second "u";
    #   * "magyar" is used for Hungarian, while the gemspec ships a
    #     "hungarian.lm" language model.
    # Both keys are left untouched here so existing lookups keep working.
    @locales = {
      "english" => "en",
      "spanish" => "es",
      "german" => "de",
      "chinese" => "zh",
      "dutch" => "nl",
      "polish" => "pl",
      "russian" => "ru",
      "italian" => "it",
      "icelandic" => "is",
      "vietnamese" => "vi",
      "turkish" => "tr",
      "french" => "fr",
      "norwegian" => "no",
      "tagalog" => "fil",
      "japanese" => "ja",
      "arabic" => "ar",
      "slovenian" => "sl",
      "swedish" => "sv",
      "croatian" => "hr",
      "indonesian" => "id",
      "czech" => "cs",
      "portugese" => "pt",
      "finnish" => "fi",
      "korean" => "ko",
      "greek" => "el",
      "bulgarian" => "bg",
      "romanian" => "ro",
      "estonian" => "et",
      "danish" => "da",
      "hebrew" => "he",
      "slovak" => "sk",
      "bosnian" => "bs",
      "magyar" => "hu",
      "farsi" => "fa",
      "welsh" => "cy",
      "lithuanian" => "lt",
      "catalan" => "ca",
      "thai" => "th",
      # Afrikaans has its own ISO 639-1 code; "nl" is Dutch.
      "afrikaans" => "af",
      "latvian" => "lv"
    }

    # Returns the complete language-name => locale-code Hash.
    #
    # The very same Hash object is returned on every call (no copy is made).
    def self.locales
      @locales
    end

    # Looks up the locale code for a single language name.
    #
    # name - the language name used as Hash key, e.g. "german".
    #
    # Returns the locale code String, or nil when the name is not in the table.
    def self.get_locale(name)
      @locales[name]
    end
  end
end
data/lib/scylla/string.rb CHANGED
@@ -1,11 +1,25 @@
1
1
  class String
2
- def guess
3
- sc = Scylla::Classifier.new
4
- sc.classify_string(self)
5
- end
6
2
 
7
3
  def language
8
4
  sc = Scylla::Classifier.new
9
5
  sc.classify_string(self).first
10
6
  end
7
+
8
+ def locale
9
+ sc = Scylla::Classifier.new
10
+ Scylla::Resources.locales[sc.classify_string(self).first]
11
+ end
12
+
13
+ def guess_locale
14
+ sc = Scylla::Classifier.new
15
+ languages = sc.classify_string(self)
16
+ locales = []
17
+ languages.each {|lan| locales << Scylla::Resources.locales[lan]}
18
+ return locales
19
+ end
20
+
21
+ def guess_language
22
+ sc = Scylla::Classifier.new
23
+ sc.classify_string(self)
24
+ end
11
25
  end
data/lib/scylla.rb CHANGED
@@ -7,4 +7,5 @@ end
7
7
  require 'scylla/classifier'
8
8
  require 'scylla/generator'
9
9
  require 'scylla/loader'
10
- require 'scylla/string'
10
+ require 'scylla/string'
11
+ require 'scylla/resources'
data/scylla.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{scylla}
8
- s.version = "0.3.0"
8
+ s.version = "0.4.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Ashwin Hegde"]
12
- s.date = %q{2011-08-26}
12
+ s.date = %q{2011-08-29}
13
13
  s.default_executable = %q{scylla}
14
14
  s.description = %q{Allows for text categorization by guessing the language of a given text using n-grams}
15
15
  s.email = %q{ahegde@zendesk.com}
@@ -42,7 +42,7 @@ Gem::Specification.new do |s|
42
42
  "lib/scylla/lms/finnish.lm",
43
43
  "lib/scylla/lms/french.lm",
44
44
  "lib/scylla/lms/german.lm",
45
- "lib/scylla/lms/greek-iso8859-7.lm",
45
+ "lib/scylla/lms/greek.lm",
46
46
  "lib/scylla/lms/hebrew.lm",
47
47
  "lib/scylla/lms/hindi.lm",
48
48
  "lib/scylla/lms/hungarian.lm",
@@ -67,9 +67,9 @@ Gem::Specification.new do |s|
67
67
  "lib/scylla/lms/russian.lm",
68
68
  "lib/scylla/lms/sanskrit.lm",
69
69
  "lib/scylla/lms/scots_gaelic.lm",
70
- "lib/scylla/lms/serbian-ascii.lm",
71
- "lib/scylla/lms/slovak-ascii.lm",
72
- "lib/scylla/lms/slovenian-ascii.lm",
70
+ "lib/scylla/lms/serbian.lm",
71
+ "lib/scylla/lms/slovak.lm",
72
+ "lib/scylla/lms/slovenian.lm",
73
73
  "lib/scylla/lms/spanish.lm",
74
74
  "lib/scylla/lms/swahili.lm",
75
75
  "lib/scylla/lms/swedish.lm",
@@ -77,11 +77,12 @@ Gem::Specification.new do |s|
77
77
  "lib/scylla/lms/tamil.lm",
78
78
  "lib/scylla/lms/thai.lm",
79
79
  "lib/scylla/lms/turkish.lm",
80
- "lib/scylla/lms/ukrainian-koi8_u.lm",
80
+ "lib/scylla/lms/ukrainian.lm",
81
81
  "lib/scylla/lms/vietnamese.lm",
82
82
  "lib/scylla/lms/welsh.lm",
83
- "lib/scylla/lms/yiddish-utf.lm",
83
+ "lib/scylla/lms/yiddish.lm",
84
84
  "lib/scylla/loader.rb",
85
+ "lib/scylla/resources.rb",
85
86
  "lib/scylla/string.rb",
86
87
  "lib/scylla/tasks.rb",
87
88
  "scylla-0.1.0.gem",
@@ -98,7 +99,7 @@ Gem::Specification.new do |s|
98
99
  "source_texts/finnish.txt",
99
100
  "source_texts/french.txt",
100
101
  "source_texts/german.txt",
101
- "source_texts/greek-iso8859-7.txt",
102
+ "source_texts/greek.txt",
102
103
  "source_texts/hebrew.txt",
103
104
  "source_texts/hindi.txt",
104
105
  "source_texts/hungarian.txt",
@@ -123,9 +124,9 @@ Gem::Specification.new do |s|
123
124
  "source_texts/russian.txt",
124
125
  "source_texts/sanskrit.txt",
125
126
  "source_texts/scots_gaelic.txt",
126
- "source_texts/serbian-ascii.txt",
127
- "source_texts/slovak-ascii.txt",
128
- "source_texts/slovenian-ascii.txt",
127
+ "source_texts/serbian.txt",
128
+ "source_texts/slovak.txt",
129
+ "source_texts/slovenian.txt",
129
130
  "source_texts/spanish.txt",
130
131
  "source_texts/swahili.txt",
131
132
  "source_texts/swedish.txt",
@@ -133,7 +134,7 @@ Gem::Specification.new do |s|
133
134
  "source_texts/tamil.txt",
134
135
  "source_texts/thai.txt",
135
136
  "source_texts/turkish.txt",
136
- "source_texts/ukrainian-koi8_u.txt",
137
+ "source_texts/ukrainian.txt",
137
138
  "source_texts/vietnamese.txt",
138
139
  "source_texts/welsh.txt",
139
140
  "source_texts/yiddish-utf.txt",
File without changes
File without changes
File without changes
File without changes