scylla 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +17 -0
- data/Gemfile.lock +30 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +19 -0
- data/Rakefile +52 -0
- data/VERSION +1 -0
- data/lib/scylla/classifier.rb +65 -0
- data/lib/scylla/generator.rb +73 -0
- data/lib/scylla/loader.rb +37 -0
- data/lib/scylla/string.rb +11 -0
- data/lib/scylla/tasks.rb +20 -0
- data/lib/scylla.rb +10 -0
- data/scylla.gemspec +117 -0
- data/source_texts/13375P33K.txt +199 -0
- data/source_texts/afrikaans.txt +114 -0
- data/source_texts/arabic.txt +576 -0
- data/source_texts/armenian.txt +86 -0
- data/source_texts/bulgarian.txt +834 -0
- data/source_texts/catalan.txt +413 -0
- data/source_texts/chinese.txt +199 -0
- data/source_texts/danish.txt +219 -0
- data/source_texts/english.txt +35 -0
- data/source_texts/esperanto.txt +199 -0
- data/source_texts/finnish.txt +71 -0
- data/source_texts/french.txt +89 -0
- data/source_texts/german.txt +137 -0
- data/source_texts/greek-iso8859-7.txt +139 -0
- data/source_texts/hebrew.txt +199 -0
- data/source_texts/hindi.txt +199 -0
- data/source_texts/hungarian.txt +102 -0
- data/source_texts/icelandic.txt +131 -0
- data/source_texts/indonesian.txt +93 -0
- data/source_texts/irish.txt +209 -0
- data/source_texts/italian.txt +120 -0
- data/source_texts/japanese.txt +199 -0
- data/source_texts/korean.txt +134 -0
- data/source_texts/latin.txt +120 -0
- data/source_texts/malay.txt +108 -0
- data/source_texts/marathi.txt +100 -0
- data/source_texts/mingo.txt +146 -0
- data/source_texts/nepali.txt +131 -0
- data/source_texts/norwegian.txt +157 -0
- data/source_texts/polish.txt +91 -0
- data/source_texts/portuguese.txt +88 -0
- data/source_texts/quechua.txt +108 -0
- data/source_texts/romanian.txt +103 -0
- data/source_texts/rumantsch.txt +110 -0
- data/source_texts/russian.txt +199 -0
- data/source_texts/sanskrit.txt +135 -0
- data/source_texts/scots_gaelic.txt +93 -0
- data/source_texts/serbian-ascii.txt +121 -0
- data/source_texts/slovak-ascii.txt +102 -0
- data/source_texts/slovenian-ascii.txt +100 -0
- data/source_texts/spanish.txt +834 -0
- data/source_texts/swahili.txt +120 -0
- data/source_texts/swedish.txt +75 -0
- data/source_texts/tagalog.txt +135 -0
- data/source_texts/tamil.txt +167 -0
- data/source_texts/thai.txt +86 -0
- data/source_texts/turkish.txt +117 -0
- data/source_texts/ukrainian-koi8_u.txt +214 -0
- data/source_texts/vietnamese.txt +92 -0
- data/source_texts/welsh.txt +148 -0
- data/source_texts/yiddish-utf.txt +83 -0
- data/test/classifier_test.rb +29 -0
- data/test/fixtures/source_texts/danish.txt +219 -0
- data/test/fixtures/source_texts/english.txt +35 -0
- data/test/fixtures/source_texts/french.txt +89 -0
- data/test/fixtures/source_texts/german.txt +137 -0
- data/test/fixtures/source_texts/spanish.txt +834 -0
- data/test/generator_test.rb +72 -0
- data/test/helper.rb +22 -0
- data/test/loader_test.rb +31 -0
- data/test/scylla_test.rb +20 -0
- metadata +173 -0
data/.document
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
source "http://rubygems.org"
|
2
|
+
# Add dependencies required to use your gem here.
|
3
|
+
# Example:
|
4
|
+
# gem "activesupport", ">= 2.3.5"
|
5
|
+
|
6
|
+
# Add dependencies to develop your gem here.
|
7
|
+
# Include everything needed to run rake, tests, features, etc.
|
8
|
+
group :development do
|
9
|
+
gem "bundler", "~> 1.0.0"
|
10
|
+
gem "jeweler", "~> 1.6.4"
|
11
|
+
end
|
12
|
+
|
13
|
+
group :test do
|
14
|
+
gem "shoulda", ">= 0"
|
15
|
+
gem "mocha", "~> 0.9.12", :require => nil
|
16
|
+
gem "ruby-debug"
|
17
|
+
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
columnize (0.3.4)
|
5
|
+
git (1.2.5)
|
6
|
+
jeweler (1.6.4)
|
7
|
+
bundler (~> 1.0)
|
8
|
+
git (>= 1.2.5)
|
9
|
+
rake
|
10
|
+
linecache (0.46)
|
11
|
+
rbx-require-relative (> 0.0.4)
|
12
|
+
mocha (0.9.12)
|
13
|
+
rake (0.9.2)
|
14
|
+
rbx-require-relative (0.0.5)
|
15
|
+
ruby-debug (0.10.4)
|
16
|
+
columnize (>= 0.1)
|
17
|
+
ruby-debug-base (~> 0.10.4.0)
|
18
|
+
ruby-debug-base (0.10.4)
|
19
|
+
linecache (>= 0.3)
|
20
|
+
shoulda (2.11.3)
|
21
|
+
|
22
|
+
PLATFORMS
|
23
|
+
ruby
|
24
|
+
|
25
|
+
DEPENDENCIES
|
26
|
+
bundler (~> 1.0.0)
|
27
|
+
jeweler (~> 1.6.4)
|
28
|
+
mocha (~> 0.9.12)
|
29
|
+
ruby-debug
|
30
|
+
shoulda
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2011 Ashwin Hegde
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
= scylla
|
2
|
+
|
3
|
+
Scylla is a language categorizing gem that allows you to guess the language of a given text. Scylla is a Ruby port of TextCat (http://www.let.rug.nl/~vannoord/TextCat) and is based on the text categorization algorithm presented in Cavnar, W. B. and J. M. Trenkle, "N-Gram-Based Text Categorization", in Proceedings of the Third Annual Symposium on Document Analysis and Information Retrieval, Las Vegas, NV, UNLV Publications/Reprographics, pp. 161-175, 11-13 April 1994.
|
4
|
+
|
5
|
+
== Contributing to scylla
|
6
|
+
|
7
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
8
|
+
* Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
|
9
|
+
* Fork the project
|
10
|
+
* Start a feature/bugfix branch
|
11
|
+
* Commit and push until you are happy with your contribution
|
12
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
13
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or if changing them is otherwise necessary, that is fine, but please isolate the change to its own commit so I can cherry-pick around it.
|
14
|
+
|
15
|
+
== Copyright
|
16
|
+
|
17
|
+
Copyright (c) 2011 Ashwin Hegde. See LICENSE.txt for
|
18
|
+
further details.
|
19
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
$LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__), "lib")))
|
4
|
+
|
5
|
+
require 'rubygems'
|
6
|
+
require 'bundler'
|
7
|
+
require 'scylla'
|
8
|
+
require 'scylla/tasks'
|
9
|
+
|
10
|
+
begin
|
11
|
+
Bundler.setup(:default, :development)
|
12
|
+
rescue Bundler::BundlerError => e
|
13
|
+
$stderr.puts e.message
|
14
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
15
|
+
exit e.status_code
|
16
|
+
end
|
17
|
+
require 'rake'
|
18
|
+
|
19
|
+
require 'jeweler'
|
20
|
+
Jeweler::Tasks.new do |gem|
|
21
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
22
|
+
gem.name = "scylla"
|
23
|
+
gem.homepage = "http://github.com/hashwin/scylla"
|
24
|
+
gem.license = "MIT"
|
25
|
+
gem.summary = "Ruby port of Textcat language guesser"
|
26
|
+
gem.description = "Allows for text categorization by guessing the language of a given text using n-grams"
|
27
|
+
gem.email = "ahegde@zendesk.com"
|
28
|
+
gem.authors = ["Ashwin Hegde"]
|
29
|
+
# dependencies defined in Gemfile
|
30
|
+
end
|
31
|
+
Jeweler::RubygemsDotOrgTasks.new
|
32
|
+
|
33
|
+
require 'rake/testtask'
|
34
|
+
Rake::TestTask.new(:test) do |test|
|
35
|
+
test.libs << 'lib' << 'test'
|
36
|
+
test.pattern = 'test/**/*_test.rb'
|
37
|
+
test.verbose = true
|
38
|
+
end
|
39
|
+
|
40
|
+
task :default => :test
|
41
|
+
|
42
|
+
# Defines the `rdoc` documentation task.
#
# Rake::RDocTask (rake/rdoctask) was deprecated in Rake 0.9 and removed in
# later Rake releases, so prefer the task class shipped with RDoc itself and
# only fall back to the legacy one when RDoc is too old to provide it.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # Strip the trailing newline so it does not end up inside the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "scylla #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
51
|
+
|
52
|
+
Scylla::Tasks.new
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
@@ -0,0 +1,65 @@
|
|
1
|
+
module Scylla
  # Guesses which language a text is written in by ranking the text's n-grams
  # and comparing those ranks against every loaded language map.
  class Classifier
    attr_accessor :limit, :dir, :ngrams, :threshold, :input

    # limit    : Up to how many matching language results should be displayed
    #            (stored only; nothing in this class reads it).
    # ngrams   : The total number of ngrams that are stored for each language.
    #            Also the penalty added for an ngram a language does not have.
    # threshold: The threshold score for matches. A language is reported when
    #            its score is below threshold * best score.
    def initialize(limit = 10, ngrams = 400, threshold = 1.05)
      @limit = limit
      @ngrams = ngrams
      @threshold = threshold
    end

    # Classifies a string to a list of languages in order of best match.
    def classify_string(text)
      @input = join_lines(text)
      classify
    end

    # Classifies a file to a list of languages in order of best match.
    def classify_file(path)
      @input = join_lines(File.read(path))
      classify
    end

    # Classifies @input to a list of languages in order of best match.
    #
    # Returns an array of language names: the best scoring language followed
    # by every language whose score is below threshold * best score. Prints a
    # hint and returns nil when no language maps are available.
    def classify
      languages = Scylla::Loader.languages
      if languages.empty?
        p "No languages (.lm files) found. Please run rake scylla:train after placing your training texts in the source_texts directory."
        return
      end

      unknown = Scylla::Generator.new.create_lm(@input)
      scores = languages.map { |name, profile| [name, get_score(unknown, profile)] }
      # Lower scores are better, so the best match sorts first.
      ranked = scores.sort { |a, b| a[1] <=> b[1] }

      best_name, best_score = ranked.first
      cutoff = @threshold * best_score
      runners_up = ranked[1..-1].take_while { |_, score| score < cutoff }
      [best_name] + runners_up.map { |name, _| name }
    end

    # Gets the score of the text in question compared to a particular language.
    #
    # unknown: ngrams of the text, most frequent first.
    # ngram  : hash mapping a language's ngrams to their rank.
    #
    # Each ngram contributes the distance between its position in the text and
    # its rank in the language, or @ngrams when the language lacks it.
    def get_score(unknown, ngram)
      score = 0
      unknown.each_with_index do |gram, position|
        rank = ngram[gram]
        score += rank ? (rank - position).abs : @ngrams
      end
      score
    end

    private

    # Collapses the text onto one line. Lines are stripped and joined with a
    # single space so the last word of one line is not fused with the first
    # word of the next; blank lines are dropped so they add no empty words.
    def join_lines(text)
      text.each_line.map { |line| line.strip }.reject { |line| line.empty? }.join(" ")
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module Scylla
  # Builds ngram language maps, either in memory (create_lm) or as .lm files
  # generated from training texts (train / write_lm).
  class Generator
    # Maximum number of ngrams written to a single .lm file.
    MAX_NGRAMS = 400
    # Longest ngram, in characters, that is counted.
    MAX_NGRAM_LENGTH = 5

    attr_accessor :dirtext, :dirlm, :minsize, :silent

    # dirtext: The location of the source training text files
    # dirlm  : The directory the generated .lm files are written to
    # minsize: Ngrams of this size or smaller are discarded
    # silent : When true, write_lm does not print a progress line
    def initialize(dirtext = DEFAULT_SOURCE_DIR, dirlm = DEFAULT_TARGET_DIR, minsize = 0, silent = false)
      @dirtext = dirtext
      @dirlm = dirlm
      @minsize = minsize
      @silent = silent
    end

    # This loads all the .txt files in the specified source training text folder
    # and creates language maps using ngram frequencies. The maps are stored in
    # @dirlm as .lm files.
    #
    # NOTE: every .lm file found recursively below the current working
    # directory is deleted first, not only those inside @dirlm.
    def train
      stale_maps = Dir.glob("**/*.lm")
      textpaths = Dir.glob(@dirtext + "/*.txt")
      stale_maps.each { |map| File.delete(map) }
      textpaths.each { |path| write_lm(path) }
    end

    # Reads a single text file specified by a path and writes a .lm file with
    # the same base name into @dirlm. Each line of the .lm file holds an ngram
    # and its frequency separated by a tab, most frequent first, capped at
    # MAX_NGRAMS lines.
    def write_lm(path)
      # File.read closes the handle; line normalization happens in create_lm.
      text = File.read(path)
      p "Creating language map for " + path unless @silent
      lm = create_lm(text, true)
      lmname = File.join(@dirlm, File.basename(path, ".txt") + ".lm")
      # Mode 'w' truncates an existing file, so no explicit delete is needed.
      File.open(lmname, 'w') do |f|
        lm.first(MAX_NGRAMS).each { |gram, count| f.write("#{gram}\t#{count}\n") }
      end
    end

    # This creates a language map for a given input string.
    # The frequencies boolean specifies whether or not the method should
    # return the frequencies of the ngrams ([ngram, count] pairs), or simply an
    # array of ngrams in sorted order. Both are ordered most frequent first.
    def create_lm(input, frequencies = false)
      # Join lines with a space so words on neighbouring lines stay separate;
      # blank lines are dropped so they do not produce empty words.
      text = input.each_line.map { |line| line.strip }.reject { |line| line.empty? }.join(" ")

      counts = Hash.new(0)
      text.split(/[0-9\s]/).each do |token|
        # Underscores mark the start and end of the word.
        word = "_" + token + "_"
        (0...word.size).each do |start|
          longest = [MAX_NGRAM_LENGTH, word.size - start].min
          (1..longest).each { |length| counts[word[start, length]] += 1 }
        end
      end
      counts.delete_if { |gram, _| gram.size <= @minsize }

      ranked = counts.sort { |a, b| b[1] <=> a[1] }
      frequencies ? ranked : ranked.map { |gram, _| gram }
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Scylla
  # Reads .lm language map files and caches the result at class level.
  class Loader
    # Loads all the language maps into memory from every .lm file found
    # recursively below the current working directory.
    #
    # Returns a hash mapping the language name (the file's base name without
    # ".lm") to that language's ngram => rank map. Paths are sorted so the
    # order of the hash does not depend on the filesystem.
    def self.load_language_maps
      languages = {}
      Dir.glob("**/*.lm").sort.each do |filepath|
        languages[File.basename(filepath, ".lm")] = language_map(filepath)
      end
      languages
    end

    # Returns a single language map from a specified .lm file.
    #
    # Only the text before the first tab of each line is used. Lines whose
    # ngram is missing or starts with a digit or whitespace are skipped and do
    # not consume a rank. Ranks start at 1 for the first accepted line.
    def self.language_map(path)
      rank = 1
      ngram = {}
      File.readlines(path).each do |line|
        gram = line.strip.split("\t").first
        next if gram.nil?
        next unless gram =~ /^[^0-9\s]+/
        ngram[gram] = rank
        rank += 1
      end
      ngram
    end

    # Loads all maps from the .lm files, or loads them from memory if the
    # files have already been read and loaded.
    def self.languages
      @languages ||= load_language_maps
    end

    # Drops the cached maps so the next call to languages reads the files again.
    def self.clear
      @languages = nil
    end
  end
end
|
data/lib/scylla/tasks.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rake/tasklib'
|
3
|
+
|
4
|
+
module Scylla
  # Rake task library; instantiating it registers the scylla:train task.
  class Tasks < ::Rake::TaskLib
    # Registers the tasks as soon as the object is created.
    def initialize
      define_training_task
    end

    # Defines scylla:train, which runs a Generator built with its default
    # arguments.
    def define_training_task
      namespace(:scylla) do
        desc("Trains Scylla in new languages")
        task(:train) { Scylla::Generator.new.train }
      end
    end
  end
end
|
data/lib/scylla.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Scylla
|
3
|
+
DEFAULT_SOURCE_DIR = File.join(File.dirname(__FILE__), "..", "source_texts")
|
4
|
+
DEFAULT_TARGET_DIR = File.join(File.dirname(__FILE__), "scylla", "lms")
|
5
|
+
end
|
6
|
+
|
7
|
+
require 'scylla/classifier'
|
8
|
+
require 'scylla/generator'
|
9
|
+
require 'scylla/loader'
|
10
|
+
require 'scylla/string'
|
data/scylla.gemspec
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{scylla}
|
8
|
+
s.version = "0.1.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Ashwin Hegde"]
|
12
|
+
s.date = %q{2011-08-25}
|
13
|
+
s.description = %q{Allows for text categorization by guessing the language of a given text using n-grams}
|
14
|
+
s.email = %q{ahegde@zendesk.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE.txt",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
"Gemfile",
|
22
|
+
"Gemfile.lock",
|
23
|
+
"LICENSE.txt",
|
24
|
+
"README.rdoc",
|
25
|
+
"Rakefile",
|
26
|
+
"VERSION",
|
27
|
+
"lib/scylla.rb",
|
28
|
+
"lib/scylla/classifier.rb",
|
29
|
+
"lib/scylla/generator.rb",
|
30
|
+
"lib/scylla/loader.rb",
|
31
|
+
"lib/scylla/string.rb",
|
32
|
+
"lib/scylla/tasks.rb",
|
33
|
+
"scylla.gemspec",
|
34
|
+
"source_texts/13375P33K.txt",
|
35
|
+
"source_texts/afrikaans.txt",
|
36
|
+
"source_texts/arabic.txt",
|
37
|
+
"source_texts/armenian.txt",
|
38
|
+
"source_texts/bulgarian.txt",
|
39
|
+
"source_texts/catalan.txt",
|
40
|
+
"source_texts/chinese.txt",
|
41
|
+
"source_texts/danish.txt",
|
42
|
+
"source_texts/english.txt",
|
43
|
+
"source_texts/esperanto.txt",
|
44
|
+
"source_texts/finnish.txt",
|
45
|
+
"source_texts/french.txt",
|
46
|
+
"source_texts/german.txt",
|
47
|
+
"source_texts/greek-iso8859-7.txt",
|
48
|
+
"source_texts/hebrew.txt",
|
49
|
+
"source_texts/hindi.txt",
|
50
|
+
"source_texts/hungarian.txt",
|
51
|
+
"source_texts/icelandic.txt",
|
52
|
+
"source_texts/indonesian.txt",
|
53
|
+
"source_texts/irish.txt",
|
54
|
+
"source_texts/italian.txt",
|
55
|
+
"source_texts/japanese.txt",
|
56
|
+
"source_texts/korean.txt",
|
57
|
+
"source_texts/latin.txt",
|
58
|
+
"source_texts/malay.txt",
|
59
|
+
"source_texts/marathi.txt",
|
60
|
+
"source_texts/mingo.txt",
|
61
|
+
"source_texts/nepali.txt",
|
62
|
+
"source_texts/norwegian.txt",
|
63
|
+
"source_texts/polish.txt",
|
64
|
+
"source_texts/portuguese.txt",
|
65
|
+
"source_texts/quechua.txt",
|
66
|
+
"source_texts/romanian.txt",
|
67
|
+
"source_texts/rumantsch.txt",
|
68
|
+
"source_texts/russian.txt",
|
69
|
+
"source_texts/sanskrit.txt",
|
70
|
+
"source_texts/scots_gaelic.txt",
|
71
|
+
"source_texts/serbian-ascii.txt",
|
72
|
+
"source_texts/slovak-ascii.txt",
|
73
|
+
"source_texts/slovenian-ascii.txt",
|
74
|
+
"source_texts/spanish.txt",
|
75
|
+
"source_texts/swahili.txt",
|
76
|
+
"source_texts/swedish.txt",
|
77
|
+
"source_texts/tagalog.txt",
|
78
|
+
"source_texts/tamil.txt",
|
79
|
+
"source_texts/thai.txt",
|
80
|
+
"source_texts/turkish.txt",
|
81
|
+
"source_texts/ukrainian-koi8_u.txt",
|
82
|
+
"source_texts/vietnamese.txt",
|
83
|
+
"source_texts/welsh.txt",
|
84
|
+
"source_texts/yiddish-utf.txt",
|
85
|
+
"test/classifier_test.rb",
|
86
|
+
"test/fixtures/source_texts/danish.txt",
|
87
|
+
"test/fixtures/source_texts/english.txt",
|
88
|
+
"test/fixtures/source_texts/french.txt",
|
89
|
+
"test/fixtures/source_texts/german.txt",
|
90
|
+
"test/fixtures/source_texts/spanish.txt",
|
91
|
+
"test/generator_test.rb",
|
92
|
+
"test/helper.rb",
|
93
|
+
"test/loader_test.rb",
|
94
|
+
"test/scylla_test.rb"
|
95
|
+
]
|
96
|
+
s.homepage = %q{http://github.com/hashwin/scylla}
|
97
|
+
s.licenses = ["MIT"]
|
98
|
+
s.require_paths = ["lib"]
|
99
|
+
s.rubygems_version = %q{1.6.2}
|
100
|
+
s.summary = %q{Ruby port of Textcat language guesser}
|
101
|
+
|
102
|
+
if s.respond_to? :specification_version then
|
103
|
+
s.specification_version = 3
|
104
|
+
|
105
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
106
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
107
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
|
108
|
+
else
|
109
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
110
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
111
|
+
end
|
112
|
+
else
|
113
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
114
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|