linnaeus 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,6 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
4
+ - 1.9.2
5
+ services:
6
+ - redis
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
# Fetch gems over TLS; a plain-http source lets the downloaded packages be
# tampered with in transit.
source "https://rubygems.org"

# Runtime dependencies.
gem 'redis', '~> 3.0.0'
gem 'stemmer', '~> 1.0.0'

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "rspec", "~> 2.11.0"
  gem "yard", "~> 0.7"
  gem "rdoc", "~> 3.12"
  gem "bundler"
  gem "jeweler"
  gem "simplecov"
end
@@ -0,0 +1,43 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.1.3)
5
+ git (1.2.5)
6
+ jeweler (1.8.4)
7
+ bundler (~> 1.0)
8
+ git (>= 1.2.5)
9
+ rake
10
+ rdoc
11
+ json (1.7.5)
12
+ multi_json (1.3.6)
13
+ rake (0.9.2.2)
14
+ rdoc (3.12)
15
+ json (~> 1.4)
16
+ redis (3.0.2)
17
+ rspec (2.11.0)
18
+ rspec-core (~> 2.11.0)
19
+ rspec-expectations (~> 2.11.0)
20
+ rspec-mocks (~> 2.11.0)
21
+ rspec-core (2.11.1)
22
+ rspec-expectations (2.11.3)
23
+ diff-lcs (~> 1.1.3)
24
+ rspec-mocks (2.11.3)
25
+ simplecov (0.7.1)
26
+ multi_json (~> 1.0)
27
+ simplecov-html (~> 0.7.1)
28
+ simplecov-html (0.7.1)
29
+ stemmer (1.0.1)
30
+ yard (0.8.3)
31
+
32
+ PLATFORMS
33
+ ruby
34
+
35
+ DEPENDENCIES
36
+ bundler
37
+ jeweler
38
+ rdoc (~> 3.12)
39
+ redis (~> 3.0.0)
40
+ rspec (~> 2.11.0)
41
+ simplecov
42
+ stemmer (~> 1.0.0)
43
+ yard (~> 0.7)
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Dan Collis-Puro
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,36 @@
1
+ = Linnaeus
2
+
3
+ https://raw.github.com/djcp/linnaeus/master/images/linnaeus.jpg
4
+
5
+ Linnaeus is a redis-backed Bayesian classification system. Please see the generated rdocs for more information.
6
+
7
+ == Examples
8
+
9
+ lt = Linnaeus::Trainer.new # Used to train documents
10
+ lc = Linnaeus::Classifier.new # Used to classify documents
11
+
12
+ lt.train 'language', 'Ruby is a dynamic, reflective, general-purpose object-oriented programming language that combines syntax inspired by Perl with Smalltalk-like features.'
13
+ lt.train 'database', 'PostgreSQL, often simply Postgres, is an object-relational database management system (ORDBMS) available for many platforms including Linux, FreeBSD, Solaris, Microsoft Windows and Mac OS X.'
14
+
15
+ lc.classify 'Perl is a high-level, general-purpose, interpreted, dynamic programming language.' # returns "language"
16
+
17
+
18
+ == Contributing to linnaeus
19
+
20
+ * Submit bugs to the github issue tracker: https://github.com/djcp/linnaeus/issues
21
+ * If you'd like to add a feature, please submit a description of it to the issue tracker so we can discuss.
22
+ * If the feature makes sense, fork the github repository. Write rspec tests and issue a pull request when your change is done.
23
+
24
+ == The Future
25
+
26
+ * Make sure we're unicode OK
27
+ * Create additional storage backends - sqlite, postgresql, mongodb, etc.
28
+ * Allow for weighting tweaks.
29
+
30
+ == Copyright
31
+
32
+ Copyright (c) 2012 Dan Collis-Puro. See LICENSE.txt for further details.
33
+
34
+ == Credits
35
+
36
+ * Image courtesy of Wikipedia. About Carl Linnaeus: http://en.wikipedia.org/wiki/Linnaeus
@@ -0,0 +1,37 @@
1
# encoding: utf-8

require 'rubygems'
require 'bundler'

# Activate the default and development gem groups; if Bundler cannot resolve
# them, report the problem and exit with Bundler's own status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message, "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require 'rake'
require 'jeweler'

# Gem packaging tasks. Dependencies are defined in the Gemfile.
Jeweler::Tasks.new do |spec|
  # spec is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  spec.name        = "linnaeus"
  spec.homepage    = "http://github.com/djcp/linnaeus"
  spec.license     = "MIT"
  spec.summary     = "Another redis-backed Bayesian classifier"
  spec.description = "Linnaeus provides a redis-backed Bayesian classifier. Words are stemmed, stopwords are stopped, and redis is used to allow for persistent and concurrent training and classification."
  spec.email       = "dan@collispuro.net"
  spec.authors     = ["djcp"]
end
Jeweler::RubygemsDotOrgTasks.new

require 'rspec/core'
require 'rspec/core/rake_task'

# `rake spec` runs every *_spec.rb file under spec/.
RSpec::Core::RakeTask.new(:spec) do |rspec_task|
  rspec_task.pattern = FileList['spec/**/*_spec.rb']
end

# Running bare `rake` runs the specs.
task default: :spec

require 'yard'
YARD::Rake::YardocTask.new
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.0
Binary file
@@ -0,0 +1,60 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__)) unless $LOAD_PATH.include?(File.dirname(__FILE__))
2
+
3
+ require 'redis'
4
+ require 'stemmer'
5
+
6
+ # The base class. You won't use this directly - use one of the subclasses.
7
class Linnaeus

  # Wire up the persistence backend and the stopword source shared by the
  # subclasses.
  #
  # == Parameters
  # opts::
  #   A hash of options merged over the defaults. +:persistence_class+ and
  #   +:stopwords_class+ name the collaborator classes to instantiate, and
  #   +:skip_stemming+ (default false) turns stemming off. The merged hash is
  #   also handed to the persistence class constructor.
  def initialize(opts = {})
    options = {
      persistence_class: Persistence,
      stopwords_class: Stopwords,
      skip_stemming: false
    }.merge(opts)

    @db = options[:persistence_class].new(options)
    @stopword_generator = options[:stopwords_class].new
    @skip_stemming = options[:skip_stemming]
  end

  # Count occurrences of words in a text corpus.
  #
  # == Parameters
  # text::
  #   A string representing a document (nil is treated as an empty document).
  #   The text is lowercased and split on whitespace. Stopwords are removed and
  #   the remaining words are stemmed using the "Stemmer" gem unless stemming
  #   was disabled in the constructor.
  #
  # == Returns
  # A hash mapping each counted word to its number of occurrences.
  def count_word_occurrences(text = '')
    text.to_s.downcase.split.each_with_object({}) do |word, counts|
      # Check the raw word before stemming: stemming can rewrite a stopword
      # into a form that is no longer on the stopword list.
      next if stopwords.include?(word)

      token = @skip_stemming ? word : word.stem_porter
      # Still honour lists that contain already-stemmed entries.
      next if stopwords.include?(token)

      counts[token] = counts.fetch(token, 0) + 1
    end
  end

  private

  # Format categories for training or untraining.
  #
  # == Parameters
  # categories::
  #   A string or array of categories
  #
  # == Returns
  # An array of lowercased category names containing only a-z, digits, ".",
  # "-" and "_". Names that end up empty are dropped and duplicates are
  # collapsed so a document is never counted twice for one category.
  def normalize_categories(categories = [])
    cleaned = [categories].flatten.map do |cat|
      cat.to_s.downcase.gsub(/[^a-z\d\.\-_]/, '')
    end
    cleaned.reject(&:empty?).uniq
  end

  # Get a Set of stopwords to remove from documents for training / classifying.
  # The set is built once from the stopword generator and then reused.
  def stopwords
    @stopwords ||= @stopword_generator.to_set
  end

end
55
+
56
+ require 'set'
57
+ require 'linnaeus/stopwords'
58
+ require 'linnaeus/persistence'
59
+ require 'linnaeus/trainer'
60
+ require 'linnaeus/classifier'
@@ -0,0 +1,41 @@
1
+ # Classify documents against the Bayesian corpus.
2
+ class Linnaeus::Classifier < Linnaeus
3
+
4
+ # Returns a hash of scores for each category in the Bayesian corpus.
5
+ # The closer a score is to 0, the more likely a match it is.
6
+ #
7
+ # == Parameters
8
+ # text::
9
+ # a string of text to classify.
10
+ #
11
+ # == Returns
12
+ # a hash of categories with a score as the values.
13
def classification_scores(text)
  # Tokenise once: the word counts do not depend on the category being scored.
  word_occurrences = count_word_occurrences(text)

  @db.get_categories.each_with_object({}) do |category, scores|
    words_with_count = @db.get_words_with_count_for_category(category)
    total = words_with_count.values.reduce(0) { |sum, count| sum + count.to_i }

    # A category with no stored word counts would divide by zero and score
    # +Infinity, which would make it win every classification. Rank it last.
    if total <= 0
      scores[category] = -Float::INFINITY
      next
    end

    scores[category] = word_occurrences.keys.reduce(0) do |score, word|
      seen = words_with_count[word].to_i
      # Words with no positive count for this category get a small
      # pseudo-count so the logarithm stays finite.
      weight = seen > 0 ? seen : 0.1
      score + Math.log(weight / total.to_f)
    end
  end
end
28
+
29
+ # The most likely category for a document.
30
+ #
31
+ # == Parameters
32
+ # text::
33
+ # a string of text to classify.
34
+ #
35
+ # == Returns
36
+ # A string representing the most likely category.
37
def classify(text)
  # Scores are log-probabilities (<= 0); the best match is the largest one.
  # max_by returns nil for an empty corpus, so no category yields nil
  # instead of raising.
  best = classification_scores(text).max_by { |_category, score| score }
  best && best.first
end
40
+
41
+ end
@@ -0,0 +1,114 @@
1
+ # The redis persistence layer.
2
+ class Linnaeus::Persistence < Linnaeus
3
+ # The Set (in the redis sense) of categories are stored in this key.
4
+ CATEGORIES_KEY = 'Linnaeus:category'
5
+ # The base key for a category in the redis corpus. Word occurrence counts for a category appear under here.
6
+ BASE_CATEGORY_KEY = 'Linnaeus:cat:'
7
+
8
+ attr_accessor :redis
9
+
10
def initialize(opts = {})
  # Connection settings: a key supplied in opts wins over the default.
  host = opts.fetch(:redis_host, '127.0.0.1')
  port = opts.fetch(:redis_port, '6379')
  database = opts.fetch(:redis_db, 0)

  @redis = Redis.new(host: host, port: port, db: database)
  self
end
24
+
25
+ # Add categories to the bayesian corpus.
26
+ #
27
+ # == Parameters
28
+ # categories::
29
+ # A string or array of categories.
30
def add_categories(categories)
  # SADD needs at least one member; an empty list (e.g. every category name
  # normalized away) is a no-op rather than a redis command error.
  return 0 if Array(categories).empty?

  @redis.sadd(CATEGORIES_KEY, categories)
end
33
+
34
+ # Remove categories from the bayesian corpus
35
+ #
36
+ # == Parameters
37
+   # category::
38
+ # A string or array of categories.
39
def remove_category(category)
  # Only the membership in the category Set is removed here; no other key is
  # touched by this call.
  @redis.srem(CATEGORIES_KEY, category)
end
42
+
43
+ # Get categories from the bayesian corpus
44
+ #
45
+   # == Returns
46
+   # The category names stored in the
47
+   #   redis Set under CATEGORIES_KEY.
48
def get_categories
  # Read every member of the category Set.
  @redis.smembers(CATEGORIES_KEY)
end
51
+
52
+ # Get a list of words with their number of occurrences.
53
+ #
54
+ # == Parameters
55
+ # category::
56
+ # A string representing a category.
57
+ #
58
+ # == Returns
59
+ # A hash with the word counts for this category.
60
def get_words_with_count_for_category(category)
  # Each category's word counts live in one redis hash keyed by the base key
  # plus the category name.
  category_key = BASE_CATEGORY_KEY + category
  @redis.hgetall(category_key)
end
63
+
64
+ # Clear all training data from the backend.
65
def clear_all_training_data
  # Delete only the keys this library owns (the category Set and every
  # per-category hash). FLUSHDB would also destroy unrelated data that
  # happens to share the same redis database.
  owned_keys = [CATEGORIES_KEY] + @redis.keys("#{BASE_CATEGORY_KEY}*")
  @redis.del(*owned_keys)
end
68
+
69
+ # Increment word counts within a category
70
+ #
71
+ # == Parameters
72
+ # category::
73
+ # A string representing a category.
74
+ # word_occurrences::
75
+   #   A hash containing a count of the number of word occurrences in a document
76
def increment_word_counts_for_category(category, word_occurrences)
  # Queue one HINCRBY per word inside a single MULTI block.
  @redis.multi do |transaction|
    word_occurrences.each do |term, occurrences|
      transaction.hincrby(BASE_CATEGORY_KEY + category, term, occurrences)
    end
  end
end
83
+
84
+ # Decrement word counts within a category. This is used when removing a document from the corpus.
85
+ #
86
+ # == Parameters
87
+ # category::
88
+ # A string representing a category.
89
+ # word_occurrences::
90
+   #   A hash containing a count of the number of word occurrences in a document
91
def decrement_word_counts_for_category(category, word_occurrences)
  # Queue one HINCRBY per word with the negated count inside a single MULTI
  # block.
  @redis.multi do |transaction|
    word_occurrences.each do |term, occurrences|
      transaction.hincrby(BASE_CATEGORY_KEY + category, term, -occurrences)
    end
  end
end
98
+
99
+ # Clean out words with a count of zero in a category. Used during untraining.
100
+ #
101
+ # == Parameters
102
+ # category::
103
+ # A string representing a category.
104
def cleanup_empty_words_in_category(category)
  key = BASE_CATEGORY_KEY + category
  word_counts = @redis.hgetall(key)
  empty_words = word_counts.select { |_word, count| count.to_i <= 0 }.keys

  if empty_words.size == word_counts.size
    # Nothing with a positive count is left: drop the whole hash.
    @redis.del(key)
  elsif !empty_words.empty?
    @redis.hdel(key, empty_words)
  end
  # Otherwise every word still has a positive count. HDEL must not be sent
  # with an empty field list, so there is nothing to do.
end
113
+
114
+ end
@@ -0,0 +1,17 @@
1
+ # The stopword list - you can override this list by creating a stopword generator and registering it in the Linnaeus::Trainer or Linnaeus::Classifier constructors.
2
+ class Linnaeus::Stopwords
3
+ # The default stopword list.
4
+ DEFAULT_STOPWORDS = %w(a able about across after all almost also am among an and any are as at be because been but by can cannot could dear did do does either else ever every for from get got had has have he her hers him his how however i if in into is it its just least let like likely may me might most must my neither no nor not of off often on only or other our own rather said say says she should since so some than that the their them then there these they this tis to too twas us wants was we were what when where which while who whom why will with would yet you your)
5
+
6
+ attr_accessor :stopwords
7
+
8
+ # The list of stopwords as an array
9
def to_a
  # Hand out a copy of the built-in list so that a caller mutating the result
  # cannot change DEFAULT_STOPWORDS for every other instance. An explicitly
  # assigned list is returned as-is.
  @stopwords || DEFAULT_STOPWORDS.dup
end
12
+
13
+ # The list of stopwords as a ruby Set
14
def to_set
  # Build the Set from whatever to_a currently reports.
  Set.new(to_a)
end
17
+ end
@@ -0,0 +1,38 @@
1
+ # Train or untrain documents from the Bayesian corpus.
2
+ class Linnaeus::Trainer < Linnaeus
3
+
4
+ # Add a document to the training corpus.
5
+ #
6
+ # == Parameters
7
+ # categories::
8
+ # A string or array of categories
9
+ # text::
10
+ # A string of text in this document.
11
def train(categories, text)
  categories = normalize_categories(categories)
  # Every supplied name may have normalized to nothing (e.g. '' or '!!!').
  # There is then nothing to train, and the backend must not be asked to
  # store an empty category list.
  return categories if categories.empty?

  @db.add_categories(categories)

  word_occurrences = count_word_occurrences(text)
  categories.each do |cat|
    @db.increment_word_counts_for_category(cat, word_occurrences)
  end
end
20
+
21
+ # Remove a document from the training corpus.
22
+ #
23
+ # == Parameters
24
+ # categories::
25
+ # A string or array of categories
26
+ # text::
27
+ # A string of text in this document.
28
def untrain(categories, text)
  counts = count_word_occurrences(text)

  normalize_categories(categories).each do |name|
    # Subtract this document's counts, then remove any words left at zero
    # or below.
    @db.decrement_word_counts_for_category(name, counts)
    @db.cleanup_empty_words_in_category(name)
  end
end
37
+
38
+ end
@@ -0,0 +1,23 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
describe Linnaeus::Classifier do
  # Clears the training data, then trains three 'movie' and three 'bird'
  # documents, in that order.
  def create_small_dataset
    Linnaeus::Persistence.new.clear_all_training_data
    trainer = Linnaeus::Trainer.new

    corpus = {
      'movie' => [
        "Gone with the Wind is a 1939 American historical epic film adapted from Margaret Mitchell's Pulitzer-winning 1936 novel of the same name.",
        "THX 1138 is a 1971 science fiction film directed by George Lucas in his feature directorial debut. The film was written by Lucas and Walter Murch.",
        "Top Gun is a 1986 American action drama film directed by Tony Scott, and produced by Don Simpson and Jerry Bruckheimer, in association with the Paramount Pictures company."
      ],
      'bird' => [
        "The Yellow-throated Warbler (Setophaga dominica) is a small migratory songbird species breeding in temperate North America. It belongs to the New World warbler family (Parulidae).",
        "The Blue Jay (Cyanocitta cristata) is a passerine bird in the family Corvidae, native to North America. It is resident through most of eastern and central United States and southern Canada, although western populations may be migratory.",
        "The Mallard or Wild Duck (Anas platyrhynchos) is a dabbling duck which breeds throughout the temperate and subtropical Americas, Europe, Asia, and North Africa, and has been introduced to New Zealand and Australia. This duck belongs to the subfamily Anatinae of the waterfowl family Anatidae"
      ]
    }

    corpus.each do |category, documents|
      documents.each { |document| trainer.train(category, document) }
    end
  end

  context 'with a very small dataset' do
    it 'should classify easy things well' do
      create_small_dataset
      subject.classify('A bird that migrates').should eq('bird')
      subject.classify('This was directed by Gus Van Sant').should eq('movie')
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
describe Linnaeus::Persistence do
  # A persistence handle whose training data has been cleared and which then
  # received the sample categories (with duplicates).
  def seeded_store
    Linnaeus::Persistence.new.tap do |store|
      store.clear_all_training_data
      store.add_categories(['foo', 'bar', 'baz', 'foo', 'bar'])
    end
  end

  it 'stores categories successfully' do
    seeded_store.get_categories.sort.should eq(%w[bar baz foo])
  end

  it 'can remove categories' do
    store = seeded_store
    store.remove_category('bar')
    store.get_categories.sort.should eq(%w[baz foo])
  end
end
@@ -0,0 +1,4 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
# Placeholder example group: it contains no examples.
describe(Linnaeus) { }
@@ -0,0 +1,20 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
describe Linnaeus::Stopwords do
  subject { Linnaeus::Stopwords.new }

  # Both conversions must exist, return the expected collection type and
  # contain a word from the default list.
  it '.to_a' do
    subject.should respond_to(:to_a)
    list = subject.to_a
    list.should be_an_instance_of(Array)
    list.should include('the')
  end

  it '.to_set' do
    subject.should respond_to(:to_set)
    set = subject.to_set
    set.should be_an_instance_of(Set)
    set.should include('the')
  end

  it 'can have stopwords overridden' do
    custom = ['foo', 'bar']
    subject.stopwords = custom
    subject.to_a.should eq(['foo', 'bar'])
    subject.to_set.should eq(Set.new(['foo', 'bar']))
  end
end
@@ -0,0 +1,73 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
describe Linnaeus::Trainer do
  # Sample documents used by the training examples.
  def grape
    'grape purple blue green fruit sweet wine oval'
  end

  def orange
    'orange round citrus fruit sweet'
  end

  # A persistence handle whose training data has just been cleared.
  def empty_store
    Linnaeus::Persistence.new.tap { |store| store.clear_all_training_data }
  end

  context 'with default options' do
    subject { Linnaeus::Trainer.new }

    it 'should count word occurrencs properly' do
      expected = { 'foo' => 2, 'bar' => 1, 'baz' => 1 }
      subject.count_word_occurrences('foo bar foo baz').should == expected
    end

    it 'should not count stopwords' do
      subject.count_word_occurrences('foo the you').should == { 'foo' => 1 }
    end

    it 'returns an empty hash when given an empty string' do
      subject.count_word_occurrences.should == {}
    end

    it 'should train on documents properly' do
      store = empty_store
      [grape, orange].each { |document| subject.train('fruit', document) }

      expected = {
        "grape" => "1", "purpl" => "1", "blue" => "1", "green" => "1",
        "fruit" => "2", "sweet" => "2", "wine" => "1", "oval" => "1",
        "orang" => "1", "round" => "1", "citru" => "1"
      }
      store.get_words_with_count_for_category('fruit').should eq(expected)
    end

    it 'should partially untrain properly' do
      store = empty_store
      [grape, orange].each { |document| subject.train('fruit', document) }

      subject.untrain('fruit', grape)

      expected = { "fruit" => "1", "sweet" => "1", "orang" => "1", "round" => "1", "citru" => "1" }
      store.get_words_with_count_for_category('fruit').should eq(expected)
    end

    it 'should fully untrain properly' do
      store = empty_store
      subject.train('fruit', grape)
      subject.untrain('fruit', grape)
      store.get_words_with_count_for_category('fruit').should eq({})
    end
  end

  context 'with non-default stopwords' do
    subject { Linnaeus::Trainer.new(stopwords_class: FooStop) }

    it 'should count word occurrencs properly' do
      subject.count_word_occurrences('foo bar foo baz').should == { 'baz' => 1 }
    end
  end
end
68
+
69
# Minimal stopword source used by the trainer spec: it only needs to answer
# to_set with the words that should be ignored.
class FooStop
  def to_set
    Set['foo', 'bar']
  end
end
@@ -0,0 +1,14 @@
1
spec_dir = File.dirname(__FILE__)

# Put lib/ and then spec/ at the front of the load path (spec/ ends up first).
$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'))
$LOAD_PATH.unshift(spec_dir)

# simplecov is started before the library is required.
require 'simplecov'
SimpleCov.start

require 'rspec'
require 'linnaeus'

# Requires supporting files with custom matchers and macros, etc,
# in ./support/ and its subdirectories.
Dir["#{spec_dir}/support/**/*.rb"].each do |support_file|
  require support_file
end

RSpec.configure do |config|
end
metadata ADDED
@@ -0,0 +1,201 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: linnaeus
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - djcp
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-30 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: redis
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 3.0.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 3.0.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: stemmer
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 1.0.0
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 1.0.0
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: 2.11.0
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 2.11.0
62
+ - !ruby/object:Gem::Dependency
63
+ name: yard
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: '0.7'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: '0.7'
78
+ - !ruby/object:Gem::Dependency
79
+ name: rdoc
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ~>
84
+ - !ruby/object:Gem::Version
85
+ version: '3.12'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ~>
92
+ - !ruby/object:Gem::Version
93
+ version: '3.12'
94
+ - !ruby/object:Gem::Dependency
95
+ name: bundler
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: jeweler
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ - !ruby/object:Gem::Dependency
127
+ name: simplecov
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ type: :development
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ description: Linnaeus provides a redis-backed Bayesian classifier. Words are stemmed,
143
+ stopwords are stopped, and redis is used to allow for persistent and concurrent
144
+ training and classification.
145
+ email: dan@collispuro.net
146
+ executables: []
147
+ extensions: []
148
+ extra_rdoc_files:
149
+ - LICENSE.txt
150
+ - README.rdoc
151
+ files:
152
+ - .document
153
+ - .rspec
154
+ - .travis.yml
155
+ - Gemfile
156
+ - Gemfile.lock
157
+ - LICENSE.txt
158
+ - README.rdoc
159
+ - Rakefile
160
+ - VERSION
161
+ - images/linnaeus.jpg
162
+ - lib/linnaeus.rb
163
+ - lib/linnaeus/classifier.rb
164
+ - lib/linnaeus/persistence.rb
165
+ - lib/linnaeus/stopwords.rb
166
+ - lib/linnaeus/trainer.rb
167
+ - spec/linnaeus_classifier_spec.rb
168
+ - spec/linnaeus_persistence_spec.rb
169
+ - spec/linnaeus_spec.rb
170
+ - spec/linnaeus_stopwords_spec.rb
171
+ - spec/linnaeus_trainer_spec.rb
172
+ - spec/spec_helper.rb
173
+ homepage: http://github.com/djcp/linnaeus
174
+ licenses:
175
+ - MIT
176
+ post_install_message:
177
+ rdoc_options: []
178
+ require_paths:
179
+ - lib
180
+ required_ruby_version: !ruby/object:Gem::Requirement
181
+ none: false
182
+ requirements:
183
+ - - ! '>='
184
+ - !ruby/object:Gem::Version
185
+ version: '0'
186
+ segments:
187
+ - 0
188
+ hash: 494428062127756217
189
+ required_rubygems_version: !ruby/object:Gem::Requirement
190
+ none: false
191
+ requirements:
192
+ - - ! '>='
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
195
+ requirements: []
196
+ rubyforge_project:
197
+ rubygems_version: 1.8.24
198
+ signing_key:
199
+ specification_version: 3
200
+ summary: Another redis-backed Bayesian classifier
201
+ test_files: []