positronic_brain 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +33 -0
- data/Rakefile +1 -0
- data/lib/positronic_brain/base.rb +50 -0
- data/lib/positronic_brain/classifier/classifier.rb +120 -0
- data/lib/positronic_brain/classifier/fisher.rb +38 -0
- data/lib/positronic_brain/classifier/naive_bayes.rb +35 -0
- data/lib/positronic_brain/persistence/classifier/base.rb +52 -0
- data/lib/positronic_brain/persistence/persistence.rb +42 -0
- data/lib/positronic_brain/version.rb +3 -0
- data/lib/positronic_brain.rb +17 -0
- data/spec/matchers/storage_matcher.rb +47 -0
- data/spec/positronic_brain/classifier/classifier_spec.rb +170 -0
- data/spec/positronic_brain/classifier/fisher_spec.rb +64 -0
- data/spec/positronic_brain/classifier/naive_bayes_spec.rb +64 -0
- data/spec/spec_helper.rb +28 -0
- metadata +165 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a194faebdf6efeebbc0950272d2ceee4517f8351
|
4
|
+
data.tar.gz: f5d73245fb0659f68739a87e5037f423166484f0
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: de9a8fbcdb4c68840c1ca628154fa7047cd924f936a90dd370207b63ae2463f38aaa66765e99688674ed94752500c9ae888ae95bf12ee7bd764b431cab545458
|
7
|
+
data.tar.gz: f38626ab11eaed8baaadecc8350d349e6f6c2786fc2d1e357a6be47a6c54a85dda56dd9e47e7cdd9077ebcacea41823e40ac6793b274ef6894a416e6dddf3932
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Dalton
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# PositronicBrain
|
2
|
+
|
3
|
+
A toolbox of AI algorithms
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'positronic_brain'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install positronic_brain
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
Set up the persistence configuration:
|
22
|
+
|
23
|
+
PositronicBrain::Base.dump_path = 'a/dump/path'
|
24
|
+
|
25
|
+
For more details, see the spec files under spec/.
|
26
|
+
|
27
|
+
## Contributing
|
28
|
+
|
29
|
+
1. Fork it
|
30
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
31
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
32
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
33
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module PositronicBrain
|
2
|
+
class Base
|
3
|
+
attr_reader :namespace
|
4
|
+
attr_accessor :persistence
|
5
|
+
|
6
|
+
delegate :dump, to: :persistence
|
7
|
+
|
8
|
+
def initialize(namespace, options = {})
|
9
|
+
@namespace = namespace
|
10
|
+
@persistence = self.class.initialize_persistence(dump_path, options[:persistence] || {})
|
11
|
+
end
|
12
|
+
|
13
|
+
def dump_path
|
14
|
+
path_parts = [self.class.dump_path, self.class.dump_name, "#{@namespace}.marshal"].compact
|
15
|
+
File.join *path_parts
|
16
|
+
end
|
17
|
+
|
18
|
+
def inspect
|
19
|
+
"#<#{self.class.name} #{namespace}>"
|
20
|
+
end
|
21
|
+
|
22
|
+
class << self
|
23
|
+
attr_writer :dump_path, :dump_name
|
24
|
+
|
25
|
+
def dump_name
|
26
|
+
@dump_name || self.name.gsub(/^.*\:\:/, '').underscore
|
27
|
+
end
|
28
|
+
|
29
|
+
# Base directory under which dump files are written.
#
# Returns the value assigned to this class through +dump_path=+; when this
# class has none, the lookup continues up the superclass chain. Returns nil
# when no class in the chain has a path configured (the instance-level
# #dump_path compacts nil parts away) instead of raising NoMethodError once
# the lookup walks past the root of the hierarchy.
def dump_path
  return @dump_path if @dump_path

  superclass.dump_path if superclass.respond_to?(:dump_path)
end
|
32
|
+
|
33
|
+
def persistence_class
|
34
|
+
@persistence_class || self.superclass.persistence_class
|
35
|
+
end
|
36
|
+
|
37
|
+
def persistence_options
|
38
|
+
@persistence_options || self.superclass.persistence_options
|
39
|
+
end
|
40
|
+
|
41
|
+
def persistence(klass, options = {})
|
42
|
+
@persistence_class, @persistence_options = klass, options
|
43
|
+
end
|
44
|
+
|
45
|
+
def initialize_persistence(path, options = {})
|
46
|
+
persistence_class.new path, persistence_options.merge(options)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
module PositronicBrain
|
2
|
+
module Classifier
|
3
|
+
autoload :Fisher, 'positronic_brain/classifier/fisher.rb'
|
4
|
+
autoload :NaiveBayes, 'positronic_brain/classifier/naive_bayes.rb'
|
5
|
+
|
6
|
+
class Base < PositronicBrain::Base
|
7
|
+
persistence Persistence::Classifier::Base
|
8
|
+
|
9
|
+
delegate :categories, :category_count, to: :persistence
|
10
|
+
delegate :total_count, :increment_feature, to: :persistence
|
11
|
+
delegate :feature_count, :increment_category, to: :persistence
|
12
|
+
delegate :feature_in_category_count, :dump, to: :persistence
|
13
|
+
|
14
|
+
def initialize(namespace, options = {}, &extract_block)
|
15
|
+
options = options.dup
|
16
|
+
|
17
|
+
@assumed_probability = options.delete(:assumed_probability) || 0.5
|
18
|
+
@default_category = options.delete(:default_category)
|
19
|
+
@minimum_score = options.delete(:minimum_score) || 0.0
|
20
|
+
@assumed_weight = options.delete(:assumed_weight) || 1
|
21
|
+
|
22
|
+
@extract_block = extract_block if block_given?
|
23
|
+
|
24
|
+
super namespace, options
|
25
|
+
end
|
26
|
+
|
27
|
+
# Default feature extractor: lower-cases +item+, splits it on runs of
# non-word characters and returns each distinct token once, in order of
# first appearance.
def extract_features(item)
  tokens = item.downcase.split(/\P{Word}+/)
  tokens.uniq
end
|
30
|
+
|
31
|
+
def classify(item, default = nil)
|
32
|
+
raise NotImplementedError
|
33
|
+
end
|
34
|
+
|
35
|
+
def train(item, category)
|
36
|
+
extract_features!(item).each do |feature|
|
37
|
+
increment_feature feature, category
|
38
|
+
end
|
39
|
+
increment_category category
|
40
|
+
end
|
41
|
+
|
42
|
+
# Share of all trained items that were labelled with +category+.
#
# Returns 0.0 when nothing has been trained yet, so the division below
# never runs against a zero total. The total is read once and reused for
# both the guard and the division.
def prob_category(category)
  total = total_count
  return 0.0 if total == 0.0

  category_count(category) / total
end
|
48
|
+
|
49
|
+
# Multiplies together prob_feature_given_category for every feature
# extracted from +item+. +options+ is forwarded unchanged to each
# per-feature call. An item with no features yields 1.0.
def item_given_category_product(item, category, options={})
  extract_features!(item).inject(1.0) do |product, feature|
    product * prob_feature_given_category(feature, category, options)
  end
end
|
56
|
+
|
57
|
+
# Multiplies together normalized_prob_category_given_feature for every
# feature extracted from +item+. +options+ is forwarded unchanged to each
# per-feature call. An item with no features yields 1.0.
def category_given_item_product(category, item, options={})
  extract_features!(item).inject(1.0) do |product, feature|
    product * normalized_prob_category_given_feature(category, feature, options)
  end
end
|
64
|
+
|
65
|
+
# Fraction of the items trained under +category+ that contained +feature+.
#
# The raw fraction is 0.0 for a category with no trained items; this guard
# now covers the weighted path too, which previously divided 0.0 by 0.0 and
# pushed NaN into the weighting formula when the feature was known but the
# category was not.
#
# options[:weighted] - when truthy, the raw fraction is passed through
#                      +weighted+ together with the feature's overall count.
def prob_feature_given_category(feature, category, options={})
  cc = category_count category
  feature_in_category = feature_in_category_count feature, category

  basic = cc == 0.0 ? 0.0 : feature_in_category/cc
  return basic unless options[:weighted]

  weighted basic, feature_count(feature)
end
|
77
|
+
|
78
|
+
# Probability of +feature+ under +category+, normalised by the sum of the
# same probability over every known category.
#
# When the sum is zero (the feature has not been seen under any category,
# or no categories exist) the raw ratio is 0.0. The original unweighted
# branch tried to guard this with a local that was only assigned in the
# weighted branch, so the guard never fired and 0.0/0.0 returned NaN.
#
# options[:weighted] - when truthy, the ratio is passed through +weighted+
#                      together with the feature's overall count.
def normalized_prob_category_given_feature(category, feature, options={})
  feature_given_category = prob_feature_given_category feature, category
  all_feature_given_categories = categories.map do |cat|
    prob_feature_given_category feature, cat
  end.sum

  ratio = if all_feature_given_categories == 0
    0.0
  else
    feature_given_category/all_feature_given_categories
  end
  return ratio unless options[:weighted]

  weighted ratio, feature_count(feature)
end
|
92
|
+
|
93
|
+
def prob_category_given_feature(category, feature, options={})
|
94
|
+
fc = feature_count feature
|
95
|
+
feature_in_category = feature_in_category_count feature, category
|
96
|
+
|
97
|
+
if options[:weighted]
|
98
|
+
weighted feature_in_category/fc, fc
|
99
|
+
else
|
100
|
+
return 0.0 if fc == 0.0
|
101
|
+
feature_in_category/fc
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
protected
|
106
|
+
# Blends +result+ with @assumed_probability: +result+ counts +weight+
# times and the assumed probability counts @assumed_weight times. A
# +weight+ of zero returns the assumed probability directly.
def weighted(result, weight)
  if weight == 0
    @assumed_probability
  else
    numerator = result*weight + @assumed_weight*@assumed_probability
    numerator/(weight + @assumed_weight)
  end
end
|
110
|
+
|
111
|
+
def extract_features!(item)
|
112
|
+
if @extract_block
|
113
|
+
@extract_block.call item
|
114
|
+
else
|
115
|
+
extract_features item
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module PositronicBrain
|
2
|
+
module Classifier
|
3
|
+
class Fisher < PositronicBrain::Classifier::Base
|
4
|
+
# Picks the category whose score for +item+ is strictly greater than every
# score before it and than the starting threshold.
#
# options[:default] - category returned when no score beats the threshold
#                     (falls back to @default_category).
# options[:minimum] - starting threshold (falls back to @minimum_score,
#                     then 0.0).
#
# Returns a two-element array: [category, score].
def classify(item, options={})
  winner = options[:default] || @default_category
  threshold = options[:minimum] || @minimum_score || 0.0

  scores(item).each do |category, score|
    next unless score > threshold

    winner, threshold = category, score
  end

  [winner, threshold]
end
|
19
|
+
|
20
|
+
def fisher_scores(item)
|
21
|
+
categories.map do |category|
|
22
|
+
score = fisher_score item, category
|
23
|
+
[category, score]
|
24
|
+
end
|
25
|
+
end
|
26
|
+
alias :scores :fisher_scores
|
27
|
+
|
28
|
+
def fisher_score(item, category)
|
29
|
+
prod = category_given_item_product category, item, weighted: true
|
30
|
+
return 0.0 if prod == 0.0
|
31
|
+
|
32
|
+
features_count = extract_features!(item).count
|
33
|
+
Distribution::ChiSquare.q_chi2 2*features_count, -2*Math.log(prod)
|
34
|
+
end
|
35
|
+
alias :score :fisher_score
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module PositronicBrain
|
2
|
+
module Classifier
|
3
|
+
class NaiveBayes < PositronicBrain::Classifier::Base
|
4
|
+
# Picks the category whose score for +item+ is strictly greater than every
# score before it and than the starting threshold.
#
# options[:default] - category returned when no score beats the threshold
#                     (falls back to @default_category).
# options[:minimum] - starting threshold (falls back to @minimum_score,
#                     then 0.0).
#
# Returns a two-element array: [category, score].
def classify(item, options={})
  winner = options[:default] || @default_category
  threshold = options[:minimum] || @minimum_score || 0.0

  scores(item).each do |category, score|
    next unless score > threshold

    winner, threshold = category, score
  end

  [winner, threshold]
end
|
19
|
+
|
20
|
+
def bayes_scores(item)
|
21
|
+
categories.map do |category|
|
22
|
+
score = bayes_score item, category
|
23
|
+
[category, score]
|
24
|
+
end
|
25
|
+
end
|
26
|
+
alias :scores :bayes_scores
|
27
|
+
|
28
|
+
def bayes_score(item, category)
|
29
|
+
item_given_category_product(item, category, weighted: true)*prob_category(category)
|
30
|
+
end
|
31
|
+
alias :score :bayes_score
|
32
|
+
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module PositronicBrain
|
2
|
+
module Persistence
|
3
|
+
module Classifier
|
4
|
+
class Base < PositronicBrain::Persistence::Base
|
5
|
+
def feature_count(feature)
|
6
|
+
(@persistence[:features][feature] || 0.0).to_f
|
7
|
+
end
|
8
|
+
|
9
|
+
def category_count(category)
|
10
|
+
(@persistence[:categories][category] || 0.0).to_f
|
11
|
+
end
|
12
|
+
|
13
|
+
# Number of times +feature+ was counted under +category+, as a Float.
# Returns 0.0 when the feature is unknown or has no entry for the category.
def feature_in_category_count(feature, category)
  by_feature = @persistence[:features_in_category]

  if by_feature.has_key?(feature)
    (by_feature[feature][category] || 0.0).to_f
  else
    0.0
  end
end
|
17
|
+
|
18
|
+
# Records one more occurrence of +feature+ under +category+: bumps the
# per-category counter for the feature and the feature's overall counter,
# creating either at zero when missing. Returns the feature's new overall
# count.
def increment_feature(feature, category)
  per_category = (@persistence[:features_in_category][feature] ||= Hash.new)
  per_category[category] = (per_category[category] || 0) + 1

  overall = @persistence[:features]
  overall[feature] = (overall[feature] || 0) + 1
end
|
26
|
+
|
27
|
+
def increment_category(category)
|
28
|
+
@persistence[:categories][category] ||= 0
|
29
|
+
@persistence[:categories][category] += 1
|
30
|
+
@persistence[:total_count] += 1
|
31
|
+
end
|
32
|
+
|
33
|
+
def categories
|
34
|
+
@persistence[:categories].keys
|
35
|
+
end
|
36
|
+
|
37
|
+
def total_count
|
38
|
+
(@persistence[:total_count] || 0.0).to_f
|
39
|
+
end
|
40
|
+
|
41
|
+
protected
|
42
|
+
def init
|
43
|
+
@persistence = Hash.new
|
44
|
+
@persistence[:features] = Hash.new
|
45
|
+
@persistence[:categories] = Hash.new
|
46
|
+
@persistence[:features_in_category] = Hash.new
|
47
|
+
@persistence[:total_count] = 0
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module PositronicBrain
|
2
|
+
module Persistence
|
3
|
+
autoload :Classifier, 'positronic_brain/persistence/classifier/base.rb'
|
4
|
+
|
5
|
+
class Base
|
6
|
+
attr_reader :dump_path, :options, :persistence
|
7
|
+
|
8
|
+
def initialize(dump_path, options = {})
|
9
|
+
@dump_path, @options = dump_path, options
|
10
|
+
load || init
|
11
|
+
end
|
12
|
+
|
13
|
+
# Serialises @persistence with Marshal and writes it to @dump_path,
# creating the containing directory first.
#
# The file is opened in binary mode and written with +write+, so the bytes
# on disk are exactly the Marshal stream: no trailing newline from +puts+
# and no newline/encoding translation. Returns nil, as before.
def dump
  ensure_dump_directory
  File.open(@dump_path, 'wb') { |f| f.write Marshal.dump(@persistence) }
  nil
end
|
17
|
+
|
18
|
+
def ==(other)
|
19
|
+
@persistence == other.persistence
|
20
|
+
end
|
21
|
+
|
22
|
+
protected
|
23
|
+
# Creates the directory that will contain @dump_path, including any missing
# parents. FileUtils.mkdir_p is a no-op for an existing directory, so no
# existence check is needed (the previous Dir.exists? check no longer works
# on Ruby 3.2+, where that alias was removed).
def ensure_dump_directory
  FileUtils.mkdir_p File.dirname(@dump_path)
end
|
27
|
+
|
28
|
+
# Returns the raw bytes stored at @dump_path. Read in binary mode because
# the content is a Marshal stream, not text, and must not go through
# newline or encoding translation.
def dumped_data
  File.binread @dump_path
end
|
31
|
+
|
32
|
+
# Restores @persistence from the dump file.
#
# Returns false when no file exists at +dump_path+ (the constructor then
# falls back to +init+); otherwise returns the unmarshalled object, which is
# also stored in @persistence.
#
# Uses File.exist? because the File.exists? alias was removed in Ruby 3.2.
def load
  return false unless File.exist? dump_path

  # NOTE(review): Marshal.load can instantiate arbitrary objects; only point
  # dump_path at files this library wrote itself.
  @persistence = Marshal.load dumped_data
end
|
36
|
+
|
37
|
+
def init
|
38
|
+
@persistence = Hash.new
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
gem_root = File.dirname __FILE__
|
2
|
+
$LOAD_PATH.unshift(gem_root) unless $LOAD_PATH.include?(gem_root)
|
3
|
+
|
4
|
+
require 'fileutils'
|
5
|
+
|
6
|
+
require 'active_support'
|
7
|
+
require 'active_support/core_ext'
|
8
|
+
|
9
|
+
require 'distribution'
|
10
|
+
|
11
|
+
module PositronicBrain
|
12
|
+
autoload :VERSION, 'positronic_brain/version.rb'
|
13
|
+
autoload :Base, 'positronic_brain/base.rb'
|
14
|
+
|
15
|
+
autoload :Classifier, 'positronic_brain/classifier/classifier.rb'
|
16
|
+
autoload :Persistence, 'positronic_brain/persistence/persistence.rb'
|
17
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module StorageMatcher
|
2
|
+
def be_stored(expected)
|
3
|
+
BeStored.new expected
|
4
|
+
end
|
5
|
+
|
6
|
+
def clean_storage
|
7
|
+
FileUtils.rm_rf PositronicBrain::Base.dump_path
|
8
|
+
end
|
9
|
+
|
10
|
+
# RSpec matcher object: passes when the target has a dump file on disk and
# the persistence loaded from the expected location equals the target's
# in-memory persistence.
class BeStored
  # expected_path - dump location relative to PositronicBrain::Base.dump_path.
  def initialize(expected_path)
    @expected_path = expected_path
  end

  # Returns true when target.dump_path exists and a persistence built from
  # expected_full_path equals target.persistence; false otherwise. The
  # "persisted" flag is recomputed on every call so a reused matcher never
  # reports a stale location in its failure message.
  def matches?(target)
    @target = target
    @persisted = File.exist?(@target.dump_path)
    return false unless @persisted

    persistence = @target.class.initialize_persistence expected_full_path
    persistence == target.persistence
  end

  # Absolute path of the expected dump file.
  def expected_full_path
    File.expand_path File.join(PositronicBrain::Base.dump_path, @expected_path)
  end

  # Tail of the failure message describing where the target was found.
  def is_at_message
    if @persisted
      "is at #{@target.dump_path}"
    else
      'is nowhere'
    end
  end

  def failure_message
    "expected #{@target.inspect} to be stored at #{@expected_path}, but #{is_at_message}"
  end

  def negative_failure_message
    "expected #{@target.inspect} not to be stored at #{@expected_path}"
  end

  # RSpec 3 asks matchers for this name; RSpec 2 uses negative_failure_message.
  alias failure_message_when_negated negative_failure_message
end
|
47
|
+
end
|
@@ -0,0 +1,170 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
module PositronicBrain::Classifier
|
5
|
+
describe Base do
|
6
|
+
describe 'perform probabilities calculations' do
|
7
|
+
context 'unweighted_classifier' do
|
8
|
+
before(:all) do
|
9
|
+
$classifier = Base.new 'unweighted_classifier', assumed_probability: 0.0, assumed_weight: 0.0
|
10
|
+
TRAIN_BASE.each{ |item, category| $classifier.train item, category }
|
11
|
+
end
|
12
|
+
|
13
|
+
after(:all){ $classifier = nil }
|
14
|
+
|
15
|
+
subject{ $classifier }
|
16
|
+
|
17
|
+
it 'should have category probabilities' do
|
18
|
+
subject.prob_category(:good).should be_within(0.001).of(0.6)
|
19
|
+
subject.prob_category(:bad ).should be_within(0.001).of(0.4)
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'should have zero category given feature probabilities for missing word' do
|
23
|
+
subject.prob_category_given_feature(:good, 'bazinga').should be_within(0.001).of(0.0)
|
24
|
+
subject.prob_category_given_feature(:bad, 'bazinga').should be_within(0.001).of(0.0)
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'should have zero category given feature weighted probabilities for missing word' do
|
28
|
+
subject.prob_category_given_feature(:good, 'bazinga', weighted: true).should be_within(0.001).of(0.0)
|
29
|
+
subject.prob_category_given_feature(:bad, 'bazinga', weighted: true).should be_within(0.001).of(0.0)
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'should have category given feature probabilities for matched word' do
|
33
|
+
subject.prob_category_given_feature(:good, 'quick').should be_within(0.001).of(0.666)
|
34
|
+
subject.prob_category_given_feature(:bad, 'quick').should be_within(0.001).of(0.333)
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'should have category given feature weighted probabilities for matched word' do
|
38
|
+
subject.prob_category_given_feature(:good, 'quick', weighted: true).should be_within(0.001).of(0.666)
|
39
|
+
subject.prob_category_given_feature(:bad, 'quick', weighted: true).should be_within(0.001).of(0.333)
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'should have category given feature probabilities for only good matched word' do
|
43
|
+
subject.prob_category_given_feature(:good, 'rabbit').should be_within(0.001).of(1.0)
|
44
|
+
subject.prob_category_given_feature(:bad, 'rabbit').should be_within(0.001).of(0.0)
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'should have category given feature weighted probabilities for only good matched word' do
|
48
|
+
subject.prob_category_given_feature(:good, 'rabbit', weighted: true).should be_within(0.001).of(1.0)
|
49
|
+
subject.prob_category_given_feature(:bad, 'rabbit', weighted: true).should be_within(0.001).of(0.0)
|
50
|
+
end
|
51
|
+
|
52
|
+
it 'should have category given feature probabilities for only bad matched word' do
|
53
|
+
subject.prob_category_given_feature(:good, 'casino').should be_within(0.001).of(0.0)
|
54
|
+
subject.prob_category_given_feature(:bad, 'casino').should be_within(0.001).of(1.0)
|
55
|
+
end
|
56
|
+
|
57
|
+
it 'should have category given feature weighted probabilities for only bad matched word' do
|
58
|
+
subject.prob_category_given_feature(:good, 'casino', weighted: true).should be_within(0.001).of(0.0)
|
59
|
+
subject.prob_category_given_feature(:bad, 'casino', weighted: true).should be_within(0.001).of(1.0)
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'should have zero category given item products if has some missing word' do
|
63
|
+
subject.category_given_item_product(:good, 'bazinga rabbit casino', weighted: true).should be_within(0.001).of(0.0)
|
64
|
+
subject.category_given_item_product(:bad, 'bazinga rabbit casino', weighted: true).should be_within(0.001).of(0.0)
|
65
|
+
subject.category_given_item_product(:good, 'bazinga airplane', weighted: true).should be_within(0.001).of(0.0)
|
66
|
+
subject.category_given_item_product(:bad, 'bazinga airplane', weighted: true).should be_within(0.001).of(0.0)
|
67
|
+
subject.category_given_item_product(:good, 'rabbit casino', weighted: true).should be_within(0.001).of(0.0)
|
68
|
+
subject.category_given_item_product(:bad, 'rabbit casino', weighted: true).should be_within(0.001).of(0.0)
|
69
|
+
end
|
70
|
+
|
71
|
+
it 'should have category given item products if has all matched words' do
|
72
|
+
subject.category_given_item_product(:good, 'the quick', weighted: true).should be_within(0.001).of(0.3809)
|
73
|
+
subject.category_given_item_product(:bad, 'the quick', weighted: true).should be_within(0.001).of(0.1428)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
context 'weighted_classifier' do
|
78
|
+
before(:all) do
|
79
|
+
$classifier = Base.new 'weighted_classifier'
|
80
|
+
TRAIN_BASE.each{ |item, category| $classifier.train item, category }
|
81
|
+
end
|
82
|
+
|
83
|
+
after(:all){ $classifier = nil }
|
84
|
+
|
85
|
+
subject{ $classifier }
|
86
|
+
|
87
|
+
it 'should have category probabilities' do
|
88
|
+
subject.prob_category(:good).should be_within(0.001).of(0.6)
|
89
|
+
subject.prob_category(:bad ).should be_within(0.001).of(0.4)
|
90
|
+
end
|
91
|
+
|
92
|
+
it 'should have zero category given feature probabilities for missing word' do
|
93
|
+
subject.prob_category_given_feature(:good, 'bazinga').should be_within(0.001).of(0.0)
|
94
|
+
subject.prob_category_given_feature(:bad, 'bazinga').should be_within(0.001).of(0.0)
|
95
|
+
end
|
96
|
+
|
97
|
+
it 'should have default category given feature weighted probabilities for missing word' do
|
98
|
+
subject.prob_category_given_feature(:good, 'bazinga', weighted: true).should be_within(0.001).of(0.5)
|
99
|
+
subject.prob_category_given_feature(:bad, 'bazinga', weighted: true).should be_within(0.001).of(0.5)
|
100
|
+
end
|
101
|
+
|
102
|
+
it 'should have category given feature probabilities for matched word' do
|
103
|
+
subject.prob_category_given_feature(:good, 'quick').should be_within(0.001).of(0.666)
|
104
|
+
subject.prob_category_given_feature(:bad, 'quick').should be_within(0.001).of(0.333)
|
105
|
+
end
|
106
|
+
|
107
|
+
it 'should have category given feature weighted probabilities for matched word' do
|
108
|
+
subject.prob_category_given_feature(:good, 'quick', weighted: true).should be_within(0.001).of(0.6245 )
|
109
|
+
subject.prob_category_given_feature(:bad, 'quick', weighted: true).should be_within(0.001).of(0.37475)
|
110
|
+
end
|
111
|
+
|
112
|
+
it 'should have category given feature probabilities for only good matched word' do
|
113
|
+
subject.prob_category_given_feature(:good, 'rabbit').should be_within(0.001).of(1.0)
|
114
|
+
subject.prob_category_given_feature(:bad, 'rabbit' ).should be_within(0.001).of(0.0)
|
115
|
+
end
|
116
|
+
|
117
|
+
it 'should have category given feature weighted probabilities for only good matched word' do
|
118
|
+
subject.prob_category_given_feature(:good, 'rabbit', weighted: true).should be_within(0.001).of(0.75)
|
119
|
+
subject.prob_category_given_feature(:bad, 'rabbit', weighted: true).should be_within(0.001).of(0.25)
|
120
|
+
end
|
121
|
+
|
122
|
+
it 'should have category given feature probabilities for only bad matched word' do
|
123
|
+
subject.prob_category_given_feature(:good, 'casino').should be_within(0.001).of(0.0)
|
124
|
+
subject.prob_category_given_feature(:bad, 'casino').should be_within(0.001).of(1.0)
|
125
|
+
end
|
126
|
+
|
127
|
+
it 'should have category given feature weighted probabilities for only bad matched word' do
|
128
|
+
subject.prob_category_given_feature(:good, 'casino', weighted: true).should be_within(0.001).of(0.25)
|
129
|
+
subject.prob_category_given_feature(:bad, 'casino', weighted: true).should be_within(0.001).of(0.75)
|
130
|
+
end
|
131
|
+
|
132
|
+
it 'should use assumed probability avoiding zero category given item products at missing words' do
|
133
|
+
subject.category_given_item_product(:good, 'bazinga rabbit casino', weighted: true).should be_within(0.001).of(0.09375)
|
134
|
+
subject.category_given_item_product(:bad, 'bazinga rabbit casino', weighted: true).should be_within(0.001).of(0.09375)
|
135
|
+
subject.category_given_item_product(:good, 'bazinga airplane', weighted: true).should be_within(0.001).of(0.25 )
|
136
|
+
subject.category_given_item_product(:bad, 'bazinga airplane', weighted: true).should be_within(0.001).of(0.25 )
|
137
|
+
subject.category_given_item_product(:good, 'rabbit casino', weighted: true).should be_within(0.001).of(0.1875 )
|
138
|
+
subject.category_given_item_product(:bad, 'rabbit casino', weighted: true).should be_within(0.001).of(0.1875 )
|
139
|
+
end
|
140
|
+
|
141
|
+
it 'should have category given item products if has all matched words' do
|
142
|
+
subject.category_given_item_product(:good, 'the quick', weighted: true).should be_within(0.001).of(0.35059)
|
143
|
+
subject.category_given_item_product(:bad, 'the quick', weighted: true).should be_within(0.001).of(0.16369)
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
describe 'perform persistence operations' do
|
149
|
+
context 'classifier' do
|
150
|
+
before(:all) do
|
151
|
+
$classifier = Base.new 'some_classifier'
|
152
|
+
TRAIN_BASE.each{ |item, category| $classifier.train item, category }
|
153
|
+
end
|
154
|
+
|
155
|
+
before(:each){ clean_storage }
|
156
|
+
after(:each){ clean_storage }
|
157
|
+
|
158
|
+
after(:all){ $classifier = nil }
|
159
|
+
|
160
|
+
subject{ $classifier }
|
161
|
+
|
162
|
+
it 'should have able to be stored' do
|
163
|
+
subject.should_not be_stored('base/some_classifier.marshal')
|
164
|
+
subject.dump
|
165
|
+
subject.should be_stored('base/some_classifier.marshal')
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
module PositronicBrain::Classifier
|
5
|
+
describe Fisher do
|
6
|
+
describe 'perform probabilities calculations' do
|
7
|
+
context 'unweighted_classifier' do
|
8
|
+
before(:all) do
|
9
|
+
$classifier = Fisher.new 'unweighted_classifier', assumed_probability: 0.0, assumed_weight: 0.0
|
10
|
+
TRAIN_BASE.each{ |item, category| $classifier.train item, category }
|
11
|
+
end
|
12
|
+
|
13
|
+
after(:all){ $classifier = nil }
|
14
|
+
|
15
|
+
subject{ $classifier }
|
16
|
+
|
17
|
+
it 'should calculate scores' do
|
18
|
+
subject.score('quick rabbit', :good).should be_within(0.001).of(0.8912)
|
19
|
+
subject.score('quick rabbit', :bad ).should be_within(0.001).of(0.0 )
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
23
|
+
|
24
|
+
context 'weighted_classifier' do
|
25
|
+
before(:all) do
|
26
|
+
$classifier = Fisher.new 'weighted_classifier'
|
27
|
+
TRAIN_BASE.each{ |item, category| $classifier.train item, category }
|
28
|
+
end
|
29
|
+
|
30
|
+
after(:all){ $classifier = nil }
|
31
|
+
|
32
|
+
subject{ $classifier }
|
33
|
+
|
34
|
+
it 'should have category probabilities' do
|
35
|
+
subject.score('quick rabbit', :good).should be_within(0.001).of(0.780139)
|
36
|
+
subject.score('quick rabbit', :bad ).should be_within(0.001).of(0.356335)
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
describe 'perform persistence operations' do
|
43
|
+
context 'classifier' do
|
44
|
+
before(:all) do
|
45
|
+
$classifier = Fisher.new 'some_classifier'
|
46
|
+
TRAIN_BASE.each{ |item, category| $classifier.train item, category }
|
47
|
+
end
|
48
|
+
|
49
|
+
before(:each){ clean_storage }
|
50
|
+
after(:each){ clean_storage }
|
51
|
+
|
52
|
+
after(:all){ $classifier = nil }
|
53
|
+
|
54
|
+
subject{ $classifier }
|
55
|
+
|
56
|
+
it 'should have able to be stored' do
|
57
|
+
subject.should_not be_stored('fisher/some_classifier.marshal')
|
58
|
+
subject.dump
|
59
|
+
subject.should be_stored('fisher/some_classifier.marshal')
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
module PositronicBrain::Classifier
|
5
|
+
describe NaiveBayes do
|
6
|
+
describe 'perform probabilities calculations' do
|
7
|
+
context 'unweighted_classifier' do
|
8
|
+
before(:all) do
|
9
|
+
$classifier = NaiveBayes.new 'unweighted_classifier', assumed_probability: 0.0, assumed_weight: 0.0
|
10
|
+
TRAIN_BASE.each{ |item, category| $classifier.train item, category }
|
11
|
+
end
|
12
|
+
|
13
|
+
after(:all){ $classifier = nil }
|
14
|
+
|
15
|
+
subject{ $classifier }
|
16
|
+
|
17
|
+
it 'should calculate scores' do
|
18
|
+
subject.score('quick rabbit', :good).should be_within(0.001).of(0.1333)
|
19
|
+
subject.score('quick rabbit', :bad ).should be_within(0.001).of(0.0 )
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
23
|
+
|
24
|
+
context 'weighted_classifier' do
|
25
|
+
before(:all) do
|
26
|
+
$classifier = NaiveBayes.new 'weighted_classifier'
|
27
|
+
TRAIN_BASE.each{ |item, category| $classifier.train item, category }
|
28
|
+
end
|
29
|
+
|
30
|
+
after(:all){ $classifier = nil }
|
31
|
+
|
32
|
+
subject{ $classifier }
|
33
|
+
|
34
|
+
it 'should have category probabilities' do
|
35
|
+
subject.score('quick rabbit', :good).should be_within(0.001).of(0.156249)
|
36
|
+
subject.score('quick rabbit', :bad ).should be_within(0.001).of(0.050000)
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
describe 'perform persistence operations' do
|
43
|
+
context 'classifier' do
|
44
|
+
before(:all) do
|
45
|
+
$classifier = NaiveBayes.new 'some_classifier'
|
46
|
+
TRAIN_BASE.each{ |item, category| $classifier.train item, category }
|
47
|
+
end
|
48
|
+
|
49
|
+
before(:each){ clean_storage }
|
50
|
+
after(:each){ clean_storage }
|
51
|
+
|
52
|
+
after(:all){ $classifier = nil }
|
53
|
+
|
54
|
+
subject{ $classifier }
|
55
|
+
|
56
|
+
it 'should have able to be stored' do
|
57
|
+
subject.should_not be_stored('naive_bayes/some_classifier.marshal')
|
58
|
+
subject.dump
|
59
|
+
subject.should be_stored('naive_bayes/some_classifier.marshal')
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
root_path = File.expand_path File.join(File.dirname(__FILE__), '..')
|
3
|
+
load File.join(root_path, 'lib/positronic_brain.rb')
|
4
|
+
|
5
|
+
require 'rspec/mocks'
|
6
|
+
require 'pry'
|
7
|
+
|
8
|
+
PositronicBrain::Base.dump_path = File.join(root_path, 'tmp/dump')
|
9
|
+
|
10
|
+
matchers = []
|
11
|
+
Dir[File.join(root_path, 'spec/matchers/**/*.rb')].each do |file|
|
12
|
+
require file
|
13
|
+
matchers << File.basename(file).gsub('.rb', '').camelize.constantize
|
14
|
+
end
|
15
|
+
|
16
|
+
RSpec.configure do |config|
|
17
|
+
matchers.each do |matcher|
|
18
|
+
config.include matcher
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
TRAIN_BASE = [
|
23
|
+
['Nobody owns the water.', :good],
|
24
|
+
['the quick rabbit jumps fences', :good],
|
25
|
+
['the quick brown fox jumps', :good],
|
26
|
+
['buy pharmaceuticals now', :bad ],
|
27
|
+
['make quick money at the online casino', :bad ]
|
28
|
+
]
|
metadata
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: positronic_brain
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Dalton
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-04-01 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: active_support
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 3.0.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 3.0.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: distribution
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: i18n
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: pry
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rake
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rspec
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - '>='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
description: A Toolbox of Artificial Intelligence designed to help to solve a wide
|
112
|
+
range of problems
|
113
|
+
email:
|
114
|
+
- dalton@expertte.com
|
115
|
+
executables: []
|
116
|
+
extensions: []
|
117
|
+
extra_rdoc_files: []
|
118
|
+
files:
|
119
|
+
- spec/matchers/storage_matcher.rb
|
120
|
+
- spec/positronic_brain/classifier/classifier_spec.rb
|
121
|
+
- spec/positronic_brain/classifier/fisher_spec.rb
|
122
|
+
- spec/positronic_brain/classifier/naive_bayes_spec.rb
|
123
|
+
- spec/spec_helper.rb
|
124
|
+
- lib/positronic_brain/base.rb
|
125
|
+
- lib/positronic_brain/classifier/classifier.rb
|
126
|
+
- lib/positronic_brain/classifier/fisher.rb
|
127
|
+
- lib/positronic_brain/classifier/naive_bayes.rb
|
128
|
+
- lib/positronic_brain/persistence/classifier/base.rb
|
129
|
+
- lib/positronic_brain/persistence/persistence.rb
|
130
|
+
- lib/positronic_brain/version.rb
|
131
|
+
- lib/positronic_brain.rb
|
132
|
+
- LICENSE.txt
|
133
|
+
- Rakefile
|
134
|
+
- README.md
|
135
|
+
- Gemfile
|
136
|
+
homepage: ''
|
137
|
+
licenses:
|
138
|
+
- MIT
|
139
|
+
metadata: {}
|
140
|
+
post_install_message:
|
141
|
+
rdoc_options: []
|
142
|
+
require_paths:
|
143
|
+
- lib
|
144
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
145
|
+
requirements:
|
146
|
+
- - '>='
|
147
|
+
- !ruby/object:Gem::Version
|
148
|
+
version: '0'
|
149
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
150
|
+
requirements:
|
151
|
+
- - '>='
|
152
|
+
- !ruby/object:Gem::Version
|
153
|
+
version: '0'
|
154
|
+
requirements: []
|
155
|
+
rubyforge_project:
|
156
|
+
rubygems_version: 2.0.0
|
157
|
+
signing_key:
|
158
|
+
specification_version: 4
|
159
|
+
summary: A Toolbox of Artificial Intelligence
|
160
|
+
test_files:
|
161
|
+
- spec/matchers/storage_matcher.rb
|
162
|
+
- spec/positronic_brain/classifier/classifier_spec.rb
|
163
|
+
- spec/positronic_brain/classifier/fisher_spec.rb
|
164
|
+
- spec/positronic_brain/classifier/naive_bayes_spec.rb
|
165
|
+
- spec/spec_helper.rb
|