naive_bayes_rb 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/Gemfile +8 -0
- data/Guardfile +53 -0
- data/README.md +59 -0
- data/Rakefile +6 -0
- data/lib/naive_bayes_rb.rb +5 -0
- data/lib/naive_bayes_rb/marshal_serializer.rb +12 -0
- data/lib/naive_bayes_rb/naive_bayes.rb +32 -0
- data/lib/naive_bayes_rb/saved_model.rb +27 -0
- data/lib/naive_bayes_rb/stats.rb +48 -0
- data/lib/naive_bayes_rb/version.rb +3 -0
- data/naive_bayes_rb.gemspec +33 -0
- metadata +100 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d58fb9885c51f52a094b39943f6486029d15d810
|
4
|
+
data.tar.gz: a5d0d2d8f4d1db8febd0786c6b911016dfe4a5bf
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 26ac613de8b7e622bba0e53a9a7322f176db1c37d842348276ecf8ee979b804fc563c9acf6822be404cebee5a848e90945c0ea6efea755ec3853c1dd1a7d2641
|
7
|
+
data.tar.gz: 7271432d30ddc176c31975344ed9260e5ac4fdb82f2c12cdfca4fb3a43a91ae31e7fdb90765268540ec13f3e7354f14226618537ef3b3cd470f86a27a85774d5
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Guardfile
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# A sample Guardfile
|
2
|
+
# More info at https://github.com/guard/guard#readme
|
3
|
+
|
4
|
+
## Uncomment and set this to only include directories you want to watch
|
5
|
+
# directories %w(app lib config test spec features) \
|
6
|
+
# .select{|d| Dir.exists?(d) ? d : UI.warning("Directory #{d} does not exist")}
|
7
|
+
|
8
|
+
## Note: if you are using the `directories` clause above and you are not
|
9
|
+
## watching the project directory ('.'), then you will want to move
|
10
|
+
## the Guardfile to a watched dir and symlink it back, e.g.
|
11
|
+
#
|
12
|
+
# $ mkdir config
|
13
|
+
# $ mv Guardfile config/
|
14
|
+
# $ ln -s config/Guardfile .
|
15
|
+
#
|
16
|
+
# and, you'll have to watch "config/Guardfile" instead of "Guardfile"
|
17
|
+
|
18
|
+
# Note: The cmd option is now required due to the increasing number of ways
|
19
|
+
# rspec may be run, below are examples of the most common uses.
|
20
|
+
# * bundler: 'bundle exec rspec'
|
21
|
+
# * bundler binstubs: 'bin/rspec'
|
22
|
+
# * spring: 'bin/rspec' (This will use spring if running and you have
|
23
|
+
# installed the spring binstubs per the docs)
|
24
|
+
# * zeus: 'zeus rspec' (requires the server to be started separately)
|
25
|
+
# * 'just' rspec: 'rspec'
|
26
|
+
|
27
|
+
# Guard configuration: re-run specs through Bundler whenever a watched file changes.
guard :rspec, cmd: "bundle exec rspec" do
  require "guard/rspec/dsl"
  dsl = Guard::RSpec::Dsl.new(self)

  # RSpec files: a change to the spec helper or to spec support maps to the
  # whole spec directory; a changed spec file is watched as itself.
  rspec = dsl.rspec
  [rspec.spec_helper, rspec.spec_support].each do |pattern|
    watch(pattern) { rspec.spec_dir }
  end
  watch(rspec.spec_files)

  # Ruby files under lib map to their matching spec files.
  ruby = dsl.ruby
  dsl.watch_spec_files_for(ruby.lib_files)

  # Rails application files and views map to their matching spec files.
  rails = dsl.rails(view_extensions: %w[erb haml slim])
  [rails.app_files, rails.views].each do |files|
    dsl.watch_spec_files_for(files)
  end

  # Turnip features and steps: a changed step file maps to the first feature
  # file with the same base name, or to the acceptance directory if none exists.
  watch(%r{^spec/acceptance/(.+)\.feature$})
  watch(%r{^spec/acceptance/steps/(.+)_steps\.rb$}) do |m|
    feature = Dir[File.join("**/#{m[1]}.feature")].first
    feature || "spec/acceptance"
  end
end
|
data/README.md
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# NaiveBayesRb
|
2
|
+
|
3
|
+
A very simple Ruby implementation of the Naive Bayes classification model.
|
4
|
+
|
5
|
+
## Design Considerations
|
6
|
+
|
7
|
+
1. The interface closely resembles the Python [scikit-learn interface].
|
8
|
+
2. Model serialization and persistence are supported, so that a model can be reused, distributed, and shared. Besides the default `MarshalSerializer`, a custom serializer can be plugged in.
|
9
|
+
|
10
|
+
[scikit-learn interface]: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.naive_bayes
|
11
|
+
|
12
|
+
## Usage
|
13
|
+
|
14
|
+
### basics
|
15
|
+
|
16
|
+
```ruby
|
17
|
+
nb = NaiveBayesRb::NaiveBayes.new
|
18
|
+
train = [[1, 20], [2, 21], [3, 22], [4, 23]]
|
19
|
+
target = [1, 0, 1, 0]
|
20
|
+
test = [[0, 0], [4, 24]]
|
21
|
+
predictions = nb.fit(train, target).predict(test) #=> [1, 0]
|
22
|
+
nb.accuracy(predictions, [1, 1]) #=> 50.0
|
23
|
+
```
|
24
|
+
### Model Persistence
|
25
|
+
|
26
|
+
```ruby
|
27
|
+
NaiveBayesRb::NaiveBayes.serializer =
|
28
|
+
nb = NaiveBayesRb::NaiveBayes.new
|
29
|
+
nb.fit(train, target).save('model.pb')
|
30
|
+
```
|
31
|
+
|
32
|
+
### Loading Persisted Model
|
33
|
+
|
34
|
+
```ruby
|
35
|
+
NaiveBayesRb::NaiveBayes.serializer =
|
36
|
+
nb = NaiveBayesRb::NaiveBayes.load('model.pb')
|
37
|
+
```
|
38
|
+
|
39
|
+
## Installation
|
40
|
+
|
41
|
+
Add this line to your application's Gemfile:
|
42
|
+
|
43
|
+
```ruby
|
44
|
+
gem 'naive_bayes_rb'
|
45
|
+
```
|
46
|
+
|
47
|
+
And then execute:
|
48
|
+
|
49
|
+
$ bundle
|
50
|
+
|
51
|
+
Or install it yourself as:
|
52
|
+
|
53
|
+
$ gem install naive_bayes_rb
|
54
|
+
|
55
|
+
## Thanks
|
56
|
+
|
57
|
+
I followed the tutorials from [this blog].
|
58
|
+
|
59
|
+
[this blog]: https://machinelearningmastery.com/naive-bayes-classifier-scratch-python/
|
data/Rakefile
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
module NaiveBayesRb
  # Naive Bayes classifier front end.
  #
  # Training and prediction are delegated to the Stats module; persistence
  # (`save` / `.load` / `.serializer`) comes from the SavedModel mixin.
  class NaiveBayes
    include SavedModel

    # Hash of per-label summaries as produced by Stats.mean_stdev
    # (empty until #fit is called or a model is assigned).
    attr_accessor :model

    def initialize
      @model = {}
    end

    # Trains the classifier.
    #
    # @param data [Array<Array>] one feature row per sample
    # @param target [Array] the label for each row, in the same order as data
    # @return [NaiveBayes] self, so calls can be chained
    def fit(data, target)
      rows_by_label = data.zip(target).group_by(&:last)
      @model = rows_by_label.each_with_object({}) do |(label, pairs), summaries|
        # Each pair is [row, label]; only the rows are summarised.
        summaries[label] = Stats.mean_stdev(pairs.map(&:first))
      end
      self
    end

    # Predicts a label for every row in data.
    #
    # @param data [Array] rows to classify
    # @return [Array] the result of Stats.prediction for each row
    def predict(data)
      data.map { |row| Stats.prediction(row, @model) }
    end

    # @return [Integer] the number of values stored in the model
    def dimension
      @model.values.length
    end

    # Scores predictions against the actual labels via Stats.accuracy.
    #
    # @param prediction [Array] predicted labels
    # @param actual [Array] expected labels
    def accuracy(prediction, actual)
      Stats.accuracy(prediction, actual)
    end
  end
end
|
32
|
+
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module NaiveBayesRb
  # Mixin that adds model persistence to a classifier class.
  #
  # Including it gives the class a configurable `serializer` plus `.load`,
  # and gives instances `#save`. The including class must expose a `model`
  # accessor. The serializer is any object responding to `save(model, path)`
  # and `load(path)`.
  module SavedModel
    def self.included(base)
      base.extend(ClassMethods)
    end

    # Class-level API added to the including class.
    module ClassMethods
      # Sets the serializer used by `.load` and `#save` for this class.
      def serializer=(serializer)
        @serializer = serializer
      end

      # Returns the serializer configured on this class.
      #
      # The value is stored in a class-level instance variable, which
      # subclasses do not share, so fall back to the nearest ancestor that
      # has one configured. Returns nil when none is configured anywhere.
      def serializer
        return @serializer if @serializer

        parent = superclass if respond_to?(:superclass)
        parent.serializer if parent.respond_to?(:serializer)
      end

      # Builds a new instance whose model is read from path by the
      # configured serializer.
      #
      # @param path [String] location passed to the serializer's `load`
      # @return [Object] a new instance of the including class
      def load(path)
        new.tap do |instance|
          # Go through the reader so subclasses honour the inherited serializer.
          instance.model = serializer.load(path)
        end
      end
    end

    # Writes this instance's model to path using the class's serializer.
    #
    # @param path [String] location passed to the serializer's `save`
    # @return whatever the serializer's `save` returns
    def save(path)
      self.class.serializer.save(model, path)
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module NaiveBayesRb
  # Stateless statistics helpers used by the classifier: per-feature
  # mean/standard deviation summaries, the normal probability density,
  # and prediction/accuracy utilities.
  module Stats
    extend self

    # Summarises each feature column of data.
    #
    # @param data [Array<Array<Numeric>>] rows of equal length
    # @return [Array<Array(Float, Float)>] one [mean, stdev] pair per column
    def mean_stdev(data)
      column_count = data[0].length
      (0...column_count).map do |i|
        column = data.map { |row| row[i] }
        [mean(column), stdev(column)]
      end
    end

    # Normal (Gaussian) probability density of value for the given mean and
    # standard deviation.
    #
    # @return [Float]
    def probability(value, mean, stdev)
      # Force float arithmetic: with all-Integer arguments the exponent would
      # otherwise be computed with integer (floor) division.
      variance = stdev.to_f**2
      exponent = Math.exp(-((value - mean)**2) / (2 * variance))
      (1 / (Math.sqrt(2 * Math::PI) * stdev)) * exponent
    end

    # Computes, for every label, the product of the per-feature densities of
    # value under that label's [mean, stdev] summaries.
    #
    # @param value [Numeric, Array<Numeric>] a sample (a scalar is wrapped in an array)
    # @param summaries [Hash] label => array of [mean, stdev] pairs
    # @return [Hash] label => Float
    def class_probability(value, summaries)
      features = Array(value)
      summaries.each_with_object({}) do |(label, stats), result|
        result[label] = stats.zip(features).inject(1) do |product, ((mu, sigma), x)|
          product * probability(x, mu, sigma)
        end
      end
    end

    # Returns the label whose class probability is highest for value.
    def prediction(value, summaries)
      class_probability(value, summaries).max_by { |_, prob| prob }.first
    end

    # Percentage (0.0..100.0) of predictions that equal the corresponding
    # target. The result is NaN when predictions is empty.
    def accuracy(predictions, target)
      hits = predictions.zip(target).count { |predicted, actual| predicted == actual }
      hits * 100.0 / predictions.length
    end

    private

    # Arithmetic mean as a Float. Uses `/` rather than `Float#div`, which
    # performs floor division and would truncate the result.
    def mean(list)
      list.inject(:+).to_f / list.length
    end

    # Sample standard deviation (n - 1 denominator). The result is NaN for a
    # single-element list because the denominator is zero.
    def stdev(list)
      m = mean(list)
      squared_error = list.inject(0) { |accum, i| accum + (i - m)**2 }
      Math.sqrt(squared_error / (list.length - 1).to_f)
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for naive_bayes_rb.

# Make the gem's own lib directory loadable so the version constant can be required.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'naive_bayes_rb/version'

Gem::Specification.new do |spec|
  spec.name     = "naive_bayes_rb"
  spec.version  = NaiveBayesRb::VERSION
  spec.authors  = ["Jack Xu"]
  spec.email    = ["jackxxu@gmail.com"]
  spec.summary  = %q{simply naive bayes implementation in ruby.}
  spec.homepage = "https://github.com/jackxxu/naive_bayes_rb"

  # Restrict pushes to a single host via 'allowed_push_host'. This needs
  # specification metadata support; fail loudly when it is unavailable.
  unless spec.respond_to?(:metadata)
    raise "RubyGems 2.0 or newer is required to protect against " \
      "public gem pushes."
  end
  spec.metadata['allowed_push_host'] = "https://rubygems.org"

  # Package every git-tracked file except tests, specs and features.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.13"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
end
|
metadata
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: naive_bayes_rb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jack Xu
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-03-06 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.13'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.13'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
description:
|
56
|
+
email:
|
57
|
+
- jackxxu@gmail.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- ".gitignore"
|
63
|
+
- ".rspec"
|
64
|
+
- ".travis.yml"
|
65
|
+
- Gemfile
|
66
|
+
- Guardfile
|
67
|
+
- README.md
|
68
|
+
- Rakefile
|
69
|
+
- lib/naive_bayes_rb.rb
|
70
|
+
- lib/naive_bayes_rb/marshal_serializer.rb
|
71
|
+
- lib/naive_bayes_rb/naive_bayes.rb
|
72
|
+
- lib/naive_bayes_rb/saved_model.rb
|
73
|
+
- lib/naive_bayes_rb/stats.rb
|
74
|
+
- lib/naive_bayes_rb/version.rb
|
75
|
+
- naive_bayes_rb.gemspec
|
76
|
+
homepage: https://github.com/jackxxu/naive_bayes_rb
|
77
|
+
licenses: []
|
78
|
+
metadata:
|
79
|
+
allowed_push_host: https://rubygems.org
|
80
|
+
post_install_message:
|
81
|
+
rdoc_options: []
|
82
|
+
require_paths:
|
83
|
+
- lib
|
84
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '0'
|
89
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
requirements: []
|
95
|
+
rubyforge_project:
|
96
|
+
rubygems_version: 2.4.6
|
97
|
+
signing_key:
|
98
|
+
specification_version: 4
|
99
|
+
summary: simply naive bayes implementation in ruby.
|
100
|
+
test_files: []
|