naive_bayes_rb 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/Gemfile +8 -0
- data/Guardfile +53 -0
- data/README.md +59 -0
- data/Rakefile +6 -0
- data/lib/naive_bayes_rb.rb +5 -0
- data/lib/naive_bayes_rb/marshal_serializer.rb +12 -0
- data/lib/naive_bayes_rb/naive_bayes.rb +32 -0
- data/lib/naive_bayes_rb/saved_model.rb +27 -0
- data/lib/naive_bayes_rb/stats.rb +48 -0
- data/lib/naive_bayes_rb/version.rb +3 -0
- data/naive_bayes_rb.gemspec +33 -0
- metadata +100 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d58fb9885c51f52a094b39943f6486029d15d810
|
4
|
+
data.tar.gz: a5d0d2d8f4d1db8febd0786c6b911016dfe4a5bf
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 26ac613de8b7e622bba0e53a9a7322f176db1c37d842348276ecf8ee979b804fc563c9acf6822be404cebee5a848e90945c0ea6efea755ec3853c1dd1a7d2641
|
7
|
+
data.tar.gz: 7271432d30ddc176c31975344ed9260e5ac4fdb82f2c12cdfca4fb3a43a91ae31e7fdb90765268540ec13f3e7354f14226618537ef3b3cd470f86a27a85774d5
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Guardfile
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# A sample Guardfile
|
2
|
+
# More info at https://github.com/guard/guard#readme
|
3
|
+
|
4
|
+
## Uncomment and set this to only include directories you want to watch
|
5
|
+
# directories %w(app lib config test spec features) \
|
6
|
+
# .select{|d| Dir.exists?(d) ? d : UI.warning("Directory #{d} does not exist")}
|
7
|
+
|
8
|
+
## Note: if you are using the `directories` clause above and you are not
|
9
|
+
## watching the project directory ('.'), then you will want to move
|
10
|
+
## the Guardfile to a watched dir and symlink it back, e.g.
|
11
|
+
#
|
12
|
+
# $ mkdir config
|
13
|
+
# $ mv Guardfile config/
|
14
|
+
# $ ln -s config/Guardfile .
|
15
|
+
#
|
16
|
+
# and, you'll have to watch "config/Guardfile" instead of "Guardfile"
|
17
|
+
|
18
|
+
# Note: The cmd option is now required due to the increasing number of ways
|
19
|
+
# rspec may be run, below are examples of the most common uses.
|
20
|
+
# * bundler: 'bundle exec rspec'
|
21
|
+
# * bundler binstubs: 'bin/rspec'
|
22
|
+
# * spring: 'bin/rspec' (This will use spring if running and you have
|
23
|
+
# installed the spring binstubs per the docs)
|
24
|
+
# * zeus: 'zeus rspec' (requires the server to be started separately)
|
25
|
+
# * 'just' rspec: 'rspec'
|
26
|
+
|
27
|
+
guard :rspec, cmd: "bundle exec rspec" do
  require "guard/rspec/dsl"
  helper = Guard::RSpec::Dsl.new(self)

  # Suggestions and improvements are welcome as issues on the guard-rspec project.

  # Watchers for the RSpec files themselves
  rspec_paths = helper.rspec
  watch(rspec_paths.spec_helper) { rspec_paths.spec_dir }
  watch(rspec_paths.spec_support) { rspec_paths.spec_dir }
  watch(rspec_paths.spec_files)

  # Watchers for Ruby library files
  ruby_paths = helper.ruby
  helper.watch_spec_files_for(ruby_paths.lib_files)

  # Watchers for Rails application files and views
  rails_paths = helper.rails(view_extensions: %w(erb haml slim))
  helper.watch_spec_files_for(rails_paths.app_files)
  helper.watch_spec_files_for(rails_paths.views)

  # Watchers for Turnip features and their step definitions
  watch(%r{^spec/acceptance/(.+)\.feature$})
  watch(%r{^spec/acceptance/steps/(.+)_steps\.rb$}) { |match|
    # Use the first feature file named after the steps file, else the whole directory.
    Dir[File.join("**/#{match[1]}.feature")].first || "spec/acceptance"
  }
end
|
data/README.md
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# NaiveBayesRb
|
2
|
+
|
3
|
+
A very simple Ruby implementation of the Naive Bayes classification model.
|
4
|
+
|
5
|
+
## Design Considerations
|
6
|
+
|
7
|
+
1. The interface closely resembles the Python [scikit-learn interface].
|
8
|
+
2. Model serialization and persistence are supported, so that a model can be reused, distributed, and shared. `MarshalSerializer` is the default, and a custom serializer can be plugged in instead.
|
9
|
+
|
10
|
+
[scikit-learn interface]: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.naive_bayes
|
11
|
+
|
12
|
+
## Usage
|
13
|
+
|
14
|
+
### basics
|
15
|
+
|
16
|
+
```ruby
|
17
|
+
nb = NaiveBayesRb::NaiveBayes.new
|
18
|
+
train = [[1, 20], [2, 21], [3, 22], [4, 23]]
|
19
|
+
target = [1, 0, 1, 0]
|
20
|
+
test = [[0, 0], [4, 24]]
|
21
|
+
predictions = nb.fit(train, target).predict(test) #=> [1, 0]
|
22
|
+
nb.accuracy(predictions, [1, 1]) #=> 50.0
|
23
|
+
```
|
24
|
+
### Model Persistence
|
25
|
+
|
26
|
+
```ruby
|
27
|
+
NaiveBayesRb::NaiveBayes.serializer =
|
28
|
+
nb = NaiveBayesRb::NaiveBayes.new
|
29
|
+
nb.fit(train, target).save('model.pb')
|
30
|
+
```
|
31
|
+
|
32
|
+
### Loading Persisted Model
|
33
|
+
|
34
|
+
```ruby
|
35
|
+
NaiveBayesRb::NaiveBayes.serializer =
|
36
|
+
nb = NaiveBayesRb::NaiveBayes.load('model.pb')
|
37
|
+
```
|
38
|
+
|
39
|
+
## Installation
|
40
|
+
|
41
|
+
Add this line to your application's Gemfile:
|
42
|
+
|
43
|
+
```ruby
|
44
|
+
gem 'naive_bayes_rb'
|
45
|
+
```
|
46
|
+
|
47
|
+
And then execute:
|
48
|
+
|
49
|
+
$ bundle
|
50
|
+
|
51
|
+
Or install it yourself as:
|
52
|
+
|
53
|
+
$ gem install naive_bayes_rb
|
54
|
+
|
55
|
+
## Thanks
|
56
|
+
|
57
|
+
I followed the tutorials from [this blog].
|
58
|
+
|
59
|
+
[this blog]: https://machinelearningmastery.com/naive-bayes-classifier-scratch-python/
|
data/Rakefile
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
module NaiveBayesRb
  # Naive Bayes classifier front end. Training summarises the rows of each
  # target value through Stats.mean_stdev; prediction and scoring are
  # delegated to Stats. Persistence comes from the SavedModel mixin.
  class NaiveBayes
    include SavedModel

    # Hash mapping each target value to the summaries returned by
    # Stats.mean_stdev for that value's rows. Empty until #fit is called.
    attr_accessor :model

    def initialize
      @model = {}
    end

    # Builds the model from training rows and their targets.
    #
    # @param data [Array<Array>] training rows
    # @param target [Array] target value of each row, paired by position
    # @return [NaiveBayes] self, so calls can be chained
    def fit(data, target)
      rows_by_class = data.zip(target).group_by(&:last)
      @model = rows_by_class.each_with_object({}) do |(label, pairs), summaries|
        # Each pair is [row, label]; only the rows feed the statistics.
        summaries[label] = Stats.mean_stdev(pairs.map(&:first))
      end
      self
    end

    # Predicts a target value for every row.
    #
    # @param data [Array] rows to classify
    # @return [Array] one result of Stats.prediction per row
    def predict(data)
      data.map { |row| Stats.prediction(row, @model) }
    end

    # Number of entries in the model, i.e. one per distinct target value seen
    # by #fit (0 for a fresh instance).
    # NOTE(review): the name suggests a feature count, but the code counts
    # model entries; confirm the intended meaning before changing it.
    def dimension
      @model.values.length
    end

    # Percentage of predictions that equal the actual values; see
    # Stats.accuracy.
    def accuracy(prediction, actual)
      Stats.accuracy(prediction, actual)
    end
  end
end
|
32
|
+
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module NaiveBayesRb
  # Mixin that adds persistence to a class exposing a `model` accessor.
  # Including it also extends the class with ClassMethods, which hold the
  # serializer setting and the `load` constructor.
  #
  # The serializer is any object responding to `save(model, path)` and
  # `load(path)`.
  module SavedModel
    def self.included(base)
      base.extend(ClassMethods)
    end

    module ClassMethods
      # Serializer used by both `load` and `#save`. It is stored on the
      # extended class itself, so it is nil until assigned.
      attr_accessor :serializer

      # Creates a new instance whose model is read from +path+.
      #
      # @param path [String] location handed to the serializer
      # @return [Object] a new instance of the including class
      def load(path)
        # Go through the public reader, exactly as #save does, so both
        # directions always use the same serializer.
        new.tap { |instance| instance.model = serializer.load(path) }
      end
    end

    # Writes this instance's model to +path+ using the class-level serializer.
    #
    # @param path [String] location handed to the serializer
    # @return whatever the serializer's `save` returns
    def save(path)
      self.class.serializer.save(model, path)
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module NaiveBayesRb
  # Stateless statistical helpers used by the classifier. `extend self` makes
  # every method callable as Stats.method_name.
  module Stats
    extend self

    # Per-column mean and sample standard deviation.
    #
    # @param data [Array<Array<Numeric>>] rows; the first row fixes the
    #   number of columns that are summarised
    # @return [Array<Array(Float, Float)>] one [mean, stdev] pair per column
    def mean_stdev(data)
      last_index = data[0].length - 1
      (0..last_index).map do |i|
        column = data.map { |row| row[i] }
        [mean(column), stdev(column)]
      end
    end

    # Normal (Gaussian) probability density of +value+.
    #
    # @param value [Numeric] observed value
    # @param mean [Numeric] mean of the distribution
    # @param stdev [Numeric] standard deviation of the distribution
    # @return [Float] density; not a finite number when stdev is 0, because
    #   both divisions below then divide by zero
    def probability(value, mean, stdev)
      # Force float arithmetic: with all-Integer arguments the exponent would
      # otherwise be computed with floor division.
      variance = stdev.to_f**2
      exponent = Math.exp(-((value - mean)**2) / (2 * variance))
      (1 / (Math.sqrt(2 * Math::PI) * stdev)) * exponent
    end

    # Product of the per-feature densities of +value+ for every class.
    #
    # @param value [Array<Numeric>, Numeric] feature values; a scalar is
    #   wrapped with Array()
    # @param summaries [Hash] class => list of [mean, stdev] pairs
    # @return [Hash] class => product of the feature densities
    def class_probability(value, summaries)
      features = Array(value)
      summaries.each_with_object({}) do |(label, stats), scores|
        scores[label] = stats.zip(features).inject(1) do |product, ((mu, sigma), x)|
          product * probability(x, mu, sigma)
        end
      end
    end

    # Class with the highest score from #class_probability.
    #
    # @return the winning class, or nil when +summaries+ is empty
    def prediction(value, summaries)
      best = class_probability(value, summaries).max_by { |_, score| score }
      best && best.first
    end

    # Percentage (0.0..100.0) of predictions equal to their target.
    #
    # @param predictions [Array] predicted values
    # @param target [Array] expected values, paired by position
    # @return [Float] percentage of matches
    def accuracy(predictions, target)
      hits = predictions.zip(target).count { |predicted, actual| predicted == actual }
      hits * 100.0 / predictions.length
    end

    private

    # Arithmetic mean as a Float. Plain float division is used on purpose:
    # Float#div floors its result to an Integer.
    def mean(list)
      list.inject(:+).to_f / list.length
    end

    # Sample standard deviation (divides by n - 1). For a single-element list
    # the divisor is 0, so the result is not a finite number.
    def stdev(list)
      center = mean(list)
      squared_error = list.inject(0) { |acc, x| acc + (x - center)**2 }
      Math.sqrt(squared_error / (list.length - 1).to_f)
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# coding: utf-8
# Put this gem's lib directory on the load path so the version file resolves.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'naive_bayes_rb/version'

Gem::Specification.new do |gem|
  gem.name     = "naive_bayes_rb"
  gem.version  = NaiveBayesRb::VERSION
  gem.authors  = ["Jack Xu"]
  gem.email    = ["jackxxu@gmail.com"]
  gem.summary  = %q{simply naive bayes implementation in ruby.}
  gem.homepage = "https://github.com/jackxxu/naive_bayes_rb"

  # Restrict pushes to a single host (rubygems.org) through the
  # 'allowed_push_host' metadata key. Specifications without metadata support
  # cannot carry that restriction, so refuse to build on them.
  unless gem.respond_to?(:metadata)
    raise "RubyGems 2.0 or newer is required to protect against " \
      "public gem pushes."
  end
  gem.metadata['allowed_push_host'] = "https://rubygems.org"

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  gem.bindir        = "exe"
  gem.executables   = gem.files.grep(%r{^exe/}) { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  gem.add_development_dependency "bundler", "~> 1.13"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "rspec", "~> 3.0"
end
|
metadata
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: naive_bayes_rb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jack Xu
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-03-06 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.13'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.13'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
description:
|
56
|
+
email:
|
57
|
+
- jackxxu@gmail.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- ".gitignore"
|
63
|
+
- ".rspec"
|
64
|
+
- ".travis.yml"
|
65
|
+
- Gemfile
|
66
|
+
- Guardfile
|
67
|
+
- README.md
|
68
|
+
- Rakefile
|
69
|
+
- lib/naive_bayes_rb.rb
|
70
|
+
- lib/naive_bayes_rb/marshal_serializer.rb
|
71
|
+
- lib/naive_bayes_rb/naive_bayes.rb
|
72
|
+
- lib/naive_bayes_rb/saved_model.rb
|
73
|
+
- lib/naive_bayes_rb/stats.rb
|
74
|
+
- lib/naive_bayes_rb/version.rb
|
75
|
+
- naive_bayes_rb.gemspec
|
76
|
+
homepage: https://github.com/jackxxu/naive_bayes_rb
|
77
|
+
licenses: []
|
78
|
+
metadata:
|
79
|
+
allowed_push_host: https://rubygems.org
|
80
|
+
post_install_message:
|
81
|
+
rdoc_options: []
|
82
|
+
require_paths:
|
83
|
+
- lib
|
84
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '0'
|
89
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
requirements: []
|
95
|
+
rubyforge_project:
|
96
|
+
rubygems_version: 2.4.6
|
97
|
+
signing_key:
|
98
|
+
specification_version: 4
|
99
|
+
summary: simply naive bayes implementation in ruby.
|
100
|
+
test_files: []
|