naive_bayes_rb 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d58fb9885c51f52a094b39943f6486029d15d810
4
+ data.tar.gz: a5d0d2d8f4d1db8febd0786c6b911016dfe4a5bf
5
+ SHA512:
6
+ metadata.gz: 26ac613de8b7e622bba0e53a9a7322f176db1c37d842348276ecf8ee979b804fc563c9acf6822be404cebee5a848e90945c0ea6efea755ec3853c1dd1a7d2641
7
+ data.tar.gz: 7271432d30ddc176c31975344ed9260e5ac4fdb82f2c12cdfca4fb3a43a91ae31e7fdb90765268540ec13f3e7354f14226618537ef3b3cd470f86a27a85774d5
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.gem
11
+ test.pb
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.2.1
5
+ before_install: gem install bundler -v 1.13.5
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in naive_bayes_rb.gemspec
4
+ gemspec
5
+
6
+ gem 'guard-rspec', require: false
7
+ gem 'terminal-notifier-guard'
8
+ gem 'pry'
@@ -0,0 +1,53 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ ## Uncomment and set this to only include directories you want to watch
5
+ # directories %w(app lib config test spec features) \
6
+ # .select{|d| Dir.exists?(d) ? d : UI.warning("Directory #{d} does not exist")}
7
+
8
+ ## Note: if you are using the `directories` clause above and you are not
9
+ ## watching the project directory ('.'), then you will want to move
10
+ ## the Guardfile to a watched dir and symlink it back, e.g.
11
+ #
12
+ # $ mkdir config
13
+ # $ mv Guardfile config/
14
+ # $ ln -s config/Guardfile .
15
+ #
16
+ # and, you'll have to watch "config/Guardfile" instead of "Guardfile"
17
+
18
+ # Note: The cmd option is now required due to the increasing number of ways
19
+ # rspec may be run, below are examples of the most common uses.
20
+ # * bundler: 'bundle exec rspec'
21
+ # * bundler binstubs: 'bin/rspec'
22
+ # * spring: 'bin/rspec' (This will use spring if running and you have
23
+ # installed the spring binstubs per the docs)
24
+ # * zeus: 'zeus rspec' (requires the server to be started separately)
25
+ # * 'just' rspec: 'rspec'
26
+
27
+ guard :rspec, cmd: "bundle exec rspec" do
28
+ require "guard/rspec/dsl"
29
+ dsl = Guard::RSpec::Dsl.new(self)
30
+
31
+ # Feel free to open issues for suggestions and improvements
32
+
33
+ # RSpec files
34
+ rspec = dsl.rspec
35
+ watch(rspec.spec_helper) { rspec.spec_dir }
36
+ watch(rspec.spec_support) { rspec.spec_dir }
37
+ watch(rspec.spec_files)
38
+
39
+ # Ruby files
40
+ ruby = dsl.ruby
41
+ dsl.watch_spec_files_for(ruby.lib_files)
42
+
43
+ # Rails files
44
+ rails = dsl.rails(view_extensions: %w(erb haml slim))
45
+ dsl.watch_spec_files_for(rails.app_files)
46
+ dsl.watch_spec_files_for(rails.views)
47
+
48
+ # Turnip features and steps
49
+ watch(%r{^spec/acceptance/(.+)\.feature$})
50
+ watch(%r{^spec/acceptance/steps/(.+)_steps\.rb$}) do |m|
51
+ Dir[File.join("**/#{m[1]}.feature")][0] || "spec/acceptance"
52
+ end
53
+ end
@@ -0,0 +1,59 @@
1
+ # NaiveBayesRb
2
+
3
+ A very simple Ruby implementation of the Naive Bayes classification model.
4
+
5
+ ## Design Considerations
6
+
7
+ 1. The interface closely resembles the Python [scikit-learn interface].
8
+ 2. Models can be serialized and persisted, so that a trained model can be reused, distributed, and shared. A `MarshalSerializer` is provided out of the box, and a custom serializer can be plugged in.
9
+
10
+ [scikit-learn interface]: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.naive_bayes
11
+
12
+ ## Usage
13
+
14
+ ### basics
15
+
16
+ ```ruby
17
+ nb = NaiveBayesRb::NaiveBayes.new
18
+ train = [[1, 20], [2, 21], [3, 22], [4, 23]]
19
+ target = [1, 0, 1, 0]
20
+ test = [[0, 0], [4, 24]]
21
+ predictions = nb.fit(train, target).predict(test) #=> [1, 0]
22
+ nb.accuracy(predictions, [1, 1]) #=> 50.0
23
+ ```
24
+ ### Model Persistence
25
+
26
+ ```ruby
27
+ NaiveBayesRb::NaiveBayes.serializer = NaiveBayesRb::MarshalSerializer
28
+ nb = NaiveBayesRb::NaiveBayes.new
29
+ nb.fit(train, target).save('model.pb')
30
+ ```
31
+
32
+ ### Loading Persisted Model
33
+
34
+ ```ruby
35
+ NaiveBayesRb::NaiveBayes.serializer = NaiveBayesRb::MarshalSerializer
36
+ nb = NaiveBayesRb::NaiveBayes.load('model.pb')
37
+ ```
38
+
39
+ ## Installation
40
+
41
+ Add this line to your application's Gemfile:
42
+
43
+ ```ruby
44
+ gem 'naive_bayes_rb'
45
+ ```
46
+
47
+ And then execute:
48
+
49
+ $ bundle
50
+
51
+ Or install it yourself as:
52
+
53
+ $ gem install naive_bayes_rb
54
+
55
+ ## Thanks
56
+
57
+ I followed the tutorials from [this blog].
58
+
59
+ [this blog]: https://machinelearningmastery.com/naive-bayes-classifier-scratch-python/
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,5 @@
1
+ require "naive_bayes_rb/version"
2
+ require "naive_bayes_rb/marshal_serializer"
3
+ require "naive_bayes_rb/saved_model"
4
+ require "naive_bayes_rb/stats"
5
+ require "naive_bayes_rb/naive_bayes"
@@ -0,0 +1,12 @@
1
module NaiveBayesRb
  # Serializer that persists a model to disk using Ruby's built-in Marshal
  # format.
  #
  # Any object responding to `save(model, path)` and `load(path)` can be used
  # in its place.
  module MarshalSerializer
    # `extend self` makes the instance methods below callable directly on the
    # module (`MarshalSerializer.save(...)`), the same pattern `Stats` uses.
    extend self

    # Writes the marshalled +model+ to +path+ in binary mode, replacing any
    # existing file.
    #
    # @param model [Object] any Marshal-dumpable object
    # @param path [String] destination file path
    # @return [Integer] number of bytes written
    def save(model, path)
      File.binwrite(path, Marshal.dump(model))
    end

    # Reads +path+ in binary mode and reconstructs the marshalled object.
    #
    # SECURITY: Marshal.load can instantiate arbitrary objects; only load
    # model files that come from a trusted source.
    #
    # @param path [String] file previously written by {#save}
    # @return [Object] the deserialized model
    def load(path)
      Marshal.load(File.binread(path))
    end
  end
end
@@ -0,0 +1,32 @@
1
module NaiveBayesRb
  # Classifier front-end: learns per-label summaries via `Stats.mean_stdev`
  # and classifies rows via `Stats.prediction`. Persistence (`save` / `.load`)
  # comes from the `SavedModel` mixin.
  class NaiveBayes
    include SavedModel

    # Hash mapping each target label to the summaries produced by
    # `Stats.mean_stdev` for the rows carrying that label.
    attr_accessor :model

    # Starts with an empty (unfitted) model.
    def initialize
      @model = {}
    end

    # Learns the model from training rows and their labels.
    #
    # @param data [Array<Array>] training rows
    # @param target [Array] label for each row, paired with `data` by position
    # @return [NaiveBayes] self, so calls can be chained
    def fit(data, target)
      rows_by_label = data.zip(target).group_by(&:last)
      @model = rows_by_label.each_with_object({}) do |(label, pairs), summaries|
        # Each pair is [row, label]; only the rows are summarised.
        summaries[label] = Stats.mean_stdev(pairs.map(&:first))
      end
      self
    end

    # @param data [Array<Array>] rows to classify
    # @return [Array] one predicted label per row
    def predict(data)
      data.map { |row| Stats.prediction(row, @model) }
    end

    # Number of entries in the model hash (one per distinct label after `fit`).
    # NOTE(review): the name suggests a feature count, but this counts labels;
    # confirm the intended meaning.
    def dimension
      (@model.values || []).size
    end

    # Delegates to `Stats.accuracy`.
    #
    # @param prediction [Array] predicted labels
    # @param actual [Array] expected labels
    def accuracy(prediction, actual)
      Stats.accuracy(prediction, actual)
    end
  end
end
32
+
@@ -0,0 +1,27 @@
1
module NaiveBayesRb
  # Mixin adding model persistence to a class that exposes a `model`
  # reader/writer.
  #
  # Including it adds the instance method `save` and the class-level methods
  # `serializer`, `serializer=` and `load`.
  module SavedModel
    # Hook run on `include`; installs the class-level API on the host class.
    def self.included(base)
      base.extend(ClassMethods)
    end

    # Class-level API added to the including class.
    module ClassMethods
      # Sets the serializer: any object responding to `save(model, path)`
      # and `load(path)`.
      attr_writer :serializer

      # @return [#save, #load] the configured serializer, or
      #   `MarshalSerializer` when none has been set
      def serializer
        # The constant is resolved lazily, so this file does not depend on
        # load order.
        @serializer || MarshalSerializer
      end

      # Builds a new instance whose model is read from +path+.
      #
      # @param path [String] location of a previously saved model
      # @return [Object] a new instance of the including class
      def load(path)
        # Go through the accessor so the default serializer applies here too.
        new.tap { |instance| instance.model = serializer.load(path) }
      end
    end

    # Persists this instance's model to +path+ using the class's serializer.
    #
    # @param path [String] destination path
    # @return whatever the serializer's `save` returns
    def save(path)
      self.class.serializer.save(model, path)
    end
  end
end
@@ -0,0 +1,48 @@
1
module NaiveBayesRb
  # Stateless numeric helpers behind the classifier: per-feature summaries,
  # the normal-distribution density, class scoring and accuracy.
  module Stats
    extend self

    # Summarises each feature column of +data+.
    #
    # @param data [Array<Array<Numeric>>] rows of equal length (non-empty)
    # @return [Array<Array(Float, Float)>] one [mean, stdev] pair per column
    def mean_stdev(data)
      data.first.each_index.map do |i|
        column = data.map { |row| row[i] }
        [mean(column), stdev(column)]
      end
    end

    # Normal-distribution density of +value+ for the given mean and stdev.
    #
    # A +stdev+ of zero makes the divisions below divide by 0.0, so the
    # result is not a finite number.
    #
    # @param value [Numeric]
    # @param mean [Numeric]
    # @param stdev [Numeric]
    # @return [Float]
    def probability(value, mean, stdev)
      # Force float arithmetic: with all-Integer arguments the exponent's
      # division would otherwise be integer (floor) division.
      variance = stdev.to_f**2
      exponent = Math.exp(-((value - mean)**2) / (2 * variance))
      (1 / (Math.sqrt(2 * Math::PI) * stdev)) * exponent
    end

    # Scores +value+ against every class summary.
    #
    # @param value [Array<Numeric>, Numeric] feature row (a scalar is wrapped)
    # @param summaries [Hash{Object => Array<Array(Numeric, Numeric)>}]
    #   per-class [mean, stdev] pairs, one pair per feature
    # @return [Hash{Object => Float}] per-class product of feature densities
    def class_probability(value, summaries)
      features = Array(value)
      summaries.each_with_object({}) do |(label, stats), scores|
        scores[label] = stats.zip(features).inject(1) do |product, ((mu, sigma), x)|
          product * probability(x, mu, sigma)
        end
      end
    end

    # @param value [Array<Numeric>, Numeric] feature row
    # @param summaries [Hash] see {#class_probability}
    # @return [Object] the label with the highest score
    def prediction(value, summaries)
      class_probability(value, summaries).max_by { |_, score| score }.first
    end

    # Percentage (0.0..100.0) of positions where prediction and target agree.
    #
    # @param predictions [Array]
    # @param target [Array]
    # @return [Float]
    def accuracy(predictions, target)
      hits = predictions.zip(target).count { |predicted, actual| predicted == actual }
      hits * 100.0 / predictions.length
    end

    private

    # Arithmetic mean. Uses true float division; `Float#div` would floor
    # the result to an Integer.
    def mean(list)
      list.inject(:+).to_f / list.length
    end

    # Sample standard deviation (divides by n - 1). A one-element list
    # divides 0.0 by 0.0 and so yields NaN.
    def stdev(list)
      centre = mean(list)
      squared_error = list.inject(0.0) { |acc, x| acc + (x - centre)**2 }
      Math.sqrt(squared_error / (list.length - 1).to_f)
    end
  end
end
@@ -0,0 +1,3 @@
1
module NaiveBayesRb
  # Release number of the gem.
  VERSION = '0.1.0'
end
@@ -0,0 +1,33 @@
1
# coding: utf-8
# Gem specification for naive_bayes_rb.

# Make lib/ loadable so the version constant can be read without installing the gem.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'naive_bayes_rb/version'

Gem::Specification.new do |spec|
  spec.name          = "naive_bayes_rb"
  spec.version       = NaiveBayesRb::VERSION
  spec.authors       = ["Jack Xu"]
  spec.email         = ["jackxxu@gmail.com"]
  spec.summary       = %q{simple naive bayes implementation in ruby.}
  spec.homepage      = "https://github.com/jackxxu/naive_bayes_rb"

  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
  # to allow pushing to a single host or delete this section to allow pushing to any host.
  if spec.respond_to?(:metadata)
    spec.metadata['allowed_push_host'] = "https://rubygems.org"
  else
    raise "RubyGems 2.0 or newer is required to protect against " \
      "public gem pushes."
  end

  # Package every git-tracked file except those under test/, spec/ or features/.
  spec.files         = `git ls-files -z`.split("\x0").reject do |f|
    f.match(%r{^(test|spec|features)/})
  end
  spec.bindir        = "exe"
  # Every file under exe/ is exposed as an executable, by basename.
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.13"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
end
metadata ADDED
@@ -0,0 +1,100 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: naive_bayes_rb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Jack Xu
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2018-03-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.13'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.13'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ description:
56
+ email:
57
+ - jackxxu@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".rspec"
64
+ - ".travis.yml"
65
+ - Gemfile
66
+ - Guardfile
67
+ - README.md
68
+ - Rakefile
69
+ - lib/naive_bayes_rb.rb
70
+ - lib/naive_bayes_rb/marshal_serializer.rb
71
+ - lib/naive_bayes_rb/naive_bayes.rb
72
+ - lib/naive_bayes_rb/saved_model.rb
73
+ - lib/naive_bayes_rb/stats.rb
74
+ - lib/naive_bayes_rb/version.rb
75
+ - naive_bayes_rb.gemspec
76
+ homepage: https://github.com/jackxxu/naive_bayes_rb
77
+ licenses: []
78
+ metadata:
79
+ allowed_push_host: https://rubygems.org
80
+ post_install_message:
81
+ rdoc_options: []
82
+ require_paths:
83
+ - lib
84
+ required_ruby_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ required_rubygems_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ requirements: []
95
+ rubyforge_project:
96
+ rubygems_version: 2.4.6
97
+ signing_key:
98
+ specification_version: 4
99
+ summary: simply naive bayes implementation in ruby.
100
+ test_files: []