naive_bayes_rb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d58fb9885c51f52a094b39943f6486029d15d810
4
+ data.tar.gz: a5d0d2d8f4d1db8febd0786c6b911016dfe4a5bf
5
+ SHA512:
6
+ metadata.gz: 26ac613de8b7e622bba0e53a9a7322f176db1c37d842348276ecf8ee979b804fc563c9acf6822be404cebee5a848e90945c0ea6efea755ec3853c1dd1a7d2641
7
+ data.tar.gz: 7271432d30ddc176c31975344ed9260e5ac4fdb82f2c12cdfca4fb3a43a91ae31e7fdb90765268540ec13f3e7354f14226618537ef3b3cd470f86a27a85774d5
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.gem
11
+ test.pb
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.2.1
5
+ before_install: gem install bundler -v 1.13.5
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in naive_bayes_rb.gemspec
4
+ gemspec
5
+
6
+ gem 'guard-rspec', require: false
7
+ gem 'terminal-notifier-guard'
8
+ gem 'pry'
@@ -0,0 +1,53 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ ## Uncomment and set this to only include directories you want to watch
5
+ # directories %w(app lib config test spec features) \
6
+ # .select{|d| Dir.exists?(d) ? d : UI.warning("Directory #{d} does not exist")}
7
+
8
+ ## Note: if you are using the `directories` clause above and you are not
9
+ ## watching the project directory ('.'), then you will want to move
10
+ ## the Guardfile to a watched dir and symlink it back, e.g.
11
+ #
12
+ # $ mkdir config
13
+ # $ mv Guardfile config/
14
+ # $ ln -s config/Guardfile .
15
+ #
16
+ # and, you'll have to watch "config/Guardfile" instead of "Guardfile"
17
+
18
+ # Note: The cmd option is now required due to the increasing number of ways
19
+ # rspec may be run, below are examples of the most common uses.
20
+ # * bundler: 'bundle exec rspec'
21
+ # * bundler binstubs: 'bin/rspec'
22
+ # * spring: 'bin/rspec' (This will use spring if running and you have
23
+ # installed the spring binstubs per the docs)
24
+ # * zeus: 'zeus rspec' (requires the server to be started separately)
25
+ # * 'just' rspec: 'rspec'
26
+
27
+ guard :rspec, cmd: "bundle exec rspec" do
28
+ require "guard/rspec/dsl"
29
+ dsl = Guard::RSpec::Dsl.new(self)
30
+
31
+ # Feel free to open issues for suggestions and improvements
32
+
33
+ # RSpec files
34
+ rspec = dsl.rspec
35
+ watch(rspec.spec_helper) { rspec.spec_dir }
36
+ watch(rspec.spec_support) { rspec.spec_dir }
37
+ watch(rspec.spec_files)
38
+
39
+ # Ruby files
40
+ ruby = dsl.ruby
41
+ dsl.watch_spec_files_for(ruby.lib_files)
42
+
43
+ # Rails files
44
+ rails = dsl.rails(view_extensions: %w(erb haml slim))
45
+ dsl.watch_spec_files_for(rails.app_files)
46
+ dsl.watch_spec_files_for(rails.views)
47
+
48
+ # Turnip features and steps
49
+ watch(%r{^spec/acceptance/(.+)\.feature$})
50
+ watch(%r{^spec/acceptance/steps/(.+)_steps\.rb$}) do |m|
51
+ Dir[File.join("**/#{m[1]}.feature")][0] || "spec/acceptance"
52
+ end
53
+ end
@@ -0,0 +1,59 @@
1
+ # NaiveBayesRb
2
+
3
+ A very simple Ruby implementation of the Naive Bayes classification model.
4
+
5
+ ## Design Considerations
6
+
7
+ 1. The interface closely resembles the Python [scikit-learn interface].
8
+ 2. Models can be serialized and persisted, so that a trained model can be reused, distributed, and shared. `MarshalSerializer` ships with the gem, and a custom serializer can be plugged in instead.
9
+
10
+ [scikit-learn interface]: http://scikit-learn.org/stable/modules/classes.html#module-sklearn.naive_bayes
11
+
12
+ ## Usage
13
+
14
+ ### basics
15
+
16
+ ```ruby
17
+ nb = NaiveBayesRb::NaiveBayes.new
18
+ train = [[1, 20], [2, 21], [3, 22], [4, 23]]
19
+ target = [1, 0, 1, 0]
20
+ test = [[0, 0], [4, 24]]
21
+ predictions = nb.fit(train, target).predict(test) #=> [1, 0]
22
+ nb.accuracy(predictions, [1, 1]) #=> 50.0
23
+ ```
24
+ ### Model Persistence
25
+
26
+ ```ruby
27
+ NaiveBayesRb::NaiveBayes.serializer = NaiveBayesRb::MarshalSerializer
28
+ nb = NaiveBayesRb::NaiveBayes.new
29
+ nb.fit(train, target).save('model.pb')
30
+ ```
31
+
32
+ ### Loading Persisted Model
33
+
34
+ ```ruby
35
+ NaiveBayesRb::NaiveBayes.serializer = NaiveBayesRb::MarshalSerializer
36
+ nb = NaiveBayesRb::NaiveBayes.load('model.pb')
37
+ ```
38
+
39
+ ## Installation
40
+
41
+ Add this line to your application's Gemfile:
42
+
43
+ ```ruby
44
+ gem 'naive_bayes_rb'
45
+ ```
46
+
47
+ And then execute:
48
+
49
+ $ bundle
50
+
51
+ Or install it yourself as:
52
+
53
+ $ gem install naive_bayes_rb
54
+
55
+ ## Thanks
56
+
57
+ I followed the tutorials from [this blog].
58
+
59
+ [this blog]: https://machinelearningmastery.com/naive-bayes-classifier-scratch-python/
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,5 @@
1
+ require "naive_bayes_rb/version"
2
+ require "naive_bayes_rb/marshal_serializer"
3
+ require "naive_bayes_rb/saved_model"
4
+ require "naive_bayes_rb/stats"
5
+ require "naive_bayes_rb/naive_bayes"
@@ -0,0 +1,12 @@
1
module NaiveBayesRb
  # Serializer that persists a model to disk in Ruby's built-in Marshal format.
  #
  # Any object responding to +save(model, path)+ and +load(path)+ can be used
  # in its place.
  module MarshalSerializer
    # Makes the instance-style methods below callable on the module itself
    # (MarshalSerializer.save / MarshalSerializer.load).
    extend self

    # Writes the marshalled +model+ to +path+ in binary mode, replacing any
    # existing file.
    #
    # @param model [Object] a Marshal-dumpable object
    # @param path [String] destination file path
    # @return [Integer] number of bytes written
    def save(model, path)
      File.binwrite(path, Marshal.dump(model))
    end

    # Reads +path+ in binary mode and unmarshals its content.
    #
    # SECURITY: Marshal.load can instantiate arbitrary objects. Only load
    # files that come from a trusted source.
    #
    # @param path [String] file previously written by {#save}
    # @return [Object] the deserialized model
    def load(path)
      Marshal.load(File.binread(path))
    end
  end
end
@@ -0,0 +1,32 @@
1
module NaiveBayesRb
  # Naive Bayes classifier whose per-class feature summaries are produced by
  # Stats.mean_stdev and scored by Stats.prediction.
  class NaiveBayes
    include SavedModel

    # The fitted model: a Hash mapping each class label to the value returned
    # by Stats.mean_stdev for that class's samples. Empty until #fit is called
    # or a persisted model is loaded.
    attr_accessor :model

    def initialize
      @model = {}
    end

    # Groups the samples by label and summarises each group.
    #
    # @param data [Array<Array<Numeric>>] one feature vector per sample
    # @param target [Array] class label for each sample, in the same order
    # @return [NaiveBayes] self, so calls can be chained
    # @raise [ArgumentError] if data and target differ in length
    def fit(data, target)
      # zip would otherwise pad missing labels with nil (creating a bogus nil
      # class) or silently drop surplus labels.
      unless data.length == target.length
        raise ArgumentError,
              "data and target must have the same length (#{data.length} vs #{target.length})"
      end

      grouped = data.zip(target).group_by(&:last)
      @model = grouped.each_with_object({}) do |(label, rows), summaries|
        # rows are [features, label] pairs; keep only the feature vectors.
        summaries[label] = Stats.mean_stdev(rows.map(&:first))
      end
      self
    end

    # @param data [Array<Array<Numeric>>] samples to classify
    # @return [Array] the label Stats.prediction picks for each sample
    def predict(data)
      data.map { |sample| Stats.prediction(sample, @model) }
    end

    # Number of entries in the fitted model.
    # NOTE(review): this counts class labels, not features per sample, which
    # the name may suggest -- confirm the intended meaning.
    #
    # @return [Integer]
    def dimension
      @model.values.length
    end

    # @param prediction [Array] predicted labels
    # @param actual [Array] expected labels
    # @return [Float] result of Stats.accuracy for the two lists
    def accuracy(prediction, actual)
      Stats.accuracy(prediction, actual)
    end
  end
end
32
+
@@ -0,0 +1,27 @@
1
module NaiveBayesRb
  # Mixin that adds persistence to a model class.
  #
  # The including class must expose a +model+ reader and writer. Including the
  # module also extends the class with {ClassMethods}.
  module SavedModel
    def self.included(base)
      base.extend(ClassMethods)
    end

    # Class-level API: serializer configuration and model loading.
    module ClassMethods
      # Sets the serializer, an object responding to +save(model, path)+ and
      # +load(path)+. Assigning nil restores the default.
      attr_writer :serializer

      # @return [#save, #load] the configured serializer, or MarshalSerializer
      #   when none has been assigned
      def serializer
        @serializer || MarshalSerializer
      end

      # Builds a new instance whose model is read from +path+.
      #
      # @param path [String] location handed to the serializer's +load+
      # @return [Object] a new instance of the including class
      def load(path)
        # Go through the reader so the default serializer applies here too.
        new.tap { |instance| instance.model = serializer.load(path) }
      end
    end

    # Persists this instance's model through the class-level serializer.
    #
    # @param path [String] location handed to the serializer's +save+
    # @return [Object] whatever the serializer's +save+ returns
    def save(path)
      self.class.serializer.save(model, path)
    end
  end
end
@@ -0,0 +1,48 @@
1
module NaiveBayesRb
  # Stateless numeric helpers used by the classifier: per-feature summaries,
  # normal-distribution densities, class scoring and accuracy.
  module Stats
    extend self

    # Summarises each feature column of +data+.
    #
    # The number of features is taken from the first row.
    #
    # @param data [Array<Array<Numeric>>] non-empty list of feature vectors
    # @return [Array<Array(Float, Float)>] one [mean, stdev] pair per feature
    def mean_stdev(data)
      (0...data[0].length).map do |i|
        column = data.map { |row| row[i] }
        [mean(column), stdev(column)]
      end
    end

    # Normal (Gaussian) probability density of +value+.
    #
    # @param value [Numeric] observed feature value
    # @param mean [Numeric] mean of the distribution
    # @param stdev [Numeric] standard deviation of the distribution
    # @return [Float] the density at +value+
    def probability(value, mean, stdev)
      # 2.0 forces float division even when every argument is an Integer.
      exponent = Math.exp(-((value - mean)**2) / (2.0 * stdev**2))
      (1 / (Math.sqrt(2 * Math::PI) * stdev)) * exponent
    end

    # Scores +value+ against every class.
    #
    # @param value [Array<Numeric>, Numeric] feature vector (a scalar is
    #   wrapped with Array())
    # @param summaries [Hash{Object => Array<Array(Numeric, Numeric)>}]
    #   [mean, stdev] pairs per class label, as built from {#mean_stdev}
    # @return [Hash{Object => Float}] product of the per-feature densities for
    #   each class label
    def class_probability(value, summaries)
      features = Array(value)
      summaries.each_with_object({}) do |(label, stats), scores|
        scores[label] = stats.zip(features).inject(1) do |product, ((mu, sigma), x)|
          product * probability(x, mu, sigma)
        end
      end
    end

    # @param value [Array<Numeric>, Numeric] feature vector to classify
    # @param summaries [Hash] per-class summaries (see {#class_probability})
    # @return [Object] the class label with the highest score
    def prediction(value, summaries)
      class_probability(value, summaries).max_by { |_, score| score }.first
    end

    # Percentage of positions at which +predictions+ and +target+ agree.
    #
    # @param predictions [Array] predicted labels
    # @param target [Array] expected labels
    # @return [Float] value between 0.0 and 100.0 (NaN for empty predictions)
    def accuracy(predictions, target)
      matches = predictions.zip(target).count { |predicted, actual| predicted == actual }
      matches * 100.0 / predictions.length
    end

    private

    # Arithmetic mean. Uses true division: Float#div would floor the result
    # to an Integer.
    def mean(list)
      list.inject(:+).to_f / list.length
    end

    # Sample standard deviation (n - 1 denominator).
    # NOTE(review): a single-element list divides 0.0 by 0.0 and so appears to
    # yield NaN -- confirm whether classes with one sample must be supported.
    def stdev(list)
      m = mean(list)
      squared_error = list.inject(0) { |acc, x| acc + (x - m)**2 }
      Math.sqrt(squared_error / (list.length - 1).to_f)
    end
  end
end
@@ -0,0 +1,3 @@
1
module NaiveBayesRb
  # Gem version string; frozen so the constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,33 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'naive_bayes_rb/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "naive_bayes_rb"
8
+ spec.version = NaiveBayesRb::VERSION
9
+ spec.authors = ["Jack Xu"]
10
+ spec.email = ["jackxxu@gmail.com"]
11
+ spec.summary = %q{simply naive bayes implementation in ruby.}
12
+ spec.homepage = "https://github.com/jackxxu/naive_bayes_rb"
13
+
14
+ # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
15
+ # to allow pushing to a single host or delete this section to allow pushing to any host.
16
+ if spec.respond_to?(:metadata)
17
+ spec.metadata['allowed_push_host'] = "https://rubygems.org"
18
+ else
19
+ raise "RubyGems 2.0 or newer is required to protect against " \
20
+ "public gem pushes."
21
+ end
22
+
23
+ spec.files = `git ls-files -z`.split("\x0").reject do |f|
24
+ f.match(%r{^(test|spec|features)/})
25
+ end
26
+ spec.bindir = "exe"
27
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
28
+ spec.require_paths = ["lib"]
29
+
30
+ spec.add_development_dependency "bundler", "~> 1.13"
31
+ spec.add_development_dependency "rake", "~> 10.0"
32
+ spec.add_development_dependency "rspec", "~> 3.0"
33
+ end
metadata ADDED
@@ -0,0 +1,100 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: naive_bayes_rb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Jack Xu
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2018-03-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.13'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.13'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ description:
56
+ email:
57
+ - jackxxu@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".rspec"
64
+ - ".travis.yml"
65
+ - Gemfile
66
+ - Guardfile
67
+ - README.md
68
+ - Rakefile
69
+ - lib/naive_bayes_rb.rb
70
+ - lib/naive_bayes_rb/marshal_serializer.rb
71
+ - lib/naive_bayes_rb/naive_bayes.rb
72
+ - lib/naive_bayes_rb/saved_model.rb
73
+ - lib/naive_bayes_rb/stats.rb
74
+ - lib/naive_bayes_rb/version.rb
75
+ - naive_bayes_rb.gemspec
76
+ homepage: https://github.com/jackxxu/naive_bayes_rb
77
+ licenses: []
78
+ metadata:
79
+ allowed_push_host: https://rubygems.org
80
+ post_install_message:
81
+ rdoc_options: []
82
+ require_paths:
83
+ - lib
84
+ required_ruby_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ required_rubygems_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ requirements: []
95
+ rubyforge_project:
96
+ rubygems_version: 2.4.6
97
+ signing_key:
98
+ specification_version: 4
99
+ summary: simply naive bayes implementation in ruby.
100
+ test_files: []