multinomial_naive_bayes 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e8ddc006ffc53986e48eae80b26a28bb4463328c
4
+ data.tar.gz: 096b99930d2f1fcbed9f599a3e7703b23791eea0
5
+ SHA512:
6
+ metadata.gz: cb56810f0793160629bf2475e67b9aab4c5def427274a99bf1c65dc445a98dedd78008c02f3345ae2e9f1cd8a300f128185917c08fe894f661478c52f2997429
7
+ data.tar.gz: 1771a1e833359cf10aacc333ef1999daf8763311c7a784cbf8ca50db36bb1dfaccb648e4da625b94b9bb4c3c475054258bb9e70b8a87e2ef7235425c464b99a7
@@ -0,0 +1,2 @@
1
+ require "multinomial_naive_bayes/classifier"
2
+ require "multinomial_naive_bayes/learner"
@@ -0,0 +1,34 @@
1
module MultinomialNaiveBayes
  # Scores feature-count vectors against per-category feature probabilities
  # and category priors, working in log space throughout.
  class Classifier
    # @param categories_summaries [Hash] category => { feature index => probability
    #   of that feature within the category }
    # @param categories_probabilities [Hash] category => prior probability
    def initialize(categories_summaries, categories_probabilities)
      @categories_summaries = categories_summaries
      @categories_probabilities = categories_probabilities
    end

    # Picks the category with the highest log score for +vector+.
    #
    # @param vector [Array<Numeric>] feature values, indexed by feature
    # @return [Object, nil] the winning category, or nil when no categories are known
    def classify(vector)
      best = max_ln_category_probability(vector)
      best && best[0]
    end

    # @param vector [Array<Numeric>] feature values, indexed by feature
    # @return [Array, nil] the +[category, ln_probability]+ pair with the highest
    #   score, or nil when no categories are known
    def max_ln_category_probability(vector)
      all_ln_categories_probabilities(vector).max_by { |_category, ln_probability| ln_probability }
    end

    # @param vector [Array<Numeric>] feature values, indexed by feature
    # @return [Hash] category => log score of +vector+ under that category
    def all_ln_categories_probabilities(vector)
      @categories_summaries.keys.each_with_object({}) do |category, map|
        map[category] = ln_category_probability(vector, category)
      end
    end

    # Computes ln(prior) + sum over features of value * ln(feature probability).
    #
    # @param vector [Array<Numeric>] feature values, indexed by feature
    # @param category [Object] a key of the summaries/priors hashes
    # @return [Float] the (unnormalised) log score of +vector+ for +category+
    def ln_category_probability(vector, category)
      feature_probabilities = @categories_summaries[category]
      ln_likelihood = 0.0
      vector.each_with_index do |feature_value, feature|
        # A zero count contributes p**0 == 1, i.e. ln 1 == 0. Skipping it also
        # avoids 0 * -Infinity == NaN when the feature probability is 0.
        next if feature_value.zero?

        # value * ln(p) instead of ln(p**value): the power underflows to 0.0
        # for large counts, which would turn every score into -Infinity.
        ln_likelihood += feature_value * Math.log(feature_probabilities[feature])
      end
      ln_likelihood + Math.log(@categories_probabilities[category])
    end
  end
end
@@ -0,0 +1,47 @@
1
module MultinomialNaiveBayes
  # Accumulates labelled feature-count vectors and derives the smoothed
  # per-category feature probabilities and category priors from them.
  class Learner
    # @param alpha [Numeric] additive smoothing constant added to every
    #   per-feature count
    def initialize(alpha = 1.0)
      @alpha = alpha
      # Initialised eagerly so the summary/probability readers work (and
      # return empty hashes) before the first call to #train.
      @category_to_feature_group = {}
      @category_to_num_instances = Hash.new(0)
      @number_of_features = nil
    end

    # Records one training instance.
    #
    # @param vector [Array<Numeric>] feature values, indexed by feature
    # @param category [Object] label of the instance
    # @return [Integer] number of instances seen so far for +category+
    def train(vector, category)
      feature_group = (@category_to_feature_group[category] ||= {})
      vector.each_with_index do |feature_value, feature|
        (feature_group[feature] ||= []) << feature_value
      end
      # The feature count is fixed by the first vector ever trained.
      @number_of_features ||= vector.length
      @category_to_num_instances[category] += 1
    end

    # @return [Classifier] a classifier built from the data trained so far
    def classifier
      Classifier.new(categories_summaries, categories_probabilities)
    end

    # @return [Hash] category => { feature index => smoothed probability }
    def categories_summaries
      @category_to_feature_group.each_with_object({}) do |(category, feature_group), map|
        map[category] = category_summary(feature_group)
      end
    end

    # Computes (count_of_feature + alpha) / (count_of_all_features + alpha * n)
    # for every feature of one category.
    #
    # @param feature_group [Hash] feature index => array of observed values
    # @return [Hash] feature index => smoothed probability
    def category_summary(feature_group)
      total_count_of_all_features = feature_group.values.flatten.reduce(0, :+)
      # Falls back to the group's own size when nothing has been trained yet.
      number_of_features = @number_of_features || feature_group.size
      # alpha is added once per feature in the numerators, so the denominator
      # must grow by alpha * n for the probabilities to sum to 1.
      denominator = (total_count_of_all_features + @alpha * number_of_features).to_f
      feature_group.each_with_object({}) do |(feature, feature_values), map|
        map[feature] = (feature_values.reduce(0, :+) + @alpha) / denominator
      end
    end

    # @return [Hash] category => fraction of training instances with that label
    def categories_probabilities
      total_instances = @category_to_num_instances.values.reduce(0, :+)
      @category_to_num_instances.each_with_object({}) do |(category, num_instances), map|
        map[category] = num_instances.to_f / total_instances
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,46 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: multinomial_naive_bayes
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - An Le
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-04-24 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email:
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/multinomial_naive_bayes.rb
20
+ - lib/multinomial_naive_bayes/classifier.rb
21
+ - lib/multinomial_naive_bayes/learner.rb
22
+ homepage: https://github.com/lntan/multinomial_naive_bayes
23
+ licenses:
24
+ - MIT
25
+ metadata: {}
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubyforge_project:
42
+ rubygems_version: 2.6.3
43
+ signing_key:
44
+ specification_version: 4
45
+ summary: Implement the naive Bayes algorithm for multinomially distributed data
46
+ test_files: []