RubyGems - scoruby - Versions diffs - 0.2.4 → 0.2.5 - Mend

scoruby 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

checksums.yaml +4 -4
data/.gitignore +2 -2
data/.travis.yml +1 -1
data/README.md +3 -61
data/lib/scoruby/models/naive_bayes.rb +92 -0
data/lib/scoruby/version.rb +1 -1
metadata +3 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 5ab6779638032408bc0a38812ded29891831aebd
-  data.tar.gz: d26d4338fdbb2f00a7d6fed974a6e2d577c1d847
+  metadata.gz: 6d01f41ef6e3e3485acb65587892402866004bd2
+  data.tar.gz: 7d898f2a4c86915e19952a3641286dd252d7c3fd
 SHA512:
-  metadata.gz: 080081fe11bdba935bdd99b178ce74a04d1d05dadc080283f9e474a4d84e8415d2a39cf6a9916c4ee46c78fb46965aa934a0850491de04f6fd1d5806aaa32463
-  data.tar.gz: 38a7ac8547797f2367fcd66036ea87971c531106c0dd0b4c56abf684f48a72db14a9139fa5ff29b0e809aadb5ffb0908a265a603aa6d3ddf5725dc4858cdab0a
+  metadata.gz: de6e7acbbcf5acd97f2b980253be94c7eb2fb7c31fb76c9c9d1fe6ca09447751974734370826d1477db639685a6ab37108ced5d85e4fc9b397285e1b61468a4e
+  data.tar.gz: 92e5728b151d7c3daa24e5fccebe6b7e8567c92af4fb03cdbc5b5ef20f5a75b53c88b5ef79e6d4db20df157995f45b6222677a0e1634702e3769240b61f025d4

data/.gitignore CHANGED Viewed

@@ -3,12 +3,12 @@ coverage
 *.log
+sample.pmml
 *.gem
 spec/fixtures/decision_tree_v2.pmml
-sample.pmml
 test_gbm.pmml
 test_gbm.rb

data/.travis.yml CHANGED Viewed

@@ -1,4 +1,4 @@
 language: ruby
 rvm:
-  - 2.1.2
+  - 2.4.1
 before_install: gem install bundler -v 1.10.5

data/README.md CHANGED Viewed

@@ -7,7 +7,7 @@
 Ruby scoring API for Predictive Model Markup Language (PMML).
-Currently supports random forest and gradient boosted models.
+Currently supports Decision Tree, Random Forest and Gradient Boosted Models.
 Will be happy to implement new models by demand, or assist with any other issue.
@@ -31,37 +31,8 @@ Or install it yourself as:
 ## Usage
 ### Random Forest
-#### Generate  PMML - R
-```R
-# Install and require randomForest, pmml packages
-install.packages('randomForest')
-install.packages('pmml')
-library('randomForest')
-library('pmml')
-# Login to Kaggle and download titanic dataset
-# https://www.kaggle.com/c/titanic/data
-# Load CSV to data frame -
-titanic.train <- read.table("titanic_train.csv", header = TRUE, sep = ",")
-titanic.train$Survived <- as.factor(titanic.train$Survived)
-# Train RF model
-titanic.rf <- randomForest(Survived ~ . - Name - Cabin - Ticket,
-                           data = titanic.train,
-                           na.action = na.roughfix)
-# Generate pmml from model
-pmml <- pmml(titanic.rf)
-saveXML(pmml, 'titanic_rf.pmml')
-```
+[Generate PMML - R](https://github.com/asafschers/scoruby/wiki/Random-Forest)
 #### Classify by PMML - Ruby
 ```ruby
@@ -89,34 +60,7 @@ random_forest.decisions_count(features)
 ### Gradient Boosted model
-#### Generate  PMML - R
-```R
-# Install and require gbm, r2pmml
-library("devtools")
-install_github(repo = "jpmml/r2pmml")
-library("r2pmml")
-library("gbm")
-# Login to Kaggle and download titanic dataset
-# https://www.kaggle.com/c/titanic/data
-# Load CSV to data frame -
-titanic.train <- read.table("titanic_train.csv", header = TRUE, sep = ",")
-titanic.train$Survived <- as.factor(titanic.train$Survived)
-# Train GBM model
-titanic.gbm <- gbm(Survived ~ . - PassengerId - Name - Cabin - Ticket,  data = titanic.train)
-# Generate pmml from model
-pmml <- r2pmml(titanic.gbm, 'titanic_gbm.pmml')
-```
+[Generate PMML - R](https://github.com/asafschers/scoruby/wiki/Gradient-Boosted-Model)
 #### Classify by PMML - Ruby
@@ -142,8 +86,6 @@ gbm.score(features)
 ### Decision Tree
-#### Classify by PMML - Ruby
 ```ruby
 decision_tree = Scoruby.get_model 'decision_tree.pmml'

data/lib/scoruby/models/naive_bayes.rb ADDED Viewed

@@ -0,0 +1,92 @@
+module Scoruby
+  module Models
+    class NaiveBayes
+      attr_reader :data
+      def initialize(xml)
+        @threshold = xml.xpath('//NaiveBayesModel').attr('threshold').value.to_f
+        @data = {}
+        xml.xpath('//BayesInput').each do |feature|
+          @data[feature.attr('fieldName').to_sym] = fetch_feature(feature)
+        end
+        @labels = {}
+        xml.xpath('//BayesOutput//TargetValueCount').each do |l| l.attr('value')
+          @labels[l.attr('value')] = { 'count': l.attr('count').to_f }
+        end
+      end
+      def lvalues(features)
+        @labels.each do |label, _|
+          features.each do |feature_name, feature_value|
+            if @data[feature_name][feature_value]
+              value_count = @data[feature_name][feature_value][label].to_f
+              overall_count = @data[feature_name].sum { |_, value| value[label].to_f }
+              @labels[label][feature_name] = value_count / overall_count
+            elsif @data[feature_name][label]
+              @labels[label][feature_name] = calc_numerical(@data[feature_name][label], feature_value)
+            end
+          end
+        end
+        lvalues = {}
+        @labels.each do |label, label_data|
+          label_data.each do |key, value|
+            label_data[key] = @threshold if value.round(5).zero?
+          end
+          lvalues[label] = label_data.values.reduce(:*)
+        end
+        lvalues
+      end
+      def score(features, label)
+        lvalues = lvalues(features)
+        lvalues[label] / lvalues.values.reduce(:+)
+      end
+      private
+      def calc_numerical(label_data, feature_value)
+        variance = label_data[:variance].to_f
+        mean = label_data[:mean].to_f
+        feature_value = feature_value.to_f
+        Math.exp(-(feature_value - mean)**2 / (2 * variance)) / Math.sqrt(2 * Math::PI * variance)
+      end
+      def fetch_feature(feature)
+        return fetch_numerical_feature(feature) if feature.child.name == 'TargetValueStats'
+        fetch_category_feature(feature)
+      end
+      def fetch_numerical_feature(feature)
+        features_data = {}
+        feature.child.children.each do |child|
+          features_data[child.attr('value').strip] = {
+            mean: child.child.attr('mean'),
+            variance: child.child.attr('variance')
+          }
+        end
+        features_data
+      end
+      def fetch_category_feature(feature)
+        feature_data = {}
+        feature.children.each do |category|
+          feature_data[category.attr('value')] = fetch_category(category)
+        end
+        feature_data
+      end
+      def fetch_category(category)
+        category_data = {}
+        category.child.children.each do |label|
+          category_data[label.attr('value')] = label.attr('count')
+        end
+        category_data
+      end
+    end
+  end
+end

data/lib/scoruby/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Scoruby
-  VERSION = '0.2.4'
+  VERSION = '0.2.5'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: scoruby
 version: !ruby/object:Gem::Version
-  version: 0.2.4
+  version: 0.2.5
 platform: ruby
 authors:
 - Asaf Schers
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-07-22 00:00:00.000000000 Z
+date: 2017-09-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -131,6 +131,7 @@ files:
 - lib/scoruby/features.rb
 - lib/scoruby/models/decision_tree.rb
 - lib/scoruby/models/gbm.rb
+- lib/scoruby/models/naive_bayes.rb
 - lib/scoruby/models/random_forest.rb
 - lib/scoruby/models_factory.rb
 - lib/scoruby/node.rb