scoruby 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6d01f41ef6e3e3485acb65587892402866004bd2
4
- data.tar.gz: 7d898f2a4c86915e19952a3641286dd252d7c3fd
3
+ metadata.gz: 2a1c339dc5c029d90d8e3a495af90499368bc92d
4
+ data.tar.gz: 5ff3c0147290c83d76261b9a6907e35ca493b678
5
5
  SHA512:
6
- metadata.gz: de6e7acbbcf5acd97f2b980253be94c7eb2fb7c31fb76c9c9d1fe6ca09447751974734370826d1477db639685a6ab37108ced5d85e4fc9b397285e1b61468a4e
7
- data.tar.gz: 92e5728b151d7c3daa24e5fccebe6b7e8567c92af4fb03cdbc5b5ef20f5a75b53c88b5ef79e6d4db20df157995f45b6222677a0e1634702e3769240b61f025d4
6
+ metadata.gz: 8843f6c37d4b4d4e30f018348b5ec113e66558b2c78c945f4755ea01554b609af3dbb4a50334143ffa8f4bab205303b833831273a97bd18a2ecc9fa4e214f401
7
+ data.tar.gz: 94078bb11bd0cf3490c93c90e822fed248d4721e24a2623004d2cd3af8805407d6a69599ba2d753bd024caefb0d7e08eb8ddebefef1f37b3dbfe2dcdf49a3bad
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- scoruby (0.2.4)
4
+ scoruby (0.2.5)
5
5
  nokogiri (~> 1.7)
6
6
 
7
7
  GEM
@@ -18,9 +18,9 @@ GEM
18
18
  docile (1.1.5)
19
19
  json (2.1.0)
20
20
  method_source (0.8.2)
21
- mini_portile2 (2.1.0)
22
- nokogiri (1.7.0.1)
23
- mini_portile2 (~> 2.1.0)
21
+ mini_portile2 (2.2.0)
22
+ nokogiri (1.8.0)
23
+ mini_portile2 (~> 2.2.0)
24
24
  pry (0.10.3)
25
25
  coderay (~> 1.1.0)
26
26
  method_source (~> 0.8.1)
@@ -64,4 +64,4 @@ DEPENDENCIES
64
64
  scoruby!
65
65
 
66
66
  BUNDLED WITH
67
- 1.11.2
67
+ 1.15.4
@@ -0,0 +1,64 @@
1
+ require 'scoruby/models/naive_bayes/model_data'
2
+ require 'forwardable'
3
+
4
module Scoruby
  module Models
    module NaiveBayes
      # Scores a set of feature values against a naive Bayes model.
      #
      # The parsed model contents are read through the +threshold+, +labels+,
      # +numerical_features+ and +category_features+ readers delegated to
      # @model_data. Scoring never writes to those structures, so repeated
      # calls with different feature sets are independent of each other.
      class Model
        extend Forwardable
        def_delegators :@model_data, :threshold, :labels, :numerical_features, :category_features

        # @param xml the parsed model document handed to ModelData.new
        def initialize(xml)
          @model_data = ModelData.new(xml)
        end

        # Computes, for every label, the product of the label's stored values
        # and the per-feature probabilities of the given features.
        #
        # @param features [Hash] feature name => feature value
        # @return [Hash] label => product of all factors for that label
        def lvalues(features)
          labels.each_with_object({}) do |(label, label_data), result|
            result[label] = label_factors(label, label_data, features)
                            .map { |factor| factor.round(5).zero? ? threshold : factor }
                            .reduce(:*)
          end
        end

        # Share of +label+ in the sum of all label values for +features+.
        #
        # @param features [Hash] feature name => feature value
        # @param label the label to score; must be a key of +labels+
        # @return [Float]
        def score(features, label)
          # Compute the label values once and reuse them for both the
          # numerator and the denominator.
          values = lvalues(features)
          values[label] / values.values.reduce(:+)
        end

        private

        # Collects the factors multiplied for one label: the label's stored
        # values plus one probability per feature that the model knows.
        # Works on a copy so the model's +labels+ hash is left untouched.
        def label_factors(label, label_data, features)
          factors = label_data.dup
          features.each do |feature_name, feature_value|
            probability = calc_category(feature_name, feature_value, label)
            probability ||= calc_numerical(feature_name, feature_value, label)
            # Features unknown to the model contribute no factor.
            factors[feature_name] = probability if probability
          end
          factors.values
        end

        # Relative frequency of +feature_value+ among all values of the
        # categorical feature for +label+; nil when the feature or the value
        # is not present in +category_features+.
        def calc_category(feature_name, feature_value, label)
          counts_by_value = category_features[feature_name]
          return unless counts_by_value && counts_by_value[feature_value]

          value_count = counts_by_value[feature_value][label].to_f
          overall_count = counts_by_value.sum { |_, counts| counts[label].to_f }
          value_count / overall_count
        end

        # Gaussian density of +feature_value+ using the mean and variance
        # stored for +label+; nil when the feature has no entry for the label
        # in +numerical_features+.
        def calc_numerical(feature_name, feature_value, label)
          stats_by_label = numerical_features[feature_name]
          return unless stats_by_label && stats_by_label[label]

          variance = stats_by_label[label][:variance].to_f
          mean = stats_by_label[label][:mean].to_f
          feature_value = feature_value.to_f
          Math.exp(-(feature_value - mean)**2 / (2 * variance)) / Math.sqrt(2 * Math::PI * variance)
        end
      end
    end
  end
end
@@ -0,0 +1,68 @@
1
module Scoruby
  module Models
    module NaiveBayes
      # Extracts the threshold, label counts and per-feature statistics of a
      # naive Bayes model from a parsed XML document.
      #
      # After construction:
      # * +threshold+          - Float read from the NaiveBayesModel element
      # * +labels+             - label value => { count: Float }
      # * +category_features+  - field name (Symbol) => { category value =>
      #                          { label value => count } }, or nil when the
      #                          input has no PairCounts children
      # * +numerical_features+ - field name (Symbol) => { label value =>
      #                          { mean:, variance: } }, or nil when the input
      #                          has no TargetValueStats child
      # Counts, means and variances are stored exactly as the attribute
      # lookups return them; only label counts and the threshold are
      # converted to Float.
      class ModelData
        attr_reader :threshold, :labels, :numerical_features, :category_features

        # @param xml parsed document; must respond to +xpath+
        def initialize(xml)
          @xml = xml
          fetch_threshold
          fetch_features_data
          fetch_label_counts
        end

        private

        def fetch_threshold
          @threshold = @xml.xpath('//NaiveBayesModel').attr('threshold').value.to_f
        end

        # Every BayesInput gets an entry in both hashes; the entry is nil in
        # the hash whose kind of data the input does not carry.
        def fetch_features_data
          @category_features = {}
          @numerical_features = {}
          @xml.xpath('//BayesInput').each do |feature|
            field = feature.attr('fieldName').to_sym
            @category_features[field] = fetch_category_feature(feature)
            @numerical_features[field] = fetch_numerical_feature(feature)
          end
        end

        def fetch_label_counts
          @labels = {}
          @xml.xpath('//BayesOutput//TargetValueCount').each do |l|
            @labels[l.attr('value')] = { count: l.attr('count').to_f }
          end
        end

        # Looks for a TargetValueStats child anywhere among the input's
        # children (not only in first position) and returns nil when there is
        # none, including when the input has no children at all.
        def fetch_numerical_feature(feature)
          stats = feature.children.find { |node| node.name == 'TargetValueStats' }
          return unless stats

          stats.children.each_with_object({}) do |stat, features_data|
            features_data[stat.attr('value').strip] = {
              mean: stat.child.attr('mean'),
              variance: stat.child.attr('variance')
            }
          end
        end

        # Only PairCounts children describe categories; any other sibling
        # element is skipped instead of being stored under a nil key.
        def fetch_category_feature(feature)
          pair_counts = feature.children.select { |node| node.name == 'PairCounts' }
          return if pair_counts.empty?

          pair_counts.each_with_object({}) do |category, feature_data|
            feature_data[category.attr('value')] = fetch_category(category)
          end
        end

        # Maps each label value to its count for one category, reading the
        # children of the category's first child.
        def fetch_category(category)
          category.child.children.each_with_object({}) do |label, category_data|
            category_data[label.attr('value')] = label.attr('count')
          end
        end
      end
    end
  end
end
@@ -1,6 +1,7 @@
1
1
  require 'scoruby/models/decision_tree'
2
2
  require 'scoruby/models/gbm'
3
3
  require 'scoruby/models/random_forest'
4
+ require 'scoruby/models/naive_bayes/model'
4
5
 
5
6
  module Scoruby
6
7
  class ModelsFactory
@@ -12,10 +13,15 @@ module Scoruby
12
13
  return Models::RandomForest.new(xml) if random_forest?(xml)
13
14
  return Models::Gbm.new(xml) if gbm?(xml)
14
15
  return Models::DecisionTree.new(xml.child) if decision_tree?(xml)
16
+ return Models::NaiveBayes::Model.new(xml) if naive_bayes?(xml)
15
17
 
16
18
  raise MODEL_NOT_SUPPORTED_ERROR
17
19
  end
18
20
 
21
# Reports whether the document has a NaiveBayesModel element under PMML.
def self.naive_bayes?(xml)
  naive_bayes_nodes = xml.xpath('PMML/NaiveBayesModel')
  !naive_bayes_nodes.empty?
end
24
+
19
25
  def self.decision_tree?(xml)
20
26
  !xml.xpath('PMML/TreeModel').empty?
21
27
  end
@@ -1,3 +1,3 @@
1
1
  module Scoruby
2
- VERSION = '0.2.5'
2
+ VERSION = '0.2.6'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scoruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Asaf Schers
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-09-14 00:00:00.000000000 Z
11
+ date: 2017-09-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -131,7 +131,8 @@ files:
131
131
  - lib/scoruby/features.rb
132
132
  - lib/scoruby/models/decision_tree.rb
133
133
  - lib/scoruby/models/gbm.rb
134
- - lib/scoruby/models/naive_bayes.rb
134
+ - lib/scoruby/models/naive_bayes/model.rb
135
+ - lib/scoruby/models/naive_bayes/model_data.rb
135
136
  - lib/scoruby/models/random_forest.rb
136
137
  - lib/scoruby/models_factory.rb
137
138
  - lib/scoruby/node.rb
@@ -1,92 +0,0 @@
1
- module Scoruby
2
- module Models
3
- class NaiveBayes
4
- attr_reader :data
5
-
6
- def initialize(xml)
7
- @threshold = xml.xpath('//NaiveBayesModel').attr('threshold').value.to_f
8
- @data = {}
9
- xml.xpath('//BayesInput').each do |feature|
10
- @data[feature.attr('fieldName').to_sym] = fetch_feature(feature)
11
- end
12
-
13
- @labels = {}
14
- xml.xpath('//BayesOutput//TargetValueCount').each do |l| l.attr('value')
15
- @labels[l.attr('value')] = { 'count': l.attr('count').to_f }
16
- end
17
- end
18
-
19
- def lvalues(features)
20
- @labels.each do |label, _|
21
- features.each do |feature_name, feature_value|
22
-
23
- if @data[feature_name][feature_value]
24
- value_count = @data[feature_name][feature_value][label].to_f
25
- overall_count = @data[feature_name].sum { |_, value| value[label].to_f }
26
-
27
- @labels[label][feature_name] = value_count / overall_count
28
- elsif @data[feature_name][label]
29
- @labels[label][feature_name] = calc_numerical(@data[feature_name][label], feature_value)
30
- end
31
- end
32
- end
33
-
34
- lvalues = {}
35
- @labels.each do |label, label_data|
36
- label_data.each do |key, value|
37
- label_data[key] = @threshold if value.round(5).zero?
38
- end
39
- lvalues[label] = label_data.values.reduce(:*)
40
- end
41
- lvalues
42
- end
43
-
44
- def score(features, label)
45
- lvalues = lvalues(features)
46
- lvalues[label] / lvalues.values.reduce(:+)
47
- end
48
-
49
- private
50
-
51
- def calc_numerical(label_data, feature_value)
52
- variance = label_data[:variance].to_f
53
- mean = label_data[:mean].to_f
54
- feature_value = feature_value.to_f
55
-
56
- Math.exp(-(feature_value - mean)**2 / (2 * variance)) / Math.sqrt(2 * Math::PI * variance)
57
- end
58
-
59
- def fetch_feature(feature)
60
- return fetch_numerical_feature(feature) if feature.child.name == 'TargetValueStats'
61
- fetch_category_feature(feature)
62
- end
63
-
64
- def fetch_numerical_feature(feature)
65
- features_data = {}
66
- feature.child.children.each do |child|
67
- features_data[child.attr('value').strip] = {
68
- mean: child.child.attr('mean'),
69
- variance: child.child.attr('variance')
70
- }
71
- end
72
- features_data
73
- end
74
-
75
- def fetch_category_feature(feature)
76
- feature_data = {}
77
- feature.children.each do |category|
78
- feature_data[category.attr('value')] = fetch_category(category)
79
- end
80
- feature_data
81
- end
82
-
83
- def fetch_category(category)
84
- category_data = {}
85
- category.child.children.each do |label|
86
- category_data[label.attr('value')] = label.attr('count')
87
- end
88
- category_data
89
- end
90
- end
91
- end
92
- end