scoruby 0.2.5 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6d01f41ef6e3e3485acb65587892402866004bd2
4
- data.tar.gz: 7d898f2a4c86915e19952a3641286dd252d7c3fd
3
+ metadata.gz: 2a1c339dc5c029d90d8e3a495af90499368bc92d
4
+ data.tar.gz: 5ff3c0147290c83d76261b9a6907e35ca493b678
5
5
  SHA512:
6
- metadata.gz: de6e7acbbcf5acd97f2b980253be94c7eb2fb7c31fb76c9c9d1fe6ca09447751974734370826d1477db639685a6ab37108ced5d85e4fc9b397285e1b61468a4e
7
- data.tar.gz: 92e5728b151d7c3daa24e5fccebe6b7e8567c92af4fb03cdbc5b5ef20f5a75b53c88b5ef79e6d4db20df157995f45b6222677a0e1634702e3769240b61f025d4
6
+ metadata.gz: 8843f6c37d4b4d4e30f018348b5ec113e66558b2c78c945f4755ea01554b609af3dbb4a50334143ffa8f4bab205303b833831273a97bd18a2ecc9fa4e214f401
7
+ data.tar.gz: 94078bb11bd0cf3490c93c90e822fed248d4721e24a2623004d2cd3af8805407d6a69599ba2d753bd024caefb0d7e08eb8ddebefef1f37b3dbfe2dcdf49a3bad
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- scoruby (0.2.4)
4
+ scoruby (0.2.5)
5
5
  nokogiri (~> 1.7)
6
6
 
7
7
  GEM
@@ -18,9 +18,9 @@ GEM
18
18
  docile (1.1.5)
19
19
  json (2.1.0)
20
20
  method_source (0.8.2)
21
- mini_portile2 (2.1.0)
22
- nokogiri (1.7.0.1)
23
- mini_portile2 (~> 2.1.0)
21
+ mini_portile2 (2.2.0)
22
+ nokogiri (1.8.0)
23
+ mini_portile2 (~> 2.2.0)
24
24
  pry (0.10.3)
25
25
  coderay (~> 1.1.0)
26
26
  method_source (~> 0.8.1)
@@ -64,4 +64,4 @@ DEPENDENCIES
64
64
  scoruby!
65
65
 
66
66
  BUNDLED WITH
67
- 1.11.2
67
+ 1.15.4
@@ -0,0 +1,64 @@
1
+ require 'scoruby/models/naive_bayes/model_data'
2
+ require 'forwardable'
3
+
4
+ module Scoruby
5
+ module Models
6
+ module NaiveBayes
7
+ class Model
8
+ extend Forwardable
9
+ def_delegators :@model_data, :threshold, :labels, :numerical_features, :category_features
10
+
11
+ def initialize(xml)
12
+ @model_data = ModelData.new(xml)
13
+ end
14
+
15
+ def lvalues(features)
16
+ calc_label_feature_values(features)
17
+ calc_label_values
18
+ end
19
+
20
+ def score(features, label)
21
+ lvalues(features)[label] / lvalues(features).values.reduce(:+)
22
+ end
23
+
24
+ private
25
+
26
+ def calc_label_values
27
+ label_values = {}
28
+ labels.each do |label, label_data|
29
+ label_data.each do |key, value|
30
+ label_data[key] = threshold if value.round(5).zero?
31
+ end
32
+ label_values[label] = label_data.values.reduce(:*)
33
+ end
34
+ label_values
35
+ end
36
+
37
+ def calc_label_feature_values(features)
38
+ labels.each do |label, _|
39
+ features.each do |feature_name, feature_value|
40
+ label_value = calc_category(feature_name, feature_value, label)
41
+ label_value ||= calc_numerical(feature_name, feature_value, label)
42
+ labels[label][feature_name] = label_value if label_value
43
+ end
44
+ end
45
+ end
46
+
47
+ def calc_category(feature_name, feature_value, label)
48
+ return unless category_features[feature_name] && category_features[feature_name][feature_value]
49
+ value_count = category_features[feature_name][feature_value][label].to_f
50
+ overall_count = category_features[feature_name].sum { |_, value| value[label].to_f }
51
+ value_count / overall_count
52
+ end
53
+
54
+ def calc_numerical(feature_name, feature_value, label)
55
+ return unless numerical_features[feature_name] && numerical_features[feature_name][label]
56
+ variance = numerical_features[feature_name][label][:variance].to_f
57
+ mean = numerical_features[feature_name][label][:mean].to_f
58
+ feature_value = feature_value.to_f
59
+ Math.exp(-(feature_value - mean)**2 / (2 * variance)) / Math.sqrt(2 * Math::PI * variance)
60
+ end
61
+ end
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,68 @@
1
+ module Scoruby
2
+ module Models
3
+ module NaiveBayes
4
+ class ModelData
5
+ attr_reader :threshold, :labels, :numerical_features, :category_features
6
+
7
+ def initialize(xml)
8
+ @xml = xml
9
+ fetch_threshold
10
+ fetch_features_data
11
+ fetch_label_counts
12
+ end
13
+
14
+ private
15
+
16
+ def fetch_threshold
17
+ @threshold = @xml.xpath('//NaiveBayesModel').attr('threshold').value.to_f
18
+ end
19
+
20
+ def fetch_features_data
21
+ @category_features = {}
22
+ @numerical_features = {}
23
+ @xml.xpath('//BayesInput').each do |feature|
24
+ @category_features[feature.attr('fieldName').to_sym] = fetch_category_feature(feature)
25
+ @numerical_features[feature.attr('fieldName').to_sym] = fetch_numerical_feature(feature)
26
+ end
27
+ end
28
+
29
+ def fetch_label_counts
30
+ @labels = {}
31
+ @xml.xpath('//BayesOutput//TargetValueCount').each do |l|
32
+ l.attr('value')
33
+ @labels[l.attr('value')] = {'count': l.attr('count').to_f}
34
+ end
35
+ end
36
+
37
+ def fetch_numerical_feature(feature)
38
+ return unless feature.child.name == 'TargetValueStats'
39
+ features_data = {}
40
+ feature.child.children.each do |child|
41
+ features_data[child.attr('value').strip] = {
42
+ mean: child.child.attr('mean'),
43
+ variance: child.child.attr('variance')
44
+ }
45
+ end
46
+ features_data
47
+ end
48
+
49
+ def fetch_category_feature(feature)
50
+ return unless feature.children.any? { |f| f.name == 'PairCounts' }
51
+ feature_data = {}
52
+ feature.children.each do |category|
53
+ feature_data[category.attr('value')] = fetch_category(category)
54
+ end
55
+ feature_data
56
+ end
57
+
58
+ def fetch_category(category)
59
+ category_data = {}
60
+ category.child.children.each do |label|
61
+ category_data[label.attr('value')] = label.attr('count')
62
+ end
63
+ category_data
64
+ end
65
+ end
66
+ end
67
+ end
68
+ end
@@ -1,6 +1,7 @@
1
1
  require 'scoruby/models/decision_tree'
2
2
  require 'scoruby/models/gbm'
3
3
  require 'scoruby/models/random_forest'
4
+ require 'scoruby/models/naive_bayes/model'
4
5
 
5
6
  module Scoruby
6
7
  class ModelsFactory
@@ -12,10 +13,15 @@ module Scoruby
12
13
  return Models::RandomForest.new(xml) if random_forest?(xml)
13
14
  return Models::Gbm.new(xml) if gbm?(xml)
14
15
  return Models::DecisionTree.new(xml.child) if decision_tree?(xml)
16
+ return Models::NaiveBayes::Model.new(xml) if naive_bayes?(xml)
15
17
 
16
18
  raise MODEL_NOT_SUPPORTED_ERROR
17
19
  end
18
20
 
21
+ def self.naive_bayes?(xml)
22
+ !xml.xpath('PMML/NaiveBayesModel').empty?
23
+ end
24
+
19
25
  def self.decision_tree?(xml)
20
26
  !xml.xpath('PMML/TreeModel').empty?
21
27
  end
@@ -1,3 +1,3 @@
1
1
  module Scoruby
2
- VERSION = '0.2.5'
2
+ VERSION = '0.2.6'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scoruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Asaf Schers
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-09-14 00:00:00.000000000 Z
11
+ date: 2017-09-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -131,7 +131,8 @@ files:
131
131
  - lib/scoruby/features.rb
132
132
  - lib/scoruby/models/decision_tree.rb
133
133
  - lib/scoruby/models/gbm.rb
134
- - lib/scoruby/models/naive_bayes.rb
134
+ - lib/scoruby/models/naive_bayes/model.rb
135
+ - lib/scoruby/models/naive_bayes/model_data.rb
135
136
  - lib/scoruby/models/random_forest.rb
136
137
  - lib/scoruby/models_factory.rb
137
138
  - lib/scoruby/node.rb
@@ -1,92 +0,0 @@
1
- module Scoruby
2
- module Models
3
- class NaiveBayes
4
- attr_reader :data
5
-
6
- def initialize(xml)
7
- @threshold = xml.xpath('//NaiveBayesModel').attr('threshold').value.to_f
8
- @data = {}
9
- xml.xpath('//BayesInput').each do |feature|
10
- @data[feature.attr('fieldName').to_sym] = fetch_feature(feature)
11
- end
12
-
13
- @labels = {}
14
- xml.xpath('//BayesOutput//TargetValueCount').each do |l| l.attr('value')
15
- @labels[l.attr('value')] = { 'count': l.attr('count').to_f }
16
- end
17
- end
18
-
19
- def lvalues(features)
20
- @labels.each do |label, _|
21
- features.each do |feature_name, feature_value|
22
-
23
- if @data[feature_name][feature_value]
24
- value_count = @data[feature_name][feature_value][label].to_f
25
- overall_count = @data[feature_name].sum { |_, value| value[label].to_f }
26
-
27
- @labels[label][feature_name] = value_count / overall_count
28
- elsif @data[feature_name][label]
29
- @labels[label][feature_name] = calc_numerical(@data[feature_name][label], feature_value)
30
- end
31
- end
32
- end
33
-
34
- lvalues = {}
35
- @labels.each do |label, label_data|
36
- label_data.each do |key, value|
37
- label_data[key] = @threshold if value.round(5).zero?
38
- end
39
- lvalues[label] = label_data.values.reduce(:*)
40
- end
41
- lvalues
42
- end
43
-
44
- def score(features, label)
45
- lvalues = lvalues(features)
46
- lvalues[label] / lvalues.values.reduce(:+)
47
- end
48
-
49
- private
50
-
51
- def calc_numerical(label_data, feature_value)
52
- variance = label_data[:variance].to_f
53
- mean = label_data[:mean].to_f
54
- feature_value = feature_value.to_f
55
-
56
- Math.exp(-(feature_value - mean)**2 / (2 * variance)) / Math.sqrt(2 * Math::PI * variance)
57
- end
58
-
59
- def fetch_feature(feature)
60
- return fetch_numerical_feature(feature) if feature.child.name == 'TargetValueStats'
61
- fetch_category_feature(feature)
62
- end
63
-
64
- def fetch_numerical_feature(feature)
65
- features_data = {}
66
- feature.child.children.each do |child|
67
- features_data[child.attr('value').strip] = {
68
- mean: child.child.attr('mean'),
69
- variance: child.child.attr('variance')
70
- }
71
- end
72
- features_data
73
- end
74
-
75
- def fetch_category_feature(feature)
76
- feature_data = {}
77
- feature.children.each do |category|
78
- feature_data[category.attr('value')] = fetch_category(category)
79
- end
80
- feature_data
81
- end
82
-
83
- def fetch_category(category)
84
- category_data = {}
85
- category.child.children.each do |label|
86
- category_data[label.attr('value')] = label.attr('count')
87
- end
88
- category_data
89
- end
90
- end
91
- end
92
- end