scoruby 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +5 -5
- data/lib/scoruby/models/naive_bayes/model.rb +64 -0
- data/lib/scoruby/models/naive_bayes/model_data.rb +68 -0
- data/lib/scoruby/models_factory.rb +6 -0
- data/lib/scoruby/version.rb +1 -1
- metadata +4 -3
- data/lib/scoruby/models/naive_bayes.rb +0 -92
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2a1c339dc5c029d90d8e3a495af90499368bc92d
|
4
|
+
data.tar.gz: 5ff3c0147290c83d76261b9a6907e35ca493b678
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8843f6c37d4b4d4e30f018348b5ec113e66558b2c78c945f4755ea01554b609af3dbb4a50334143ffa8f4bab205303b833831273a97bd18a2ecc9fa4e214f401
|
7
|
+
data.tar.gz: 94078bb11bd0cf3490c93c90e822fed248d4721e24a2623004d2cd3af8805407d6a69599ba2d753bd024caefb0d7e08eb8ddebefef1f37b3dbfe2dcdf49a3bad
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
scoruby (0.2.
|
4
|
+
scoruby (0.2.5)
|
5
5
|
nokogiri (~> 1.7)
|
6
6
|
|
7
7
|
GEM
|
@@ -18,9 +18,9 @@ GEM
|
|
18
18
|
docile (1.1.5)
|
19
19
|
json (2.1.0)
|
20
20
|
method_source (0.8.2)
|
21
|
-
mini_portile2 (2.
|
22
|
-
nokogiri (1.
|
23
|
-
mini_portile2 (~> 2.
|
21
|
+
mini_portile2 (2.2.0)
|
22
|
+
nokogiri (1.8.0)
|
23
|
+
mini_portile2 (~> 2.2.0)
|
24
24
|
pry (0.10.3)
|
25
25
|
coderay (~> 1.1.0)
|
26
26
|
method_source (~> 0.8.1)
|
@@ -64,4 +64,4 @@ DEPENDENCIES
|
|
64
64
|
scoruby!
|
65
65
|
|
66
66
|
BUNDLED WITH
|
67
|
-
1.
|
67
|
+
1.15.4
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'scoruby/models/naive_bayes/model_data'
|
2
|
+
require 'forwardable'
|
3
|
+
|
4
|
+
module Scoruby
  module Models
    module NaiveBayes
      # Scores feature sets against the naive Bayes data held by a ModelData
      # instance. The threshold, labels and feature tables are read through
      # delegators and are never modified by this class.
      class Model
        extend Forwardable
        def_delegators :@model_data, :threshold, :labels, :numerical_features, :category_features

        # @param xml the parsed model document; passed to ModelData.new as is
        def initialize(xml)
          @model_data = ModelData.new(xml)
        end

        # Computes one value per label: the product of the label's stored
        # values (its :count entry) and the value of every supplied feature
        # the model knows about. Any factor that rounds to zero at five
        # decimal places is replaced by the model threshold first.
        #
        # The result is built in a fresh hash on every call, so features from
        # an earlier call can no longer leak into a later one.
        #
        # @param features [Hash] feature name => feature value
        # @return [Hash] label => Float
        def lvalues(features)
          labels.each_with_object({}) do |(label, label_data), result|
            factors = label_data.merge(label_feature_values(features, label))
            result[label] = factors.values.map { |value| floor_to_threshold(value) }.reduce(:*)
          end
        end

        # Share of +label+ in the sum of all label values for +features+.
        #
        # @param features [Hash] feature name => feature value
        # @param label the label to score; must be one of the model's labels
        # @return [Float]
        def score(features, label)
          values = lvalues(features)
          values[label] / values.values.reduce(:+)
        end

        private

        # Values of the supplied features for a single label. Features that
        # are neither a known category value nor a numerical feature with
        # statistics for the label are left out.
        def label_feature_values(features, label)
          features.each_with_object({}) do |(feature_name, feature_value), values|
            value = calc_category(feature_name, feature_value, label) ||
                    calc_numerical(feature_name, feature_value, label)
            values[feature_name] = value if value
          end
        end

        # Substitutes the threshold for values that round to zero (5 digits).
        def floor_to_threshold(value)
          value.round(5).zero? ? threshold : value
        end

        # Count of (feature value, label) divided by the summed counts of the
        # label over all values of the feature. Returns nil when the feature
        # or the value is not in the category table.
        def calc_category(feature_name, feature_value, label)
          categories = category_features[feature_name]
          return unless categories && categories[feature_value]

          value_count = categories[feature_value][label].to_f
          overall_count = categories.sum { |_, counts| counts[label].to_f }
          value_count / overall_count
        end

        # exp(-(x - mean)^2 / (2 * variance)) / sqrt(2 * PI * variance), using
        # the mean and variance stored for the feature and label. Returns nil
        # when no statistics exist for that pair.
        def calc_numerical(feature_name, feature_value, label)
          stats = numerical_features[feature_name] && numerical_features[feature_name][label]
          return unless stats

          variance = stats[:variance].to_f
          mean = stats[:mean].to_f
          feature_value = feature_value.to_f
          Math.exp(-(feature_value - mean)**2 / (2 * variance)) / Math.sqrt(2 * Math::PI * variance)
        end
      end
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module Scoruby
  module Models
    module NaiveBayes
      # Reads the pieces of a naive Bayes model out of a parsed document:
      # the threshold, the per-label counts and the per-feature tables.
      #
      # NOTE(review): xml is presumably a Nokogiri document; only #xpath and
      # the node methods #attr, #name, #child and #children are used here.
      class ModelData
        attr_reader :threshold, :labels, :numerical_features, :category_features

        # @param xml parsed model document responding to #xpath
        def initialize(xml)
          @xml = xml
          fetch_threshold
          fetch_features_data
          fetch_label_counts
        end

        private

        # Reads the 'threshold' attribute of the NaiveBayesModel element.
        def fetch_threshold
          @threshold = @xml.xpath('//NaiveBayesModel').attr('threshold').value.to_f
        end

        # Fills both feature tables, keyed by the symbolized 'fieldName' of
        # each BayesInput. A feature gets nil in the table that does not
        # apply to it.
        def fetch_features_data
          @category_features = {}
          @numerical_features = {}
          @xml.xpath('//BayesInput').each do |feature|
            name = feature.attr('fieldName').to_sym
            @category_features[name] = fetch_category_feature(feature)
            @numerical_features[name] = fetch_numerical_feature(feature)
          end
        end

        # Builds label => { count: Float } from the TargetValueCount elements
        # under BayesOutput.
        def fetch_label_counts
          @labels = {}
          @xml.xpath('//BayesOutput//TargetValueCount').each do |l|
            @labels[l.attr('value')] = { count: l.attr('count').to_f }
          end
        end

        # Returns label => { mean:, variance: } (raw attribute values) for a
        # feature with a TargetValueStats child, or nil when it has none.
        # The child is looked up by name so that other leading children do
        # not hide it.
        def fetch_numerical_feature(feature)
          stats = feature.children.find { |node| node.name == 'TargetValueStats' }
          return unless stats

          stats.children.each_with_object({}) do |stat, features_data|
            features_data[stat.attr('value').strip] = {
              mean: stat.child.attr('mean'),
              variance: stat.child.attr('variance')
            }
          end
        end

        # Returns category value => { label => count } for a feature with
        # PairCounts children, or nil when it has none. Children of any other
        # kind are ignored.
        def fetch_category_feature(feature)
          pair_counts = feature.children.select { |node| node.name == 'PairCounts' }
          return if pair_counts.empty?

          pair_counts.each_with_object({}) do |category, feature_data|
            feature_data[category.attr('value')] = fetch_category(category)
          end
        end

        # Maps label => raw 'count' attribute for the entries nested under
        # the first child of a PairCounts node.
        def fetch_category(category)
          category.child.children.each_with_object({}) do |label, category_data|
            category_data[label.attr('value')] = label.attr('count')
          end
        end
      end
    end
  end
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'scoruby/models/decision_tree'
|
2
2
|
require 'scoruby/models/gbm'
|
3
3
|
require 'scoruby/models/random_forest'
|
4
|
+
require 'scoruby/models/naive_bayes/model'
|
4
5
|
|
5
6
|
module Scoruby
|
6
7
|
class ModelsFactory
|
@@ -12,10 +13,15 @@ module Scoruby
|
|
12
13
|
return Models::RandomForest.new(xml) if random_forest?(xml)
|
13
14
|
return Models::Gbm.new(xml) if gbm?(xml)
|
14
15
|
return Models::DecisionTree.new(xml.child) if decision_tree?(xml)
|
16
|
+
return Models::NaiveBayes::Model.new(xml) if naive_bayes?(xml)
|
15
17
|
|
16
18
|
raise MODEL_NOT_SUPPORTED_ERROR
|
17
19
|
end
|
18
20
|
|
21
|
+
# True when the XPath query 'PMML/NaiveBayesModel' on +xml+ yields at least
# one result.
#
# @param xml object responding to #xpath
# @return [Boolean]
def self.naive_bayes?(xml)
  !xml.xpath('PMML/NaiveBayesModel').empty?
end
|
24
|
+
|
19
25
|
# True when the XPath query 'PMML/TreeModel' on +xml+ yields at least one
# result.
#
# @param xml object responding to #xpath
# @return [Boolean]
def self.decision_tree?(xml)
  !xml.xpath('PMML/TreeModel').empty?
end
|
data/lib/scoruby/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scoruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.5
|
4
|
+
version: 0.2.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Asaf Schers
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-09-
|
11
|
+
date: 2017-09-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -131,7 +131,8 @@ files:
|
|
131
131
|
- lib/scoruby/features.rb
|
132
132
|
- lib/scoruby/models/decision_tree.rb
|
133
133
|
- lib/scoruby/models/gbm.rb
|
134
|
-
- lib/scoruby/models/naive_bayes.rb
|
134
|
+
- lib/scoruby/models/naive_bayes/model.rb
|
135
|
+
- lib/scoruby/models/naive_bayes/model_data.rb
|
135
136
|
- lib/scoruby/models/random_forest.rb
|
136
137
|
- lib/scoruby/models_factory.rb
|
137
138
|
- lib/scoruby/node.rb
|
@@ -1,92 +0,0 @@
|
|
1
|
-
module Scoruby
  module Models
    # Naive Bayes scorer that parses its threshold, feature tables and label
    # counts straight from a parsed model document.
    #
    # NOTE(review): xml is presumably a Nokogiri document; only #xpath and
    # the node methods #attr, #name, #child and #children are used here.
    class NaiveBayes
      attr_reader :data

      # @param xml parsed model document responding to #xpath
      def initialize(xml)
        @threshold = xml.xpath('//NaiveBayesModel').attr('threshold').value.to_f
        @data = xml.xpath('//BayesInput').each_with_object({}) do |feature, data|
          data[feature.attr('fieldName').to_sym] = fetch_feature(feature)
        end
        @labels = xml.xpath('//BayesOutput//TargetValueCount').each_with_object({}) do |l, labels|
          labels[l.attr('value')] = { count: l.attr('count').to_f }
        end
      end

      # Computes one value per label: the product of the label count and the
      # value of every supplied feature the model knows about. Any factor
      # that rounds to zero at five decimal places is replaced by the
      # threshold first.
      #
      # The result is built in a fresh hash on every call, so features from
      # an earlier call do not leak into a later one. Feature names that are
      # not part of the model are ignored rather than raising.
      #
      # @param features [Hash] feature name => feature value
      # @return [Hash] label => Float
      def lvalues(features)
        @labels.each_with_object({}) do |(label, label_data), result|
          factors = label_data.merge(feature_values(features, label))
          result[label] = factors.values.map { |v| v.round(5).zero? ? @threshold : v }.reduce(:*)
        end
      end

      # Share of +label+ in the sum of all label values for +features+.
      #
      # @param features [Hash] feature name => feature value
      # @param label the label to score; must be one of the model's labels
      # @return [Float]
      def score(features, label)
        values = lvalues(features)
        values[label] / values.values.reduce(:+)
      end

      private

      # Values of the supplied features for a single label. A feature whose
      # table has an entry for the given value is treated as categorical;
      # otherwise, if the table has an entry for the label, as numerical.
      def feature_values(features, label)
        features.each_with_object({}) do |(feature_name, feature_value), values|
          feature_data = @data[feature_name]
          next unless feature_data

          if feature_data[feature_value]
            value_count = feature_data[feature_value][label].to_f
            overall_count = feature_data.sum { |_, value| value[label].to_f }
            values[feature_name] = value_count / overall_count
          elsif feature_data[label]
            values[feature_name] = calc_numerical(feature_data[label], feature_value)
          end
        end
      end

      # exp(-(x - mean)^2 / (2 * variance)) / sqrt(2 * PI * variance) for the
      # mean and variance stored in +label_data+.
      def calc_numerical(label_data, feature_value)
        variance = label_data[:variance].to_f
        mean = label_data[:mean].to_f
        feature_value = feature_value.to_f

        Math.exp(-(feature_value - mean)**2 / (2 * variance)) / Math.sqrt(2 * Math::PI * variance)
      end

      # Parses a BayesInput as numerical when its first child is named
      # 'TargetValueStats', as categorical otherwise.
      def fetch_feature(feature)
        return fetch_numerical_feature(feature) if feature.child.name == 'TargetValueStats'

        fetch_category_feature(feature)
      end

      # label => { mean:, variance: } (raw attribute values).
      def fetch_numerical_feature(feature)
        feature.child.children.each_with_object({}) do |child, features_data|
          features_data[child.attr('value').strip] = {
            mean: child.child.attr('mean'),
            variance: child.child.attr('variance')
          }
        end
      end

      # category value => { label => count } over all children of the feature.
      def fetch_category_feature(feature)
        feature.children.each_with_object({}) do |category, feature_data|
          feature_data[category.attr('value')] = fetch_category(category)
        end
      end

      # label => raw 'count' attribute for the entries nested under the first
      # child of +category+.
      def fetch_category(category)
        category.child.children.each_with_object({}) do |label, category_data|
          category_data[label.attr('value')] = label.attr('count')
        end
      end
    end
  end
end
|