scoruby 0.2.8 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/Gemfile.lock +1 -1
- data/lib/scoruby/models/random_forest/data.rb +45 -0
- data/lib/scoruby/models/random_forest/model.rb +44 -0
- data/lib/scoruby/models_factory.rb +2 -2
- data/lib/scoruby/predicates/simple_set_predicate.rb +1 -1
- data/lib/scoruby/version.rb +1 -1
- metadata +5 -5
- data/lib/scoruby/models/random_forest.rb +0 -45
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3af72ce9c41b6f116e34af71b5ad3264d8043c6fd1aaa3544a0a362bff096c0f
|
4
|
+
data.tar.gz: de77039296cfd39f2ef036372e90ad2b4028180381a153380d77164ccf02a743
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c827de777f0b1b0d83af165a6079a6a2cff83d2612758c78171328cc2811cf718532fef6ff628a04a38f6ef9179c60dae529ee70a7daa90e5aed0e38462cf370
|
7
|
+
data.tar.gz: eff6fcc12bbe064b5a1af1a272c8b544af87c8ae9aa48e7f59a506abcaf292d3b72a235c835caec524eb5ae04a66e2f37ca12691735ce9c1bda19489c1fff385
|
data/Gemfile.lock
CHANGED
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Scoruby
|
4
|
+
module Models
|
5
|
+
module RandomForest
|
6
|
+
class Data
|
7
|
+
RF_FOREST_XPATH = 'PMML/MiningModel/Segmentation/Segment'
|
8
|
+
FEATURES_XPATH = 'PMML/DataDictionary/DataField'
|
9
|
+
|
10
|
+
def initialize(xml)
|
11
|
+
@xml = xml
|
12
|
+
end
|
13
|
+
|
14
|
+
def decision_trees
|
15
|
+
@xml.xpath(RF_FOREST_XPATH).map do |xml_tree|
|
16
|
+
DecisionTree.new(xml_tree)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def categorical_features
|
21
|
+
categorical_predicates.each_with_object(Hash.new([])) do |xml, res|
|
22
|
+
predicate = Predicates::SimpleSetPredicate.new(xml)
|
23
|
+
res[predicate.field] = res[predicate.field] | predicate.array
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def continuous_features
|
28
|
+
continuous_predicates.map do |xml|
|
29
|
+
Predicates::SimplePredicate.new(xml).field
|
30
|
+
end.uniq
|
31
|
+
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
def categorical_predicates
|
36
|
+
@xml.xpath('//SimpleSetPredicate')
|
37
|
+
end
|
38
|
+
|
39
|
+
def continuous_predicates
|
40
|
+
@xml.xpath('//SimplePredicate')
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'scoruby/models/random_forest/data'
|
4
|
+
|
5
|
+
module Scoruby
|
6
|
+
module Models
|
7
|
+
module RandomForest
|
8
|
+
class Model
|
9
|
+
def initialize(xml)
|
10
|
+
@decision_trees = Data.new(xml).decision_trees
|
11
|
+
end
|
12
|
+
|
13
|
+
def predict(features)
|
14
|
+
decisions_count = decisions_count(features)
|
15
|
+
decision = decisions_count.max_by { |_, v| v }
|
16
|
+
{
|
17
|
+
label: decision[0],
|
18
|
+
score: decision[1] / decisions_count.values.reduce(0, :+).to_f
|
19
|
+
}
|
20
|
+
end
|
21
|
+
|
22
|
+
def decisions_count(features)
|
23
|
+
formatted_features = Features.new(features).formatted
|
24
|
+
decisions = traverse_trees(formatted_features)
|
25
|
+
aggregate_decisions(decisions)
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
def traverse_trees(formatted_features)
|
31
|
+
@decision_trees.map do |decision_tree|
|
32
|
+
decision_tree.decide(formatted_features).score
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def aggregate_decisions(decisions)
|
37
|
+
decisions.each_with_object(Hash.new(0)) do |score, counts|
|
38
|
+
counts[score] += 1
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
require 'scoruby/models/decision_tree'
|
4
4
|
require 'scoruby/models/gbm'
|
5
|
-
require 'scoruby/models/random_forest'
|
5
|
+
require 'scoruby/models/random_forest/model'
|
6
6
|
require 'scoruby/models/naive_bayes/model'
|
7
7
|
|
8
8
|
module Scoruby
|
@@ -12,7 +12,7 @@ module Scoruby
|
|
12
12
|
MODEL_NOT_SUPPORTED_ERROR = 'model not supported'
|
13
13
|
|
14
14
|
def self.factory_for(xml)
|
15
|
-
return Models::RandomForest.new(xml) if random_forest?(xml)
|
15
|
+
return Models::RandomForest::Model.new(xml) if random_forest?(xml)
|
16
16
|
return Models::Gbm.new(xml) if gbm?(xml)
|
17
17
|
return Models::DecisionTree.new(xml.child) if decision_tree?(xml)
|
18
18
|
return Models::NaiveBayes::Model.new(xml) if naive_bayes?(xml)
|
data/lib/scoruby/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scoruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.8
|
4
|
+
version: 0.2.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Asaf Schers
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-12-
|
11
|
+
date: 2017-12-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -134,7 +134,8 @@ files:
|
|
134
134
|
- lib/scoruby/models/gbm.rb
|
135
135
|
- lib/scoruby/models/naive_bayes/model.rb
|
136
136
|
- lib/scoruby/models/naive_bayes/model_data.rb
|
137
|
-
- lib/scoruby/models/random_forest.rb
|
137
|
+
- lib/scoruby/models/random_forest/data.rb
|
138
|
+
- lib/scoruby/models/random_forest/model.rb
|
138
139
|
- lib/scoruby/models_factory.rb
|
139
140
|
- lib/scoruby/node.rb
|
140
141
|
- lib/scoruby/predicate_factory.rb
|
@@ -168,9 +169,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
168
169
|
version: '0'
|
169
170
|
requirements: []
|
170
171
|
rubyforge_project:
|
171
|
-
rubygems_version: 2.
|
172
|
+
rubygems_version: 2.7.3
|
172
173
|
signing_key:
|
173
174
|
specification_version: 4
|
174
175
|
summary: Ruby Scoring API for PMML.
|
175
176
|
test_files: []
|
176
|
-
has_rdoc:
|
@@ -1,45 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Scoruby
|
4
|
-
module Models
|
5
|
-
class RandomForest
|
6
|
-
RF_FOREST_XPATH = 'PMML/MiningModel/Segmentation/Segment'
|
7
|
-
|
8
|
-
def initialize(xml)
|
9
|
-
xml_trees = xml.xpath(RF_FOREST_XPATH)
|
10
|
-
@decision_trees = xml_trees.map do |xml_tree|
|
11
|
-
DecisionTree.new(xml_tree)
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
def predict(features)
|
16
|
-
decisions_count = decisions_count(features)
|
17
|
-
decision = decisions_count.max_by { |_, v| v }
|
18
|
-
{
|
19
|
-
label: decision[0],
|
20
|
-
score: decision[1] / decisions_count.values.reduce(0, :+).to_f
|
21
|
-
}
|
22
|
-
end
|
23
|
-
|
24
|
-
def decisions_count(features)
|
25
|
-
formatted_features = Features.new(features).formatted
|
26
|
-
decisions = traverse_trees(formatted_features)
|
27
|
-
aggregate_decisions(decisions)
|
28
|
-
end
|
29
|
-
|
30
|
-
private
|
31
|
-
|
32
|
-
def traverse_trees(formatted_features)
|
33
|
-
@decision_trees.map do |decision_tree|
|
34
|
-
decision_tree.decide(formatted_features).score
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
def aggregate_decisions(decisions)
|
39
|
-
decisions.each_with_object(Hash.new(0)) do |score, counts|
|
40
|
-
counts[score] += 1
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|