scoruby 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +10 -0
- data/Gemfile.lock +67 -0
- data/lib/scoruby/decision.rb +17 -0
- data/lib/scoruby/features.rb +18 -0
- data/lib/scoruby/models/decision_tree.rb +46 -0
- data/lib/scoruby/models/gbm.rb +31 -0
- data/lib/scoruby/models/random_forest.rb +26 -0
- data/lib/scoruby/models_factory.rb +31 -0
- data/lib/scoruby/node.rb +38 -0
- data/lib/scoruby/predicate_factory.rb +19 -0
- data/lib/scoruby/predicates/compound_predicate.rb +44 -0
- data/lib/scoruby/predicates/false_predicate.rb +17 -0
- data/lib/scoruby/predicates/simple_predicate.rb +47 -0
- data/lib/scoruby/predicates/simple_set_predicate.rb +33 -0
- data/lib/scoruby/predicates/true_predicate.rb +17 -0
- data/lib/scoruby/version.rb +1 -1
- data/lib/scoruby.rb +1 -2
- data/scoruby.gemspec +1 -1
- metadata +20 -17
- data/lib/decision.rb +0 -15
- data/lib/features.rb +0 -16
- data/lib/models/decision_tree.rb +0 -42
- data/lib/models/gbm.rb +0 -29
- data/lib/models/random_forest.rb +0 -25
- data/lib/models_factory.rb +0 -28
- data/lib/node.rb +0 -36
- data/lib/predicate_factory.rb +0 -18
- data/lib/predicates/compound_predicate.rb +0 -40
- data/lib/predicates/false_predicate.rb +0 -13
- data/lib/predicates/simple_predicate.rb +0 -43
- data/lib/predicates/simple_set_predicate.rb +0 -29
- data/lib/predicates/true_predicate.rb +0 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 59d91f99c8a04a124993b71950d425f6b1b89353
|
4
|
+
data.tar.gz: ee709d362a7699749561a7d1cb60f7b7aa40c902
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4d433e761e5fc203d298ecec3fb5019e024ae6cd753296f08b304fc0dd790017f5ff0e8621a34995811d6db21c475f3e91a41d079722c5c1ec888d151740c9c6
|
7
|
+
data.tar.gz: c347e88a7cf8e5345be89160f6f6f6f07ff2bb7dfed93401f02700e898886b3205e0ced850ba1fad247779f4b0f16c712b751aca46be15ac0d75b6fe3f1cc9d1
|
data/.gitignore
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
scoruby (0.2.2)
|
5
|
+
nokogiri (~> 1.7)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
coderay (1.1.0)
|
11
|
+
coveralls (0.8.20)
|
12
|
+
json (>= 1.8, < 3)
|
13
|
+
simplecov (~> 0.14.1)
|
14
|
+
term-ansicolor (~> 1.3)
|
15
|
+
thor (~> 0.19.4)
|
16
|
+
tins (~> 1.6)
|
17
|
+
diff-lcs (1.2.5)
|
18
|
+
docile (1.1.5)
|
19
|
+
json (2.1.0)
|
20
|
+
method_source (0.8.2)
|
21
|
+
mini_portile2 (2.1.0)
|
22
|
+
nokogiri (1.7.0.1)
|
23
|
+
mini_portile2 (~> 2.1.0)
|
24
|
+
pry (0.10.3)
|
25
|
+
coderay (~> 1.1.0)
|
26
|
+
method_source (~> 0.8.1)
|
27
|
+
slop (~> 3.4)
|
28
|
+
rake (12.0.0)
|
29
|
+
rspec (3.5.0)
|
30
|
+
rspec-core (~> 3.5.0)
|
31
|
+
rspec-expectations (~> 3.5.0)
|
32
|
+
rspec-mocks (~> 3.5.0)
|
33
|
+
rspec-core (3.5.4)
|
34
|
+
rspec-support (~> 3.5.0)
|
35
|
+
rspec-expectations (3.5.0)
|
36
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
37
|
+
rspec-support (~> 3.5.0)
|
38
|
+
rspec-mocks (3.5.0)
|
39
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
40
|
+
rspec-support (~> 3.5.0)
|
41
|
+
rspec-support (3.5.0)
|
42
|
+
ruby-prof (0.16.2)
|
43
|
+
simplecov (0.14.1)
|
44
|
+
docile (~> 1.1.0)
|
45
|
+
json (>= 1.8, < 3)
|
46
|
+
simplecov-html (~> 0.10.0)
|
47
|
+
simplecov-html (0.10.0)
|
48
|
+
slop (3.6.0)
|
49
|
+
term-ansicolor (1.6.0)
|
50
|
+
tins (~> 1.0)
|
51
|
+
thor (0.19.4)
|
52
|
+
tins (1.13.2)
|
53
|
+
|
54
|
+
PLATFORMS
|
55
|
+
ruby
|
56
|
+
|
57
|
+
DEPENDENCIES
|
58
|
+
bundler (~> 1.10)
|
59
|
+
coveralls
|
60
|
+
pry (~> 0.10)
|
61
|
+
rake (~> 12.0)
|
62
|
+
rspec (~> 3.5)
|
63
|
+
ruby-prof
|
64
|
+
scoruby!
|
65
|
+
|
66
|
+
BUNDLED WITH
|
67
|
+
1.11.2
|
data/lib/scoruby/decision.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
module Scoruby
|
2
|
+
class Decision
|
3
|
+
|
4
|
+
attr_reader :score, :score_distribution
|
5
|
+
|
6
|
+
def initialize(score, score_distributions)
|
7
|
+
@score = score
|
8
|
+
return if score_distributions.empty?
|
9
|
+
|
10
|
+
@score_distribution = {}
|
11
|
+
score_distributions.each {|score_distribution|
|
12
|
+
attributes = score_distribution.attributes
|
13
|
+
@score_distribution[attributes['value'].to_s] = attributes['probability'].to_s
|
14
|
+
}
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/lib/scoruby/features.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
module Scoruby
|
2
|
+
class Features
|
3
|
+
|
4
|
+
attr_reader :formatted
|
5
|
+
|
6
|
+
def initialize(features)
|
7
|
+
@formatted = format_booleans(features)
|
8
|
+
end
|
9
|
+
|
10
|
+
def format_booleans(features)
|
11
|
+
features.map {|k, v|
|
12
|
+
features[k] = 'f' if v == false
|
13
|
+
features[k] = 't' if v == true
|
14
|
+
}
|
15
|
+
features
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/scoruby/models/decision_tree.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'node'
|
2
|
+
|
3
|
+
module Scoruby
|
4
|
+
module Models
|
5
|
+
class DecisionTree
|
6
|
+
|
7
|
+
attr_reader :root
|
8
|
+
|
9
|
+
def initialize(tree_xml)
|
10
|
+
@id = tree_xml.attribute('id')
|
11
|
+
@root = Node.new(tree_xml.xpath('TreeModel/Node'))
|
12
|
+
end
|
13
|
+
|
14
|
+
def decide(features)
|
15
|
+
curr = @root
|
16
|
+
while curr.children[0]
|
17
|
+
prev = curr
|
18
|
+
curr = step(curr, features)
|
19
|
+
return if didnt_step?(curr, prev)
|
20
|
+
end
|
21
|
+
|
22
|
+
curr.decision
|
23
|
+
end
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
def step(curr, features)
|
28
|
+
curr = step_on_true(curr, features, 0)
|
29
|
+
curr = step_on_true(curr, features, 1)
|
30
|
+
curr = step_on_true(curr, features, 2)
|
31
|
+
curr
|
32
|
+
end
|
33
|
+
|
34
|
+
def step_on_true(curr, features, num)
|
35
|
+
return curr.children[num] if curr.children && curr.children[num] && curr.children[num].true?(features)
|
36
|
+
curr
|
37
|
+
end
|
38
|
+
|
39
|
+
def didnt_step?(curr, prev)
|
40
|
+
return false if (prev.pred != curr.pred)
|
41
|
+
Scoruby.logger.error "Null tree: #{@id}, bad feature: #{curr.children[0].pred.field }"
|
42
|
+
true
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
data/lib/scoruby/models/gbm.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'models/decision_tree'
|
2
|
+
require 'features'
|
3
|
+
|
4
|
+
module Scoruby
|
5
|
+
module Models
|
6
|
+
class Gbm
|
7
|
+
GBM_FOREST_XPATH = '//Segmentation[@multipleModelMethod="sum"]/Segment'
|
8
|
+
CONST_XPATH = '//Target/@rescaleConstant'
|
9
|
+
|
10
|
+
def initialize(xml)
|
11
|
+
@decision_trees = xml.xpath(GBM_FOREST_XPATH).collect {|xml_tree|
|
12
|
+
DecisionTree.new(xml_tree)
|
13
|
+
}
|
14
|
+
@const = Float(xml.xpath(CONST_XPATH).to_s)
|
15
|
+
end
|
16
|
+
|
17
|
+
def tree_count
|
18
|
+
@decision_trees.count
|
19
|
+
end
|
20
|
+
|
21
|
+
def score(features)
|
22
|
+
formatted_features = Features.new(features).formatted
|
23
|
+
x = @decision_trees.map {|dt|
|
24
|
+
score = dt.decide(formatted_features).score
|
25
|
+
score.to_s.to_f
|
26
|
+
}.reduce(:+) + @const
|
27
|
+
Math.exp(x) / (1 + Math.exp(x))
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
data/lib/scoruby/models/random_forest.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
module Scoruby
|
2
|
+
module Models
|
3
|
+
class RandomForest
|
4
|
+
RF_FOREST_XPATH = 'PMML/MiningModel/Segmentation/Segment'
|
5
|
+
|
6
|
+
def initialize(xml)
|
7
|
+
xml_trees = xml.xpath(RF_FOREST_XPATH)
|
8
|
+
@decision_trees = xml_trees.collect {|xml_tree|
|
9
|
+
DecisionTree.new(xml_tree)
|
10
|
+
}
|
11
|
+
end
|
12
|
+
|
13
|
+
def decisions_count(features)
|
14
|
+
formatted_features = Features.new(features).formatted
|
15
|
+
decisions = @decision_trees.collect {|decision_tree|
|
16
|
+
decision_tree.decide(formatted_features).score
|
17
|
+
}
|
18
|
+
decisions.inject(Hash.new(0)) {|h, e| h[e] += 1; h}
|
19
|
+
end
|
20
|
+
|
21
|
+
def predict(features)
|
22
|
+
decisions_count(features).max_by {|_, v| v}[0]
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
data/lib/scoruby/models_factory.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'models/decision_tree'
|
2
|
+
require 'models/gbm'
|
3
|
+
require 'models/random_forest'
|
4
|
+
|
5
|
+
module Scoruby
|
6
|
+
class ModelsFactory
|
7
|
+
RANDOM_FOREST_MODEL = 'randomForest_Model'
|
8
|
+
GBM_INDICATION = '//OutputField[@name="scaledGbmValue"]'
|
9
|
+
MODEL_NOT_SUPPORTED_ERROR = 'model not supported'
|
10
|
+
|
11
|
+
def self.factory_for(xml)
|
12
|
+
return Models::RandomForest.new(xml) if random_forest?(xml)
|
13
|
+
return Models::Gbm.new(xml) if gbm?(xml)
|
14
|
+
return Models::DecisionTree.new(xml.child) if decision_tree?(xml)
|
15
|
+
|
16
|
+
raise MODEL_NOT_SUPPORTED_ERROR
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.decision_tree?(xml)
|
20
|
+
!xml.xpath('PMML/TreeModel').empty?
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.random_forest?(xml)
|
24
|
+
xml.xpath('PMML/MiningModel/@modelName').to_s == RANDOM_FOREST_MODEL
|
25
|
+
end
|
26
|
+
|
27
|
+
def self.gbm?(xml)
|
28
|
+
!xml.xpath(GBM_INDICATION).empty?
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
data/lib/scoruby/node.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'predicate_factory'
|
2
|
+
require 'decision'
|
3
|
+
|
4
|
+
module Scoruby
|
5
|
+
class Node
|
6
|
+
|
7
|
+
attr_reader :decision, :pred, :children
|
8
|
+
|
9
|
+
def initialize(xml)
|
10
|
+
children = xml.children
|
11
|
+
|
12
|
+
@decision = Decision.new(xml.attribute('score').to_s,
|
13
|
+
children.select {|c| c.name == 'ScoreDistribution'})
|
14
|
+
|
15
|
+
children = remove_nodes(children)
|
16
|
+
|
17
|
+
pred_xml = children[0]
|
18
|
+
@pred = PredicateFactory.for(pred_xml)
|
19
|
+
@children = []
|
20
|
+
|
21
|
+
return if children.count == 1
|
22
|
+
|
23
|
+
@children << Node.new(children[1]) if children[1]
|
24
|
+
@children << Node.new(children[2]) if children[2]
|
25
|
+
@children << Node.new(children[3]) if children[3]
|
26
|
+
end
|
27
|
+
|
28
|
+
def true?(features)
|
29
|
+
@pred.nil? || @pred.true?(features)
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
def remove_nodes(children)
|
35
|
+
children.reject {|c| %w(Extension ScoreDistribution).include? c.name}
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
data/lib/scoruby/predicate_factory.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'predicates/compound_predicate'
|
2
|
+
require 'predicates/simple_predicate'
|
3
|
+
require 'predicates/simple_set_predicate'
|
4
|
+
require 'predicates/true_predicate'
|
5
|
+
require 'predicates/false_predicate'
|
6
|
+
|
7
|
+
module Scoruby
|
8
|
+
class PredicateFactory
|
9
|
+
|
10
|
+
def self.for(pred_xml)
|
11
|
+
return Predicates::SimplePredicate.new(pred_xml) if pred_xml.name == 'SimplePredicate'
|
12
|
+
return Predicates::SimpleSetPredicate.new(pred_xml) if pred_xml.name == 'SimpleSetPredicate'
|
13
|
+
return Predicates::CompoundPredicate.new(pred_xml) if pred_xml.name == 'CompoundPredicate'
|
14
|
+
return Predicates::TruePredicate.new if pred_xml.name == 'True'
|
15
|
+
return Predicates::FalsePredicate.new if pred_xml.name == 'False'
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
data/lib/scoruby/predicates/compound_predicate.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
module Scoruby
|
2
|
+
module Predicates
|
3
|
+
class CompoundPredicate
|
4
|
+
|
5
|
+
attr_reader :field
|
6
|
+
|
7
|
+
def initialize(pred_xml)
|
8
|
+
attributes = pred_xml.attributes
|
9
|
+
children = pred_xml.children
|
10
|
+
|
11
|
+
@boolean_operator = attributes['booleanOperator'].value
|
12
|
+
@predicates = []
|
13
|
+
@predicates << PredicateFactory.for(children[0])
|
14
|
+
@predicates << PredicateFactory.for(children[1])
|
15
|
+
@field = @predicates.map(&:field).flatten.compact
|
16
|
+
end
|
17
|
+
|
18
|
+
def true?(features)
|
19
|
+
return surrogate?(features) if @boolean_operator == 'surrogate'
|
20
|
+
return or?(features) if @boolean_operator == 'or'
|
21
|
+
and?(features) if @boolean_operator == 'and'
|
22
|
+
end
|
23
|
+
|
24
|
+
def is_missing?(features)
|
25
|
+
@field.any? {|f| !features.keys.include?(f)}
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
def surrogate?(features)
|
31
|
+
return @predicates[1].true?(features) if @predicates[0].is_missing?(features)
|
32
|
+
@predicates[0].true?(features)
|
33
|
+
end
|
34
|
+
|
35
|
+
def or?(features)
|
36
|
+
@predicates.any? {|p| p.true?(features)}
|
37
|
+
end
|
38
|
+
|
39
|
+
def and?(features)
|
40
|
+
@predicates.all? {|p| p.true?(features)}
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
data/lib/scoruby/predicates/simple_predicate.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
module Scoruby
|
2
|
+
module Predicates
|
3
|
+
class SimplePredicate
|
4
|
+
|
5
|
+
GREATER_THAN = 'greaterThan'
|
6
|
+
LESS_THAN = 'lessThan'
|
7
|
+
LESS_OR_EQUAL = 'lessOrEqual'
|
8
|
+
GREATER_OR_EQUAL = 'greaterOrEqual'
|
9
|
+
MATH_OPS = [GREATER_THAN, LESS_THAN, LESS_OR_EQUAL, GREATER_OR_EQUAL]
|
10
|
+
EQUAL = 'equal'
|
11
|
+
IS_MISSING = 'isMissing'
|
12
|
+
|
13
|
+
attr_reader :field
|
14
|
+
|
15
|
+
def initialize(pred_xml)
|
16
|
+
attributes = pred_xml.attributes
|
17
|
+
|
18
|
+
@field = attributes['field'].value.to_sym
|
19
|
+
@operator = attributes['operator'].value
|
20
|
+
return if @operator == IS_MISSING
|
21
|
+
@value = attributes['value'].value
|
22
|
+
end
|
23
|
+
|
24
|
+
def true?(features)
|
25
|
+
return num_true?(features) if MATH_OPS.include?(@operator)
|
26
|
+
return features[@field] == @value if @operator == EQUAL
|
27
|
+
features[field].nil? || !features.has_key?(field) if @operator == IS_MISSING
|
28
|
+
end
|
29
|
+
|
30
|
+
def is_missing?(features)
|
31
|
+
!features.keys.include?(@field)
|
32
|
+
end
|
33
|
+
|
34
|
+
private
|
35
|
+
|
36
|
+
def num_true?(features)
|
37
|
+
return false unless features[@field]
|
38
|
+
curr_value = Float(features[@field])
|
39
|
+
value = Float(@value)
|
40
|
+
return curr_value > value if @operator == GREATER_THAN
|
41
|
+
return curr_value < value if @operator == LESS_THAN
|
42
|
+
return curr_value <= value if @operator == LESS_OR_EQUAL
|
43
|
+
curr_value >= value if @operator == GREATER_OR_EQUAL
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
data/lib/scoruby/predicates/simple_set_predicate.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
module Scoruby
|
2
|
+
module Predicates
|
3
|
+
class SimpleSetPredicate
|
4
|
+
|
5
|
+
IS_IN = 'isIn'
|
6
|
+
|
7
|
+
attr_reader :field
|
8
|
+
|
9
|
+
def initialize(pred_xml)
|
10
|
+
attributes = pred_xml.attributes
|
11
|
+
@field = attributes['field'].value.to_sym
|
12
|
+
@array = single_or_quoted_words(pred_xml.children[0].content)
|
13
|
+
@operator = attributes['booleanOperator'].value
|
14
|
+
end
|
15
|
+
|
16
|
+
def true?(features)
|
17
|
+
@array.include? features[@field] if @operator == IS_IN
|
18
|
+
end
|
19
|
+
|
20
|
+
def is_missing?(features)
|
21
|
+
!features.keys.include?(@field)
|
22
|
+
end
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
def single_or_quoted_words(string)
|
27
|
+
string.split(/\s(?=(?:[^"]|"[^"]*")*$)/).
|
28
|
+
reject(&:empty?).
|
29
|
+
map {|w| w.tr('"', '')}
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
data/lib/scoruby/version.rb
CHANGED
data/lib/scoruby.rb
CHANGED
data/scoruby.gemspec
CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |spec|
|
|
16
16
|
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
17
17
|
spec.bindir = "exe"
|
18
18
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
19
|
-
spec.require_paths = ["lib", "lib/random_forest", "lib/gbm"]
|
19
|
+
spec.require_paths = ["lib", "lib/scoruby", "lib/scoruby/models/random_forest", "lib/scoruby/models/gbm"]
|
20
20
|
|
21
21
|
spec.add_development_dependency "bundler", "~> 1.10"
|
22
22
|
spec.add_development_dependency "rake", "~> 12.0"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scoruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Asaf Schers
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-07-
|
11
|
+
date: 2017-07-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -115,29 +115,31 @@ executables: []
|
|
115
115
|
extensions: []
|
116
116
|
extra_rdoc_files: []
|
117
117
|
files:
|
118
|
+
- ".gitignore"
|
118
119
|
- ".rspec"
|
119
120
|
- ".travis.yml"
|
120
121
|
- CODE_OF_CONDUCT.md
|
121
122
|
- Gemfile
|
123
|
+
- Gemfile.lock
|
122
124
|
- LICENSE.txt
|
123
125
|
- README.md
|
124
126
|
- Rakefile
|
125
127
|
- bin/console
|
126
128
|
- bin/setup
|
127
|
-
- lib/decision.rb
|
128
|
-
- lib/features.rb
|
129
|
-
- lib/models/decision_tree.rb
|
130
|
-
- lib/models/gbm.rb
|
131
|
-
- lib/models/random_forest.rb
|
132
|
-
- lib/models_factory.rb
|
133
|
-
- lib/node.rb
|
134
|
-
- lib/predicate_factory.rb
|
135
|
-
- lib/predicates/compound_predicate.rb
|
136
|
-
- lib/predicates/false_predicate.rb
|
137
|
-
- lib/predicates/simple_predicate.rb
|
138
|
-
- lib/predicates/simple_set_predicate.rb
|
139
|
-
- lib/predicates/true_predicate.rb
|
140
129
|
- lib/scoruby.rb
|
130
|
+
- lib/scoruby/decision.rb
|
131
|
+
- lib/scoruby/features.rb
|
132
|
+
- lib/scoruby/models/decision_tree.rb
|
133
|
+
- lib/scoruby/models/gbm.rb
|
134
|
+
- lib/scoruby/models/random_forest.rb
|
135
|
+
- lib/scoruby/models_factory.rb
|
136
|
+
- lib/scoruby/node.rb
|
137
|
+
- lib/scoruby/predicate_factory.rb
|
138
|
+
- lib/scoruby/predicates/compound_predicate.rb
|
139
|
+
- lib/scoruby/predicates/false_predicate.rb
|
140
|
+
- lib/scoruby/predicates/simple_predicate.rb
|
141
|
+
- lib/scoruby/predicates/simple_set_predicate.rb
|
142
|
+
- lib/scoruby/predicates/true_predicate.rb
|
141
143
|
- lib/scoruby/version.rb
|
142
144
|
- scoruby.gemspec
|
143
145
|
homepage: https://github.com/asafschers/scoruby
|
@@ -148,8 +150,9 @@ post_install_message:
|
|
148
150
|
rdoc_options: []
|
149
151
|
require_paths:
|
150
152
|
- lib
|
151
|
-
- lib/random_forest
|
152
|
-
- lib/gbm
|
153
|
+
- lib/scoruby
|
154
|
+
- lib/scoruby/models/random_forest
|
155
|
+
- lib/scoruby/models/gbm
|
153
156
|
required_ruby_version: !ruby/object:Gem::Requirement
|
154
157
|
requirements:
|
155
158
|
- - ">="
|
data/lib/decision.rb
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
class Decision
|
2
|
-
|
3
|
-
attr_reader :score, :score_distribution
|
4
|
-
|
5
|
-
def initialize(score, score_distributions)
|
6
|
-
@score = score
|
7
|
-
return if score_distributions.empty?
|
8
|
-
|
9
|
-
@score_distribution = {}
|
10
|
-
score_distributions.each { |score_distribution|
|
11
|
-
attributes = score_distribution.attributes
|
12
|
-
@score_distribution[attributes['value'].to_s] = attributes['probability'].to_s
|
13
|
-
}
|
14
|
-
end
|
15
|
-
end
|
data/lib/features.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
class Features
|
2
|
-
|
3
|
-
attr_reader :formatted
|
4
|
-
|
5
|
-
def initialize(features)
|
6
|
-
@formatted = format_booleans(features)
|
7
|
-
end
|
8
|
-
|
9
|
-
def format_booleans(features)
|
10
|
-
features.map { |k, v|
|
11
|
-
features[k] = 'f' if v == false
|
12
|
-
features[k] = 't' if v == true
|
13
|
-
}
|
14
|
-
features
|
15
|
-
end
|
16
|
-
end
|
data/lib/models/decision_tree.rb
DELETED
@@ -1,42 +0,0 @@
|
|
1
|
-
require 'node'
|
2
|
-
|
3
|
-
class DecisionTree
|
4
|
-
|
5
|
-
attr_reader :root
|
6
|
-
|
7
|
-
def initialize(tree_xml)
|
8
|
-
@id = tree_xml.attribute('id')
|
9
|
-
@root = Node.new(tree_xml.xpath('TreeModel/Node'))
|
10
|
-
end
|
11
|
-
|
12
|
-
def decide(features)
|
13
|
-
curr = @root
|
14
|
-
while curr.children[0]
|
15
|
-
prev = curr
|
16
|
-
curr = step(curr, features)
|
17
|
-
return if didnt_step?(curr, prev)
|
18
|
-
end
|
19
|
-
|
20
|
-
curr.decision
|
21
|
-
end
|
22
|
-
|
23
|
-
private
|
24
|
-
|
25
|
-
def step(curr, features)
|
26
|
-
curr = step_on_true(curr, features, 0)
|
27
|
-
curr = step_on_true(curr, features, 1)
|
28
|
-
curr = step_on_true(curr, features, 2)
|
29
|
-
curr
|
30
|
-
end
|
31
|
-
|
32
|
-
def step_on_true(curr, features, num)
|
33
|
-
return curr.children[num] if curr.children && curr.children[num] && curr.children[num].true?(features)
|
34
|
-
curr
|
35
|
-
end
|
36
|
-
|
37
|
-
def didnt_step?(curr, prev)
|
38
|
-
return false if (prev.pred != curr.pred)
|
39
|
-
Scoruby.logger.error "Null tree: #{@id}, bad feature: #{curr.children[0].pred.field }"
|
40
|
-
true
|
41
|
-
end
|
42
|
-
end
|
data/lib/models/gbm.rb
DELETED
@@ -1,29 +0,0 @@
|
|
1
|
-
require 'models/decision_tree'
|
2
|
-
require 'features'
|
3
|
-
|
4
|
-
class Gbm
|
5
|
-
GBM_FOREST_XPATH = '//Segmentation[@multipleModelMethod="sum"]/Segment'
|
6
|
-
CONST_XPATH = '//Target/@rescaleConstant'
|
7
|
-
|
8
|
-
def initialize(xml)
|
9
|
-
@decision_trees = xml.xpath(GBM_FOREST_XPATH).collect{ |xml_tree|
|
10
|
-
DecisionTree.new(xml_tree)
|
11
|
-
}
|
12
|
-
@const = Float(xml.xpath(CONST_XPATH).to_s)
|
13
|
-
end
|
14
|
-
|
15
|
-
def tree_count
|
16
|
-
@decision_trees.count
|
17
|
-
end
|
18
|
-
|
19
|
-
def score(features)
|
20
|
-
formatted_features = Features.new(features).formatted
|
21
|
-
x = @decision_trees.map { |dt|
|
22
|
-
score = dt.decide(formatted_features).score
|
23
|
-
score.to_s.to_f
|
24
|
-
}.reduce(:+) + @const
|
25
|
-
Math.exp(x) / (1 + Math.exp(x))
|
26
|
-
end
|
27
|
-
|
28
|
-
end
|
29
|
-
|
data/lib/models/random_forest.rb
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
require 'models/decision_tree'
|
2
|
-
|
3
|
-
class RandomForest
|
4
|
-
RF_FOREST_XPATH = 'PMML/MiningModel/Segmentation/Segment'
|
5
|
-
|
6
|
-
def initialize(xml)
|
7
|
-
xml_trees = xml.xpath(RF_FOREST_XPATH)
|
8
|
-
@decision_trees = xml_trees.collect{ |xml_tree|
|
9
|
-
DecisionTree.new(xml_tree)
|
10
|
-
}
|
11
|
-
end
|
12
|
-
|
13
|
-
def decisions_count(features)
|
14
|
-
formatted_features = Features.new(features).formatted
|
15
|
-
decisions = @decision_trees.collect { |decision_tree|
|
16
|
-
decision_tree.decide(formatted_features).score
|
17
|
-
}
|
18
|
-
decisions.inject(Hash.new(0)) { |h, e| h[e] += 1 ; h }
|
19
|
-
end
|
20
|
-
|
21
|
-
def predict(features)
|
22
|
-
decisions_count(features).max_by {|_, v| v }[0]
|
23
|
-
end
|
24
|
-
|
25
|
-
end
|
data/lib/models_factory.rb
DELETED
@@ -1,28 +0,0 @@
|
|
1
|
-
require 'models/random_forest'
|
2
|
-
require 'models/gbm'
|
3
|
-
|
4
|
-
class ModelsFactory
|
5
|
-
RANDOM_FOREST_MODEL = 'randomForest_Model'
|
6
|
-
GBM_INDICATION = '//OutputField[@name="scaledGbmValue"]'
|
7
|
-
MODEL_NOT_SUPPORTED_ERROR = 'model not supported'
|
8
|
-
|
9
|
-
def self.factory_for(xml)
|
10
|
-
return RandomForest.new(xml) if random_forest?(xml)
|
11
|
-
return Gbm.new(xml) if gbm?(xml)
|
12
|
-
return DecisionTree.new(xml.child) if decision_tree?(xml)
|
13
|
-
|
14
|
-
raise MODEL_NOT_SUPPORTED_ERROR
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.decision_tree?(xml)
|
18
|
-
!xml.xpath('PMML/TreeModel').empty?
|
19
|
-
end
|
20
|
-
|
21
|
-
def self.random_forest?(xml)
|
22
|
-
xml.xpath('PMML/MiningModel/@modelName').to_s == RANDOM_FOREST_MODEL
|
23
|
-
end
|
24
|
-
|
25
|
-
def self.gbm?(xml)
|
26
|
-
!xml.xpath(GBM_INDICATION).empty?
|
27
|
-
end
|
28
|
-
end
|
data/lib/node.rb
DELETED
@@ -1,36 +0,0 @@
|
|
1
|
-
require 'predicate_factory'
|
2
|
-
require 'decision'
|
3
|
-
|
4
|
-
class Node
|
5
|
-
|
6
|
-
attr_reader :decision, :pred, :children
|
7
|
-
|
8
|
-
def initialize(xml)
|
9
|
-
children = xml.children
|
10
|
-
|
11
|
-
@decision = Decision.new(xml.attribute('score').to_s,
|
12
|
-
children.select { |c| c.name == 'ScoreDistribution' } )
|
13
|
-
|
14
|
-
children = remove_nodes(children)
|
15
|
-
|
16
|
-
pred_xml = children[0]
|
17
|
-
@pred = PredicateFactory.for(pred_xml)
|
18
|
-
@children = []
|
19
|
-
|
20
|
-
return if children.count == 1
|
21
|
-
|
22
|
-
@children << Node.new(children[1]) if children[1]
|
23
|
-
@children << Node.new(children[2]) if children[2]
|
24
|
-
@children << Node.new(children[3]) if children[3]
|
25
|
-
end
|
26
|
-
|
27
|
-
def true?(features)
|
28
|
-
@pred.nil? || @pred.true?(features)
|
29
|
-
end
|
30
|
-
|
31
|
-
private
|
32
|
-
|
33
|
-
def remove_nodes(children)
|
34
|
-
children.reject { |c| %w(Extension ScoreDistribution).include? c.name }
|
35
|
-
end
|
36
|
-
end
|
data/lib/predicate_factory.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
require 'predicates/compound_predicate'
|
2
|
-
require 'predicates/simple_predicate'
|
3
|
-
require 'predicates/simple_set_predicate'
|
4
|
-
require 'predicates/true_predicate'
|
5
|
-
require 'predicates/false_predicate'
|
6
|
-
|
7
|
-
class PredicateFactory
|
8
|
-
|
9
|
-
def self.for(pred_xml)
|
10
|
-
return SimplePredicate.new(pred_xml) if pred_xml.name == 'SimplePredicate'
|
11
|
-
return SimpleSetPredicate.new(pred_xml) if pred_xml.name == 'SimpleSetPredicate'
|
12
|
-
return CompoundPredicate.new(pred_xml) if pred_xml.name == 'CompoundPredicate'
|
13
|
-
return TruePredicate.new if pred_xml.name == 'True'
|
14
|
-
return FalsePredicate.new if pred_xml.name == 'False'
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
|
data/lib/predicates/compound_predicate.rb
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
class CompoundPredicate
|
2
|
-
|
3
|
-
attr_reader :field
|
4
|
-
|
5
|
-
def initialize(pred_xml)
|
6
|
-
attributes = pred_xml.attributes
|
7
|
-
children = pred_xml.children
|
8
|
-
|
9
|
-
@boolean_operator = attributes['booleanOperator'].value
|
10
|
-
@predicates = []
|
11
|
-
@predicates << PredicateFactory.for(children[0])
|
12
|
-
@predicates << PredicateFactory.for(children[1])
|
13
|
-
@field = @predicates.map(&:field).flatten.compact
|
14
|
-
end
|
15
|
-
|
16
|
-
def true?(features)
|
17
|
-
return surrogate?(features) if @boolean_operator == 'surrogate'
|
18
|
-
return or?(features) if @boolean_operator == 'or'
|
19
|
-
and?(features) if @boolean_operator == 'and'
|
20
|
-
end
|
21
|
-
|
22
|
-
def is_missing?(features)
|
23
|
-
@field.any? { |f| !features.keys.include?(f) }
|
24
|
-
end
|
25
|
-
|
26
|
-
private
|
27
|
-
|
28
|
-
def surrogate?(features)
|
29
|
-
return @predicates[1].true?(features) if @predicates[0].is_missing?(features)
|
30
|
-
@predicates[0].true?(features)
|
31
|
-
end
|
32
|
-
|
33
|
-
def or?(features)
|
34
|
-
@predicates.any? { |p| p.true?(features) }
|
35
|
-
end
|
36
|
-
|
37
|
-
def and?(features)
|
38
|
-
@predicates.all? { |p| p.true?(features) }
|
39
|
-
end
|
40
|
-
end
|
data/lib/predicates/simple_predicate.rb
DELETED
@@ -1,43 +0,0 @@
|
|
1
|
-
class SimplePredicate
|
2
|
-
|
3
|
-
GREATER_THAN = 'greaterThan'
|
4
|
-
LESS_THAN = 'lessThan'
|
5
|
-
LESS_OR_EQUAL = 'lessOrEqual'
|
6
|
-
GREATER_OR_EQUAL = 'greaterOrEqual'
|
7
|
-
MATH_OPS = [GREATER_THAN, LESS_THAN, LESS_OR_EQUAL, GREATER_OR_EQUAL]
|
8
|
-
EQUAL = 'equal'
|
9
|
-
IS_MISSING = 'isMissing'
|
10
|
-
|
11
|
-
attr_reader :field
|
12
|
-
|
13
|
-
def initialize(pred_xml)
|
14
|
-
attributes = pred_xml.attributes
|
15
|
-
|
16
|
-
@field = attributes['field'].value.to_sym
|
17
|
-
@operator = attributes['operator'].value
|
18
|
-
return if @operator == IS_MISSING
|
19
|
-
@value = attributes['value'].value
|
20
|
-
end
|
21
|
-
|
22
|
-
def true?(features)
|
23
|
-
return num_true?(features) if MATH_OPS.include?(@operator)
|
24
|
-
return features[@field] == @value if @operator == EQUAL
|
25
|
-
features[field].nil? || !features.has_key?(field) if @operator == IS_MISSING
|
26
|
-
end
|
27
|
-
|
28
|
-
def is_missing?(features)
|
29
|
-
!features.keys.include?(@field)
|
30
|
-
end
|
31
|
-
|
32
|
-
private
|
33
|
-
|
34
|
-
def num_true?(features)
|
35
|
-
return false unless features[@field]
|
36
|
-
curr_value = Float(features[@field])
|
37
|
-
value = Float(@value)
|
38
|
-
return curr_value > value if @operator == GREATER_THAN
|
39
|
-
return curr_value < value if @operator == LESS_THAN
|
40
|
-
return curr_value <= value if @operator == LESS_OR_EQUAL
|
41
|
-
curr_value >= value if @operator == GREATER_OR_EQUAL
|
42
|
-
end
|
43
|
-
end
|
data/lib/predicates/simple_set_predicate.rb
DELETED
@@ -1,29 +0,0 @@
|
|
1
|
-
class SimpleSetPredicate
|
2
|
-
|
3
|
-
IS_IN = 'isIn'
|
4
|
-
|
5
|
-
attr_reader :field
|
6
|
-
|
7
|
-
def initialize(pred_xml)
|
8
|
-
attributes = pred_xml.attributes
|
9
|
-
@field = attributes['field'].value.to_sym
|
10
|
-
@array = single_or_quoted_words(pred_xml.children[0].content)
|
11
|
-
@operator = attributes['booleanOperator'].value
|
12
|
-
end
|
13
|
-
|
14
|
-
def true?(features)
|
15
|
-
@array.include? features[@field] if @operator == IS_IN
|
16
|
-
end
|
17
|
-
|
18
|
-
def is_missing?(features)
|
19
|
-
!features.keys.include?(@field)
|
20
|
-
end
|
21
|
-
|
22
|
-
private
|
23
|
-
|
24
|
-
def single_or_quoted_words(string)
|
25
|
-
string.split(/\s(?=(?:[^"]|"[^"]*")*$)/).
|
26
|
-
reject(&:empty?).
|
27
|
-
map { |w| w.tr('"','')}
|
28
|
-
end
|
29
|
-
end
|