scoruby 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +10 -0
- data/Gemfile.lock +67 -0
- data/lib/scoruby/decision.rb +17 -0
- data/lib/scoruby/features.rb +18 -0
- data/lib/scoruby/models/decision_tree.rb +46 -0
- data/lib/scoruby/models/gbm.rb +31 -0
- data/lib/scoruby/models/random_forest.rb +26 -0
- data/lib/scoruby/models_factory.rb +31 -0
- data/lib/scoruby/node.rb +38 -0
- data/lib/scoruby/predicate_factory.rb +19 -0
- data/lib/scoruby/predicates/compound_predicate.rb +44 -0
- data/lib/scoruby/predicates/false_predicate.rb +17 -0
- data/lib/scoruby/predicates/simple_predicate.rb +47 -0
- data/lib/scoruby/predicates/simple_set_predicate.rb +33 -0
- data/lib/scoruby/predicates/true_predicate.rb +17 -0
- data/lib/scoruby/version.rb +1 -1
- data/lib/scoruby.rb +1 -2
- data/scoruby.gemspec +1 -1
- metadata +20 -17
- data/lib/decision.rb +0 -15
- data/lib/features.rb +0 -16
- data/lib/models/decision_tree.rb +0 -42
- data/lib/models/gbm.rb +0 -29
- data/lib/models/random_forest.rb +0 -25
- data/lib/models_factory.rb +0 -28
- data/lib/node.rb +0 -36
- data/lib/predicate_factory.rb +0 -18
- data/lib/predicates/compound_predicate.rb +0 -40
- data/lib/predicates/false_predicate.rb +0 -13
- data/lib/predicates/simple_predicate.rb +0 -43
- data/lib/predicates/simple_set_predicate.rb +0 -29
- data/lib/predicates/true_predicate.rb +0 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 59d91f99c8a04a124993b71950d425f6b1b89353
|
4
|
+
data.tar.gz: ee709d362a7699749561a7d1cb60f7b7aa40c902
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4d433e761e5fc203d298ecec3fb5019e024ae6cd753296f08b304fc0dd790017f5ff0e8621a34995811d6db21c475f3e91a41d079722c5c1ec888d151740c9c6
|
7
|
+
data.tar.gz: c347e88a7cf8e5345be89160f6f6f6f07ff2bb7dfed93401f02700e898886b3205e0ced850ba1fad247779f4b0f16c712b751aca46be15ac0d75b6fe3f1cc9d1
|
data/.gitignore
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
scoruby (0.2.2)
|
5
|
+
nokogiri (~> 1.7)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
coderay (1.1.0)
|
11
|
+
coveralls (0.8.20)
|
12
|
+
json (>= 1.8, < 3)
|
13
|
+
simplecov (~> 0.14.1)
|
14
|
+
term-ansicolor (~> 1.3)
|
15
|
+
thor (~> 0.19.4)
|
16
|
+
tins (~> 1.6)
|
17
|
+
diff-lcs (1.2.5)
|
18
|
+
docile (1.1.5)
|
19
|
+
json (2.1.0)
|
20
|
+
method_source (0.8.2)
|
21
|
+
mini_portile2 (2.1.0)
|
22
|
+
nokogiri (1.7.0.1)
|
23
|
+
mini_portile2 (~> 2.1.0)
|
24
|
+
pry (0.10.3)
|
25
|
+
coderay (~> 1.1.0)
|
26
|
+
method_source (~> 0.8.1)
|
27
|
+
slop (~> 3.4)
|
28
|
+
rake (12.0.0)
|
29
|
+
rspec (3.5.0)
|
30
|
+
rspec-core (~> 3.5.0)
|
31
|
+
rspec-expectations (~> 3.5.0)
|
32
|
+
rspec-mocks (~> 3.5.0)
|
33
|
+
rspec-core (3.5.4)
|
34
|
+
rspec-support (~> 3.5.0)
|
35
|
+
rspec-expectations (3.5.0)
|
36
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
37
|
+
rspec-support (~> 3.5.0)
|
38
|
+
rspec-mocks (3.5.0)
|
39
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
40
|
+
rspec-support (~> 3.5.0)
|
41
|
+
rspec-support (3.5.0)
|
42
|
+
ruby-prof (0.16.2)
|
43
|
+
simplecov (0.14.1)
|
44
|
+
docile (~> 1.1.0)
|
45
|
+
json (>= 1.8, < 3)
|
46
|
+
simplecov-html (~> 0.10.0)
|
47
|
+
simplecov-html (0.10.0)
|
48
|
+
slop (3.6.0)
|
49
|
+
term-ansicolor (1.6.0)
|
50
|
+
tins (~> 1.0)
|
51
|
+
thor (0.19.4)
|
52
|
+
tins (1.13.2)
|
53
|
+
|
54
|
+
PLATFORMS
|
55
|
+
ruby
|
56
|
+
|
57
|
+
DEPENDENCIES
|
58
|
+
bundler (~> 1.10)
|
59
|
+
coveralls
|
60
|
+
pry (~> 0.10)
|
61
|
+
rake (~> 12.0)
|
62
|
+
rspec (~> 3.5)
|
63
|
+
ruby-prof
|
64
|
+
scoruby!
|
65
|
+
|
66
|
+
BUNDLED WITH
|
67
|
+
1.11.2
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Scoruby
  # Outcome attached to a tree node: the node's score plus, when the node
  # carries score-distribution elements, a value => probability lookup.
  class Decision
    attr_reader :score, :score_distribution

    # @param score [Object] the node's score, stored as given
    # @param score_distributions [#empty?, #each] distribution elements; each
    #   must respond to #attributes with an object indexable by 'value' and
    #   'probability'
    def initialize(score, score_distributions)
      @score = score
      # With no distributions, score_distribution is left nil (not {}).
      return if score_distributions.empty?

      @score_distribution = score_distributions.each_with_object({}) do |distribution, table|
        attributes = distribution.attributes
        # Keys and values are both stringified.
        table[attributes['value'].to_s] = attributes['probability'].to_s
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Scoruby
  # Wraps a feature hash and exposes a copy in which boolean values are
  # rewritten to the single-letter strings 't' / 'f'.
  class Features
    attr_reader :formatted

    # @param features [Hash] feature name => value
    def initialize(features)
      @formatted = format_booleans(features)
    end

    # Returns a copy of +features+ with true => 't' and false => 'f'; every
    # other value is passed through untouched.
    #
    # The caller's hash is no longer modified: the conversion is written to a
    # duplicate while the original is only read.
    #
    # @param features [Hash]
    # @return [Hash] converted copy (same class as the input via #dup)
    def format_booleans(features)
      converted = features.dup
      features.each do |name, value|
        if value == true
          converted[name] = 't'
        elsif value == false
          converted[name] = 'f'
        end
      end
      converted
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'node'
|
2
|
+
|
3
|
+
module Scoruby
  module Models
    # A single decision tree built from a segment/tree XML element.
    class DecisionTree
      attr_reader :root

      # @param tree_xml [#attribute, #xpath] XML element holding the tree;
      #   its 'id' attribute is kept for log messages and the element(s) at
      #   'TreeModel/Node' become the root node.
      def initialize(tree_xml)
        @id = tree_xml.attribute('id')
        @root = Node.new(tree_xml.xpath('TreeModel/Node'))
      end

      # Walks from the root to a leaf, one level per iteration, and returns
      # the leaf's decision.
      #
      # @param features [Hash] formatted feature values
      # @return [Object, nil] the decision of the reached leaf, or nil when
      #   some inner node has no child whose predicate holds (an error is
      #   logged in that case)
      def decide(features)
        curr = @root
        while curr.children[0]
          following = step(curr, features)
          return if didnt_step?(following, curr)

          curr = following
        end

        curr.decision
      end

      private

      # Picks the first child (in document order) whose predicate is true.
      # Only the direct children of +curr+ are examined, so a single step can
      # never skip a level or prefer a later sibling over an earlier one.
      #
      # @return [Node] the chosen child, or +curr+ itself when none matches
      def step(curr, features)
        curr.children.find { |child| child.true?(features) } || curr
      end

      # True (after logging) when the walk stayed on the same node.
      # Node identity is compared, so two distinct nodes are never confused
      # even if both happen to carry a nil predicate.
      def didnt_step?(curr, prev)
        return false unless curr.equal?(prev)

        Scoruby.logger.error "Null tree: #{@id}, bad feature: #{curr.children[0].pred.field }"
        true
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'models/decision_tree'
|
2
|
+
require 'features'
|
3
|
+
|
4
|
+
module Scoruby
  module Models
    # Gradient-boosted model: sums the scores of all trees, adds a rescale
    # constant and squashes the total through the logistic function.
    class Gbm
      GBM_FOREST_XPATH = '//Segmentation[@multipleModelMethod="sum"]/Segment'
      CONST_XPATH = '//Target/@rescaleConstant'

      # @param xml [#xpath] parsed model document
      # @raise [ArgumentError] if the rescale constant is not a valid float
      def initialize(xml)
        @decision_trees = xml.xpath(GBM_FOREST_XPATH).map { |xml_tree| DecisionTree.new(xml_tree) }
        @const = Float(xml.xpath(CONST_XPATH).to_s)
      end

      # @return [Integer] number of trees in the model
      def tree_count
        @decision_trees.count
      end

      # @param features [Hash] raw feature values
      # @return [Float] logistic of (sum of tree scores + rescale constant)
      def score(features)
        formatted_features = Features.new(features).formatted
        # Seeding with 0.0 keeps an empty forest from producing nil.
        # NOTE(review): DecisionTree#decide can return nil; calling #score on
        # that nil raises NoMethodError here - confirm desired handling.
        trees_total = @decision_trees.inject(0.0) do |sum, tree|
          sum + tree.decide(formatted_features).score.to_s.to_f
        end
        logistic(trees_total + @const)
      end

      private

      # e^x / (1 + e^x), returning 1.0 instead of NaN once e^x overflows to
      # Infinity.
      def logistic(x)
        exp_x = Math.exp(x)
        return 1.0 if exp_x.infinite?

        exp_x / (1 + exp_x)
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Scoruby
  module Models
    # Random forest: every tree casts one vote (its decision's score) and the
    # most frequent score wins.
    class RandomForest
      RF_FOREST_XPATH = 'PMML/MiningModel/Segmentation/Segment'

      # @param xml [#xpath] parsed model document
      def initialize(xml)
        @decision_trees = xml.xpath(RF_FOREST_XPATH).map { |xml_tree| DecisionTree.new(xml_tree) }
      end

      # Tallies the score chosen by each tree.
      #
      # NOTE(review): DecisionTree#decide can return nil; calling #score on
      # that nil raises NoMethodError here - confirm desired handling.
      #
      # @param features [Hash] raw feature values
      # @return [Hash] score => number of trees that chose it (default 0)
      def decisions_count(features)
        formatted_features = Features.new(features).formatted
        @decision_trees.each_with_object(Hash.new(0)) do |tree, votes|
          votes[tree.decide(formatted_features).score] += 1
        end
      end

      # @param features [Hash] raw feature values
      # @return [Object, nil] the score with the most votes; nil when the
      #   forest has no trees
      def predict(features)
        winner = decisions_count(features).max_by { |_, count| count }
        winner && winner.first
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'models/decision_tree'
|
2
|
+
require 'models/gbm'
|
3
|
+
require 'models/random_forest'
|
4
|
+
|
5
|
+
module Scoruby
  # Chooses the model class that matches a parsed model document.
  class ModelsFactory
    RANDOM_FOREST_MODEL = 'randomForest_Model'.freeze
    GBM_INDICATION = '//OutputField[@name="scaledGbmValue"]'.freeze
    MODEL_NOT_SUPPORTED_ERROR = 'model not supported'.freeze

    # Builds the model for +xml+. Checks run in this order: random forest,
    # GBM, single decision tree.
    #
    # @param xml [#xpath, #child] parsed model document
    # @return [Models::RandomForest, Models::Gbm, Models::DecisionTree]
    # @raise [RuntimeError] 'model not supported' when no check matches
    def self.factory_for(xml)
      if random_forest?(xml)
        Models::RandomForest.new(xml)
      elsif gbm?(xml)
        Models::Gbm.new(xml)
      elsif decision_tree?(xml)
        # A single tree is built from the document's child, not the document.
        Models::DecisionTree.new(xml.child)
      else
        raise MODEL_NOT_SUPPORTED_ERROR
      end
    end

    # @return [Boolean] whether 'PMML/TreeModel' selects anything
    def self.decision_tree?(xml)
      !xml.xpath('PMML/TreeModel').empty?
    end

    # @return [Boolean] whether the mining model is named RANDOM_FOREST_MODEL
    def self.random_forest?(xml)
      xml.xpath('PMML/MiningModel/@modelName').to_s == RANDOM_FOREST_MODEL
    end

    # @return [Boolean] whether a 'scaledGbmValue' output field is present
    def self.gbm?(xml)
      !xml.xpath(GBM_INDICATION).empty?
    end
  end
end
|
data/lib/scoruby/node.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'predicate_factory'
|
2
|
+
require 'decision'
|
3
|
+
|
4
|
+
module Scoruby
  # One node of a decision tree: a predicate guarding the node, the decision
  # stored on it, and its child nodes.
  class Node
    attr_reader :decision, :pred, :children

    # Builds the node and, recursively, all of its child nodes.
    #
    # After 'Extension' and 'ScoreDistribution' elements are removed, the
    # first remaining child element is read as the predicate and every
    # following one as a child node (no fixed upper bound on their number).
    #
    # @param xml [#children, #attribute] XML element(s) of the node
    def initialize(xml)
      xml_children = xml.children

      @decision = Decision.new(xml.attribute('score').to_s,
                               xml_children.select { |c| c.name == 'ScoreDistribution' })

      pred_xml, *child_nodes_xml = remove_nodes(xml_children)
      @pred = PredicateFactory.for(pred_xml)
      @children = child_nodes_xml.map { |child_xml| Node.new(child_xml) }
    end

    # @param features [Hash] formatted feature values
    # @return [Boolean] true when the node has no predicate or its predicate
    #   holds for +features+
    def true?(features)
      @pred.nil? || @pred.true?(features)
    end

    private

    # Drops the child elements that are neither predicate nor child node.
    def remove_nodes(children)
      children.reject { |c| %w(Extension ScoreDistribution).include? c.name }
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'predicates/compound_predicate'
|
2
|
+
require 'predicates/simple_predicate'
|
3
|
+
require 'predicates/simple_set_predicate'
|
4
|
+
require 'predicates/true_predicate'
|
5
|
+
require 'predicates/false_predicate'
|
6
|
+
|
7
|
+
module Scoruby
  # Maps a predicate XML element to the predicate object that evaluates it.
  class PredicateFactory
    # @param pred_xml [#name] predicate element; dispatch is on its name
    # @return [Object, nil] the matching predicate instance, or nil when the
    #   element name is not one of the five handled below
    def self.for(pred_xml)
      case pred_xml.name
      when 'SimplePredicate'    then Predicates::SimplePredicate.new(pred_xml)
      when 'SimpleSetPredicate' then Predicates::SimpleSetPredicate.new(pred_xml)
      when 'CompoundPredicate'  then Predicates::CompoundPredicate.new(pred_xml)
      when 'True'               then Predicates::TruePredicate.new
      when 'False'              then Predicates::FalsePredicate.new
      end
    end
  end
end
|
19
|
+
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Scoruby
  module Predicates
    # Combines two child predicates with a boolean operator
    # ('and', 'or' or 'surrogate').
    class CompoundPredicate
      # @return [Array] fields referenced by the child predicates (flattened,
      #   nils removed)
      attr_reader :field

      # @param pred_xml [#attributes, #children] compound predicate element
      #
      # NOTE(review): only the first two child elements are read; any further
      # children are ignored - confirm models never carry more.
      def initialize(pred_xml)
        attributes = pred_xml.attributes
        children = pred_xml.children

        @boolean_operator = attributes['booleanOperator'].value
        @predicates = [children[0], children[1]].map { |child| PredicateFactory.for(child) }
        @field = @predicates.map(&:field).flatten.compact
      end

      # @param features [Hash] formatted feature values
      # @return [Boolean, nil] result of the combined predicates; nil when
      #   the operator is none of 'surrogate', 'or', 'and'
      def true?(features)
        case @boolean_operator
        when 'surrogate' then surrogate?(features)
        when 'or'        then or?(features)
        when 'and'       then and?(features)
        end
      end

      # @return [Boolean] true when any referenced field has no key in
      #   +features+
      def is_missing?(features)
        @field.any? { |f| !features.key?(f) }
      end

      private

      # Uses the second predicate only when the first one's field is missing.
      def surrogate?(features)
        primary, fallback = @predicates
        chosen = primary.is_missing?(features) ? fallback : primary
        chosen.true?(features)
      end

      def or?(features)
        @predicates.any? { |p| p.true?(features) }
      end

      def and?(features)
        @predicates.all? { |p| p.true?(features) }
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module Scoruby
  module Predicates
    # Compares a single feature against a constant taken from the model XML.
    class SimplePredicate
      GREATER_THAN = 'greaterThan'
      LESS_THAN = 'lessThan'
      LESS_OR_EQUAL = 'lessOrEqual'
      GREATER_OR_EQUAL = 'greaterOrEqual'
      MATH_OPS = [GREATER_THAN, LESS_THAN, LESS_OR_EQUAL, GREATER_OR_EQUAL].freeze
      EQUAL = 'equal'
      IS_MISSING = 'isMissing'
      IS_NOT_MISSING = 'isNotMissing'
      # Operators whose XML element carries no 'value' attribute.
      VALUELESS_OPS = [IS_MISSING, IS_NOT_MISSING].freeze

      attr_reader :field

      # @param pred_xml [#attributes] predicate element; 'field' and
      #   'operator' attributes are required, 'value' is read for every
      #   operator except the missing/not-missing checks
      def initialize(pred_xml)
        attributes = pred_xml.attributes

        @field = attributes['field'].value.to_sym
        @operator = attributes['operator'].value
        # isMissing / isNotMissing have no value; reading it would hit nil.
        return if VALUELESS_OPS.include?(@operator)

        @value = attributes['value'].value
      end

      # @param features [Hash] formatted feature values keyed by symbol
      # @return [Boolean, nil] predicate result; nil for an operator this
      #   class does not handle
      # @raise [ArgumentError, TypeError] from Float() when a numeric operator
      #   meets a non-numeric feature or constant
      def true?(features)
        return num_true?(features) if MATH_OPS.include?(@operator)

        case @operator
        when EQUAL          then features[@field] == @value # @value is the raw XML string
        when IS_MISSING     then value_missing?(features)
        when IS_NOT_MISSING then !value_missing?(features)
        end
      end

      # @return [Boolean] true when +features+ has no key for the field
      def is_missing?(features)
        !features.key?(@field)
      end

      private

      # A feature counts as missing when its value is nil or its key is absent.
      def value_missing?(features)
        features[@field].nil? || !features.key?(@field)
      end

      # Numeric comparison; a nil/false feature value never satisfies it.
      def num_true?(features)
        return false unless features[@field]

        curr_value = Float(features[@field])
        value = Float(@value)
        case @operator
        when GREATER_THAN     then curr_value > value
        when LESS_THAN        then curr_value < value
        when LESS_OR_EQUAL    then curr_value <= value
        when GREATER_OR_EQUAL then curr_value >= value
        end
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module Scoruby
  module Predicates
    # Tests whether a feature's value is a member of a set listed in the
    # model XML.
    class SimpleSetPredicate
      IS_IN = 'isIn'

      attr_reader :field

      # @param pred_xml [#attributes, #children] set predicate element; the
      #   content of its first child holds the whitespace-separated members
      def initialize(pred_xml)
        attributes = pred_xml.attributes
        @field = attributes['field'].value.to_sym
        @array = single_or_quoted_words(pred_xml.children[0].content)
        @operator = attributes['booleanOperator'].value
      end

      # @param features [Hash] formatted feature values keyed by symbol
      # @return [Boolean, nil] membership result for 'isIn'; nil for any
      #   other operator
      def true?(features)
        return unless @operator == IS_IN

        @array.include?(features[@field])
      end

      # @return [Boolean] true when +features+ has no key for the field
      def is_missing?(features)
        !features.key?(@field)
      end

      private

      # Splits on whitespace that lies outside double quotes, drops empty
      # pieces and strips the quote characters, so that
      # '"a b" c' becomes ['a b', 'c'].
      def single_or_quoted_words(string)
        words = string.split(/\s(?=(?:[^"]|"[^"]*")*$)/)
        words.reject(&:empty?).map { |word| word.delete('"') }
      end
    end
  end
end
|
data/lib/scoruby/version.rb
CHANGED
data/lib/scoruby.rb
CHANGED
data/scoruby.gemspec
CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |spec|
|
|
16
16
|
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
17
17
|
spec.bindir = "exe"
|
18
18
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
19
|
-
spec.require_paths = ["lib", "lib/random_forest", "lib/gbm"]
|
19
|
+
spec.require_paths = ["lib", "lib/scoruby", "lib/scoruby/models/random_forest", "lib/scoruby/models/gbm"]
|
20
20
|
|
21
21
|
spec.add_development_dependency "bundler", "~> 1.10"
|
22
22
|
spec.add_development_dependency "rake", "~> 12.0"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scoruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Asaf Schers
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-07-
|
11
|
+
date: 2017-07-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -115,29 +115,31 @@ executables: []
|
|
115
115
|
extensions: []
|
116
116
|
extra_rdoc_files: []
|
117
117
|
files:
|
118
|
+
- ".gitignore"
|
118
119
|
- ".rspec"
|
119
120
|
- ".travis.yml"
|
120
121
|
- CODE_OF_CONDUCT.md
|
121
122
|
- Gemfile
|
123
|
+
- Gemfile.lock
|
122
124
|
- LICENSE.txt
|
123
125
|
- README.md
|
124
126
|
- Rakefile
|
125
127
|
- bin/console
|
126
128
|
- bin/setup
|
127
|
-
- lib/decision.rb
|
128
|
-
- lib/features.rb
|
129
|
-
- lib/models/decision_tree.rb
|
130
|
-
- lib/models/gbm.rb
|
131
|
-
- lib/models/random_forest.rb
|
132
|
-
- lib/models_factory.rb
|
133
|
-
- lib/node.rb
|
134
|
-
- lib/predicate_factory.rb
|
135
|
-
- lib/predicates/compound_predicate.rb
|
136
|
-
- lib/predicates/false_predicate.rb
|
137
|
-
- lib/predicates/simple_predicate.rb
|
138
|
-
- lib/predicates/simple_set_predicate.rb
|
139
|
-
- lib/predicates/true_predicate.rb
|
140
129
|
- lib/scoruby.rb
|
130
|
+
- lib/scoruby/decision.rb
|
131
|
+
- lib/scoruby/features.rb
|
132
|
+
- lib/scoruby/models/decision_tree.rb
|
133
|
+
- lib/scoruby/models/gbm.rb
|
134
|
+
- lib/scoruby/models/random_forest.rb
|
135
|
+
- lib/scoruby/models_factory.rb
|
136
|
+
- lib/scoruby/node.rb
|
137
|
+
- lib/scoruby/predicate_factory.rb
|
138
|
+
- lib/scoruby/predicates/compound_predicate.rb
|
139
|
+
- lib/scoruby/predicates/false_predicate.rb
|
140
|
+
- lib/scoruby/predicates/simple_predicate.rb
|
141
|
+
- lib/scoruby/predicates/simple_set_predicate.rb
|
142
|
+
- lib/scoruby/predicates/true_predicate.rb
|
141
143
|
- lib/scoruby/version.rb
|
142
144
|
- scoruby.gemspec
|
143
145
|
homepage: https://github.com/asafschers/scoruby
|
@@ -148,8 +150,9 @@ post_install_message:
|
|
148
150
|
rdoc_options: []
|
149
151
|
require_paths:
|
150
152
|
- lib
|
151
|
-
- lib/random_forest
|
152
|
-
- lib/gbm
|
153
|
+
- lib/scoruby
|
154
|
+
- lib/scoruby/models/random_forest
|
155
|
+
- lib/scoruby/models/gbm
|
153
156
|
required_ruby_version: !ruby/object:Gem::Requirement
|
154
157
|
requirements:
|
155
158
|
- - ">="
|
data/lib/decision.rb
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
class Decision
|
2
|
-
|
3
|
-
attr_reader :score, :score_distribution
|
4
|
-
|
5
|
-
def initialize(score, score_distributions)
|
6
|
-
@score = score
|
7
|
-
return if score_distributions.empty?
|
8
|
-
|
9
|
-
@score_distribution = {}
|
10
|
-
score_distributions.each { |score_distribution|
|
11
|
-
attributes = score_distribution.attributes
|
12
|
-
@score_distribution[attributes['value'].to_s] = attributes['probability'].to_s
|
13
|
-
}
|
14
|
-
end
|
15
|
-
end
|
data/lib/features.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
class Features
|
2
|
-
|
3
|
-
attr_reader :formatted
|
4
|
-
|
5
|
-
def initialize(features)
|
6
|
-
@formatted = format_booleans(features)
|
7
|
-
end
|
8
|
-
|
9
|
-
def format_booleans(features)
|
10
|
-
features.map { |k, v|
|
11
|
-
features[k] = 'f' if v == false
|
12
|
-
features[k] = 't' if v == true
|
13
|
-
}
|
14
|
-
features
|
15
|
-
end
|
16
|
-
end
|
data/lib/models/decision_tree.rb
DELETED
@@ -1,42 +0,0 @@
|
|
1
|
-
require 'node'
|
2
|
-
|
3
|
-
class DecisionTree
|
4
|
-
|
5
|
-
attr_reader :root
|
6
|
-
|
7
|
-
def initialize(tree_xml)
|
8
|
-
@id = tree_xml.attribute('id')
|
9
|
-
@root = Node.new(tree_xml.xpath('TreeModel/Node'))
|
10
|
-
end
|
11
|
-
|
12
|
-
def decide(features)
|
13
|
-
curr = @root
|
14
|
-
while curr.children[0]
|
15
|
-
prev = curr
|
16
|
-
curr = step(curr, features)
|
17
|
-
return if didnt_step?(curr, prev)
|
18
|
-
end
|
19
|
-
|
20
|
-
curr.decision
|
21
|
-
end
|
22
|
-
|
23
|
-
private
|
24
|
-
|
25
|
-
def step(curr, features)
|
26
|
-
curr = step_on_true(curr, features, 0)
|
27
|
-
curr = step_on_true(curr, features, 1)
|
28
|
-
curr = step_on_true(curr, features, 2)
|
29
|
-
curr
|
30
|
-
end
|
31
|
-
|
32
|
-
def step_on_true(curr, features, num)
|
33
|
-
return curr.children[num] if curr.children && curr.children[num] && curr.children[num].true?(features)
|
34
|
-
curr
|
35
|
-
end
|
36
|
-
|
37
|
-
def didnt_step?(curr, prev)
|
38
|
-
return false if (prev.pred != curr.pred)
|
39
|
-
Scoruby.logger.error "Null tree: #{@id}, bad feature: #{curr.children[0].pred.field }"
|
40
|
-
true
|
41
|
-
end
|
42
|
-
end
|
data/lib/models/gbm.rb
DELETED
@@ -1,29 +0,0 @@
|
|
1
|
-
require 'models/decision_tree'
|
2
|
-
require 'features'
|
3
|
-
|
4
|
-
class Gbm
|
5
|
-
GBM_FOREST_XPATH = '//Segmentation[@multipleModelMethod="sum"]/Segment'
|
6
|
-
CONST_XPATH = '//Target/@rescaleConstant'
|
7
|
-
|
8
|
-
def initialize(xml)
|
9
|
-
@decision_trees = xml.xpath(GBM_FOREST_XPATH).collect{ |xml_tree|
|
10
|
-
DecisionTree.new(xml_tree)
|
11
|
-
}
|
12
|
-
@const = Float(xml.xpath(CONST_XPATH).to_s)
|
13
|
-
end
|
14
|
-
|
15
|
-
def tree_count
|
16
|
-
@decision_trees.count
|
17
|
-
end
|
18
|
-
|
19
|
-
def score(features)
|
20
|
-
formatted_features = Features.new(features).formatted
|
21
|
-
x = @decision_trees.map { |dt|
|
22
|
-
score = dt.decide(formatted_features).score
|
23
|
-
score.to_s.to_f
|
24
|
-
}.reduce(:+) + @const
|
25
|
-
Math.exp(x) / (1 + Math.exp(x))
|
26
|
-
end
|
27
|
-
|
28
|
-
end
|
29
|
-
|
data/lib/models/random_forest.rb
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
require 'models/decision_tree'
|
2
|
-
|
3
|
-
class RandomForest
|
4
|
-
RF_FOREST_XPATH = 'PMML/MiningModel/Segmentation/Segment'
|
5
|
-
|
6
|
-
def initialize(xml)
|
7
|
-
xml_trees = xml.xpath(RF_FOREST_XPATH)
|
8
|
-
@decision_trees = xml_trees.collect{ |xml_tree|
|
9
|
-
DecisionTree.new(xml_tree)
|
10
|
-
}
|
11
|
-
end
|
12
|
-
|
13
|
-
def decisions_count(features)
|
14
|
-
formatted_features = Features.new(features).formatted
|
15
|
-
decisions = @decision_trees.collect { |decision_tree|
|
16
|
-
decision_tree.decide(formatted_features).score
|
17
|
-
}
|
18
|
-
decisions.inject(Hash.new(0)) { |h, e| h[e] += 1 ; h }
|
19
|
-
end
|
20
|
-
|
21
|
-
def predict(features)
|
22
|
-
decisions_count(features).max_by {|_, v| v }[0]
|
23
|
-
end
|
24
|
-
|
25
|
-
end
|
data/lib/models_factory.rb
DELETED
@@ -1,28 +0,0 @@
|
|
1
|
-
require 'models/random_forest'
|
2
|
-
require 'models/gbm'
|
3
|
-
|
4
|
-
class ModelsFactory
|
5
|
-
RANDOM_FOREST_MODEL = 'randomForest_Model'
|
6
|
-
GBM_INDICATION = '//OutputField[@name="scaledGbmValue"]'
|
7
|
-
MODEL_NOT_SUPPORTED_ERROR = 'model not supported'
|
8
|
-
|
9
|
-
def self.factory_for(xml)
|
10
|
-
return RandomForest.new(xml) if random_forest?(xml)
|
11
|
-
return Gbm.new(xml) if gbm?(xml)
|
12
|
-
return DecisionTree.new(xml.child) if decision_tree?(xml)
|
13
|
-
|
14
|
-
raise MODEL_NOT_SUPPORTED_ERROR
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.decision_tree?(xml)
|
18
|
-
!xml.xpath('PMML/TreeModel').empty?
|
19
|
-
end
|
20
|
-
|
21
|
-
def self.random_forest?(xml)
|
22
|
-
xml.xpath('PMML/MiningModel/@modelName').to_s == RANDOM_FOREST_MODEL
|
23
|
-
end
|
24
|
-
|
25
|
-
def self.gbm?(xml)
|
26
|
-
!xml.xpath(GBM_INDICATION).empty?
|
27
|
-
end
|
28
|
-
end
|
data/lib/node.rb
DELETED
@@ -1,36 +0,0 @@
|
|
1
|
-
require 'predicate_factory'
|
2
|
-
require 'decision'
|
3
|
-
|
4
|
-
class Node
|
5
|
-
|
6
|
-
attr_reader :decision, :pred, :children
|
7
|
-
|
8
|
-
def initialize(xml)
|
9
|
-
children = xml.children
|
10
|
-
|
11
|
-
@decision = Decision.new(xml.attribute('score').to_s,
|
12
|
-
children.select { |c| c.name == 'ScoreDistribution' } )
|
13
|
-
|
14
|
-
children = remove_nodes(children)
|
15
|
-
|
16
|
-
pred_xml = children[0]
|
17
|
-
@pred = PredicateFactory.for(pred_xml)
|
18
|
-
@children = []
|
19
|
-
|
20
|
-
return if children.count == 1
|
21
|
-
|
22
|
-
@children << Node.new(children[1]) if children[1]
|
23
|
-
@children << Node.new(children[2]) if children[2]
|
24
|
-
@children << Node.new(children[3]) if children[3]
|
25
|
-
end
|
26
|
-
|
27
|
-
def true?(features)
|
28
|
-
@pred.nil? || @pred.true?(features)
|
29
|
-
end
|
30
|
-
|
31
|
-
private
|
32
|
-
|
33
|
-
def remove_nodes(children)
|
34
|
-
children.reject { |c| %w(Extension ScoreDistribution).include? c.name }
|
35
|
-
end
|
36
|
-
end
|
data/lib/predicate_factory.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
require 'predicates/compound_predicate'
|
2
|
-
require 'predicates/simple_predicate'
|
3
|
-
require 'predicates/simple_set_predicate'
|
4
|
-
require 'predicates/true_predicate'
|
5
|
-
require 'predicates/false_predicate'
|
6
|
-
|
7
|
-
class PredicateFactory
|
8
|
-
|
9
|
-
def self.for(pred_xml)
|
10
|
-
return SimplePredicate.new(pred_xml) if pred_xml.name == 'SimplePredicate'
|
11
|
-
return SimpleSetPredicate.new(pred_xml) if pred_xml.name == 'SimpleSetPredicate'
|
12
|
-
return CompoundPredicate.new(pred_xml) if pred_xml.name == 'CompoundPredicate'
|
13
|
-
return TruePredicate.new if pred_xml.name == 'True'
|
14
|
-
return FalsePredicate.new if pred_xml.name == 'False'
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
|
@@ -1,40 +0,0 @@
|
|
1
|
-
class CompoundPredicate
|
2
|
-
|
3
|
-
attr_reader :field
|
4
|
-
|
5
|
-
def initialize(pred_xml)
|
6
|
-
attributes = pred_xml.attributes
|
7
|
-
children = pred_xml.children
|
8
|
-
|
9
|
-
@boolean_operator = attributes['booleanOperator'].value
|
10
|
-
@predicates = []
|
11
|
-
@predicates << PredicateFactory.for(children[0])
|
12
|
-
@predicates << PredicateFactory.for(children[1])
|
13
|
-
@field = @predicates.map(&:field).flatten.compact
|
14
|
-
end
|
15
|
-
|
16
|
-
def true?(features)
|
17
|
-
return surrogate?(features) if @boolean_operator == 'surrogate'
|
18
|
-
return or?(features) if @boolean_operator == 'or'
|
19
|
-
and?(features) if @boolean_operator == 'and'
|
20
|
-
end
|
21
|
-
|
22
|
-
def is_missing?(features)
|
23
|
-
@field.any? { |f| !features.keys.include?(f) }
|
24
|
-
end
|
25
|
-
|
26
|
-
private
|
27
|
-
|
28
|
-
def surrogate?(features)
|
29
|
-
return @predicates[1].true?(features) if @predicates[0].is_missing?(features)
|
30
|
-
@predicates[0].true?(features)
|
31
|
-
end
|
32
|
-
|
33
|
-
def or?(features)
|
34
|
-
@predicates.any? { |p| p.true?(features) }
|
35
|
-
end
|
36
|
-
|
37
|
-
def and?(features)
|
38
|
-
@predicates.all? { |p| p.true?(features) }
|
39
|
-
end
|
40
|
-
end
|
@@ -1,43 +0,0 @@
|
|
1
|
-
class SimplePredicate
|
2
|
-
|
3
|
-
GREATER_THAN = 'greaterThan'
|
4
|
-
LESS_THAN = 'lessThan'
|
5
|
-
LESS_OR_EQUAL = 'lessOrEqual'
|
6
|
-
GREATER_OR_EQUAL = 'greaterOrEqual'
|
7
|
-
MATH_OPS = [GREATER_THAN, LESS_THAN, LESS_OR_EQUAL, GREATER_OR_EQUAL]
|
8
|
-
EQUAL = 'equal'
|
9
|
-
IS_MISSING = 'isMissing'
|
10
|
-
|
11
|
-
attr_reader :field
|
12
|
-
|
13
|
-
def initialize(pred_xml)
|
14
|
-
attributes = pred_xml.attributes
|
15
|
-
|
16
|
-
@field = attributes['field'].value.to_sym
|
17
|
-
@operator = attributes['operator'].value
|
18
|
-
return if @operator == IS_MISSING
|
19
|
-
@value = attributes['value'].value
|
20
|
-
end
|
21
|
-
|
22
|
-
def true?(features)
|
23
|
-
return num_true?(features) if MATH_OPS.include?(@operator)
|
24
|
-
return features[@field] == @value if @operator == EQUAL
|
25
|
-
features[field].nil? || !features.has_key?(field) if @operator == IS_MISSING
|
26
|
-
end
|
27
|
-
|
28
|
-
def is_missing?(features)
|
29
|
-
!features.keys.include?(@field)
|
30
|
-
end
|
31
|
-
|
32
|
-
private
|
33
|
-
|
34
|
-
def num_true?(features)
|
35
|
-
return false unless features[@field]
|
36
|
-
curr_value = Float(features[@field])
|
37
|
-
value = Float(@value)
|
38
|
-
return curr_value > value if @operator == GREATER_THAN
|
39
|
-
return curr_value < value if @operator == LESS_THAN
|
40
|
-
return curr_value <= value if @operator == LESS_OR_EQUAL
|
41
|
-
curr_value >= value if @operator == GREATER_OR_EQUAL
|
42
|
-
end
|
43
|
-
end
|
@@ -1,29 +0,0 @@
|
|
1
|
-
class SimpleSetPredicate
|
2
|
-
|
3
|
-
IS_IN = 'isIn'
|
4
|
-
|
5
|
-
attr_reader :field
|
6
|
-
|
7
|
-
def initialize(pred_xml)
|
8
|
-
attributes = pred_xml.attributes
|
9
|
-
@field = attributes['field'].value.to_sym
|
10
|
-
@array = single_or_quoted_words(pred_xml.children[0].content)
|
11
|
-
@operator = attributes['booleanOperator'].value
|
12
|
-
end
|
13
|
-
|
14
|
-
def true?(features)
|
15
|
-
@array.include? features[@field] if @operator == IS_IN
|
16
|
-
end
|
17
|
-
|
18
|
-
def is_missing?(features)
|
19
|
-
!features.keys.include?(@field)
|
20
|
-
end
|
21
|
-
|
22
|
-
private
|
23
|
-
|
24
|
-
def single_or_quoted_words(string)
|
25
|
-
string.split(/\s(?=(?:[^"]|"[^"]*")*$)/).
|
26
|
-
reject(&:empty?).
|
27
|
-
map { |w| w.tr('"','')}
|
28
|
-
end
|
29
|
-
end
|