scoruby 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 29d319117ac389447c955bcd448235865fd72104
4
- data.tar.gz: 851feb27437019ef236b7ab8130c52800f3239a4
3
+ metadata.gz: 59d91f99c8a04a124993b71950d425f6b1b89353
4
+ data.tar.gz: ee709d362a7699749561a7d1cb60f7b7aa40c902
5
5
  SHA512:
6
- metadata.gz: e13cff01a0cf6800aa30c522dbb27bee0cfa931f55c7aa466811f7a46aff6fc865d9d9cdcacd2ed462b334cd6e38078c6e92a5235287cadd62383ae8abf1ff22
7
- data.tar.gz: b8664649eeef47a290f0c8547b486896d0044b3803580639468d41f8f1098e27c4de8b5f0df06a8807463e39c8d827e101fac4457de1ef3dcee947887404fe37
6
+ metadata.gz: 4d433e761e5fc203d298ecec3fb5019e024ae6cd753296f08b304fc0dd790017f5ff0e8621a34995811d6db21c475f3e91a41d079722c5c1ec888d151740c9c6
7
+ data.tar.gz: c347e88a7cf8e5345be89160f6f6f6f07ff2bb7dfed93401f02700e898886b3205e0ced850ba1fad247779f4b0f16c712b751aca46be15ac0d75b6fe3f1cc9d1
data/.gitignore ADDED
@@ -0,0 +1,10 @@
1
+ .idea
2
+ *.log
3
+
4
+ test_gbm.rb
5
+
6
+ test_gbm.pmml
7
+
8
+ *.gem
9
+
10
+ sample.pmml
data/Gemfile.lock ADDED
@@ -0,0 +1,67 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ scoruby (0.2.2)
5
+ nokogiri (~> 1.7)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ coderay (1.1.0)
11
+ coveralls (0.8.20)
12
+ json (>= 1.8, < 3)
13
+ simplecov (~> 0.14.1)
14
+ term-ansicolor (~> 1.3)
15
+ thor (~> 0.19.4)
16
+ tins (~> 1.6)
17
+ diff-lcs (1.2.5)
18
+ docile (1.1.5)
19
+ json (2.1.0)
20
+ method_source (0.8.2)
21
+ mini_portile2 (2.1.0)
22
+ nokogiri (1.7.0.1)
23
+ mini_portile2 (~> 2.1.0)
24
+ pry (0.10.3)
25
+ coderay (~> 1.1.0)
26
+ method_source (~> 0.8.1)
27
+ slop (~> 3.4)
28
+ rake (12.0.0)
29
+ rspec (3.5.0)
30
+ rspec-core (~> 3.5.0)
31
+ rspec-expectations (~> 3.5.0)
32
+ rspec-mocks (~> 3.5.0)
33
+ rspec-core (3.5.4)
34
+ rspec-support (~> 3.5.0)
35
+ rspec-expectations (3.5.0)
36
+ diff-lcs (>= 1.2.0, < 2.0)
37
+ rspec-support (~> 3.5.0)
38
+ rspec-mocks (3.5.0)
39
+ diff-lcs (>= 1.2.0, < 2.0)
40
+ rspec-support (~> 3.5.0)
41
+ rspec-support (3.5.0)
42
+ ruby-prof (0.16.2)
43
+ simplecov (0.14.1)
44
+ docile (~> 1.1.0)
45
+ json (>= 1.8, < 3)
46
+ simplecov-html (~> 0.10.0)
47
+ simplecov-html (0.10.0)
48
+ slop (3.6.0)
49
+ term-ansicolor (1.6.0)
50
+ tins (~> 1.0)
51
+ thor (0.19.4)
52
+ tins (1.13.2)
53
+
54
+ PLATFORMS
55
+ ruby
56
+
57
+ DEPENDENCIES
58
+ bundler (~> 1.10)
59
+ coveralls
60
+ pry (~> 0.10)
61
+ rake (~> 12.0)
62
+ rspec (~> 3.5)
63
+ ruby-prof
64
+ scoruby!
65
+
66
+ BUNDLED WITH
67
+ 1.11.2
@@ -0,0 +1,17 @@
1
+ module Scoruby
2
+ class Decision
3
+
4
+ attr_reader :score, :score_distribution
5
+
6
+ def initialize(score, score_distributions)
7
+ @score = score
8
+ return if score_distributions.empty?
9
+
10
+ @score_distribution = {}
11
+ score_distributions.each {|score_distribution|
12
+ attributes = score_distribution.attributes
13
+ @score_distribution[attributes['value'].to_s] = attributes['probability'].to_s
14
+ }
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,18 @@
1
+ module Scoruby
2
+ class Features
3
+
4
+ attr_reader :formatted
5
+
6
+ def initialize(features)
7
+ @formatted = format_booleans(features)
8
+ end
9
+
10
+ def format_booleans(features)
11
+ features.map {|k, v|
12
+ features[k] = 'f' if v == false
13
+ features[k] = 't' if v == true
14
+ }
15
+ features
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,46 @@
1
+ require 'node'
2
+
3
+ module Scoruby
4
+ module Models
5
+ class DecisionTree
6
+
7
+ attr_reader :root
8
+
9
+ def initialize(tree_xml)
10
+ @id = tree_xml.attribute('id')
11
+ @root = Node.new(tree_xml.xpath('TreeModel/Node'))
12
+ end
13
+
14
+ def decide(features)
15
+ curr = @root
16
+ while curr.children[0]
17
+ prev = curr
18
+ curr = step(curr, features)
19
+ return if didnt_step?(curr, prev)
20
+ end
21
+
22
+ curr.decision
23
+ end
24
+
25
+ private
26
+
27
+ def step(curr, features)
28
+ curr = step_on_true(curr, features, 0)
29
+ curr = step_on_true(curr, features, 1)
30
+ curr = step_on_true(curr, features, 2)
31
+ curr
32
+ end
33
+
34
+ def step_on_true(curr, features, num)
35
+ return curr.children[num] if curr.children && curr.children[num] && curr.children[num].true?(features)
36
+ curr
37
+ end
38
+
39
+ def didnt_step?(curr, prev)
40
+ return false if (prev.pred != curr.pred)
41
+ Scoruby.logger.error "Null tree: #{@id}, bad feature: #{curr.children[0].pred.field }"
42
+ true
43
+ end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,31 @@
1
+ require 'models/decision_tree'
2
+ require 'features'
3
+
4
+ module Scoruby
5
+ module Models
6
+ class Gbm
7
+ GBM_FOREST_XPATH = '//Segmentation[@multipleModelMethod="sum"]/Segment'
8
+ CONST_XPATH = '//Target/@rescaleConstant'
9
+
10
+ def initialize(xml)
11
+ @decision_trees = xml.xpath(GBM_FOREST_XPATH).collect {|xml_tree|
12
+ DecisionTree.new(xml_tree)
13
+ }
14
+ @const = Float(xml.xpath(CONST_XPATH).to_s)
15
+ end
16
+
17
+ def tree_count
18
+ @decision_trees.count
19
+ end
20
+
21
+ def score(features)
22
+ formatted_features = Features.new(features).formatted
23
+ x = @decision_trees.map {|dt|
24
+ score = dt.decide(formatted_features).score
25
+ score.to_s.to_f
26
+ }.reduce(:+) + @const
27
+ Math.exp(x) / (1 + Math.exp(x))
28
+ end
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,26 @@
1
+ module Scoruby
2
+ module Models
3
+ class RandomForest
4
+ RF_FOREST_XPATH = 'PMML/MiningModel/Segmentation/Segment'
5
+
6
+ def initialize(xml)
7
+ xml_trees = xml.xpath(RF_FOREST_XPATH)
8
+ @decision_trees = xml_trees.collect {|xml_tree|
9
+ DecisionTree.new(xml_tree)
10
+ }
11
+ end
12
+
13
+ def decisions_count(features)
14
+ formatted_features = Features.new(features).formatted
15
+ decisions = @decision_trees.collect {|decision_tree|
16
+ decision_tree.decide(formatted_features).score
17
+ }
18
+ decisions.inject(Hash.new(0)) {|h, e| h[e] += 1; h}
19
+ end
20
+
21
+ def predict(features)
22
+ decisions_count(features).max_by {|_, v| v}[0]
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,31 @@
1
+ require 'models/decision_tree'
2
+ require 'models/gbm'
3
+ require 'models/random_forest'
4
+
5
+ module Scoruby
6
+ class ModelsFactory
7
+ RANDOM_FOREST_MODEL = 'randomForest_Model'
8
+ GBM_INDICATION = '//OutputField[@name="scaledGbmValue"]'
9
+ MODEL_NOT_SUPPORTED_ERROR = 'model not supported'
10
+
11
+ def self.factory_for(xml)
12
+ return Models::RandomForest.new(xml) if random_forest?(xml)
13
+ return Models::Gbm.new(xml) if gbm?(xml)
14
+ return Models::DecisionTree.new(xml.child) if decision_tree?(xml)
15
+
16
+ raise MODEL_NOT_SUPPORTED_ERROR
17
+ end
18
+
19
+ def self.decision_tree?(xml)
20
+ !xml.xpath('PMML/TreeModel').empty?
21
+ end
22
+
23
+ def self.random_forest?(xml)
24
+ xml.xpath('PMML/MiningModel/@modelName').to_s == RANDOM_FOREST_MODEL
25
+ end
26
+
27
+ def self.gbm?(xml)
28
+ !xml.xpath(GBM_INDICATION).empty?
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,38 @@
1
+ require 'predicate_factory'
2
+ require 'decision'
3
+
4
+ module Scoruby
5
+ class Node
6
+
7
+ attr_reader :decision, :pred, :children
8
+
9
+ def initialize(xml)
10
+ children = xml.children
11
+
12
+ @decision = Decision.new(xml.attribute('score').to_s,
13
+ children.select {|c| c.name == 'ScoreDistribution'})
14
+
15
+ children = remove_nodes(children)
16
+
17
+ pred_xml = children[0]
18
+ @pred = PredicateFactory.for(pred_xml)
19
+ @children = []
20
+
21
+ return if children.count == 1
22
+
23
+ @children << Node.new(children[1]) if children[1]
24
+ @children << Node.new(children[2]) if children[2]
25
+ @children << Node.new(children[3]) if children[3]
26
+ end
27
+
28
+ def true?(features)
29
+ @pred.nil? || @pred.true?(features)
30
+ end
31
+
32
+ private
33
+
34
+ def remove_nodes(children)
35
+ children.reject {|c| %w(Extension ScoreDistribution).include? c.name}
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,19 @@
1
+ require 'predicates/compound_predicate'
2
+ require 'predicates/simple_predicate'
3
+ require 'predicates/simple_set_predicate'
4
+ require 'predicates/true_predicate'
5
+ require 'predicates/false_predicate'
6
+
7
+ module Scoruby
8
+ class PredicateFactory
9
+
10
+ def self.for(pred_xml)
11
+ return Predicates::SimplePredicate.new(pred_xml) if pred_xml.name == 'SimplePredicate'
12
+ return Predicates::SimpleSetPredicate.new(pred_xml) if pred_xml.name == 'SimpleSetPredicate'
13
+ return Predicates::CompoundPredicate.new(pred_xml) if pred_xml.name == 'CompoundPredicate'
14
+ return Predicates::TruePredicate.new if pred_xml.name == 'True'
15
+ return Predicates::FalsePredicate.new if pred_xml.name == 'False'
16
+ end
17
+ end
18
+ end
19
+
@@ -0,0 +1,44 @@
1
+ module Scoruby
2
+ module Predicates
3
+ class CompoundPredicate
4
+
5
+ attr_reader :field
6
+
7
+ def initialize(pred_xml)
8
+ attributes = pred_xml.attributes
9
+ children = pred_xml.children
10
+
11
+ @boolean_operator = attributes['booleanOperator'].value
12
+ @predicates = []
13
+ @predicates << PredicateFactory.for(children[0])
14
+ @predicates << PredicateFactory.for(children[1])
15
+ @field = @predicates.map(&:field).flatten.compact
16
+ end
17
+
18
+ def true?(features)
19
+ return surrogate?(features) if @boolean_operator == 'surrogate'
20
+ return or?(features) if @boolean_operator == 'or'
21
+ and?(features) if @boolean_operator == 'and'
22
+ end
23
+
24
+ def is_missing?(features)
25
+ @field.any? {|f| !features.keys.include?(f)}
26
+ end
27
+
28
+ private
29
+
30
+ def surrogate?(features)
31
+ return @predicates[1].true?(features) if @predicates[0].is_missing?(features)
32
+ @predicates[0].true?(features)
33
+ end
34
+
35
+ def or?(features)
36
+ @predicates.any? {|p| p.true?(features)}
37
+ end
38
+
39
+ def and?(features)
40
+ @predicates.all? {|p| p.true?(features)}
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,17 @@
1
+ module Scoruby
2
+ module Predicates
3
+ class FalsePredicate
4
+ def field
5
+ nil
6
+ end
7
+
8
+ def true?(_)
9
+ false
10
+ end
11
+
12
+ def is_missing?(_)
13
+ false
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,47 @@
1
+ module Scoruby
2
+ module Predicates
3
+ class SimplePredicate
4
+
5
+ GREATER_THAN = 'greaterThan'
6
+ LESS_THAN = 'lessThan'
7
+ LESS_OR_EQUAL = 'lessOrEqual'
8
+ GREATER_OR_EQUAL = 'greaterOrEqual'
9
+ MATH_OPS = [GREATER_THAN, LESS_THAN, LESS_OR_EQUAL, GREATER_OR_EQUAL]
10
+ EQUAL = 'equal'
11
+ IS_MISSING = 'isMissing'
12
+
13
+ attr_reader :field
14
+
15
+ def initialize(pred_xml)
16
+ attributes = pred_xml.attributes
17
+
18
+ @field = attributes['field'].value.to_sym
19
+ @operator = attributes['operator'].value
20
+ return if @operator == IS_MISSING
21
+ @value = attributes['value'].value
22
+ end
23
+
24
+ def true?(features)
25
+ return num_true?(features) if MATH_OPS.include?(@operator)
26
+ return features[@field] == @value if @operator == EQUAL
27
+ features[field].nil? || !features.has_key?(field) if @operator == IS_MISSING
28
+ end
29
+
30
+ def is_missing?(features)
31
+ !features.keys.include?(@field)
32
+ end
33
+
34
+ private
35
+
36
+ def num_true?(features)
37
+ return false unless features[@field]
38
+ curr_value = Float(features[@field])
39
+ value = Float(@value)
40
+ return curr_value > value if @operator == GREATER_THAN
41
+ return curr_value < value if @operator == LESS_THAN
42
+ return curr_value <= value if @operator == LESS_OR_EQUAL
43
+ curr_value >= value if @operator == GREATER_OR_EQUAL
44
+ end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,33 @@
1
+ module Scoruby
2
+ module Predicates
3
+ class SimpleSetPredicate
4
+
5
+ IS_IN = 'isIn'
6
+
7
+ attr_reader :field
8
+
9
+ def initialize(pred_xml)
10
+ attributes = pred_xml.attributes
11
+ @field = attributes['field'].value.to_sym
12
+ @array = single_or_quoted_words(pred_xml.children[0].content)
13
+ @operator = attributes['booleanOperator'].value
14
+ end
15
+
16
+ def true?(features)
17
+ @array.include? features[@field] if @operator == IS_IN
18
+ end
19
+
20
+ def is_missing?(features)
21
+ !features.keys.include?(@field)
22
+ end
23
+
24
+ private
25
+
26
+ def single_or_quoted_words(string)
27
+ string.split(/\s(?=(?:[^"]|"[^"]*")*$)/).
28
+ reject(&:empty?).
29
+ map {|w| w.tr('"', '')}
30
+ end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,17 @@
1
+ module Scoruby
2
+ module Predicates
3
+ class TruePredicate
4
+ def field
5
+ nil
6
+ end
7
+
8
+ def true?(_)
9
+ true
10
+ end
11
+
12
+ def is_missing?(_)
13
+ false
14
+ end
15
+ end
16
+ end
17
+ end
@@ -1,3 +1,3 @@
1
1
  module Scoruby
2
- VERSION = '0.2.2'
2
+ VERSION = '0.2.3'
3
3
  end
data/lib/scoruby.rb CHANGED
@@ -1,8 +1,7 @@
1
1
  require 'scoruby/version'
2
- require 'models_factory'
2
+ require 'scoruby/models_factory'
3
3
  require 'nokogiri'
4
4
  require 'logger'
5
- require 'pry'
6
5
 
7
6
  module Scoruby
8
7
  class << self
data/scoruby.gemspec CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |spec|
16
16
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
17
17
  spec.bindir = "exe"
18
18
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
19
- spec.require_paths = ["lib", "lib/random_forest", "lib/gbm"]
19
+ spec.require_paths = ["lib", "lib/scoruby", "lib/scoruby/models/random_forest", "lib/scoruby/models/gbm"]
20
20
 
21
21
  spec.add_development_dependency "bundler", "~> 1.10"
22
22
  spec.add_development_dependency "rake", "~> 12.0"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scoruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Asaf Schers
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-07-09 00:00:00.000000000 Z
11
+ date: 2017-07-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -115,29 +115,31 @@ executables: []
115
115
  extensions: []
116
116
  extra_rdoc_files: []
117
117
  files:
118
+ - ".gitignore"
118
119
  - ".rspec"
119
120
  - ".travis.yml"
120
121
  - CODE_OF_CONDUCT.md
121
122
  - Gemfile
123
+ - Gemfile.lock
122
124
  - LICENSE.txt
123
125
  - README.md
124
126
  - Rakefile
125
127
  - bin/console
126
128
  - bin/setup
127
- - lib/decision.rb
128
- - lib/features.rb
129
- - lib/models/decision_tree.rb
130
- - lib/models/gbm.rb
131
- - lib/models/random_forest.rb
132
- - lib/models_factory.rb
133
- - lib/node.rb
134
- - lib/predicate_factory.rb
135
- - lib/predicates/compound_predicate.rb
136
- - lib/predicates/false_predicate.rb
137
- - lib/predicates/simple_predicate.rb
138
- - lib/predicates/simple_set_predicate.rb
139
- - lib/predicates/true_predicate.rb
140
129
  - lib/scoruby.rb
130
+ - lib/scoruby/decision.rb
131
+ - lib/scoruby/features.rb
132
+ - lib/scoruby/models/decision_tree.rb
133
+ - lib/scoruby/models/gbm.rb
134
+ - lib/scoruby/models/random_forest.rb
135
+ - lib/scoruby/models_factory.rb
136
+ - lib/scoruby/node.rb
137
+ - lib/scoruby/predicate_factory.rb
138
+ - lib/scoruby/predicates/compound_predicate.rb
139
+ - lib/scoruby/predicates/false_predicate.rb
140
+ - lib/scoruby/predicates/simple_predicate.rb
141
+ - lib/scoruby/predicates/simple_set_predicate.rb
142
+ - lib/scoruby/predicates/true_predicate.rb
141
143
  - lib/scoruby/version.rb
142
144
  - scoruby.gemspec
143
145
  homepage: https://github.com/asafschers/scoruby
@@ -148,8 +150,9 @@ post_install_message:
148
150
  rdoc_options: []
149
151
  require_paths:
150
152
  - lib
151
- - lib/random_forest
152
- - lib/gbm
153
+ - lib/scoruby
154
+ - lib/scoruby/models/random_forest
155
+ - lib/scoruby/models/gbm
153
156
  required_ruby_version: !ruby/object:Gem::Requirement
154
157
  requirements:
155
158
  - - ">="
data/lib/decision.rb DELETED
@@ -1,15 +0,0 @@
1
- class Decision
2
-
3
- attr_reader :score, :score_distribution
4
-
5
- def initialize(score, score_distributions)
6
- @score = score
7
- return if score_distributions.empty?
8
-
9
- @score_distribution = {}
10
- score_distributions.each { |score_distribution|
11
- attributes = score_distribution.attributes
12
- @score_distribution[attributes['value'].to_s] = attributes['probability'].to_s
13
- }
14
- end
15
- end
data/lib/features.rb DELETED
@@ -1,16 +0,0 @@
1
- class Features
2
-
3
- attr_reader :formatted
4
-
5
- def initialize(features)
6
- @formatted = format_booleans(features)
7
- end
8
-
9
- def format_booleans(features)
10
- features.map { |k, v|
11
- features[k] = 'f' if v == false
12
- features[k] = 't' if v == true
13
- }
14
- features
15
- end
16
- end
@@ -1,42 +0,0 @@
1
- require 'node'
2
-
3
- class DecisionTree
4
-
5
- attr_reader :root
6
-
7
- def initialize(tree_xml)
8
- @id = tree_xml.attribute('id')
9
- @root = Node.new(tree_xml.xpath('TreeModel/Node'))
10
- end
11
-
12
- def decide(features)
13
- curr = @root
14
- while curr.children[0]
15
- prev = curr
16
- curr = step(curr, features)
17
- return if didnt_step?(curr, prev)
18
- end
19
-
20
- curr.decision
21
- end
22
-
23
- private
24
-
25
- def step(curr, features)
26
- curr = step_on_true(curr, features, 0)
27
- curr = step_on_true(curr, features, 1)
28
- curr = step_on_true(curr, features, 2)
29
- curr
30
- end
31
-
32
- def step_on_true(curr, features, num)
33
- return curr.children[num] if curr.children && curr.children[num] && curr.children[num].true?(features)
34
- curr
35
- end
36
-
37
- def didnt_step?(curr, prev)
38
- return false if (prev.pred != curr.pred)
39
- Scoruby.logger.error "Null tree: #{@id}, bad feature: #{curr.children[0].pred.field }"
40
- true
41
- end
42
- end
data/lib/models/gbm.rb DELETED
@@ -1,29 +0,0 @@
1
- require 'models/decision_tree'
2
- require 'features'
3
-
4
- class Gbm
5
- GBM_FOREST_XPATH = '//Segmentation[@multipleModelMethod="sum"]/Segment'
6
- CONST_XPATH = '//Target/@rescaleConstant'
7
-
8
- def initialize(xml)
9
- @decision_trees = xml.xpath(GBM_FOREST_XPATH).collect{ |xml_tree|
10
- DecisionTree.new(xml_tree)
11
- }
12
- @const = Float(xml.xpath(CONST_XPATH).to_s)
13
- end
14
-
15
- def tree_count
16
- @decision_trees.count
17
- end
18
-
19
- def score(features)
20
- formatted_features = Features.new(features).formatted
21
- x = @decision_trees.map { |dt|
22
- score = dt.decide(formatted_features).score
23
- score.to_s.to_f
24
- }.reduce(:+) + @const
25
- Math.exp(x) / (1 + Math.exp(x))
26
- end
27
-
28
- end
29
-
@@ -1,25 +0,0 @@
1
- require 'models/decision_tree'
2
-
3
- class RandomForest
4
- RF_FOREST_XPATH = 'PMML/MiningModel/Segmentation/Segment'
5
-
6
- def initialize(xml)
7
- xml_trees = xml.xpath(RF_FOREST_XPATH)
8
- @decision_trees = xml_trees.collect{ |xml_tree|
9
- DecisionTree.new(xml_tree)
10
- }
11
- end
12
-
13
- def decisions_count(features)
14
- formatted_features = Features.new(features).formatted
15
- decisions = @decision_trees.collect { |decision_tree|
16
- decision_tree.decide(formatted_features).score
17
- }
18
- decisions.inject(Hash.new(0)) { |h, e| h[e] += 1 ; h }
19
- end
20
-
21
- def predict(features)
22
- decisions_count(features).max_by {|_, v| v }[0]
23
- end
24
-
25
- end
@@ -1,28 +0,0 @@
1
- require 'models/random_forest'
2
- require 'models/gbm'
3
-
4
- class ModelsFactory
5
- RANDOM_FOREST_MODEL = 'randomForest_Model'
6
- GBM_INDICATION = '//OutputField[@name="scaledGbmValue"]'
7
- MODEL_NOT_SUPPORTED_ERROR = 'model not supported'
8
-
9
- def self.factory_for(xml)
10
- return RandomForest.new(xml) if random_forest?(xml)
11
- return Gbm.new(xml) if gbm?(xml)
12
- return DecisionTree.new(xml.child) if decision_tree?(xml)
13
-
14
- raise MODEL_NOT_SUPPORTED_ERROR
15
- end
16
-
17
- def self.decision_tree?(xml)
18
- !xml.xpath('PMML/TreeModel').empty?
19
- end
20
-
21
- def self.random_forest?(xml)
22
- xml.xpath('PMML/MiningModel/@modelName').to_s == RANDOM_FOREST_MODEL
23
- end
24
-
25
- def self.gbm?(xml)
26
- !xml.xpath(GBM_INDICATION).empty?
27
- end
28
- end
data/lib/node.rb DELETED
@@ -1,36 +0,0 @@
1
- require 'predicate_factory'
2
- require 'decision'
3
-
4
- class Node
5
-
6
- attr_reader :decision, :pred, :children
7
-
8
- def initialize(xml)
9
- children = xml.children
10
-
11
- @decision = Decision.new(xml.attribute('score').to_s,
12
- children.select { |c| c.name == 'ScoreDistribution' } )
13
-
14
- children = remove_nodes(children)
15
-
16
- pred_xml = children[0]
17
- @pred = PredicateFactory.for(pred_xml)
18
- @children = []
19
-
20
- return if children.count == 1
21
-
22
- @children << Node.new(children[1]) if children[1]
23
- @children << Node.new(children[2]) if children[2]
24
- @children << Node.new(children[3]) if children[3]
25
- end
26
-
27
- def true?(features)
28
- @pred.nil? || @pred.true?(features)
29
- end
30
-
31
- private
32
-
33
- def remove_nodes(children)
34
- children.reject { |c| %w(Extension ScoreDistribution).include? c.name }
35
- end
36
- end
@@ -1,18 +0,0 @@
1
- require 'predicates/compound_predicate'
2
- require 'predicates/simple_predicate'
3
- require 'predicates/simple_set_predicate'
4
- require 'predicates/true_predicate'
5
- require 'predicates/false_predicate'
6
-
7
- class PredicateFactory
8
-
9
- def self.for(pred_xml)
10
- return SimplePredicate.new(pred_xml) if pred_xml.name == 'SimplePredicate'
11
- return SimpleSetPredicate.new(pred_xml) if pred_xml.name == 'SimpleSetPredicate'
12
- return CompoundPredicate.new(pred_xml) if pred_xml.name == 'CompoundPredicate'
13
- return TruePredicate.new if pred_xml.name == 'True'
14
- return FalsePredicate.new if pred_xml.name == 'False'
15
- end
16
- end
17
-
18
-
@@ -1,40 +0,0 @@
1
- class CompoundPredicate
2
-
3
- attr_reader :field
4
-
5
- def initialize(pred_xml)
6
- attributes = pred_xml.attributes
7
- children = pred_xml.children
8
-
9
- @boolean_operator = attributes['booleanOperator'].value
10
- @predicates = []
11
- @predicates << PredicateFactory.for(children[0])
12
- @predicates << PredicateFactory.for(children[1])
13
- @field = @predicates.map(&:field).flatten.compact
14
- end
15
-
16
- def true?(features)
17
- return surrogate?(features) if @boolean_operator == 'surrogate'
18
- return or?(features) if @boolean_operator == 'or'
19
- and?(features) if @boolean_operator == 'and'
20
- end
21
-
22
- def is_missing?(features)
23
- @field.any? { |f| !features.keys.include?(f) }
24
- end
25
-
26
- private
27
-
28
- def surrogate?(features)
29
- return @predicates[1].true?(features) if @predicates[0].is_missing?(features)
30
- @predicates[0].true?(features)
31
- end
32
-
33
- def or?(features)
34
- @predicates.any? { |p| p.true?(features) }
35
- end
36
-
37
- def and?(features)
38
- @predicates.all? { |p| p.true?(features) }
39
- end
40
- end
@@ -1,13 +0,0 @@
1
- class FalsePredicate
2
- def field
3
- nil
4
- end
5
-
6
- def true?(_)
7
- false
8
- end
9
-
10
- def is_missing?(_)
11
- false
12
- end
13
- end
@@ -1,43 +0,0 @@
1
- class SimplePredicate
2
-
3
- GREATER_THAN = 'greaterThan'
4
- LESS_THAN = 'lessThan'
5
- LESS_OR_EQUAL = 'lessOrEqual'
6
- GREATER_OR_EQUAL = 'greaterOrEqual'
7
- MATH_OPS = [GREATER_THAN, LESS_THAN, LESS_OR_EQUAL, GREATER_OR_EQUAL]
8
- EQUAL = 'equal'
9
- IS_MISSING = 'isMissing'
10
-
11
- attr_reader :field
12
-
13
- def initialize(pred_xml)
14
- attributes = pred_xml.attributes
15
-
16
- @field = attributes['field'].value.to_sym
17
- @operator = attributes['operator'].value
18
- return if @operator == IS_MISSING
19
- @value = attributes['value'].value
20
- end
21
-
22
- def true?(features)
23
- return num_true?(features) if MATH_OPS.include?(@operator)
24
- return features[@field] == @value if @operator == EQUAL
25
- features[field].nil? || !features.has_key?(field) if @operator == IS_MISSING
26
- end
27
-
28
- def is_missing?(features)
29
- !features.keys.include?(@field)
30
- end
31
-
32
- private
33
-
34
- def num_true?(features)
35
- return false unless features[@field]
36
- curr_value = Float(features[@field])
37
- value = Float(@value)
38
- return curr_value > value if @operator == GREATER_THAN
39
- return curr_value < value if @operator == LESS_THAN
40
- return curr_value <= value if @operator == LESS_OR_EQUAL
41
- curr_value >= value if @operator == GREATER_OR_EQUAL
42
- end
43
- end
@@ -1,29 +0,0 @@
1
- class SimpleSetPredicate
2
-
3
- IS_IN = 'isIn'
4
-
5
- attr_reader :field
6
-
7
- def initialize(pred_xml)
8
- attributes = pred_xml.attributes
9
- @field = attributes['field'].value.to_sym
10
- @array = single_or_quoted_words(pred_xml.children[0].content)
11
- @operator = attributes['booleanOperator'].value
12
- end
13
-
14
- def true?(features)
15
- @array.include? features[@field] if @operator == IS_IN
16
- end
17
-
18
- def is_missing?(features)
19
- !features.keys.include?(@field)
20
- end
21
-
22
- private
23
-
24
- def single_or_quoted_words(string)
25
- string.split(/\s(?=(?:[^"]|"[^"]*")*$)/).
26
- reject(&:empty?).
27
- map { |w| w.tr('"','')}
28
- end
29
- end
@@ -1,13 +0,0 @@
1
- class TruePredicate
2
- def field
3
- nil
4
- end
5
-
6
- def true?(_)
7
- true
8
- end
9
-
10
- def is_missing?(_)
11
- false
12
- end
13
- end