scoruby 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 29d319117ac389447c955bcd448235865fd72104
4
- data.tar.gz: 851feb27437019ef236b7ab8130c52800f3239a4
3
+ metadata.gz: 59d91f99c8a04a124993b71950d425f6b1b89353
4
+ data.tar.gz: ee709d362a7699749561a7d1cb60f7b7aa40c902
5
5
  SHA512:
6
- metadata.gz: e13cff01a0cf6800aa30c522dbb27bee0cfa931f55c7aa466811f7a46aff6fc865d9d9cdcacd2ed462b334cd6e38078c6e92a5235287cadd62383ae8abf1ff22
7
- data.tar.gz: b8664649eeef47a290f0c8547b486896d0044b3803580639468d41f8f1098e27c4de8b5f0df06a8807463e39c8d827e101fac4457de1ef3dcee947887404fe37
6
+ metadata.gz: 4d433e761e5fc203d298ecec3fb5019e024ae6cd753296f08b304fc0dd790017f5ff0e8621a34995811d6db21c475f3e91a41d079722c5c1ec888d151740c9c6
7
+ data.tar.gz: c347e88a7cf8e5345be89160f6f6f6f07ff2bb7dfed93401f02700e898886b3205e0ced850ba1fad247779f4b0f16c712b751aca46be15ac0d75b6fe3f1cc9d1
data/.gitignore ADDED
@@ -0,0 +1,10 @@
1
+ .idea
2
+ *.log
3
+
4
+ test_gbm.rb
5
+
6
+ test_gbm.pmml
7
+
8
+ *.gem
9
+
10
+ sample.pmml
data/Gemfile.lock ADDED
@@ -0,0 +1,67 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ scoruby (0.2.2)
5
+ nokogiri (~> 1.7)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ coderay (1.1.0)
11
+ coveralls (0.8.20)
12
+ json (>= 1.8, < 3)
13
+ simplecov (~> 0.14.1)
14
+ term-ansicolor (~> 1.3)
15
+ thor (~> 0.19.4)
16
+ tins (~> 1.6)
17
+ diff-lcs (1.2.5)
18
+ docile (1.1.5)
19
+ json (2.1.0)
20
+ method_source (0.8.2)
21
+ mini_portile2 (2.1.0)
22
+ nokogiri (1.7.0.1)
23
+ mini_portile2 (~> 2.1.0)
24
+ pry (0.10.3)
25
+ coderay (~> 1.1.0)
26
+ method_source (~> 0.8.1)
27
+ slop (~> 3.4)
28
+ rake (12.0.0)
29
+ rspec (3.5.0)
30
+ rspec-core (~> 3.5.0)
31
+ rspec-expectations (~> 3.5.0)
32
+ rspec-mocks (~> 3.5.0)
33
+ rspec-core (3.5.4)
34
+ rspec-support (~> 3.5.0)
35
+ rspec-expectations (3.5.0)
36
+ diff-lcs (>= 1.2.0, < 2.0)
37
+ rspec-support (~> 3.5.0)
38
+ rspec-mocks (3.5.0)
39
+ diff-lcs (>= 1.2.0, < 2.0)
40
+ rspec-support (~> 3.5.0)
41
+ rspec-support (3.5.0)
42
+ ruby-prof (0.16.2)
43
+ simplecov (0.14.1)
44
+ docile (~> 1.1.0)
45
+ json (>= 1.8, < 3)
46
+ simplecov-html (~> 0.10.0)
47
+ simplecov-html (0.10.0)
48
+ slop (3.6.0)
49
+ term-ansicolor (1.6.0)
50
+ tins (~> 1.0)
51
+ thor (0.19.4)
52
+ tins (1.13.2)
53
+
54
+ PLATFORMS
55
+ ruby
56
+
57
+ DEPENDENCIES
58
+ bundler (~> 1.10)
59
+ coveralls
60
+ pry (~> 0.10)
61
+ rake (~> 12.0)
62
+ rspec (~> 3.5)
63
+ ruby-prof
64
+ scoruby!
65
+
66
+ BUNDLED WITH
67
+ 1.11.2
@@ -0,0 +1,17 @@
1
module Scoruby
  # Value object pairing a node's score with the optional per-value
  # probabilities read from its score-distribution entries.
  class Decision
    attr_reader :score, :score_distribution

    # @param score stored unchanged and exposed through #score
    # @param score_distributions collection responding to #empty? and #each;
    #   every entry must expose #attributes, indexable by 'value' and
    #   'probability'
    def initialize(score, score_distributions)
      @score = score

      # Without any distribution entries #score_distribution stays nil.
      unless score_distributions.empty?
        table = {}
        score_distributions.each do |entry|
          attrs = entry.attributes
          table[attrs['value'].to_s] = attrs['probability'].to_s
        end
        @score_distribution = table
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module Scoruby
  # Prepares a feature hash for the models by replacing boolean values with
  # the strings 't' (true) and 'f' (false). All other values pass through.
  class Features
    attr_reader :formatted

    # @param features [Hash] feature name => value
    def initialize(features)
      @formatted = format_booleans(features)
    end

    # Builds a formatted copy of +features+.
    #
    # The previous implementation rewrote the caller's hash in place from
    # inside a +map+ block; a fresh hash is returned instead so scoring never
    # alters the caller's data.
    #
    # @param features [Hash] feature name => value
    # @return [Hash] new hash with booleans replaced by 't' / 'f'
    def format_booleans(features)
      features.each_with_object({}) do |(name, value), result|
        result[name] = case value
                       when true then 't'
                       when false then 'f'
                       else value
                       end
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ require 'node'
2
+
3
module Scoruby
  module Models
    # A single decision tree: a root Node that is walked downwards by
    # evaluating each child's predicate against the supplied features.
    class DecisionTree
      attr_reader :root

      # @param tree_xml object responding to #attribute and #xpath; the
      #   'TreeModel/Node' match is handed to Node.new as the root
      def initialize(tree_xml)
        @id = tree_xml.attribute('id')
        @root = Node.new(tree_xml.xpath('TreeModel/Node'))
      end

      # Walks from the root to a leaf and returns that leaf's decision.
      #
      # @param features [Hash] feature name => value
      # @return the reached node's decision, or nil when some node has
      #   children but none of them accepts the features (an error is logged)
      def decide(features)
        curr = @root
        while curr.children[0]
          prev = curr
          curr = step(curr, features)
          return if didnt_step?(curr, prev)
        end

        curr.decision
      end

      private

      # Moves exactly one level down: the first child (in document order)
      # whose predicate holds wins. The earlier implementation re-applied the
      # child lookup to the node it had just moved to, which could skip a
      # level and prefer a later sibling over the first matching one.
      def step(curr, features)
        return curr unless curr.children

        curr.children.find { |child| child && child.true?(features) } || curr
      end

      # True (after logging) when the walk stayed on the same node.
      def didnt_step?(curr, prev)
        return false unless curr.equal?(prev)

        first_pred = curr.children[0].pred
        Scoruby.logger.error "Null tree: #{@id}, bad feature: #{first_pred && first_pred.field}"
        true
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'models/decision_tree'
2
+ require 'features'
3
+
4
module Scoruby
  module Models
    # Gradient-boosting style model: the scores of all trees are summed, a
    # constant is added and the total is squashed with the logistic function.
    class Gbm
      GBM_FOREST_XPATH = '//Segmentation[@multipleModelMethod="sum"]/Segment'.freeze
      CONST_XPATH = '//Target/@rescaleConstant'.freeze

      # @param xml document responding to #xpath
      # @raise [ArgumentError] when the rescale constant is not a valid float
      def initialize(xml)
        @decision_trees = xml.xpath(GBM_FOREST_XPATH).map { |xml_tree| DecisionTree.new(xml_tree) }
        @const = Float(xml.xpath(CONST_XPATH).to_s)
      end

      # @return [Integer] number of trees in the model
      def tree_count
        @decision_trees.count
      end

      # @param features [Hash] feature name => value
      # @return [Float] logistic of (sum of tree scores + constant)
      def score(features)
        formatted_features = Features.new(features).formatted
        tree_scores = @decision_trees.map do |dt|
          dt.decide(formatted_features).score.to_s.to_f
        end
        # Explicit 0.0 seed keeps an empty forest from producing nil.
        x = tree_scores.reduce(0.0, :+) + @const
        # 1 / (1 + e^-x) equals e^x / (1 + e^x) but does not turn into
        # Infinity / Infinity (NaN) when x is large.
        1.0 / (1.0 + Math.exp(-x))
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
module Scoruby
  module Models
    # Forest of decision trees whose prediction is the score returned by the
    # largest number of trees.
    class RandomForest
      RF_FOREST_XPATH = 'PMML/MiningModel/Segmentation/Segment'

      # @param xml document responding to #xpath; one DecisionTree is built
      #   per matched segment
      def initialize(xml)
        segments = xml.xpath(RF_FOREST_XPATH)
        @decision_trees = segments.map { |segment| DecisionTree.new(segment) }
      end

      # Counts how many trees voted for each score.
      #
      # @param features [Hash] feature name => value
      # @return [Hash] score => number of trees (missing keys read as 0)
      def decisions_count(features)
        prepared = Features.new(features).formatted
        votes = @decision_trees.map { |tree| tree.decide(prepared).score }

        tally = Hash.new(0)
        votes.each { |vote| tally[vote] += 1 }
        tally
      end

      # @param features [Hash] feature name => value
      # @return the score with the highest vote count
      def predict(features)
        decisions_count(features).max_by { |_score, count| count }.first
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require 'models/decision_tree'
2
+ require 'models/gbm'
3
+ require 'models/random_forest'
4
+
5
module Scoruby
  # Chooses and builds the model class matching a parsed document.
  class ModelsFactory
    RANDOM_FOREST_MODEL = 'randomForest_Model'
    GBM_INDICATION = '//OutputField[@name="scaledGbmValue"]'
    MODEL_NOT_SUPPORTED_ERROR = 'model not supported'

    class << self
      # Checks are made in a fixed order: random forest, then GBM, then a
      # plain decision tree.
      #
      # @param xml document responding to #xpath (and #child for the
      #   decision-tree case)
      # @return [Models::RandomForest, Models::Gbm, Models::DecisionTree]
      # @raise [RuntimeError] 'model not supported' when nothing matches
      def factory_for(xml)
        if random_forest?(xml)
          Models::RandomForest.new(xml)
        elsif gbm?(xml)
          Models::Gbm.new(xml)
        elsif decision_tree?(xml)
          Models::DecisionTree.new(xml.child)
        else
          raise MODEL_NOT_SUPPORTED_ERROR
        end
      end

      # @return [Boolean] whether a PMML/TreeModel element is present
      def decision_tree?(xml)
        tree_models = xml.xpath('PMML/TreeModel')
        !tree_models.empty?
      end

      # @return [Boolean] whether the mining model is named randomForest_Model
      def random_forest?(xml)
        model_name = xml.xpath('PMML/MiningModel/@modelName').to_s
        model_name == RANDOM_FOREST_MODEL
      end

      # @return [Boolean] whether a scaledGbmValue output field is present
      def gbm?(xml)
        indicators = xml.xpath(GBM_INDICATION)
        !indicators.empty?
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ require 'predicate_factory'
2
+ require 'decision'
3
+
4
module Scoruby
  # One node of a decision tree: the predicate guarding it, the decision it
  # carries and its child nodes.
  class Node
    attr_reader :decision, :pred, :children

    # @param xml element responding to #children and #attribute. After
    #   'Extension' and 'ScoreDistribution' children are discarded, the first
    #   remaining child is read as the predicate and every following child
    #   named 'Node' becomes a child node.
    def initialize(xml)
      children = xml.children

      @decision = Decision.new(xml.attribute('score').to_s,
                               children.select { |c| c.name == 'ScoreDistribution' })

      children = remove_nodes(children)

      @pred = PredicateFactory.for(children[0])
      # Previously at most three children (indexes 1..3) were built, so wider
      # splits silently lost branches; build every child Node instead.
      @children = children.drop(1)
                          .select { |c| c.name == 'Node' }
                          .map { |c| Node.new(c) }
    end

    # A node without a predicate accepts everything.
    #
    # @param features [Hash] feature name => value
    def true?(features)
      @pred.nil? || @pred.true?(features)
    end

    private

    # Drops the children that are neither the predicate nor child nodes.
    def remove_nodes(children)
      children.reject { |c| %w(Extension ScoreDistribution).include? c.name }
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'predicates/compound_predicate'
2
+ require 'predicates/simple_predicate'
3
+ require 'predicates/simple_set_predicate'
4
+ require 'predicates/true_predicate'
5
+ require 'predicates/false_predicate'
6
+
7
module Scoruby
  # Maps a predicate element to the predicate class of the same kind.
  class PredicateFactory
    # @param pred_xml element responding to #name
    # @return a predicate instance, or nil when the element name is not one
    #   of the five handled below
    def self.for(pred_xml)
      case pred_xml.name
      when 'SimplePredicate' then Predicates::SimplePredicate.new(pred_xml)
      when 'SimpleSetPredicate' then Predicates::SimpleSetPredicate.new(pred_xml)
      when 'CompoundPredicate' then Predicates::CompoundPredicate.new(pred_xml)
      when 'True' then Predicates::TruePredicate.new
      when 'False' then Predicates::FalsePredicate.new
      end
    end
  end
end
19
+
@@ -0,0 +1,44 @@
1
module Scoruby
  module Predicates
    # Combines child predicates with a boolean operator: 'and', 'or' or
    # 'surrogate'.
    class CompoundPredicate
      attr_reader :field

      # @param pred_xml element responding to #attributes (indexable by
      #   'booleanOperator') and #children
      def initialize(pred_xml)
        @boolean_operator = pred_xml.attributes['booleanOperator'].value
        # Every child is considered (previously only the first two were read).
        # Children PredicateFactory does not recognise come back as nil and
        # are dropped.
        @predicates = pred_xml.children.map { |child| PredicateFactory.for(child) }.compact
        @field = @predicates.map(&:field).flatten.compact
      end

      # @param features [Hash] feature name => value
      # @return [Boolean, nil] nil when the operator is not one of the three
      #   handled here
      def true?(features)
        case @boolean_operator
        when 'surrogate' then surrogate?(features)
        when 'or' then or?(features)
        when 'and' then and?(features)
        end
      end

      # @return [Boolean] true when any referenced field has no key in features
      def is_missing?(features)
        @field.any? { |f| !features.key?(f) }
      end

      private

      # Evaluates the first predicate whose fields are all present; when every
      # predicate is missing data the last one is evaluated. With two children
      # this matches the earlier first-else-second behaviour.
      def surrogate?(features)
        return false if @predicates.empty?

        chosen = @predicates.find { |p| !p.is_missing?(features) } || @predicates.last
        chosen.true?(features)
      end

      def or?(features)
        @predicates.any? { |p| p.true?(features) }
      end

      def and?(features)
        @predicates.all? { |p| p.true?(features) }
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Scoruby
  module Predicates
    # Constant predicate that rejects every input.
    class FalsePredicate
      # Refers to no feature, hence no field name.
      def field; end

      # @return [false] regardless of the features given
      def true?(_features)
        false
      end

      # @return [false] there is no field that could be absent
      def is_missing?(_features)
        false
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
module Scoruby
  module Predicates
    # Compares a single feature against a constant taken from the predicate
    # element, or tests whether the feature is present.
    class SimplePredicate
      GREATER_THAN = 'greaterThan'
      LESS_THAN = 'lessThan'
      LESS_OR_EQUAL = 'lessOrEqual'
      GREATER_OR_EQUAL = 'greaterOrEqual'
      MATH_OPS = [GREATER_THAN, LESS_THAN, LESS_OR_EQUAL, GREATER_OR_EQUAL].freeze
      EQUAL = 'equal'
      NOT_EQUAL = 'notEqual'
      IS_MISSING = 'isMissing'
      IS_NOT_MISSING = 'isNotMissing'

      attr_reader :field

      # @param pred_xml element whose #attributes is indexable by 'field',
      #   'operator' and (optionally) 'value'
      def initialize(pred_xml)
        attributes = pred_xml.attributes

        @field = attributes['field'].value.to_sym
        @operator = attributes['operator'].value
        # Presence operators carry no 'value' attribute. Only isMissing was
        # special-cased before, so isNotMissing raised NoMethodError here.
        value_attribute = attributes['value']
        @value = value_attribute.value if value_attribute
      end

      # @param features [Hash] feature name (Symbol) => value
      # @return [Boolean, nil] nil for an operator this class does not handle
      def true?(features)
        return num_true?(features) if MATH_OPS.include?(@operator)

        case @operator
        when EQUAL then features[@field] == @value
        # A missing feature does not satisfy notEqual.
        when NOT_EQUAL then !value_missing?(features) && features[@field] != @value
        when IS_MISSING then value_missing?(features)
        when IS_NOT_MISSING then !value_missing?(features)
        end
      end

      # @return [Boolean] true when features has no key for the field
      def is_missing?(features)
        !features.key?(@field)
      end

      private

      # True when the feature is absent or stored as nil.
      def value_missing?(features)
        features[@field].nil? || !features.key?(@field)
      end

      # Numeric comparison; a nil/false feature value never matches.
      # Float() raises when either side is not numeric.
      def num_true?(features)
        return false unless features[@field]

        curr_value = Float(features[@field])
        value = Float(@value)
        case @operator
        when GREATER_THAN then curr_value > value
        when LESS_THAN then curr_value < value
        when LESS_OR_EQUAL then curr_value <= value
        when GREATER_OR_EQUAL then curr_value >= value
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
module Scoruby
  module Predicates
    # Tests whether a feature's value is (or is not) one of a fixed list of
    # words taken from the predicate element's first child.
    class SimpleSetPredicate
      IS_IN = 'isIn'
      IS_NOT_IN = 'isNotIn'

      attr_reader :field

      # @param pred_xml element whose #attributes is indexable by 'field' and
      #   'booleanOperator', and whose first child's #content holds the
      #   whitespace-separated (optionally double-quoted) values
      def initialize(pred_xml)
        attributes = pred_xml.attributes
        @field = attributes['field'].value.to_sym
        @array = single_or_quoted_words(pred_xml.children[0].content)
        @operator = attributes['booleanOperator'].value
      end

      # @param features [Hash] feature name (Symbol) => value
      # @return [Boolean, nil] nil for an operator other than isIn / isNotIn
      def true?(features)
        value = features[@field]
        case @operator
        when IS_IN then @array.include?(value)
        # isNotIn used to fall through and return nil; a nil (missing) value
        # is not treated as satisfying it.
        when IS_NOT_IN then !value.nil? && !@array.include?(value)
        end
      end

      # @return [Boolean] true when features has no key for the field
      def is_missing?(features)
        !features.key?(@field)
      end

      private

      # Splits on whitespace that lies outside double quotes, drops empty
      # pieces and strips the quote characters.
      def single_or_quoted_words(string)
        string.split(/\s(?=(?:[^"]|"[^"]*")*$)/)
              .reject(&:empty?)
              .map { |w| w.tr('"', '') }
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Scoruby
  module Predicates
    # Constant predicate that accepts every input.
    class TruePredicate
      # Refers to no feature, hence no field name.
      def field; end

      # @return [true] regardless of the features given
      def true?(_features)
        true
      end

      # @return [false] there is no field that could be absent
      def is_missing?(_features)
        false
      end
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Scoruby
2
- VERSION = '0.2.2'
2
+ VERSION = '0.2.3'
3
3
  end
data/lib/scoruby.rb CHANGED
@@ -1,8 +1,7 @@
1
1
  require 'scoruby/version'
2
- require 'models_factory'
2
+ require 'scoruby/models_factory'
3
3
  require 'nokogiri'
4
4
  require 'logger'
5
- require 'pry'
6
5
 
7
6
  module Scoruby
8
7
  class << self
data/scoruby.gemspec CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |spec|
16
16
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
17
17
  spec.bindir = "exe"
18
18
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
19
- spec.require_paths = ["lib", "lib/random_forest", "lib/gbm"]
19
+ spec.require_paths = ["lib", "lib/scoruby", "lib/scoruby/models/random_forest", "lib/scoruby/models/gbm"]
20
20
 
21
21
  spec.add_development_dependency "bundler", "~> 1.10"
22
22
  spec.add_development_dependency "rake", "~> 12.0"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scoruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Asaf Schers
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-07-09 00:00:00.000000000 Z
11
+ date: 2017-07-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -115,29 +115,31 @@ executables: []
115
115
  extensions: []
116
116
  extra_rdoc_files: []
117
117
  files:
118
+ - ".gitignore"
118
119
  - ".rspec"
119
120
  - ".travis.yml"
120
121
  - CODE_OF_CONDUCT.md
121
122
  - Gemfile
123
+ - Gemfile.lock
122
124
  - LICENSE.txt
123
125
  - README.md
124
126
  - Rakefile
125
127
  - bin/console
126
128
  - bin/setup
127
- - lib/decision.rb
128
- - lib/features.rb
129
- - lib/models/decision_tree.rb
130
- - lib/models/gbm.rb
131
- - lib/models/random_forest.rb
132
- - lib/models_factory.rb
133
- - lib/node.rb
134
- - lib/predicate_factory.rb
135
- - lib/predicates/compound_predicate.rb
136
- - lib/predicates/false_predicate.rb
137
- - lib/predicates/simple_predicate.rb
138
- - lib/predicates/simple_set_predicate.rb
139
- - lib/predicates/true_predicate.rb
140
129
  - lib/scoruby.rb
130
+ - lib/scoruby/decision.rb
131
+ - lib/scoruby/features.rb
132
+ - lib/scoruby/models/decision_tree.rb
133
+ - lib/scoruby/models/gbm.rb
134
+ - lib/scoruby/models/random_forest.rb
135
+ - lib/scoruby/models_factory.rb
136
+ - lib/scoruby/node.rb
137
+ - lib/scoruby/predicate_factory.rb
138
+ - lib/scoruby/predicates/compound_predicate.rb
139
+ - lib/scoruby/predicates/false_predicate.rb
140
+ - lib/scoruby/predicates/simple_predicate.rb
141
+ - lib/scoruby/predicates/simple_set_predicate.rb
142
+ - lib/scoruby/predicates/true_predicate.rb
141
143
  - lib/scoruby/version.rb
142
144
  - scoruby.gemspec
143
145
  homepage: https://github.com/asafschers/scoruby
@@ -148,8 +150,9 @@ post_install_message:
148
150
  rdoc_options: []
149
151
  require_paths:
150
152
  - lib
151
- - lib/random_forest
152
- - lib/gbm
153
+ - lib/scoruby
154
+ - lib/scoruby/models/random_forest
155
+ - lib/scoruby/models/gbm
153
156
  required_ruby_version: !ruby/object:Gem::Requirement
154
157
  requirements:
155
158
  - - ">="
data/lib/decision.rb DELETED
@@ -1,15 +0,0 @@
1
- class Decision
2
-
3
- attr_reader :score, :score_distribution
4
-
5
- def initialize(score, score_distributions)
6
- @score = score
7
- return if score_distributions.empty?
8
-
9
- @score_distribution = {}
10
- score_distributions.each { |score_distribution|
11
- attributes = score_distribution.attributes
12
- @score_distribution[attributes['value'].to_s] = attributes['probability'].to_s
13
- }
14
- end
15
- end
data/lib/features.rb DELETED
@@ -1,16 +0,0 @@
1
- class Features
2
-
3
- attr_reader :formatted
4
-
5
- def initialize(features)
6
- @formatted = format_booleans(features)
7
- end
8
-
9
- def format_booleans(features)
10
- features.map { |k, v|
11
- features[k] = 'f' if v == false
12
- features[k] = 't' if v == true
13
- }
14
- features
15
- end
16
- end
@@ -1,42 +0,0 @@
1
- require 'node'
2
-
3
- class DecisionTree
4
-
5
- attr_reader :root
6
-
7
- def initialize(tree_xml)
8
- @id = tree_xml.attribute('id')
9
- @root = Node.new(tree_xml.xpath('TreeModel/Node'))
10
- end
11
-
12
- def decide(features)
13
- curr = @root
14
- while curr.children[0]
15
- prev = curr
16
- curr = step(curr, features)
17
- return if didnt_step?(curr, prev)
18
- end
19
-
20
- curr.decision
21
- end
22
-
23
- private
24
-
25
- def step(curr, features)
26
- curr = step_on_true(curr, features, 0)
27
- curr = step_on_true(curr, features, 1)
28
- curr = step_on_true(curr, features, 2)
29
- curr
30
- end
31
-
32
- def step_on_true(curr, features, num)
33
- return curr.children[num] if curr.children && curr.children[num] && curr.children[num].true?(features)
34
- curr
35
- end
36
-
37
- def didnt_step?(curr, prev)
38
- return false if (prev.pred != curr.pred)
39
- Scoruby.logger.error "Null tree: #{@id}, bad feature: #{curr.children[0].pred.field }"
40
- true
41
- end
42
- end
data/lib/models/gbm.rb DELETED
@@ -1,29 +0,0 @@
1
- require 'models/decision_tree'
2
- require 'features'
3
-
4
- class Gbm
5
- GBM_FOREST_XPATH = '//Segmentation[@multipleModelMethod="sum"]/Segment'
6
- CONST_XPATH = '//Target/@rescaleConstant'
7
-
8
- def initialize(xml)
9
- @decision_trees = xml.xpath(GBM_FOREST_XPATH).collect{ |xml_tree|
10
- DecisionTree.new(xml_tree)
11
- }
12
- @const = Float(xml.xpath(CONST_XPATH).to_s)
13
- end
14
-
15
- def tree_count
16
- @decision_trees.count
17
- end
18
-
19
- def score(features)
20
- formatted_features = Features.new(features).formatted
21
- x = @decision_trees.map { |dt|
22
- score = dt.decide(formatted_features).score
23
- score.to_s.to_f
24
- }.reduce(:+) + @const
25
- Math.exp(x) / (1 + Math.exp(x))
26
- end
27
-
28
- end
29
-
@@ -1,25 +0,0 @@
1
- require 'models/decision_tree'
2
-
3
- class RandomForest
4
- RF_FOREST_XPATH = 'PMML/MiningModel/Segmentation/Segment'
5
-
6
- def initialize(xml)
7
- xml_trees = xml.xpath(RF_FOREST_XPATH)
8
- @decision_trees = xml_trees.collect{ |xml_tree|
9
- DecisionTree.new(xml_tree)
10
- }
11
- end
12
-
13
- def decisions_count(features)
14
- formatted_features = Features.new(features).formatted
15
- decisions = @decision_trees.collect { |decision_tree|
16
- decision_tree.decide(formatted_features).score
17
- }
18
- decisions.inject(Hash.new(0)) { |h, e| h[e] += 1 ; h }
19
- end
20
-
21
- def predict(features)
22
- decisions_count(features).max_by {|_, v| v }[0]
23
- end
24
-
25
- end
@@ -1,28 +0,0 @@
1
- require 'models/random_forest'
2
- require 'models/gbm'
3
-
4
- class ModelsFactory
5
- RANDOM_FOREST_MODEL = 'randomForest_Model'
6
- GBM_INDICATION = '//OutputField[@name="scaledGbmValue"]'
7
- MODEL_NOT_SUPPORTED_ERROR = 'model not supported'
8
-
9
- def self.factory_for(xml)
10
- return RandomForest.new(xml) if random_forest?(xml)
11
- return Gbm.new(xml) if gbm?(xml)
12
- return DecisionTree.new(xml.child) if decision_tree?(xml)
13
-
14
- raise MODEL_NOT_SUPPORTED_ERROR
15
- end
16
-
17
- def self.decision_tree?(xml)
18
- !xml.xpath('PMML/TreeModel').empty?
19
- end
20
-
21
- def self.random_forest?(xml)
22
- xml.xpath('PMML/MiningModel/@modelName').to_s == RANDOM_FOREST_MODEL
23
- end
24
-
25
- def self.gbm?(xml)
26
- !xml.xpath(GBM_INDICATION).empty?
27
- end
28
- end
data/lib/node.rb DELETED
@@ -1,36 +0,0 @@
1
- require 'predicate_factory'
2
- require 'decision'
3
-
4
- class Node
5
-
6
- attr_reader :decision, :pred, :children
7
-
8
- def initialize(xml)
9
- children = xml.children
10
-
11
- @decision = Decision.new(xml.attribute('score').to_s,
12
- children.select { |c| c.name == 'ScoreDistribution' } )
13
-
14
- children = remove_nodes(children)
15
-
16
- pred_xml = children[0]
17
- @pred = PredicateFactory.for(pred_xml)
18
- @children = []
19
-
20
- return if children.count == 1
21
-
22
- @children << Node.new(children[1]) if children[1]
23
- @children << Node.new(children[2]) if children[2]
24
- @children << Node.new(children[3]) if children[3]
25
- end
26
-
27
- def true?(features)
28
- @pred.nil? || @pred.true?(features)
29
- end
30
-
31
- private
32
-
33
- def remove_nodes(children)
34
- children.reject { |c| %w(Extension ScoreDistribution).include? c.name }
35
- end
36
- end
@@ -1,18 +0,0 @@
1
- require 'predicates/compound_predicate'
2
- require 'predicates/simple_predicate'
3
- require 'predicates/simple_set_predicate'
4
- require 'predicates/true_predicate'
5
- require 'predicates/false_predicate'
6
-
7
- class PredicateFactory
8
-
9
- def self.for(pred_xml)
10
- return SimplePredicate.new(pred_xml) if pred_xml.name == 'SimplePredicate'
11
- return SimpleSetPredicate.new(pred_xml) if pred_xml.name == 'SimpleSetPredicate'
12
- return CompoundPredicate.new(pred_xml) if pred_xml.name == 'CompoundPredicate'
13
- return TruePredicate.new if pred_xml.name == 'True'
14
- return FalsePredicate.new if pred_xml.name == 'False'
15
- end
16
- end
17
-
18
-
@@ -1,40 +0,0 @@
1
- class CompoundPredicate
2
-
3
- attr_reader :field
4
-
5
- def initialize(pred_xml)
6
- attributes = pred_xml.attributes
7
- children = pred_xml.children
8
-
9
- @boolean_operator = attributes['booleanOperator'].value
10
- @predicates = []
11
- @predicates << PredicateFactory.for(children[0])
12
- @predicates << PredicateFactory.for(children[1])
13
- @field = @predicates.map(&:field).flatten.compact
14
- end
15
-
16
- def true?(features)
17
- return surrogate?(features) if @boolean_operator == 'surrogate'
18
- return or?(features) if @boolean_operator == 'or'
19
- and?(features) if @boolean_operator == 'and'
20
- end
21
-
22
- def is_missing?(features)
23
- @field.any? { |f| !features.keys.include?(f) }
24
- end
25
-
26
- private
27
-
28
- def surrogate?(features)
29
- return @predicates[1].true?(features) if @predicates[0].is_missing?(features)
30
- @predicates[0].true?(features)
31
- end
32
-
33
- def or?(features)
34
- @predicates.any? { |p| p.true?(features) }
35
- end
36
-
37
- def and?(features)
38
- @predicates.all? { |p| p.true?(features) }
39
- end
40
- end
@@ -1,13 +0,0 @@
1
- class FalsePredicate
2
- def field
3
- nil
4
- end
5
-
6
- def true?(_)
7
- false
8
- end
9
-
10
- def is_missing?(_)
11
- false
12
- end
13
- end
@@ -1,43 +0,0 @@
1
- class SimplePredicate
2
-
3
- GREATER_THAN = 'greaterThan'
4
- LESS_THAN = 'lessThan'
5
- LESS_OR_EQUAL = 'lessOrEqual'
6
- GREATER_OR_EQUAL = 'greaterOrEqual'
7
- MATH_OPS = [GREATER_THAN, LESS_THAN, LESS_OR_EQUAL, GREATER_OR_EQUAL]
8
- EQUAL = 'equal'
9
- IS_MISSING = 'isMissing'
10
-
11
- attr_reader :field
12
-
13
- def initialize(pred_xml)
14
- attributes = pred_xml.attributes
15
-
16
- @field = attributes['field'].value.to_sym
17
- @operator = attributes['operator'].value
18
- return if @operator == IS_MISSING
19
- @value = attributes['value'].value
20
- end
21
-
22
- def true?(features)
23
- return num_true?(features) if MATH_OPS.include?(@operator)
24
- return features[@field] == @value if @operator == EQUAL
25
- features[field].nil? || !features.has_key?(field) if @operator == IS_MISSING
26
- end
27
-
28
- def is_missing?(features)
29
- !features.keys.include?(@field)
30
- end
31
-
32
- private
33
-
34
- def num_true?(features)
35
- return false unless features[@field]
36
- curr_value = Float(features[@field])
37
- value = Float(@value)
38
- return curr_value > value if @operator == GREATER_THAN
39
- return curr_value < value if @operator == LESS_THAN
40
- return curr_value <= value if @operator == LESS_OR_EQUAL
41
- curr_value >= value if @operator == GREATER_OR_EQUAL
42
- end
43
- end
@@ -1,29 +0,0 @@
1
- class SimpleSetPredicate
2
-
3
- IS_IN = 'isIn'
4
-
5
- attr_reader :field
6
-
7
- def initialize(pred_xml)
8
- attributes = pred_xml.attributes
9
- @field = attributes['field'].value.to_sym
10
- @array = single_or_quoted_words(pred_xml.children[0].content)
11
- @operator = attributes['booleanOperator'].value
12
- end
13
-
14
- def true?(features)
15
- @array.include? features[@field] if @operator == IS_IN
16
- end
17
-
18
- def is_missing?(features)
19
- !features.keys.include?(@field)
20
- end
21
-
22
- private
23
-
24
- def single_or_quoted_words(string)
25
- string.split(/\s(?=(?:[^"]|"[^"]*")*$)/).
26
- reject(&:empty?).
27
- map { |w| w.tr('"','')}
28
- end
29
- end
@@ -1,13 +0,0 @@
1
- class TruePredicate
2
- def field
3
- nil
4
- end
5
-
6
- def true?(_)
7
- true
8
- end
9
-
10
- def is_missing?(_)
11
- false
12
- end
13
- end