scoruby 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: f78c07d033cba7c4f13531ae30e51f9c00b33d11
4
- data.tar.gz: d487653a5f8604e669db89a8766b4e84d3e1950b
2
+ SHA256:
3
+ metadata.gz: 3af72ce9c41b6f116e34af71b5ad3264d8043c6fd1aaa3544a0a362bff096c0f
4
+ data.tar.gz: de77039296cfd39f2ef036372e90ad2b4028180381a153380d77164ccf02a743
5
5
  SHA512:
6
- metadata.gz: d64f07f42472a9ae54435d26d738f05381a3ae2542c4ad2fdfdf879696e0af7c6bd8df6d946fb4aafed2e316fcc00c09c75b164c6a1d56b11ea5cd2ed48b82a5
7
- data.tar.gz: 5ffde48fb242ef57f3c06f089aecfe73b5b9b8d9038a75caae516a368619aba3f1d597ddf664783346c538f26fbe9756efc02d20036900d2d94abd0fb7219204
6
+ metadata.gz: c827de777f0b1b0d83af165a6079a6a2cff83d2612758c78171328cc2811cf718532fef6ff628a04a38f6ef9179c60dae529ee70a7daa90e5aed0e38462cf370
7
+ data.tar.gz: eff6fcc12bbe064b5a1af1a272c8b544af87c8ae9aa48e7f59a506abcaf292d3b72a235c835caec524eb5ae04a66e2f37ca12691735ce9c1bda19489c1fff385
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- scoruby (0.2.7)
4
+ scoruby (0.2.8)
5
5
  nokogiri (~> 1.7)
6
6
 
7
7
  GEM
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Scoruby
4
+ module Models
5
+ module RandomForest
6
+ class Data
7
+ RF_FOREST_XPATH = 'PMML/MiningModel/Segmentation/Segment'
8
+ FEATURES_XPATH = 'PMML/DataDictionary/DataField'
9
+
10
+ def initialize(xml)
11
+ @xml = xml
12
+ end
13
+
14
+ def decision_trees
15
+ @xml.xpath(RF_FOREST_XPATH).map do |xml_tree|
16
+ DecisionTree.new(xml_tree)
17
+ end
18
+ end
19
+
20
+ def categorical_features
21
+ categorical_predicates.each_with_object(Hash.new([])) do |xml, res|
22
+ predicate = Predicates::SimpleSetPredicate.new(xml)
23
+ res[predicate.field] = res[predicate.field] | predicate.array
24
+ end
25
+ end
26
+
27
+ def continuous_features
28
+ continuous_predicates.map do |xml|
29
+ Predicates::SimplePredicate.new(xml).field
30
+ end.uniq
31
+ end
32
+
33
+ private
34
+
35
+ def categorical_predicates
36
+ @xml.xpath('//SimpleSetPredicate')
37
+ end
38
+
39
+ def continuous_predicates
40
+ @xml.xpath('//SimplePredicate')
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'scoruby/models/random_forest/data'
4
+
5
+ module Scoruby
6
+ module Models
7
+ module RandomForest
8
+ class Model
9
+ def initialize(xml)
10
+ @decision_trees = Data.new(xml).decision_trees
11
+ end
12
+
13
+ def predict(features)
14
+ decisions_count = decisions_count(features)
15
+ decision = decisions_count.max_by { |_, v| v }
16
+ {
17
+ label: decision[0],
18
+ score: decision[1] / decisions_count.values.reduce(0, :+).to_f
19
+ }
20
+ end
21
+
22
+ def decisions_count(features)
23
+ formatted_features = Features.new(features).formatted
24
+ decisions = traverse_trees(formatted_features)
25
+ aggregate_decisions(decisions)
26
+ end
27
+
28
+ private
29
+
30
+ def traverse_trees(formatted_features)
31
+ @decision_trees.map do |decision_tree|
32
+ decision_tree.decide(formatted_features).score
33
+ end
34
+ end
35
+
36
+ def aggregate_decisions(decisions)
37
+ decisions.each_with_object(Hash.new(0)) do |score, counts|
38
+ counts[score] += 1
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
@@ -2,7 +2,7 @@
2
2
 
3
3
  require 'scoruby/models/decision_tree'
4
4
  require 'scoruby/models/gbm'
5
- require 'scoruby/models/random_forest'
5
+ require 'scoruby/models/random_forest/model'
6
6
  require 'scoruby/models/naive_bayes/model'
7
7
 
8
8
  module Scoruby
@@ -12,7 +12,7 @@ module Scoruby
12
12
  MODEL_NOT_SUPPORTED_ERROR = 'model not supported'
13
13
 
14
14
  def self.factory_for(xml)
15
- return Models::RandomForest.new(xml) if random_forest?(xml)
15
+ return Models::RandomForest::Model.new(xml) if random_forest?(xml)
16
16
  return Models::Gbm.new(xml) if gbm?(xml)
17
17
  return Models::DecisionTree.new(xml.child) if decision_tree?(xml)
18
18
  return Models::NaiveBayes::Model.new(xml) if naive_bayes?(xml)
@@ -5,7 +5,7 @@ module Scoruby
5
5
  class SimpleSetPredicate
6
6
  IS_IN = 'isIn'
7
7
 
8
- attr_reader :field
8
+ attr_reader :field, :array
9
9
 
10
10
  def initialize(pred_xml)
11
11
  attributes = pred_xml.attributes
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Scoruby
4
- VERSION = '0.2.8'
4
+ VERSION = '0.2.9'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scoruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.8
4
+ version: 0.2.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Asaf Schers
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-12-02 00:00:00.000000000 Z
11
+ date: 2017-12-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -134,7 +134,8 @@ files:
134
134
  - lib/scoruby/models/gbm.rb
135
135
  - lib/scoruby/models/naive_bayes/model.rb
136
136
  - lib/scoruby/models/naive_bayes/model_data.rb
137
- - lib/scoruby/models/random_forest.rb
137
+ - lib/scoruby/models/random_forest/data.rb
138
+ - lib/scoruby/models/random_forest/model.rb
138
139
  - lib/scoruby/models_factory.rb
139
140
  - lib/scoruby/node.rb
140
141
  - lib/scoruby/predicate_factory.rb
@@ -168,9 +169,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
168
169
  version: '0'
169
170
  requirements: []
170
171
  rubyforge_project:
171
- rubygems_version: 2.6.13
172
+ rubygems_version: 2.7.3
172
173
  signing_key:
173
174
  specification_version: 4
174
175
  summary: Ruby Scoring API for PMML.
175
176
  test_files: []
176
- has_rdoc:
@@ -1,45 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Scoruby
4
- module Models
5
- class RandomForest
6
- RF_FOREST_XPATH = 'PMML/MiningModel/Segmentation/Segment'
7
-
8
- def initialize(xml)
9
- xml_trees = xml.xpath(RF_FOREST_XPATH)
10
- @decision_trees = xml_trees.map do |xml_tree|
11
- DecisionTree.new(xml_tree)
12
- end
13
- end
14
-
15
- def predict(features)
16
- decisions_count = decisions_count(features)
17
- decision = decisions_count.max_by { |_, v| v }
18
- {
19
- label: decision[0],
20
- score: decision[1] / decisions_count.values.reduce(0, :+).to_f
21
- }
22
- end
23
-
24
- def decisions_count(features)
25
- formatted_features = Features.new(features).formatted
26
- decisions = traverse_trees(formatted_features)
27
- aggregate_decisions(decisions)
28
- end
29
-
30
- private
31
-
32
- def traverse_trees(formatted_features)
33
- @decision_trees.map do |decision_tree|
34
- decision_tree.decide(formatted_features).score
35
- end
36
- end
37
-
38
- def aggregate_decisions(decisions)
39
- decisions.each_with_object(Hash.new(0)) do |score, counts|
40
- counts[score] += 1
41
- end
42
- end
43
- end
44
- end
45
- end