scoruby 0.2.8 → 0.2.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: f78c07d033cba7c4f13531ae30e51f9c00b33d11
4
- data.tar.gz: d487653a5f8604e669db89a8766b4e84d3e1950b
2
+ SHA256:
3
+ metadata.gz: 3af72ce9c41b6f116e34af71b5ad3264d8043c6fd1aaa3544a0a362bff096c0f
4
+ data.tar.gz: de77039296cfd39f2ef036372e90ad2b4028180381a153380d77164ccf02a743
5
5
  SHA512:
6
- metadata.gz: d64f07f42472a9ae54435d26d738f05381a3ae2542c4ad2fdfdf879696e0af7c6bd8df6d946fb4aafed2e316fcc00c09c75b164c6a1d56b11ea5cd2ed48b82a5
7
- data.tar.gz: 5ffde48fb242ef57f3c06f089aecfe73b5b9b8d9038a75caae516a368619aba3f1d597ddf664783346c538f26fbe9756efc02d20036900d2d94abd0fb7219204
6
+ metadata.gz: c827de777f0b1b0d83af165a6079a6a2cff83d2612758c78171328cc2811cf718532fef6ff628a04a38f6ef9179c60dae529ee70a7daa90e5aed0e38462cf370
7
+ data.tar.gz: eff6fcc12bbe064b5a1af1a272c8b544af87c8ae9aa48e7f59a506abcaf292d3b72a235c835caec524eb5ae04a66e2f37ca12691735ce9c1bda19489c1fff385
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- scoruby (0.2.7)
4
+ scoruby (0.2.8)
5
5
  nokogiri (~> 1.7)
6
6
 
7
7
  GEM
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Scoruby
4
+ module Models
5
+ module RandomForest
6
+ class Data
7
+ RF_FOREST_XPATH = 'PMML/MiningModel/Segmentation/Segment'
8
+ FEATURES_XPATH = 'PMML/DataDictionary/DataField'
9
+
10
+ def initialize(xml)
11
+ @xml = xml
12
+ end
13
+
14
+ def decision_trees
15
+ @xml.xpath(RF_FOREST_XPATH).map do |xml_tree|
16
+ DecisionTree.new(xml_tree)
17
+ end
18
+ end
19
+
20
+ def categorical_features
21
+ categorical_predicates.each_with_object(Hash.new([])) do |xml, res|
22
+ predicate = Predicates::SimpleSetPredicate.new(xml)
23
+ res[predicate.field] = res[predicate.field] | predicate.array
24
+ end
25
+ end
26
+
27
+ def continuous_features
28
+ continuous_predicates.map do |xml|
29
+ Predicates::SimplePredicate.new(xml).field
30
+ end.uniq
31
+ end
32
+
33
+ private
34
+
35
+ def categorical_predicates
36
+ @xml.xpath('//SimpleSetPredicate')
37
+ end
38
+
39
+ def continuous_predicates
40
+ @xml.xpath('//SimplePredicate')
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'scoruby/models/random_forest/data'
4
+
5
+ module Scoruby
6
+ module Models
7
+ module RandomForest
8
+ class Model
9
+ def initialize(xml)
10
+ @decision_trees = Data.new(xml).decision_trees
11
+ end
12
+
13
+ def predict(features)
14
+ decisions_count = decisions_count(features)
15
+ decision = decisions_count.max_by { |_, v| v }
16
+ {
17
+ label: decision[0],
18
+ score: decision[1] / decisions_count.values.reduce(0, :+).to_f
19
+ }
20
+ end
21
+
22
+ def decisions_count(features)
23
+ formatted_features = Features.new(features).formatted
24
+ decisions = traverse_trees(formatted_features)
25
+ aggregate_decisions(decisions)
26
+ end
27
+
28
+ private
29
+
30
+ def traverse_trees(formatted_features)
31
+ @decision_trees.map do |decision_tree|
32
+ decision_tree.decide(formatted_features).score
33
+ end
34
+ end
35
+
36
+ def aggregate_decisions(decisions)
37
+ decisions.each_with_object(Hash.new(0)) do |score, counts|
38
+ counts[score] += 1
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
@@ -2,7 +2,7 @@
2
2
 
3
3
  require 'scoruby/models/decision_tree'
4
4
  require 'scoruby/models/gbm'
5
- require 'scoruby/models/random_forest'
5
+ require 'scoruby/models/random_forest/model'
6
6
  require 'scoruby/models/naive_bayes/model'
7
7
 
8
8
  module Scoruby
@@ -12,7 +12,7 @@ module Scoruby
12
12
  MODEL_NOT_SUPPORTED_ERROR = 'model not supported'
13
13
 
14
14
  def self.factory_for(xml)
15
- return Models::RandomForest.new(xml) if random_forest?(xml)
15
+ return Models::RandomForest::Model.new(xml) if random_forest?(xml)
16
16
  return Models::Gbm.new(xml) if gbm?(xml)
17
17
  return Models::DecisionTree.new(xml.child) if decision_tree?(xml)
18
18
  return Models::NaiveBayes::Model.new(xml) if naive_bayes?(xml)
@@ -5,7 +5,7 @@ module Scoruby
5
5
  class SimpleSetPredicate
6
6
  IS_IN = 'isIn'
7
7
 
8
- attr_reader :field
8
+ attr_reader :field, :array
9
9
 
10
10
  def initialize(pred_xml)
11
11
  attributes = pred_xml.attributes
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Scoruby
4
- VERSION = '0.2.8'
4
+ VERSION = '0.2.9'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scoruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.8
4
+ version: 0.2.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Asaf Schers
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-12-02 00:00:00.000000000 Z
11
+ date: 2017-12-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -134,7 +134,8 @@ files:
134
134
  - lib/scoruby/models/gbm.rb
135
135
  - lib/scoruby/models/naive_bayes/model.rb
136
136
  - lib/scoruby/models/naive_bayes/model_data.rb
137
- - lib/scoruby/models/random_forest.rb
137
+ - lib/scoruby/models/random_forest/data.rb
138
+ - lib/scoruby/models/random_forest/model.rb
138
139
  - lib/scoruby/models_factory.rb
139
140
  - lib/scoruby/node.rb
140
141
  - lib/scoruby/predicate_factory.rb
@@ -168,9 +169,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
168
169
  version: '0'
169
170
  requirements: []
170
171
  rubyforge_project:
171
- rubygems_version: 2.6.13
172
+ rubygems_version: 2.7.3
172
173
  signing_key:
173
174
  specification_version: 4
174
175
  summary: Ruby Scoring API for PMML.
175
176
  test_files: []
176
- has_rdoc:
@@ -1,45 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Scoruby
4
- module Models
5
- class RandomForest
6
- RF_FOREST_XPATH = 'PMML/MiningModel/Segmentation/Segment'
7
-
8
- def initialize(xml)
9
- xml_trees = xml.xpath(RF_FOREST_XPATH)
10
- @decision_trees = xml_trees.map do |xml_tree|
11
- DecisionTree.new(xml_tree)
12
- end
13
- end
14
-
15
- def predict(features)
16
- decisions_count = decisions_count(features)
17
- decision = decisions_count.max_by { |_, v| v }
18
- {
19
- label: decision[0],
20
- score: decision[1] / decisions_count.values.reduce(0, :+).to_f
21
- }
22
- end
23
-
24
- def decisions_count(features)
25
- formatted_features = Features.new(features).formatted
26
- decisions = traverse_trees(formatted_features)
27
- aggregate_decisions(decisions)
28
- end
29
-
30
- private
31
-
32
- def traverse_trees(formatted_features)
33
- @decision_trees.map do |decision_tree|
34
- decision_tree.decide(formatted_features).score
35
- end
36
- end
37
-
38
- def aggregate_decisions(decisions)
39
- decisions.each_with_object(Hash.new(0)) do |score, counts|
40
- counts[score] += 1
41
- end
42
- end
43
- end
44
- end
45
- end