scoruby 0.2.13 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0594a9d84f41423a9e7b3c611a713dc7eca9a10c
4
- data.tar.gz: 723ccca30622ea47a3a1f22b4cc1a00ca3ba931b
3
+ metadata.gz: fb645c7c145b4d3fe4e72f759243bf473bd901e9
4
+ data.tar.gz: e0e5fe27be6ed36ae84567a82fd31fe31f9ce6ea
5
5
  SHA512:
6
- metadata.gz: 96e4a373f3884d8e834d3c22b86477738d6ae9cecbd6b16195adcc336c92e72c0320f20bfbad96ec933692a39c931555ee23f21f2e87f082924c1a1b81fa4eec
7
- data.tar.gz: 64932827e238356bd6535e5b2f497ad175fe977a09dbda3c5aee2382827cb5db4c2268a23607de36534bbe975ba9ace631344a0c3209a306696f5ae1a0908e8c
6
+ metadata.gz: e4492e5f7f635d61983bac4d95c13e29c7d965e3b56dc7c68bf4d4128cb26a7385bd45098fe809da513afc56786f840308688fc2769c3e2316277ccc8fd58205
7
+ data.tar.gz: a7923b40d7591ca56a286c2c72fb232bc335fb2079399210d4d67fb76bb1a9af27d23af79a6f6b3659bf11d3bc0553333c7fa719d48bfd9d19dc8537bae5ff6c
data/.codeclimate.yml ADDED
@@ -0,0 +1,3 @@
1
+ plugins:
2
+ rubocop:
3
+ enabled: true
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- scoruby (0.2.13)
4
+ scoruby (0.3.0)
5
5
  nokogiri (~> 1.7)
6
6
 
7
7
  GEM
data/README.md CHANGED
@@ -51,7 +51,7 @@ features = {
51
51
  Embarked: 'Q'
52
52
  }
53
53
 
54
- random_forest.predict(features)
54
+ random_forest.score(features)
55
55
 
56
56
  => {:label=>"0", :score=>0.882}
57
57
 
data/lib/scoruby/model_factory.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'scoruby/models/decision_tree'
4
- require 'scoruby/models/gbm'
4
+ require 'scoruby/models/gradient_boosted_model/model'
5
5
  require 'scoruby/models/random_forest/model'
6
6
  require 'scoruby/models/naive_bayes/model'
7
7
 
@@ -14,7 +14,7 @@ module Scoruby
14
14
 
15
15
  def self.factory_for(xml)
16
16
  return Models::RandomForest::Model.new(xml) if random_forest?(xml)
17
- return Models::Gbm.new(xml) if gbm?(xml)
17
+ return Models::GradientBoostedModel::Model.new(xml) if gbm?(xml)
18
18
  return Models::DecisionTree.new(xml.child) if decision_tree?(xml)
19
19
  return Models::NaiveBayes::Model.new(xml) if naive_bayes?(xml)
20
20
 
data/lib/scoruby/models/gradient_boosted_model/data.rb ADDED
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Scoruby
4
+ module Models
5
+ module GradientBoostedModel
6
+ class Data
7
+ GBM_FOREST_XPATH = '//Segmentation[@multipleModelMethod="sum"]/Segment'
8
+ CONST_XPATH = '//Target/@rescaleConstant'
9
+ CONST_XPATH_4_2 = '//Constant'
10
+
11
+ def initialize(xml)
12
+ @xml = xml
13
+ end
14
+
15
+ def decision_trees
16
+ @decision_trees ||= @xml.xpath(GBM_FOREST_XPATH).map do |xml_tree|
17
+ DecisionTree.new(xml_tree)
18
+ end
19
+ end
20
+
21
+ def const
22
+ @const ||= const_by_version
23
+ end
24
+
25
+ def continuous_features
26
+ @continuous_features ||= fetch_continuous_features
27
+ end
28
+
29
+ def categorical_features
30
+ @categorical_features ||= fetch_categorical_features
31
+ end
32
+
33
+ private
34
+
35
+ def fetch_continuous_features
36
+ @xml.xpath('//DataField')
37
+ .select { |xml| xml.attr('optype') == 'continuous' }
38
+ .map { |xml| xml.attr('name') }
39
+ end
40
+
41
+ def fetch_categorical_features
42
+ @xml.xpath('//DataField')
43
+ .select { |xml| xml.attr('optype') == 'categorical' }
44
+ .reject { |xml| xml.attr('name') == target }
45
+ .each_with_object(Hash.new([])) do |xml, res|
46
+ res[xml.attr('name').to_sym] = xml.xpath('Value')
47
+ .map { |xml| xml.attr('value') }
48
+ end
49
+ end
50
+
51
+ def target
52
+ @target ||= @xml.xpath('//MiningField')
53
+ .find { |xml| xml.attr('usageType') == 'target' }
54
+ .attr('name').to_s
55
+ end
56
+
57
+ def const_by_version
58
+ return Float(@xml.xpath(CONST_XPATH).to_s) if ModelFactory.gbm_4_3?(@xml)
59
+ Float(@xml.xpath(CONST_XPATH_4_2).first.content)
60
+ end
61
+ end
62
+ end
63
+ end
64
+ end
data/lib/scoruby/models/gradient_boosted_model/model.rb ADDED
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'scoruby/features'
4
+ require 'forwardable'
5
+ require 'scoruby/models/gradient_boosted_model/data'
6
+
7
+ module Scoruby
8
+ module Models
9
+ module GradientBoostedModel
10
+ class Model
11
+ extend Forwardable
12
+ def_delegators :@data, :decision_trees, :const, :continuous_features,
13
+ :categorical_features
14
+
15
+ def initialize(xml)
16
+ @data = Data.new(xml)
17
+ end
18
+
19
+ def score(features)
20
+ formatted_features = Features.new(features).formatted
21
+ scores = traverse_trees(formatted_features)
22
+ sum = scores.reduce(:+) + const
23
+ Math.exp(sum) / (1 + Math.exp(sum))
24
+ end
25
+
26
+ def traverse_trees(formatted_features)
27
+ decision_trees.map do |dt|
28
+ dt.decide(formatted_features).score.to_s.to_f
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
data/lib/scoruby/models/random_forest/data.rb CHANGED
@@ -12,7 +12,7 @@ module Scoruby
12
12
  end
13
13
 
14
14
  def decision_trees
15
- @xml.xpath(RF_FOREST_XPATH).map do |xml_tree|
15
+ @decision_trees ||= @xml.xpath(RF_FOREST_XPATH).map do |xml_tree|
16
16
  DecisionTree.new(xml_tree)
17
17
  end
18
18
  end
data/lib/scoruby/models/random_forest/model.rb CHANGED
@@ -8,13 +8,14 @@ module Scoruby
8
8
  module RandomForest
9
9
  class Model
10
10
  extend Forwardable
11
- def_delegators :@data, :decision_trees, :categorical_features, :continuous_features
11
+ def_delegators :@data, :decision_trees, :categorical_features,
12
+ :continuous_features
12
13
 
13
14
  def initialize(xml)
14
15
  @data = Data.new(xml)
15
16
  end
16
17
 
17
- def predict(features)
18
+ def score(features)
18
19
  decisions_count = decisions_count(features)
19
20
  decision = decisions_count.max_by { |_, v| v }
20
21
  {
data/lib/scoruby/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Scoruby
4
- VERSION = '0.2.13'
4
+ VERSION = '0.3.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scoruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.13
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Asaf Schers
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-01-27 00:00:00.000000000 Z
11
+ date: 2018-02-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -115,6 +115,7 @@ executables: []
115
115
  extensions: []
116
116
  extra_rdoc_files: []
117
117
  files:
118
+ - ".codeclimate.yml"
118
119
  - ".gitignore"
119
120
  - ".rspec"
120
121
  - ".rubocop.yml"
@@ -132,7 +133,8 @@ files:
132
133
  - lib/scoruby/features.rb
133
134
  - lib/scoruby/model_factory.rb
134
135
  - lib/scoruby/models/decision_tree.rb
135
- - lib/scoruby/models/gbm.rb
136
+ - lib/scoruby/models/gradient_boosted_model/data.rb
137
+ - lib/scoruby/models/gradient_boosted_model/model.rb
136
138
  - lib/scoruby/models/naive_bayes/model.rb
137
139
  - lib/scoruby/models/naive_bayes/model_data.rb
138
140
  - lib/scoruby/models/random_forest/data.rb
data/lib/scoruby/models/gbm.rb REMOVED
@@ -1,41 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'scoruby/models/decision_tree'
4
- require 'scoruby/features'
5
-
6
- module Scoruby
7
- module Models
8
- class Gbm
9
- GBM_FOREST_XPATH = '//Segmentation[@multipleModelMethod="sum"]/Segment'
10
- CONST_XPATH = '//Target/@rescaleConstant'
11
- CONST_XPATH_4_2 = '//Constant'
12
-
13
- def initialize(xml)
14
- @decision_trees = xml.xpath(GBM_FOREST_XPATH).map do |xml_tree|
15
- DecisionTree.new(xml_tree)
16
- end
17
- @const = const(xml)
18
- end
19
-
20
- def tree_count
21
- @decision_trees.count
22
- end
23
-
24
- def score(features)
25
- formatted_features = Features.new(features).formatted
26
- scores = @decision_trees.map do |dt|
27
- dt.decide(formatted_features).score.to_s.to_f
28
- end
29
- sum = scores.reduce(:+) + @const
30
- Math.exp(sum) / (1 + Math.exp(sum))
31
- end
32
-
33
- private
34
-
35
- def const(xml)
36
- return Float(xml.xpath(CONST_XPATH).to_s) if ModelFactory.gbm_4_3?(xml)
37
- Float(xml.xpath(CONST_XPATH_4_2).first.content)
38
- end
39
- end
40
- end
41
- end