scoruby 0.2.13 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0594a9d84f41423a9e7b3c611a713dc7eca9a10c
4
- data.tar.gz: 723ccca30622ea47a3a1f22b4cc1a00ca3ba931b
3
+ metadata.gz: fb645c7c145b4d3fe4e72f759243bf473bd901e9
4
+ data.tar.gz: e0e5fe27be6ed36ae84567a82fd31fe31f9ce6ea
5
5
  SHA512:
6
- metadata.gz: 96e4a373f3884d8e834d3c22b86477738d6ae9cecbd6b16195adcc336c92e72c0320f20bfbad96ec933692a39c931555ee23f21f2e87f082924c1a1b81fa4eec
7
- data.tar.gz: 64932827e238356bd6535e5b2f497ad175fe977a09dbda3c5aee2382827cb5db4c2268a23607de36534bbe975ba9ace631344a0c3209a306696f5ae1a0908e8c
6
+ metadata.gz: e4492e5f7f635d61983bac4d95c13e29c7d965e3b56dc7c68bf4d4128cb26a7385bd45098fe809da513afc56786f840308688fc2769c3e2316277ccc8fd58205
7
+ data.tar.gz: a7923b40d7591ca56a286c2c72fb232bc335fb2079399210d4d67fb76bb1a9af27d23af79a6f6b3659bf11d3bc0553333c7fa719d48bfd9d19dc8537bae5ff6c
data/.codeclimate.yml ADDED
@@ -0,0 +1,3 @@
1
+ plugins:
2
+ rubocop:
3
+ enabled: true
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- scoruby (0.2.13)
4
+ scoruby (0.3.0)
5
5
  nokogiri (~> 1.7)
6
6
 
7
7
  GEM
data/README.md CHANGED
@@ -51,7 +51,7 @@ features = {
51
51
  Embarked: 'Q'
52
52
  }
53
53
 
54
- random_forest.predict(features)
54
+ random_forest.score(features)
55
55
 
56
56
  => {:label=>"0", :score=>0.882}
57
57
 
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'scoruby/models/decision_tree'
4
- require 'scoruby/models/gbm'
4
+ require 'scoruby/models/gradient_boosted_model/model'
5
5
  require 'scoruby/models/random_forest/model'
6
6
  require 'scoruby/models/naive_bayes/model'
7
7
 
@@ -14,7 +14,7 @@ module Scoruby
14
14
 
15
15
  def self.factory_for(xml)
16
16
  return Models::RandomForest::Model.new(xml) if random_forest?(xml)
17
- return Models::Gbm.new(xml) if gbm?(xml)
17
+ return Models::GradientBoostedModel::Model.new(xml) if gbm?(xml)
18
18
  return Models::DecisionTree.new(xml.child) if decision_tree?(xml)
19
19
  return Models::NaiveBayes::Model.new(xml) if naive_bayes?(xml)
20
20
 
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Scoruby
4
+ module Models
5
+ module GradientBoostedModel
6
+ class Data
7
+ GBM_FOREST_XPATH = '//Segmentation[@multipleModelMethod="sum"]/Segment'
8
+ CONST_XPATH = '//Target/@rescaleConstant'
9
+ CONST_XPATH_4_2 = '//Constant'
10
+
11
+ def initialize(xml)
12
+ @xml = xml
13
+ end
14
+
15
+ def decision_trees
16
+ @decision_trees ||= @xml.xpath(GBM_FOREST_XPATH).map do |xml_tree|
17
+ DecisionTree.new(xml_tree)
18
+ end
19
+ end
20
+
21
+ def const
22
+ @const ||= const_by_version
23
+ end
24
+
25
+ def continuous_features
26
+ @continuous_features ||= fetch_continuous_features
27
+ end
28
+
29
+ def categorical_features
30
+ @categorical_features ||= fetch_categorical_features
31
+ end
32
+
33
+ private
34
+
35
+ def fetch_continuous_features
36
+ @xml.xpath('//DataField')
37
+ .select { |xml| xml.attr('optype') == 'continuous' }
38
+ .map { |xml| xml.attr('name') }
39
+ end
40
+
41
+ def fetch_categorical_features
42
+ @xml.xpath('//DataField')
43
+ .select { |xml| xml.attr('optype') == 'categorical' }
44
+ .reject { |xml| xml.attr('name') == target }
45
+ .each_with_object(Hash.new([])) do |xml, res|
46
+ res[xml.attr('name').to_sym] = xml.xpath('Value')
47
+ .map { |xml| xml.attr('value') }
48
+ end
49
+ end
50
+
51
+ def target
52
+ @target ||= @xml.xpath('//MiningField')
53
+ .find { |xml| xml.attr('usageType') == 'target' }
54
+ .attr('name').to_s
55
+ end
56
+
57
+ def const_by_version
58
+ return Float(@xml.xpath(CONST_XPATH).to_s) if ModelFactory.gbm_4_3?(@xml)
59
+ Float(@xml.xpath(CONST_XPATH_4_2).first.content)
60
+ end
61
+ end
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'scoruby/features'
4
+ require 'forwardable'
5
+ require 'scoruby/models/gradient_boosted_model/data'
6
+
7
+ module Scoruby
8
+ module Models
9
+ module GradientBoostedModel
10
+ class Model
11
+ extend Forwardable
12
+ def_delegators :@data, :decision_trees, :const, :continuous_features,
13
+ :categorical_features
14
+
15
+ def initialize(xml)
16
+ @data = Data.new(xml)
17
+ end
18
+
19
+ def score(features)
20
+ formatted_features = Features.new(features).formatted
21
+ scores = traverse_trees(formatted_features)
22
+ sum = scores.reduce(:+) + const
23
+ Math.exp(sum) / (1 + Math.exp(sum))
24
+ end
25
+
26
+ def traverse_trees(formatted_features)
27
+ decision_trees.map do |dt|
28
+ dt.decide(formatted_features).score.to_s.to_f
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
@@ -12,7 +12,7 @@ module Scoruby
12
12
  end
13
13
 
14
14
  def decision_trees
15
- @xml.xpath(RF_FOREST_XPATH).map do |xml_tree|
15
+ @decision_trees ||= @xml.xpath(RF_FOREST_XPATH).map do |xml_tree|
16
16
  DecisionTree.new(xml_tree)
17
17
  end
18
18
  end
@@ -8,13 +8,14 @@ module Scoruby
8
8
  module RandomForest
9
9
  class Model
10
10
  extend Forwardable
11
- def_delegators :@data, :decision_trees, :categorical_features, :continuous_features
11
+ def_delegators :@data, :decision_trees, :categorical_features,
12
+ :continuous_features
12
13
 
13
14
  def initialize(xml)
14
15
  @data = Data.new(xml)
15
16
  end
16
17
 
17
- def predict(features)
18
+ def score(features)
18
19
  decisions_count = decisions_count(features)
19
20
  decision = decisions_count.max_by { |_, v| v }
20
21
  {
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Scoruby
4
- VERSION = '0.2.13'
4
+ VERSION = '0.3.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scoruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.13
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Asaf Schers
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-01-27 00:00:00.000000000 Z
11
+ date: 2018-02-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -115,6 +115,7 @@ executables: []
115
115
  extensions: []
116
116
  extra_rdoc_files: []
117
117
  files:
118
+ - ".codeclimate.yml"
118
119
  - ".gitignore"
119
120
  - ".rspec"
120
121
  - ".rubocop.yml"
@@ -132,7 +133,8 @@ files:
132
133
  - lib/scoruby/features.rb
133
134
  - lib/scoruby/model_factory.rb
134
135
  - lib/scoruby/models/decision_tree.rb
135
- - lib/scoruby/models/gbm.rb
136
+ - lib/scoruby/models/gradient_boosted_model/data.rb
137
+ - lib/scoruby/models/gradient_boosted_model/model.rb
136
138
  - lib/scoruby/models/naive_bayes/model.rb
137
139
  - lib/scoruby/models/naive_bayes/model_data.rb
138
140
  - lib/scoruby/models/random_forest/data.rb
@@ -1,41 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'scoruby/models/decision_tree'
4
- require 'scoruby/features'
5
-
6
- module Scoruby
7
- module Models
8
- class Gbm
9
- GBM_FOREST_XPATH = '//Segmentation[@multipleModelMethod="sum"]/Segment'
10
- CONST_XPATH = '//Target/@rescaleConstant'
11
- CONST_XPATH_4_2 = '//Constant'
12
-
13
- def initialize(xml)
14
- @decision_trees = xml.xpath(GBM_FOREST_XPATH).map do |xml_tree|
15
- DecisionTree.new(xml_tree)
16
- end
17
- @const = const(xml)
18
- end
19
-
20
- def tree_count
21
- @decision_trees.count
22
- end
23
-
24
- def score(features)
25
- formatted_features = Features.new(features).formatted
26
- scores = @decision_trees.map do |dt|
27
- dt.decide(formatted_features).score.to_s.to_f
28
- end
29
- sum = scores.reduce(:+) + @const
30
- Math.exp(sum) / (1 + Math.exp(sum))
31
- end
32
-
33
- private
34
-
35
- def const(xml)
36
- return Float(xml.xpath(CONST_XPATH).to_s) if ModelFactory.gbm_4_3?(xml)
37
- Float(xml.xpath(CONST_XPATH_4_2).first.content)
38
- end
39
- end
40
- end
41
- end