scoruby 0.2.13 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.codeclimate.yml +3 -0
- data/Gemfile.lock +1 -1
- data/README.md +1 -1
- data/lib/scoruby/model_factory.rb +2 -2
- data/lib/scoruby/models/gradient_boosted_model/data.rb +64 -0
- data/lib/scoruby/models/gradient_boosted_model/model.rb +34 -0
- data/lib/scoruby/models/random_forest/data.rb +1 -1
- data/lib/scoruby/models/random_forest/model.rb +3 -2
- data/lib/scoruby/version.rb +1 -1
- metadata +5 -3
- data/lib/scoruby/models/gbm.rb +0 -41
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fb645c7c145b4d3fe4e72f759243bf473bd901e9
|
4
|
+
data.tar.gz: e0e5fe27be6ed36ae84567a82fd31fe31f9ce6ea
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e4492e5f7f635d61983bac4d95c13e29c7d965e3b56dc7c68bf4d4128cb26a7385bd45098fe809da513afc56786f840308688fc2769c3e2316277ccc8fd58205
|
7
|
+
data.tar.gz: a7923b40d7591ca56a286c2c72fb232bc335fb2079399210d4d67fb76bb1a9af27d23af79a6f6b3659bf11d3bc0553333c7fa719d48bfd9d19dc8537bae5ff6c
|
data/.codeclimate.yml
ADDED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'scoruby/models/decision_tree'
|
4
|
-
require 'scoruby/models/gbm'
|
4
|
+
require 'scoruby/models/gradient_boosted_model/model'
|
5
5
|
require 'scoruby/models/random_forest/model'
|
6
6
|
require 'scoruby/models/naive_bayes/model'
|
7
7
|
|
@@ -14,7 +14,7 @@ module Scoruby
|
|
14
14
|
|
15
15
|
def self.factory_for(xml)
|
16
16
|
return Models::RandomForest::Model.new(xml) if random_forest?(xml)
|
17
|
-
return Models::Gbm.new(xml) if gbm?(xml)
|
17
|
+
return Models::GradientBoostedModel::Model.new(xml) if gbm?(xml)
|
18
18
|
return Models::DecisionTree.new(xml.child) if decision_tree?(xml)
|
19
19
|
return Models::NaiveBayes::Model.new(xml) if naive_bayes?(xml)
|
20
20
|
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Scoruby
  module Models
    module GradientBoostedModel
      # Lazily extracts the pieces of a gradient boosted model from a parsed
      # PMML document: the decision trees, the additive constant and the
      # continuous / categorical feature declarations.
      #
      # The document only needs to respond to +xpath+ and return node-like
      # objects responding to +attr+, +xpath+ and +content+
      # (presumably a Nokogiri document - confirm against the caller).
      class Data
        # Segments of the segmentation whose results are summed.
        GBM_FOREST_XPATH = '//Segmentation[@multipleModelMethod="sum"]/Segment'
        # Location of the constant when ModelFactory.gbm_4_3? is truthy.
        CONST_XPATH = '//Target/@rescaleConstant'
        # Location of the constant otherwise.
        CONST_XPATH_4_2 = '//Constant'

        # @param xml [#xpath] parsed PMML document
        def initialize(xml)
          @xml = xml
        end

        # @return [Array<DecisionTree>] one tree per matched segment (memoized)
        def decision_trees
          @decision_trees ||= @xml.xpath(GBM_FOREST_XPATH).map do |xml_tree|
            DecisionTree.new(xml_tree)
          end
        end

        # @return [Float] constant added to the summed tree scores (memoized)
        # @raise [ArgumentError, TypeError] from Kernel#Float when the stored
        #   value is not numeric
        def const
          @const ||= const_by_version
        end

        # @return [Array] names of every DataField whose optype is
        #   'continuous' (memoized)
        def continuous_features
          @continuous_features ||= fetch_continuous_features
        end

        # @return [Hash{Symbol => Array}] allowed values per categorical
        #   DataField, excluding the target field (memoized). Unknown keys
        #   yield a frozen empty array.
        def categorical_features
          @categorical_features ||= fetch_categorical_features
        end

        private

        def fetch_continuous_features
          @xml.xpath('//DataField')
              .select { |field| field.attr('optype') == 'continuous' }
              .map { |field| field.attr('name') }
        end

        def fetch_categorical_features
          # The default is frozen: Hash.new([]) hands the *same* mutable array
          # to every missing key, so one caller appending to it would leak
          # values into every other lookup.
          @xml.xpath('//DataField')
              .select { |field| field.attr('optype') == 'categorical' }
              .reject { |field| field.attr('name') == target }
              .each_with_object(Hash.new([].freeze)) do |field, res|
                res[field.attr('name').to_sym] =
                  field.xpath('Value').map { |value| value.attr('value') }
              end
        end

        # Name of the MiningField flagged as the target, or '' when the
        # document declares none (previously this raised NoMethodError on nil).
        def target
          @target ||= @xml.xpath('//MiningField')
                          .find { |field| field.attr('usageType') == 'target' }
                          &.attr('name').to_s
        end

        # Reads the constant from the location matching the document flavour
        # reported by ModelFactory.gbm_4_3?.
        def const_by_version
          return Float(@xml.xpath(CONST_XPATH).to_s) if ModelFactory.gbm_4_3?(@xml)

          Float(@xml.xpath(CONST_XPATH_4_2).first.content)
        end
      end
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'scoruby/features'
|
4
|
+
require 'forwardable'
|
5
|
+
require 'scoruby/models/gradient_boosted_model/data'
|
6
|
+
|
7
|
+
module Scoruby
  module Models
    module GradientBoostedModel
      # Scores feature sets against a gradient boosted model: every decision
      # tree contributes a score, the scores and the model constant are
      # summed, and the sum is squashed through the logistic function.
      class Model
        extend Forwardable
        def_delegators :@data, :decision_trees, :const, :continuous_features,
                       :categorical_features

        # @param xml parsed PMML document, handed unchanged to Data.new
        def initialize(xml)
          @data = Data.new(xml)
        end

        # @param features raw features, normalised through Features#formatted
        # @return [Float] logistic of (sum of tree scores + const)
        def score(features)
          formatted_features = Features.new(features).formatted
          scores = traverse_trees(formatted_features)
          # Seed with 0.0 so a model without trees scores on the constant
          # alone instead of raising on nil + const.
          logistic(scores.reduce(0.0, :+) + const)
        end

        # @param formatted_features features already passed through Features
        # @return [Array<Float>] one score per decision tree, in tree order
        def traverse_trees(formatted_features)
          decision_trees.map do |dt|
            dt.decide(formatted_features).score.to_s.to_f
          end
        end

        private

        # exp(x) / (1 + exp(x)) with exp evaluated once. Math.exp overflows to
        # Infinity for large x, and Infinity / Infinity is NaN, so the limit
        # value 1.0 is returned in that case.
        def logistic(value)
          exp = Math.exp(value)
          return 1.0 if exp.infinite?

          exp / (1 + exp)
        end
      end
    end
  end
end
|
@@ -8,13 +8,14 @@ module Scoruby
|
|
8
8
|
module RandomForest
|
9
9
|
class Model
|
10
10
|
extend Forwardable
|
11
|
-
def_delegators :@data, :decision_trees, :categorical_features,
|
11
|
+
def_delegators :@data, :decision_trees, :categorical_features,
|
12
|
+
:continuous_features
|
12
13
|
|
13
14
|
def initialize(xml)
|
14
15
|
@data = Data.new(xml)
|
15
16
|
end
|
16
17
|
|
17
|
-
def
|
18
|
+
def score(features)
|
18
19
|
decisions_count = decisions_count(features)
|
19
20
|
decision = decisions_count.max_by { |_, v| v }
|
20
21
|
{
|
data/lib/scoruby/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scoruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.13
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Asaf Schers
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-02-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -115,6 +115,7 @@ executables: []
|
|
115
115
|
extensions: []
|
116
116
|
extra_rdoc_files: []
|
117
117
|
files:
|
118
|
+
- ".codeclimate.yml"
|
118
119
|
- ".gitignore"
|
119
120
|
- ".rspec"
|
120
121
|
- ".rubocop.yml"
|
@@ -132,7 +133,8 @@ files:
|
|
132
133
|
- lib/scoruby/features.rb
|
133
134
|
- lib/scoruby/model_factory.rb
|
134
135
|
- lib/scoruby/models/decision_tree.rb
|
135
|
-
- lib/scoruby/models/gbm.rb
|
136
|
+
- lib/scoruby/models/gradient_boosted_model/data.rb
|
137
|
+
- lib/scoruby/models/gradient_boosted_model/model.rb
|
136
138
|
- lib/scoruby/models/naive_bayes/model.rb
|
137
139
|
- lib/scoruby/models/naive_bayes/model_data.rb
|
138
140
|
- lib/scoruby/models/random_forest/data.rb
|
data/lib/scoruby/models/gbm.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'scoruby/models/decision_tree'
|
4
|
-
require 'scoruby/features'
|
5
|
-
|
6
|
-
module Scoruby
  module Models
    # Gradient boosted model scorer: builds one DecisionTree per matched
    # segment, sums the tree scores with the model constant and returns the
    # logistic of that sum.
    class Gbm
      # Segments of the segmentation whose results are summed.
      GBM_FOREST_XPATH = '//Segmentation[@multipleModelMethod="sum"]/Segment'
      # Location of the constant when ModelFactory.gbm_4_3? is truthy.
      CONST_XPATH = '//Target/@rescaleConstant'
      # Location of the constant otherwise.
      CONST_XPATH_4_2 = '//Constant'

      # @param xml [#xpath] parsed PMML document
      # @raise [ArgumentError, TypeError] from Kernel#Float when the stored
      #   constant is not numeric
      def initialize(xml)
        @decision_trees = xml.xpath(GBM_FOREST_XPATH).map do |xml_tree|
          DecisionTree.new(xml_tree)
        end
        @const = const(xml)
      end

      # @return [Integer] number of decision trees built from the document
      def tree_count
        @decision_trees.count
      end

      # @param features raw features, normalised through Features#formatted
      # @return [Float] logistic of (sum of tree scores + constant)
      def score(features)
        formatted_features = Features.new(features).formatted
        scores = @decision_trees.map do |dt|
          dt.decide(formatted_features).score.to_s.to_f
        end
        # Seed with 0.0 so a model without trees scores on the constant alone
        # instead of raising on nil + @const.
        logistic(scores.reduce(0.0, :+) + @const)
      end

      private

      # Reads the constant from the location matching the document flavour
      # reported by ModelFactory.gbm_4_3?.
      def const(xml)
        return Float(xml.xpath(CONST_XPATH).to_s) if ModelFactory.gbm_4_3?(xml)

        Float(xml.xpath(CONST_XPATH_4_2).first.content)
      end

      # exp(x) / (1 + exp(x)) with exp evaluated once. Math.exp overflows to
      # Infinity for large x, and Infinity / Infinity is NaN, so the limit
      # value 1.0 is returned in that case.
      def logistic(value)
        exp = Math.exp(value)
        return 1.0 if exp.infinite?

        exp / (1 + exp)
      end
    end
  end
end
|