scoruby 0.2.13 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.codeclimate.yml +3 -0
- data/Gemfile.lock +1 -1
- data/README.md +1 -1
- data/lib/scoruby/model_factory.rb +2 -2
- data/lib/scoruby/models/gradient_boosted_model/data.rb +64 -0
- data/lib/scoruby/models/gradient_boosted_model/model.rb +34 -0
- data/lib/scoruby/models/random_forest/data.rb +1 -1
- data/lib/scoruby/models/random_forest/model.rb +3 -2
- data/lib/scoruby/version.rb +1 -1
- metadata +5 -3
- data/lib/scoruby/models/gbm.rb +0 -41
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fb645c7c145b4d3fe4e72f759243bf473bd901e9
|
4
|
+
data.tar.gz: e0e5fe27be6ed36ae84567a82fd31fe31f9ce6ea
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e4492e5f7f635d61983bac4d95c13e29c7d965e3b56dc7c68bf4d4128cb26a7385bd45098fe809da513afc56786f840308688fc2769c3e2316277ccc8fd58205
|
7
|
+
data.tar.gz: a7923b40d7591ca56a286c2c72fb232bc335fb2079399210d4d67fb76bb1a9af27d23af79a6f6b3659bf11d3bc0553333c7fa719d48bfd9d19dc8537bae5ff6c
|
data/.codeclimate.yml
ADDED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
data/lib/scoruby/model_factory.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'scoruby/models/decision_tree'
|
4
|
-
require 'scoruby/models/gbm'
|
4
|
+
require 'scoruby/models/gradient_boosted_model/model'
|
5
5
|
require 'scoruby/models/random_forest/model'
|
6
6
|
require 'scoruby/models/naive_bayes/model'
|
7
7
|
|
@@ -14,7 +14,7 @@ module Scoruby
|
|
14
14
|
|
15
15
|
def self.factory_for(xml)
|
16
16
|
return Models::RandomForest::Model.new(xml) if random_forest?(xml)
|
17
|
-
return Models::Gbm.new(xml) if gbm?(xml)
|
17
|
+
return Models::GradientBoostedModel::Model.new(xml) if gbm?(xml)
|
18
18
|
return Models::DecisionTree.new(xml.child) if decision_tree?(xml)
|
19
19
|
return Models::NaiveBayes::Model.new(xml) if naive_bayes?(xml)
|
20
20
|
|
data/lib/scoruby/models/gradient_boosted_model/data.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Scoruby
|
4
|
+
module Models
|
5
|
+
module GradientBoostedModel
|
6
|
+
class Data
|
7
|
+
GBM_FOREST_XPATH = '//Segmentation[@multipleModelMethod="sum"]/Segment'
|
8
|
+
CONST_XPATH = '//Target/@rescaleConstant'
|
9
|
+
CONST_XPATH_4_2 = '//Constant'
|
10
|
+
|
11
|
+
def initialize(xml)
|
12
|
+
@xml = xml
|
13
|
+
end
|
14
|
+
|
15
|
+
def decision_trees
|
16
|
+
@decision_trees ||= @xml.xpath(GBM_FOREST_XPATH).map do |xml_tree|
|
17
|
+
DecisionTree.new(xml_tree)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def const
|
22
|
+
@const ||= const_by_version
|
23
|
+
end
|
24
|
+
|
25
|
+
def continuous_features
|
26
|
+
@continuous_features ||= fetch_continuous_features
|
27
|
+
end
|
28
|
+
|
29
|
+
def categorical_features
|
30
|
+
@categorical_features ||= fetch_categorical_features
|
31
|
+
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
def fetch_continuous_features
|
36
|
+
@xml.xpath('//DataField')
|
37
|
+
.select { |xml| xml.attr('optype') == 'continuous' }
|
38
|
+
.map { |xml| xml.attr('name') }
|
39
|
+
end
|
40
|
+
|
41
|
+
def fetch_categorical_features
|
42
|
+
@xml.xpath('//DataField')
|
43
|
+
.select { |xml| xml.attr('optype') == 'categorical' }
|
44
|
+
.reject { |xml| xml.attr('name') == target }
|
45
|
+
.each_with_object(Hash.new([])) do |xml, res|
|
46
|
+
res[xml.attr('name').to_sym] = xml.xpath('Value')
|
47
|
+
.map { |xml| xml.attr('value') }
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def target
|
52
|
+
@target ||= @xml.xpath('//MiningField')
|
53
|
+
.find { |xml| xml.attr('usageType') == 'target' }
|
54
|
+
.attr('name').to_s
|
55
|
+
end
|
56
|
+
|
57
|
+
def const_by_version
|
58
|
+
return Float(@xml.xpath(CONST_XPATH).to_s) if ModelFactory.gbm_4_3?(@xml)
|
59
|
+
Float(@xml.xpath(CONST_XPATH_4_2).first.content)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
data/lib/scoruby/models/gradient_boosted_model/model.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'scoruby/features'
|
4
|
+
require 'forwardable'
|
5
|
+
require 'scoruby/models/gradient_boosted_model/data'
|
6
|
+
|
7
|
+
module Scoruby
|
8
|
+
module Models
|
9
|
+
module GradientBoostedModel
|
10
|
+
class Model
|
11
|
+
extend Forwardable
|
12
|
+
def_delegators :@data, :decision_trees, :const, :continuous_features,
|
13
|
+
:categorical_features
|
14
|
+
|
15
|
+
def initialize(xml)
|
16
|
+
@data = Data.new(xml)
|
17
|
+
end
|
18
|
+
|
19
|
+
def score(features)
|
20
|
+
formatted_features = Features.new(features).formatted
|
21
|
+
scores = traverse_trees(formatted_features)
|
22
|
+
sum = scores.reduce(:+) + const
|
23
|
+
Math.exp(sum) / (1 + Math.exp(sum))
|
24
|
+
end
|
25
|
+
|
26
|
+
def traverse_trees(formatted_features)
|
27
|
+
decision_trees.map do |dt|
|
28
|
+
dt.decide(formatted_features).score.to_s.to_f
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
data/lib/scoruby/models/random_forest/model.rb
CHANGED
@@ -8,13 +8,14 @@ module Scoruby
|
|
8
8
|
module RandomForest
|
9
9
|
class Model
|
10
10
|
extend Forwardable
|
11
|
-
def_delegators :@data, :decision_trees, :categorical_features,
|
11
|
+
def_delegators :@data, :decision_trees, :categorical_features,
|
12
|
+
:continuous_features
|
12
13
|
|
13
14
|
def initialize(xml)
|
14
15
|
@data = Data.new(xml)
|
15
16
|
end
|
16
17
|
|
17
|
-
def
|
18
|
+
def score(features)
|
18
19
|
decisions_count = decisions_count(features)
|
19
20
|
decision = decisions_count.max_by { |_, v| v }
|
20
21
|
{
|
data/lib/scoruby/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scoruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.13
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Asaf Schers
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-02-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -115,6 +115,7 @@ executables: []
|
|
115
115
|
extensions: []
|
116
116
|
extra_rdoc_files: []
|
117
117
|
files:
|
118
|
+
- ".codeclimate.yml"
|
118
119
|
- ".gitignore"
|
119
120
|
- ".rspec"
|
120
121
|
- ".rubocop.yml"
|
@@ -132,7 +133,8 @@ files:
|
|
132
133
|
- lib/scoruby/features.rb
|
133
134
|
- lib/scoruby/model_factory.rb
|
134
135
|
- lib/scoruby/models/decision_tree.rb
|
135
|
-
- lib/scoruby/models/gbm.rb
|
136
|
+
- lib/scoruby/models/gradient_boosted_model/data.rb
|
137
|
+
- lib/scoruby/models/gradient_boosted_model/model.rb
|
136
138
|
- lib/scoruby/models/naive_bayes/model.rb
|
137
139
|
- lib/scoruby/models/naive_bayes/model_data.rb
|
138
140
|
- lib/scoruby/models/random_forest/data.rb
|
data/lib/scoruby/models/gbm.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'scoruby/models/decision_tree'
|
4
|
-
require 'scoruby/features'
|
5
|
-
|
6
|
-
module Scoruby
|
7
|
-
module Models
|
8
|
-
class Gbm
|
9
|
-
GBM_FOREST_XPATH = '//Segmentation[@multipleModelMethod="sum"]/Segment'
|
10
|
-
CONST_XPATH = '//Target/@rescaleConstant'
|
11
|
-
CONST_XPATH_4_2 = '//Constant'
|
12
|
-
|
13
|
-
def initialize(xml)
|
14
|
-
@decision_trees = xml.xpath(GBM_FOREST_XPATH).map do |xml_tree|
|
15
|
-
DecisionTree.new(xml_tree)
|
16
|
-
end
|
17
|
-
@const = const(xml)
|
18
|
-
end
|
19
|
-
|
20
|
-
def tree_count
|
21
|
-
@decision_trees.count
|
22
|
-
end
|
23
|
-
|
24
|
-
def score(features)
|
25
|
-
formatted_features = Features.new(features).formatted
|
26
|
-
scores = @decision_trees.map do |dt|
|
27
|
-
dt.decide(formatted_features).score.to_s.to_f
|
28
|
-
end
|
29
|
-
sum = scores.reduce(:+) + @const
|
30
|
-
Math.exp(sum) / (1 + Math.exp(sum))
|
31
|
-
end
|
32
|
-
|
33
|
-
private
|
34
|
-
|
35
|
-
def const(xml)
|
36
|
-
return Float(xml.xpath(CONST_XPATH).to_s) if ModelFactory.gbm_4_3?(xml)
|
37
|
-
Float(xml.xpath(CONST_XPATH_4_2).first.content)
|
38
|
-
end
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|