random_forester 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +15 -5
- data/lib/categorical_predicate.rb +10 -3
- data/lib/decision_tree.rb +18 -30
- data/lib/node.rb +21 -0
- data/lib/numerical_predicate.rb +4 -4
- data/lib/predicate.rb +3 -19
- data/lib/random_forest.rb +2 -2
- data/lib/random_forester/version.rb +1 -1
- data/lib/random_forester.rb +8 -4
- data/random_forester.gemspec +6 -6
- metadata +17 -17
- data/lib/missing_categories_script.rb +0 -33
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 45b219cba170a847d0cc4db93503a726655ce212
|
4
|
+
data.tar.gz: 63ad36530b2d4dacede594fb5fd3df28a52d4634
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 279ca796b380047cb0eb0de1d1e741540f410e2e3ca763a1b4f91dbe4e0b2d60f682fa94991dac1355100814a9e6c15edb2718dc62ddb3c94a1e3ca14f5d2f3a
|
7
|
+
data.tar.gz: 01bda06cc65bde79cef7cad504f0818333e74316d3d59aadf939ac7c6d0df520da7e28b8c5b94443724c1ec48eded7c56a2f4d5bc9e951aebdb7877b4b7d27b0
|
data/README.md
CHANGED
@@ -1,8 +1,12 @@
|
|
1
|
-
|
1
|
+
<a href="https://codeclimate.com/github/asafschers/random_forester"><img src="https://codeclimate.com/github/asafschers/random_forester/badges/gpa.svg" /></a>
|
2
|
+
[Gem Version](https://badge.fury.io/rb/random_forester)
|
3
|
+
[Dependency Status](https://www.versioneye.com/user/projects/5870c8c42f149b00509e72a3)
|
4
|
+
[Build Status](https://travis-ci.org/asafschers/random_forester)
|
5
|
+
[Code Triage](https://www.codetriage.com/asafschers/random_forester)
|
2
6
|
|
3
|
-
|
7
|
+
# RandomForester
|
4
8
|
|
5
|
-
|
9
|
+
Reads Random Forest PMML files and creates a Ruby Random Forest classifier model.
|
6
10
|
|
7
11
|
## Installation
|
8
12
|
|
@@ -22,7 +26,13 @@ Or install it yourself as:
|
|
22
26
|
|
23
27
|
## Usage
|
24
28
|
|
25
|
-
|
29
|
+
```ruby
|
30
|
+
random_forest = RandomForester.get_model 'sample.pmml'
|
31
|
+
features = {a: 1, b: true, c: "YES"}
|
32
|
+
random_forest.predict(features)
|
33
|
+
random_forest.decisions_count(features)
|
34
|
+
```
|
35
|
+
|
26
36
|
|
27
37
|
## Development
|
28
38
|
|
@@ -32,7 +42,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
32
42
|
|
33
43
|
## Contributing
|
34
44
|
|
35
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
45
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/asafschers/random_forester. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](https://www.contributor-covenant.org) code of conduct.
|
36
46
|
|
37
47
|
|
38
48
|
## License
|
@@ -5,12 +5,19 @@ class CategoricalPredicate
|
|
5
5
|
attr_reader :field
|
6
6
|
|
7
7
|
def initialize(pred_xml)
|
8
|
-
|
9
|
-
@
|
10
|
-
@
|
8
|
+
attributes = pred_xml.attributes
|
9
|
+
@field = attributes['field'].value.to_sym
|
10
|
+
@array = pred_xml.children[0].content.tr('"', '').split(' ')
|
11
|
+
@operator = attributes['booleanOperator'].value
|
11
12
|
end
|
12
13
|
|
13
14
|
def true?(features)
|
15
|
+
format_boolean(features)
|
14
16
|
@array.include? features[@field] if @operator == IS_IN
|
15
17
|
end
|
18
|
+
|
19
|
+
def format_boolean(features)
|
20
|
+
features[@field] = 'f' if features[@field] == false
|
21
|
+
features[@field] = 't' if features[@field] == true
|
22
|
+
end
|
16
23
|
end
|
data/lib/decision_tree.rb
CHANGED
@@ -1,48 +1,36 @@
|
|
1
|
-
require '
|
2
|
-
require 'rubytree'
|
1
|
+
require 'node'
|
3
2
|
|
4
3
|
class DecisionTree
|
5
|
-
ROOT = 'root'
|
6
|
-
LEFT = 'left'
|
7
|
-
RIGHT = 'right'
|
8
4
|
|
9
5
|
attr_reader :root
|
10
6
|
|
11
7
|
def initialize(tree_xml)
|
12
|
-
@id = tree_xml.
|
13
|
-
@root =
|
14
|
-
set_node(tree_xml.xpath('TreeModel/Node'), @root)
|
15
|
-
end
|
16
|
-
|
17
|
-
def set_node(tree_xml, root)
|
18
|
-
root.content = Predicate.new(tree_xml)
|
19
|
-
|
20
|
-
return if tree_xml.xpath('*').count == 1
|
21
|
-
|
22
|
-
root << Tree::TreeNode.new(LEFT)
|
23
|
-
root << Tree::TreeNode.new(RIGHT)
|
24
|
-
|
25
|
-
set_node(tree_xml.xpath('*')[1], root[LEFT]) if tree_xml.xpath('*')[1]
|
26
|
-
set_node(tree_xml.xpath('*')[2], root[RIGHT]) if tree_xml.xpath('*')[2]
|
8
|
+
@id = tree_xml.attribute('id')
|
9
|
+
@root = Node.new(tree_xml.xpath('TreeModel/Node'))
|
27
10
|
end
|
28
11
|
|
29
12
|
def decide(features)
|
30
13
|
curr = @root
|
31
|
-
while curr.
|
14
|
+
while curr.decision == ''
|
32
15
|
prev = curr
|
33
|
-
curr = curr
|
34
|
-
|
35
|
-
|
36
|
-
return if no_true_child?(curr, prev)
|
16
|
+
curr = step(curr, features)
|
17
|
+
return if didnt_step?(curr, prev)
|
37
18
|
end
|
38
19
|
|
39
|
-
curr.
|
20
|
+
curr.decision
|
40
21
|
end
|
41
22
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
true
|
23
|
+
private
|
24
|
+
|
25
|
+
def step(curr, features)
|
26
|
+
curr = curr.left if curr.left && curr.left.true?(features)
|
27
|
+
curr = curr.right if curr.right && curr.right.true?(features)
|
28
|
+
curr
|
46
29
|
end
|
47
30
|
|
31
|
+
def didnt_step?(curr, prev)
|
32
|
+
return false if (prev.pred != curr.pred)
|
33
|
+
RandomForester.logger.error "Null tree: #{@id}, bad feature: #{curr.left.pred.field }"
|
34
|
+
true
|
35
|
+
end
|
48
36
|
end
|
data/lib/node.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'predicate'
|
2
|
+
|
3
|
+
class Node
|
4
|
+
|
5
|
+
attr_reader :decision, :left, :right, :pred
|
6
|
+
|
7
|
+
def initialize(xml)
|
8
|
+
children = xml.children
|
9
|
+
@pred = Predicate.new(children[0])
|
10
|
+
|
11
|
+
@decision = xml.attribute('score').to_s
|
12
|
+
|
13
|
+
return if children.count == 1
|
14
|
+
@left = Node.new(children[1]) if children[1]
|
15
|
+
@right = Node.new(children[2]) if children[2]
|
16
|
+
end
|
17
|
+
|
18
|
+
def true?(features)
|
19
|
+
@pred.nil? || @pred.true?(features)
|
20
|
+
end
|
21
|
+
end
|
data/lib/numerical_predicate.rb
CHANGED
@@ -5,10 +5,10 @@ class NumericalPredicate
|
|
5
5
|
|
6
6
|
attr_reader :field
|
7
7
|
|
8
|
-
def initialize(
|
9
|
-
@field =
|
10
|
-
@value = Float(
|
11
|
-
@operator =
|
8
|
+
def initialize(attributes)
|
9
|
+
@field = attributes['field'].value.to_sym
|
10
|
+
@value = Float(attributes['value'].value)
|
11
|
+
@operator = attributes['operator'].value
|
12
12
|
end
|
13
13
|
|
14
14
|
def true?(features)
|
data/lib/predicate.rb
CHANGED
@@ -3,25 +3,10 @@ require 'categorical_predicate'
|
|
3
3
|
|
4
4
|
class Predicate
|
5
5
|
|
6
|
-
attr_reader :decision
|
7
|
-
|
8
6
|
def initialize(pred_xml)
|
9
|
-
|
10
|
-
|
11
|
-
@
|
12
|
-
@bool_op = @pred_xml.xpath('@booleanOperator').to_s
|
13
|
-
|
14
|
-
if !@op.empty?
|
15
|
-
@pred = NumericalPredicate.new(@pred_xml)
|
16
|
-
elsif !@bool_op.empty?
|
17
|
-
@pred = CategoricalPredicate.new(@pred_xml)
|
18
|
-
end
|
19
|
-
|
20
|
-
@decision = pred_xml.xpath('@score').to_s
|
21
|
-
end
|
22
|
-
|
23
|
-
def to_s
|
24
|
-
@pred_xml.to_s
|
7
|
+
attributes = pred_xml.attributes
|
8
|
+
@pred = NumericalPredicate.new(attributes) if attributes['operator']
|
9
|
+
@pred = CategoricalPredicate.new(pred_xml) if attributes['booleanOperator']
|
25
10
|
end
|
26
11
|
|
27
12
|
def field
|
@@ -29,7 +14,6 @@ class Predicate
|
|
29
14
|
end
|
30
15
|
|
31
16
|
def true?(features)
|
32
|
-
return true if @pred.nil?
|
33
17
|
return if missing_feature?(features)
|
34
18
|
return if nil_feature?(features)
|
35
19
|
@pred.true?(features)
|
data/lib/random_forest.rb
CHANGED
@@ -9,9 +9,9 @@ class RandomForest
|
|
9
9
|
}
|
10
10
|
end
|
11
11
|
|
12
|
-
def decisions_count(
|
12
|
+
def decisions_count(features)
|
13
13
|
decisions = @decision_trees.collect { |decision_tree|
|
14
|
-
decision_tree.decide(
|
14
|
+
decision_tree.decide(features)
|
15
15
|
}
|
16
16
|
decisions.inject(Hash.new(0)) { |h, e| h[e] += 1 ; h }
|
17
17
|
end
|
data/lib/random_forester.rb
CHANGED
@@ -20,7 +20,7 @@ module RandomForester
|
|
20
20
|
end
|
21
21
|
|
22
22
|
def self.get_model(pmml_file_name)
|
23
|
-
xml =
|
23
|
+
xml = xml_from_file_path(pmml_file_name)
|
24
24
|
new_model(xml)
|
25
25
|
end
|
26
26
|
|
@@ -30,12 +30,16 @@ module RandomForester
|
|
30
30
|
RandomForest.new(xml)
|
31
31
|
else
|
32
32
|
raise MODEL_NOT_SUPPORTED_ERROR
|
33
|
-
|
33
|
+
end
|
34
34
|
end
|
35
35
|
|
36
|
-
def self.
|
36
|
+
def self.xml_from_file_path(pmml_file_name)
|
37
37
|
pmml_string = File.open(pmml_file_name, 'rb').read
|
38
|
-
|
38
|
+
xml_from_string(pmml_string)
|
39
|
+
end
|
40
|
+
|
41
|
+
def self.xml_from_string(pmml_string)
|
42
|
+
xml = Nokogiri::XML(pmml_string) { |config| config.noblanks }
|
39
43
|
xml.remove_namespaces!
|
40
44
|
end
|
41
45
|
|
data/random_forester.gemspec
CHANGED
@@ -6,7 +6,7 @@ require 'random_forester/version'
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
7
|
spec.name = "random_forester"
|
8
8
|
spec.version = RandomForester::VERSION
|
9
|
-
spec.authors = ["
|
9
|
+
spec.authors = ["Asaf Schers"]
|
10
10
|
spec.email = ["schers@riskified.com"]
|
11
11
|
|
12
12
|
spec.summary = %q{Creates a random forest object from a pmml file.}
|
@@ -19,9 +19,9 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
21
|
spec.add_development_dependency "bundler", "~> 1.10"
|
22
|
-
spec.add_development_dependency "rake", "~>
|
23
|
-
spec.add_development_dependency "rspec"
|
24
|
-
spec.add_development_dependency "pry"
|
25
|
-
spec.add_dependency "nokogiri", "~> 1.
|
26
|
-
spec.add_dependency "
|
22
|
+
spec.add_development_dependency "rake", "~> 12.0"
|
23
|
+
spec.add_development_dependency "rspec", "~> 3.5"
|
24
|
+
spec.add_development_dependency "pry", "~> 0.10"
|
25
|
+
spec.add_dependency "nokogiri", "~> 1.7"
|
26
|
+
spec.add_dependency "ruby-prof"
|
27
27
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: random_forester
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
-
|
7
|
+
- Asaf Schers
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-03-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -30,58 +30,58 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '12.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '12.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rspec
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - "
|
45
|
+
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '3.5'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - "
|
52
|
+
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '3.5'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: pry
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - "
|
59
|
+
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
61
|
+
version: '0.10'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- - "
|
66
|
+
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '0'
|
68
|
+
version: '0.10'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: nokogiri
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '1.
|
75
|
+
version: '1.7'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '1.
|
82
|
+
version: '1.7'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
84
|
+
name: ruby-prof
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
87
|
- - ">="
|
@@ -112,7 +112,7 @@ files:
|
|
112
112
|
- bin/setup
|
113
113
|
- lib/categorical_predicate.rb
|
114
114
|
- lib/decision_tree.rb
|
115
|
-
- lib/
|
115
|
+
- lib/node.rb
|
116
116
|
- lib/numerical_predicate.rb
|
117
117
|
- lib/predicate.rb
|
118
118
|
- lib/random_forest.rb
|
@@ -1,33 +0,0 @@
|
|
1
|
-
require 'json'
|
2
|
-
|
3
|
-
pmml_file = '' #pmml file name
|
4
|
-
json = '' #get features json from oscd
|
5
|
-
|
6
|
-
get_missing_categories(json,pmml_file)
|
7
|
-
|
8
|
-
def get_category_feature_names(pmml_file)
|
9
|
-
xml = RandomForester.get_xml pmml_file;
|
10
|
-
features = {}
|
11
|
-
xml.xpath("PMML/DataDictionary/*").each { |df| features[df.xpath('@name').to_s] = df.xpath('@dataType').to_s }; nil
|
12
|
-
features.select { |_, v| v == 'string' }.keys
|
13
|
-
end
|
14
|
-
|
15
|
-
def get_categories(json, pmml_file)
|
16
|
-
curr_features = JSON.parse(json)
|
17
|
-
category_features = get_category_feature_names(pmml_file)
|
18
|
-
curr_features.select { |k, v| category_features.include?(k)}
|
19
|
-
end
|
20
|
-
|
21
|
-
def get_missing_categories(json, pmml_file)
|
22
|
-
categories = get_categories(json, pmml_file)
|
23
|
-
categories.each { |k, v|
|
24
|
-
next if !!v == v
|
25
|
-
category_on_pmml = File.readlines(pmml_file).any?{ |l| l[v.to_s] }
|
26
|
-
puts "category: #{k}, value: #{v}" unless category_on_pmml
|
27
|
-
}; nil
|
28
|
-
end
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|