random_forester 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4464cbf26aa7ddb944082197474c6e5fe2c8a122
4
- data.tar.gz: 8bc005154166a42ce313fb4657fd9b9acf0e4347
3
+ metadata.gz: 45b219cba170a847d0cc4db93503a726655ce212
4
+ data.tar.gz: 63ad36530b2d4dacede594fb5fd3df28a52d4634
5
5
  SHA512:
6
- metadata.gz: 12093b7119a156e2c49d49639a6cb8b3b77abbd2058419800416781e8c08104f65752a0102ad4575cf477af1750eb1033e906602159298cd6b45b492513beabc
7
- data.tar.gz: d6aefbe4e09e000ce3d26847ff13ccdac1d5f15c00dd082103c563546f554172240510a0f30f3ed6ddfb2ef463599991d3918c79efe1a034bca7728f9dff25b7
6
+ metadata.gz: 279ca796b380047cb0eb0de1d1e741540f410e2e3ca763a1b4f91dbe4e0b2d60f682fa94991dac1355100814a9e6c15edb2718dc62ddb3c94a1e3ca14f5d2f3a
7
+ data.tar.gz: 01bda06cc65bde79cef7cad504f0818333e74316d3d59aadf939ac7c6d0df520da7e28b8c5b94443724c1ec48eded7c56a2f4d5bc9e951aebdb7877b4b7d27b0
data/README.md CHANGED
@@ -1,8 +1,12 @@
1
- # RandomForester
1
+ <a href="https://codeclimate.com/github/asafschers/random_forester"><img src="https://codeclimate.com/github/asafschers/random_forester/badges/gpa.svg" /></a>
2
+ [![Gem Version](https://badge.fury.io/rb/random_forester.svg)](https://badge.fury.io/rb/random_forester)
3
+ [![Dependency Status](https://www.versioneye.com/user/projects/5870c8c42f149b00509e72a3/badge.svg?style=flat-square)](https://www.versioneye.com/user/projects/5870c8c42f149b00509e72a3)
4
+ [![Build Status](https://travis-ci.org/asafschers/random_forester.svg?branch=master)](https://travis-ci.org/asafschers/random_forester)
5
+ [![Code Triagers Badge](https://www.codetriage.com/asafschers/random_forester/badges/users.svg)](https://www.codetriage.com/asafschers/random_forester)
2
6
 
3
- Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/random_forester`. To experiment with that code, run `bin/console` for an interactive prompt.
7
+ # RandomForester
4
8
 
5
- TODO: Delete this and the text above, and describe your gem
9
+ Reads Random Forest PMML files and creates Ruby Random Forest classifier model.
6
10
 
7
11
  ## Installation
8
12
 
@@ -22,7 +26,13 @@ Or install it yourself as:
22
26
 
23
27
  ## Usage
24
28
 
25
- TODO: Write usage instructions here
29
+ ```ruby
30
+ random_forest = RandomForester.get_model 'sample.pmml'
31
+ features = {a: 1, b: true, c: "YES"}
32
+ random_forest.predict(features)
33
+ random_forest.decisions_count(features)
34
+ ```
35
+
26
36
 
27
37
  ## Development
28
38
 
@@ -32,7 +42,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
32
42
 
33
43
  ## Contributing
34
44
 
35
- Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/random_forester. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](contributor-covenant.org) code of conduct.
45
+ Bug reports and pull requests are welcome on GitHub at https://github.com/asafschers/random_forester. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](contributor-covenant.org) code of conduct.
36
46
 
37
47
 
38
48
  ## License
data/lib/categorical_predicate.rb CHANGED
@@ -5,12 +5,19 @@ class CategoricalPredicate
5
5
  attr_reader :field
6
6
 
7
7
  def initialize(pred_xml)
8
- @field = pred_xml.xpath('@field').to_s.to_sym
9
- @array = pred_xml.xpath('Array/text()').to_s.tr('"', '').split(' ')
10
- @operator = pred_xml.xpath('@booleanOperator').to_s
8
+ attributes = pred_xml.attributes
9
+ @field = attributes['field'].value.to_sym
10
+ @array = pred_xml.children[0].content.tr('"', '').split(' ')
11
+ @operator = attributes['booleanOperator'].value
11
12
  end
12
13
 
13
14
  def true?(features)
15
+ format_boolean(features)
14
16
  @array.include? features[@field] if @operator == IS_IN
15
17
  end
18
+
19
+ def format_boolean(features)
20
+ features[@field] = 'f' if features[@field] == false
21
+ features[@field] = 't' if features[@field] == true
22
+ end
16
23
  end
data/lib/decision_tree.rb CHANGED
@@ -1,48 +1,36 @@
1
- require 'predicate'
2
- require 'rubytree'
1
+ require 'node'
3
2
 
4
3
  class DecisionTree
5
- ROOT = 'root'
6
- LEFT = 'left'
7
- RIGHT = 'right'
8
4
 
9
5
  attr_reader :root
10
6
 
11
7
  def initialize(tree_xml)
12
- @id = tree_xml.xpath('@id')
13
- @root = Tree::TreeNode.new(ROOT)
14
- set_node(tree_xml.xpath('TreeModel/Node'), @root)
15
- end
16
-
17
- def set_node(tree_xml, root)
18
- root.content = Predicate.new(tree_xml)
19
-
20
- return if tree_xml.xpath('*').count == 1
21
-
22
- root << Tree::TreeNode.new(LEFT)
23
- root << Tree::TreeNode.new(RIGHT)
24
-
25
- set_node(tree_xml.xpath('*')[1], root[LEFT]) if tree_xml.xpath('*')[1]
26
- set_node(tree_xml.xpath('*')[2], root[RIGHT]) if tree_xml.xpath('*')[2]
8
+ @id = tree_xml.attribute('id')
9
+ @root = Node.new(tree_xml.xpath('TreeModel/Node'))
27
10
  end
28
11
 
29
12
  def decide(features)
30
13
  curr = @root
31
- while curr.content.decision == ''
14
+ while curr.decision == ''
32
15
  prev = curr
33
- curr = curr[LEFT] if curr[LEFT] && curr[LEFT].content.true?(features)
34
- curr = curr[RIGHT] if curr[RIGHT] && curr[RIGHT].content.true?(features)
35
-
36
- return if no_true_child?(curr, prev)
16
+ curr = step(curr, features)
17
+ return if didnt_step?(curr, prev)
37
18
  end
38
19
 
39
- curr.content.decision
20
+ curr.decision
40
21
  end
41
22
 
42
- def no_true_child?(curr, prev)
43
- return false if (prev.content != curr.content)
44
- RandomForester.logger.error "Null tree: #{@id}, bad feature: #{curr[LEFT].content.field }"
45
- true
23
+ private
24
+
25
+ def step(curr, features)
26
+ curr = curr.left if curr.left && curr.left.true?(features)
27
+ curr = curr.right if curr.right && curr.right.true?(features)
28
+ curr
46
29
  end
47
30
 
31
+ def didnt_step?(curr, prev)
32
+ return false if (prev.pred != curr.pred)
33
+ RandomForester.logger.error "Null tree: #{@id}, bad feature: #{curr.left.pred.field }"
34
+ true
35
+ end
48
36
  end
data/lib/node.rb ADDED
@@ -0,0 +1,21 @@
1
+ require 'predicate'
2
+
3
+ class Node
4
+
5
+ attr_reader :decision, :left, :right, :pred
6
+
7
+ def initialize(xml)
8
+ children = xml.children
9
+ @pred = Predicate.new(children[0])
10
+
11
+ @decision = xml.attribute('score').to_s
12
+
13
+ return if children.count == 1
14
+ @left = Node.new(children[1]) if children[1]
15
+ @right = Node.new(children[2]) if children[2]
16
+ end
17
+
18
+ def true?(features)
19
+ @pred.nil? || @pred.true?(features)
20
+ end
21
+ end
data/lib/numerical_predicate.rb CHANGED
@@ -5,10 +5,10 @@ class NumericalPredicate
5
5
 
6
6
  attr_reader :field
7
7
 
8
- def initialize(pred_xml)
9
- @field = pred_xml.xpath('@field').to_s.to_sym
10
- @value = Float(pred_xml.xpath('@value').to_s)
11
- @operator = pred_xml.xpath('@operator').to_s
8
+ def initialize(attributes)
9
+ @field = attributes['field'].value.to_sym
10
+ @value = Float(attributes['value'].value)
11
+ @operator = attributes['operator'].value
12
12
  end
13
13
 
14
14
  def true?(features)
data/lib/predicate.rb CHANGED
@@ -3,25 +3,10 @@ require 'categorical_predicate'
3
3
 
4
4
  class Predicate
5
5
 
6
- attr_reader :decision
7
-
8
6
  def initialize(pred_xml)
9
- @pred_xml = pred_xml.xpath('*')[0]
10
-
11
- @op = @pred_xml.xpath('@operator').to_s
12
- @bool_op = @pred_xml.xpath('@booleanOperator').to_s
13
-
14
- if !@op.empty?
15
- @pred = NumericalPredicate.new(@pred_xml)
16
- elsif !@bool_op.empty?
17
- @pred = CategoricalPredicate.new(@pred_xml)
18
- end
19
-
20
- @decision = pred_xml.xpath('@score').to_s
21
- end
22
-
23
- def to_s
24
- @pred_xml.to_s
7
+ attributes = pred_xml.attributes
8
+ @pred = NumericalPredicate.new(attributes) if attributes['operator']
9
+ @pred = CategoricalPredicate.new(pred_xml) if attributes['booleanOperator']
25
10
  end
26
11
 
27
12
  def field
@@ -29,7 +14,6 @@ class Predicate
29
14
  end
30
15
 
31
16
  def true?(features)
32
- return true if @pred.nil?
33
17
  return if missing_feature?(features)
34
18
  return if nil_feature?(features)
35
19
  @pred.true?(features)
data/lib/random_forest.rb CHANGED
@@ -9,9 +9,9 @@ class RandomForest
9
9
  }
10
10
  end
11
11
 
12
- def decisions_count(fearures)
12
+ def decisions_count(features)
13
13
  decisions = @decision_trees.collect { |decision_tree|
14
- decision_tree.decide(fearures)
14
+ decision_tree.decide(features)
15
15
  }
16
16
  decisions.inject(Hash.new(0)) { |h, e| h[e] += 1 ; h }
17
17
  end
data/lib/random_forester/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module RandomForester
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
data/lib/random_forester.rb CHANGED
@@ -20,7 +20,7 @@ module RandomForester
20
20
  end
21
21
 
22
22
  def self.get_model(pmml_file_name)
23
- xml = get_xml(pmml_file_name)
23
+ xml = xml_from_file_path(pmml_file_name)
24
24
  new_model(xml)
25
25
  end
26
26
 
@@ -30,12 +30,16 @@ module RandomForester
30
30
  RandomForest.new(xml)
31
31
  else
32
32
  raise MODEL_NOT_SUPPORTED_ERROR
33
- end
33
+ end
34
34
  end
35
35
 
36
- def self.get_xml(pmml_file_name)
36
+ def self.xml_from_file_path(pmml_file_name)
37
37
  pmml_string = File.open(pmml_file_name, 'rb').read
38
- xml = Nokogiri::XML(pmml_string)
38
+ xml_from_string(pmml_string)
39
+ end
40
+
41
+ def self.xml_from_string(pmml_string)
42
+ xml = Nokogiri::XML(pmml_string) { |config| config.noblanks }
39
43
  xml.remove_namespaces!
40
44
  end
41
45
 
data/random_forester.gemspec CHANGED
@@ -6,7 +6,7 @@ require 'random_forester/version'
6
6
  Gem::Specification.new do |spec|
7
7
  spec.name = "random_forester"
8
8
  spec.version = RandomForester::VERSION
9
- spec.authors = ["asaf schers"]
9
+ spec.authors = ["Asaf Schers"]
10
10
  spec.email = ["schers@riskified.com"]
11
11
 
12
12
  spec.summary = %q{Creates a random forest object from a pmml file.}
@@ -19,9 +19,9 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ["lib"]
20
20
 
21
21
  spec.add_development_dependency "bundler", "~> 1.10"
22
- spec.add_development_dependency "rake", "~> 10.0"
23
- spec.add_development_dependency "rspec"
24
- spec.add_development_dependency "pry"
25
- spec.add_dependency "nokogiri", "~> 1.6"
26
- spec.add_dependency "rubytree"
22
+ spec.add_development_dependency "rake", "~> 12.0"
23
+ spec.add_development_dependency "rspec", "~> 3.5"
24
+ spec.add_development_dependency "pry", "~> 0.10"
25
+ spec.add_dependency "nokogiri", "~> 1.7"
26
+ spec.add_dependency "ruby-prof"
27
27
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: random_forester
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
- - asaf schers
7
+ - Asaf Schers
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-12-10 00:00:00.000000000 Z
11
+ date: 2017-03-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -30,58 +30,58 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: '12.0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '10.0'
40
+ version: '12.0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rspec
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ">="
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '0'
47
+ version: '3.5'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ">="
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0'
54
+ version: '3.5'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: pry
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - ">="
59
+ - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '0'
61
+ version: '0.10'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - ">="
66
+ - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '0'
68
+ version: '0.10'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: nokogiri
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '1.6'
75
+ version: '1.7'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '1.6'
82
+ version: '1.7'
83
83
  - !ruby/object:Gem::Dependency
84
- name: rubytree
84
+ name: ruby-prof
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - ">="
@@ -112,7 +112,7 @@ files:
112
112
  - bin/setup
113
113
  - lib/categorical_predicate.rb
114
114
  - lib/decision_tree.rb
115
- - lib/missing_categories_script.rb
115
+ - lib/node.rb
116
116
  - lib/numerical_predicate.rb
117
117
  - lib/predicate.rb
118
118
  - lib/random_forest.rb
data/lib/missing_categories_script.rb REMOVED
@@ -1,33 +0,0 @@
1
- require 'json'
2
-
3
- pmml_file = '' #pmml file name
4
- json = '' #get features json from oscd
5
-
6
- get_missing_categories(json,pmml_file)
7
-
8
- def get_category_feature_names(pmml_file)
9
- xml = RandomForester.get_xml pmml_file;
10
- features = {}
11
- xml.xpath("PMML/DataDictionary/*").each { |df| features[df.xpath('@name').to_s] = df.xpath('@dataType').to_s }; nil
12
- features.select { |_, v| v == 'string' }.keys
13
- end
14
-
15
- def get_categories(json, pmml_file)
16
- curr_features = JSON.parse(json)
17
- category_features = get_category_feature_names(pmml_file)
18
- curr_features.select { |k, v| category_features.include?(k)}
19
- end
20
-
21
- def get_missing_categories(json, pmml_file)
22
- categories = get_categories(json, pmml_file)
23
- categories.each { |k, v|
24
- next if !!v == v
25
- category_on_pmml = File.readlines(pmml_file).any?{ |l| l[v.to_s] }
26
- puts "category: #{k}, value: #{v}" unless category_on_pmml
27
- }; nil
28
- end
29
-
30
-
31
-
32
-
33
-