random_forester 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4464cbf26aa7ddb944082197474c6e5fe2c8a122
4
- data.tar.gz: 8bc005154166a42ce313fb4657fd9b9acf0e4347
3
+ metadata.gz: 45b219cba170a847d0cc4db93503a726655ce212
4
+ data.tar.gz: 63ad36530b2d4dacede594fb5fd3df28a52d4634
5
5
  SHA512:
6
- metadata.gz: 12093b7119a156e2c49d49639a6cb8b3b77abbd2058419800416781e8c08104f65752a0102ad4575cf477af1750eb1033e906602159298cd6b45b492513beabc
7
- data.tar.gz: d6aefbe4e09e000ce3d26847ff13ccdac1d5f15c00dd082103c563546f554172240510a0f30f3ed6ddfb2ef463599991d3918c79efe1a034bca7728f9dff25b7
6
+ metadata.gz: 279ca796b380047cb0eb0de1d1e741540f410e2e3ca763a1b4f91dbe4e0b2d60f682fa94991dac1355100814a9e6c15edb2718dc62ddb3c94a1e3ca14f5d2f3a
7
+ data.tar.gz: 01bda06cc65bde79cef7cad504f0818333e74316d3d59aadf939ac7c6d0df520da7e28b8c5b94443724c1ec48eded7c56a2f4d5bc9e951aebdb7877b4b7d27b0
data/README.md CHANGED
@@ -1,8 +1,12 @@
1
- # RandomForester
1
+ <a href="https://codeclimate.com/github/asafschers/random_forester"><img src="https://codeclimate.com/github/asafschers/random_forester/badges/gpa.svg" /></a>
2
+ [![Gem Version](https://badge.fury.io/rb/random_forester.svg)](https://badge.fury.io/rb/random_forester)
3
+ [![Dependency Status](https://www.versioneye.com/user/projects/5870c8c42f149b00509e72a3/badge.svg?style=flat-square)](https://www.versioneye.com/user/projects/5870c8c42f149b00509e72a3)
4
+ [![Build Status](https://travis-ci.org/asafschers/random_forester.svg?branch=master)](https://travis-ci.org/asafschers/random_forester)
5
+ [![Code Triagers Badge](https://www.codetriage.com/asafschers/random_forester/badges/users.svg)](https://www.codetriage.com/asafschers/random_forester)
2
6
 
3
- Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/random_forester`. To experiment with that code, run `bin/console` for an interactive prompt.
7
+ # RandomForester
4
8
 
5
- TODO: Delete this and the text above, and describe your gem
9
+ Reads Random Forest PMML files and creates a Ruby Random Forest classifier model.
6
10
 
7
11
  ## Installation
8
12
 
@@ -22,7 +26,13 @@ Or install it yourself as:
22
26
 
23
27
  ## Usage
24
28
 
25
- TODO: Write usage instructions here
29
+ ```ruby
30
+ random_forest = RandomForester.get_model 'sample.pmml'
31
+ features = {a: 1, b: true, c: "YES"}
32
+ random_forest.predict(features)
33
+ random_forest.decisions_count(features)
34
+ ```
35
+
26
36
 
27
37
  ## Development
28
38
 
@@ -32,7 +42,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
32
42
 
33
43
  ## Contributing
34
44
 
35
- Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/random_forester. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](contributor-covenant.org) code of conduct.
45
+ Bug reports and pull requests are welcome on GitHub at https://github.com/asafschers/random_forester. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](https://www.contributor-covenant.org) code of conduct.
36
46
 
37
47
 
38
48
  ## License
@@ -5,12 +5,19 @@ class CategoricalPredicate
5
5
  attr_reader :field
6
6
 
7
7
  def initialize(pred_xml)
8
- @field = pred_xml.xpath('@field').to_s.to_sym
9
- @array = pred_xml.xpath('Array/text()').to_s.tr('"', '').split(' ')
10
- @operator = pred_xml.xpath('@booleanOperator').to_s
8
+ attributes = pred_xml.attributes
9
+ @field = attributes['field'].value.to_sym
10
+ @array = pred_xml.children[0].content.tr('"', '').split(' ')
11
+ @operator = attributes['booleanOperator'].value
11
12
  end
12
13
 
13
14
  def true?(features)
15
+ format_boolean(features)
14
16
  @array.include? features[@field] if @operator == IS_IN
15
17
  end
18
+
19
+ def format_boolean(features)
20
+ features[@field] = 'f' if features[@field] == false
21
+ features[@field] = 't' if features[@field] == true
22
+ end
16
23
  end
data/lib/decision_tree.rb CHANGED
@@ -1,48 +1,36 @@
1
- require 'predicate'
2
- require 'rubytree'
1
+ require 'node'
3
2
 
4
3
  class DecisionTree
5
- ROOT = 'root'
6
- LEFT = 'left'
7
- RIGHT = 'right'
8
4
 
9
5
  attr_reader :root
10
6
 
11
7
  def initialize(tree_xml)
12
- @id = tree_xml.xpath('@id')
13
- @root = Tree::TreeNode.new(ROOT)
14
- set_node(tree_xml.xpath('TreeModel/Node'), @root)
15
- end
16
-
17
- def set_node(tree_xml, root)
18
- root.content = Predicate.new(tree_xml)
19
-
20
- return if tree_xml.xpath('*').count == 1
21
-
22
- root << Tree::TreeNode.new(LEFT)
23
- root << Tree::TreeNode.new(RIGHT)
24
-
25
- set_node(tree_xml.xpath('*')[1], root[LEFT]) if tree_xml.xpath('*')[1]
26
- set_node(tree_xml.xpath('*')[2], root[RIGHT]) if tree_xml.xpath('*')[2]
8
+ @id = tree_xml.attribute('id')
9
+ @root = Node.new(tree_xml.xpath('TreeModel/Node'))
27
10
  end
28
11
 
29
12
  def decide(features)
30
13
  curr = @root
31
- while curr.content.decision == ''
14
+ while curr.decision == ''
32
15
  prev = curr
33
- curr = curr[LEFT] if curr[LEFT] && curr[LEFT].content.true?(features)
34
- curr = curr[RIGHT] if curr[RIGHT] && curr[RIGHT].content.true?(features)
35
-
36
- return if no_true_child?(curr, prev)
16
+ curr = step(curr, features)
17
+ return if didnt_step?(curr, prev)
37
18
  end
38
19
 
39
- curr.content.decision
20
+ curr.decision
40
21
  end
41
22
 
42
- def no_true_child?(curr, prev)
43
- return false if (prev.content != curr.content)
44
- RandomForester.logger.error "Null tree: #{@id}, bad feature: #{curr[LEFT].content.field }"
45
- true
23
+ private
24
+
25
+ def step(curr, features)
26
+ curr = curr.left if curr.left && curr.left.true?(features)
27
+ curr = curr.right if curr.right && curr.right.true?(features)
28
+ curr
46
29
  end
47
30
 
31
+ def didnt_step?(curr, prev)
32
+ return false if (prev.pred != curr.pred)
33
+ RandomForester.logger.error "Null tree: #{@id}, bad feature: #{curr.left.pred.field }"
34
+ true
35
+ end
48
36
  end
data/lib/node.rb ADDED
@@ -0,0 +1,21 @@
1
+ require 'predicate'
2
+
3
+ class Node
4
+
5
+ attr_reader :decision, :left, :right, :pred
6
+
7
+ def initialize(xml)
8
+ children = xml.children
9
+ @pred = Predicate.new(children[0])
10
+
11
+ @decision = xml.attribute('score').to_s
12
+
13
+ return if children.count == 1
14
+ @left = Node.new(children[1]) if children[1]
15
+ @right = Node.new(children[2]) if children[2]
16
+ end
17
+
18
+ def true?(features)
19
+ @pred.nil? || @pred.true?(features)
20
+ end
21
+ end
@@ -5,10 +5,10 @@ class NumericalPredicate
5
5
 
6
6
  attr_reader :field
7
7
 
8
- def initialize(pred_xml)
9
- @field = pred_xml.xpath('@field').to_s.to_sym
10
- @value = Float(pred_xml.xpath('@value').to_s)
11
- @operator = pred_xml.xpath('@operator').to_s
8
+ def initialize(attributes)
9
+ @field = attributes['field'].value.to_sym
10
+ @value = Float(attributes['value'].value)
11
+ @operator = attributes['operator'].value
12
12
  end
13
13
 
14
14
  def true?(features)
data/lib/predicate.rb CHANGED
@@ -3,25 +3,10 @@ require 'categorical_predicate'
3
3
 
4
4
  class Predicate
5
5
 
6
- attr_reader :decision
7
-
8
6
  def initialize(pred_xml)
9
- @pred_xml = pred_xml.xpath('*')[0]
10
-
11
- @op = @pred_xml.xpath('@operator').to_s
12
- @bool_op = @pred_xml.xpath('@booleanOperator').to_s
13
-
14
- if !@op.empty?
15
- @pred = NumericalPredicate.new(@pred_xml)
16
- elsif !@bool_op.empty?
17
- @pred = CategoricalPredicate.new(@pred_xml)
18
- end
19
-
20
- @decision = pred_xml.xpath('@score').to_s
21
- end
22
-
23
- def to_s
24
- @pred_xml.to_s
7
+ attributes = pred_xml.attributes
8
+ @pred = NumericalPredicate.new(attributes) if attributes['operator']
9
+ @pred = CategoricalPredicate.new(pred_xml) if attributes['booleanOperator']
25
10
  end
26
11
 
27
12
  def field
@@ -29,7 +14,6 @@ class Predicate
29
14
  end
30
15
 
31
16
  def true?(features)
32
- return true if @pred.nil?
33
17
  return if missing_feature?(features)
34
18
  return if nil_feature?(features)
35
19
  @pred.true?(features)
data/lib/random_forest.rb CHANGED
@@ -9,9 +9,9 @@ class RandomForest
9
9
  }
10
10
  end
11
11
 
12
- def decisions_count(fearures)
12
+ def decisions_count(features)
13
13
  decisions = @decision_trees.collect { |decision_tree|
14
- decision_tree.decide(fearures)
14
+ decision_tree.decide(features)
15
15
  }
16
16
  decisions.inject(Hash.new(0)) { |h, e| h[e] += 1 ; h }
17
17
  end
@@ -1,3 +1,3 @@
1
1
  module RandomForester
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -20,7 +20,7 @@ module RandomForester
20
20
  end
21
21
 
22
22
  def self.get_model(pmml_file_name)
23
- xml = get_xml(pmml_file_name)
23
+ xml = xml_from_file_path(pmml_file_name)
24
24
  new_model(xml)
25
25
  end
26
26
 
@@ -30,12 +30,16 @@ module RandomForester
30
30
  RandomForest.new(xml)
31
31
  else
32
32
  raise MODEL_NOT_SUPPORTED_ERROR
33
- end
33
+ end
34
34
  end
35
35
 
36
- def self.get_xml(pmml_file_name)
36
+ def self.xml_from_file_path(pmml_file_name)
37
37
  pmml_string = File.open(pmml_file_name, 'rb').read
38
- xml = Nokogiri::XML(pmml_string)
38
+ xml_from_string(pmml_string)
39
+ end
40
+
41
+ def self.xml_from_string(pmml_string)
42
+ xml = Nokogiri::XML(pmml_string) { |config| config.noblanks }
39
43
  xml.remove_namespaces!
40
44
  end
41
45
 
@@ -6,7 +6,7 @@ require 'random_forester/version'
6
6
  Gem::Specification.new do |spec|
7
7
  spec.name = "random_forester"
8
8
  spec.version = RandomForester::VERSION
9
- spec.authors = ["asaf schers"]
9
+ spec.authors = ["Asaf Schers"]
10
10
  spec.email = ["schers@riskified.com"]
11
11
 
12
12
  spec.summary = %q{Creates a random forest object from a pmml file.}
@@ -19,9 +19,9 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ["lib"]
20
20
 
21
21
  spec.add_development_dependency "bundler", "~> 1.10"
22
- spec.add_development_dependency "rake", "~> 10.0"
23
- spec.add_development_dependency "rspec"
24
- spec.add_development_dependency "pry"
25
- spec.add_dependency "nokogiri", "~> 1.6"
26
- spec.add_dependency "rubytree"
22
+ spec.add_development_dependency "rake", "~> 12.0"
23
+ spec.add_development_dependency "rspec", "~> 3.5"
24
+ spec.add_development_dependency "pry", "~> 0.10"
25
+ spec.add_dependency "nokogiri", "~> 1.7"
26
+ spec.add_dependency "ruby-prof"
27
27
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: random_forester
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
- - asaf schers
7
+ - Asaf Schers
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-12-10 00:00:00.000000000 Z
11
+ date: 2017-03-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -30,58 +30,58 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: '12.0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '10.0'
40
+ version: '12.0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rspec
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ">="
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '0'
47
+ version: '3.5'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ">="
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0'
54
+ version: '3.5'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: pry
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - ">="
59
+ - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '0'
61
+ version: '0.10'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - ">="
66
+ - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '0'
68
+ version: '0.10'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: nokogiri
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '1.6'
75
+ version: '1.7'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '1.6'
82
+ version: '1.7'
83
83
  - !ruby/object:Gem::Dependency
84
- name: rubytree
84
+ name: ruby-prof
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - ">="
@@ -112,7 +112,7 @@ files:
112
112
  - bin/setup
113
113
  - lib/categorical_predicate.rb
114
114
  - lib/decision_tree.rb
115
- - lib/missing_categories_script.rb
115
+ - lib/node.rb
116
116
  - lib/numerical_predicate.rb
117
117
  - lib/predicate.rb
118
118
  - lib/random_forest.rb
@@ -1,33 +0,0 @@
1
- require 'json'
2
-
3
- pmml_file = '' #pmml file name
4
- json = '' #get features json from oscd
5
-
6
- get_missing_categories(json,pmml_file)
7
-
8
- def get_category_feature_names(pmml_file)
9
- xml = RandomForester.get_xml pmml_file;
10
- features = {}
11
- xml.xpath("PMML/DataDictionary/*").each { |df| features[df.xpath('@name').to_s] = df.xpath('@dataType').to_s }; nil
12
- features.select { |_, v| v == 'string' }.keys
13
- end
14
-
15
- def get_categories(json, pmml_file)
16
- curr_features = JSON.parse(json)
17
- category_features = get_category_feature_names(pmml_file)
18
- curr_features.select { |k, v| category_features.include?(k)}
19
- end
20
-
21
- def get_missing_categories(json, pmml_file)
22
- categories = get_categories(json, pmml_file)
23
- categories.each { |k, v|
24
- next if !!v == v
25
- category_on_pmml = File.readlines(pmml_file).any?{ |l| l[v.to_s] }
26
- puts "category: #{k}, value: #{v}" unless category_on_pmml
27
- }; nil
28
- end
29
-
30
-
31
-
32
-
33
-