slide_rule 0.0.4 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f11d8a365d8a650797e49fe64f89947e2b1f1926
4
- data.tar.gz: 3d6742d9e39d545e6b9f569691ae36970882bc51
3
+ metadata.gz: 140911c7f9078ab2887f32012e167852d15f9ccd
4
+ data.tar.gz: c7f8db4642867e007875ee81d8403e037d647389
5
5
  SHA512:
6
- metadata.gz: e94316418b33fcbadc4a596890837197052f13d863052a4bd9d581da06992b2234462d090fbad4397e791123d263b96151970c20244736286210f89018ad4f8a
7
- data.tar.gz: 89d3aa4826f9663b8cdf92dc975dd473a4d052daa2c6b3ced8cb9cc3139731ab612a6ba1fcf197c4ebe6642320245505931f52f46ca44dfbde18d6db9a30f598
6
+ metadata.gz: 24a59fcbca5f635b1a52d0cd8186df12d97ba0f55cb27e2389c55023b87e3cd9b06f3cd8f752cd97b1480466d2c6eb47ab36ca425edd804ae04543f06b33b2d0
7
+ data.tar.gz: 9856686afc80d0437803c44095d0c9561049d1fee4f38913aec2e7f647d7456eb0c959ecde55efd74613e04a265fc4b60de7bf4f48be915b185bd05022eeaeec
data/.gitignore CHANGED
@@ -2,3 +2,7 @@
2
2
  **.DS_Store
3
3
  Guardfile
4
4
  Gemfile.lock
5
+ vendor/cache
6
+ .ruby-gemset
7
+ .ruby-version
8
+ pkg/
data/README.md CHANGED
@@ -19,25 +19,26 @@ _Note: weights are assumed to be equal if not provided_
19
19
 
20
20
  # API
21
21
 
22
- ##Describe the field calculators
22
+ ## Describe the field distance calculators
23
23
 
24
24
  Each field to be considered in the distance calculation should be described
25
25
  with a calculation method and a weight (optional)
26
26
 
27
27
  Valid calculators:
28
28
 
29
- * day_of_month (this needs to be factored into configurable date_recurrence)
30
- * float_range_distance
29
+ * day_of_year
30
+ * day_of_month
31
+ * levenshtein
31
32
 
32
33
  ```ruby
33
34
  distance_rules = {
34
35
  :description => {
35
36
  :weight => 0.80,
36
- :type => :levenshtein,
37
+ :calculator => :levenshtein,
37
38
  },
38
39
  :date => {
39
40
  :weight => 0.90,
40
- :type => :day_of_month,
41
+ :calculator => :day_of_month,
41
42
  },
42
43
  }
43
44
  ```
@@ -81,3 +82,41 @@ matcher.closest_match(candidate, [example, example2], 0.2)
81
82
  => example
82
83
 
83
84
  ```
85
+
86
+ ## Custom Field Distance Calculators
87
+
88
+ To define a custom field distance calculator, define a class with a `calculate(value1, value2)` method.
89
+
90
+ Requirements:
91
+ * Class must be stateless
92
+ * `calculate` should return a float from `0.0` (perfect match) to `1.0` (no match)
93
+ * Calculation should not be order dependent (e.g. `calculate(a, b) == calculate(b, a)`)
94
+
95
+ ```ruby
96
+ class StringLengthCalculator
97
+ def calculate(l1, l2)
98
+ diff = (l1 - l2).abs.to_f
99
+ return diff / [l1, l2].max
100
+ end
101
+ end
102
+
103
+ matcher = ::SlideRule::DistanceCalculator.new(
104
+ :length => {
105
+ :weight => 1.0,
106
+ :calculator => StringLengthCalculator
107
+ }
108
+ )
109
+
110
+ # Find the string with the closest length
111
+ matcher.closest_match("Howdy Doody Time!", ["Felix the cat", "Mighty Mouse"], 0.5)
112
+ # => { :item=>"Felix the cat", :distance=>0.23529411764705882 }
113
+ ```
114
+
115
+ See the [distance_calculators](https://github.com/mattnichols/slide_rule/tree/master/lib/slide_rule/distance_calculators) directory in source for more examples.
116
+
117
+
118
+ # To Do
119
+
120
+ * Add more field distance calculators
121
+
122
+
@@ -1,7 +1,9 @@
1
1
  module SlideRule
2
2
  class DistanceCalculator
3
+ attr_accessor :rules
4
+
3
5
  def initialize(rules)
4
- @rules = normalize_weights(rules)
6
+ @rules = prepare_rules(rules)
5
7
  end
6
8
 
7
9
  # TODO: Figure this out. Very inefficient!
@@ -20,19 +22,21 @@ module SlideRule
20
22
  end
21
23
  end
22
24
 
23
- def closest_match(obj, array, threshold)
24
- matches(obj, array, threshold).sort { |match| match[:distance] }.first
25
+ def closest_match(obj, array, threshold = 1.0)
26
+ matches(obj, array, threshold).sort_by { |match| match[:distance] }.first
25
27
  end
26
28
 
27
- def is_match?(obj_1, obj_2, threshold)
28
- distance = calculate_distance(obj_1, obj_2)
29
- distance < threshold
29
+ def closest_matching_item(obj, array, threshold = 1.0)
30
+ match = closest_match(obj, array, threshold)
31
+ return nil if match.nil?
32
+
33
+ match[:item]
30
34
  end
31
35
 
32
36
  def matches(obj, array, threshold)
33
37
  array.map do |item|
34
38
  distance = calculate_distance(obj, item)
35
- next nil unless distance < threshold
39
+ next nil unless distance <= threshold
36
40
  {
37
41
  item: item,
38
42
  distance: distance
@@ -48,16 +52,29 @@ module SlideRule
48
52
  # {
49
53
  # :attribute_name => {
50
54
  # :weight => 0.90,
51
- # :type => :distance_calculator,
55
+ # :calculator => :distance_calculator,
52
56
  # }
53
57
  # }
54
58
  def calculate_distance(i1, i2)
55
- @rules.map do |attribute, rule|
59
+ calculate_weighted_distances(i1, i2).reduce(0.0) do |distance, obj|
60
+ distance + (obj[:distance] * obj[:weight])
61
+ end
62
+ end
63
+
64
+ private
65
+
66
+ def calculate_weighted_distances(i1, i2)
67
+ distances = @rules.map do |attribute, rule|
56
68
  val1 = i1.send(attribute)
57
69
  val2 = i2.send(attribute)
58
- calculator = get_calculator(rule[:type])
59
- calculator.calculate(val1, val2).to_f * rule[:weight]
60
- end.reduce(0.0, &:+)
70
+ distance = rule[:calculator].calculate(val1, val2)
71
+ next { distance: distance.to_f, weight: rule[:weight] } unless distance.nil?
72
+
73
+ nil
74
+ end
75
+ normalize_weights_array(distances) if distances.compact!
76
+
77
+ distances
61
78
  end
62
79
 
63
80
  def get_calculator(calculator)
@@ -73,12 +90,35 @@ module SlideRule
73
90
 
74
91
  # Ensures all weights add up to 1.0
75
92
  #
76
- def normalize_weights(rules_hash)
77
- rules = rules_hash.dup
93
+ def normalize_weights(rules)
78
94
  weight_total = rules.map { |_attr, rule| rule[:weight] }.reduce(0.0, &:+)
79
95
  rules.each do |_attr, rule|
80
96
  rule[:weight] = rule[:weight] / weight_total
81
97
  end
82
98
  end
99
+
100
+ # Ensures all weights add up to 1.0 in array of hashes
101
+ #
102
+ def normalize_weights_array(rules)
103
+ weight_total = rules.map { |rule| rule[:weight] }.reduce(0.0, &:+)
104
+ rules.each do |rule|
105
+ rule[:weight] = rule[:weight] / weight_total
106
+ end
107
+ end
108
+
109
+ def prepare_rules(rules)
110
+ prepared_rules = rules.dup
111
+ prepared_rules = normalize_weights(prepared_rules)
112
+ prepared_rules.each do |_attr, rule|
113
+ if rule[:type]
114
+ puts 'Rule key `:type` is deprecated. Use `:calculator` instead.'
115
+ rule[:calculator] = rule[:type]
116
+ end
117
+
118
+ rule[:calculator] = get_calculator(rule[:calculator])
119
+ end
120
+
121
+ prepared_rules
122
+ end
83
123
  end
84
124
  end
@@ -1,3 +1,3 @@
1
1
  module SlideRule
2
- VERSION = '0.0.4'
2
+ VERSION = '0.1.0'
3
3
  end
@@ -19,6 +19,12 @@ describe ::SlideRule::DistanceCalculator do
19
19
  end
20
20
  end
21
21
 
22
+ class NilCalc
23
+ def calculate(_first, _second)
24
+ nil
25
+ end
26
+ end
27
+
22
28
  let(:examples) do
23
29
  [
24
30
  ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com'),
@@ -36,50 +42,31 @@ describe ::SlideRule::DistanceCalculator do
36
42
  ::SlideRule::DistanceCalculator.new(
37
43
  description: {
38
44
  weight: 0.80,
39
- type: :levenshtein
45
+ calculator: :levenshtein
40
46
  },
41
47
  date: {
42
48
  weight: 0.90,
43
- type: :day_of_month
49
+ calculator: :day_of_month
44
50
  }
45
51
  )
46
52
  end
47
53
 
48
- it 'finds recurring transaction' do
54
+ it 'finds closest' do
49
55
  example = ExampleTransaction.new(description: 'Wells Fargo Dealer SVC', date: '2015-06-17')
50
56
  expect(calculator.closest_match(example, examples, 0.2)[:item]).to eq(examples[3])
51
57
 
52
58
  example = ExampleTransaction.new(description: 'Audible.com', date: '2015-06-05')
53
59
  expect(calculator.closest_match(example, examples, 0.2)[:item]).to eq(examples[0])
54
60
  end
55
- end
56
61
 
57
- describe '#is_match?' do
58
- let(:calculator) do
59
- ::SlideRule::DistanceCalculator.new(
60
- description: {
61
- weight: 0.80,
62
- type: :levenshtein
63
- },
64
- date: {
65
- weight: 0.90,
66
- type: :day_of_month
67
- }
68
- )
69
- end
70
-
71
- it 'returns true if there is a match' do
72
- example_1 = ExampleTransaction.new(description: 'Wells Fargo Dealer SVC', date: '2015-06-17')
73
- example_2 = ExampleTransaction.new(description: 'Wells Fargo Dealer SVC', date: '2015-06-17')
74
-
75
- expect(calculator.is_match?(example_1, example_2, 0.2)).to be(true)
62
+ it 'with default threshold' do
63
+ example = ExampleTransaction.new(description: 'Audible.com', date: '2015-06-05')
64
+ expect(calculator.closest_match(example, examples)[:item]).to eq(examples[0])
76
65
  end
77
66
 
78
- it 'returns false if there is a match' do
79
- example_1 = ExampleTransaction.new(description: 'Wells Fargo Dealer SVC', date: '2015-06-17')
80
- example_2 = ExampleTransaction.new(description: 'Taco Bell', date: '2015-06-17')
81
-
82
- expect(calculator.is_match?(example_1, example_2, 0.2)).to be(false)
67
+ it 'finds closest matching item' do
68
+ example = ExampleTransaction.new(description: 'Audible.com', date: '2015-06-05')
69
+ expect(calculator.closest_matching_item(example, examples)).to eq(examples[0])
83
70
  end
84
71
  end
85
72
 
@@ -89,11 +76,11 @@ describe ::SlideRule::DistanceCalculator do
89
76
  calculator = ::SlideRule::DistanceCalculator.new(
90
77
  description: {
91
78
  weight: 1.00,
92
- type: :levenshtein
79
+ calculator: :levenshtein
93
80
  },
94
81
  date: {
95
82
  weight: 0.50,
96
- type: :day_of_month
83
+ calculator: :day_of_month
97
84
  }
98
85
  )
99
86
  example = ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com')
@@ -105,11 +92,11 @@ describe ::SlideRule::DistanceCalculator do
105
92
  calculator = ::SlideRule::DistanceCalculator.new(
106
93
  description: {
107
94
  weight: 0.50,
108
- type: :levenshtein
95
+ calculator: :levenshtein
109
96
  },
110
97
  date: {
111
98
  weight: 0.50,
112
- type: :day_of_month
99
+ calculator: :day_of_month
113
100
  }
114
101
  )
115
102
  example = ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com')
@@ -125,6 +112,23 @@ describe ::SlideRule::DistanceCalculator do
125
112
  distance = calculator.calculate_distance(example, candidate)
126
113
  expect(distance.round(4)).to eq(((3.0 * 0.5 / 15) + (4.0 * 0.5 / 11)).round(4))
127
114
  end
115
+
116
+ it 'should renormalize on nil' do
117
+ calculator = ::SlideRule::DistanceCalculator.new(
118
+ description: {
119
+ weight: 0.50,
120
+ calculator: :levenshtein
121
+ },
122
+ date: {
123
+ weight: 0.50,
124
+ calculator: NilCalc
125
+ }
126
+ )
127
+ example1 = ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com')
128
+ example2 = ::ExampleTransaction.new(amount: 25.00, date: '2015-06-08', description: 'Audible Inc')
129
+
130
+ expect(calculator.calculate_distance(example1, example2).round(4)).to eq((4.0 / 11).round(4))
131
+ end
128
132
  end
129
133
 
130
134
  context 'uses custom calculator' do
@@ -132,7 +136,7 @@ describe ::SlideRule::DistanceCalculator do
132
136
  calculator = ::SlideRule::DistanceCalculator.new(
133
137
  description: {
134
138
  weight: 1.00,
135
- type: CustomCalc
139
+ calculator: CustomCalc
136
140
  }
137
141
  )
138
142
  example = ::ExampleTransaction.new
@@ -142,5 +146,27 @@ describe ::SlideRule::DistanceCalculator do
142
146
  expect(distance).to eq(0.9)
143
147
  end
144
148
  end
149
+
150
+ context 'validates rules on initialize' do
151
+ it 'should allow :type' do
152
+ ::SlideRule::DistanceCalculator.new(
153
+ description: {
154
+ weight: 1.00,
155
+ type: CustomCalc
156
+ }
157
+ )
158
+ end
159
+
160
+ it 'should raise error if not valid calculator' do
161
+ expect do
162
+ ::SlideRule::DistanceCalculator.new(
163
+ description: {
164
+ weight: 1.00,
165
+ calculator: :some_junk
166
+ }
167
+ )
168
+ end.to raise_error
169
+ end
170
+ end
145
171
  end
146
172
  end
data/spec/spec_helper.rb CHANGED
@@ -18,6 +18,7 @@
18
18
  # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
19
19
 
20
20
  require_relative '../lib/slide_rule.rb'
21
+ require 'pry'
21
22
 
22
23
  RSpec.configure do |config|
23
24
  # rspec-expectations config goes here. You can use an alternate
metadata CHANGED
@@ -1,96 +1,97 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slide_rule
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - mattnichols
8
8
  - fergmastaflex
9
- autorequire:
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-01-11 00:00:00.000000000 Z
12
+ date: 2015-11-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
+ name: vladlev
15
16
  requirement: !ruby/object:Gem::Requirement
16
17
  requirements:
17
- - - ~>
18
+ - - "~>"
18
19
  - !ruby/object:Gem::Version
19
20
  version: '1.0'
20
- name: vladlev
21
- prerelease: false
22
21
  type: :runtime
22
+ prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
25
- - - ~>
25
+ - - "~>"
26
26
  - !ruby/object:Gem::Version
27
27
  version: '1.0'
28
28
  - !ruby/object:Gem::Dependency
29
+ name: rake
29
30
  requirement: !ruby/object:Gem::Requirement
30
31
  requirements:
31
- - - ~>
32
+ - - "~>"
32
33
  - !ruby/object:Gem::Version
33
34
  version: '10'
34
- name: rake
35
- prerelease: false
36
35
  type: :development
36
+ prerelease: false
37
37
  version_requirements: !ruby/object:Gem::Requirement
38
38
  requirements:
39
- - - ~>
39
+ - - "~>"
40
40
  - !ruby/object:Gem::Version
41
41
  version: '10'
42
42
  - !ruby/object:Gem::Dependency
43
+ name: pry
43
44
  requirement: !ruby/object:Gem::Requirement
44
45
  requirements:
45
- - - ~>
46
+ - - "~>"
46
47
  - !ruby/object:Gem::Version
47
48
  version: '0'
48
- name: pry
49
- prerelease: false
50
49
  type: :development
50
+ prerelease: false
51
51
  version_requirements: !ruby/object:Gem::Requirement
52
52
  requirements:
53
- - - ~>
53
+ - - "~>"
54
54
  - !ruby/object:Gem::Version
55
55
  version: '0'
56
56
  - !ruby/object:Gem::Dependency
57
+ name: rspec
57
58
  requirement: !ruby/object:Gem::Requirement
58
59
  requirements:
59
- - - ~>
60
+ - - "~>"
60
61
  - !ruby/object:Gem::Version
61
62
  version: '3'
62
- name: rspec
63
- prerelease: false
64
63
  type: :development
64
+ prerelease: false
65
65
  version_requirements: !ruby/object:Gem::Requirement
66
66
  requirements:
67
- - - ~>
67
+ - - "~>"
68
68
  - !ruby/object:Gem::Version
69
69
  version: '3'
70
70
  - !ruby/object:Gem::Dependency
71
+ name: rubocop
71
72
  requirement: !ruby/object:Gem::Requirement
72
73
  requirements:
73
- - - ~>
74
+ - - "~>"
74
75
  - !ruby/object:Gem::Version
75
76
  version: '0'
76
- name: rubocop
77
- prerelease: false
78
77
  type: :development
78
+ prerelease: false
79
79
  version_requirements: !ruby/object:Gem::Requirement
80
80
  requirements:
81
- - - ~>
81
+ - - "~>"
82
82
  - !ruby/object:Gem::Version
83
83
  version: '0'
84
- description: Calculates the distance between 2 arbitrary objects using specified fields and algorithms.
84
+ description: Calculates the distance between 2 arbitrary objects using specified fields
85
+ and algorithms.
85
86
  email:
86
87
  - dev@mx.com
87
88
  executables: []
88
89
  extensions: []
89
90
  extra_rdoc_files: []
90
91
  files:
91
- - .gitignore
92
- - .rubocop.yml
93
- - .travis.yml
92
+ - ".gitignore"
93
+ - ".rubocop.yml"
94
+ - ".travis.yml"
94
95
  - CODE_OF_CONDUCT.md
95
96
  - Gemfile
96
97
  - LICENSE
@@ -112,24 +113,24 @@ homepage: https://github.com/mattnichols/slide_rule
112
113
  licenses:
113
114
  - MIT
114
115
  metadata: {}
115
- post_install_message:
116
+ post_install_message:
116
117
  rdoc_options: []
117
118
  require_paths:
118
119
  - lib
119
120
  required_ruby_version: !ruby/object:Gem::Requirement
120
121
  requirements:
121
- - - '>='
122
+ - - ">="
122
123
  - !ruby/object:Gem::Version
123
124
  version: '0'
124
125
  required_rubygems_version: !ruby/object:Gem::Requirement
125
126
  requirements:
126
- - - '>='
127
+ - - ">="
127
128
  - !ruby/object:Gem::Version
128
129
  version: '0'
129
130
  requirements: []
130
- rubyforge_project:
131
- rubygems_version: 2.4.8
132
- signing_key:
131
+ rubyforge_project:
132
+ rubygems_version: 2.4.6
133
+ signing_key:
133
134
  specification_version: 4
134
135
  summary: Ruby object distance calculator
135
136
  test_files: