slide_rule 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d6f0cc115c650f57a1ebc32fb16dedb8647df5c4
4
- data.tar.gz: cf1f0ba21b49a34ebc56d7595366180adec9d744
3
+ metadata.gz: f3035bfd6643001929a84db47e8de72911901eb6
4
+ data.tar.gz: 0a2f570fb3371d5814a3247aea005676525e69aa
5
5
  SHA512:
6
- metadata.gz: 2fc5aff23287ff405bdc120e5378e48f3b10c30ef23bbd311766cf6cebd7b149dd0fcfc3a34e2fe94cbc45ffe0cbd8a5b6310f3e02031ead580922ff0ead0bba
7
- data.tar.gz: e8913245a2286590fa8e8560885ce86024506fc132424813187103a71de0e625532cea53edc59c476038e3be29c46af6d009a03f3c7d605c31ae23022d861f15
6
+ metadata.gz: 282666dafc39dad85c6850c460e7bf2babe7ad3b38b54469ae0df5b91a9802400f3ed50626507712cc7a5afa1173add644ec229704133fe5b60858fa986b2d12
7
+ data.tar.gz: 8c167275bb751444636e25fd1c197566e3dde22f88d30a30f8d9a670a786f897252991429ade2e37efa766efbe5c9dfd3982c73d896780df61d6f869e075b92d
data/.gitignore CHANGED
@@ -2,3 +2,7 @@
2
2
  **.DS_Store
3
3
  Guardfile
4
4
  Gemfile.lock
5
+ vendor/cache
6
+ .ruby-gemset
7
+ .ruby-version
8
+ pkg/
data/README.md CHANGED
@@ -19,25 +19,26 @@ _Note: weights are assumed to be equal if not provided_
19
19
 
20
20
  #API
21
21
 
22
- ##Describe the field calculators
22
+ ##Describe the field distance calculators
23
23
 
24
24
  Each field to be considered in the distance calculation should be described
25
25
  with a calculation method and weight(optional)
26
26
 
27
27
  Valid calculators:
28
28
 
29
- * day_of_month (this needs to be factored into configurable date_recurrence)
30
- * float_range_distance
29
+ * day_of_year
30
+ * day_of_month
31
+ * levenshtein
31
32
 
32
33
  ```ruby
33
34
  distance_rules = {
34
35
  :description => {
35
36
  :weight => 0.80,
36
- :type => :levenshtein,
37
+ :calculator => :levenshtein,
37
38
  },
38
39
  :date => {
39
40
  :weight => 0.90,
40
- :type => :day_of_month,
41
+ :calculator => :day_of_month,
41
42
  },
42
43
  }
43
44
  ```
@@ -81,3 +82,41 @@ matcher.closest_match(candidate, [example, example2], 0.2)
81
82
  => example
82
83
 
83
84
  ```
85
+
86
+ ## Custom Field Distance Calculators
87
+
88
+ To define a custom field distance calculator, define a class with a `calculate(value1, value2)` method.
89
+
90
+ Requirements:
91
+ * Class must be stateless
92
+ * `calculate` should return a float from `0.0` (perfect match) to `1.0` (no match)
93
+ * Calculation should not be order dependent (e.g. `calculate(a, b) == calculate(b, a)`)
94
+
95
+ ```ruby
96
+ class StringLengthCalculator
97
+ def calculate(l1, l2)
98
+ diff = (l1 - l2).abs.to_f
99
+ return diff / [l1, l2].max
100
+ end
101
+ end
102
+
103
+ matcher = ::SlideRule::DistanceCalculator.new(
104
+ :length => {
105
+ :weight => 1.0,
106
+ :calculator => StringLengthCalculator
107
+ }
108
+ )
109
+
110
+ # Find the string with the closest length
111
+ matcher.closest_match("Howdy Doody Time!", ["Felix the cat", "Mighty Mouse"], 0.5)
112
+ # => { :item=>"Felix the cat", :distance=>0.23529411764705882 }
113
+ ```
114
+
115
+ See the [distance_calculators](https://github.com/mattnichols/slide_rule/tree/master/lib/slide_rule/distance_calculators) directory in source for more examples.
116
+
117
+
118
+ # To Do
119
+
120
+ * Add more field distance calculators
121
+
122
+
@@ -1,7 +1,9 @@
1
1
  module SlideRule
2
2
  class DistanceCalculator
3
+ attr_accessor :rules
4
+
3
5
  def initialize(rules)
4
- @rules = normalize_weights(rules)
6
+ @rules = prepare_rules(rules)
5
7
  end
6
8
 
7
9
  # TODO: Figure this out. Very inefficient!
@@ -20,8 +22,15 @@ module SlideRule
20
22
  end
21
23
  end
22
24
 
23
- def closest_match(obj, array, threshold)
24
- matches(obj, array, threshold).sort { |match| match[:distance] }.first
25
+ def closest_match(obj, array, threshold = 1.0)
26
+ matches(obj, array, threshold).sort_by { |match| match[:distance] }.first
27
+ end
28
+
29
+ def closest_matching_item(obj, array, threshold = 1.0)
30
+ match = closest_match(obj, array, threshold)
31
+ return nil if match.nil?
32
+
33
+ match[:item]
25
34
  end
26
35
 
27
36
  def is_match?(obj_1, obj_2, threshold)
@@ -32,7 +41,7 @@ module SlideRule
32
41
  def matches(obj, array, threshold)
33
42
  array.map do |item|
34
43
  distance = calculate_distance(obj, item)
35
- next nil unless distance < threshold
44
+ next nil unless distance <= threshold
36
45
  {
37
46
  item: item,
38
47
  distance: distance
@@ -48,23 +57,40 @@ module SlideRule
48
57
  # {
49
58
  # :attribute_name => {
50
59
  # :weight => 0.90,
51
- # :type => :distance_calculator,
60
+ # :calculator => :distance_calculator,
52
61
  # }
53
62
  # }
54
63
  def calculate_distance(i1, i2)
55
- @rules.map do |attribute, rule|
64
+ calculate_weighted_distances(i1, i2).reduce(0.0) do |distance, obj|
65
+ distance + (obj[:distance] * obj[:weight])
66
+ end
67
+ end
68
+
69
+ private
70
+
71
+ def calculate_weighted_distances(i1, i2)
72
+ distances = @rules.map do |attribute, rule|
56
73
  val1 = i1.send(attribute)
57
74
  val2 = i2.send(attribute)
58
- calculator = get_calculator(rule[:type])
59
- calculator.calculate(val1, val2).to_f * rule[:weight]
60
- end.reduce(0.0, &:+)
75
+ distance = rule[:calculator].calculate(val1, val2)
76
+ next { distance: distance.to_f, weight: rule[:weight] } unless distance.nil?
77
+
78
+ nil
79
+ end
80
+ normalize_weights_array(distances) if distances.compact!
81
+
82
+ distances
61
83
  end
62
84
 
63
85
  def get_calculator(calculator)
64
86
  return calculator.new if calculator.is_a?(Class)
65
87
 
66
88
  klass_name = "#{calculator.to_s.split('_').collect(&:capitalize).join}"
67
- klass = ::SlideRule::DistanceCalculators.const_get(klass_name)
89
+ klass = begin
90
+ ::SlideRule::DistanceCalculators.const_get(klass_name)
91
+ rescue(::NameError)
92
+ nil
93
+ end
68
94
 
69
95
  fail ArgumentError, "Unable to find calculator #{klass_name}" if klass.nil?
70
96
 
@@ -73,12 +99,46 @@ module SlideRule
73
99
 
74
100
  # Ensures all weights add up to 1.0
75
101
  #
76
- def normalize_weights(rules_hash)
77
- rules = rules_hash.dup
102
+ def normalize_weights(rules)
78
103
  weight_total = rules.map { |_attr, rule| rule[:weight] }.reduce(0.0, &:+)
79
104
  rules.each do |_attr, rule|
80
105
  rule[:weight] = rule[:weight] / weight_total
81
106
  end
82
107
  end
108
+
109
+ # Ensures all weights add up to 1.0 in array of hashes
110
+ #
111
+ def normalize_weights_array(rules)
112
+ weight_total = rules.map { |rule| rule[:weight] }.reduce(0.0, &:+)
113
+ rules.each do |rule|
114
+ rule[:weight] = rule[:weight] / weight_total
115
+ end
116
+ end
117
+
118
+ # Prepares a duplicate of given rules hash with normalized weights and calculator instances
119
+ #
120
+ def prepare_rules(rules)
121
+ prepared_rules = rules.each_with_object({}) do |(attribute, rule), copy|
122
+ rule = copy[attribute] = safe_dup(rule)
123
+
124
+ if rule[:type]
125
+ puts 'Rule key `:type` is deprecated. Use `:calculator` instead.'
126
+ rule[:calculator] = rule[:type]
127
+ end
128
+
129
+ rule[:calculator] = get_calculator(rule[:calculator])
130
+
131
+ copy
132
+ end
133
+ prepared_rules = normalize_weights(prepared_rules)
134
+
135
+ prepared_rules
136
+ end
137
+
138
+ def safe_dup(obj)
139
+ obj.dup
140
+ rescue
141
+ obj
142
+ end
83
143
  end
84
144
  end
@@ -23,6 +23,7 @@ module SlideRule
23
23
  private
24
24
 
25
25
  def cleanse_date(date)
26
+ date = Time.at(date).utc.to_date if date.is_a?(::Fixnum)
26
27
  date = Date.parse(date) unless date.is_a?(::Date) || date.is_a?(::Time)
27
28
  date = date.to_date if date.is_a?(::Time)
28
29
 
@@ -18,6 +18,7 @@ module SlideRule
18
18
  private
19
19
 
20
20
  def cleanse_date(date)
21
+ date = Time.at(date).utc.to_date if date.is_a?(::Fixnum)
21
22
  date = Date.parse(date) unless date.is_a?(::Date) || date.is_a?(::Time)
22
23
  date = date.to_date if date.is_a?(::Time)
23
24
 
@@ -1,3 +1,3 @@
1
1
  module SlideRule
2
- VERSION = '0.2.0'
2
+ VERSION = '0.2.1'
3
3
  end
@@ -19,6 +19,12 @@ describe ::SlideRule::DistanceCalculator do
19
19
  end
20
20
  end
21
21
 
22
+ class NilCalc
23
+ def calculate(_first, _second)
24
+ nil
25
+ end
26
+ end
27
+
22
28
  let(:examples) do
23
29
  [
24
30
  ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com'),
@@ -36,22 +42,32 @@ describe ::SlideRule::DistanceCalculator do
36
42
  ::SlideRule::DistanceCalculator.new(
37
43
  description: {
38
44
  weight: 0.80,
39
- type: :levenshtein
45
+ calculator: :levenshtein
40
46
  },
41
47
  date: {
42
48
  weight: 0.90,
43
- type: :day_of_month
49
+ calculator: :day_of_month
44
50
  }
45
51
  )
46
52
  end
47
53
 
48
- it 'finds recurring transaction' do
54
+ it 'finds closest' do
49
55
  example = ExampleTransaction.new(description: 'Wells Fargo Dealer SVC', date: '2015-06-17')
50
56
  expect(calculator.closest_match(example, examples, 0.2)[:item]).to eq(examples[3])
51
57
 
52
58
  example = ExampleTransaction.new(description: 'Audible.com', date: '2015-06-05')
53
59
  expect(calculator.closest_match(example, examples, 0.2)[:item]).to eq(examples[0])
54
60
  end
61
+
62
+ it 'with default threshold' do
63
+ example = ExampleTransaction.new(description: 'Audible.com', date: '2015-06-05')
64
+ expect(calculator.closest_match(example, examples)[:item]).to eq(examples[0])
65
+ end
66
+
67
+ it 'finds closest matching item' do
68
+ example = ExampleTransaction.new(description: 'Audible.com', date: '2015-06-05')
69
+ expect(calculator.closest_matching_item(example, examples)).to eq(examples[0])
70
+ end
55
71
  end
56
72
 
57
73
  describe '#is_match?' do
@@ -89,11 +105,11 @@ describe ::SlideRule::DistanceCalculator do
89
105
  calculator = ::SlideRule::DistanceCalculator.new(
90
106
  description: {
91
107
  weight: 1.00,
92
- type: :levenshtein
108
+ calculator: :levenshtein
93
109
  },
94
110
  date: {
95
111
  weight: 0.50,
96
- type: :day_of_month
112
+ calculator: :day_of_month
97
113
  }
98
114
  )
99
115
  example = ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com')
@@ -105,11 +121,11 @@ describe ::SlideRule::DistanceCalculator do
105
121
  calculator = ::SlideRule::DistanceCalculator.new(
106
122
  description: {
107
123
  weight: 0.50,
108
- type: :levenshtein
124
+ calculator: :levenshtein
109
125
  },
110
126
  date: {
111
127
  weight: 0.50,
112
- type: :day_of_month
128
+ calculator: :day_of_month
113
129
  }
114
130
  )
115
131
  example = ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com')
@@ -125,6 +141,23 @@ describe ::SlideRule::DistanceCalculator do
125
141
  distance = calculator.calculate_distance(example, candidate)
126
142
  expect(distance.round(4)).to eq(((3.0 * 0.5 / 15) + (4.0 * 0.5 / 11)).round(4))
127
143
  end
144
+
145
+ it 'should renormalize on nil' do
146
+ calculator = ::SlideRule::DistanceCalculator.new(
147
+ description: {
148
+ weight: 0.50,
149
+ calculator: :levenshtein
150
+ },
151
+ date: {
152
+ weight: 0.50,
153
+ calculator: NilCalc
154
+ }
155
+ )
156
+ example1 = ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com')
157
+ example2 = ::ExampleTransaction.new(amount: 25.00, date: '2015-06-08', description: 'Audible Inc')
158
+
159
+ expect(calculator.calculate_distance(example1, example2).round(4)).to eq((4.0 / 11).round(4))
160
+ end
128
161
  end
129
162
 
130
163
  context 'uses custom calculator' do
@@ -132,7 +165,7 @@ describe ::SlideRule::DistanceCalculator do
132
165
  calculator = ::SlideRule::DistanceCalculator.new(
133
166
  description: {
134
167
  weight: 1.00,
135
- type: CustomCalc
168
+ calculator: CustomCalc
136
169
  }
137
170
  )
138
171
  example = ::ExampleTransaction.new
@@ -142,5 +175,51 @@ describe ::SlideRule::DistanceCalculator do
142
175
  expect(distance).to eq(0.9)
143
176
  end
144
177
  end
178
+
179
+ describe '#initialize' do
180
+ context 'validates rules on initialize' do
181
+ it 'should allow :type' do
182
+ ::SlideRule::DistanceCalculator.new(
183
+ description: {
184
+ weight: 1.00,
185
+ type: CustomCalc
186
+ }
187
+ )
188
+ end
189
+
190
+ it 'should not modify input rule hash' do
191
+ rules = {
192
+ description: {
193
+ weight: 1.0,
194
+ calculator: CustomCalc
195
+ },
196
+ name: {
197
+ weight: 1.0,
198
+ type: CustomCalc
199
+ }
200
+ }
201
+ ::SlideRule::DistanceCalculator.new(rules)
202
+ # Run a second time to ensure that no calculator instance is in rules. Will currently throw an error.
203
+ ::SlideRule::DistanceCalculator.new(rules)
204
+
205
+ # :type should still be in original hash
206
+ expect(rules[:name].key?(:calculator)).to eq(false)
207
+
208
+ # :weight should not be normalized in original hash
209
+ expect(rules[:name][:weight]).to eq(1.0)
210
+ end
211
+
212
+ it 'should raise error if not valid calculator' do
213
+ expect do
214
+ ::SlideRule::DistanceCalculator.new(
215
+ description: {
216
+ weight: 1.00,
217
+ calculator: :some_junk
218
+ }
219
+ )
220
+ end.to raise_error(::ArgumentError, 'Unable to find calculator SomeJunk')
221
+ end
222
+ end
223
+ end
145
224
  end
146
225
  end
@@ -6,6 +6,10 @@ describe ::SlideRule::DistanceCalculators::DayOfMonth do
6
6
  expect(described_class.new.calculate('2012-03-19', '2014-08-19')).to eq(0.0)
7
7
  end
8
8
 
9
+ it 'should accept epoch date' do
10
+ expect(described_class.new.calculate(1_444_262_400, 1_444_262_400)).to eq(0.0)
11
+ end
12
+
9
13
  it 'should calculate when date is in the same month' do
10
14
  expect(described_class.new.calculate('2012-03-19', '2014-08-22')).to eq(3.0 / 15)
11
15
  expect(described_class.new.calculate('2012-03-19', '2014-08-09')).to eq(10.0 / 15)
@@ -5,6 +5,10 @@ describe ::SlideRule::DistanceCalculators::DayOfYear do
5
5
  it 'should return a 0 distance' do
6
6
  expect(described_class.new.calculate('2015-10-8', '2015-10-8')).to eq(0.0)
7
7
  end
8
+
9
+ it 'should accept epoch date' do
10
+ expect(described_class.new.calculate(1_444_262_400, 1_444_262_400)).to eq(0.0)
11
+ end
8
12
  end
9
13
 
10
14
  context 'when dates are more than a year apart' do
@@ -1,11 +1,19 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe ::SlideRule::DistanceCalculators::Levenshtein do
4
+ let(:subject) { described_class.new }
5
+
4
6
  it 'should calculate perfect match' do
5
- expect(described_class.new.calculate('this is a test', 'this is a test')).to eq(0.0)
7
+ expect(subject.calculate('this is a test', 'this is a test')).to eq(0.0)
6
8
  end
7
9
 
8
10
  it 'should calculate distance as distance divided by length of longest string' do
9
- expect(described_class.new.calculate('this is a test', 'this is a test!').round(4)).to eq((1.0 / 15).round(4))
11
+ expect(subject.calculate('this is a test', 'this is a test!').round(4)).to eq((1.0 / 15).round(4))
12
+ end
13
+
14
+ it 'should handle nils' do
15
+ expect(subject.calculate(nil, nil)).to eq(0.0)
16
+ expect(subject.calculate(nil, 'goodbye')).to eq(1.0)
17
+ expect(subject.calculate('hello', nil)).to eq(1.0)
10
18
  end
11
19
  end
data/spec/spec_helper.rb CHANGED
@@ -18,6 +18,7 @@
18
18
  # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
19
19
 
20
20
  require_relative '../lib/slide_rule.rb'
21
+ require 'pry'
21
22
 
22
23
  RSpec.configure do |config|
23
24
  # rspec-expectations config goes here. You can use an alternate
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slide_rule
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - mattnichols