slide_rule 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d6f0cc115c650f57a1ebc32fb16dedb8647df5c4
4
- data.tar.gz: cf1f0ba21b49a34ebc56d7595366180adec9d744
3
+ metadata.gz: f3035bfd6643001929a84db47e8de72911901eb6
4
+ data.tar.gz: 0a2f570fb3371d5814a3247aea005676525e69aa
5
5
  SHA512:
6
- metadata.gz: 2fc5aff23287ff405bdc120e5378e48f3b10c30ef23bbd311766cf6cebd7b149dd0fcfc3a34e2fe94cbc45ffe0cbd8a5b6310f3e02031ead580922ff0ead0bba
7
- data.tar.gz: e8913245a2286590fa8e8560885ce86024506fc132424813187103a71de0e625532cea53edc59c476038e3be29c46af6d009a03f3c7d605c31ae23022d861f15
6
+ metadata.gz: 282666dafc39dad85c6850c460e7bf2babe7ad3b38b54469ae0df5b91a9802400f3ed50626507712cc7a5afa1173add644ec229704133fe5b60858fa986b2d12
7
+ data.tar.gz: 8c167275bb751444636e25fd1c197566e3dde22f88d30a30f8d9a670a786f897252991429ade2e37efa766efbe5c9dfd3982c73d896780df61d6f869e075b92d
data/.gitignore CHANGED
@@ -2,3 +2,7 @@
2
2
  **.DS_Store
3
3
  Guardfile
4
4
  Gemfile.lock
5
+ vendor/cache
6
+ .ruby-gemset
7
+ .ruby-version
8
+ pkg/
data/README.md CHANGED
@@ -19,25 +19,26 @@ _Note: weights are assumed to be equal if not provided_
19
19
 
20
20
  # API
21
21
 
22
- ##Describe the field calculators
22
+ ## Describe the field distance calculators
23
23
 
24
24
  Each field to be considered in the distance calculation should be described
25
25
  with a calculation method and weight (optional)
26
26
 
27
27
  Valid calculators:
28
28
 
29
- * day_of_month (this needs to be factored into configurable date_recurrence)
30
- * float_range_distance
29
+ * day_of_year
30
+ * day_of_month
31
+ * levenshtein
31
32
 
32
33
  ```ruby
33
34
  distance_rules = {
34
35
  :description => {
35
36
  :weight => 0.80,
36
- :type => :levenshtein,
37
+ :calculator => :levenshtein,
37
38
  },
38
39
  :date => {
39
40
  :weight => 0.90,
40
- :type => :day_of_month,
41
+ :calculator => :day_of_month,
41
42
  },
42
43
  }
43
44
  ```
@@ -81,3 +82,41 @@ matcher.closest_match(candidate, [example, example2], 0.2)
81
82
  => example
82
83
 
83
84
  ```
85
+
86
+ ## Custom Field Distance Calculators
87
+
88
+ To define a custom field distance calculator, define a class with a `calculate(value1, value2)` method.
89
+
90
+ Requirements:
91
+ * Class must be stateless
92
+ * `calculate` should return a float from `0` (perfect match) to `1.0` (no match)
93
+ * Calculation should not be order dependent (e.g. `calculate(a, b) == calculate(b, a)`)
94
+
95
+ ```ruby
96
+ class StringLengthCalculator
97
+ def calculate(l1, l2)
98
+ diff = (l1 - l2).abs.to_f
99
+ return diff / [l1, l2].max
100
+ end
101
+ end
102
+
103
+ matcher = ::SlideRule::DistanceCalculator.new(
104
+ :length => {
105
+ :weight => 1.0,
106
+ :calculator => StringLengthCalculator
107
+ }
108
+ )
109
+
110
+ # Find the string with the closest length
111
+ matcher.closest_match("Howdy Doody Time!", ["Felix the cat", "Mighty Mouse"], 0.5)
112
+ # => { :item=>"Felix the cat", :distance=>0.23529411764705882 }
113
+ ```
114
+
115
+ See the [distance_calculators](https://github.com/mattnichols/slide_rule/tree/master/lib/slide_rule/distance_calculators) directory in source for more examples.
116
+
117
+
118
+ # To Do
119
+
120
+ * Add more field distance calculators
121
+
122
+
@@ -1,7 +1,9 @@
1
1
  module SlideRule
2
2
  class DistanceCalculator
3
+ attr_accessor :rules
4
+
3
5
  def initialize(rules)
4
- @rules = normalize_weights(rules)
6
+ @rules = prepare_rules(rules)
5
7
  end
6
8
 
7
9
  # TODO: Figure this out. Very inefficient!
@@ -20,8 +22,15 @@ module SlideRule
20
22
  end
21
23
  end
22
24
 
23
- def closest_match(obj, array, threshold)
24
- matches(obj, array, threshold).sort { |match| match[:distance] }.first
25
+ def closest_match(obj, array, threshold = 1.0)
26
+ matches(obj, array, threshold).sort_by { |match| match[:distance] }.first
27
+ end
28
+
29
+ def closest_matching_item(obj, array, threshold = 1.0)
30
+ match = closest_match(obj, array, threshold)
31
+ return nil if match.nil?
32
+
33
+ match[:item]
25
34
  end
26
35
 
27
36
  def is_match?(obj_1, obj_2, threshold)
@@ -32,7 +41,7 @@ module SlideRule
32
41
  def matches(obj, array, threshold)
33
42
  array.map do |item|
34
43
  distance = calculate_distance(obj, item)
35
- next nil unless distance < threshold
44
+ next nil unless distance <= threshold
36
45
  {
37
46
  item: item,
38
47
  distance: distance
@@ -48,23 +57,40 @@ module SlideRule
48
57
  # {
49
58
  # :attribute_name => {
50
59
  # :weight => 0.90,
51
- # :type => :distance_calculator,
60
+ # :calculator => :distance_calculator,
52
61
  # }
53
62
  # }
54
63
  def calculate_distance(i1, i2)
55
- @rules.map do |attribute, rule|
64
+ calculate_weighted_distances(i1, i2).reduce(0.0) do |distance, obj|
65
+ distance + (obj[:distance] * obj[:weight])
66
+ end
67
+ end
68
+
69
+ private
70
+
71
+ def calculate_weighted_distances(i1, i2)
72
+ distances = @rules.map do |attribute, rule|
56
73
  val1 = i1.send(attribute)
57
74
  val2 = i2.send(attribute)
58
- calculator = get_calculator(rule[:type])
59
- calculator.calculate(val1, val2).to_f * rule[:weight]
60
- end.reduce(0.0, &:+)
75
+ distance = rule[:calculator].calculate(val1, val2)
76
+ next { distance: distance.to_f, weight: rule[:weight] } unless distance.nil?
77
+
78
+ nil
79
+ end
80
+ normalize_weights_array(distances) if distances.compact!
81
+
82
+ distances
61
83
  end
62
84
 
63
85
  def get_calculator(calculator)
64
86
  return calculator.new if calculator.is_a?(Class)
65
87
 
66
88
  klass_name = "#{calculator.to_s.split('_').collect(&:capitalize).join}"
67
- klass = ::SlideRule::DistanceCalculators.const_get(klass_name)
89
+ klass = begin
90
+ ::SlideRule::DistanceCalculators.const_get(klass_name)
91
+ rescue(::NameError)
92
+ nil
93
+ end
68
94
 
69
95
  fail ArgumentError, "Unable to find calculator #{klass_name}" if klass.nil?
70
96
 
@@ -73,12 +99,46 @@ module SlideRule
73
99
 
74
100
  # Ensures all weights add up to 1.0
75
101
  #
76
- def normalize_weights(rules_hash)
77
- rules = rules_hash.dup
102
+ def normalize_weights(rules)
78
103
  weight_total = rules.map { |_attr, rule| rule[:weight] }.reduce(0.0, &:+)
79
104
  rules.each do |_attr, rule|
80
105
  rule[:weight] = rule[:weight] / weight_total
81
106
  end
82
107
  end
108
+
109
+ # Ensures all weights add up to 1.0 in array of hashes
110
+ #
111
+ def normalize_weights_array(rules)
112
+ weight_total = rules.map { |rule| rule[:weight] }.reduce(0.0, &:+)
113
+ rules.each do |rule|
114
+ rule[:weight] = rule[:weight] / weight_total
115
+ end
116
+ end
117
+
118
+ # Prepares a duplicate of given rules hash with normalized weights and calculator instances
119
+ #
120
+ def prepare_rules(rules)
121
+ prepared_rules = rules.each_with_object({}) do |(attribute, rule), copy|
122
+ rule = copy[attribute] = safe_dup(rule)
123
+
124
+ if rule[:type]
125
+ puts 'Rule key `:type` is deprecated. Use `:calculator` instead.'
126
+ rule[:calculator] = rule[:type]
127
+ end
128
+
129
+ rule[:calculator] = get_calculator(rule[:calculator])
130
+
131
+ copy
132
+ end
133
+ prepared_rules = normalize_weights(prepared_rules)
134
+
135
+ prepared_rules
136
+ end
137
+
138
+ def safe_dup(obj)
139
+ obj.dup
140
+ rescue
141
+ obj
142
+ end
83
143
  end
84
144
  end
@@ -23,6 +23,7 @@ module SlideRule
23
23
  private
24
24
 
25
25
  def cleanse_date(date)
26
+ date = Time.at(date).utc.to_date if date.is_a?(::Fixnum)
26
27
  date = Date.parse(date) unless date.is_a?(::Date) || date.is_a?(::Time)
27
28
  date = date.to_date if date.is_a?(::Time)
28
29
 
@@ -18,6 +18,7 @@ module SlideRule
18
18
  private
19
19
 
20
20
  def cleanse_date(date)
21
+ date = Time.at(date).utc.to_date if date.is_a?(::Fixnum)
21
22
  date = Date.parse(date) unless date.is_a?(::Date) || date.is_a?(::Time)
22
23
  date = date.to_date if date.is_a?(::Time)
23
24
 
@@ -1,3 +1,3 @@
1
1
  module SlideRule
2
- VERSION = '0.2.0'
2
+ VERSION = '0.2.1'
3
3
  end
@@ -19,6 +19,12 @@ describe ::SlideRule::DistanceCalculator do
19
19
  end
20
20
  end
21
21
 
22
+ class NilCalc
23
+ def calculate(_first, _second)
24
+ nil
25
+ end
26
+ end
27
+
22
28
  let(:examples) do
23
29
  [
24
30
  ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com'),
@@ -36,22 +42,32 @@ describe ::SlideRule::DistanceCalculator do
36
42
  ::SlideRule::DistanceCalculator.new(
37
43
  description: {
38
44
  weight: 0.80,
39
- type: :levenshtein
45
+ calculator: :levenshtein
40
46
  },
41
47
  date: {
42
48
  weight: 0.90,
43
- type: :day_of_month
49
+ calculator: :day_of_month
44
50
  }
45
51
  )
46
52
  end
47
53
 
48
- it 'finds recurring transaction' do
54
+ it 'finds closest' do
49
55
  example = ExampleTransaction.new(description: 'Wells Fargo Dealer SVC', date: '2015-06-17')
50
56
  expect(calculator.closest_match(example, examples, 0.2)[:item]).to eq(examples[3])
51
57
 
52
58
  example = ExampleTransaction.new(description: 'Audible.com', date: '2015-06-05')
53
59
  expect(calculator.closest_match(example, examples, 0.2)[:item]).to eq(examples[0])
54
60
  end
61
+
62
+ it 'with default threshold' do
63
+ example = ExampleTransaction.new(description: 'Audible.com', date: '2015-06-05')
64
+ expect(calculator.closest_match(example, examples)[:item]).to eq(examples[0])
65
+ end
66
+
67
+ it 'finds closest matching item' do
68
+ example = ExampleTransaction.new(description: 'Audible.com', date: '2015-06-05')
69
+ expect(calculator.closest_matching_item(example, examples)).to eq(examples[0])
70
+ end
55
71
  end
56
72
 
57
73
  describe '#is_match?' do
@@ -89,11 +105,11 @@ describe ::SlideRule::DistanceCalculator do
89
105
  calculator = ::SlideRule::DistanceCalculator.new(
90
106
  description: {
91
107
  weight: 1.00,
92
- type: :levenshtein
108
+ calculator: :levenshtein
93
109
  },
94
110
  date: {
95
111
  weight: 0.50,
96
- type: :day_of_month
112
+ calculator: :day_of_month
97
113
  }
98
114
  )
99
115
  example = ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com')
@@ -105,11 +121,11 @@ describe ::SlideRule::DistanceCalculator do
105
121
  calculator = ::SlideRule::DistanceCalculator.new(
106
122
  description: {
107
123
  weight: 0.50,
108
- type: :levenshtein
124
+ calculator: :levenshtein
109
125
  },
110
126
  date: {
111
127
  weight: 0.50,
112
- type: :day_of_month
128
+ calculator: :day_of_month
113
129
  }
114
130
  )
115
131
  example = ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com')
@@ -125,6 +141,23 @@ describe ::SlideRule::DistanceCalculator do
125
141
  distance = calculator.calculate_distance(example, candidate)
126
142
  expect(distance.round(4)).to eq(((3.0 * 0.5 / 15) + (4.0 * 0.5 / 11)).round(4))
127
143
  end
144
+
145
+ it 'should renormalize on nil' do
146
+ calculator = ::SlideRule::DistanceCalculator.new(
147
+ description: {
148
+ weight: 0.50,
149
+ calculator: :levenshtein
150
+ },
151
+ date: {
152
+ weight: 0.50,
153
+ calculator: NilCalc
154
+ }
155
+ )
156
+ example1 = ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com')
157
+ example2 = ::ExampleTransaction.new(amount: 25.00, date: '2015-06-08', description: 'Audible Inc')
158
+
159
+ expect(calculator.calculate_distance(example1, example2).round(4)).to eq((4.0 / 11).round(4))
160
+ end
128
161
  end
129
162
 
130
163
  context 'uses custom calculator' do
@@ -132,7 +165,7 @@ describe ::SlideRule::DistanceCalculator do
132
165
  calculator = ::SlideRule::DistanceCalculator.new(
133
166
  description: {
134
167
  weight: 1.00,
135
- type: CustomCalc
168
+ calculator: CustomCalc
136
169
  }
137
170
  )
138
171
  example = ::ExampleTransaction.new
@@ -142,5 +175,51 @@ describe ::SlideRule::DistanceCalculator do
142
175
  expect(distance).to eq(0.9)
143
176
  end
144
177
  end
178
+
179
+ describe '#initialize' do
180
+ context 'validates rules on initialize' do
181
+ it 'should allow :type' do
182
+ ::SlideRule::DistanceCalculator.new(
183
+ description: {
184
+ weight: 1.00,
185
+ type: CustomCalc
186
+ }
187
+ )
188
+ end
189
+
190
+ it 'should not modify input rule hash' do
191
+ rules = {
192
+ description: {
193
+ weight: 1.0,
194
+ calculator: CustomCalc
195
+ },
196
+ name: {
197
+ weight: 1.0,
198
+ type: CustomCalc
199
+ }
200
+ }
201
+ ::SlideRule::DistanceCalculator.new(rules)
202
+ # Run a second time to ensure the first call left no calculator instance in rules; if it had, this second call would raise an error.
203
+ ::SlideRule::DistanceCalculator.new(rules)
204
+
205
+ # :type should still be in original hash
206
+ expect(rules[:name].key?(:calculator)).to eq(false)
207
+
208
+ # :weight should not be normalized in original hash
209
+ expect(rules[:name][:weight]).to eq(1.0)
210
+ end
211
+
212
+ it 'should raise error if not valid calculator' do
213
+ expect do
214
+ ::SlideRule::DistanceCalculator.new(
215
+ description: {
216
+ weight: 1.00,
217
+ calculator: :some_junk
218
+ }
219
+ )
220
+ end.to raise_error(::ArgumentError, 'Unable to find calculator SomeJunk')
221
+ end
222
+ end
223
+ end
145
224
  end
146
225
  end
@@ -6,6 +6,10 @@ describe ::SlideRule::DistanceCalculators::DayOfMonth do
6
6
  expect(described_class.new.calculate('2012-03-19', '2014-08-19')).to eq(0.0)
7
7
  end
8
8
 
9
+ it 'should accept epoch date' do
10
+ expect(described_class.new.calculate(1_444_262_400, 1_444_262_400)).to eq(0.0)
11
+ end
12
+
9
13
  it 'should calculate when date is in the same month' do
10
14
  expect(described_class.new.calculate('2012-03-19', '2014-08-22')).to eq(3.0 / 15)
11
15
  expect(described_class.new.calculate('2012-03-19', '2014-08-09')).to eq(10.0 / 15)
@@ -5,6 +5,10 @@ describe ::SlideRule::DistanceCalculators::DayOfYear do
5
5
  it 'should return a 0 distance' do
6
6
  expect(described_class.new.calculate('2015-10-8', '2015-10-8')).to eq(0.0)
7
7
  end
8
+
9
+ it 'should accept epoch date' do
10
+ expect(described_class.new.calculate(1_444_262_400, 1_444_262_400)).to eq(0.0)
11
+ end
8
12
  end
9
13
 
10
14
  context 'when dates are more than a year apart' do
@@ -1,11 +1,19 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe ::SlideRule::DistanceCalculators::Levenshtein do
4
+ let(:subject) { described_class.new }
5
+
4
6
  it 'should calculate perfect match' do
5
- expect(described_class.new.calculate('this is a test', 'this is a test')).to eq(0.0)
7
+ expect(subject.calculate('this is a test', 'this is a test')).to eq(0.0)
6
8
  end
7
9
 
8
10
  it 'should calculate distance as distance divided by length of longest string' do
9
- expect(described_class.new.calculate('this is a test', 'this is a test!').round(4)).to eq((1.0 / 15).round(4))
11
+ expect(subject.calculate('this is a test', 'this is a test!').round(4)).to eq((1.0 / 15).round(4))
12
+ end
13
+
14
+ it 'should handle nils' do
15
+ expect(subject.calculate(nil, nil)).to eq(0.0)
16
+ expect(subject.calculate(nil, 'goodbye')).to eq(1.0)
17
+ expect(subject.calculate('hello', nil)).to eq(1.0)
10
18
  end
11
19
  end
data/spec/spec_helper.rb CHANGED
@@ -18,6 +18,7 @@
18
18
  # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
19
19
 
20
20
  require_relative '../lib/slide_rule.rb'
21
+ require 'pry'
21
22
 
22
23
  RSpec.configure do |config|
23
24
  # rspec-expectations config goes here. You can use an alternate
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slide_rule
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - mattnichols