slide_rule 0.0.4 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f11d8a365d8a650797e49fe64f89947e2b1f1926
4
- data.tar.gz: 3d6742d9e39d545e6b9f569691ae36970882bc51
3
+ metadata.gz: 140911c7f9078ab2887f32012e167852d15f9ccd
4
+ data.tar.gz: c7f8db4642867e007875ee81d8403e037d647389
5
5
  SHA512:
6
- metadata.gz: e94316418b33fcbadc4a596890837197052f13d863052a4bd9d581da06992b2234462d090fbad4397e791123d263b96151970c20244736286210f89018ad4f8a
7
- data.tar.gz: 89d3aa4826f9663b8cdf92dc975dd473a4d052daa2c6b3ced8cb9cc3139731ab612a6ba1fcf197c4ebe6642320245505931f52f46ca44dfbde18d6db9a30f598
6
+ metadata.gz: 24a59fcbca5f635b1a52d0cd8186df12d97ba0f55cb27e2389c55023b87e3cd9b06f3cd8f752cd97b1480466d2c6eb47ab36ca425edd804ae04543f06b33b2d0
7
+ data.tar.gz: 9856686afc80d0437803c44095d0c9561049d1fee4f38913aec2e7f647d7456eb0c959ecde55efd74613e04a265fc4b60de7bf4f48be915b185bd05022eeaeec
data/.gitignore CHANGED
@@ -2,3 +2,7 @@
2
2
  **.DS_Store
3
3
  Guardfile
4
4
  Gemfile.lock
5
+ vendor/cache
6
+ .ruby-gemset
7
+ .ruby-version
8
+ pkg/
data/README.md CHANGED
@@ -19,25 +19,26 @@ _Note: weights are assumed to be equal if not provided_
19
19
 
20
20
  # API
21
21
 
22
- ##Describe the field calculators
22
+ ## Describe the field distance calculators
23
23
 
24
24
  Each field to be considered in the distance calculation should be described
25
25
  with a calculation method and weight (optional)
26
26
 
27
27
  Valid calculators:
28
28
 
29
- * day_of_month (this needs to be factored into configurable date_recurrence)
30
- * float_range_distance
29
+ * day_of_year
30
+ * day_of_month
31
+ * levenshtein
31
32
 
32
33
  ```ruby
33
34
  distance_rules = {
34
35
  :description => {
35
36
  :weight => 0.80,
36
- :type => :levenshtein,
37
+ :calculator => :levenshtein,
37
38
  },
38
39
  :date => {
39
40
  :weight => 0.90,
40
- :type => :day_of_month,
41
+ :calculator => :day_of_month,
41
42
  },
42
43
  }
43
44
  ```
@@ -81,3 +82,41 @@ matcher.closest_match(candidate, [example, example2], 0.2)
81
82
  => example
82
83
 
83
84
  ```
85
+
86
+ ## Custom Field Distance Calculators
87
+
88
+ To define a custom field distance calculator, define a class with a `calculate(value1, value2)` method.
89
+
90
+ Requirements:
91
+ * Class must be stateless
92
+ * `calculate` should return a float from `0.0` (perfect match) to `1.0` (no match)
93
+ * Calculation should not be order dependent (e.g. `calculate(a, b) == calculate(b, a)`)
94
+
95
+ ```ruby
96
+ class StringLengthCalculator
97
+ def calculate(l1, l2)
98
+ diff = (l1 - l2).abs.to_f
99
+ return diff / [l1, l2].max
100
+ end
101
+ end
102
+
103
+ matcher = ::SlideRule::DistanceCalculator.new(
104
+ :length => {
105
+ :weight => 1.0,
106
+ :calculator => StringLengthCalculator
107
+ }
108
+ )
109
+
110
+ # Find the string with the closest length
111
+ matcher.closest_match("Howdy Doody Time!", ["Felix the cat", "Mighty Mouse"], 0.5)
112
+ # => { :item=>"Felix the cat", :distance=>0.23529411764705882 }
113
+ ```
114
+
115
+ See the [distance_calculators](https://github.com/mattnichols/slide_rule/tree/master/lib/slide_rule/distance_calculators) directory in source for more examples.
116
+
117
+
118
+ # To Do
119
+
120
+ * Add more field distance calculators
121
+
122
+
@@ -1,7 +1,9 @@
1
1
  module SlideRule
2
2
  class DistanceCalculator
3
+ attr_accessor :rules
4
+
3
5
  def initialize(rules)
4
- @rules = normalize_weights(rules)
6
+ @rules = prepare_rules(rules)
5
7
  end
6
8
 
7
9
  # TODO: Figure this out. Very inefficient!
@@ -20,19 +22,21 @@ module SlideRule
20
22
  end
21
23
  end
22
24
 
23
- def closest_match(obj, array, threshold)
24
- matches(obj, array, threshold).sort { |match| match[:distance] }.first
25
+ def closest_match(obj, array, threshold = 1.0)
26
+ matches(obj, array, threshold).sort_by { |match| match[:distance] }.first
25
27
  end
26
28
 
27
- def is_match?(obj_1, obj_2, threshold)
28
- distance = calculate_distance(obj_1, obj_2)
29
- distance < threshold
29
+ def closest_matching_item(obj, array, threshold = 1.0)
30
+ match = closest_match(obj, array, threshold)
31
+ return nil if match.nil?
32
+
33
+ match[:item]
30
34
  end
31
35
 
32
36
  def matches(obj, array, threshold)
33
37
  array.map do |item|
34
38
  distance = calculate_distance(obj, item)
35
- next nil unless distance < threshold
39
+ next nil unless distance <= threshold
36
40
  {
37
41
  item: item,
38
42
  distance: distance
@@ -48,16 +52,29 @@ module SlideRule
48
52
  # {
49
53
  # :attribute_name => {
50
54
  # :weight => 0.90,
51
- # :type => :distance_calculator,
55
+ # :calculator => :distance_calculator,
52
56
  # }
53
57
  # }
54
58
  def calculate_distance(i1, i2)
55
- @rules.map do |attribute, rule|
59
+ calculate_weighted_distances(i1, i2).reduce(0.0) do |distance, obj|
60
+ distance + (obj[:distance] * obj[:weight])
61
+ end
62
+ end
63
+
64
+ private
65
+
66
+ def calculate_weighted_distances(i1, i2)
67
+ distances = @rules.map do |attribute, rule|
56
68
  val1 = i1.send(attribute)
57
69
  val2 = i2.send(attribute)
58
- calculator = get_calculator(rule[:type])
59
- calculator.calculate(val1, val2).to_f * rule[:weight]
60
- end.reduce(0.0, &:+)
70
+ distance = rule[:calculator].calculate(val1, val2)
71
+ next { distance: distance.to_f, weight: rule[:weight] } unless distance.nil?
72
+
73
+ nil
74
+ end
75
+ normalize_weights_array(distances) if distances.compact!
76
+
77
+ distances
61
78
  end
62
79
 
63
80
  def get_calculator(calculator)
@@ -73,12 +90,35 @@ module SlideRule
73
90
 
74
91
  # Ensures all weights add up to 1.0
75
92
  #
76
- def normalize_weights(rules_hash)
77
- rules = rules_hash.dup
93
+ def normalize_weights(rules)
78
94
  weight_total = rules.map { |_attr, rule| rule[:weight] }.reduce(0.0, &:+)
79
95
  rules.each do |_attr, rule|
80
96
  rule[:weight] = rule[:weight] / weight_total
81
97
  end
82
98
  end
99
+
100
+ # Ensures all weights add up to 1.0 in array of hashes
101
+ #
102
+ def normalize_weights_array(rules)
103
+ weight_total = rules.map { |rule| rule[:weight] }.reduce(0.0, &:+)
104
+ rules.each do |rule|
105
+ rule[:weight] = rule[:weight] / weight_total
106
+ end
107
+ end
108
+
109
+ def prepare_rules(rules)
110
+ prepared_rules = rules.dup
111
+ prepared_rules = normalize_weights(prepared_rules)
112
+ prepared_rules.each do |_attr, rule|
113
+ if rule[:type]
114
+ puts 'Rule key `:type` is deprecated. Use `:calculator` instead.'
115
+ rule[:calculator] = rule[:type]
116
+ end
117
+
118
+ rule[:calculator] = get_calculator(rule[:calculator])
119
+ end
120
+
121
+ prepared_rules
122
+ end
83
123
  end
84
124
  end
@@ -1,3 +1,3 @@
1
1
  module SlideRule
2
- VERSION = '0.0.4'
2
+ VERSION = '0.1.0'
3
3
  end
@@ -19,6 +19,12 @@ describe ::SlideRule::DistanceCalculator do
19
19
  end
20
20
  end
21
21
 
22
+ class NilCalc
23
+ def calculate(_first, _second)
24
+ nil
25
+ end
26
+ end
27
+
22
28
  let(:examples) do
23
29
  [
24
30
  ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com'),
@@ -36,50 +42,31 @@ describe ::SlideRule::DistanceCalculator do
36
42
  ::SlideRule::DistanceCalculator.new(
37
43
  description: {
38
44
  weight: 0.80,
39
- type: :levenshtein
45
+ calculator: :levenshtein
40
46
  },
41
47
  date: {
42
48
  weight: 0.90,
43
- type: :day_of_month
49
+ calculator: :day_of_month
44
50
  }
45
51
  )
46
52
  end
47
53
 
48
- it 'finds recurring transaction' do
54
+ it 'finds closest' do
49
55
  example = ExampleTransaction.new(description: 'Wells Fargo Dealer SVC', date: '2015-06-17')
50
56
  expect(calculator.closest_match(example, examples, 0.2)[:item]).to eq(examples[3])
51
57
 
52
58
  example = ExampleTransaction.new(description: 'Audible.com', date: '2015-06-05')
53
59
  expect(calculator.closest_match(example, examples, 0.2)[:item]).to eq(examples[0])
54
60
  end
55
- end
56
61
 
57
- describe '#is_match?' do
58
- let(:calculator) do
59
- ::SlideRule::DistanceCalculator.new(
60
- description: {
61
- weight: 0.80,
62
- type: :levenshtein
63
- },
64
- date: {
65
- weight: 0.90,
66
- type: :day_of_month
67
- }
68
- )
69
- end
70
-
71
- it 'returns true if there is a match' do
72
- example_1 = ExampleTransaction.new(description: 'Wells Fargo Dealer SVC', date: '2015-06-17')
73
- example_2 = ExampleTransaction.new(description: 'Wells Fargo Dealer SVC', date: '2015-06-17')
74
-
75
- expect(calculator.is_match?(example_1, example_2, 0.2)).to be(true)
62
+ it 'with default threshold' do
63
+ example = ExampleTransaction.new(description: 'Audible.com', date: '2015-06-05')
64
+ expect(calculator.closest_match(example, examples)[:item]).to eq(examples[0])
76
65
  end
77
66
 
78
- it 'returns false if there is a match' do
79
- example_1 = ExampleTransaction.new(description: 'Wells Fargo Dealer SVC', date: '2015-06-17')
80
- example_2 = ExampleTransaction.new(description: 'Taco Bell', date: '2015-06-17')
81
-
82
- expect(calculator.is_match?(example_1, example_2, 0.2)).to be(false)
67
+ it 'finds closest matching item' do
68
+ example = ExampleTransaction.new(description: 'Audible.com', date: '2015-06-05')
69
+ expect(calculator.closest_matching_item(example, examples)).to eq(examples[0])
83
70
  end
84
71
  end
85
72
 
@@ -89,11 +76,11 @@ describe ::SlideRule::DistanceCalculator do
89
76
  calculator = ::SlideRule::DistanceCalculator.new(
90
77
  description: {
91
78
  weight: 1.00,
92
- type: :levenshtein
79
+ calculator: :levenshtein
93
80
  },
94
81
  date: {
95
82
  weight: 0.50,
96
- type: :day_of_month
83
+ calculator: :day_of_month
97
84
  }
98
85
  )
99
86
  example = ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com')
@@ -105,11 +92,11 @@ describe ::SlideRule::DistanceCalculator do
105
92
  calculator = ::SlideRule::DistanceCalculator.new(
106
93
  description: {
107
94
  weight: 0.50,
108
- type: :levenshtein
95
+ calculator: :levenshtein
109
96
  },
110
97
  date: {
111
98
  weight: 0.50,
112
- type: :day_of_month
99
+ calculator: :day_of_month
113
100
  }
114
101
  )
115
102
  example = ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com')
@@ -125,6 +112,23 @@ describe ::SlideRule::DistanceCalculator do
125
112
  distance = calculator.calculate_distance(example, candidate)
126
113
  expect(distance.round(4)).to eq(((3.0 * 0.5 / 15) + (4.0 * 0.5 / 11)).round(4))
127
114
  end
115
+
116
+ it 'should renormalize on nil' do
117
+ calculator = ::SlideRule::DistanceCalculator.new(
118
+ description: {
119
+ weight: 0.50,
120
+ calculator: :levenshtein
121
+ },
122
+ date: {
123
+ weight: 0.50,
124
+ calculator: NilCalc
125
+ }
126
+ )
127
+ example1 = ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com')
128
+ example2 = ::ExampleTransaction.new(amount: 25.00, date: '2015-06-08', description: 'Audible Inc')
129
+
130
+ expect(calculator.calculate_distance(example1, example2).round(4)).to eq((4.0 / 11).round(4))
131
+ end
128
132
  end
129
133
 
130
134
  context 'uses custom calculator' do
@@ -132,7 +136,7 @@ describe ::SlideRule::DistanceCalculator do
132
136
  calculator = ::SlideRule::DistanceCalculator.new(
133
137
  description: {
134
138
  weight: 1.00,
135
- type: CustomCalc
139
+ calculator: CustomCalc
136
140
  }
137
141
  )
138
142
  example = ::ExampleTransaction.new
@@ -142,5 +146,27 @@ describe ::SlideRule::DistanceCalculator do
142
146
  expect(distance).to eq(0.9)
143
147
  end
144
148
  end
149
+
150
+ context 'validates rules on initialize' do
151
+ it 'should allow :type' do
152
+ ::SlideRule::DistanceCalculator.new(
153
+ description: {
154
+ weight: 1.00,
155
+ type: CustomCalc
156
+ }
157
+ )
158
+ end
159
+
160
+ it 'should raise error if not valid calculator' do
161
+ expect do
162
+ ::SlideRule::DistanceCalculator.new(
163
+ description: {
164
+ weight: 1.00,
165
+ calculator: :some_junk
166
+ }
167
+ )
168
+ end.to raise_error
169
+ end
170
+ end
145
171
  end
146
172
  end
data/spec/spec_helper.rb CHANGED
@@ -18,6 +18,7 @@
18
18
  # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
19
19
 
20
20
  require_relative '../lib/slide_rule.rb'
21
+ require 'pry'
21
22
 
22
23
  RSpec.configure do |config|
23
24
  # rspec-expectations config goes here. You can use an alternate
metadata CHANGED
@@ -1,96 +1,97 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slide_rule
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - mattnichols
8
8
  - fergmastaflex
9
- autorequire:
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-01-11 00:00:00.000000000 Z
12
+ date: 2015-11-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
+ name: vladlev
15
16
  requirement: !ruby/object:Gem::Requirement
16
17
  requirements:
17
- - - ~>
18
+ - - "~>"
18
19
  - !ruby/object:Gem::Version
19
20
  version: '1.0'
20
- name: vladlev
21
- prerelease: false
22
21
  type: :runtime
22
+ prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
25
- - - ~>
25
+ - - "~>"
26
26
  - !ruby/object:Gem::Version
27
27
  version: '1.0'
28
28
  - !ruby/object:Gem::Dependency
29
+ name: rake
29
30
  requirement: !ruby/object:Gem::Requirement
30
31
  requirements:
31
- - - ~>
32
+ - - "~>"
32
33
  - !ruby/object:Gem::Version
33
34
  version: '10'
34
- name: rake
35
- prerelease: false
36
35
  type: :development
36
+ prerelease: false
37
37
  version_requirements: !ruby/object:Gem::Requirement
38
38
  requirements:
39
- - - ~>
39
+ - - "~>"
40
40
  - !ruby/object:Gem::Version
41
41
  version: '10'
42
42
  - !ruby/object:Gem::Dependency
43
+ name: pry
43
44
  requirement: !ruby/object:Gem::Requirement
44
45
  requirements:
45
- - - ~>
46
+ - - "~>"
46
47
  - !ruby/object:Gem::Version
47
48
  version: '0'
48
- name: pry
49
- prerelease: false
50
49
  type: :development
50
+ prerelease: false
51
51
  version_requirements: !ruby/object:Gem::Requirement
52
52
  requirements:
53
- - - ~>
53
+ - - "~>"
54
54
  - !ruby/object:Gem::Version
55
55
  version: '0'
56
56
  - !ruby/object:Gem::Dependency
57
+ name: rspec
57
58
  requirement: !ruby/object:Gem::Requirement
58
59
  requirements:
59
- - - ~>
60
+ - - "~>"
60
61
  - !ruby/object:Gem::Version
61
62
  version: '3'
62
- name: rspec
63
- prerelease: false
64
63
  type: :development
64
+ prerelease: false
65
65
  version_requirements: !ruby/object:Gem::Requirement
66
66
  requirements:
67
- - - ~>
67
+ - - "~>"
68
68
  - !ruby/object:Gem::Version
69
69
  version: '3'
70
70
  - !ruby/object:Gem::Dependency
71
+ name: rubocop
71
72
  requirement: !ruby/object:Gem::Requirement
72
73
  requirements:
73
- - - ~>
74
+ - - "~>"
74
75
  - !ruby/object:Gem::Version
75
76
  version: '0'
76
- name: rubocop
77
- prerelease: false
78
77
  type: :development
78
+ prerelease: false
79
79
  version_requirements: !ruby/object:Gem::Requirement
80
80
  requirements:
81
- - - ~>
81
+ - - "~>"
82
82
  - !ruby/object:Gem::Version
83
83
  version: '0'
84
- description: Calculates the distance between 2 arbitrary objects using specified fields and algorithms.
84
+ description: Calculates the distance between 2 arbitrary objects using specified fields
85
+ and algorithms.
85
86
  email:
86
87
  - dev@mx.com
87
88
  executables: []
88
89
  extensions: []
89
90
  extra_rdoc_files: []
90
91
  files:
91
- - .gitignore
92
- - .rubocop.yml
93
- - .travis.yml
92
+ - ".gitignore"
93
+ - ".rubocop.yml"
94
+ - ".travis.yml"
94
95
  - CODE_OF_CONDUCT.md
95
96
  - Gemfile
96
97
  - LICENSE
@@ -112,24 +113,24 @@ homepage: https://github.com/mattnichols/slide_rule
112
113
  licenses:
113
114
  - MIT
114
115
  metadata: {}
115
- post_install_message:
116
+ post_install_message:
116
117
  rdoc_options: []
117
118
  require_paths:
118
119
  - lib
119
120
  required_ruby_version: !ruby/object:Gem::Requirement
120
121
  requirements:
121
- - - '>='
122
+ - - ">="
122
123
  - !ruby/object:Gem::Version
123
124
  version: '0'
124
125
  required_rubygems_version: !ruby/object:Gem::Requirement
125
126
  requirements:
126
- - - '>='
127
+ - - ">="
127
128
  - !ruby/object:Gem::Version
128
129
  version: '0'
129
130
  requirements: []
130
- rubyforge_project:
131
- rubygems_version: 2.4.8
132
- signing_key:
131
+ rubyforge_project:
132
+ rubygems_version: 2.4.6
133
+ signing_key:
133
134
  specification_version: 4
134
135
  summary: Ruby object distance calculator
135
136
  test_files: