slide_rule 0.0.4 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +4 -0
- data/README.md +44 -5
- data/lib/slide_rule/distance_calculator.rb +54 -14
- data/lib/slide_rule/version.rb +1 -1
- data/spec/slide_rule/distance_calculator_spec.rb +59 -33
- data/spec/spec_helper.rb +1 -0
- metadata +34 -33
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 140911c7f9078ab2887f32012e167852d15f9ccd
|
4
|
+
data.tar.gz: c7f8db4642867e007875ee81d8403e037d647389
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 24a59fcbca5f635b1a52d0cd8186df12d97ba0f55cb27e2389c55023b87e3cd9b06f3cd8f752cd97b1480466d2c6eb47ab36ca425edd804ae04543f06b33b2d0
|
7
|
+
data.tar.gz: 9856686afc80d0437803c44095d0c9561049d1fee4f38913aec2e7f647d7456eb0c959ecde55efd74613e04a265fc4b60de7bf4f48be915b185bd05022eeaeec
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -19,25 +19,26 @@ _Note: weights are assumed to be equal if not provided_
|
|
19
19
|
|
20
20
|
# API
|
21
21
|
|
22
|
-
##Describe the field calculators
|
22
|
+
## Describe the field distance calculators
|
23
23
|
|
24
24
|
Each field to be considered in the distance calculation should be described
|
25
25
|
with a calculation method and an optional weight.
|
26
26
|
|
27
27
|
Valid calculators:
|
28
28
|
|
29
|
-
*
|
30
|
-
*
|
29
|
+
* day_of_year
|
30
|
+
* day_of_month
|
31
|
+
* levenshtein
|
31
32
|
|
32
33
|
```ruby
|
33
34
|
distance_rules = {
|
34
35
|
:description => {
|
35
36
|
:weight => 0.80,
|
36
|
-
:
|
37
|
+
:calculator => :levenshtein,
|
37
38
|
},
|
38
39
|
:date => {
|
39
40
|
:weight => 0.90,
|
40
|
-
:
|
41
|
+
:calculator => :day_of_month,
|
41
42
|
},
|
42
43
|
}
|
43
44
|
```
|
@@ -81,3 +82,41 @@ matcher.closest_match(candidate, [example, example2], 0.2)
|
|
81
82
|
=> example
|
82
83
|
|
83
84
|
```
|
85
|
+
|
86
|
+
## Custom Field Distance Calculators
|
87
|
+
|
88
|
+
To define a custom field distance calculator, define a class with a `calculate(value1, value2)` method.
|
89
|
+
|
90
|
+
Requirements:
|
91
|
+
* Class must be stateless
|
92
|
+
* `calculate` should return a float from `0.0` (perfect match) to `1.0` (no match)
|
93
|
+
* Calculation should not be order-dependent (i.e. `calculate(a, b) == calculate(b, a)`)
|
94
|
+
|
95
|
+
```ruby
|
96
|
+
class StringLengthCalculator
|
97
|
+
def calculate(l1, l2)
|
98
|
+
diff = (l1 - l2).abs.to_f
|
99
|
+
return diff / [l1, l2].max
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
matcher = ::SlideRule::DistanceCalculator.new(
|
104
|
+
:length => {
|
105
|
+
:weight => 1.0,
|
106
|
+
:calculator => StringLengthCalculator
|
107
|
+
}
|
108
|
+
)
|
109
|
+
|
110
|
+
# Find the string with the closest length
|
111
|
+
matcher.closest_match("Howdy Doody Time!", ["Felix the cat", "Mighty Mouse"], 0.5)
|
112
|
+
# => { :item=>"Felix the cat", :distance=>0.23529411764705882 }
|
113
|
+
```
|
114
|
+
|
115
|
+
See the [distance_calculators](https://github.com/mattnichols/slide_rule/tree/master/lib/slide_rule/distance_calculators) directory in the source for more examples.
|
116
|
+
|
117
|
+
|
118
|
+
# To Do
|
119
|
+
|
120
|
+
* Add more field distance calculators
|
121
|
+
|
122
|
+
|
@@ -1,7 +1,9 @@
|
|
1
1
|
module SlideRule
|
2
2
|
class DistanceCalculator
|
3
|
+
attr_accessor :rules
|
4
|
+
|
3
5
|
def initialize(rules)
|
4
|
-
@rules =
|
6
|
+
@rules = prepare_rules(rules)
|
5
7
|
end
|
6
8
|
|
7
9
|
# TODO: Figure this out. Very inefficient!
|
@@ -20,19 +22,21 @@ module SlideRule
|
|
20
22
|
end
|
21
23
|
end
|
22
24
|
|
23
|
-
def closest_match(obj, array, threshold)
|
24
|
-
matches(obj, array, threshold).
|
25
|
+
def closest_match(obj, array, threshold = 1.0)
|
26
|
+
matches(obj, array, threshold).sort_by { |match| match[:distance] }.first
|
25
27
|
end
|
26
28
|
|
27
|
-
def
|
28
|
-
|
29
|
-
|
29
|
+
def closest_matching_item(obj, array, threshold = 1.0)
|
30
|
+
match = closest_match(obj, array, threshold)
|
31
|
+
return nil if match.nil?
|
32
|
+
|
33
|
+
match[:item]
|
30
34
|
end
|
31
35
|
|
32
36
|
def matches(obj, array, threshold)
|
33
37
|
array.map do |item|
|
34
38
|
distance = calculate_distance(obj, item)
|
35
|
-
next nil unless distance
|
39
|
+
next nil unless distance <= threshold
|
36
40
|
{
|
37
41
|
item: item,
|
38
42
|
distance: distance
|
@@ -48,16 +52,29 @@ module SlideRule
|
|
48
52
|
# {
|
49
53
|
# :attribute_name => {
|
50
54
|
# :weight => 0.90,
|
51
|
-
# :
|
55
|
+
# :calculator => :distance_calculator,
|
52
56
|
# }
|
53
57
|
# }
|
54
58
|
def calculate_distance(i1, i2)
|
55
|
-
|
59
|
+
calculate_weighted_distances(i1, i2).reduce(0.0) do |distance, obj|
|
60
|
+
distance + (obj[:distance] * obj[:weight])
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
private
|
65
|
+
|
66
|
+
def calculate_weighted_distances(i1, i2)
|
67
|
+
distances = @rules.map do |attribute, rule|
|
56
68
|
val1 = i1.send(attribute)
|
57
69
|
val2 = i2.send(attribute)
|
58
|
-
|
59
|
-
|
60
|
-
|
70
|
+
distance = rule[:calculator].calculate(val1, val2)
|
71
|
+
next { distance: distance.to_f, weight: rule[:weight] } unless distance.nil?
|
72
|
+
|
73
|
+
nil
|
74
|
+
end
|
75
|
+
normalize_weights_array(distances) if distances.compact!
|
76
|
+
|
77
|
+
distances
|
61
78
|
end
|
62
79
|
|
63
80
|
def get_calculator(calculator)
|
@@ -73,12 +90,35 @@ module SlideRule
|
|
73
90
|
|
74
91
|
# Ensures all weights add up to 1.0
|
75
92
|
#
|
76
|
-
def normalize_weights(
|
77
|
-
rules = rules_hash.dup
|
93
|
+
def normalize_weights(rules)
|
78
94
|
weight_total = rules.map { |_attr, rule| rule[:weight] }.reduce(0.0, &:+)
|
79
95
|
rules.each do |_attr, rule|
|
80
96
|
rule[:weight] = rule[:weight] / weight_total
|
81
97
|
end
|
82
98
|
end
|
99
|
+
|
100
|
+
# Ensures all weights add up to 1.0 in array of hashes
|
101
|
+
#
|
102
|
+
def normalize_weights_array(rules)
|
103
|
+
weight_total = rules.map { |rule| rule[:weight] }.reduce(0.0, &:+)
|
104
|
+
rules.each do |rule|
|
105
|
+
rule[:weight] = rule[:weight] / weight_total
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
def prepare_rules(rules)
|
110
|
+
prepared_rules = rules.dup
|
111
|
+
prepared_rules = normalize_weights(prepared_rules)
|
112
|
+
prepared_rules.each do |_attr, rule|
|
113
|
+
if rule[:type]
|
114
|
+
puts 'Rule key `:type` is deprecated. Use `:calculator` instead.'
|
115
|
+
rule[:calculator] = rule[:type]
|
116
|
+
end
|
117
|
+
|
118
|
+
rule[:calculator] = get_calculator(rule[:calculator])
|
119
|
+
end
|
120
|
+
|
121
|
+
prepared_rules
|
122
|
+
end
|
83
123
|
end
|
84
124
|
end
|
data/lib/slide_rule/version.rb
CHANGED
@@ -19,6 +19,12 @@ describe ::SlideRule::DistanceCalculator do
|
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
|
+
class NilCalc
|
23
|
+
def calculate(_first, _second)
|
24
|
+
nil
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
22
28
|
let(:examples) do
|
23
29
|
[
|
24
30
|
::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com'),
|
@@ -36,50 +42,31 @@ describe ::SlideRule::DistanceCalculator do
|
|
36
42
|
::SlideRule::DistanceCalculator.new(
|
37
43
|
description: {
|
38
44
|
weight: 0.80,
|
39
|
-
|
45
|
+
calculator: :levenshtein
|
40
46
|
},
|
41
47
|
date: {
|
42
48
|
weight: 0.90,
|
43
|
-
|
49
|
+
calculator: :day_of_month
|
44
50
|
}
|
45
51
|
)
|
46
52
|
end
|
47
53
|
|
48
|
-
it 'finds
|
54
|
+
it 'finds closest' do
|
49
55
|
example = ExampleTransaction.new(description: 'Wells Fargo Dealer SVC', date: '2015-06-17')
|
50
56
|
expect(calculator.closest_match(example, examples, 0.2)[:item]).to eq(examples[3])
|
51
57
|
|
52
58
|
example = ExampleTransaction.new(description: 'Audible.com', date: '2015-06-05')
|
53
59
|
expect(calculator.closest_match(example, examples, 0.2)[:item]).to eq(examples[0])
|
54
60
|
end
|
55
|
-
end
|
56
61
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
description: {
|
61
|
-
weight: 0.80,
|
62
|
-
type: :levenshtein
|
63
|
-
},
|
64
|
-
date: {
|
65
|
-
weight: 0.90,
|
66
|
-
type: :day_of_month
|
67
|
-
}
|
68
|
-
)
|
69
|
-
end
|
70
|
-
|
71
|
-
it 'returns true if there is a match' do
|
72
|
-
example_1 = ExampleTransaction.new(description: 'Wells Fargo Dealer SVC', date: '2015-06-17')
|
73
|
-
example_2 = ExampleTransaction.new(description: 'Wells Fargo Dealer SVC', date: '2015-06-17')
|
74
|
-
|
75
|
-
expect(calculator.is_match?(example_1, example_2, 0.2)).to be(true)
|
62
|
+
it 'with default threshold' do
|
63
|
+
example = ExampleTransaction.new(description: 'Audible.com', date: '2015-06-05')
|
64
|
+
expect(calculator.closest_match(example, examples)[:item]).to eq(examples[0])
|
76
65
|
end
|
77
66
|
|
78
|
-
it '
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
expect(calculator.is_match?(example_1, example_2, 0.2)).to be(false)
|
67
|
+
it 'finds closest matching item' do
|
68
|
+
example = ExampleTransaction.new(description: 'Audible.com', date: '2015-06-05')
|
69
|
+
expect(calculator.closest_matching_item(example, examples)).to eq(examples[0])
|
83
70
|
end
|
84
71
|
end
|
85
72
|
|
@@ -89,11 +76,11 @@ describe ::SlideRule::DistanceCalculator do
|
|
89
76
|
calculator = ::SlideRule::DistanceCalculator.new(
|
90
77
|
description: {
|
91
78
|
weight: 1.00,
|
92
|
-
|
79
|
+
calculator: :levenshtein
|
93
80
|
},
|
94
81
|
date: {
|
95
82
|
weight: 0.50,
|
96
|
-
|
83
|
+
calculator: :day_of_month
|
97
84
|
}
|
98
85
|
)
|
99
86
|
example = ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com')
|
@@ -105,11 +92,11 @@ describe ::SlideRule::DistanceCalculator do
|
|
105
92
|
calculator = ::SlideRule::DistanceCalculator.new(
|
106
93
|
description: {
|
107
94
|
weight: 0.50,
|
108
|
-
|
95
|
+
calculator: :levenshtein
|
109
96
|
},
|
110
97
|
date: {
|
111
98
|
weight: 0.50,
|
112
|
-
|
99
|
+
calculator: :day_of_month
|
113
100
|
}
|
114
101
|
)
|
115
102
|
example = ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com')
|
@@ -125,6 +112,23 @@ describe ::SlideRule::DistanceCalculator do
|
|
125
112
|
distance = calculator.calculate_distance(example, candidate)
|
126
113
|
expect(distance.round(4)).to eq(((3.0 * 0.5 / 15) + (4.0 * 0.5 / 11)).round(4))
|
127
114
|
end
|
115
|
+
|
116
|
+
it 'should renormalize on nil' do
|
117
|
+
calculator = ::SlideRule::DistanceCalculator.new(
|
118
|
+
description: {
|
119
|
+
weight: 0.50,
|
120
|
+
calculator: :levenshtein
|
121
|
+
},
|
122
|
+
date: {
|
123
|
+
weight: 0.50,
|
124
|
+
calculator: NilCalc
|
125
|
+
}
|
126
|
+
)
|
127
|
+
example1 = ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com')
|
128
|
+
example2 = ::ExampleTransaction.new(amount: 25.00, date: '2015-06-08', description: 'Audible Inc')
|
129
|
+
|
130
|
+
expect(calculator.calculate_distance(example1, example2).round(4)).to eq((4.0 / 11).round(4))
|
131
|
+
end
|
128
132
|
end
|
129
133
|
|
130
134
|
context 'uses custom calculator' do
|
@@ -132,7 +136,7 @@ describe ::SlideRule::DistanceCalculator do
|
|
132
136
|
calculator = ::SlideRule::DistanceCalculator.new(
|
133
137
|
description: {
|
134
138
|
weight: 1.00,
|
135
|
-
|
139
|
+
calculator: CustomCalc
|
136
140
|
}
|
137
141
|
)
|
138
142
|
example = ::ExampleTransaction.new
|
@@ -142,5 +146,27 @@ describe ::SlideRule::DistanceCalculator do
|
|
142
146
|
expect(distance).to eq(0.9)
|
143
147
|
end
|
144
148
|
end
|
149
|
+
|
150
|
+
context 'validates rules on initialize' do
|
151
|
+
it 'should allow :type' do
|
152
|
+
::SlideRule::DistanceCalculator.new(
|
153
|
+
description: {
|
154
|
+
weight: 1.00,
|
155
|
+
type: CustomCalc
|
156
|
+
}
|
157
|
+
)
|
158
|
+
end
|
159
|
+
|
160
|
+
it 'should raise error if not valid calculator' do
|
161
|
+
expect do
|
162
|
+
::SlideRule::DistanceCalculator.new(
|
163
|
+
description: {
|
164
|
+
weight: 1.00,
|
165
|
+
calculator: :some_junk
|
166
|
+
}
|
167
|
+
)
|
168
|
+
end.to raise_error
|
169
|
+
end
|
170
|
+
end
|
145
171
|
end
|
146
172
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,96 +1,97 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slide_rule
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mattnichols
|
8
8
|
- fergmastaflex
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2015-11-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
+
name: vladlev
|
15
16
|
requirement: !ruby/object:Gem::Requirement
|
16
17
|
requirements:
|
17
|
-
- - ~>
|
18
|
+
- - "~>"
|
18
19
|
- !ruby/object:Gem::Version
|
19
20
|
version: '1.0'
|
20
|
-
name: vladlev
|
21
|
-
prerelease: false
|
22
21
|
type: :runtime
|
22
|
+
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
|
-
- - ~>
|
25
|
+
- - "~>"
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: '1.0'
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
|
+
name: rake
|
29
30
|
requirement: !ruby/object:Gem::Requirement
|
30
31
|
requirements:
|
31
|
-
- - ~>
|
32
|
+
- - "~>"
|
32
33
|
- !ruby/object:Gem::Version
|
33
34
|
version: '10'
|
34
|
-
name: rake
|
35
|
-
prerelease: false
|
36
35
|
type: :development
|
36
|
+
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
|
-
- - ~>
|
39
|
+
- - "~>"
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: '10'
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
|
+
name: pry
|
43
44
|
requirement: !ruby/object:Gem::Requirement
|
44
45
|
requirements:
|
45
|
-
- - ~>
|
46
|
+
- - "~>"
|
46
47
|
- !ruby/object:Gem::Version
|
47
48
|
version: '0'
|
48
|
-
name: pry
|
49
|
-
prerelease: false
|
50
49
|
type: :development
|
50
|
+
prerelease: false
|
51
51
|
version_requirements: !ruby/object:Gem::Requirement
|
52
52
|
requirements:
|
53
|
-
- - ~>
|
53
|
+
- - "~>"
|
54
54
|
- !ruby/object:Gem::Version
|
55
55
|
version: '0'
|
56
56
|
- !ruby/object:Gem::Dependency
|
57
|
+
name: rspec
|
57
58
|
requirement: !ruby/object:Gem::Requirement
|
58
59
|
requirements:
|
59
|
-
- - ~>
|
60
|
+
- - "~>"
|
60
61
|
- !ruby/object:Gem::Version
|
61
62
|
version: '3'
|
62
|
-
name: rspec
|
63
|
-
prerelease: false
|
64
63
|
type: :development
|
64
|
+
prerelease: false
|
65
65
|
version_requirements: !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
|
-
- - ~>
|
67
|
+
- - "~>"
|
68
68
|
- !ruby/object:Gem::Version
|
69
69
|
version: '3'
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
|
+
name: rubocop
|
71
72
|
requirement: !ruby/object:Gem::Requirement
|
72
73
|
requirements:
|
73
|
-
- - ~>
|
74
|
+
- - "~>"
|
74
75
|
- !ruby/object:Gem::Version
|
75
76
|
version: '0'
|
76
|
-
name: rubocop
|
77
|
-
prerelease: false
|
78
77
|
type: :development
|
78
|
+
prerelease: false
|
79
79
|
version_requirements: !ruby/object:Gem::Requirement
|
80
80
|
requirements:
|
81
|
-
- - ~>
|
81
|
+
- - "~>"
|
82
82
|
- !ruby/object:Gem::Version
|
83
83
|
version: '0'
|
84
|
-
description: Calculates the distance between 2 arbitrary objects using specified fields
|
84
|
+
description: Calculates the distance between 2 arbitrary objects using specified fields
|
85
|
+
and algorithms.
|
85
86
|
email:
|
86
87
|
- dev@mx.com
|
87
88
|
executables: []
|
88
89
|
extensions: []
|
89
90
|
extra_rdoc_files: []
|
90
91
|
files:
|
91
|
-
- .gitignore
|
92
|
-
- .rubocop.yml
|
93
|
-
- .travis.yml
|
92
|
+
- ".gitignore"
|
93
|
+
- ".rubocop.yml"
|
94
|
+
- ".travis.yml"
|
94
95
|
- CODE_OF_CONDUCT.md
|
95
96
|
- Gemfile
|
96
97
|
- LICENSE
|
@@ -112,24 +113,24 @@ homepage: https://github.com/mattnichols/slide_rule
|
|
112
113
|
licenses:
|
113
114
|
- MIT
|
114
115
|
metadata: {}
|
115
|
-
post_install_message:
|
116
|
+
post_install_message:
|
116
117
|
rdoc_options: []
|
117
118
|
require_paths:
|
118
119
|
- lib
|
119
120
|
required_ruby_version: !ruby/object:Gem::Requirement
|
120
121
|
requirements:
|
121
|
-
- -
|
122
|
+
- - ">="
|
122
123
|
- !ruby/object:Gem::Version
|
123
124
|
version: '0'
|
124
125
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
125
126
|
requirements:
|
126
|
-
- -
|
127
|
+
- - ">="
|
127
128
|
- !ruby/object:Gem::Version
|
128
129
|
version: '0'
|
129
130
|
requirements: []
|
130
|
-
rubyforge_project:
|
131
|
-
rubygems_version: 2.4.
|
132
|
-
signing_key:
|
131
|
+
rubyforge_project:
|
132
|
+
rubygems_version: 2.4.6
|
133
|
+
signing_key:
|
133
134
|
specification_version: 4
|
134
135
|
summary: Ruby object distance calculator
|
135
136
|
test_files:
|