slide_rule 0.0.4 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +4 -0
- data/README.md +44 -5
- data/lib/slide_rule/distance_calculator.rb +54 -14
- data/lib/slide_rule/version.rb +1 -1
- data/spec/slide_rule/distance_calculator_spec.rb +59 -33
- data/spec/spec_helper.rb +1 -0
- metadata +34 -33
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 140911c7f9078ab2887f32012e167852d15f9ccd
|
4
|
+
data.tar.gz: c7f8db4642867e007875ee81d8403e037d647389
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 24a59fcbca5f635b1a52d0cd8186df12d97ba0f55cb27e2389c55023b87e3cd9b06f3cd8f752cd97b1480466d2c6eb47ab36ca425edd804ae04543f06b33b2d0
|
7
|
+
data.tar.gz: 9856686afc80d0437803c44095d0c9561049d1fee4f38913aec2e7f647d7456eb0c959ecde55efd74613e04a265fc4b60de7bf4f48be915b185bd05022eeaeec
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -19,25 +19,26 @@ _Note: weights are assumed to be equal if not provided_
|
|
19
19
|
|
20
20
|
# API
|
21
21
|
|
22
|
-
##Describe the field calculators
|
22
|
+
## Describe the field distance calculators
|
23
23
|
|
24
24
|
Each field to be considered in the distance calculation should be described
|
25
25
|
with a calculation method and, optionally, a weight.
|
26
26
|
|
27
27
|
Valid calculators:
|
28
28
|
|
29
|
-
*
|
30
|
-
*
|
29
|
+
* day_of_year
|
30
|
+
* day_of_month
|
31
|
+
* levenshtein
|
31
32
|
|
32
33
|
```ruby
|
33
34
|
distance_rules = {
|
34
35
|
:description => {
|
35
36
|
:weight => 0.80,
|
36
|
-
:
|
37
|
+
:calculator => :levenshtein,
|
37
38
|
},
|
38
39
|
:date => {
|
39
40
|
:weight => 0.90,
|
40
|
-
:
|
41
|
+
:calculator => :day_of_month,
|
41
42
|
},
|
42
43
|
}
|
43
44
|
```
|
@@ -81,3 +82,41 @@ matcher.closest_match(candidate, [example, example2], 0.2)
|
|
81
82
|
=> example
|
82
83
|
|
83
84
|
```
|
85
|
+
|
86
|
+
## Custom Field Distance Calculators
|
87
|
+
|
88
|
+
To define a custom field distance calculator, define a class with a `calculate(value1, value2)` method.
|
89
|
+
|
90
|
+
Requirements:
|
91
|
+
* Class must be stateless
|
92
|
+
* `calculate` should return a float from `0.0` (perfect match) to `1.0` (no match)
|
93
|
+
* Calculation should not be order dependent (e.g. `calculate(a, b) == calculate(b, a)`)
|
94
|
+
|
95
|
+
```ruby
|
96
|
+
class StringLengthCalculator
|
97
|
+
def calculate(l1, l2)
|
98
|
+
diff = (l1 - l2).abs.to_f
|
99
|
+
return diff / [l1, l2].max
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
matcher = ::SlideRule::DistanceCalculator.new(
|
104
|
+
:length => {
|
105
|
+
:weight => 1.0,
|
106
|
+
:calculator => StringLengthCalculator
|
107
|
+
}
|
108
|
+
)
|
109
|
+
|
110
|
+
# Find the string with the closest length
|
111
|
+
matcher.closest_match("Howdy Doody Time!", ["Felix the cat", "Mighty Mouse"], 0.5)
|
112
|
+
# => { :item=>"Felix the cat", :distance=>0.23529411764705882 }
|
113
|
+
```
|
114
|
+
|
115
|
+
See the [distance_calculators](https://github.com/mattnichols/slide_rule/tree/master/lib/slide_rule/distance_calculators) directory in source for more examples.
|
116
|
+
|
117
|
+
|
118
|
+
# To Do
|
119
|
+
|
120
|
+
* Add more field distance calculators
|
121
|
+
|
122
|
+
|
@@ -1,7 +1,9 @@
|
|
1
1
|
module SlideRule
|
2
2
|
class DistanceCalculator
|
3
|
+
attr_accessor :rules
|
4
|
+
|
3
5
|
def initialize(rules)
|
4
|
-
@rules =
|
6
|
+
@rules = prepare_rules(rules)
|
5
7
|
end
|
6
8
|
|
7
9
|
# TODO: Figure this out. Very inefficient!
|
@@ -20,19 +22,21 @@ module SlideRule
|
|
20
22
|
end
|
21
23
|
end
|
22
24
|
|
23
|
-
def closest_match(obj, array, threshold)
|
24
|
-
matches(obj, array, threshold).
|
25
|
+
def closest_match(obj, array, threshold = 1.0)
|
26
|
+
matches(obj, array, threshold).sort_by { |match| match[:distance] }.first
|
25
27
|
end
|
26
28
|
|
27
|
-
def
|
28
|
-
|
29
|
-
|
29
|
+
def closest_matching_item(obj, array, threshold = 1.0)
|
30
|
+
match = closest_match(obj, array, threshold)
|
31
|
+
return nil if match.nil?
|
32
|
+
|
33
|
+
match[:item]
|
30
34
|
end
|
31
35
|
|
32
36
|
def matches(obj, array, threshold)
|
33
37
|
array.map do |item|
|
34
38
|
distance = calculate_distance(obj, item)
|
35
|
-
next nil unless distance
|
39
|
+
next nil unless distance <= threshold
|
36
40
|
{
|
37
41
|
item: item,
|
38
42
|
distance: distance
|
@@ -48,16 +52,29 @@ module SlideRule
|
|
48
52
|
# {
|
49
53
|
# :attribute_name => {
|
50
54
|
# :weight => 0.90,
|
51
|
-
# :
|
55
|
+
# :calculator => :distance_calculator,
|
52
56
|
# }
|
53
57
|
# }
|
54
58
|
def calculate_distance(i1, i2)
|
55
|
-
|
59
|
+
calculate_weighted_distances(i1, i2).reduce(0.0) do |distance, obj|
|
60
|
+
distance + (obj[:distance] * obj[:weight])
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
private
|
65
|
+
|
66
|
+
def calculate_weighted_distances(i1, i2)
|
67
|
+
distances = @rules.map do |attribute, rule|
|
56
68
|
val1 = i1.send(attribute)
|
57
69
|
val2 = i2.send(attribute)
|
58
|
-
|
59
|
-
|
60
|
-
|
70
|
+
distance = rule[:calculator].calculate(val1, val2)
|
71
|
+
next { distance: distance.to_f, weight: rule[:weight] } unless distance.nil?
|
72
|
+
|
73
|
+
nil
|
74
|
+
end
|
75
|
+
normalize_weights_array(distances) if distances.compact!
|
76
|
+
|
77
|
+
distances
|
61
78
|
end
|
62
79
|
|
63
80
|
def get_calculator(calculator)
|
@@ -73,12 +90,35 @@ module SlideRule
|
|
73
90
|
|
74
91
|
# Ensures all weights add up to 1.0
|
75
92
|
#
|
76
|
-
def normalize_weights(
|
77
|
-
rules = rules_hash.dup
|
93
|
+
def normalize_weights(rules)
|
78
94
|
weight_total = rules.map { |_attr, rule| rule[:weight] }.reduce(0.0, &:+)
|
79
95
|
rules.each do |_attr, rule|
|
80
96
|
rule[:weight] = rule[:weight] / weight_total
|
81
97
|
end
|
82
98
|
end
|
99
|
+
|
100
|
+
# Ensures all weights add up to 1.0 in array of hashes
|
101
|
+
#
|
102
|
+
def normalize_weights_array(rules)
|
103
|
+
weight_total = rules.map { |rule| rule[:weight] }.reduce(0.0, &:+)
|
104
|
+
rules.each do |rule|
|
105
|
+
rule[:weight] = rule[:weight] / weight_total
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
def prepare_rules(rules)
|
110
|
+
prepared_rules = rules.dup
|
111
|
+
prepared_rules = normalize_weights(prepared_rules)
|
112
|
+
prepared_rules.each do |_attr, rule|
|
113
|
+
if rule[:type]
|
114
|
+
puts 'Rule key `:type` is deprecated. Use `:calculator` instead.'
|
115
|
+
rule[:calculator] = rule[:type]
|
116
|
+
end
|
117
|
+
|
118
|
+
rule[:calculator] = get_calculator(rule[:calculator])
|
119
|
+
end
|
120
|
+
|
121
|
+
prepared_rules
|
122
|
+
end
|
83
123
|
end
|
84
124
|
end
|
data/lib/slide_rule/version.rb
CHANGED
@@ -19,6 +19,12 @@ describe ::SlideRule::DistanceCalculator do
|
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
|
+
class NilCalc
|
23
|
+
def calculate(_first, _second)
|
24
|
+
nil
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
22
28
|
let(:examples) do
|
23
29
|
[
|
24
30
|
::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com'),
|
@@ -36,50 +42,31 @@ describe ::SlideRule::DistanceCalculator do
|
|
36
42
|
::SlideRule::DistanceCalculator.new(
|
37
43
|
description: {
|
38
44
|
weight: 0.80,
|
39
|
-
|
45
|
+
calculator: :levenshtein
|
40
46
|
},
|
41
47
|
date: {
|
42
48
|
weight: 0.90,
|
43
|
-
|
49
|
+
calculator: :day_of_month
|
44
50
|
}
|
45
51
|
)
|
46
52
|
end
|
47
53
|
|
48
|
-
it 'finds
|
54
|
+
it 'finds closest' do
|
49
55
|
example = ExampleTransaction.new(description: 'Wells Fargo Dealer SVC', date: '2015-06-17')
|
50
56
|
expect(calculator.closest_match(example, examples, 0.2)[:item]).to eq(examples[3])
|
51
57
|
|
52
58
|
example = ExampleTransaction.new(description: 'Audible.com', date: '2015-06-05')
|
53
59
|
expect(calculator.closest_match(example, examples, 0.2)[:item]).to eq(examples[0])
|
54
60
|
end
|
55
|
-
end
|
56
61
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
description: {
|
61
|
-
weight: 0.80,
|
62
|
-
type: :levenshtein
|
63
|
-
},
|
64
|
-
date: {
|
65
|
-
weight: 0.90,
|
66
|
-
type: :day_of_month
|
67
|
-
}
|
68
|
-
)
|
69
|
-
end
|
70
|
-
|
71
|
-
it 'returns true if there is a match' do
|
72
|
-
example_1 = ExampleTransaction.new(description: 'Wells Fargo Dealer SVC', date: '2015-06-17')
|
73
|
-
example_2 = ExampleTransaction.new(description: 'Wells Fargo Dealer SVC', date: '2015-06-17')
|
74
|
-
|
75
|
-
expect(calculator.is_match?(example_1, example_2, 0.2)).to be(true)
|
62
|
+
it 'with default threshold' do
|
63
|
+
example = ExampleTransaction.new(description: 'Audible.com', date: '2015-06-05')
|
64
|
+
expect(calculator.closest_match(example, examples)[:item]).to eq(examples[0])
|
76
65
|
end
|
77
66
|
|
78
|
-
it '
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
expect(calculator.is_match?(example_1, example_2, 0.2)).to be(false)
|
67
|
+
it 'finds closest matching item' do
|
68
|
+
example = ExampleTransaction.new(description: 'Audible.com', date: '2015-06-05')
|
69
|
+
expect(calculator.closest_matching_item(example, examples)).to eq(examples[0])
|
83
70
|
end
|
84
71
|
end
|
85
72
|
|
@@ -89,11 +76,11 @@ describe ::SlideRule::DistanceCalculator do
|
|
89
76
|
calculator = ::SlideRule::DistanceCalculator.new(
|
90
77
|
description: {
|
91
78
|
weight: 1.00,
|
92
|
-
|
79
|
+
calculator: :levenshtein
|
93
80
|
},
|
94
81
|
date: {
|
95
82
|
weight: 0.50,
|
96
|
-
|
83
|
+
calculator: :day_of_month
|
97
84
|
}
|
98
85
|
)
|
99
86
|
example = ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com')
|
@@ -105,11 +92,11 @@ describe ::SlideRule::DistanceCalculator do
|
|
105
92
|
calculator = ::SlideRule::DistanceCalculator.new(
|
106
93
|
description: {
|
107
94
|
weight: 0.50,
|
108
|
-
|
95
|
+
calculator: :levenshtein
|
109
96
|
},
|
110
97
|
date: {
|
111
98
|
weight: 0.50,
|
112
|
-
|
99
|
+
calculator: :day_of_month
|
113
100
|
}
|
114
101
|
)
|
115
102
|
example = ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com')
|
@@ -125,6 +112,23 @@ describe ::SlideRule::DistanceCalculator do
|
|
125
112
|
distance = calculator.calculate_distance(example, candidate)
|
126
113
|
expect(distance.round(4)).to eq(((3.0 * 0.5 / 15) + (4.0 * 0.5 / 11)).round(4))
|
127
114
|
end
|
115
|
+
|
116
|
+
it 'should renormalize on nil' do
|
117
|
+
calculator = ::SlideRule::DistanceCalculator.new(
|
118
|
+
description: {
|
119
|
+
weight: 0.50,
|
120
|
+
calculator: :levenshtein
|
121
|
+
},
|
122
|
+
date: {
|
123
|
+
weight: 0.50,
|
124
|
+
calculator: NilCalc
|
125
|
+
}
|
126
|
+
)
|
127
|
+
example1 = ::ExampleTransaction.new(amount: 25.00, date: '2015-02-05', description: 'Audible.com')
|
128
|
+
example2 = ::ExampleTransaction.new(amount: 25.00, date: '2015-06-08', description: 'Audible Inc')
|
129
|
+
|
130
|
+
expect(calculator.calculate_distance(example1, example2).round(4)).to eq((4.0 / 11).round(4))
|
131
|
+
end
|
128
132
|
end
|
129
133
|
|
130
134
|
context 'uses custom calculator' do
|
@@ -132,7 +136,7 @@ describe ::SlideRule::DistanceCalculator do
|
|
132
136
|
calculator = ::SlideRule::DistanceCalculator.new(
|
133
137
|
description: {
|
134
138
|
weight: 1.00,
|
135
|
-
|
139
|
+
calculator: CustomCalc
|
136
140
|
}
|
137
141
|
)
|
138
142
|
example = ::ExampleTransaction.new
|
@@ -142,5 +146,27 @@ describe ::SlideRule::DistanceCalculator do
|
|
142
146
|
expect(distance).to eq(0.9)
|
143
147
|
end
|
144
148
|
end
|
149
|
+
|
150
|
+
context 'validates rules on initialize' do
|
151
|
+
it 'should allow :type' do
|
152
|
+
::SlideRule::DistanceCalculator.new(
|
153
|
+
description: {
|
154
|
+
weight: 1.00,
|
155
|
+
type: CustomCalc
|
156
|
+
}
|
157
|
+
)
|
158
|
+
end
|
159
|
+
|
160
|
+
it 'should raise error if not valid calculator' do
|
161
|
+
expect do
|
162
|
+
::SlideRule::DistanceCalculator.new(
|
163
|
+
description: {
|
164
|
+
weight: 1.00,
|
165
|
+
calculator: :some_junk
|
166
|
+
}
|
167
|
+
)
|
168
|
+
end.to raise_error
|
169
|
+
end
|
170
|
+
end
|
145
171
|
end
|
146
172
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,96 +1,97 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: slide_rule
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mattnichols
|
8
8
|
- fergmastaflex
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2015-11-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
+
name: vladlev
|
15
16
|
requirement: !ruby/object:Gem::Requirement
|
16
17
|
requirements:
|
17
|
-
- - ~>
|
18
|
+
- - "~>"
|
18
19
|
- !ruby/object:Gem::Version
|
19
20
|
version: '1.0'
|
20
|
-
name: vladlev
|
21
|
-
prerelease: false
|
22
21
|
type: :runtime
|
22
|
+
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
|
-
- - ~>
|
25
|
+
- - "~>"
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: '1.0'
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
|
+
name: rake
|
29
30
|
requirement: !ruby/object:Gem::Requirement
|
30
31
|
requirements:
|
31
|
-
- - ~>
|
32
|
+
- - "~>"
|
32
33
|
- !ruby/object:Gem::Version
|
33
34
|
version: '10'
|
34
|
-
name: rake
|
35
|
-
prerelease: false
|
36
35
|
type: :development
|
36
|
+
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
|
-
- - ~>
|
39
|
+
- - "~>"
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: '10'
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
|
+
name: pry
|
43
44
|
requirement: !ruby/object:Gem::Requirement
|
44
45
|
requirements:
|
45
|
-
- - ~>
|
46
|
+
- - "~>"
|
46
47
|
- !ruby/object:Gem::Version
|
47
48
|
version: '0'
|
48
|
-
name: pry
|
49
|
-
prerelease: false
|
50
49
|
type: :development
|
50
|
+
prerelease: false
|
51
51
|
version_requirements: !ruby/object:Gem::Requirement
|
52
52
|
requirements:
|
53
|
-
- - ~>
|
53
|
+
- - "~>"
|
54
54
|
- !ruby/object:Gem::Version
|
55
55
|
version: '0'
|
56
56
|
- !ruby/object:Gem::Dependency
|
57
|
+
name: rspec
|
57
58
|
requirement: !ruby/object:Gem::Requirement
|
58
59
|
requirements:
|
59
|
-
- - ~>
|
60
|
+
- - "~>"
|
60
61
|
- !ruby/object:Gem::Version
|
61
62
|
version: '3'
|
62
|
-
name: rspec
|
63
|
-
prerelease: false
|
64
63
|
type: :development
|
64
|
+
prerelease: false
|
65
65
|
version_requirements: !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
|
-
- - ~>
|
67
|
+
- - "~>"
|
68
68
|
- !ruby/object:Gem::Version
|
69
69
|
version: '3'
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
|
+
name: rubocop
|
71
72
|
requirement: !ruby/object:Gem::Requirement
|
72
73
|
requirements:
|
73
|
-
- - ~>
|
74
|
+
- - "~>"
|
74
75
|
- !ruby/object:Gem::Version
|
75
76
|
version: '0'
|
76
|
-
name: rubocop
|
77
|
-
prerelease: false
|
78
77
|
type: :development
|
78
|
+
prerelease: false
|
79
79
|
version_requirements: !ruby/object:Gem::Requirement
|
80
80
|
requirements:
|
81
|
-
- - ~>
|
81
|
+
- - "~>"
|
82
82
|
- !ruby/object:Gem::Version
|
83
83
|
version: '0'
|
84
|
-
description: Calculates the distance between 2 arbitrary objects using specified fields
|
84
|
+
description: Calculates the distance between 2 arbitrary objects using specified fields
|
85
|
+
and algorithms.
|
85
86
|
email:
|
86
87
|
- dev@mx.com
|
87
88
|
executables: []
|
88
89
|
extensions: []
|
89
90
|
extra_rdoc_files: []
|
90
91
|
files:
|
91
|
-
- .gitignore
|
92
|
-
- .rubocop.yml
|
93
|
-
- .travis.yml
|
92
|
+
- ".gitignore"
|
93
|
+
- ".rubocop.yml"
|
94
|
+
- ".travis.yml"
|
94
95
|
- CODE_OF_CONDUCT.md
|
95
96
|
- Gemfile
|
96
97
|
- LICENSE
|
@@ -112,24 +113,24 @@ homepage: https://github.com/mattnichols/slide_rule
|
|
112
113
|
licenses:
|
113
114
|
- MIT
|
114
115
|
metadata: {}
|
115
|
-
post_install_message:
|
116
|
+
post_install_message:
|
116
117
|
rdoc_options: []
|
117
118
|
require_paths:
|
118
119
|
- lib
|
119
120
|
required_ruby_version: !ruby/object:Gem::Requirement
|
120
121
|
requirements:
|
121
|
-
- -
|
122
|
+
- - ">="
|
122
123
|
- !ruby/object:Gem::Version
|
123
124
|
version: '0'
|
124
125
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
125
126
|
requirements:
|
126
|
-
- -
|
127
|
+
- - ">="
|
127
128
|
- !ruby/object:Gem::Version
|
128
129
|
version: '0'
|
129
130
|
requirements: []
|
130
|
-
rubyforge_project:
|
131
|
-
rubygems_version: 2.4.
|
132
|
-
signing_key:
|
131
|
+
rubyforge_project:
|
132
|
+
rubygems_version: 2.4.6
|
133
|
+
signing_key:
|
133
134
|
specification_version: 4
|
134
135
|
summary: Ruby object distance calculator
|
135
136
|
test_files:
|