match-mate 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +4 -0
- data/.rspec +2 -0
- data/Gemfile +11 -0
- data/README.md +118 -0
- data/Rakefile +10 -0
- data/lib/match-mate.rb +12 -0
- data/lib/match-mate/address.rb +95 -0
- data/lib/match-mate/address_match_list.rb +39 -0
- data/lib/match-mate/address_matcher.rb +52 -0
- data/lib/match-mate/configuration.rb +40 -0
- data/lib/match-mate/utils/fuzzy_wuzzy.rb +71 -0
- data/lib/match-mate/version.rb +3 -0
- data/match-mate.gemspec +25 -0
- data/spec/lib/address_matcher.rb +60 -0
- data/spec/lib/address_spec.rb +49 -0
- data/spec/lib/fuzz_wuzzy_spec.rb +80 -0
- data/spec/spec_helper.rb +7 -0
- data/spec/support/helpers.rb +14 -0
- metadata +128 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: fd432f4a83f343c07dceb78d41868c5a7af99f0a4d189ada8ae4f2960715eb44
|
4
|
+
data.tar.gz: a6ce70620c52d58d6a381013fc3157da0cf1fef5603de689abe602d1652d30a8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: '08dd5ec6915e9e2fc37580551b1b126e11bd73717fce751d640957aeede69b4c8af61b83ccd82ab0971d5d2461154ff3ee038d96c2c47cf344181204d687b734'
|
7
|
+
data.tar.gz: 25d67e04905e242b96047c949d034516cc7b7af4eb88d9c35e357e75c5c7c61d7ce4d447e75ec25afbbf0b95efc9b3c7db1979b1aeb7b96e65c43ac4f8738dc6
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
# match-mate
|
2
|
+
|
3
|
+
A Ruby gem for matching demographics data.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
* * *
|
8
|
+
|
9
|
+
### Prerequisites
|
10
|
+
|
11
|
+
#### Python / FuzzyWuzzy
|
12
|
+
|
13
|
+
match-mate uses [FuzzyWuzzy](https://github.com/seatgeek/fuzzywuzzy), a Python library that uses Levenshtein Distance to calculate the differences between sequences. It has some features not currently found in any Ruby gems, particularly the ability to ignore word order and duplicated words.
|
14
|
+
Thus, it currently requires Python version 2.7 or higher, as well as the FuzzyWuzzy package itself.
|
15
|
+
|
16
|
+
To install FuzzyWuzzy:
|
17
|
+
|
18
|
+
**On Ubuntu/Debian**
|
19
|
+
|
20
|
+
sudo apt install python python-pip
|
21
|
+
pip install fuzzywuzzy[speedup]
|
22
|
+
|
23
|
+
**On CentOS/RHEL**
|
24
|
+
|
25
|
+
sudo yum install python python-pip
|
26
|
+
pip install fuzzywuzzy[speedup]
|
27
|
+
|
28
|
+
**On MacOS**
|
29
|
+
|
30
|
+
Python comes pre-installed on MacOS.
|
31
|
+
|
32
|
+
pip install fuzzywuzzy[speedup]
|
33
|
+
|
34
|
+
#### libpostal
|
35
|
+
|
36
|
+
match-mate uses [ruby_postal](https://github.com/openvenues/ruby_postal), a gem that provides Ruby bindings to [libpostal](https://github.com/openvenues/libpostal) for fast street address parsing and normalization.
|
37
|
+
|
38
|
+
Before you install, make sure you have the following prerequisites:
|
39
|
+
|
40
|
+
**On Ubuntu/Debian**
|
41
|
+
|
42
|
+
sudo apt-get install curl autoconf automake libtool pkg-config
|
43
|
+
|
44
|
+
**On CentOS/RHEL**
|
45
|
+
|
46
|
+
sudo yum install curl autoconf automake libtool pkgconfig
|
47
|
+
|
48
|
+
**On MacOS**
|
49
|
+
|
50
|
+
brew install curl autoconf automake libtool pkg-config
|
51
|
+
|
52
|
+
Then to install the C library:
|
53
|
+
|
54
|
+
git clone https://github.com/openvenues/libpostal
|
55
|
+
cd libpostal
|
56
|
+
./bootstrap.sh
|
57
|
+
./configure --datadir=[...some dir with a few GB of space...]
|
58
|
+
make -j4
|
59
|
+
sudo make install
|
60
|
+
|
61
|
+
# On Linux it's probably a good idea to run
|
62
|
+
sudo ldconfig
|
63
|
+
|
64
|
+
### Install the Gem
|
65
|
+
|
66
|
+
To install using Bundler:
|
67
|
+
|
68
|
+
```ruby
|
69
|
+
gem 'match-mate', github: 'combinaut/match-mate'
|
70
|
+
```
|
71
|
+
|
72
|
+
## Configuration
|
73
|
+
|
74
|
+
* * *
|
75
|
+
|
76
|
+
Create an initializer and configure your Python path and match weights:
|
77
|
+
|
78
|
+
```ruby
|
79
|
+
MatchMate.configure do |config|
|
80
|
+
config.python_path = '/usr/local/bin/python'
|
81
|
+
config.address_weights = {
|
82
|
+
road: {
|
83
|
+
weight: 30,
|
84
|
+
threshold: 80
|
85
|
+
},
|
86
|
+
unit: {
|
87
|
+
weight: 2,
|
88
|
+
threshold: 80
|
89
|
+
},
|
90
|
+
postcode: {
|
91
|
+
weight: 30,
|
92
|
+
threshold: 100
|
93
|
+
},
|
94
|
+
city: {
|
95
|
+
weight: 8,
|
96
|
+
threshold: 80
|
97
|
+
},
|
98
|
+
house_number: {
|
99
|
+
weight: 30,
|
100
|
+
threshold: 100
|
101
|
+
}
|
102
|
+
}
|
103
|
+
end
|
104
|
+
```
|
105
|
+
|
106
|
+
## Usage
|
107
|
+
|
108
|
+
### Address Matcher
|
109
|
+
|
110
|
+
Compare two addresses and get a similarity score:
|
111
|
+
|
112
|
+
```ruby
|
113
|
+
address = MatchMate::Address.new "742 Evergreen Terrace Springfield IL 62704"
|
114
|
+
other_address = MatchMate::Address.new "742 Evergreen Springfield IL 62704"
|
115
|
+
matcher = MatchMate::AddressMatcher.new(address, other_address)
|
116
|
+
matcher.score
|
117
|
+
# => 100
|
118
|
+
```
|
data/Rakefile
ADDED
data/lib/match-mate.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'active_support'
|
2
|
+
require 'active_support/core_ext'
|
3
|
+
|
4
|
+
require 'pathname'
|
5
|
+
require 'pycall'
|
6
|
+
require 'ruby_postal/expand'
|
7
|
+
require 'ruby_postal/parser'
|
8
|
+
require 'match-mate/configuration'
|
9
|
+
require 'match-mate/utils/fuzzy_wuzzy'
|
10
|
+
require 'match-mate/address'
|
11
|
+
require 'match-mate/address_matcher'
|
12
|
+
require 'match-mate/address_match_list'
|
@@ -0,0 +1,95 @@
|
|
1
|
+
module MatchMate
  # A postal address built from free-form text.
  #
  # The text is parsed with Postal::Parser and exposed as individual
  # components (road, unit, postcode, city, state, house number) that
  # AddressMatcher compares attribute by attribute.
  class Address
    # Languages handed to the parser and expander: the configured languages,
    # or DEFAULTS[:languages] when none are configured.
    #
    # Read from the configuration on every call (not memoized) so that a
    # later MatchMate.configure is honoured.
    def self.languages
      MatchMate.config.languages || DEFAULTS[:languages]
    end

    # @param input_object_or_string [#to_s] a string, or any address object
    #   whose #to_s yields the address text
    def initialize(input_object_or_string)
      @input_string = input_object_or_string.to_s
    end

    # Basic components for comparison

    # @return [Array<String>, nil] the expansions Postal::Expand produces for
    #   the parsed road, or nil when no road was parsed
    def road
      expand components[:road]
    end

    # @return [String, nil] only the digits of the parsed unit
    def unit
      extract_integers components[:unit]
    end

    # @return [String, nil] the parsed postcode, truncated to
    #   MatchMate.config.postcode_limit characters when a limit is set
    def postcode
      limit_to components[:postcode], MatchMate.config.postcode_limit
    end

    def city
      components[:city]
    end

    def state
      components[:state]
    end

    def house_number
      components[:house_number]
    end

    # Fuzzy equality: true when AddressMatcher considers both addresses a match.
    def ==(other_address)
      AddressMatcher.new(self, other_address).match?
    end

    alias eql? ==
    alias === ==

    # Normalized composites

    def city_and_state
      [city, state].compact.join(', ')
    end

    # House number followed by the last road expansion.
    # `road` is nil when nothing was parsed, hence the safe navigation.
    def street_address
      [house_number, road&.last].compact.join(' ')
    end

    # Street address followed by the unit number; blank parts are skipped so
    # no stray separator is emitted.
    def street_address_with_unit
      [street_address, unit].reject(&:blank?).join(' ')
    end

    # Original (misspelled) name, kept so existing callers continue to work.
    alias street_adress_with_unit street_address_with_unit

    # Normalized single-line rendering of the address. Blank segments are
    # dropped instead of leaving dangling ", " separators.
    def to_s
      [street_address_with_unit, city_and_state, postcode].reject(&:blank?).join(', ')
    end

    private

    # Strips every non-digit character from +string+, optionally keeping only
    # the first +limit+ digits. Returns nil for blank input.
    def extract_integers(string, limit = nil)
      return if string.blank?

      value = string.gsub(/\D/, '')
      limit ? value.first(limit) : value
    end

    # Parsed components keyed by their parser label. Built once per instance.
    def components
      @components ||= parsed_object.each_with_object({}) do |component, result|
        result[component[:label]] = component[:value]
      end
    end

    # Raw parser output for the input text, memoized per instance.
    def parsed_object
      @parsed_object ||= Postal::Parser.parse_address @input_string, languages: self.class.languages
    end

    # Returns the first +limit+ characters of +string+; +string+ is returned
    # untouched when either argument is blank.
    def limit_to(string, limit)
      return string if string.blank? || limit.blank?

      string.first(limit)
    end

    # Expands +string+ with Postal::Expand; nil for blank input.
    def expand(string)
      return if string.blank?

      Postal::Expand.expand_address string, languages: self.class.languages
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module MatchMate
  # Enumerable view over a list of candidate addresses, each scored against
  # one reference address.
  class AddressMatchList
    include Enumerable

    # @param address_object_or_string the reference address
    # @param list [Array] candidate addresses (objects or strings)
    def initialize(address_object_or_string, list = [])
      @list = list
      @address = address_object_or_string
    end

    # One AddressMatchListItem per candidate, built once and cached.
    def scored_list
      @scored_list ||= @list.map do |candidate|
        AddressMatchListItem.new(candidate, @address)
      end
    end

    # Yields the items in the order defined by AddressMatchListItem#<=>.
    def each(&block)
      ranked = scored_list.sort
      ranked.each(&block)
    end
  end

  # A candidate address paired with the matcher that scores it against the
  # reference address.
  class AddressMatchListItem
    include Comparable

    attr_reader :address
    delegate :score, :match?, to: :matcher

    def initialize(address_object_or_string, other_address)
      # Any address object that responds to to_s is accepted
      @other_address = other_address
      @address = address_object_or_string
    end

    # Matcher comparing this candidate with the reference address; created on
    # first use.
    def matcher
      return @matcher if @matcher

      @matcher = AddressMatcher.new(@address, @other_address)
    end

    # Operands are deliberately reversed so that sorting puts the highest
    # score first.
    def <=>(other)
      other.score <=> score
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module MatchMate
  # Scores how closely two addresses match.
  #
  # Every configured attribute is compared with FuzzyWuzzy#token_set_ratio;
  # comparisons that fall below the attribute's threshold contribute nothing,
  # the rest contribute their ratio weighted by the attribute's share of the
  # total weight. The resulting score therefore always lies between 0 and 100,
  # whatever the weights add up to.
  class AddressMatcher
    # Per-attribute settings ({ weight:, threshold: }): the defaults with the
    # configured address_weights merged on top, so configured values win and
    # anything left out falls back to the default.
    #
    # Computed on every call so that configuration changes are picked up.
    def self.attributes
      DEFAULTS[:address_weights].deep_merge(MatchMate.config.address_weights || {})
    end

    # Minimum score for two addresses to count as a match.
    def self.threshold
      MatchMate.config.address_threshold || DEFAULTS[:address_threshold]
    end

    # Sum of the weights of all attributes.
    def self.total_weight
      attributes.values.sum { |config| config.fetch(:weight, 0) }
    end

    # @param address [MatchMate::Address, #to_s]
    # @param other_address [MatchMate::Address, #to_s]
    #   Anything that is not already an Address is wrapped in one.
    def initialize(address, other_address)
      @address = address.is_a?(MatchMate::Address) ? address : Address.new(address)
      @other_address = other_address.is_a?(MatchMate::Address) ? other_address : Address.new(other_address)
    end

    # @return [Integer] rounded weighted score, memoized per matcher
    def score
      @score ||= calculate_score.round
    end

    # @return [Boolean] whether the score reaches the configured threshold
    def match?
      score >= self.class.threshold
    end

    private

    # Weighted sum over all attributes, normalised by the total weight.
    def calculate_score
      settings = self.class.attributes
      total = settings.values.sum { |config| config.fetch(:weight, 0) }
      # Without any weight there is nothing to score (and nothing to divide by).
      return 0 if total.zero?

      settings.sum do |attribute, config|
        weighted_score_for(attribute, config, total)
      end
    end

    # Contribution of one attribute: 0 when its ratio is below the attribute
    # threshold, otherwise the ratio scaled by weight / total weight.
    def weighted_score_for(attribute, config, total)
      value = score_for(attribute)
      return 0 if value < config.fetch(:threshold, 0)

      value * config.fetch(:weight, 0).to_f / total
    end

    # Raw 0-100 similarity of one attribute of both addresses.
    def score_for(attribute)
      FuzzyWuzzy.new(@address.public_send(attribute), @other_address.public_send(attribute)).token_set_ratio
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module MatchMate
  # Built-in default settings.
  DEFAULTS = {
    languages: ['en'],
    address_threshold: 80,
    address_weights: {
      road: { weight: 30, threshold: 80 },
      unit: { weight: 2, threshold: 80 },
      postcode: { weight: 30, threshold: 100 },
      city: { weight: 8, threshold: 80 },
      house_number: { weight: 30, threshold: 100 }
    }
  }.freeze

  # Plain settings holder; every attribute is nil until assigned.
  class Configuration
    attr_accessor :python_path, :postcode_limit, :address_weights, :address_threshold, :languages
  end

  class << self
    # Yields the shared configuration object so it can be filled in.
    def configure
      yield config
    end

    # The shared Configuration instance, created on first access.
    def config
      @config ||= Configuration.new
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# Ruby front-end for the Python FuzzyWuzzy library, reached through PyCall.
#
# Each argument may be a single value or an array of values. Every value of
# the first argument is compared with every value of the second one and the
# best ratio (0-100) is returned.
#
# Python is initialised and `fuzzywuzzy.fuzz` is imported on the first
# comparison that actually needs Python rather than when this file is loaded,
# so a python_path set through MatchMate.configure after the gem has been
# required is still honoured.
class FuzzyWuzzy
  PERFECT_SCORE = 100

  class << self
    # The Python `fuzzywuzzy.fuzz` module, imported once and then reused.
    def fuzz
      @fuzz ||= begin
        init_python
        PyCall.import_module('fuzzywuzzy.fuzz')
      end
    end

    private

    # Points PyCall at the configured interpreter, falling back to whatever
    # `which python` finds, or to PyCall's own lookup when neither yields a path.
    def init_python
      python = MatchMate.config.python_path || `which python`.chomp
      python.to_s.empty? ? PyCall.init : PyCall.init(python)
    end
  end

  # @param value [Object, Array] value(s) to compare; converted with to_s
  # @param other_value [Object, Array] value(s) to compare against
  def initialize(value, other_value)
    @values = [value].flatten.map(&:to_s)
    @other_values = [other_value].flatten.map(&:to_s)
  end

  # Compares the entire string similarity, ignores word order and duplicated words
  def token_set_ratio
    run_command :token_set_ratio
  end

  # Compares the entire string similarity, ignores word order
  def token_sort_ratio
    run_command :token_sort_ratio
  end

  # Compares partial string similarity
  def partial_ratio
    run_command :partial_ratio
  end

  # Compares the entire string similarity, in order.
  def ratio
    run_command :ratio
  end

  private

  # Best score over all value pairs; stops at the first perfect score.
  # Returns 0 when there is nothing to compare (an empty array was given).
  def run_command(method)
    best = 0
    combinations.each do |value, other_value|
      score = score_for(method, value, other_value)
      best = score if score > best
      break if best == PERFECT_SCORE
    end
    best
  end

  # Score for one pair. Strings that are identical after normalisation are a
  # perfect match without involving Python. Everything else is passed to the
  # Python function as real arguments, never spliced into Python source, so
  # quotes or backslashes in the input cannot break or alter the call.
  def score_for(method, value, other_value)
    value = normalize(value)
    other_value = normalize(other_value)
    return PERFECT_SCORE if value == other_value

    self.class.fuzz.public_send(method, value, other_value)
  end

  # Every pairing of a value with an other_value.
  def combinations
    @values.product @other_values
  end

  # Trims, collapses runs of spaces and lowercases.
  def normalize(value)
    value.strip.squeeze(' ').downcase
  end
end
|
data/match-mate.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# Put lib/ on the load path so the version constant can be required below.
$:.push File.expand_path("../lib", __FILE__)

require "match-mate/version"

Gem::Specification.new do |s|
  s.name = "match-mate"
  s.version = MatchMate::VERSION
  s.authors = ['Combinaut Team']
  s.summary = "A Ruby gem for matching demographics"
  s.description = "A Ruby gem for matching demographics"
  s.homepage = "https://github.com/combinaut/match-mate"
  s.license = "MIT"

  # Package every file tracked by git; executables and test files are derived
  # from that list by path prefix.
  s.files = `git ls-files -z`.split("\x0")
  s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
  s.test_files = s.files.grep(%r{^(test|spec|features)/})
  s.require_paths = ['lib']
  # Ruby 2.0-2.2 cannot run this gem: activesupport ~> 5.0 needs 2.2.2 or
  # newer and the library was written against Ruby 2.3 syntax.
  s.required_ruby_version = '>= 2.3.0'

  s.add_dependency('activesupport', '~> 5.0')
  s.add_dependency('pycall', '~> 1.2.1')
  s.add_dependency('ruby_postal', '~> 1.0.0')

  s.add_development_dependency 'bundler', '>= 1.5', '< 3.0'
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module MatchMate
  # Scoring behaviour of AddressMatcher for addresses that differ only in
  # abbreviations and punctuation.
  describe AddressMatcher do
    context 'comparing equivalent addresses' do
      subject { described_class.new(address, other_address) }

      let(:address) { MatchMate::Address.new('12395 El Camino Avenue Suite 120 San Diego, CA 92130') }
      let(:other_address) { MatchMate::Address.new('12395 El Camino Ave. Ste 120 San Diego, CA 92130') }

      describe '#score' do
        it 'is a full match of 100' do
          expect(subject.score).to eq(100)
        end

        context 'with weights that dont add up to 100' do
          # Runs each example with weights totalling 189 instead of 100.
          around(:each) do |example|
            overrides = {
              address_weights: {
                road: { weight: 23, threshold: 80 },
                unit: { weight: 13, threshold: 80 },
                postcode: { weight: 133, threshold: 100 },
                city: { weight: 3, threshold: 80 },
                house_number: { weight: 17, threshold: 100 }
              }
            }

            with_config(overrides) { example.run }
          end

          it 'is a full match of 100' do
            expect(subject.score).to eq(100)
          end
        end
      end
    end

    context 'matching address pair examples' do
      # Pairs of raw address strings that are expected to match.
      let(:examples) do
        [
          [
            '12395 El Camino Real. Suite 120 San Diego, CA 92130',
            '12395 El Camino Real Ste 120 San Diego, CA 92130'
          ]
        ]
      end

      it 'should all match' do
        examples.each do |left, right|
          expect(MatchMate::AddressMatcher.new(left, right)).to be_match
        end
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Component extraction of MatchMate::Address from a single address string.
# NOTE(review): the header that used to stand here talked about appointments,
# `api_params` and VCR cassettes, none of which appear in this spec; it looks
# like it was copied from another project - confirm before restoring it.
module MatchMate
  describe Address do
    context 'passing entire address as a string' do
      subject { described_class.new('12395 El Camino Ave Ste 120 San Diego, CA 92130-12456') }

      describe '#road' do
        it 'extracts an arrayt of expandeded, normalized, lowercase core street address' do
          expect(subject.road).to include('el camino avenue')
        end
      end

      describe '#unit' do
        it 'only uses the number' do
          expect(subject.unit).to eq('120')
        end
      end

      describe '#house_number' do
        it 'extracts the house number' do
          expect(subject.house_number).to eq('12395')
        end
      end

      describe '#postcode' do
        context 'when postcode_limit is not set' do
          it 'returns the full postcode' do
            expect(subject.postcode).to eq('92130-12456')
          end
        end

        context 'when postcode_limit is set' do
          # Runs each example with the postcode limited to five characters.
          around(:each) do |example|
            with_config(postcode_limit: 5) { example.run }
          end

          it 'extracts the first digits set by limit' do
            expect(subject.postcode).to eq('92130')
          end
        end
      end

      describe '#city' do
        it 'extracts lowercase city name' do
          expect(subject.city).to eq('san diego')
        end
      end
    end
  end
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
# token_set_ratio for single strings, a string against a list, and two lists.
describe FuzzyWuzzy do
  context 'comparing single strings' do
    describe '#token_set_ratio' do
      context 'literal match' do
        subject { described_class.new('string', 'string') }

        it 'returns 100' do
          expect(subject.token_set_ratio).to eq(100)
        end
      end

      context 'comparing two nil values' do
        subject { described_class.new(nil, nil) }

        it 'returns 100' do
          expect(subject.token_set_ratio).to eq(100)
        end
      end

      context 'literal match ignores capitalization and spaces' do
        subject { described_class.new('A string ', 'a stRing') }

        it 'returns 100' do
          expect(subject.token_set_ratio).to eq(100)
        end
      end

      context 'close match' do
        subject { described_class.new('swing', 'string') }

        it 'returns less than 100' do
          expect(subject.token_set_ratio).to be < 100
        end
      end

      context 'total mismatch' do
        subject { described_class.new('mash', 'pot') }

        it 'returns 0' do
          expect(subject.token_set_ratio).to eq(0)
        end
      end
    end
  end

  context 'comparing string against array of strings' do
    describe '#token_set_ratio' do
      context 'full match on one of the members' do
        subject { described_class.new('string', %w[other_string string]) }

        it 'returns 100' do
          expect(subject.token_set_ratio).to eq(100)
        end
      end

      context 'close match on one of the members' do
        subject { described_class.new('string', %w[swing pot]) }

        it 'returns less than 100' do
          expect(subject.token_set_ratio).to be < 100
        end
      end

      context 'total mismatch on all of the members' do
        subject { described_class.new('mash', %w[pot not]) }

        it 'returns 0' do
          expect(subject.token_set_ratio).to eq(0)
        end
      end
    end
  end

  context 'comparing two arrays of strings' do
    describe '#token_set_ratio' do
      context 'full match on one of the members' do
        subject { described_class.new(['a string', 'something'], ['other_string', 'a string']) }

        it 'returns 100' do
          expect(subject.token_set_ratio).to eq(100)
        end
      end

      context 'close match on one of the members' do
        subject { described_class.new(%w[string rack], %w[swing pot]) }

        it 'returns less than 100' do
          expect(subject.token_set_ratio).to be < 100
        end
      end

      context 'total mismatch on all of the members' do
        subject { described_class.new(%w[mash rack], %w[pot not]) }

        it 'returns 0' do
          expect(subject.token_set_ratio).to eq(0)
        end
      end
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# Runs the given block with the MatchMate configuration temporarily
# overridden.
#
# options maps configuration attribute names to the values to use inside the
# block. The previous values are put back afterwards, also when the block
# raises. Returns whatever the block returns.
def with_config(options = {})
  saved = {}

  options.each_pair do |name, override|
    # Remember the current value before replacing it.
    saved[name] = MatchMate.config.send(name)
    MatchMate.config.send("#{name}=", override)
  end

  yield
ensure
  saved.each_pair { |name, original| MatchMate.config.send("#{name}=", original) }
end
|
metadata
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: match-mate
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.3
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Combinaut Team
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-01-21 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activesupport
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '5.0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '5.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: pycall
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.2.1
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.2.1
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: ruby_postal
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 1.0.0
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.0.0
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: bundler
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.5'
|
62
|
+
- - "<"
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '3.0'
|
65
|
+
type: :development
|
66
|
+
prerelease: false
|
67
|
+
version_requirements: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '1.5'
|
72
|
+
- - "<"
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '3.0'
|
75
|
+
description: A Ruby gem for matching demographics
|
76
|
+
email:
|
77
|
+
executables: []
|
78
|
+
extensions: []
|
79
|
+
extra_rdoc_files: []
|
80
|
+
files:
|
81
|
+
- ".gitignore"
|
82
|
+
- ".rspec"
|
83
|
+
- Gemfile
|
84
|
+
- README.md
|
85
|
+
- Rakefile
|
86
|
+
- lib/match-mate.rb
|
87
|
+
- lib/match-mate/address.rb
|
88
|
+
- lib/match-mate/address_match_list.rb
|
89
|
+
- lib/match-mate/address_matcher.rb
|
90
|
+
- lib/match-mate/configuration.rb
|
91
|
+
- lib/match-mate/utils/fuzzy_wuzzy.rb
|
92
|
+
- lib/match-mate/version.rb
|
93
|
+
- match-mate.gemspec
|
94
|
+
- spec/lib/address_matcher.rb
|
95
|
+
- spec/lib/address_spec.rb
|
96
|
+
- spec/lib/fuzz_wuzzy_spec.rb
|
97
|
+
- spec/spec_helper.rb
|
98
|
+
- spec/support/helpers.rb
|
99
|
+
homepage: https://github.com/combinaut/match-mate
|
100
|
+
licenses:
|
101
|
+
- MIT
|
102
|
+
metadata: {}
|
103
|
+
post_install_message:
|
104
|
+
rdoc_options: []
|
105
|
+
require_paths:
|
106
|
+
- lib
|
107
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
108
|
+
requirements:
|
109
|
+
- - ">="
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: 2.0.0
|
112
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
113
|
+
requirements:
|
114
|
+
- - ">="
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '0'
|
117
|
+
requirements: []
|
118
|
+
rubyforge_project:
|
119
|
+
rubygems_version: 2.7.6.2
|
120
|
+
signing_key:
|
121
|
+
specification_version: 4
|
122
|
+
summary: A Ruby gem for matching demographics
|
123
|
+
test_files:
|
124
|
+
- spec/lib/address_matcher.rb
|
125
|
+
- spec/lib/address_spec.rb
|
126
|
+
- spec/lib/fuzz_wuzzy_spec.rb
|
127
|
+
- spec/spec_helper.rb
|
128
|
+
- spec/support/helpers.rb
|