fullname-matcher 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +18 -0
- data/Gemfile +3 -0
- data/README.md +16 -0
- data/fullname-matcher.gemspec +17 -0
- data/lib/fullname/equivalence.rb +1259 -0
- data/lib/fullname/matcher/core.rb +198 -0
- data/lib/fullname/matcher/version.rb +5 -0
- data/lib/fullname/matcher.rb +15 -0
- data/lib/fullname.rb +4 -0
- metadata +65 -0
@@ -0,0 +1,198 @@
|
|
1
|
+
# this class gets people, based on name match, from a table that stores firstname/middlename/lastname/suffix separately.
|
2
|
+
#
|
3
|
+
# try a series of searches (exact match first, then different variations) until success
|
4
|
+
# a lot of the logic/examples (eg. middlename handling, suffix handling, when to use abbreviation, when to use 'regexp') is commented inline
|
5
|
+
#
|
6
|
+
# public methods:
|
7
|
+
# . new(table, mapping={}, options = {})
|
8
|
+
# when constructing a name match using xxx = Fullname::Matcher.new(...), the first arg is the table where the search is executed in;
|
9
|
+
# default column mapping is {:first => 'first', :middle => 'middle', :last => 'last', :suffix => 'suffix'};
|
10
|
+
# - if the actual mapping is different, it should be provided as the second arg of new()
|
11
|
+
# - options:
|
12
|
+
# :skip_match_middle_name default is false
|
13
|
+
# . set_condition(c)
|
14
|
+
# if there's other condition (like "data_import_key = 'yyyy.mm.dd'") in search criteria, set it this way
|
15
|
+
#
|
16
|
+
# . get_matches
|
17
|
+
# if the name is one string, use get_matches(orig_name)
|
18
|
+
# if the name is in pieces, use get_matches(firstname, middlename, lastname, suffix)
|
19
|
+
# return ALL matches of the first successful search or [] if all searches fail
|
20
|
+
# . match_fullname
|
21
|
+
# alias of get_matches
|
22
|
+
# . names_match?(n1, style1, n2, style2)
|
23
|
+
# return true if two names (n1 and n2) are same; false otherwise
|
24
|
+
#
|
25
|
+
require 'fullname/parser'
|
26
|
+
require 'fullname/equivalence'
|
27
|
+
|
28
|
+
module Fullname::Matcher
|
29
|
+
class Core
|
30
|
+
|
31
|
+
DEFAULT_MAPPING = {:first => 'first', :middle => 'middle', :last => 'last', :suffix => 'suffix'}
|
32
|
+
DEFAULT_OPTIONS = {
|
33
|
+
:skip_match_middle_name => false, # skip match middle name if middle name not provided.
|
34
|
+
:skip_match_suffix => false # skip match suffix if suffix not provided or no column suffix in database.
|
35
|
+
}
|
36
|
+
|
37
|
+
class Error < StandardError ; end
|
38
|
+
|
39
|
+
attr_accessor :options
|
40
|
+
|
41
|
+
def initialize(table, mapping = {}, options = {})
|
42
|
+
@table = table
|
43
|
+
@mapping = DEFAULT_MAPPING.merge(mapping)
|
44
|
+
@condition = nil
|
45
|
+
@options = DEFAULT_OPTIONS.merge(options)
|
46
|
+
end
|
47
|
+
|
48
|
+
def set_condition(c)
|
49
|
+
@condition = c
|
50
|
+
end
|
51
|
+
|
52
|
+
def get_matches(*args)
|
53
|
+
name = nil
|
54
|
+
match_options = {}
|
55
|
+
case args.size
|
56
|
+
when 1
|
57
|
+
name = ::Fullname::Parser.parse_fullname(args[0])
|
58
|
+
when 4,5
|
59
|
+
name = {:first => args[0], :middle => args[1], :last => args[2], :suffix => args[3]}
|
60
|
+
match_options = args.pop if args.size == 5
|
61
|
+
else
|
62
|
+
raise Error, 'illeagle arguments length of get_matches, must be the length of 1,4,5'
|
63
|
+
end
|
64
|
+
recursive = match_options.include?(:recursive) ? match_options[:recursive] : true
|
65
|
+
return [] if name[:first].nil? || name[:last].nil?
|
66
|
+
match_list = match_first_last_and_suffix(name)
|
67
|
+
|
68
|
+
# skip validating middlename if @options[:skip_match_middle_name] == true
|
69
|
+
# all matched result which middle name is NULL or NON-NULL will be returned
|
70
|
+
return match_list if @options[:skip_match_middle_name] && match_list.size > 0
|
71
|
+
|
72
|
+
if match_list.size > 0
|
73
|
+
# 1. exactly match
|
74
|
+
match_list_with_middlename = match_list.select do |r|
|
75
|
+
r_middle_name = r.send(@mapping[:middle])
|
76
|
+
r_middle_name.to_s.downcase.strip == name[:middle].to_s.downcase.strip
|
77
|
+
end
|
78
|
+
return match_list_with_middlename if match_list_with_middlename.size > 0
|
79
|
+
|
80
|
+
# 2. if name[:middle] is not NULL, regexp match
|
81
|
+
if name[:middle]
|
82
|
+
m_re = build_middlename_regexp(name[:middle])
|
83
|
+
match_list_with_middlename = match_list.select do |r|
|
84
|
+
r_middle_name = r.send(@mapping[:middle])
|
85
|
+
r_middle_name && r_middle_name =~ m_re
|
86
|
+
end
|
87
|
+
return match_list_with_middlename if match_list_with_middlename.size > 0
|
88
|
+
# 2.1 fuzzy match: if middlename in DB is NULL, it matches
|
89
|
+
match_list_with_middlename = match_list.select{ |r| r.send(@mapping[:middle]).nil? }
|
90
|
+
return match_list_with_middlename if match_list_with_middlename.size > 0
|
91
|
+
else
|
92
|
+
# 2.2 fuzzy match: assume all matches since name[:middle] is NULL
|
93
|
+
return match_list if match_list.size > 0
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
# if nothing matches, try to search with equivalence of first name
|
98
|
+
if match_list.size == 0 && recursive
|
99
|
+
firstname_array = ::Fullname::Equivalence.get_name_equivalence(name[:first])
|
100
|
+
firstname_array.each do |n|
|
101
|
+
match_list += get_matches(n, name[:middle], name[:last], name[:suffix], {:recursive => false})
|
102
|
+
end if firstname_array
|
103
|
+
end
|
104
|
+
|
105
|
+
return match_list
|
106
|
+
end
|
107
|
+
|
108
|
+
alias_method :match_fullname, :get_matches
|
109
|
+
|
110
|
+
# return true if two names (n1 and n2) are same; false otherwise
|
111
|
+
# style = :short means the pieces are first/middle/last/suffix; firstname/middlename/lastname/suffix otherwise
|
112
|
+
def names_match?(n1, style1, n2, style2)
|
113
|
+
f1 = style1 == :short ? n1.first : n1.firstname
|
114
|
+
m1 = style1 == :short ? n1.middle : n1.middlename
|
115
|
+
l1 = style1 == :short ? n1.last : n1.lastname
|
116
|
+
|
117
|
+
f2 = style2 == :short ? n2.first : n2.firstname
|
118
|
+
m2 = style2 == :short ? n2.middle : n2.middlename
|
119
|
+
l2 = style2 == :short ? n2.last : n2.lastname
|
120
|
+
|
121
|
+
# first/last name have to be provided
|
122
|
+
return false if l1.nil? || l2.nil? || f1.nil? || f2.nil?
|
123
|
+
return false if l1.downcase.strip != l2.downcase.strip
|
124
|
+
|
125
|
+
unless @options[:skip_match_suffix]
|
126
|
+
s1 = n1.suffix
|
127
|
+
s2 = n2.suffix
|
128
|
+
return false if s1 && s2 && s1.gsub('.', '').downcase.strip != s2.gsub('.', '').downcase.strip
|
129
|
+
end
|
130
|
+
|
131
|
+
return false if !abbr_match?(f1, f2)
|
132
|
+
m1.nil? or m2.nil? or abbr_match?(m1, m2)
|
133
|
+
end
|
134
|
+
|
135
|
+
# 2 strings are 'abbr-match'ed if
|
136
|
+
# . they are same, or
|
137
|
+
# . one string is one char long and the other starts with it
|
138
|
+
# ex: 'abc edf' abbr-matches 'a. e' or 'abc edf', but not 'abc e'
|
139
|
+
def abbr_match?(str1, str2)
|
140
|
+
build_middlename_regexp(str1) =~ str2
|
141
|
+
end
|
142
|
+
|
143
|
+
private
|
144
|
+
|
145
|
+
def match_first_last_and_suffix(name)
|
146
|
+
conditions = []
|
147
|
+
queries = []
|
148
|
+
conditions << '(' + @condition + ')' if @condition
|
149
|
+
queries << '(placeholder)'
|
150
|
+
conditions << "(#{@mapping[:first]} = ? OR #{@mapping[:first]} REGEXP ?)"
|
151
|
+
queries << name[:first]
|
152
|
+
queries << '^' + name[:first][0].chr + '([.]?' + (name[:first] =~ /^[a-z]\.?$/i ? '|[a-z]+' : '') + ')$'
|
153
|
+
conditions << "#{@mapping[:last]} = ?"
|
154
|
+
queries << name[:last]
|
155
|
+
queries[0] = conditions.join(' AND ')
|
156
|
+
matched_list = @table.all(:conditions => queries)
|
157
|
+
unless @options[:skip_match_suffix]
|
158
|
+
|
159
|
+
suffix = name[:suffix] ? name[:suffix].gsub('.', '').downcase.strip : nil
|
160
|
+
|
161
|
+
# exactly match suffix
|
162
|
+
matched_list_with_suffix = matched_list.select{|r|
|
163
|
+
r_suffix = r.send(@mapping[:suffix])
|
164
|
+
r_suffix.to_s.downcase.strip == suffix.to_s
|
165
|
+
}
|
166
|
+
return matched_list_with_suffix if matched_list_with_suffix.size > 0
|
167
|
+
|
168
|
+
# fuzzy match suffix( NULL matches NON-NULL )
|
169
|
+
return matched_list.select{|r|
|
170
|
+
r_suffix = r.send(@mapping[:suffix])
|
171
|
+
r_suffix.nil? || suffix.nil? || suffix == r_suffix.gsub('.', '').downcase.strip
|
172
|
+
}
|
173
|
+
|
174
|
+
end
|
175
|
+
return matched_list
|
176
|
+
end
|
177
|
+
|
178
|
+
def build_middlename_regexp(middlename)
|
179
|
+
middle_arr = middlename.split(/[. ]+/)
|
180
|
+
tmp_reg = []
|
181
|
+
# Z M |Z M
|
182
|
+
# Z. M. |ZM
|
183
|
+
# Z.M. |Zoellner M
|
184
|
+
# Z Miller |Z Miller
|
185
|
+
# Zoellner M |Zoellner Miller
|
186
|
+
# Zoellner Miller |
|
187
|
+
# K.Taylor
|
188
|
+
if middle_arr.size > 1
|
189
|
+
last_ele = middle_arr.pop
|
190
|
+
tmp_reg << middle_arr.map{|m| Regexp.escape(m[0].chr) + '[. ]+'}.join + Regexp.escape(last_ele) + '[.]?'
|
191
|
+
middle_arr.push(last_ele)
|
192
|
+
end
|
193
|
+
tmp_reg << middle_arr.map{|m| m.size == 1 ? (Regexp.escape(m) + '\S*') : (Regexp.escape(m[0].chr) + '(' + Regexp.escape(m[1..-1]) + '|[.])?')}.join('[. ]+')
|
194
|
+
Regexp.new("^(#{tmp_reg.join('|')})$", true)
|
195
|
+
end
|
196
|
+
|
197
|
+
end
|
198
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'fullname/matcher/core'
|
2
|
+
require 'fullname/matcher/version'
|
3
|
+
|
4
|
+
module Fullname
  module Matcher

    # Convenience constructor: builds a Core from the given table, column
    # mapping and options, hands it to the optional block (e.g. to call
    # set_condition on it) and returns it.
    def self.create(table, mapping = {}, options = {}, &blk)
      Core.new(table, mapping, options).tap do |matcher|
        blk.call(matcher) if blk
      end
    end

  end
end
|
15
|
+
|
data/lib/fullname.rb
ADDED
metadata
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fullname-matcher
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- xiaohui
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-01-10 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: fullname-parser
|
16
|
+
requirement: &11930540 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.0.0
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *11930540
|
25
|
+
description: Provide fullname, search in database with proper conditions
|
26
|
+
email:
|
27
|
+
- wesley@zhangxh.net
|
28
|
+
executables: []
|
29
|
+
extensions: []
|
30
|
+
extra_rdoc_files: []
|
31
|
+
files:
|
32
|
+
- .gitignore
|
33
|
+
- Gemfile
|
34
|
+
- README.md
|
35
|
+
- fullname-matcher.gemspec
|
36
|
+
- lib/fullname.rb
|
37
|
+
- lib/fullname/equivalence.rb
|
38
|
+
- lib/fullname/matcher.rb
|
39
|
+
- lib/fullname/matcher/core.rb
|
40
|
+
- lib/fullname/matcher/version.rb
|
41
|
+
homepage: https://github.com/xiaohui-zhangxh/fullname-matcher
|
42
|
+
licenses: []
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options: []
|
45
|
+
require_paths:
|
46
|
+
- lib
|
47
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
48
|
+
none: false
|
49
|
+
requirements:
|
50
|
+
- - ! '>='
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: '0'
|
53
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
55
|
+
requirements:
|
56
|
+
- - ! '>='
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '0'
|
59
|
+
requirements: []
|
60
|
+
rubyforge_project:
|
61
|
+
rubygems_version: 1.8.10
|
62
|
+
signing_key:
|
63
|
+
specification_version: 3
|
64
|
+
summary: Match fullname in database
|
65
|
+
test_files: []
|