fullname-matcher 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,198 @@
1
+ # this class gets people, based on name match, from a table that stores firstname/middlename/lastname/suffix separately.
2
+ #
3
+ # try a series of searches (exact match first, then different variations) until success
4
+ # a lot of the logic and examples (e.g. middlename handling, suffix handling, when to use abbreviation, when to use 'regexp') is commented inline
5
+ #
6
+ # public methods:
7
+ # . new(table, mapping={}, options = {})
8
+ # when constructing a name matcher using xxx = Fullname::Matcher.create(...) (or Fullname::Matcher::Core.new(...)), the first arg is the table the search is executed in;
9
+ # default column mapping is {:first => 'first', :middle => 'middle', :last => 'last', :suffix => 'suffix'};
10
+ # - if the actual mapping is different, it should be provided as the second arg of new()
11
+ # - options:
12
+ # :skip_match_middle_name default is false
13
+ # . set_condition(c)
14
+ # if there's other condition (like "data_import_key = 'yyyy.mm.dd'") in search criteria, set it this way
15
+ #
16
+ # . get_matches
17
+ # if the name is one string, use get_matches(orig_name)
18
+ # if the name is in pieces, use get_matches(firstname, middlename, lastname, suffix)
19
+ # return ALL matches of the first successful search or [] if all searches fail
20
+ # . match_fullname
21
+ # alias of get_matches
22
+ # . names_match?(n1, style1, n2, style2)
23
+ # return true if two names (n1 and n2) are the same; false otherwise
24
+ #
25
+ require 'fullname/parser'
26
+ require 'fullname/equivalence'
27
+
28
module Fullname
  module Matcher
    # Finds people by name in a table that stores first/middle/last/suffix in
    # separate columns.
    #
    # The table object only needs to respond to `all(:conditions => [...])`
    # and return a list of records; each record is read through `send` using
    # the column names from the mapping.
    #
    # Searches run from strict to loose and stop at the first one that
    # succeeds:
    #   1. first name (exact, or stored as an initial) + last name, narrowed by suffix
    #   2. middle name: exact, then abbreviation-aware regexp, then NULL in the table
    #   3. the same search repeated for every equivalent of the first name
    class Core

      # Default column names for each name piece.
      DEFAULT_MAPPING = {:first => 'first', :middle => 'middle', :last => 'last', :suffix => 'suffix'}.freeze

      DEFAULT_OPTIONS = {
        # when true, first/last(/suffix) matches are returned without looking at the middle name
        :skip_match_middle_name => false,
        # when true, the suffix column is never read or compared
        :skip_match_suffix      => false
      }.freeze

      class Error < StandardError; end

      attr_accessor :options

      # table   - object queried through `all(:conditions => [...])`
      # mapping - overrides for DEFAULT_MAPPING (piece => column name)
      # options - overrides for DEFAULT_OPTIONS
      def initialize(table, mapping = {}, options = {})
        @table     = table
        @mapping   = DEFAULT_MAPPING.merge(mapping)
        @condition = nil
        @options   = DEFAULT_OPTIONS.merge(options)
      end

      # Extra SQL fragment that is AND-ed (wrapped in parentheses) to every query.
      def set_condition(c)
        @condition = c
      end

      # get_matches(fullname)
      # get_matches(first, middle, last, suffix)
      # get_matches(first, middle, last, suffix, {:recursive => false})
      #
      # Returns ALL records found by the first successful search, or [] when
      # every search fails. A blank first or last name can never match and
      # returns [] without touching the table.
      # :recursive (default true) controls whether equivalents of the first
      # name are tried when the name itself finds nothing.
      #
      # Raises Error when called with an unsupported number of arguments.
      def get_matches(*args)
        match_options = {}
        name =
          case args.size
          when 1
            ::Fullname::Parser.parse_fullname(args[0])
          when 4, 5
            # a nil fifth argument is treated like "no options"
            match_options = args[4] || {}
            {:first => args[0], :middle => args[1], :last => args[2], :suffix => args[3]}
          else
            raise Error, 'illeagle arguments length of get_matches, must be the length of 1,4,5'
          end
        recursive = match_options.fetch(:recursive, true)

        # an empty first name used to crash while building the REGEXP pattern
        return [] if missing_name_part?(name[:first]) || missing_name_part?(name[:last])

        match_list = match_first_last_and_suffix(name)
        unless match_list.empty?
          # skip validating middlename: rows with any middle name (or none) are returned
          return match_list if @options[:skip_match_middle_name]

          match_list = filter_by_middle_name(match_list, name[:middle])
          return match_list unless match_list.empty?
        end

        # nothing matched (or every candidate had a conflicting middle name):
        # retry with the equivalents of the first name, one level deep only
        return [] unless recursive

        equivalents = ::Fullname::Equivalence.get_name_equivalence(name[:first]) || []
        found = []
        equivalents.each do |n|
          found += get_matches(n, name[:middle], name[:last], name[:suffix], {:recursive => false})
        end
        # several equivalents can hit the same row (e.g. a stored initial), report it once
        found.uniq
      end

      alias_method :match_fullname, :get_matches

      # Returns true if two names (n1 and n2) are the same, false otherwise.
      # style = :short means the pieces are read through first/middle/last;
      # any other style reads firstname/middlename/lastname. The suffix is
      # always read through `suffix`.
      #
      # Last names must be equal ignoring case and surrounding whitespace,
      # suffixes (when both are present and suffix matching is enabled) must be
      # equal ignoring dots and case, first names must abbr-match, and middle
      # names must abbr-match unless one of them is nil.
      def names_match?(n1, style1, n2, style2)
        f1, m1, l1 = name_parts(n1, style1)
        f2, m2, l2 = name_parts(n2, style2)

        # first/last name have to be provided
        return false if l1.nil? || l2.nil? || f1.nil? || f2.nil?
        return false if l1.downcase.strip != l2.downcase.strip

        unless @options[:skip_match_suffix]
          s1 = n1.suffix
          s2 = n2.suffix
          return false if s1 && s2 && normalize_suffix(s1) != normalize_suffix(s2)
        end

        return false unless abbr_match?(f1, f2)
        m1.nil? || m2.nil? || abbr_match?(m1, m2)
      end

      # Returns true when str2 is accepted by the abbreviation-aware pattern
      # built from str1 (see build_middlename_regexp), false otherwise.
      #   ex: 'abc edf' abbr-matches 'abc edf' and 'a. e'
      def abbr_match?(str1, str2)
        !(build_middlename_regexp(str1) =~ str2).nil?
      end

      private

      # A name piece is unusable when it is nil or contains only whitespace.
      def missing_name_part?(value)
        value.nil? || value.to_s.strip.empty?
      end

      # Reads [first, middle, last] from a name object using the accessor style.
      def name_parts(n, style)
        if style == :short
          [n.first, n.middle, n.last]
        else
          [n.firstname, n.middlename, n.lastname]
        end
      end

      # Canonical form used to compare suffixes: no dots, lower case, trimmed.
      def normalize_suffix(value)
        value.to_s.gsub('.', '').downcase.strip
      end

      # Narrows first/last candidates by middle name, strictest rule first.
      # Returns [] when a middle name was given and no candidate is compatible.
      def filter_by_middle_name(candidates, middle)
        wanted = middle.to_s.downcase.strip

        # 1. exactly match (a missing middle name matches NULL/blank in the table)
        exact = candidates.select { |r| r.send(@mapping[:middle]).to_s.downcase.strip == wanted }
        return exact unless exact.empty?

        # 2.2 fuzzy match: no middle name was given, so every candidate is acceptable
        return candidates if wanted.empty?

        # 2. regexp match (initials vs. spelled-out middle names)
        m_re = build_middlename_regexp(middle)
        by_pattern = candidates.select do |r|
          r_middle_name = r.send(@mapping[:middle])
          r_middle_name && r_middle_name =~ m_re
        end
        return by_pattern unless by_pattern.empty?

        # 2.1 fuzzy match: if middlename in DB is NULL, it matches
        candidates.select { |r| r.send(@mapping[:middle]).nil? }
      end

      # Queries the table for rows whose last name equals name[:last] and whose
      # first name either equals name[:first] or is stored as its initial, then
      # (unless :skip_match_suffix is set) narrows the rows by suffix.
      def match_first_last_and_suffix(name)
        first = name[:first]

        clauses = []
        clauses << "(#{@condition})" if @condition
        clauses << "(#{@mapping[:first]} = ? OR #{@mapping[:first]} REGEXP ?)"
        clauses << "#{@mapping[:last]} = ?"

        # A stored initial ("J" / "J.") always matches. When the searched first
        # name is itself just an initial, any stored name starting with that
        # letter matches as well.
        initial_pattern = '^' + first[0, 1] + '([.]?' + (first =~ /^[a-z]\.?$/i ? '|[a-z]+' : '') + ')$'

        matched_list = @table.all(:conditions => [clauses.join(' AND '), first, initial_pattern, name[:last]])
        return matched_list if @options[:skip_match_suffix]

        suffix = name[:suffix] ? normalize_suffix(name[:suffix]) : nil

        # exactly match suffix; both sides are normalized so "Jr." equals "jr"
        exact = matched_list.select { |r| normalize_suffix(r.send(@mapping[:suffix])) == suffix.to_s }
        return exact unless exact.empty?

        # fuzzy match suffix( NULL matches NON-NULL )
        matched_list.select do |r|
          r_suffix = r.send(@mapping[:suffix])
          r_suffix.nil? || suffix.nil? || suffix == normalize_suffix(r_suffix)
        end
      end

      # Builds a case-insensitive regexp that accepts the given (middle) name
      # and its abbreviated / spelled-out variants. Pieces are separated by
      # dots and/or spaces; empty pieces (e.g. from a leading dot) are ignored.
      #
      #   Z M              |Z M
      #   Z. M.            |ZM
      #   Z.M.             |Zoellner M
      #   Z Miller         |Z Miller
      #   Zoellner M       |Zoellner Miller
      #   Zoellner Miller  |
      #   K.Taylor
      def build_middlename_regexp(middlename)
        pieces = middlename.to_s.split(/[. ]+/).reject { |piece| piece.empty? }
        alternatives = []

        if pieces.size > 1
          # every piece but the last reduced to its initial, last piece kept verbatim
          leading = pieces[0...-1].map { |m| Regexp.escape(m[0, 1]) + '[. ]+' }.join
          alternatives << leading + Regexp.escape(pieces.last) + '[.]?'
        end

        # an initial matches any word starting with it; a full word matches
        # itself or its initial (with an optional dot)
        alternatives << pieces.map do |m|
          if m.size == 1
            Regexp.escape(m) + '\S*'
          else
            Regexp.escape(m[0, 1]) + '(' + Regexp.escape(m[1..-1]) + '|[.])?'
          end
        end.join('[. ]+')

        Regexp.new("^(#{alternatives.join('|')})$", true)
      end

    end
  end
end
@@ -0,0 +1,5 @@
1
module Fullname
  module Matcher
    # Version of the fullname-matcher gem; frozen so it cannot be mutated at runtime.
    VERSION = '1.0.0'.freeze
  end
end
@@ -0,0 +1,15 @@
1
+ require 'fullname/matcher/core'
2
+ require 'fullname/matcher/version'
3
+
4
module Fullname
  module Matcher

    # Convenience constructor: builds a Core matcher from the given table,
    # column mapping and options. When a block is given the new matcher is
    # passed to it before being returned.
    def self.create(table, mapping = {}, options = {}, &blk)
      matcher = Core.new(table, mapping, options)
      yield matcher if blk
      matcher
    end

  end
end
15
+
data/lib/fullname.rb ADDED
@@ -0,0 +1,4 @@
1
# Root namespace of the gem; it is declared empty here and filled in by the
# file required below.
module Fullname; end
3
+
4
+ require File.expand_path("../fullname/matcher", __FILE__)
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fullname-matcher
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - xiaohui
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-01-10 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: fullname-parser
16
+ requirement: &11930540 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 1.0.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *11930540
25
+ description: Provide fullname, search in database with proper conditions
26
+ email:
27
+ - wesley@zhangxh.net
28
+ executables: []
29
+ extensions: []
30
+ extra_rdoc_files: []
31
+ files:
32
+ - .gitignore
33
+ - Gemfile
34
+ - README.md
35
+ - fullname-matcher.gemspec
36
+ - lib/fullname.rb
37
+ - lib/fullname/equivalence.rb
38
+ - lib/fullname/matcher.rb
39
+ - lib/fullname/matcher/core.rb
40
+ - lib/fullname/matcher/version.rb
41
+ homepage: https://github.com/xiaohui-zhangxh/fullname-matcher
42
+ licenses: []
43
+ post_install_message:
44
+ rdoc_options: []
45
+ require_paths:
46
+ - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ none: false
49
+ requirements:
50
+ - - ! '>='
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ! '>='
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ requirements: []
60
+ rubyforge_project:
61
+ rubygems_version: 1.8.10
62
+ signing_key:
63
+ specification_version: 3
64
+ summary: Match fullname in database
65
+ test_files: []