fullname-matcher 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,198 @@
1
+ # this class gets people, based on name match, from a table that stores firstname/middlename/lastname/suffix separately.
2
+ #
3
+ # try a series of searches (exact match first, then different variations) until success
4
+ # a lot of the logic and examples (e.g. middlename handling, suffix handling, when to use abbreviation, when to use 'regexp') are commented inline
5
+ #
6
+ # public methods:
7
+ # . new(table, mapping={}, options = {})
8
+ # when constructing a name matcher using xxx = Fullname::Matcher::Core.new(...) (or Fullname::Matcher.create(...)), the first arg is the table where the search is executed in;
9
+ # default column mapping is {:first => 'first', :middle => 'middle', :last => 'last', :suffix => 'suffix'};
10
+ # - if the actual mapping is different, it should be provided as the second arg of new()
11
+ # - options:
12
+ # :skip_match_middle_name and :skip_match_suffix, both default to false
13
+ # . set_condition(c)
14
+ # if there's other condition (like "data_import_key = 'yyyy.mm.dd'") in search criteria, set it this way
15
+ #
16
+ # . get_matches
17
+ # if the name is one string, use get_matches(orig_name)
18
+ # if the name is in pieces, use get_matches(firstname, middlename, lastname, suffix)
19
+ # return ALL matches of the first successful search or [] if all searches fail
20
+ # . match_fullname
21
+ # alias of get_matches
22
+ # . names_match?(n1, style1, n2, style2)
23
+ # return true if two names (n1 and n2) are same; false otherwise
24
+ #
25
+ require 'fullname/parser'
26
+ require 'fullname/equivalence'
27
+
28
+ module Fullname::Matcher
29
class Core
  # Finds people by name in a table that stores first/middle/last/suffix in
  # separate columns. A search starts with first + last (+ suffix), is then
  # narrowed by middle name, and finally falls back to equivalent first names
  # when nothing matched.

  # Default names of the record attributes that hold each name part.
  DEFAULT_MAPPING = {:first => 'first', :middle => 'middle', :last => 'last', :suffix => 'suffix'}.freeze
  DEFAULT_OPTIONS = {
    :skip_match_middle_name => false, # true: return first/last/suffix matches without checking the middle name
    :skip_match_suffix      => false  # true: never read or compare the suffix (e.g. the table has no suffix column)
  }.freeze

  class Error < StandardError; end

  attr_accessor :options

  # table   - object responding to all(:conditions => [sql, *binds]) and
  #           returning records that respond to the mapped attribute names.
  # mapping - overrides for DEFAULT_MAPPING.
  # options - overrides for DEFAULT_OPTIONS.
  def initialize(table, mapping = {}, options = {})
    @table     = table
    @mapping   = DEFAULT_MAPPING.merge(mapping)
    @condition = nil
    @options   = DEFAULT_OPTIONS.merge(options)
  end

  # Extra SQL condition (inserted verbatim, wrapped in parentheses and ANDed
  # with the name conditions) applied to every search.
  def set_condition(c)
    @condition = c
  end

  # Returns the records matching a name.
  #
  #   get_matches(full_name_string)
  #   get_matches(first, middle, last, suffix)
  #   get_matches(first, middle, last, suffix, {:recursive => false})
  #
  # Returns all matches of the first successful search, or [] when every
  # search fails or when the first or last name is missing/blank.
  # Raises Error for any other number of arguments.
  def get_matches(*args)
    match_options = {}
    name =
      case args.size
      when 1
        ::Fullname::Parser.parse_fullname(args[0])
      when 4, 5
        match_options = args.pop || {} if args.size == 5
        {:first => args[0], :middle => args[1], :last => args[2], :suffix => args[3]}
      else
        raise Error, 'illeagle arguments length of get_matches, must be the length of 1,4,5'
      end
    recursive = match_options.fetch(:recursive, true)

    # A first and a last name are mandatory; a blank one would otherwise
    # crash while building the first-initial pattern.
    return [] if name.nil? || name_part_blank?(name[:first]) || name_part_blank?(name[:last])

    match_list = match_first_last_and_suffix(name)

    unless match_list.empty?
      # Middle names of the candidates are deliberately not inspected here.
      return match_list if @options[:skip_match_middle_name]

      narrowed = match_middle_name(match_list, name[:middle])
      return narrowed unless narrowed.empty?

      # Every candidate carries a middle name that contradicts the requested
      # one, so none of them is a match.
      match_list = []
    end

    # Nothing matched: retry once with each equivalent of the first name.
    if recursive
      equivalents = ::Fullname::Equivalence.get_name_equivalence(name[:first]) || []
      equivalents.each do |equivalent|
        match_list += get_matches(equivalent, name[:middle], name[:last], name[:suffix], {:recursive => false})
      end
      # Several equivalents share an initial and can hit the same
      # initial-only record, so drop repeated records.
      match_list = match_list.uniq
    end

    match_list
  end

  alias_method :match_fullname, :get_matches

  # Returns true when n1 and n2 denote the same name, false otherwise.
  # style = :short reads first/middle/last from the object; any other value
  # reads firstname/middlename/lastname. The suffix is always read via #suffix
  # unless options[:skip_match_suffix] is set.
  def names_match?(n1, style1, n2, style2)
    f1, m1, l1 = name_parts(n1, style1)
    f2, m2, l2 = name_parts(n2, style2)

    # first/last name have to be provided
    return false if [f1, l1, f2, l2].any? { |part| part.nil? }
    return false if l1.downcase.strip != l2.downcase.strip

    unless @options[:skip_match_suffix]
      s1 = normalize_suffix(n1.suffix)
      s2 = normalize_suffix(n2.suffix)
      # A missing/blank suffix on either side is compatible with anything.
      return false if s1 && s2 && s1 != s2
    end

    return false unless abbr_match?(f1, f2)
    m1.nil? || m2.nil? || abbr_match?(m1, m2)
  end

  # Returns true when str2 is an accepted spelling of str1 according to the
  # pattern produced by build_middlename_regexp(str1): the same words, or
  # words shortened to their initial (optionally followed by a dot), or the
  # full form of a word that str1 gives only as an initial.
  #   abbr_match?('abc edf', 'a. e')  # => true
  #   abbr_match?('a', 'abc')         # => true
  #   abbr_match?('abc', 'xyz')       # => false
  def abbr_match?(str1, str2)
    !!(build_middlename_regexp(str1) =~ str2)
  end

  private

  # Queries the table for records whose last name equals name[:last] and
  # whose first name either equals name[:first] or is just its initial, then
  # (unless suffix matching is disabled) narrows the result by suffix.
  def match_first_last_and_suffix(name)
    first = name[:first]

    clauses = []
    clauses << "(#{@condition})" if @condition
    clauses << "(#{@mapping[:first]} = ? OR #{@mapping[:first]} REGEXP ?)"
    clauses << "#{@mapping[:last]} = ?"

    # Stored initial ("J" / "J.") matches a full first name; when the
    # requested first name is itself only an initial, any stored name that
    # starts with that letter matches as well.
    # NOTE(review): the initial is placed in the pattern unescaped, as before;
    # confirm first names never start with a regexp metacharacter.
    initial_pattern = '^' + first[0, 1] + '([.]?' + (first =~ /^[a-z]\.?$/i ? '|[a-z]+' : '') + ')$'

    candidates = @table.all(:conditions => [clauses.join(' AND '), first, initial_pattern, name[:last]])
    return candidates if @options[:skip_match_suffix]

    wanted = normalize_suffix(name[:suffix])

    # Exact suffix match; dots, case and surrounding blanks are ignored on
    # both sides so that a stored "Jr." equals a requested "Jr".
    exact = candidates.select { |r| normalize_suffix(r.send(@mapping[:suffix])) == wanted }
    return exact unless exact.empty?

    # Fuzzy suffix match: a missing suffix on either side matches anything.
    candidates.select { |r| wanted.nil? || normalize_suffix(r.send(@mapping[:suffix])).nil? }
  end

  # Narrows candidates by middle name. Tries, in order: exact match
  # (case/blank insensitive), abbreviation match, then records without a
  # middle name. Returns [] when the requested middle name contradicts every
  # candidate, and all candidates when no middle name was requested.
  def match_middle_name(candidates, middle)
    wanted = middle.to_s.downcase.strip

    # 1. exact match
    exact = candidates.select { |r| r.send(@mapping[:middle]).to_s.downcase.strip == wanted }
    return exact unless exact.empty?

    # 2.2 no middle name requested: nothing can contradict, keep everything
    return candidates if name_part_blank?(middle)

    # 2. abbreviation match
    pattern = build_middlename_regexp(middle)
    abbreviated = candidates.select do |r|
      stored = r.send(@mapping[:middle])
      stored && stored =~ pattern
    end
    return abbreviated unless abbreviated.empty?

    # 2.1 fuzzy match: a record without a middle name matches
    candidates.select { |r| name_part_blank?(r.send(@mapping[:middle])) }
  end

  # Builds a case-insensitive pattern accepting the spellings of a middle name.
  #
  #   requested        | accepted (examples)
  #   Z M / Z. M.      | Z M, ZM..., Zoellner M, Zoellner Miller
  #   Z Miller         | Z Miller, Zoellner Miller
  #   Zoellner Miller  | Z M, Z. Miller, Zoellner M., Zoellner Miller
  #
  # Words are separated by dots and/or blanks; empty tokens produced by a
  # leading separator (" M", ".M") are ignored.
  def build_middlename_regexp(middlename)
    words = middlename.to_s.split(/[. ]+/).reject { |word| word.empty? }
    alternatives = []

    # Form 1: every word but the last reduced to its initial, last word kept.
    if words.size > 1
      initials = words[0...-1].map { |word| Regexp.escape(word[0, 1]) + '[. ]+' }.join
      alternatives << initials + Regexp.escape(words.last) + '[.]?'
    end

    # Form 2: an initial may expand to any word; a full word may shrink to
    # its initial (optionally dotted).
    word_patterns = words.map do |word|
      if word.size == 1
        Regexp.escape(word) + '\S*'
      else
        Regexp.escape(word[0, 1]) + '(' + Regexp.escape(word[1..-1]) + '|[.])?'
      end
    end
    alternatives << word_patterns.join('[. ]+')

    Regexp.new("^(#{alternatives.join('|')})$", Regexp::IGNORECASE)
  end

  # Returns [first, middle, last] read from n using the accessor style.
  def name_parts(n, style)
    if style == :short
      [n.first, n.middle, n.last]
    else
      [n.firstname, n.middlename, n.lastname]
    end
  end

  # Lower-cases a suffix and removes dots and surrounding blanks; returns nil
  # for a missing or blank suffix.
  def normalize_suffix(suffix)
    return nil if suffix.nil?
    cleaned = suffix.to_s.gsub('.', '').downcase.strip
    cleaned.empty? ? nil : cleaned
  end

  # True for nil and for values that are empty once stripped.
  def name_part_blank?(value)
    value.nil? || value.to_s.strip.empty?
  end

end
198
+ end
@@ -0,0 +1,5 @@
1
module Fullname
  module Matcher
    # Release version of the fullname-matcher gem. Frozen so the shared
    # constant string cannot be mutated in place by accident.
    VERSION = '1.0.0'.freeze
  end
end
@@ -0,0 +1,15 @@
1
+ require 'fullname/matcher/core'
2
+ require 'fullname/matcher/version'
3
+
4
module Fullname
  module Matcher

    # Convenience constructor: builds a Core for +table+ with the given
    # column +mapping+ and +options+, hands it to the optional block for
    # further configuration, and returns it.
    def self.create(table, mapping = {}, options = {}, &blk)
      Core.new(table, mapping, options).tap do |matcher|
        blk.call(matcher) if blk
      end
    end

  end
end
15
+
data/lib/fullname.rb ADDED
@@ -0,0 +1,4 @@
1
# Top-level namespace under which the matcher classes are defined.
module Fullname; end
3
+
4
+ require File.expand_path("../fullname/matcher", __FILE__)
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fullname-matcher
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - xiaohui
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-01-10 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: fullname-parser
16
+ requirement: &11930540 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 1.0.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *11930540
25
+ description: Provide fullname, search in database with proper conditions
26
+ email:
27
+ - wesley@zhangxh.net
28
+ executables: []
29
+ extensions: []
30
+ extra_rdoc_files: []
31
+ files:
32
+ - .gitignore
33
+ - Gemfile
34
+ - README.md
35
+ - fullname-matcher.gemspec
36
+ - lib/fullname.rb
37
+ - lib/fullname/equivalence.rb
38
+ - lib/fullname/matcher.rb
39
+ - lib/fullname/matcher/core.rb
40
+ - lib/fullname/matcher/version.rb
41
+ homepage: https://github.com/xiaohui-zhangxh/fullname-matcher
42
+ licenses: []
43
+ post_install_message:
44
+ rdoc_options: []
45
+ require_paths:
46
+ - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ none: false
49
+ requirements:
50
+ - - ! '>='
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ! '>='
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ requirements: []
60
+ rubyforge_project:
61
+ rubygems_version: 1.8.10
62
+ signing_key:
63
+ specification_version: 3
64
+ summary: Match fullname in database
65
+ test_files: []