cijef-fullname-matcher 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ require 'fullname/matcher/core'
2
+ require 'fullname/matcher/version'
3
+
4
module Fullname
  module Matcher

    # Factory for a matcher core.
    #
    # Builds a Core from +table+, +mapping+ and +options+ (all passed through
    # unchanged to Core.new). When a block is supplied, the freshly built core
    # is handed to it once, e.g. so the caller can configure it. The core
    # itself is always the return value; the block's result is ignored.
    def self.create(table, mapping = {}, options = {}, &blk)
      Core.new(table, mapping, options).tap do |matcher|
        blk.call(matcher) if blk
      end
    end

  end
end
15
+
@@ -0,0 +1,208 @@
1
+ # this class gets people, based on name match, from a table that stores firstname/middlename/lastname/suffix separately.
2
+ #
3
+ # try a series of searches (exact match first, then different variations) until success
4
+ # a lot of the logic/examples (e.g. middlename handling, suffix handling, when to use abbreviations, when to use 'regexp') is commented inline
5
+ #
6
+ # public methods:
7
+ # . new(table, mapping={}, options = {})
8
+ # when constructing a name matcher using xxx = Fullname::Matcher.create(...) (or Fullname::Matcher::Core.new(...)), the first arg is the table the search is executed against;
9
+ # default column mapping is {:first => 'first', :middle => 'middle', :last => 'last', :suffix => 'suffix'};
10
+ # - if the actual mapping is different, it should be provided as the second arg of new()
11
+ # - options:
12
+ # :skip_match_middle_name default is false
13
+ # . set_condition(c)
14
+ # if there's other condition (like "data_import_key = 'yyyy.mm.dd'") in search criteria, set it this way
15
+ #
16
+ # . get_matches
17
+ # if the name is one string, use get_matches(orig_name)
18
+ # if the name is in pieces, use get_matches(firstname, middlename, lastname, suffix)
19
+ # return ALL matches of the first successful search or [] if all searches fail
20
+ # . match_fullname
21
+ # alias of get_matches
22
+ # . names_match?(n1, style1, n2, style2)
23
+ # return true if two names (n1 and n2) are same; false otherwise
24
+ #
25
+ require 'fullname/parser'
26
+ require 'fullname/equivalence'
27
+
28
+ module Fullname::Matcher
29
class Core

  # Default names of the record attributes (columns) that hold each name piece.
  DEFAULT_MAPPING = {:first => 'first', :middle => 'middle', :last => 'last', :suffix => 'suffix'}.freeze

  DEFAULT_OPTIONS = {
    :skip_match_middle_name         => false, # skip match middle name if middle name not provided.
    :null_middle_name_match_allowed => false, # when true, records with a blank middle name also count as middle-name matches
    :skip_match_suffix              => false  # skip match suffix if suffix not provided or no column suffix in database.
  }.freeze

  class Error < StandardError; end

  attr_accessor :options

  # table   - object the search runs against; it must respond to
  #           where(Array) and the result must respond to to_a
  #           (presumably an ActiveRecord model/relation - verify against callers).
  # mapping - overrides for DEFAULT_MAPPING.
  # options - overrides for DEFAULT_OPTIONS.
  def initialize(table, mapping = {}, options = {})
    @table     = table
    @mapping   = DEFAULT_MAPPING.merge(mapping)
    @condition = nil
    @options   = DEFAULT_OPTIONS.merge(options)
  end

  # Extra raw condition (like "data_import_key = 'yyyy.mm.dd'") that is ANDed,
  # wrapped in parentheses, into every search.
  def set_condition(c)
    @condition = c
  end

  # Search the table for records matching a name.
  #
  #   get_matches(orig_name)                                  # one string, parsed by Fullname::Parser
  #   get_matches(firstname, middlename, lastname, suffix)    # name in pieces
  #   get_matches(firstname, middlename, lastname, suffix, {:recursive => false})
  #
  # Returns ALL matches of the first successful search, or [] if every search
  # fails (or if first/last name is missing). Raises Core::Error when called
  # with any other number of arguments.
  def get_matches(*args)
    match_options = {}
    name =
      case args.size
      when 1
        ::Fullname::Parser.parse_fullname(args[0])
      when 4, 5
        match_options = args.pop if args.size == 5
        {:first => args[0], :middle => args[1], :last => args[2], :suffix => args[3]}
      else
        raise Error, 'illeagle arguments length of get_matches, must be the length of 1,4,5'
      end
    recursive = match_options.fetch(:recursive, true)
    return [] if name[:first].nil? || name[:last].nil?

    match_list = match_first_last_and_suffix(name)

    # skip validating middlename if @options[:skip_match_middle_name] == true
    # all matched result which middle name is NULL or NON-NULL will be returned
    return match_list if @options[:skip_match_middle_name] && !match_list.empty?

    unless match_list.empty?
      # 1. exactly match
      exact_match_list = match_list.select do |r|
        compare_without_dot(r.send(@mapping[:middle]), name[:middle]) && suffix_exactly_matches?(r, name)
      end
      return exact_match_list if !exact_match_list.empty? && !@options[:null_middle_name_match_allowed]

      # 2.2 fuzzy match: assume all matches since name[:middle] is NULL
      return match_list unless name[:middle]

      # 2. name[:middle] is not NULL: regexp match, then NULL-middlename fallback
      by_middle = match_middle_name(match_list, name[:middle])
      return by_middle unless by_middle.empty?

      # clear match list if don't match middlename
      match_list = []
    end

    # if nothing matches, try to search with equivalence of first name
    if match_list.empty? && recursive
      firstname_array = ::Fullname::Equivalence.get_name_equivalence(name[:first])
      (firstname_array || []).each do |n|
        match_list += get_matches(n, name[:middle], name[:last], name[:suffix], {:recursive => false})
      end
      # several equivalents can hit the same record (e.g. through a shared
      # initial), so report each record only once
      match_list = match_list.uniq
    end

    match_list
  end

  alias_method :match_fullname, :get_matches

  # return true if two names (n1 and n2) are same; false otherwise
  # style = :short means the pieces are first/middle/last/suffix; firstname/middlename/lastname/suffix otherwise
  # (suffix is always read through #suffix, whatever the style)
  def names_match?(n1, style1, n2, style2)
    f1, m1, l1 = name_pieces(n1, style1)
    f2, m2, l2 = name_pieces(n2, style2)

    # first/last name have to be provided
    return false if l1.nil? || l2.nil? || f1.nil? || f2.nil?
    return false if l1.downcase.strip != l2.downcase.strip

    unless @options[:skip_match_suffix]
      s1 = n1.suffix
      s2 = n2.suffix
      # a missing suffix on either side is not treated as a mismatch
      return false if s1 && s2 && !compare_without_dot(s1, s2)
    end

    return false unless abbr_match?(f1, f2)
    m1.nil? || m2.nil? || abbr_match?(m1, m2)
  end

  # true when str2 is accepted by the regexp that build_middlename_regexp
  # derives from str1 (identical strings, initials, dotted initials ...),
  # false otherwise.
  # ex: 'abc edf' abbr-matches 'a. e' or 'abc edf'
  def abbr_match?(str1, str2)
    # =~ yields an index or nil; coerce so the predicate returns true/false
    !!(build_middlename_regexp(str1) =~ str2)
  end

  private

  # First/middle/last pieces of a name object, read with the accessor names
  # that belong to the given style.
  def name_pieces(n, style)
    if style == :short
      [n.first, n.middle, n.last]
    else
      [n.firstname, n.middlename, n.lastname]
    end
  end

  # Suffix part of the exact-match test. Never touches the suffix attribute
  # when suffix matching is switched off, so tables without a suffix column
  # keep working.
  def suffix_exactly_matches?(record, name)
    return true if @options[:skip_match_suffix]
    compare_without_dot(record.send(@mapping[:suffix]), name[:suffix])
  end

  # Middle-name filtering for a non-NULL searched middle name.
  # Returns the regexp matches if there are any, otherwise the records whose
  # middle name is NULL (possibly []).
  def match_middle_name(match_list, middlename)
    m_re = build_middlename_regexp(middlename)
    matched = match_list.select do |r|
      r_middle_name = r.send(@mapping[:middle])
      (r_middle_name && r_middle_name =~ m_re) ||
        (r_middle_name.to_s.strip.empty? && @options[:null_middle_name_match_allowed])
    end
    return matched unless matched.empty?

    # 2.1 fuzzy match: if middlename in DB is NULL, it matches
    match_list.select { |r| r.send(@mapping[:middle]).nil? }
  end

  # Query the table by last name and first name, then narrow by suffix
  # unless @options[:skip_match_suffix] is set.
  def match_first_last_and_suffix(name)
    conditions = []
    binds = []
    firstname_filter = nil
    conditions << "(#{@condition})" if @condition
    conditions << "#{@mapping[:last]} = ?"
    binds << name[:last]

    # if first name is abbreviation, fetch all firstnames then filter with Ruby regexp
    if name[:first] =~ /^[a-z]\.?$/i
      firstname_filter = %r{^#{name[:first][0].chr}.*}i
    # otherwise search directly in database, because search with regexp in DB won't use indexer
    else
      initial = name[:first][0].chr
      conditions << "(#{@mapping[:first]} IN (?, ?, ?))"
      binds << name[:first] << initial << (initial + '.')
    end

    # the table receives [sql_with_placeholders, bind1, bind2, ...]
    matched_list = @table.where([conditions.join(' AND ')] + binds).to_a
    if firstname_filter
      matched_list = matched_list.reject { |r| r.send(@mapping[:first]) !~ firstname_filter }
    end

    return matched_list if @options[:skip_match_suffix]

    # exactly match suffix
    unless name[:suffix].to_s.strip.empty?
      matched_list_with_suffix = matched_list.select { |r| compare_without_dot(r.send(@mapping[:suffix]), name[:suffix]) }
      return matched_list_with_suffix unless matched_list_with_suffix.empty?
    end

    # fuzzy match suffix( NULL matches NON-NULL )
    matched_list.select { |r| r.send(@mapping[:suffix]).to_s.strip.empty? || name[:suffix].nil? }
  end

  # true when both values are equal after to_s, removing dots, downcasing and
  # stripping (so nil, '' and '.' all compare equal, as do 'Jr' and 'jr.').
  def compare_without_dot(str1, str2)
    [str1, str2].map { |s| s.to_s.gsub('.', '').downcase.strip }.uniq.size == 1
  end

  # Build a case-insensitive regexp accepting the usual spellings of a middle name.
  #
  #   searched          | examples of accepted values
  #   Z M               | Z M
  #   Z. M.             | ZM
  #   Z.M.              | Zoellner M
  #   Z Miller          | Z Miller
  #   Zoellner M        | Zoellner Miller
  #   Zoellner Miller   |
  #   K.Taylor
  def build_middlename_regexp(middlename)
    # a leading dot/space makes split yield an empty first token; drop it so
    # the first-character lookups below never see an empty string
    parts = middlename.split(/[. ]+/).reject(&:empty?)
    alternatives = []

    if parts.size > 1
      # every piece but the last reduced to its initial, last piece spelled out
      initials = parts[0...-1].map { |m| Regexp.escape(m[0].chr) + '[. ]+' }.join
      alternatives << initials + Regexp.escape(parts.last) + '[.]?'
    end

    # each piece on its own: a single letter stands for any word starting with
    # it, a full word may also appear as a bare or dotted initial
    alternatives << parts.map do |m|
      if m.size == 1
        Regexp.escape(m) + '\S*'
      else
        Regexp.escape(m[0].chr) + '(' + Regexp.escape(m[1..-1]) + '|[.])?'
      end
    end.join('[. ]+')

    Regexp.new("^(#{alternatives.join('|')})$", true)
  end

end
208
+ end
@@ -0,0 +1,5 @@
1
module Fullname
  module Matcher
    # Gem version string; frozen so it cannot be mutated in place at runtime.
    VERSION = '2.1.0'.freeze
  end
end
metadata ADDED
@@ -0,0 +1,66 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cijef-fullname-matcher
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.1.0
5
+ platform: ruby
6
+ authors:
7
+ - yegang
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-02-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: fullname-parser
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 1.0.3
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 1.0.3
27
+ description: Provide fullname, search in database with proper conditions
28
+ email:
29
+ - yegang.avvo@gmail.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - ".gitignore"
35
+ - Gemfile
36
+ - README.md
37
+ - fullname-matcher.gemspec
38
+ - lib/fullname.rb
39
+ - lib/fullname/equivalence.rb
40
+ - lib/fullname/matcher.rb
41
+ - lib/fullname/matcher/core.rb
42
+ - lib/fullname/matcher/version.rb
43
+ homepage: https://github.com/yegang90/cijef-fullname-matcher
44
+ licenses: []
45
+ metadata: {}
46
+ post_install_message:
47
+ rdoc_options: []
48
+ require_paths:
49
+ - lib
50
+ required_ruby_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ required_rubygems_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
60
+ requirements: []
61
+ rubyforge_project:
62
+ rubygems_version: 2.5.2
63
+ signing_key:
64
+ specification_version: 4
65
+ summary: Match fullname in database
66
+ test_files: []