nameable 0.5.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/nameable.rb CHANGED
@@ -1,220 +1,10 @@
1
- # Copyright (c) 2012 Chris Horn http://chorn.com/
2
- # See MIT-LICENSE.txt
3
-
4
- # TODO: Make this less sucky.
1
+ require "nameable/version"
2
+ require "nameable/error"
3
+ require "nameable/latin"
4
+ require "nameable/extensions"
5
5
 
6
6
  module Nameable
7
- class Latin
8
-
9
- ##
10
- # Raised if something other than a valid Name is supplied
11
- class InvalidNameError < StandardError
12
- end
13
-
14
- ##
15
- # Regex's to match the detritus that people add to their names
16
- module Patterns
17
- PREFIX = {
18
- "Mr." => /^\(*(mr\.*|mister)\)*$/i,
19
- "Mrs." => /^\(*(mrs\.*|misses)\)*$/i,
20
- "Ms." => /^\(*(ms\.*|miss)\)*$/i,
21
- "Dr." => /^\(*(dr\.*|doctor)\)*$/i,
22
- "Rev." => /^\(*(rev\.*|reverand)\)*$/i,
23
- "Fr." => /^\(*(fr\.*|friar)\)*$/i,
24
- "Master" => /^\(*(master)\)*$/i,
25
- "Sir" => /^\(*(sir)\)*$/i
26
- }
27
-
28
- SUFFIX = {
29
- "Sr." => /^\(*(sr\.?|senior)\)*$/i,
30
- "Jr." => /^\(*(jr\.?|junior)\)*$/i,
31
- "Esq." => /^\(*(esq\.?|esquire)\)*$/i,
32
- "Ph.D." => /^\(*(phd\.?)\)*$/i
33
- }
34
-
35
- SUFFIX_GENERATIONAL_ROMAN = /^\(*[IVX.]+\)*$/i
36
- SUFFIX_ACADEMIC = /^(APR|RPh|MD|MA|DMD|DDS|PharmD|EngD|DPhil|JD|DD|DO|BA|BS|BSc|BE|BFA|MA|MS|MSc|MFA|MLA|MBA)$/i
37
- SUFFIX_PROFESSIONAL = /^(PE|CSA|CPA|CPL|CME|CEng|OFM|CSV|Douchebag)$/i
38
- SUFFIX_ABBREVIATION = /^[A-Z.]+[A-Z.]+$/ # It should be at least 2 letters
39
-
40
- LAST_NAME_PRE_DANGLERS = /^(mc|vere|von|van|da|de|del|della|di|da|pietro|vanden|du|st|la|ter|ten)$/i
41
- O_LAST_NAME_PRE_CONCATS = /^(o'|o`)$/i
42
- # MC_LAST_NAME_PRE_CONCAT = /^(mc|da|de)$/i
43
- # ST_LAST_NAME_PRE_CONCAT = /^(st)\.*$/i
44
- end
45
-
46
- attr_accessor :prefix, :first, :middle, :last, :suffix
47
-
48
- ##
49
- #
50
- def initialize(parts={})
51
- self.prefix = parts[:prefix] ? parts[:prefix] : nil
52
- self.first = parts[:first] ? parts[:first] : nil
53
- self.middle = parts[:middle] ? parts[:middle] : nil
54
- self.last = parts[:last] ? parts[:last] : nil
55
- self.suffix = parts[:suffix] ? parts[:suffix] : nil
56
- end
57
-
58
- ##
59
- # name is an Array
60
- def extract_prefix(name)
61
- return unless name and name.size > 1 and @prefix.nil? and @first.nil?
62
- Patterns::PREFIX.each_pair do |pretty, regex|
63
- if name.first =~ regex
64
- @prefix = pretty
65
- name.delete(name.first)
66
- return
67
- end
68
- end
69
- end
70
-
71
- ##
72
- # name is an Array
73
- def extract_suffix(name)
74
- return unless name and name.size >= 3
75
-
76
- (name.size - 1).downto(2) do |n|
77
- suff = nil
78
-
79
- Patterns::SUFFIX.each_pair do |pretty, regex|
80
- suff = pretty if name[n] =~ regex
81
- end
82
-
83
- if name[n] =~ Patterns::SUFFIX_ACADEMIC or name[n] =~ Patterns::SUFFIX_PROFESSIONAL or name[n] =~ Patterns::SUFFIX_GENERATIONAL_ROMAN
84
- suff = name[n].upcase.gsub(/\./,'')
85
- end
86
-
87
- if name.join != name.join.upcase and name[n].length > 1 and name[n] =~ Patterns::SUFFIX_ABBREVIATION
88
- suff = name[n].upcase.gsub(/\./,'')
89
- end
90
-
91
- if suff
92
- @suffix = @suffix ? "#{suff}, #{@suffix}" : suff
93
- name.delete_at(n)
94
- end
95
-
96
- end
97
- end
98
-
99
- ##
100
- # name is an Array
101
- def extract_first(name)
102
- return unless name and name.size >= 1
103
-
104
- @first = name.first
105
- name.delete_at(0)
106
-
107
- @first.capitalize! unless @first =~ /[a-z]/ and @first =~ /[A-Z]/
108
- end
109
-
110
- ##
111
- # name is an Array
112
- def extract_last(name)
113
- return unless name and name.size >= 1
114
-
115
- @last = name.last
116
- name.delete_at(name.size - 1)
117
-
118
- @last.capitalize! unless @last =~ /[a-z]/ and @last =~ /[A-Z]/
119
- end
120
-
121
- ##
122
- # name is an Array
123
- def extract_middle(name)
124
- return unless name and name.size >= 1
125
-
126
- (name.size - 1).downto(0) do |n|
127
- next unless name[n]
128
-
129
- if name[n] =~ Patterns::LAST_NAME_PRE_DANGLERS
130
- @last = "#{name[n].downcase.capitalize} #{@last}"
131
- elsif name[n] =~ Patterns::O_LAST_NAME_PRE_CONCATS
132
- @last = "O'#{@last}"
133
- # elsif name[n] =~ Patterns::MC_LAST_NAME_PRE_CONCAT
134
- # @last = "#{name[n].downcase.capitalize} #{@last}"
135
- # elsif name[n] =~ Patterns::ST_LAST_NAME_PRE_CONCAT
136
- # @last = "St. #{@last}"
137
- elsif name[n] =~ /-+/ and n > 0 and name[n-1]
138
- @last = "#{name[n-1]}-#{@last}"
139
- name[n-1] = nil
140
- else
141
- @middle = @middle ? "#{name[n]} #{@middle}" : name[n]
142
- end
143
-
144
- name.delete_at(n)
145
- end
146
-
147
- @middle.capitalize! if @middle and !(@middle =~ /[a-z]/ and @middle =~ /[A-Z]/)
148
- @middle = "#{@middle}." if @middle and @middle.size == 1
149
- end
150
-
151
- def parse(name)
152
- raise InvalidNameError unless name
153
- if name.class == String
154
- if name.index(',')
155
- name = "#{$2} #{$1}" if name =~ /^([a-z]+)\s*,\s*,*(.*)/i
156
- end
157
-
158
- name = name.strip.split(/\s+/)
159
- end
160
-
161
- name = name.first.split(/[^[:alnum:]]+/) if name.size == 1 and name.first.split(/[^[:alnum:]]+/)
162
-
163
- extract_prefix(name)
164
- extract_suffix(name)
165
- extract_first(name)
166
- extract_last(name)
167
- extract_middle(name)
168
-
169
- raise InvalidNameError, "A parseable name was not found. #{name.inspect}" unless @first
170
-
171
- self
172
- end
173
-
174
- def to_s
175
- [@prefix, @first, @middle, @last].compact.join(' ') + (@suffix ? ", #{@suffix}" : "")
176
- end
177
-
178
- def to_name
179
- to_nameable
180
- end
181
-
182
- def to_fullname
183
- to_s
184
- end
185
-
186
- def to_prefix
187
- @prefix
188
- end
189
-
190
- def to_firstname
191
- @first
192
- end
193
-
194
- def to_lastname
195
- @last
196
- end
197
-
198
- def to_middlename
199
- @middle
200
- end
201
-
202
- def to_suffix
203
- @suffix
204
- end
205
-
206
- def to_nameable
207
- [@first, @last].compact.join(' ')
208
- end
209
-
210
- def to_hash
211
- return {
212
- :prefix => @prefix,
213
- :first => @first,
214
- :middle => @middle,
215
- :last => @last,
216
- :suffix => @suffix
217
- }
218
- end
7
# Convenience entry point: builds a fresh Nameable::Latin, asks it to parse
# +name+, and returns whatever Nameable::Latin#parse returns.
def self.parse(name)
  latin = Nameable::Latin.new
  latin.parse(name)
end
220
10
  end
@@ -0,0 +1,5 @@
1
module Nameable
  # Error type signalling that the supplied value is not a valid name.
  class InvalidNameError < StandardError; end
end
@@ -0,0 +1,5 @@
1
# Reopens Object so a conversion-style helper, Nameable(name), is callable
# from any object.
class Object
  # Parses +name+ with a new Nameable::Latin and returns the parser's
  # string form (its +to_s+).
  def Nameable(name)
    parsed = Nameable::Latin.new.parse(name)
    parsed.to_s
  end
end
@@ -0,0 +1,247 @@
1
+ require 'csv'
2
+ require 'nameable/error'
3
+ require 'nameable/latin/patterns'
4
+
5
module Nameable
  ##
  # Parses a personal name into prefix, first, middle, last and suffix parts
  # and formats it back into strings.
  #
  # The regular expressions used for recognition live in
  # Nameable::Latin::Patterns, which is defined outside this class.
  class Latin
    # Lookup tables filled on first use by the load_huge_* methods below.
    # They are class variables, so every instance shares one copy.
    @@first_names = {}
    @@last_names = {}

    attr_accessor :prefix, :first, :middle, :last, :suffix

    ##
    # Accepts either a single Hash with any of the keys :prefix, :first,
    # :middle, :last, :suffix, or up to three positional parts.
    #
    # Positional forms:
    #   new(first)
    #   new(first, last)
    #   new(first, middle, last)   # arguments after the third are ignored
    def initialize(*args)
      if args.size == 1 && args.first.is_a?(Hash)
        parts = args.first
        @prefix = parts[:prefix] || nil
        @first  = parts[:first]  || nil
        @middle = parts[:middle] || nil
        @last   = parts[:last]   || nil
        @suffix = parts[:suffix] || nil
      else
        @first  = args.shift unless args.empty?
        # Only grab a middle name if a last name is still left after it.
        @middle = args.shift if args.size >= 2
        @last   = args.shift unless args.empty?
      end
    end

    ##
    # name is an Array of tokens; it is modified in place.
    #
    # When no prefix or first name is set yet and more than one token is
    # present, matches the leading token against Patterns::PREFIX. On a match
    # the canonical prefix is stored and that one token is removed.
    def extract_prefix(name)
      return unless name && name.size > 1 && @prefix.nil? && @first.nil?

      Nameable::Latin::Patterns::PREFIX.each_pair do |pretty, regex|
        next unless name.first =~ regex

        @prefix = pretty
        # Array#shift removes only the leading token. Array#delete would also
        # drop every later token that happens to equal the prefix.
        name.shift
        return
      end
    end

    ##
    # name is an Array of tokens; it is modified in place.
    #
    # Walks from the final token down to index 2 (so the first two tokens are
    # never treated as suffixes), removes every token recognised as a suffix
    # and accumulates them, in their original order, in @suffix joined by
    # ", ".
    def extract_suffix(name)
      return unless name && name.size >= 3

      pats = Nameable::Latin::Patterns

      (name.size - 1).downto(2) do |n|
        token = name[n]
        suff = nil

        # Named suffixes map to a canonical spelling; the last matching
        # pattern wins.
        pats::SUFFIX.each_pair do |pretty, regex|
          suff = pretty if token =~ regex
        end

        if token =~ pats::SUFFIX_ACADEMIC ||
           token =~ pats::SUFFIX_PROFESSIONAL ||
           token =~ pats::SUFFIX_GENERATIONAL_ROMAN
          suff = token.upcase.gsub(/\./, '')
        end

        # Generic abbreviations are only considered when nothing else matched
        # and the remaining name is not written entirely in upper case.
        if suff.nil? && name.join != name.join.upcase &&
           token.length > 1 && token =~ pats::SUFFIX_ABBREVIATION
          suff = token.upcase.gsub(/\./, '')
        end

        next unless suff

        @suffix = @suffix ? "#{suff}, #{@suffix}" : suff
        name.delete_at(n)
      end
    end

    ##
    # name is an Array of tokens; it is modified in place.
    #
    # Removes the leading token and stores it, case-normalised, as the first
    # name.
    def extract_first(name)
      return unless name && name.size >= 1

      @first = normalize_case(name.shift)
    end

    ##
    # name is an Array of tokens; it is modified in place.
    #
    # Removes the trailing token and stores it as the last name after
    # collapsing runs of quote characters to a single apostrophe and runs of
    # hyphens to a single hyphen.
    def extract_last(name)
      return unless name && name.size >= 1

      cleaned = name.pop.gsub(/['`"]+/, "'").gsub(/-+/, '-')
      @last = normalize_case(cleaned)
    end

    ##
    # name is an Array of tokens; it is modified in place.
    #
    # Consumes the remaining tokens from right to left:
    # * a token matching Patterns::LAST_NAME_PRE_DANGLERS is capitalised and
    #   prepended to the last name, separated by a space;
    # * a token matching Patterns::O_LAST_NAME_PRE_CONCATS turns the last name
    #   into "O'<last>";
    # * a token containing a hyphen, when a token precedes it, prepends that
    #   preceding token (hyphens stripped) plus "-" to the last name; the
    #   hyphen token itself is discarded;
    # * anything else is prepended to the middle name.
    #
    # A one-character middle name gets a trailing period.
    def extract_middle(name)
      return unless name && name.size >= 1

      pats = Nameable::Latin::Patterns

      (name.size - 1).downto(0) do |n|
        token = name[n]
        next unless token # slot already consumed by the hyphen branch

        if token =~ pats::LAST_NAME_PRE_DANGLERS
          @last = "#{token.downcase.capitalize} #{@last}"
        elsif token =~ pats::O_LAST_NAME_PRE_CONCATS
          @last = "O'#{@last}"
        elsif token =~ /\-/ && n > 0 && name[n - 1]
          @last = "#{name[n - 1].gsub(/\-/, '')}-#{@last}"
          name[n - 1] = nil
        else
          @middle = @middle ? "#{token} #{@middle}" : token
        end

        name.delete_at(n)
      end

      return unless @middle

      @middle = normalize_case(@middle)
      @middle = "#{@middle}." if @middle.size == 1
    end

    ##
    # Parses +name+ (a String, or an Array of name tokens) into this object
    # and returns self.
    #
    # The caller's String or Array is never modified.
    #
    # Raises InvalidNameError when +name+ is nil, is neither a String nor an
    # Array, or yields no first name.
    def parse(name)
      raise InvalidNameError unless name

      tokens = tokenize(name)

      extract_prefix(tokens)
      extract_suffix(tokens)
      extract_first(tokens)
      extract_last(tokens)
      extract_middle(tokens)

      raise InvalidNameError, "A parseable name was not found. #{tokens.inspect}" unless @first

      self
    end

    # http://www.ssa.gov/oact/babynames/limits.html
    #
    # Reads data/yob2013.txt (three CSV columns: first name, gender, and a
    # number) and fills the shared first-name table. A name listed for a
    # single gender maps to that gender; a name listed for both maps to the
    # gender with the larger number; equal numbers map to :female.
    # NOTE(review): the third column is presumably a birth count rather than
    # a rank -- confirm against the data file.
    def load_huge_gender_table
      ranked = {}
      path = File.expand_path(File.join('..', '..', '..', 'data', 'yob2013.txt'), __FILE__)

      # CSV.foreach streams rows instead of materialising the whole file.
      CSV.foreach(path) do |first, sex, rank|
        next unless first && sex # skip blank or short rows

        (ranked[first.downcase] ||= {})[sex.downcase] = rank.to_i
      end

      ranked.each do |first, ranks|
        male = ranks['m']
        female = ranks['f']

        if male && female
          @@first_names[first] = male > female ? :male : :female
        elsif male
          @@first_names[first] = :male
        elsif female
          @@first_names[first] = :female
        end
      end
    end

    # http://www.census.gov/genealogy/www/data/2000surnames/index.html
    #
    # Reads data/app_c.csv and fills the shared last-name table, keyed by the
    # lower-cased surname. The row whose first column is "name" (the header)
    # is skipped.
    def load_huge_ethnicity_table
      path = File.expand_path(File.join('..', '..', '..', 'data', 'app_c.csv'), __FILE__)

      CSV.foreach(path) do |name, rank, count, _prop100k, _cum_prop100k, pctwhite, pctblack, pctapi, pctaian, pct2prace, pcthispanic|
        next if name.nil? || name == 'name'

        @@last_names[name.downcase] = {
          rank: rank.to_i,
          count: count.to_i,
          percent_white: pctwhite.to_f,
          percent_black: pctblack.to_f,
          percent_asian_pacific_islander: pctapi.to_f,
          percent_american_indian_alaska_native: pctaian.to_f,
          percent_two_or_more_races: pct2prace.to_f,
          percent_hispanic: pcthispanic.to_f
        }
      end
    end

    ##
    # Returns :male, :female or :unknown for the first name, loading the
    # first-name table on first use. The answer is cached per instance.
    def gender
      return @gender if @gender

      load_huge_gender_table if @@first_names.nil? || @@first_names.empty?
      @gender = @@first_names[@first.to_s.downcase] || :unknown
    end

    ##
    # Returns the statistics Hash stored for the last name, or :unknown when
    # there is no last name or it is not in the table. The last-name table is
    # loaded on first use and the answer is cached per instance.
    def ethnicity
      return @ethnicity if @ethnicity

      load_huge_ethnicity_table if @@last_names.nil? || @@last_names.empty?
      @ethnicity = (@last && @@last_names[@last.downcase]) || :unknown
    end

    # True when #gender is :male.
    def male?
      gender == :male
    end

    # True when #gender is :female.
    def female?
      gender == :female
    end

    ##
    # Full name: the prefix, first, middle and last parts that are present,
    # joined by spaces, followed by ", <suffix>" when a suffix is set.
    def to_s
      [@prefix, @first, @middle, @last].compact.join(' ') + (@suffix ? ", #{@suffix}" : "")
    end

    # Same as #to_nameable.
    def to_name
      to_nameable
    end

    # Same as #to_s.
    def to_fullname
      to_s
    end

    def to_prefix
      @prefix
    end

    def to_firstname
      @first
    end

    def to_lastname
      @last
    end

    def to_middlename
      @middle
    end

    def to_suffix
      @suffix
    end

    # First and last name joined by a space; parts that are nil are omitted.
    def to_nameable
      [@first, @last].compact.join(' ')
    end

    # The five name parts as a Hash keyed by Symbol.
    def to_hash
      {
        :prefix => @prefix,
        :first => @first,
        :middle => @middle,
        :last => @last,
        :suffix => @suffix
      }
    end

    private

    # Turns the raw parse input into a fresh, mutable Array of tokens so that
    # the extract_* steps never touch (or fail on) the caller's objects.
    def tokenize(name)
      tokens =
        case name
        when String
          # "Last, First ..." is reordered to "First ... Last" before splitting.
          flipped = name.match(/^([a-z]+)\s*,\s*,*(.*)/i)
          text = flipped ? "#{flipped[2]} #{flipped[1]}" : name
          text.strip.split(/\s+/)
        when Array
          name.map(&:dup)
        else
          raise InvalidNameError, "Expected a String or an Array of name parts, got #{name.class}"
        end

      # A lone token such as "john.smith" is split on non-alphanumerics;
      # the empty piece produced by leading punctuation is discarded.
      if tokens.size == 1
        tokens = tokens.first.to_s.split(/[^[:alnum:]]+/).reject(&:empty?)
      end

      tokens
    end

    # Returns +text+ unchanged when it already mixes ASCII upper- and
    # lower-case letters, otherwise a capitalised copy.
    def normalize_case(text)
      mixed = text =~ /[a-z]/ && text =~ /[A-Z]/
      mixed ? text : text.capitalize
    end
  end
end