nameable 0.5.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/nameable.rb CHANGED
@@ -1,220 +1,10 @@
1
- # Copyright (c) 2012 Chris Horn http://chorn.com/
2
- # See MIT-LICENSE.txt
3
-
4
- # TODO: Make this less sucky.
1
+ require "nameable/version"
2
+ require "nameable/error"
3
+ require "nameable/latin"
4
+ require "nameable/extensions"
5
5
 
6
6
  module Nameable
7
- class Latin
8
-
9
- ##
10
- # Raised if something other than a valid Name is supplied
11
- class InvalidNameError < StandardError
12
- end
13
-
14
- ##
15
- # Regex's to match the detritus that people add to their names
16
- module Patterns
17
- PREFIX = {
18
- "Mr." => /^\(*(mr\.*|mister)\)*$/i,
19
- "Mrs." => /^\(*(mrs\.*|misses)\)*$/i,
20
- "Ms." => /^\(*(ms\.*|miss)\)*$/i,
21
- "Dr." => /^\(*(dr\.*|doctor)\)*$/i,
22
- "Rev." => /^\(*(rev\.*|reverand)\)*$/i,
23
- "Fr." => /^\(*(fr\.*|friar)\)*$/i,
24
- "Master" => /^\(*(master)\)*$/i,
25
- "Sir" => /^\(*(sir)\)*$/i
26
- }
27
-
28
- SUFFIX = {
29
- "Sr." => /^\(*(sr\.?|senior)\)*$/i,
30
- "Jr." => /^\(*(jr\.?|junior)\)*$/i,
31
- "Esq." => /^\(*(esq\.?|esquire)\)*$/i,
32
- "Ph.D." => /^\(*(phd\.?)\)*$/i
33
- }
34
-
35
- SUFFIX_GENERATIONAL_ROMAN = /^\(*[IVX.]+\)*$/i
36
- SUFFIX_ACADEMIC = /^(APR|RPh|MD|MA|DMD|DDS|PharmD|EngD|DPhil|JD|DD|DO|BA|BS|BSc|BE|BFA|MA|MS|MSc|MFA|MLA|MBA)$/i
37
- SUFFIX_PROFESSIONAL = /^(PE|CSA|CPA|CPL|CME|CEng|OFM|CSV|Douchebag)$/i
38
- SUFFIX_ABBREVIATION = /^[A-Z.]+[A-Z.]+$/ # It should be at least 2 letters
39
-
40
- LAST_NAME_PRE_DANGLERS = /^(mc|vere|von|van|da|de|del|della|di|da|pietro|vanden|du|st|la|ter|ten)$/i
41
- O_LAST_NAME_PRE_CONCATS = /^(o'|o`)$/i
42
- # MC_LAST_NAME_PRE_CONCAT = /^(mc|da|de)$/i
43
- # ST_LAST_NAME_PRE_CONCAT = /^(st)\.*$/i
44
- end
45
-
46
- attr_accessor :prefix, :first, :middle, :last, :suffix
47
-
48
- ##
49
- #
50
- def initialize(parts={})
51
- self.prefix = parts[:prefix] ? parts[:prefix] : nil
52
- self.first = parts[:first] ? parts[:first] : nil
53
- self.middle = parts[:middle] ? parts[:middle] : nil
54
- self.last = parts[:last] ? parts[:last] : nil
55
- self.suffix = parts[:suffix] ? parts[:suffix] : nil
56
- end
57
-
58
- ##
59
- # name is an Array
60
- def extract_prefix(name)
61
- return unless name and name.size > 1 and @prefix.nil? and @first.nil?
62
- Patterns::PREFIX.each_pair do |pretty, regex|
63
- if name.first =~ regex
64
- @prefix = pretty
65
- name.delete(name.first)
66
- return
67
- end
68
- end
69
- end
70
-
71
- ##
72
- # name is an Array
73
- def extract_suffix(name)
74
- return unless name and name.size >= 3
75
-
76
- (name.size - 1).downto(2) do |n|
77
- suff = nil
78
-
79
- Patterns::SUFFIX.each_pair do |pretty, regex|
80
- suff = pretty if name[n] =~ regex
81
- end
82
-
83
- if name[n] =~ Patterns::SUFFIX_ACADEMIC or name[n] =~ Patterns::SUFFIX_PROFESSIONAL or name[n] =~ Patterns::SUFFIX_GENERATIONAL_ROMAN
84
- suff = name[n].upcase.gsub(/\./,'')
85
- end
86
-
87
- if name.join != name.join.upcase and name[n].length > 1 and name[n] =~ Patterns::SUFFIX_ABBREVIATION
88
- suff = name[n].upcase.gsub(/\./,'')
89
- end
90
-
91
- if suff
92
- @suffix = @suffix ? "#{suff}, #{@suffix}" : suff
93
- name.delete_at(n)
94
- end
95
-
96
- end
97
- end
98
-
99
- ##
100
- # name is an Array
101
- def extract_first(name)
102
- return unless name and name.size >= 1
103
-
104
- @first = name.first
105
- name.delete_at(0)
106
-
107
- @first.capitalize! unless @first =~ /[a-z]/ and @first =~ /[A-Z]/
108
- end
109
-
110
- ##
111
- # name is an Array
112
- def extract_last(name)
113
- return unless name and name.size >= 1
114
-
115
- @last = name.last
116
- name.delete_at(name.size - 1)
117
-
118
- @last.capitalize! unless @last =~ /[a-z]/ and @last =~ /[A-Z]/
119
- end
120
-
121
- ##
122
- # name is an Array
123
- def extract_middle(name)
124
- return unless name and name.size >= 1
125
-
126
- (name.size - 1).downto(0) do |n|
127
- next unless name[n]
128
-
129
- if name[n] =~ Patterns::LAST_NAME_PRE_DANGLERS
130
- @last = "#{name[n].downcase.capitalize} #{@last}"
131
- elsif name[n] =~ Patterns::O_LAST_NAME_PRE_CONCATS
132
- @last = "O'#{@last}"
133
- # elsif name[n] =~ Patterns::MC_LAST_NAME_PRE_CONCAT
134
- # @last = "#{name[n].downcase.capitalize} #{@last}"
135
- # elsif name[n] =~ Patterns::ST_LAST_NAME_PRE_CONCAT
136
- # @last = "St. #{@last}"
137
- elsif name[n] =~ /-+/ and n > 0 and name[n-1]
138
- @last = "#{name[n-1]}-#{@last}"
139
- name[n-1] = nil
140
- else
141
- @middle = @middle ? "#{name[n]} #{@middle}" : name[n]
142
- end
143
-
144
- name.delete_at(n)
145
- end
146
-
147
- @middle.capitalize! if @middle and !(@middle =~ /[a-z]/ and @middle =~ /[A-Z]/)
148
- @middle = "#{@middle}." if @middle and @middle.size == 1
149
- end
150
-
151
- def parse(name)
152
- raise InvalidNameError unless name
153
- if name.class == String
154
- if name.index(',')
155
- name = "#{$2} #{$1}" if name =~ /^([a-z]+)\s*,\s*,*(.*)/i
156
- end
157
-
158
- name = name.strip.split(/\s+/)
159
- end
160
-
161
- name = name.first.split(/[^[:alnum:]]+/) if name.size == 1 and name.first.split(/[^[:alnum:]]+/)
162
-
163
- extract_prefix(name)
164
- extract_suffix(name)
165
- extract_first(name)
166
- extract_last(name)
167
- extract_middle(name)
168
-
169
- raise InvalidNameError, "A parseable name was not found. #{name.inspect}" unless @first
170
-
171
- self
172
- end
173
-
174
- def to_s
175
- [@prefix, @first, @middle, @last].compact.join(' ') + (@suffix ? ", #{@suffix}" : "")
176
- end
177
-
178
- def to_name
179
- to_nameable
180
- end
181
-
182
- def to_fullname
183
- to_s
184
- end
185
-
186
- def to_prefix
187
- @prefix
188
- end
189
-
190
- def to_firstname
191
- @first
192
- end
193
-
194
- def to_lastname
195
- @last
196
- end
197
-
198
- def to_middlename
199
- @middle
200
- end
201
-
202
- def to_suffix
203
- @suffix
204
- end
205
-
206
- def to_nameable
207
- [@first, @last].compact.join(' ')
208
- end
209
-
210
- def to_hash
211
- return {
212
- :prefix => @prefix,
213
- :first => @first,
214
- :middle => @middle,
215
- :last => @last,
216
- :suffix => @suffix
217
- }
218
- end
7
# Convenience entry point: builds a fresh Nameable::Latin and hands +name+
# to its #parse, returning whatever that call returns.
def self.parse(name)
  parser = Nameable::Latin.new
  parser.parse(name)
end
220
10
  end
@@ -0,0 +1,5 @@
1
module Nameable
  # Error raised when something other than a valid name is supplied.
  InvalidNameError = Class.new(StandardError)
end
@@ -0,0 +1,5 @@
1
class Object
  # Conversion-style helper available on every object: parses +name+ with a
  # new Nameable::Latin and returns the parsed result rendered via #to_s.
  def Nameable(name)
    parsed = Nameable::Latin.new.parse(name)
    parsed.to_s
  end
end
@@ -0,0 +1,247 @@
1
+ require 'csv'
2
+ require 'nameable/error'
3
+ require 'nameable/latin/patterns'
4
+
5
module Nameable
  ##
  # A personal name split into prefix, first, middle, last and suffix parts.
  #
  # Instances are either built directly from known parts (see #initialize) or
  # filled in by #parse, which tokenizes free-form input and runs the
  # extract_* steps in a fixed order. The token patterns are read from
  # Nameable::Latin::Patterns.
  class Latin
    # Lookup tables shared by every instance and filled on first use by
    # #load_huge_gender_table / #load_huge_ethnicity_table. They stay class
    # variables so the storage location is unchanged for existing code.
    @@first_names = {}
    @@last_names = {}

    attr_accessor :prefix, :first, :middle, :last, :suffix

    ##
    # Accepts either a single Hash with any of the keys :prefix, :first,
    # :middle, :last, :suffix, or up to three positional values
    # (first, [middle,] last). With no arguments every part stays nil.
    def initialize(*args)
      @prefix = @first = @middle = @last = @suffix = nil

      if args.size == 1 && args.first.is_a?(Hash)
        parts = args.first
        @prefix = parts[:prefix] || nil
        @first  = parts[:first]  || nil
        @middle = parts[:middle] || nil
        @last   = parts[:last]   || nil
        @suffix = parts[:suffix] || nil
      else
        @first  = args.shift if args.size > 0
        @middle = args.shift if args.size >= 2 # Only grab a middle name if we've got a last name left
        @last   = args.shift if args.size > 0
      end
    end

    ##
    # name is an Array
    #
    # When no prefix or first name is set yet and more than one token is
    # present, checks the leading token against Patterns::PREFIX; on a match
    # stores the canonical spelling in @prefix and removes that one token.
    def extract_prefix(name)
      return unless name && name.size > 1 && @prefix.nil? && @first.nil?

      match = Patterns::PREFIX.find { |_pretty, regex| name.first =~ regex }
      return unless match

      @prefix = match.first
      # Remove only the leading token. Array#delete would also strip later
      # tokens that happen to be equal to it (e.g. a surname "Master").
      name.shift
      nil
    end

    ##
    # name is an Array
    #
    # Walks the tokens from the end down to index 2 and moves every token that
    # looks like a suffix out of the array and into @suffix (comma separated,
    # in their original order). The first two tokens are never inspected.
    def extract_suffix(name)
      return unless name && name.size >= 3

      (name.size - 1).downto(2) do |n|
        token = name[n]
        suff = nil

        Patterns::SUFFIX.each_pair do |pretty, regex|
          suff = pretty if token =~ regex
        end

        # Academic, professional and roman-numeral suffixes are stored
        # upper-cased with the dots removed, overriding a SUFFIX match.
        suff = token.upcase.gsub(/\./, '') if listed_suffix?(token)

        # Fallback: an abbreviation-shaped token of 2+ characters, but only
        # when the remaining tokens are not entirely upper case.
        if !suff && name.join != name.join.upcase && token.length > 1 && token =~ Patterns::SUFFIX_ABBREVIATION
          suff = token.upcase.gsub(/\./, '')
        end

        next unless suff

        @suffix = @suffix ? "#{suff}, #{@suffix}" : suff
        name.delete_at(n)
      end
    end

    ##
    # name is an Array
    #
    # Removes the leading token and stores it as the first name. Mixed-case
    # tokens are kept verbatim, anything else is capitalized.
    def extract_first(name)
      return unless name && name.size >= 1

      @first = normalize_case(name.shift)
    end

    ##
    # name is an Array
    #
    # Removes the trailing token and stores it as the last name after
    # collapsing runs of quote characters into one apostrophe and runs of
    # hyphens into one hyphen. Case handling matches #extract_first.
    def extract_last(name)
      return unless name && name.size >= 1

      cleaned = name.pop.gsub(/['`"]+/, "'").gsub(/-+/, '-')
      @last = normalize_case(cleaned)
    end

    ##
    # name is an Array
    #
    # Consumes the remaining tokens from right to left. Tokens matching the
    # last-name patterns are folded into @last; everything else is collected
    # into @middle. A single-character middle name gets a trailing period.
    def extract_middle(name)
      return unless name && name.size >= 1

      (name.size - 1).downto(0) do |n|
        next unless name[n] # slot blanked by the hyphen branch below

        if name[n] =~ Patterns::LAST_NAME_PRE_DANGLERS
          @last = "#{name[n].downcase.capitalize} #{@last}"
        elsif name[n] =~ Patterns::O_LAST_NAME_PRE_CONCATS
          @last = "O'#{@last}"
        elsif name[n] =~ /\-/ and n > 0 and name[n-1]
          # Hyphen token: join the token before it onto the last name and
          # blank that slot so the next iteration skips it.
          @last = "#{name[n-1].gsub(/\-/, '')}-#{@last}"
          name[n-1] = nil
        else
          @middle = @middle ? "#{name[n]} #{@middle}" : name[n]
        end

        name.delete_at(n)
      end

      @middle = normalize_case(@middle) if @middle
      @middle = "#{@middle}." if @middle && @middle.size == 1
    end

    ##
    # Parses +name+ (a String, or an Array of tokens) into this instance and
    # returns self. A String of the form "Last, First ..." is reordered first;
    # a lone token is split on non-alphanumeric characters.
    #
    # The argument itself is never modified: arrays are copied before tokens
    # are consumed and token strings are not changed in place.
    #
    # Raises InvalidNameError when +name+ is nil/false, is neither a String
    # nor array-like, or yields no first name.
    def parse(name)
      raise InvalidNameError unless name

      tokens = tokenize(name)

      extract_prefix(tokens)
      extract_suffix(tokens)
      extract_first(tokens)
      extract_last(tokens)
      extract_middle(tokens)

      # Report the original input; the token list is consumed by this point.
      raise InvalidNameError, "A parseable name was not found. #{name.inspect}" unless @first

      self
    end

    # http://www.ssa.gov/oact/babynames/limits.html
    #
    # Reads data/yob2013.txt (rows of name, gender, number) and records one
    # gender per lower-cased first name in the shared table. A name listed
    # under both genders takes the one with the larger number; a tie goes to
    # :female.
    def load_huge_gender_table
      ranked = Hash.new { |table, key| table[key] = {} }

      # Stream the file row by row instead of materialising every row.
      CSV.foreach(data_path('yob2013.txt')) do |first, gender, rank|
        next unless first && gender # blank or truncated row
        ranked[first.downcase][gender.downcase] = rank.to_i
      end

      ranked.each do |first, ranks|
        male = ranks['m']
        female = ranks['f']

        verdict =
          if male && female
            male > female ? :male : :female
          elsif male
            :male
          elsif female
            :female
          end

        @@first_names[first] = verdict if verdict
      end
    end

    # http://www.census.gov/genealogy/www/data/2000surnames/index.html
    #
    # Reads data/app_c.csv and stores, per lower-cased surname, its rank,
    # count and the percentage columns. The header row is skipped.
    def load_huge_ethnicity_table
      CSV.foreach(data_path('app_c.csv')) do |name, rank, count, _prop100k, _cum_prop100k, pctwhite, pctblack, pctapi, pctaian, pct2prace, pcthispanic|
        next if name.nil? || name == 'name'

        @@last_names[name.downcase] = {
          rank: rank.to_i,
          count: count.to_i,
          percent_white: pctwhite.to_f,
          percent_black: pctblack.to_f,
          percent_asian_pacific_islander: pctapi.to_f,
          percent_american_indian_alaska_native: pctaian.to_f,
          percent_two_or_more_races: pct2prace.to_f,
          percent_hispanic: pcthispanic.to_f
        }
      end
    end

    ##
    # Returns :male, :female or :unknown for the current first name, loading
    # the gender table on first use. The answer is memoized per instance.
    def gender
      return @gender if @gender

      load_huge_gender_table if @@first_names.nil? || @@first_names.empty?
      @gender = @@first_names[@first.to_s.downcase] || :unknown
    end

    ##
    # Returns the surname table entry (a Hash) for the current last name, or
    # :unknown when there is no last name or no entry. Loads the table on
    # first use; the answer is memoized per instance.
    def ethnicity
      return @ethnicity if @ethnicity

      load_huge_ethnicity_table if @@last_names.nil? || @@last_names.empty?
      @ethnicity = (@last && @@last_names[@last.downcase]) || :unknown
    end

    def male?
      gender == :male
    end

    def female?
      gender == :female
    end

    ##
    # "Prefix First Middle Last, Suffix" with missing parts left out.
    def to_s
      [@prefix, @first, @middle, @last].compact.join(' ') + (@suffix ? ", #{@suffix}" : "")
    end

    def to_name
      to_nameable
    end

    def to_fullname
      to_s
    end

    def to_prefix
      @prefix
    end

    def to_firstname
      @first
    end

    def to_lastname
      @last
    end

    def to_middlename
      @middle
    end

    def to_suffix
      @suffix
    end

    ##
    # "First Last" with missing parts left out.
    def to_nameable
      [@first, @last].compact.join(' ')
    end

    def to_hash
      {
        :prefix => @prefix,
        :first => @first,
        :middle => @middle,
        :last => @last,
        :suffix => @suffix
      }
    end

    private

    # Turns the #parse argument into a fresh Array of tokens.
    def tokenize(name)
      tokens =
        if name.is_a?(String)
          text = name
          # "Last, First ..." => "First ... Last"
          if text.index(',') && text =~ /^([a-z]+)\s*,\s*,*(.*)/i
            text = "#{Regexp.last_match(2)} #{Regexp.last_match(1)}"
          end
          text.strip.split(/\s+/)
        elsif name.respond_to?(:to_ary)
          name.to_ary.dup # the extract_* steps consume the array
        else
          raise InvalidNameError, "A parseable name was not found. #{name.inspect}"
        end

      # A lone token such as "john.smith" is split on punctuation.
      tokens = tokens.first.split(/[^[:alnum:]]+/) if tokens.size == 1
      tokens
    end

    # True when the token matches the academic, professional or
    # roman-numeral suffix pattern.
    def listed_suffix?(token)
      token =~ Patterns::SUFFIX_ACADEMIC ||
        token =~ Patterns::SUFFIX_PROFESSIONAL ||
        token =~ Patterns::SUFFIX_GENERATIONAL_ROMAN
    end

    # Returns +text+ unchanged when it contains both lower- and upper-case
    # ASCII letters, otherwise a capitalized copy. Never modifies +text+.
    def normalize_case(text)
      mixed_case = text =~ /[a-z]/ && text =~ /[A-Z]/
      mixed_case ? text : text.capitalize
    end

    # Absolute path of a file in the data directory three levels above this
    # source file's path.
    def data_path(filename)
      File.expand_path(File.join('..', '..', '..', 'data', filename), __FILE__)
    end
  end
end