openjournals-nameable 1.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.codeclimate.yml +7 -0
- data/.gitignore +4 -0
- data/.rubocop.yml +17 -0
- data/.travis.yml +14 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +48 -0
- data/LICENSE.txt +22 -0
- data/README.md +84 -0
- data/Rakefile +12 -0
- data/bin/nameable_web_service +24 -0
- data/certs/chorn.pem +55 -0
- data/checksums/nameable-1.1.0.gem.sha512 +1 -0
- data/checksums/nameable-1.1.1.gem.sha512 +1 -0
- data/checksums/nameable-1.1.3.gem.sha512 +1 -0
- data/checksums/nameable-1.1.4.gem.sha512 +1 -0
- data/data/app_c.csv +151672 -0
- data/data/yob2016.txt +32868 -0
- data/lib/nameable/assets.rb +6 -0
- data/lib/nameable/error.rb +4 -0
- data/lib/nameable/extensions.rb +5 -0
- data/lib/nameable/latin/patterns.rb +39 -0
- data/lib/nameable/latin.rb +251 -0
- data/lib/nameable/version.rb +3 -0
- data/lib/nameable.rb +11 -0
- data/nameable.gemspec +33 -0
- data/spec/nameable/extensions_spec.rb +11 -0
- data/spec/nameable/latin_spec.rb +192 -0
- data/spec/nameable_spec.rb +7 -0
- data/spec/spec_helper.rb +11 -0
- metadata +149 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4fef4b042621efe6e1a52e9d91380c10d596a07b
|
4
|
+
data.tar.gz: d354a86096588679916b84e9253c9d60362c4a83
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b5ca16fd4161af6aff56039fb7ff4f068b5c8d1d772d7f6b10f2d54e4a177cbd81e2c5d56ceee4a3669b41436b7efe798474f762596776fee13bdbe49d718578
|
7
|
+
data.tar.gz: 067150f1f4e864a8235f5a27027e04e94d24b86e208baf441b214fb97dcbc58e956a4d5bf040001acd830c0dc8a75c4f84d2eca6468758ebecc366af4c1dd485
|
data/.codeclimate.yml
ADDED
data/.gitignore
ADDED
data/.rubocop.yml
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
language: ruby
|
2
|
+
cache: bundler
|
3
|
+
script: bundle exec rspec
|
4
|
+
rvm:
|
5
|
+
- 1.9.3-p551
|
6
|
+
- 2.0.0-p648
|
7
|
+
- 2.1.10
|
8
|
+
- 2.2.7
|
9
|
+
- 2.3.0
|
10
|
+
- 2.3.4
|
11
|
+
- 2.4.0
|
12
|
+
- 2.4.1
|
13
|
+
dd:
|
14
|
+
secure: fzItORKaVrevTjQQu85FYgqgSVkRGG8PtOfNTCYY0meyP1+hF/lH6i5ZAb35VAuwi9InqaYnPx09dREBlfEqkzY/Ypxglq+EosAfqOgiKFu44JlM4vhxSdwn2mywCJpYZ0IbH5OnkF6rUULL/AoRQPlIV77eRqhB9LGfLNlYMfU=
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
nameable (1.1.4)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
codeclimate-test-reporter (1.0.8)
|
10
|
+
simplecov (<= 0.13)
|
11
|
+
diff-lcs (1.3)
|
12
|
+
docile (1.1.5)
|
13
|
+
json (2.1.0)
|
14
|
+
json (2.1.0-java)
|
15
|
+
rake (12.0.0)
|
16
|
+
rspec (3.6.0)
|
17
|
+
rspec-core (~> 3.6.0)
|
18
|
+
rspec-expectations (~> 3.6.0)
|
19
|
+
rspec-mocks (~> 3.6.0)
|
20
|
+
rspec-core (3.6.0)
|
21
|
+
rspec-support (~> 3.6.0)
|
22
|
+
rspec-expectations (3.6.0)
|
23
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
24
|
+
rspec-support (~> 3.6.0)
|
25
|
+
rspec-mocks (3.6.0)
|
26
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
27
|
+
rspec-support (~> 3.6.0)
|
28
|
+
rspec-support (3.6.0)
|
29
|
+
simplecov (0.13.0)
|
30
|
+
docile (~> 1.1.0)
|
31
|
+
json (>= 1.8, < 3)
|
32
|
+
simplecov-html (~> 0.10.0)
|
33
|
+
simplecov-html (0.10.1)
|
34
|
+
|
35
|
+
PLATFORMS
|
36
|
+
java
|
37
|
+
ruby
|
38
|
+
|
39
|
+
DEPENDENCIES
|
40
|
+
bundler
|
41
|
+
codeclimate-test-reporter
|
42
|
+
nameable!
|
43
|
+
rake
|
44
|
+
rspec (~> 3.6)
|
45
|
+
simplecov
|
46
|
+
|
47
|
+
BUNDLED WITH
|
48
|
+
1.14.6
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2008 Chris Horn http://chorn.com/
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
# nameable
|
2
|
+
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/nameable.svg)](http://badge.fury.io/rb/nameable)
|
4
|
+
[![Build Status](https://travis-ci.org/chorn/nameable.svg?branch=master)](https://travis-ci.org/chorn/nameable)
|
5
|
+
|
6
|
+
A library that provides parsing and normalization of people's names.
|
7
|
+
|
8
|
+
```ruby
|
9
|
+
require 'nameable'
|
10
|
+
n = Nameable::Latin.new.parse('Mr. Chris K Horn Esquire')
|
11
|
+
puts "#{n.prefix} #{n.first} #{n.middle} #{n.last} #{n.suffix}"
|
12
|
+
#=> Mr. Chris K Horn Esq.
|
13
|
+
puts n.to_fullname
|
14
|
+
#=> Mr. Chris K. Horn, Esq.
|
15
|
+
n = Nameable::Latin.new('CHRIS', 'HORN')
|
16
|
+
puts n.to_nameable
|
17
|
+
#=> Chris Horn
|
18
|
+
n = Nameable::Latin.new(prefix:'Sir', last:'Horn')
|
19
|
+
puts n
|
20
|
+
#=> Sir Horn
|
21
|
+
```
|
22
|
+
|
23
|
+
# Features
|
24
|
+
|
25
|
+
Convenience methods:
|
26
|
+
|
27
|
+
```ruby
|
28
|
+
puts Nameable('chris horn, iii')
|
29
|
+
#=> "Chris Horn, III."
|
30
|
+
puts Nameable.parse('chris horn, iii')
|
31
|
+
#=> #<Nameable::Latin:0x007f8470e01b08 @first="Chris", @last="Horn", @middle=nil, @prefix=nil, @suffix="III.">
|
32
|
+
```
|
33
|
+
|
34
|
+
Using a database of first names from the U.S. Social Security Administration, Nameable will pick the most likely gender for a name.
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
Nameable::Latin.new('Chris').gender
|
38
|
+
#=> :male
|
39
|
+
Nameable::Latin.new('Janine').female?
|
40
|
+
#=> true
|
41
|
+
```
|
42
|
+
|
43
|
+
Using a database of last names from the U.S. Census, Nameable will return the ethnicity breakdown as a Hash.
|
44
|
+
|
45
|
+
```ruby
|
46
|
+
Nameable::Latin.new('Chris', 'Horn').ethnicity
|
47
|
+
#=> {:rank=>593, :count=>51380, :percent_white=>86.75, :percent_black=>8.31, :percent_asian_pacific_islander=>0.84, :percent_american_indian_alaska_native=>1.16, :percent_two_or_more_races=>1.46, :percent_hispanic=>1.48}
|
48
|
+
```
|
49
|
+
|
50
|
+
# Other uses
|
51
|
+
|
52
|
+
I've included a little web service, which should be installed as "nameable_web_service" and requires Sinatra. It's been handy when paired with OpenRefine, if I'm working with a file and I am not going to be parsing with Ruby. If you're reading this, that's probably not an issue for you, but I do think it's a nice way to show someone how to use OpenRefine in a more advanced way.
|
53
|
+
|
54
|
+
# Inspiration
|
55
|
+
|
56
|
+
By inspiration, I should really say "other projects from which I yanked their code, ideas, examples and data." At worst I'll make sure the other projects I looked at and borrowed from are credited here.
|
57
|
+
|
58
|
+
# Security
|
59
|
+
|
60
|
+
As of version `1.1.1`, the nameable gem is cryptographically signed. To be sure the gem you install hasn’t been tampered with, add my public key as a trusted certificate, and verify that nameable and any dependencies it has are also signed:
|
61
|
+
|
62
|
+
```
|
63
|
+
$ gem cert --add <(curl -Ls https://raw.github.com/chorn/nameable/master/certs/chorn.pem)
|
64
|
+
$ gem install nameable -P HighSecurity
|
65
|
+
```
|
66
|
+
|
67
|
+
# References
|
68
|
+
|
69
|
+
* [Open Refine](http://openrefine.org/) formerly [Google Refine](https://code.google.com/p/google-refine/)
|
70
|
+
* [Help with splitting names](http://www.onlineaspect.com/2009/08/17/splitting-names/)
|
71
|
+
* [First Names from the U.S. SSA](http://www.ssa.gov/oact/babynames/limits.html)
|
72
|
+
* [Last Names from the Census](http://www.census.gov/topics/population/genealogy/data/2000_surnames.html)
|
73
|
+
* [Data Science Toolkit](https://github.com/petewarden/dstk)
|
74
|
+
* [Addressable](https://github.com/sporkmonger/addressable)
|
75
|
+
|
76
|
+
# To-do
|
77
|
+
|
78
|
+
1. Extract all of the US Census / Ethnicity / Asset stuff out of `Latin`. Yuck, that's ugly; why did I ever do that?
|
79
|
+
2. Rename `Latin` to be `US` or `English` because it looks like I really only support English, and probably US English.
|
80
|
+
3. Use named captures for all the regexes.
|
81
|
+
4. Refactor the Ethnicity stuff into a class.
|
82
|
+
5. Refactor parsing into a class.
|
83
|
+
|
84
|
+
-chorn
|
data/Rakefile
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'digest/sha2'
|
3
|
+
|
4
|
+
task default: %w[build]
|
5
|
+
|
6
|
+
desc 'Update checksums for gems in ./pkg'
task :checksums do
  # Every built gem matching pkg/*.gem gets a "<gem file name>.sha512" file
  # under ./checksums containing the hex-encoded SHA-512 of the gem.
  Dir.glob('pkg/*.gem').each do |gem_path|
    # Digest::SHA512.file reads the file in binary mode, so the digest is
    # computed over the exact bytes on disk; File.read opens in text mode,
    # which can alter binary content on Windows.
    checksum = Digest::SHA512.file(gem_path).hexdigest

    # Build the target from the basename instead of substituting every "pkg"
    # substring in the path, which would also rewrite a gem whose own name
    # contains "pkg".
    checksum_path = File.join('checksums', "#{File.basename(gem_path)}.sha512")
    File.write(checksum_path, checksum)
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# I use this with OpenRefine: http://openrefine.org/
|
4
|
+
|
5
|
+
require 'rubygems'
|
6
|
+
require 'json'
|
7
|
+
require 'sinatra'
|
8
|
+
require 'nameable'
|
9
|
+
|
10
|
+
# GET /<raw_name>/<function>.<type>
#
# Parses <raw_name> with Nameable() and renders the result:
#   * type "json" -> the parsed name's to_hash serialized as JSON
#                    (<function> is not consulted);
#   * any other   -> plain text from name.to_<function>, where <function> must
#                    be one of fullname, nameable, firstname, lastname or
#                    middlename; nothing is rendered for any other value.
#
# A name that raises Nameable::Latin::InvalidNameError gets an empty body.
get '/*/*.*' do |raw_name, function, type|
  begin
    name = Nameable(raw_name)
  rescue Nameable::Latin::InvalidNameError
    # Stop here with an empty body. Without halting, the '' evaluated in this
    # rescue clause is discarded, `name` stays nil, and the rendering below
    # raises NoMethodError.
    halt ''
  end

  # `type` is already a String captured from the URL; compare it directly
  # rather than interning request input as a Symbol.
  if type == 'json'
    content_type 'application/json'
    name.to_hash.to_json
  else
    content_type 'text/plain'
    # \A and \z anchor the whole string; ^ and $ only anchor a single line, so
    # a value such as "fullname\nanything" would pass the check and reach #send.
    name.send("to_#{function}") if function =~ /\A(?:fullname|nameable|firstname|lastname|middlename)\z/
  end
end
|
data/certs/chorn.pem
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
-----BEGIN CERTIFICATE-----
|
2
|
+
MIIJvjCCBaagAwIBAgIJAM/4MvGHp6qBMA0GCSqGSIb3DQEBDQUAMDoxHjAcBgkq
|
3
|
+
hkiG9w0BCQEWD2Nob3JuQGNob3JuLmNvbTEYMBYGA1UEAwwPY2hvcm5AY2hvcm4u
|
4
|
+
Y29tMB4XDTE1MDYxOTExMzIyNVoXDTIwMDYxNzExMzIyNVowOjEeMBwGCSqGSIb3
|
5
|
+
DQEJARYPY2hvcm5AY2hvcm4uY29tMRgwFgYDVQQDDA9jaG9ybkBjaG9ybi5jb20w
|
6
|
+
ggQiMA0GCSqGSIb3DQEBAQUAA4IEDwAwggQKAoIEAQCrmvCKjihyb0/wGp6xds1M
|
7
|
+
PII8KrtYztDRFD5kmJioqBt/8VcjrLqgCPKzafCY/ztLoRq7Zegjeer/QS7mUJGL
|
8
|
+
EIG8hMSsA80y1+jCa0TmIbpRic9Y5RQoA0SiKGBqZ1j0rbhP5ZbQXxE6IG9aSxRU
|
9
|
+
WTeSiGfcxWOG+as0L1qBrpuS9wXGenwsudLNouCY0ekRhOyQr3g+9U/dJqkb5ucg
|
10
|
+
Mj9OyYQFPLnbml4SoTVy7KcbuuzEyPcaf+Zc688if5Bc3yQddnQfmbINhuQuqk/f
|
11
|
+
EXOFarN1jZLN04mqjOYa9vR0W+AxEtrYLaTD1+ILoJzFhlO5NCTBMxSVNxlt95sY
|
12
|
+
opEabhgZolrUA653mnAvNq7HEnbdH4iSUEHtSD1e0goGhaRmh7NmJPR3jQgj4dox
|
13
|
+
L+FlE2MF4SW0lP74438BO29ClenGnPNgYWFuV0rSjmyTIMKkFHYP43VsNGVUv3zh
|
14
|
+
JH6uqkXQvjEPWYqC42noEmpga1xxRKUizj90gzFtBXtOYi2AX2/RvpiPI2wabFoC
|
15
|
+
s2QByquD0f8oXj60e1MLDXm7i0qImCLzzWyjXqTmM+7pdRBFYOmD4MiOKViZsdfs
|
16
|
+
fTfv+bJxjLIyGg1igA0+T3A9Jhfr7nuBlt3u5rsTC6DthXpxReCsn/3fmzKOKE08
|
17
|
+
jODSjhS2kVQUux2j1HF1/3ldicqvCttxpzvbooEGhN7TzHzj/nTnovg9wG6zmhUh
|
18
|
+
8tmf4KO+4/JesXO2bsF5C/pUBO0w2sfut/vuQkI9vEXLoNENHfrTeRjB+NxjoIUX
|
19
|
+
t0LcXqB+KiGvfUbhBUv7qoh/hJdU1zyw0EKh/UEUpddIhMkrS3ULoklaR9fwN8kH
|
20
|
+
q+aAghO3djRFzQ5u3twHIeYsb+OR1gx+Wc0PhzSn93VQ5RCzfzPJz9YzFqiKktbl
|
21
|
+
In554mvExkppc6H1VBDFehRopuKmuWVthgQZ24CbKjoXfa6hbYYsCDipzXw1O5DO
|
22
|
+
8uPnzdPgh+KfCdUk8DNL9lmS0XrhE0aTRbj3JM+vAEjNT2+LjVIXVFj11o/zVMQx
|
23
|
+
pedoKT1xYkIkKpKS51vPrjrrtlBwWL8Om6TdEEy7ygNEA1yk4C5uuZSQYgZ06jku
|
24
|
+
9EF1vdawhzYF4sKnpO8XDf6xQzwDYRpX3f/Kj9PJNmyXisVqTH2Twdu6u2ZNtqrR
|
25
|
+
6AYhUYjlHjtrt2qnAYWJt6ZqxbkXY0oJDPxfdg8WX5py4LjJSh/6cTmxuMHqEhbX
|
26
|
+
aGZoH0va6h9KJIHFKmZzSePzPJO+qBp7tp6sqiOOpk8FlmS1UGzyYpQb8TmJiIKS
|
27
|
+
0oehp9o4qppe6Lwlhrz883ZGGhkrloHsZGY08l2yURQa7yLJ4dRq/Afvfdf58mFJ
|
28
|
+
AgMBAAGjgcYwgcMwHQYDVR0OBBYEFKK4azQaN8q8i/8LxqEf+gl0LAbAMGoGA1Ud
|
29
|
+
IwRjMGGAFKK4azQaN8q8i/8LxqEf+gl0LAbAoT6kPDA6MR4wHAYJKoZIhvcNAQkB
|
30
|
+
Fg9jaG9ybkBjaG9ybi5jb20xGDAWBgNVBAMMD2Nob3JuQGNob3JuLmNvbYIJAM/4
|
31
|
+
MvGHp6qBMBoGA1UdEQQTMBGBD2Nob3JuQGNob3JuLmNvbTAaBgNVHRIEEzARgQ9j
|
32
|
+
aG9ybkBjaG9ybi5jb20wDQYJKoZIhvcNAQENBQADggQBAAefdXGzhwlB9qY0MqTl
|
33
|
+
TLSI4NlqnJjmlZuL/bk/s49CMXWUvkk8km1DeY2mJLGPSimhJIp0sylJ2DlrsQ4T
|
34
|
+
C8gyd9ypjeeTaCs18t1FbwSUBoV5k5ci+ivR9rK7CFJQSLIX0rNlsj5j2JC3HvYG
|
35
|
+
MvVZQNfKWqdyXuNgemAUuCOg4Y+fuv2COQyr72pOwAKFWaIzRb9q6tr9Ipr/s4Gb
|
36
|
+
3qVe/JRbnZME3qhBr8YyorSH9aoOINZWMtquIs6thqNKXaxnIGij23Yc2ICumyuW
|
37
|
+
wf27XD6/YedONg9S7I/ruGWJGy80FvaOj0BTZR5qghmcCvffiArziqEdoM9qBhUw
|
38
|
+
1Aw1sxDG/VFSAJRp1f2zvWtXwaDnEDKjIAtjYCFjoTUKcztl0Di7+M5uGw2MKGbI
|
39
|
+
M9rjRh6rLZsgjIE+R3ppYwI503xlBiRoyuQFO6P9S2D0toxwvfkO+byPVvz4vxnQ
|
40
|
+
e9XfmSQqDIQVNkie1AaXHnNJRVNgi4MDmBKvwmKPWbPEN/4g1noO+ELDknINpoeo
|
41
|
+
AV0QU1YyumtpRVhKu3tuiYJifdtyihBVzb8n/4AYLcIXuPhlRgUBqPNSOlC2H2/+
|
42
|
+
h9Bu4VYZwOqYKVjJADz2thbAxQEW3e5GitVukOL5ZPJxMqCcRap/R8tlORKNiyQJ
|
43
|
+
YfJQ9Go4pvS82cwql9NEt5cyaVU5JWJCRRUlsCwYWv/hGkbjjDafCVa+ITw0sKbq
|
44
|
+
DvDiFKtFeZhLYI3VDS5kkrI1SeDoDX16DSCn0ECuMueWvmLkBzz5J4bjpHy+UCEr
|
45
|
+
nye3JA2O20TEScLDh/zeJixfHbjTzA+vEC047/Lh7dtRTUemk4K2JXWHFBveB4Se
|
46
|
+
rvvasIn7cygUq0MfhtNJd2wGYXMxWyT3LJvISqp+NP65tjna6zkpmZNnOpCEdq8x
|
47
|
+
U59ydjqtevg9cZIco0tiTzGrBrcy7xnVWQKt+hbsV702V02Nx7gG4ihLRMIdAjtL
|
48
|
+
nnnm3d3shuIrTfRG4YwBiYdJlT0X92geAtQbq5OPrT/27DBV6h3YydBoozkVEaZp
|
49
|
+
Bvbz27URCmOw2e4psQctUBuVrTPirjqEwOql/SqqJsMIdWQ6oXLy235iuGzDgx+p
|
50
|
+
Fi8s5azIOjbjFnc4OBols94L3oZxyIf+lUXgNBcvQ1wbWBlo+V1hyRWLSvLPLAtr
|
51
|
+
3eeuQZ5HTqTSZSaMKfFrH10hG06h7Zla7oYxSn0GJsgu/aI1STGFHenjNeNRyFb4
|
52
|
+
Qcofa1uD1LTI/r7EKL1I25n6eiABGw0KNDvow/ZtP9Kmx2QobEsWUbPfvkX1ogdj
|
53
|
+
plBjPb1jywhCxKfdcYv+f4er+X1AnUyoYNi7EiifiNys7nmVG4+aFEZyUULPrB2g
|
54
|
+
bHE=
|
55
|
+
-----END CERTIFICATE-----
|
@@ -0,0 +1 @@
|
|
1
|
+
294b882dbf36a5eadb5f9ee1e22262d403024e8c1263f22a0af84e9402e9b0236b0e6bb022342ae95e8e384389c64ed22d057f3f8c1e077ff313cf4f93ec5f23
|
@@ -0,0 +1 @@
|
|
1
|
+
20c36413799bce5ac2fff4d2655449f3c53eaf9e88225fa4c61e88d5c5ee275224343294115e52a6be22f0dcd3328a9346f81836739371299e527b7e9307730b
|
@@ -0,0 +1 @@
|
|
1
|
+
b5b26ab87ee234cbfb4c75e83f35bf765ab2afef12f72742be74725a652231ec103e85a1811821acc59e221bcf5c7b0910b0466b2f4cdee506d5b9befe27dcf0
|
@@ -0,0 +1 @@
|
|
1
|
+
071de8a4206e62e689131fd00f35a1d8d98d535ca574449baf92e3ffe207e6ed6f5a843c5cd8f040b148b8a2cfa918251621bd97482153084a7cd46d81961e9f
|