openjournals-nameable 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.codeclimate.yml +7 -0
- data/.gitignore +4 -0
- data/.rubocop.yml +17 -0
- data/.travis.yml +14 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +48 -0
- data/LICENSE.txt +22 -0
- data/README.md +84 -0
- data/Rakefile +12 -0
- data/bin/nameable_web_service +24 -0
- data/certs/chorn.pem +55 -0
- data/checksums/nameable-1.1.0.gem.sha512 +1 -0
- data/checksums/nameable-1.1.1.gem.sha512 +1 -0
- data/checksums/nameable-1.1.3.gem.sha512 +1 -0
- data/checksums/nameable-1.1.4.gem.sha512 +1 -0
- data/data/app_c.csv +151672 -0
- data/data/yob2016.txt +32868 -0
- data/lib/nameable/assets.rb +6 -0
- data/lib/nameable/error.rb +4 -0
- data/lib/nameable/extensions.rb +5 -0
- data/lib/nameable/latin/patterns.rb +39 -0
- data/lib/nameable/latin.rb +251 -0
- data/lib/nameable/version.rb +3 -0
- data/lib/nameable.rb +11 -0
- data/nameable.gemspec +33 -0
- data/spec/nameable/extensions_spec.rb +11 -0
- data/spec/nameable/latin_spec.rb +192 -0
- data/spec/nameable_spec.rb +7 -0
- data/spec/spec_helper.rb +11 -0
- metadata +149 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4fef4b042621efe6e1a52e9d91380c10d596a07b
|
4
|
+
data.tar.gz: d354a86096588679916b84e9253c9d60362c4a83
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b5ca16fd4161af6aff56039fb7ff4f068b5c8d1d772d7f6b10f2d54e4a177cbd81e2c5d56ceee4a3669b41436b7efe798474f762596776fee13bdbe49d718578
|
7
|
+
data.tar.gz: 067150f1f4e864a8235f5a27027e04e94d24b86e208baf441b214fb97dcbc58e956a4d5bf040001acd830c0dc8a75c4f84d2eca6468758ebecc366af4c1dd485
|
data/.codeclimate.yml
ADDED
data/.gitignore
ADDED
data/.rubocop.yml
ADDED
data/.travis.yml
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
language: ruby
|
2
|
+
cache: bundler
|
3
|
+
script: bundle exec rspec
|
4
|
+
rvm:
|
5
|
+
- 1.9.3-p551
|
6
|
+
- 2.0.0-p648
|
7
|
+
- 2.1.10
|
8
|
+
- 2.2.7
|
9
|
+
- 2.3.0
|
10
|
+
- 2.3.4
|
11
|
+
- 2.4.0
|
12
|
+
- 2.4.1
|
13
|
+
dd:
|
14
|
+
secure: fzItORKaVrevTjQQu85FYgqgSVkRGG8PtOfNTCYY0meyP1+hF/lH6i5ZAb35VAuwi9InqaYnPx09dREBlfEqkzY/Ypxglq+EosAfqOgiKFu44JlM4vhxSdwn2mywCJpYZ0IbH5OnkF6rUULL/AoRQPlIV77eRqhB9LGfLNlYMfU=
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
nameable (1.1.4)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
codeclimate-test-reporter (1.0.8)
|
10
|
+
simplecov (<= 0.13)
|
11
|
+
diff-lcs (1.3)
|
12
|
+
docile (1.1.5)
|
13
|
+
json (2.1.0)
|
14
|
+
json (2.1.0-java)
|
15
|
+
rake (12.0.0)
|
16
|
+
rspec (3.6.0)
|
17
|
+
rspec-core (~> 3.6.0)
|
18
|
+
rspec-expectations (~> 3.6.0)
|
19
|
+
rspec-mocks (~> 3.6.0)
|
20
|
+
rspec-core (3.6.0)
|
21
|
+
rspec-support (~> 3.6.0)
|
22
|
+
rspec-expectations (3.6.0)
|
23
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
24
|
+
rspec-support (~> 3.6.0)
|
25
|
+
rspec-mocks (3.6.0)
|
26
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
27
|
+
rspec-support (~> 3.6.0)
|
28
|
+
rspec-support (3.6.0)
|
29
|
+
simplecov (0.13.0)
|
30
|
+
docile (~> 1.1.0)
|
31
|
+
json (>= 1.8, < 3)
|
32
|
+
simplecov-html (~> 0.10.0)
|
33
|
+
simplecov-html (0.10.1)
|
34
|
+
|
35
|
+
PLATFORMS
|
36
|
+
java
|
37
|
+
ruby
|
38
|
+
|
39
|
+
DEPENDENCIES
|
40
|
+
bundler
|
41
|
+
codeclimate-test-reporter
|
42
|
+
nameable!
|
43
|
+
rake
|
44
|
+
rspec (~> 3.6)
|
45
|
+
simplecov
|
46
|
+
|
47
|
+
BUNDLED WITH
|
48
|
+
1.14.6
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2008 Chris Horn http://chorn.com/
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
# nameable
|
2
|
+
|
3
|
+
[Gem Version](http://badge.fury.io/rb/nameable)
|
4
|
+
[Build Status](https://travis-ci.org/chorn/nameable)
|
5
|
+
|
6
|
+
A library that provides parsing and normalization of people's names.
|
7
|
+
|
8
|
+
```ruby
|
9
|
+
require 'nameable'
|
10
|
+
n = Nameable::Latin.new.parse('Mr. Chris K Horn Esquire')
|
11
|
+
puts "#{n.prefix} #{n.first} #{n.middle} #{n.last} #{n.suffix}"
|
12
|
+
#=> Mr. Chris K Horn Esq.
|
13
|
+
puts n.to_fullname
|
14
|
+
#=> Mr. Chris K. Horn, Esq.
|
15
|
+
n = Nameable::Latin.new('CHRIS', 'HORN')
|
16
|
+
puts n.to_nameable
|
17
|
+
#=> Chris Horn
|
18
|
+
n = Nameable::Latin.new(prefix:'Sir', last:'Horn')
|
19
|
+
puts n
|
20
|
+
#=> Sir Horn
|
21
|
+
```
|
22
|
+
|
23
|
+
# Features
|
24
|
+
|
25
|
+
Convenience methods:
|
26
|
+
|
27
|
+
```ruby
|
28
|
+
puts Nameable('chris horn, iii')
|
29
|
+
#=> "Chris Horn, III."
|
30
|
+
puts Nameable.parse('chris horn, iii')
|
31
|
+
#=> #<Nameable::Latin:0x007f8470e01b08 @first="Chris", @last="Horn", @middle=nil, @prefix=nil, @suffix="III.">
|
32
|
+
```
|
33
|
+
|
34
|
+
Using a database of first names from the U.S. Social Security Administration, Nameable will pick the most likely gender for a name.
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
Nameable::Latin.new('Chris').gender
|
38
|
+
#=> :male
|
39
|
+
Nameable::Latin.new('Janine').female?
|
40
|
+
#=> true
|
41
|
+
```
|
42
|
+
|
43
|
+
Using a database of last names from the U.S. Census, Nameable will return the ethnicity breakdown as a Hash.
|
44
|
+
|
45
|
+
```ruby
|
46
|
+
Nameable::Latin.new('Chris', 'Horn').ethnicity
|
47
|
+
#=> {:rank=>593, :count=>51380, :percent_white=>86.75, :percent_black=>8.31, :percent_asian_pacific_islander=>0.84, :percent_american_indian_alaska_native=>1.16, :percent_two_or_more_races=>1.46, :percent_hispanic=>1.48}
|
48
|
+
```
|
49
|
+
|
50
|
+
# Other uses
|
51
|
+
|
52
|
+
I've included a little web service, which should be installed as "nameable_web_service" and which requires Sinatra. It's been handy when paired with OpenRefine, if I'm working with a file and I am not going to be parsing with Ruby. If you're reading this, that's probably not an issue for you, but I do think it's a nice way to show someone how to use OpenRefine in a more advanced way.
|
53
|
+
|
54
|
+
# Inspiration
|
55
|
+
|
56
|
+
By inspiration, I should really say "other projects from which I yanked their code, ideas, examples and data." At worst I'll make sure the other projects I looked at and borrowed from are credited here.
|
57
|
+
|
58
|
+
# Security
|
59
|
+
|
60
|
+
As of version `1.1.1`, the nameable gem is cryptographically signed. To be sure the gem you install hasn’t been tampered with, add my public key as a trusted certificate, and verify that nameable and any dependencies it has are also signed:
|
61
|
+
|
62
|
+
```
|
63
|
+
$ gem cert --add <(curl -Ls https://raw.github.com/chorn/nameable/master/certs/chorn.pem)
|
64
|
+
$ gem install nameable -P HighSecurity
|
65
|
+
```
|
66
|
+
|
67
|
+
# References
|
68
|
+
|
69
|
+
* [Open Refine](http://openrefine.org/) formerly [Google Refine](https://code.google.com/p/google-refine/)
|
70
|
+
* [Help with splitting names](http://www.onlineaspect.com/2009/08/17/splitting-names/)
|
71
|
+
* [First Names from the U.S. SSA](http://www.ssa.gov/oact/babynames/limits.html)
|
72
|
+
* [Last Names from the Census](http://www.census.gov/topics/population/genealogy/data/2000_surnames.html)
|
73
|
+
* [Data Science Toolkit](https://github.com/petewarden/dstk)
|
74
|
+
* [Addressable](https://github.com/sporkmonger/addressable)
|
75
|
+
|
76
|
+
# To-do
|
77
|
+
|
78
|
+
1. Extract all of the US Census / Ethnicity / Asset stuff out of `Latin`. Yuck, that's ugly; why did I ever do that?
|
79
|
+
2. Rename `Latin` to be `US` or `English` because it looks like I really only support English, and probably US English.
|
80
|
+
3. Use named captures for all the regexes.
|
81
|
+
4. Refactor the Ethnicity stuff into a class.
|
82
|
+
5. Refactor parsing into a class.
|
83
|
+
|
84
|
+
-chorn
|
data/Rakefile
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'digest/sha2'
|
3
|
+
|
4
|
+
task default: %w[build]
|
5
|
+
|
6
|
+
desc 'Update checksums for gems in ./pkg'
# Writes one "<gem file name>.sha512" file into ./checksums for every
# "*.gem" file found in ./pkg. Each output file contains the hex-encoded
# SHA-512 digest of the corresponding gem and nothing else.
task :checksums do
  # Create the output directory on first use instead of failing on the write.
  Dir.mkdir('checksums') unless Dir.exist?('checksums')

  Dir.glob('pkg/*.gem').each do |gem_path|
    # Digest#file reads the file in binary mode, so the digest covers the
    # exact bytes on disk regardless of platform newline translation.
    checksum = Digest::SHA512.file(gem_path).hexdigest

    # Derive the destination from the file name only. Substituting "pkg" in
    # the whole path would also rewrite gem names that contain "pkg".
    checksum_path = File.join('checksums', "#{File.basename(gem_path)}.sha512")

    File.open(checksum_path, 'w') { |f| f.write(checksum) }
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# I use this with OpenRefine: http://openrefine.org/
|
4
|
+
|
5
|
+
require 'rubygems'
|
6
|
+
require 'json'
|
7
|
+
require 'sinatra'
|
8
|
+
require 'nameable'
|
9
|
+
|
10
|
+
# GET /<raw_name>/<function>.<type>
#
# Parses <raw_name> with Nameable() and renders the result:
#   * type "json"    -> the parsed name's #to_hash serialized as JSON
#   * any other type -> plain text from the parsed name's "to_<function>"
#                       method, where <function> must be one of fullname,
#                       nameable, firstname, lastname or middlename;
#                       anything else yields no body.
#
# A name that cannot be parsed produces an empty response body.
get '/*/*.*' do |raw_name, function, type|
  name =
    begin
      Nameable(raw_name)
    rescue Nameable::Latin::InvalidNameError
      # Leave name nil so the branches below return the empty body instead
      # of calling methods on nil.
      nil
    end

  if name.nil?
    ''
  elsif type == 'json' # plain string comparison; no need to intern caller-supplied text
    content_type 'application/json'
    name.to_hash.to_json
  else
    content_type 'text/plain'
    # Exact-match allow-list: only these names may ever reach #send.
    # (A ^...$ regexp would match per line and accept multi-line input.)
    allowed = %w[fullname nameable firstname lastname middlename]
    name.send("to_#{function}") if allowed.include?(function)
  end
end
|
data/certs/chorn.pem
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
-----BEGIN CERTIFICATE-----
|
2
|
+
MIIJvjCCBaagAwIBAgIJAM/4MvGHp6qBMA0GCSqGSIb3DQEBDQUAMDoxHjAcBgkq
|
3
|
+
hkiG9w0BCQEWD2Nob3JuQGNob3JuLmNvbTEYMBYGA1UEAwwPY2hvcm5AY2hvcm4u
|
4
|
+
Y29tMB4XDTE1MDYxOTExMzIyNVoXDTIwMDYxNzExMzIyNVowOjEeMBwGCSqGSIb3
|
5
|
+
DQEJARYPY2hvcm5AY2hvcm4uY29tMRgwFgYDVQQDDA9jaG9ybkBjaG9ybi5jb20w
|
6
|
+
ggQiMA0GCSqGSIb3DQEBAQUAA4IEDwAwggQKAoIEAQCrmvCKjihyb0/wGp6xds1M
|
7
|
+
PII8KrtYztDRFD5kmJioqBt/8VcjrLqgCPKzafCY/ztLoRq7Zegjeer/QS7mUJGL
|
8
|
+
EIG8hMSsA80y1+jCa0TmIbpRic9Y5RQoA0SiKGBqZ1j0rbhP5ZbQXxE6IG9aSxRU
|
9
|
+
WTeSiGfcxWOG+as0L1qBrpuS9wXGenwsudLNouCY0ekRhOyQr3g+9U/dJqkb5ucg
|
10
|
+
Mj9OyYQFPLnbml4SoTVy7KcbuuzEyPcaf+Zc688if5Bc3yQddnQfmbINhuQuqk/f
|
11
|
+
EXOFarN1jZLN04mqjOYa9vR0W+AxEtrYLaTD1+ILoJzFhlO5NCTBMxSVNxlt95sY
|
12
|
+
opEabhgZolrUA653mnAvNq7HEnbdH4iSUEHtSD1e0goGhaRmh7NmJPR3jQgj4dox
|
13
|
+
L+FlE2MF4SW0lP74438BO29ClenGnPNgYWFuV0rSjmyTIMKkFHYP43VsNGVUv3zh
|
14
|
+
JH6uqkXQvjEPWYqC42noEmpga1xxRKUizj90gzFtBXtOYi2AX2/RvpiPI2wabFoC
|
15
|
+
s2QByquD0f8oXj60e1MLDXm7i0qImCLzzWyjXqTmM+7pdRBFYOmD4MiOKViZsdfs
|
16
|
+
fTfv+bJxjLIyGg1igA0+T3A9Jhfr7nuBlt3u5rsTC6DthXpxReCsn/3fmzKOKE08
|
17
|
+
jODSjhS2kVQUux2j1HF1/3ldicqvCttxpzvbooEGhN7TzHzj/nTnovg9wG6zmhUh
|
18
|
+
8tmf4KO+4/JesXO2bsF5C/pUBO0w2sfut/vuQkI9vEXLoNENHfrTeRjB+NxjoIUX
|
19
|
+
t0LcXqB+KiGvfUbhBUv7qoh/hJdU1zyw0EKh/UEUpddIhMkrS3ULoklaR9fwN8kH
|
20
|
+
q+aAghO3djRFzQ5u3twHIeYsb+OR1gx+Wc0PhzSn93VQ5RCzfzPJz9YzFqiKktbl
|
21
|
+
In554mvExkppc6H1VBDFehRopuKmuWVthgQZ24CbKjoXfa6hbYYsCDipzXw1O5DO
|
22
|
+
8uPnzdPgh+KfCdUk8DNL9lmS0XrhE0aTRbj3JM+vAEjNT2+LjVIXVFj11o/zVMQx
|
23
|
+
pedoKT1xYkIkKpKS51vPrjrrtlBwWL8Om6TdEEy7ygNEA1yk4C5uuZSQYgZ06jku
|
24
|
+
9EF1vdawhzYF4sKnpO8XDf6xQzwDYRpX3f/Kj9PJNmyXisVqTH2Twdu6u2ZNtqrR
|
25
|
+
6AYhUYjlHjtrt2qnAYWJt6ZqxbkXY0oJDPxfdg8WX5py4LjJSh/6cTmxuMHqEhbX
|
26
|
+
aGZoH0va6h9KJIHFKmZzSePzPJO+qBp7tp6sqiOOpk8FlmS1UGzyYpQb8TmJiIKS
|
27
|
+
0oehp9o4qppe6Lwlhrz883ZGGhkrloHsZGY08l2yURQa7yLJ4dRq/Afvfdf58mFJ
|
28
|
+
AgMBAAGjgcYwgcMwHQYDVR0OBBYEFKK4azQaN8q8i/8LxqEf+gl0LAbAMGoGA1Ud
|
29
|
+
IwRjMGGAFKK4azQaN8q8i/8LxqEf+gl0LAbAoT6kPDA6MR4wHAYJKoZIhvcNAQkB
|
30
|
+
Fg9jaG9ybkBjaG9ybi5jb20xGDAWBgNVBAMMD2Nob3JuQGNob3JuLmNvbYIJAM/4
|
31
|
+
MvGHp6qBMBoGA1UdEQQTMBGBD2Nob3JuQGNob3JuLmNvbTAaBgNVHRIEEzARgQ9j
|
32
|
+
aG9ybkBjaG9ybi5jb20wDQYJKoZIhvcNAQENBQADggQBAAefdXGzhwlB9qY0MqTl
|
33
|
+
TLSI4NlqnJjmlZuL/bk/s49CMXWUvkk8km1DeY2mJLGPSimhJIp0sylJ2DlrsQ4T
|
34
|
+
C8gyd9ypjeeTaCs18t1FbwSUBoV5k5ci+ivR9rK7CFJQSLIX0rNlsj5j2JC3HvYG
|
35
|
+
MvVZQNfKWqdyXuNgemAUuCOg4Y+fuv2COQyr72pOwAKFWaIzRb9q6tr9Ipr/s4Gb
|
36
|
+
3qVe/JRbnZME3qhBr8YyorSH9aoOINZWMtquIs6thqNKXaxnIGij23Yc2ICumyuW
|
37
|
+
wf27XD6/YedONg9S7I/ruGWJGy80FvaOj0BTZR5qghmcCvffiArziqEdoM9qBhUw
|
38
|
+
1Aw1sxDG/VFSAJRp1f2zvWtXwaDnEDKjIAtjYCFjoTUKcztl0Di7+M5uGw2MKGbI
|
39
|
+
M9rjRh6rLZsgjIE+R3ppYwI503xlBiRoyuQFO6P9S2D0toxwvfkO+byPVvz4vxnQ
|
40
|
+
e9XfmSQqDIQVNkie1AaXHnNJRVNgi4MDmBKvwmKPWbPEN/4g1noO+ELDknINpoeo
|
41
|
+
AV0QU1YyumtpRVhKu3tuiYJifdtyihBVzb8n/4AYLcIXuPhlRgUBqPNSOlC2H2/+
|
42
|
+
h9Bu4VYZwOqYKVjJADz2thbAxQEW3e5GitVukOL5ZPJxMqCcRap/R8tlORKNiyQJ
|
43
|
+
YfJQ9Go4pvS82cwql9NEt5cyaVU5JWJCRRUlsCwYWv/hGkbjjDafCVa+ITw0sKbq
|
44
|
+
DvDiFKtFeZhLYI3VDS5kkrI1SeDoDX16DSCn0ECuMueWvmLkBzz5J4bjpHy+UCEr
|
45
|
+
nye3JA2O20TEScLDh/zeJixfHbjTzA+vEC047/Lh7dtRTUemk4K2JXWHFBveB4Se
|
46
|
+
rvvasIn7cygUq0MfhtNJd2wGYXMxWyT3LJvISqp+NP65tjna6zkpmZNnOpCEdq8x
|
47
|
+
U59ydjqtevg9cZIco0tiTzGrBrcy7xnVWQKt+hbsV702V02Nx7gG4ihLRMIdAjtL
|
48
|
+
nnnm3d3shuIrTfRG4YwBiYdJlT0X92geAtQbq5OPrT/27DBV6h3YydBoozkVEaZp
|
49
|
+
Bvbz27URCmOw2e4psQctUBuVrTPirjqEwOql/SqqJsMIdWQ6oXLy235iuGzDgx+p
|
50
|
+
Fi8s5azIOjbjFnc4OBols94L3oZxyIf+lUXgNBcvQ1wbWBlo+V1hyRWLSvLPLAtr
|
51
|
+
3eeuQZ5HTqTSZSaMKfFrH10hG06h7Zla7oYxSn0GJsgu/aI1STGFHenjNeNRyFb4
|
52
|
+
Qcofa1uD1LTI/r7EKL1I25n6eiABGw0KNDvow/ZtP9Kmx2QobEsWUbPfvkX1ogdj
|
53
|
+
plBjPb1jywhCxKfdcYv+f4er+X1AnUyoYNi7EiifiNys7nmVG4+aFEZyUULPrB2g
|
54
|
+
bHE=
|
55
|
+
-----END CERTIFICATE-----
|
@@ -0,0 +1 @@
|
|
1
|
+
294b882dbf36a5eadb5f9ee1e22262d403024e8c1263f22a0af84e9402e9b0236b0e6bb022342ae95e8e384389c64ed22d057f3f8c1e077ff313cf4f93ec5f23
|
@@ -0,0 +1 @@
|
|
1
|
+
20c36413799bce5ac2fff4d2655449f3c53eaf9e88225fa4c61e88d5c5ee275224343294115e52a6be22f0dcd3328a9346f81836739371299e527b7e9307730b
|
@@ -0,0 +1 @@
|
|
1
|
+
b5b26ab87ee234cbfb4c75e83f35bf765ab2afef12f72742be74725a652231ec103e85a1811821acc59e221bcf5c7b0910b0466b2f4cdee506d5b9befe27dcf0
|
@@ -0,0 +1 @@
|
|
1
|
+
071de8a4206e62e689131fd00f35a1d8d98d535ca574449baf92e3ffe207e6ed6f5a843c5cd8f040b148b8a2cfa918251621bd97482153084a7cd46d81961e9f
|