bougyman-name_parse 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/AUTHORS +2 -1
- data/CHANGELOG +20 -0
- data/Rakefile +1 -1
- data/lib/name_parse/parser.rb +67 -49
- data/lib/name_parse/version.rb +1 -1
- data/name_parse.gemspec +3 -3
- metadata +3 -3
data/AUTHORS
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
The following persons have contributed to name_parse.
|
|
2
2
|
(Sorted by number of submitted patches, then alphabetically)
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
10 TJ Vanderpoel <bougy.man@gmail.com>
|
|
5
5
|
2 Jayson Vaughn (thedonvaughn) <jayson.vaughn@gmail.com>
|
|
6
|
+
2 TJ Vanderpoel <bougyman@zero.(none)>
|
data/CHANGELOG
CHANGED
|
@@ -1,3 +1,23 @@
|
|
|
1
|
+
[736bf3d | Thu May 07 16:54:52 UTC 2009] TJ Vanderpoel <bougyman@zero.(none)>
|
|
2
|
+
|
|
3
|
+
* completed refactoring to methodized matchers in #parse
|
|
4
|
+
|
|
5
|
+
[e58f6e4 | Thu May 07 16:29:25 UTC 2009] TJ Vanderpoel <bougyman@zero.(none)>
|
|
6
|
+
|
|
7
|
+
* continued to refactor into a methodized pattern for matching in #parse
|
|
8
|
+
|
|
9
|
+
[835e508 | Thu May 07 03:29:00 UTC 2009] TJ Vanderpoel <bougy.man@gmail.com>
|
|
10
|
+
|
|
11
|
+
* ignore csv files
|
|
12
|
+
|
|
13
|
+
[fcb13a0 | Thu May 07 03:28:14 UTC 2009] TJ Vanderpoel <bougy.man@gmail.com>
|
|
14
|
+
|
|
15
|
+
* added README as description
|
|
16
|
+
|
|
17
|
+
[016010c | Thu May 07 03:18:51 UTC 2009] TJ Vanderpoel <bougy.man@gmail.com>
|
|
18
|
+
|
|
19
|
+
* Version 0.0.4
|
|
20
|
+
|
|
1
21
|
[5d85b44 | Thu May 07 03:14:31 UTC 2009] TJ Vanderpoel <bougy.man@gmail.com>
|
|
2
22
|
|
|
3
23
|
* methodized when match target for first_name. TODO: make the other when matchers follow the same format
|
data/Rakefile
CHANGED
|
@@ -27,7 +27,7 @@ GEMSPEC = Gem::Specification.new{|s|
|
|
|
27
27
|
s.name = 'name_parse'
|
|
28
28
|
s.author = "TJ Vanderpoel"
|
|
29
29
|
s.summary = "Parse name strings into their constituent parts"
|
|
30
|
-
s.description =
|
|
30
|
+
s.description = File.read("README")
|
|
31
31
|
s.email = 'bougy.man@gmail.com'
|
|
32
32
|
s.homepage = 'http://github.com/bougyman/name_parse'
|
|
33
33
|
s.platform = Gem::Platform::RUBY
|
data/lib/name_parse/parser.rb
CHANGED
|
@@ -1,73 +1,91 @@
|
|
|
1
1
|
module NameParse
|
|
2
2
|
class Parser
|
|
3
3
|
attr_reader :first, :last, :suffix, :prefix, :middle, :raw, :matched
|
|
4
|
+
attr_accessor :first_name_re, :last_name_re, :middle_name_re, :prefix_re
|
|
4
5
|
|
|
5
|
-
def initialize(name_string = nil)
|
|
6
|
-
@
|
|
7
|
-
|
|
6
|
+
def initialize(name_string = nil, options = {})
|
|
7
|
+
@options = options
|
|
8
|
+
@last_name_re = @options[:last_name_re] || /(?:(?:v[ao]n(?:\s+der?)?|de\s+la)\s+)?\w[-.'\w]+/i
|
|
9
|
+
@middle_name_re = @options[:middle_name_re] || /\w(?:\.|[-.'\w]+)?/
|
|
10
|
+
@prefix_re = @options[:prefix_re] || /(?:c\/o|dr|mrs?|ms|miss|mister|sgt|cpt|cpl)\.?/i
|
|
11
|
+
@first_name_re = @options[:first_name_re] || /\w[-.'\w]+/
|
|
12
|
+
parse(name_string) if name_string
|
|
8
13
|
end
|
|
9
14
|
|
|
10
|
-
def prefix_re
|
|
11
|
-
/(?:c\/o|dr|mrs?|ms|miss|mister|sgt|cpt|cpl)\.?/i
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
def first_name_re
|
|
15
|
-
/\w[-.'\w]+/
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
def first_last(match = nil)
|
|
19
|
-
if match.nil?
|
|
20
|
-
/^(#{first_name_re})\s+(#{last_name_re})$/
|
|
21
|
-
else
|
|
22
|
-
@matched = :first_last
|
|
23
|
-
@first, @last = match.to_a[1 .. 2]
|
|
24
|
-
end
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
def last_name_re
|
|
28
|
-
/(?:(?:v[ao]n(?:\s+der?)?|de\s+la)\s+)?\w[-.'\w]+/i
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
def middle_name_re
|
|
32
|
-
/\w(?:\.|[-.'\w]+)?/
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
def middle_i
|
|
36
|
-
return nil if middle.nil?
|
|
37
|
-
middle[0,1]
|
|
38
|
-
end
|
|
39
|
-
|
|
40
15
|
def parse(name)
|
|
16
|
+
@raw = name
|
|
41
17
|
case name
|
|
42
18
|
# just "Firstname Lastname"
|
|
43
19
|
when first_last
|
|
44
20
|
first_last($~)
|
|
45
21
|
# Catch names with prefixes, no comma
|
|
46
|
-
when
|
|
47
|
-
|
|
48
|
-
@matched = :pre_first_last
|
|
22
|
+
when pre_first_last
|
|
23
|
+
pre_first_last($~)
|
|
49
24
|
# Catch names with prefixes and middle names, no comma
|
|
50
|
-
when
|
|
51
|
-
|
|
52
|
-
@matched = :pre_first_mid_last
|
|
25
|
+
when pre_first_mid_last
|
|
26
|
+
pre_first_mid_last($~)
|
|
53
27
|
# just "Firstname Middle Lastname"
|
|
54
|
-
when
|
|
55
|
-
|
|
56
|
-
@matched = :first_mid_last
|
|
28
|
+
when first_mid_last
|
|
29
|
+
first_mid_last($~)
|
|
57
30
|
# just "Lastname, Firstname (Middle)" middle is optional
|
|
58
|
-
when
|
|
59
|
-
|
|
60
|
-
@matched = :last_comma_first_mid
|
|
31
|
+
when last_comma_first_mid
|
|
32
|
+
last_comma_first_mid($~)
|
|
61
33
|
# Comma with lots of lastnames, a first name, optional middle name
|
|
62
|
-
when
|
|
63
|
-
|
|
64
|
-
@matched = :multi_last_comma
|
|
34
|
+
when multi_last_comma
|
|
35
|
+
multi_last_comma($~)
|
|
65
36
|
else
|
|
66
|
-
|
|
37
|
+
if @options[:raise_on_unknown]
|
|
38
|
+
raise UnknownFormat, "Could not parse #{@raw}"
|
|
39
|
+
else
|
|
40
|
+
@matched = :unknown
|
|
41
|
+
false
|
|
42
|
+
end
|
|
67
43
|
end
|
|
68
44
|
end
|
|
69
45
|
|
|
70
46
|
alias :first_name :first
|
|
71
47
|
|
|
48
|
+
def middle_i
|
|
49
|
+
return nil if middle.nil?
|
|
50
|
+
middle[0,1]
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
private
|
|
54
|
+
def first_last(match = nil)
|
|
55
|
+
return %r{^(#{first_name_re})\s+(#{last_name_re})$} if match.nil?
|
|
56
|
+
@first, @last = match[1 .. 2]
|
|
57
|
+
@matched = :first_last
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def pre_first_last(match = nil)
|
|
61
|
+
return %r{^(#{prefix_re})\s+(#{first_name_re})\s+(#{last_name_re})$} if match.nil?
|
|
62
|
+
@prefix, @first, @last = match[1 .. 3]
|
|
63
|
+
@matched = :pre_first_last
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def pre_first_mid_last(match = nil)
|
|
67
|
+
return %r{^(#{prefix_re})\s+(#{first_name_re})\s+(#{middle_name_re})\s+(#{last_name_re})$} if match.nil?
|
|
68
|
+
@prefix, @first, @middle, @last = match[1 .. 4]
|
|
69
|
+
@matched = :pre_first_mid_last
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
def first_mid_last(match = nil)
|
|
73
|
+
return %r{^(#{first_name_re})\s+(#{middle_name_re})\s+(#{last_name_re})$} if match.nil?
|
|
74
|
+
@first, @middle, @last = match[1 .. 3]
|
|
75
|
+
@matched = :first_mid_last
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def last_comma_first_mid(match = nil)
|
|
79
|
+
return %r{^(#{last_name_re}),(?:\s+)?(#{first_name_re})(?:\s+(#{middle_name_re}))?$} if match.nil?
|
|
80
|
+
@first, @last, @middle = match[2], match[1], match[3]
|
|
81
|
+
@matched = :last_comma_first_mid
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
def multi_last_comma(match = nil)
|
|
85
|
+
return %r{^((?:#{last_name_re}(?:\s+)?)+),(?:\s+)(#{first_name_re})(?:\s+(#{middle_name_re}))?$} if match.nil?
|
|
86
|
+
@first, @last, @middle = match[2], match[1], match[3]
|
|
87
|
+
@matched = :multi_last_comma
|
|
88
|
+
end
|
|
89
|
+
|
|
72
90
|
end
|
|
73
91
|
end
|
data/lib/name_parse/version.rb
CHANGED
data/name_parse.gemspec
CHANGED
|
@@ -2,12 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
Gem::Specification.new do |s|
|
|
4
4
|
s.name = %q{name_parse}
|
|
5
|
-
s.version = "0.0.
|
|
5
|
+
s.version = "0.0.5"
|
|
6
6
|
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
8
8
|
s.authors = ["TJ Vanderpoel"]
|
|
9
|
-
s.date = %q{2009-05-
|
|
10
|
-
s.description = %q{Parse name strings into
|
|
9
|
+
s.date = %q{2009-05-07}
|
|
10
|
+
s.description = %q{========================================================= Name Parse Copyright (c) 2009 The Rubyists (Jayson Vaughn, Tj Vanderpoel, Michael Fellinger, Kevin Berry) Distributed under the terms of the MIT License. ========================================================== About ----- A ruby library for turning arbitrary name strings such as "Dr Helen Hunt", "Mr James T. Kirk" into a standardized object usable as parsed = NameParse::Parser.new("Dr Helen Hunt") puts "%s %s" % [parsed.first, parsed.last] Requirements ------------ - ruby (>= 1.8) Usage ----- Example of using on a list: bougyman@zero:~/git_checkouts/name_parse$ irb -r lib/name_parse irb(main):001:0> list = ["Jayson Vaughn", "Dr Helen Hunt", "Mr James T. Kirk"] => ["Jayson Vaughn", "Dr Helen Hunt", "Mr James T. Kirk"] irb(main):002:0> list.map { |n| p = NameParse[n]; [p.first, p.last] } => [["Jayson", "Vaughn"], ["Helen", "Hunt"], ["James", "Kirk"]] Support ------- Home page at http://github.com/bougyman/name_parse #rubyists on FreeNode}
|
|
11
11
|
s.email = %q{bougy.man@gmail.com}
|
|
12
12
|
s.files = ["AUTHORS", "CHANGELOG", "MANIFEST", "README", "Rakefile", "lib/name_parse.rb", "lib/name_parse/error.rb", "lib/name_parse/parser.rb", "lib/name_parse/version.rb", "name_parse.gemspec", "spec/helper.rb", "spec/name_parse/parser.rb", "tasks/authors.rake", "tasks/bacon.rake", "tasks/changelog.rake", "tasks/copyright.rake", "tasks/gem.rake", "tasks/gem_installer.rake", "tasks/install_dependencies.rake", "tasks/manifest.rake", "tasks/rcov.rake", "tasks/release.rake", "tasks/reversion.rake", "tasks/setup.rake", "tasks/yard.rake"]
|
|
13
13
|
s.has_rdoc = true
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: bougyman-name_parse
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.
|
|
4
|
+
version: 0.0.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- TJ Vanderpoel
|
|
@@ -9,11 +9,11 @@ autorequire:
|
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
11
|
|
|
12
|
-
date: 2009-05-
|
|
12
|
+
date: 2009-05-07 00:00:00 -07:00
|
|
13
13
|
default_executable:
|
|
14
14
|
dependencies: []
|
|
15
15
|
|
|
16
|
-
description: Parse name strings into
|
|
16
|
+
description: "========================================================= Name Parse Copyright (c) 2009 The Rubyists (Jayson Vaughn, Tj Vanderpoel, Michael Fellinger, Kevin Berry) Distributed under the terms of the MIT License. ========================================================== About ----- A ruby library for turning arbitrary name strings such as \"Dr Helen Hunt\", \"Mr James T. Kirk\" into a standardized object usable as parsed = NameParse::Parser.new(\"Dr Helen Hunt\") puts \"%s %s\" % [parsed.first, parsed.last] Requirements ------------ - ruby (>= 1.8) Usage ----- Example of using on a list: bougyman@zero:~/git_checkouts/name_parse$ irb -r lib/name_parse irb(main):001:0> list = [\"Jayson Vaughn\", \"Dr Helen Hunt\", \"Mr James T. Kirk\"] => [\"Jayson Vaughn\", \"Dr Helen Hunt\", \"Mr James T. Kirk\"] irb(main):002:0> list.map { |n| p = NameParse[n]; [p.first, p.last] } => [[\"Jayson\", \"Vaughn\"], [\"Helen\", \"Hunt\"], [\"James\", \"Kirk\"]] Support ------- Home page at http://github.com/bougyman/name_parse #rubyists on FreeNode"
|
|
17
17
|
email: bougy.man@gmail.com
|
|
18
18
|
executables: []
|
|
19
19
|
|