bougyman-name_parse 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/AUTHORS +2 -1
- data/CHANGELOG +20 -0
- data/Rakefile +1 -1
- data/lib/name_parse/parser.rb +67 -49
- data/lib/name_parse/version.rb +1 -1
- data/name_parse.gemspec +3 -3
- metadata +3 -3
data/AUTHORS
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
Following persons have contributed to name_parse.
|
2
2
|
(Sorted by number of submitted patches, then alphabetically)
|
3
3
|
|
4
|
-
|
4
|
+
10 TJ Vanderpoel <bougy.man@gmail.com>
|
5
5
|
2 Jayson Vaughn (thedonvaughn) <jayson.vaughn@gmail.com>
|
6
|
+
2 TJ Vanderpoel <bougyman@zero.(none)>
|
data/CHANGELOG
CHANGED
@@ -1,3 +1,23 @@
|
|
1
|
+
[736bf3d | Thu May 07 16:54:52 UTC 2009] TJ Vanderpoel <bougyman@zero.(none)>
|
2
|
+
|
3
|
+
* completed refactoring to methodized matchers in #parse
|
4
|
+
|
5
|
+
[e58f6e4 | Thu May 07 16:29:25 UTC 2009] TJ Vanderpoel <bougyman@zero.(none)>
|
6
|
+
|
7
|
+
* continued to refactor into a methodized pattern for matching in #parse
|
8
|
+
|
9
|
+
[835e508 | Thu May 07 03:29:00 UTC 2009] TJ Vanderpoel <bougy.man@gmail.com>
|
10
|
+
|
11
|
+
* ignore csv files
|
12
|
+
|
13
|
+
[fcb13a0 | Thu May 07 03:28:14 UTC 2009] TJ Vanderpoel <bougy.man@gmail.com>
|
14
|
+
|
15
|
+
* added README as description
|
16
|
+
|
17
|
+
[016010c | Thu May 07 03:18:51 UTC 2009] TJ Vanderpoel <bougy.man@gmail.com>
|
18
|
+
|
19
|
+
* Version 0.0.4
|
20
|
+
|
1
21
|
[5d85b44 | Thu May 07 03:14:31 UTC 2009] TJ Vanderpoel <bougy.man@gmail.com>
|
2
22
|
|
3
23
|
* methodized when match target for first_name. TODO: make the other when matchers follow the same format
|
data/Rakefile
CHANGED
@@ -27,7 +27,7 @@ GEMSPEC = Gem::Specification.new{|s|
|
|
27
27
|
s.name = 'name_parse'
|
28
28
|
s.author = "TJ Vanderpoel"
|
29
29
|
s.summary = "Parse name strings into their constituent parts"
|
30
|
-
s.description =
|
30
|
+
s.description = File.read("README")
|
31
31
|
s.email = 'bougy.man@gmail.com'
|
32
32
|
s.homepage = 'http://github.com/bougyman/name_parse'
|
33
33
|
s.platform = Gem::Platform::RUBY
|
data/lib/name_parse/parser.rb
CHANGED
@@ -1,73 +1,91 @@
|
|
1
1
|
module NameParse
|
2
2
|
class Parser
|
3
3
|
attr_reader :first, :last, :suffix, :prefix, :middle, :raw, :matched
|
4
|
+
attr_accessor :first_name_re, :last_name_re, :middle_name_re, :prefix_re
|
4
5
|
|
5
|
-
def initialize(name_string = nil)
|
6
|
-
@
|
7
|
-
|
6
|
+
def initialize(name_string = nil, options = {})
|
7
|
+
@options = options
|
8
|
+
@last_name_re = @options[:last_name_re] || /(?:(?:v[ao]n(?:\s+der?)?|de\s+la)\s+)?\w[-.'\w]+/i
|
9
|
+
@middle_name_re = @options[:middle_name_re] || /\w(?:\.|[-.'\w]+)?/
|
10
|
+
@prefix_re = @options[:prefix_re] || /(?:c\/o|dr|mrs?|ms|miss|mister|sgt|cpt|cpl)\.?/i
|
11
|
+
@first_name_re = @options[:first_name_re] || /\w[-.'\w]+/
|
12
|
+
parse(name_string) if name_string
|
8
13
|
end
|
9
14
|
|
10
|
-
def prefix_re
|
11
|
-
/(?:c\/o|dr|mrs?|ms|miss|mister|sgt|cpt|cpl)\.?/i
|
12
|
-
end
|
13
|
-
|
14
|
-
def first_name_re
|
15
|
-
/\w[-.'\w]+/
|
16
|
-
end
|
17
|
-
|
18
|
-
def first_last(match = nil)
|
19
|
-
if match.nil?
|
20
|
-
/^(#{first_name_re})\s+(#{last_name_re})$/
|
21
|
-
else
|
22
|
-
@matched = :first_last
|
23
|
-
@first, @last = match.to_a[1 .. 2]
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
def last_name_re
|
28
|
-
/(?:(?:v[ao]n(?:\s+der?)?|de\s+la)\s+)?\w[-.'\w]+/i
|
29
|
-
end
|
30
|
-
|
31
|
-
def middle_name_re
|
32
|
-
/\w(?:\.|[-.'\w]+)?/
|
33
|
-
end
|
34
|
-
|
35
|
-
def middle_i
|
36
|
-
return nil if middle.nil?
|
37
|
-
middle[0,1]
|
38
|
-
end
|
39
|
-
|
40
15
|
def parse(name)
|
16
|
+
@raw = name
|
41
17
|
case name
|
42
18
|
# just "Firstname Lastname"
|
43
19
|
when first_last
|
44
20
|
first_last($~)
|
45
21
|
# Catch names with prefixes, no comma
|
46
|
-
when
|
47
|
-
|
48
|
-
@matched = :pre_first_last
|
22
|
+
when pre_first_last
|
23
|
+
pre_first_last($~)
|
49
24
|
# Catch names with prefixes and middle names, no comma
|
50
|
-
when
|
51
|
-
|
52
|
-
@matched = :pre_first_mid_last
|
25
|
+
when pre_first_mid_last
|
26
|
+
pre_first_mid_last($~)
|
53
27
|
# just "Firstname Middle Lastname"
|
54
|
-
when
|
55
|
-
|
56
|
-
@matched = :first_mid_last
|
28
|
+
when first_mid_last
|
29
|
+
first_mid_last($~)
|
57
30
|
# just "Lastname, Firstname (Middle)" middle is optional
|
58
|
-
when
|
59
|
-
|
60
|
-
@matched = :last_comma_first_mid
|
31
|
+
when last_comma_first_mid
|
32
|
+
last_comma_first_mid($~)
|
61
33
|
# Comma with lots of lastnames, a first name, optional middle name
|
62
|
-
when
|
63
|
-
|
64
|
-
@matched = :multi_last_comma
|
34
|
+
when multi_last_comma
|
35
|
+
multi_last_comma($~)
|
65
36
|
else
|
66
|
-
|
37
|
+
if @options[:raise_on_unknown]
|
38
|
+
raise UnknownFormat, "Could not parse #{@raw}"
|
39
|
+
else
|
40
|
+
@matched = :unknown
|
41
|
+
false
|
42
|
+
end
|
67
43
|
end
|
68
44
|
end
|
69
45
|
|
70
46
|
alias :first_name :first
|
71
47
|
|
48
|
+
def middle_i
|
49
|
+
return nil if middle.nil?
|
50
|
+
middle[0,1]
|
51
|
+
end
|
52
|
+
|
53
|
+
private
|
54
|
+
def first_last(match = nil)
|
55
|
+
return %r{^(#{first_name_re})\s+(#{last_name_re})$} if match.nil?
|
56
|
+
@first, @last = match[1 .. 2]
|
57
|
+
@matched = :first_last
|
58
|
+
end
|
59
|
+
|
60
|
+
def pre_first_last(match = nil)
|
61
|
+
return %r{^(#{prefix_re})\s+(#{first_name_re})\s+(#{last_name_re})$} if match.nil?
|
62
|
+
@prefix, @first, @last = match[1 .. 3]
|
63
|
+
@matched = :pre_first_last
|
64
|
+
end
|
65
|
+
|
66
|
+
def pre_first_mid_last(match = nil)
|
67
|
+
return %r{^(#{prefix_re})\s+(#{first_name_re})\s+(#{middle_name_re})\s+(#{last_name_re})$} if match.nil?
|
68
|
+
@prefix, @first, @middle, @last = match[1 .. 4]
|
69
|
+
@matched = :pre_first_mid_last
|
70
|
+
end
|
71
|
+
|
72
|
+
def first_mid_last(match = nil)
|
73
|
+
return %r{^(#{first_name_re})\s+(#{middle_name_re})\s+(#{last_name_re})$} if match.nil?
|
74
|
+
@first, @middle, @last = match[1 .. 3]
|
75
|
+
@matched = :first_mid_last
|
76
|
+
end
|
77
|
+
|
78
|
+
def last_comma_first_mid(match = nil)
|
79
|
+
return %r{^(#{last_name_re}),(?:\s+)?(#{first_name_re})(?:\s+(#{middle_name_re}))?$} if match.nil?
|
80
|
+
@first, @last, @middle = match[2], match[1], match[3]
|
81
|
+
@matched = :last_comma_first_mid
|
82
|
+
end
|
83
|
+
|
84
|
+
def multi_last_comma(match = nil)
|
85
|
+
return %r{^((?:#{last_name_re}(?:\s+)?)+),(?:\s+)(#{first_name_re})(?:\s+(#{middle_name_re}))?$} if match.nil?
|
86
|
+
@first, @last, @middle = match[2], match[1], match[3]
|
87
|
+
@matched = :multi_last_comma
|
88
|
+
end
|
89
|
+
|
72
90
|
end
|
73
91
|
end
|
data/lib/name_parse/version.rb
CHANGED
data/name_parse.gemspec
CHANGED
@@ -2,12 +2,12 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{name_parse}
|
5
|
-
s.version = "0.0.4"
|
5
|
+
s.version = "0.0.5"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["TJ Vanderpoel"]
|
9
|
-
s.date = %q{2009-05-
|
10
|
-
s.description = %q{Parse name strings into
|
9
|
+
s.date = %q{2009-05-07}
|
10
|
+
s.description = %q{========================================================= Name Parse Copyright (c) 2009 The Rubyists (Jayson Vaughn, Tj Vanderpoel, Michael Fellinger, Kevin Berry) Distributed under the terms of the MIT License. ========================================================== About ----- A ruby library for turning arbitrary name strings such as "Dr Helen Hunt", "Mr James T. Kirk" into a standardized object usable as parsed = NameParse::Parser.new("Dr Helen Hunt") puts "%s %s" % [parsed.first, parsed.last] Requirements ------------ - ruby (>= 1.8) Usage ----- Example of using on a list: bougyman@zero:~/git_checkouts/name_parse$ irb -r lib/name_parse irb(main):001:0> list = ["Jayson Vaughn", "Dr Helen Hunt", "Mr James T. Kirk"] => ["Jayson Vaughn", "Dr Helen Hunt", "Mr James T. Kirk"] irb(main):002:0> list.map { |n| p = NameParse[n]; [p.first, p.last] } => [["Jayson", "Vaughn"], ["Helen", "Hunt"], ["James", "Kirk"]] Support ------- Home page at http://github.com/bougyman/name_parse #rubyists on FreeNode}
|
11
11
|
s.email = %q{bougy.man@gmail.com}
|
12
12
|
s.files = ["AUTHORS", "CHANGELOG", "MANIFEST", "README", "Rakefile", "lib/name_parse.rb", "lib/name_parse/error.rb", "lib/name_parse/parser.rb", "lib/name_parse/version.rb", "name_parse.gemspec", "spec/helper.rb", "spec/name_parse/parser.rb", "tasks/authors.rake", "tasks/bacon.rake", "tasks/changelog.rake", "tasks/copyright.rake", "tasks/gem.rake", "tasks/gem_installer.rake", "tasks/install_dependencies.rake", "tasks/manifest.rake", "tasks/rcov.rake", "tasks/release.rake", "tasks/reversion.rake", "tasks/setup.rake", "tasks/yard.rake"]
|
13
13
|
s.has_rdoc = true
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bougyman-name_parse
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TJ Vanderpoel
|
@@ -9,11 +9,11 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-05-
|
12
|
+
date: 2009-05-07 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
16
|
-
description: Parse name strings into
|
16
|
+
description: "========================================================= Name Parse Copyright (c) 2009 The Rubyists (Jayson Vaughn, Tj Vanderpoel, Michael Fellinger, Kevin Berry) Distributed under the terms of the MIT License. ========================================================== About ----- A ruby library for turning arbitrary name strings such as \"Dr Helen Hunt\", \"Mr James T. Kirk\" into a standardized object usable as parsed = NameParse::Parser.new(\"Dr Helen Hunt\") puts \"%s %s\" % [parsed.first, parsed.last] Requirements ------------ - ruby (>= 1.8) Usage ----- Example of using on a list: bougyman@zero:~/git_checkouts/name_parse$ irb -r lib/name_parse irb(main):001:0> list = [\"Jayson Vaughn\", \"Dr Helen Hunt\", \"Mr James T. Kirk\"] => [\"Jayson Vaughn\", \"Dr Helen Hunt\", \"Mr James T. Kirk\"] irb(main):002:0> list.map { |n| p = NameParse[n]; [p.first, p.last] } => [[\"Jayson\", \"Vaughn\"], [\"Helen\", \"Hunt\"], [\"James\", \"Kirk\"]] Support ------- Home page at http://github.com/bougyman/name_parse #rubyists on FreeNode"
|
17
17
|
email: bougy.man@gmail.com
|
18
18
|
executables: []
|
19
19
|
|