namae 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +0 -1
- data/features/bibtex.feature +1 -1
- data/features/lists.feature +18 -18
- data/features/suffix.feature +42 -0
- data/lib/namae/parser.rb +14 -6
- data/lib/namae/parser.y +14 -6
- data/lib/namae/version.rb +1 -1
- data/namae.gemspec +2 -1
- data/spec/namae/parser_spec.rb +26 -22
- metadata +3 -2
data/README.md
CHANGED
@@ -109,7 +109,6 @@ ambiguous. For example, multiple family names are always possible in sort-order:
|
|
109
109
|
Whilst in display-order, multiple family names are only supported when the
|
110
110
|
name contains a particle or a nickname.
|
111
111
|
|
112
|
-
|
113
112
|
Rationale
|
114
113
|
---------
|
115
114
|
Parsing human names is at once too easy and too hard. When working in the
|
data/features/bibtex.feature
CHANGED
@@ -61,7 +61,7 @@ Feature: Parse BibTeX-style names
|
|
61
61
|
| Dominique {G}alouzeau de Villepin | Dominique | {G}alouzeau de | Villepin | |
|
62
62
|
| Galouzeau {de} Villepin, Dominique | Dominique | | Galouzeau {de} Villepin | |
|
63
63
|
|
64
|
-
|
64
|
+
@names
|
65
65
|
Scenarios: Some actual names
|
66
66
|
| name | first | von | last | jr |
|
67
67
|
| John Paul Jones | John Paul | | Jones | |
|
data/features/lists.feature
CHANGED
@@ -21,14 +21,14 @@ Feature: Parse a list of names
|
|
21
21
|
| Brian | Kernighan |
|
22
22
|
| Dennis | Ritchie |
|
23
23
|
| Donald | Knuth |
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
24
|
+
Given a parser that prefers commas as separators
|
25
|
+
When I parse the names "Kernighan, Brian, Ritchie, Dennis, Knuth, Donald"
|
26
|
+
Then there should be 3 names
|
27
|
+
And the names should be:
|
28
|
+
| given | family |
|
29
|
+
| Brian | Kernighan |
|
30
|
+
| Dennis | Ritchie |
|
31
|
+
| Donald | Knuth |
|
32
32
|
|
33
33
|
|
34
34
|
@list
|
@@ -51,13 +51,13 @@ Feature: Parse a list of names
|
|
51
51
|
| Dennis | Ritchie |
|
52
52
|
| Donald | Knuth |
|
53
53
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
54
|
+
@list
|
55
|
+
Scenario: A list of display-order names separated by commas and 'and'
|
56
|
+
Given a parser that prefers commas as separators
|
57
|
+
When I parse the names "Brian Kernighan, Dennis Ritchie, and Donald Knuth"
|
58
|
+
Then there should be 3 names
|
59
|
+
And the names should be:
|
60
|
+
| given | family |
|
61
|
+
| Brian | Kernighan |
|
62
|
+
| Dennis | Ritchie |
|
63
|
+
| Donald | Knuth |
|
@@ -0,0 +1,42 @@
|
|
1
|
+
Feature: Parse names with a suffix
|
2
|
+
As a hacker who works with Namae
|
3
|
+
I want to be able to parse names with a suffix
|
4
|
+
|
5
|
+
@names @suffix
|
6
|
+
Scenario: Names with a suffix BibTeX style
|
7
|
+
When I parse the names "Griffey, Jr., Ken"
|
8
|
+
Then the names should be:
|
9
|
+
| given | family | suffix |
|
10
|
+
| Ken | Griffey | Jr. |
|
11
|
+
|
12
|
+
# @names @suffix
|
13
|
+
# Scenario: Names with a suffix in display-order
|
14
|
+
# When I parse the names "Ken Griffey, Jr."
|
15
|
+
# Then the names should be:
|
16
|
+
# | given | family | suffix |
|
17
|
+
# | Ken | Griffey | Jr. |
|
18
|
+
#
|
19
|
+
# @names @suffix
|
20
|
+
# Scenario: Names with a suffix in sort-order chicago style
|
21
|
+
# When I parse the names "Griffey, Ken, Jr."
|
22
|
+
# Then the names should be:
|
23
|
+
# | given | family | suffix |
|
24
|
+
# | Ken | Griffey | Jr. |
|
25
|
+
#
|
26
|
+
# @names @suffix
|
27
|
+
# Scenario: Names with a suffix in display-order no comma
|
28
|
+
# When I parse the names "Ken Griffey Jr."
|
29
|
+
# Then the names should be:
|
30
|
+
# | given | family | suffix |
|
31
|
+
# | Ken | Griffey | Jr. |
|
32
|
+
#
|
33
|
+
#
|
34
|
+
# @names @suffix @list
|
35
|
+
# Scenario: Names with a suffix
|
36
|
+
# When I parse the names "Griffey, Jr., Ken and Ken Griffey, Jr. and Griffey, Ken, Jr. and Ken Griffey Jr."
|
37
|
+
# Then the names should be:
|
38
|
+
# | given | family | suffix |
|
39
|
+
# | Ken | Griffey | Jr. |
|
40
|
+
# | Ken | Griffey | Jr. |
|
41
|
+
# | Ken | Griffey | Jr. |
|
42
|
+
# | Ken | Griffey | Jr. |
|
data/lib/namae/parser.rb
CHANGED
@@ -92,14 +92,14 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
92
92
|
end
|
93
93
|
|
94
94
|
def consume_separator
|
95
|
-
|
95
|
+
return next_token if seen_separator?
|
96
96
|
@commas, @words = 0, 0
|
97
|
-
[:AND,
|
97
|
+
[:AND, :AND]
|
98
98
|
end
|
99
99
|
|
100
100
|
def consume_comma
|
101
101
|
@commas += 1
|
102
|
-
[:COMMA,
|
102
|
+
[:COMMA, :COMMA]
|
103
103
|
end
|
104
104
|
|
105
105
|
def consume_word(type, word)
|
@@ -108,12 +108,20 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
108
108
|
end
|
109
109
|
|
110
110
|
def seen_separator?
|
111
|
-
!stack.empty? && last_token
|
111
|
+
!stack.empty? && last_token == :AND
|
112
112
|
end
|
113
113
|
|
114
|
+
def suffix?
|
115
|
+
seen_suffix? || will_see_suffix?
|
116
|
+
end
|
117
|
+
|
114
118
|
def seen_suffix?
|
115
119
|
return false unless stack.length > 1
|
116
|
-
last_token ==
|
120
|
+
last_token == :COMMA || last_token =~ suffix
|
121
|
+
end
|
122
|
+
|
123
|
+
def will_see_suffix?
|
124
|
+
input.peek(8).to_s.split(/\s+/)[0] =~ suffix
|
117
125
|
end
|
118
126
|
|
119
127
|
def seen_full_name?
|
@@ -127,7 +135,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
127
135
|
when input.scan(separator)
|
128
136
|
consume_separator
|
129
137
|
when input.scan(/\s*,\s*/)
|
130
|
-
if @commas.zero? && !seen_full_name? || @commas == 1 &&
|
138
|
+
if @commas.zero? && !seen_full_name? || @commas == 1 && suffix?
|
131
139
|
consume_comma
|
132
140
|
else
|
133
141
|
consume_separator
|
data/lib/namae/parser.y
CHANGED
@@ -166,14 +166,14 @@ require 'strscan'
|
|
166
166
|
end
|
167
167
|
|
168
168
|
def consume_separator
|
169
|
-
|
169
|
+
return next_token if seen_separator?
|
170
170
|
@commas, @words = 0, 0
|
171
|
-
[:AND,
|
171
|
+
[:AND, :AND]
|
172
172
|
end
|
173
173
|
|
174
174
|
def consume_comma
|
175
175
|
@commas += 1
|
176
|
-
[:COMMA,
|
176
|
+
[:COMMA, :COMMA]
|
177
177
|
end
|
178
178
|
|
179
179
|
def consume_word(type, word)
|
@@ -182,12 +182,20 @@ require 'strscan'
|
|
182
182
|
end
|
183
183
|
|
184
184
|
def seen_separator?
|
185
|
-
!stack.empty? && last_token
|
185
|
+
!stack.empty? && last_token == :AND
|
186
186
|
end
|
187
187
|
|
188
|
+
def suffix?
|
189
|
+
seen_suffix? || will_see_suffix?
|
190
|
+
end
|
191
|
+
|
188
192
|
def seen_suffix?
|
189
193
|
return false unless stack.length > 1
|
190
|
-
last_token ==
|
194
|
+
last_token == :COMMA || last_token =~ suffix
|
195
|
+
end
|
196
|
+
|
197
|
+
def will_see_suffix?
|
198
|
+
input.peek(8).to_s.split(/\s+/)[0] =~ suffix
|
191
199
|
end
|
192
200
|
|
193
201
|
def seen_full_name?
|
@@ -201,7 +209,7 @@ require 'strscan'
|
|
201
209
|
when input.scan(separator)
|
202
210
|
consume_separator
|
203
211
|
when input.scan(/\s*,\s*/)
|
204
|
-
if @commas.zero? && !seen_full_name? || @commas == 1 &&
|
212
|
+
if @commas.zero? && !seen_full_name? || @commas == 1 && suffix?
|
205
213
|
consume_comma
|
206
214
|
else
|
207
215
|
consume_separator
|
data/lib/namae/version.rb
CHANGED
data/namae.gemspec
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "namae"
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.6.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Sylvester Keil", "Dan Collis-Puro"]
|
@@ -32,6 +32,7 @@ Gem::Specification.new do |s|
|
|
32
32
|
"features/examples.feature",
|
33
33
|
"features/lists.feature",
|
34
34
|
"features/step_definitions/namae_steps.rb",
|
35
|
+
"features/suffix.feature",
|
35
36
|
"features/support/env.rb",
|
36
37
|
"lib/namae.rb",
|
37
38
|
"lib/namae/name.rb",
|
data/spec/namae/parser_spec.rb
CHANGED
@@ -1,52 +1,56 @@
|
|
1
1
|
module Namae
|
2
2
|
describe 'Parser' do
|
3
|
-
|
3
|
+
|
4
4
|
it 'does not respond to .new' do
|
5
5
|
Parser.should_not respond_to(:new)
|
6
6
|
end
|
7
|
-
|
7
|
+
|
8
8
|
describe '.instance' do
|
9
9
|
let(:parser) { Parser.instance }
|
10
|
-
|
10
|
+
|
11
11
|
it 'returns the parser' do
|
12
12
|
parser.should be_a(Parser)
|
13
13
|
end
|
14
|
-
|
14
|
+
|
15
15
|
describe '#next_token' do
|
16
|
+
before(:each) do
|
17
|
+
Parser.instance.reset
|
18
|
+
end
|
19
|
+
|
16
20
|
describe 'when the input is empty' do
|
17
21
|
it 'returns nil' do
|
18
22
|
parser.send(:next_token).should be_nil
|
19
23
|
end
|
20
24
|
end
|
21
|
-
|
25
|
+
|
22
26
|
describe 'when the next input is " and "' do
|
23
27
|
before { parser.send(:input).string = ' and ' }
|
24
28
|
it 'returns an AND token' do
|
25
|
-
parser.send(:next_token).should == [:AND,
|
29
|
+
parser.send(:next_token).should == [:AND, :AND]
|
26
30
|
end
|
27
31
|
end
|
28
32
|
|
29
33
|
describe 'when the next input is " & "' do
|
30
34
|
before { parser.send(:input).string = ' & ' }
|
31
35
|
it 'returns an AND token' do
|
32
|
-
parser.send(:next_token).should == [:AND,
|
36
|
+
parser.send(:next_token).should == [:AND, :AND]
|
33
37
|
end
|
34
38
|
end
|
35
39
|
|
36
40
|
describe 'when the next input is " , "' do
|
37
41
|
before { parser.send(:input).string = ' , ' }
|
38
42
|
it 'returns a COMMA token' do
|
39
|
-
parser.send(:next_token).should == [:COMMA,
|
43
|
+
parser.send(:next_token).should == [:COMMA, :COMMA]
|
40
44
|
end
|
41
45
|
end
|
42
|
-
|
46
|
+
|
43
47
|
describe 'when the next input is " \'foo bar\' "' do
|
44
48
|
before { parser.send(:input).string = " 'foo bar' " }
|
45
49
|
it 'returns a NICK token' do
|
46
50
|
parser.send(:next_token).should == [:NICK, 'foo bar']
|
47
51
|
end
|
48
52
|
end
|
49
|
-
|
53
|
+
|
50
54
|
%w{Mr. Mr Mrs. Ms Herr Frau Miss}.each do |appellation|
|
51
55
|
describe "the next token is #{appellation.inspect}" do
|
52
56
|
before { parser.send(:input).string = appellation }
|
@@ -55,20 +59,20 @@ module Namae
|
|
55
59
|
end
|
56
60
|
end
|
57
61
|
end
|
58
|
-
|
62
|
+
|
59
63
|
end
|
60
|
-
|
64
|
+
|
61
65
|
describe '#parse!' do
|
62
66
|
it 'returns an empty list by default' do
|
63
67
|
parser.parse!('').should be_empty
|
64
68
|
end
|
65
|
-
|
69
|
+
|
66
70
|
it 'returns a list of names' do
|
67
71
|
parser.parse!('foo')[0].should be_a(Name)
|
68
72
|
end
|
69
|
-
|
73
|
+
|
70
74
|
describe 'when parsing a single name' do
|
71
|
-
|
75
|
+
|
72
76
|
it 'treats "Ichiro" as a given name' do
|
73
77
|
parser.parse!('Ichiro')[0].given.should == 'Ichiro'
|
74
78
|
end
|
@@ -76,11 +80,11 @@ module Namae
|
|
76
80
|
it 'treats "Lord Byron" as a title and family name' do
|
77
81
|
parser.parse!('Lord Byron')[0].values_at(:family, :title).should == ['Byron', 'Lord']
|
78
82
|
end
|
79
|
-
|
83
|
+
|
80
84
|
it 'parses given and family part name in "Ichiro Suzuki"' do
|
81
85
|
parser.parse!('Ichiro Suzuki')[0].values_at(:given, :family).should == %w{Ichiro Suzuki}
|
82
86
|
end
|
83
|
-
|
87
|
+
|
84
88
|
it 'parses given, nick and family part name in "Yukihiro \'Matz\' Matsumoto"' do
|
85
89
|
parser.parse!("Yukihiro 'Matz' Matsumoto")[0].values_at(:given, :family, :nick).should == %w{Yukihiro Matsumoto Matz}
|
86
90
|
end
|
@@ -88,20 +92,20 @@ module Namae
|
|
88
92
|
it 'parses given, nick and family part name in \'Yukihiro "Matz" Matsumoto\'' do
|
89
93
|
parser.parse!('Yukihiro "Matz" Matsumoto')[0].values_at(:given, :family, :nick).should == %w{Yukihiro Matsumoto Matz}
|
90
94
|
end
|
91
|
-
|
95
|
+
|
92
96
|
it 'parses given and family name in "Poe, Edgar A."' do
|
93
97
|
parser.parse!('Poe, Edgar A.')[0].values_at(:given, :family).should == ['Edgar A.', 'Poe']
|
94
98
|
end
|
95
|
-
|
99
|
+
|
96
100
|
%w{Mr. Mr Mrs. Ms Herr Frau Miss}.each do |appellation|
|
97
101
|
it "recognizes #{appellation.inspect} as an appellation" do
|
98
102
|
parser.parse!([appellation, 'Edgar A. Poe'].join(' '))[0].appellation.should == appellation
|
99
103
|
end
|
100
104
|
end
|
101
|
-
|
105
|
+
|
102
106
|
end
|
103
107
|
end
|
104
|
-
|
105
|
-
end
|
108
|
+
|
109
|
+
end
|
106
110
|
end
|
107
111
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: namae
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -119,6 +119,7 @@ files:
|
|
119
119
|
- features/examples.feature
|
120
120
|
- features/lists.feature
|
121
121
|
- features/step_definitions/namae_steps.rb
|
122
|
+
- features/suffix.feature
|
122
123
|
- features/support/env.rb
|
123
124
|
- lib/namae.rb
|
124
125
|
- lib/namae/name.rb
|
@@ -146,7 +147,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
146
147
|
version: '0'
|
147
148
|
segments:
|
148
149
|
- 0
|
149
|
-
hash: -
|
150
|
+
hash: -2694601471904958389
|
150
151
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
151
152
|
none: false
|
152
153
|
requirements:
|