namae 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +0 -1
- data/features/bibtex.feature +1 -1
- data/features/lists.feature +18 -18
- data/features/suffix.feature +42 -0
- data/lib/namae/parser.rb +14 -6
- data/lib/namae/parser.y +14 -6
- data/lib/namae/version.rb +1 -1
- data/namae.gemspec +2 -1
- data/spec/namae/parser_spec.rb +26 -22
- metadata +3 -2
data/README.md
CHANGED
@@ -109,7 +109,6 @@ ambiguous. For example, multiple family names are always possible in sort-order:
|
|
109
109
|
Whilst in display-order, multiple family names are only supported when the
|
110
110
|
name contains a particle or a nickname.
|
111
111
|
|
112
|
-
|
113
112
|
Rationale
|
114
113
|
---------
|
115
114
|
Parsing human names is at once too easy and too hard. When working in the
|
data/features/bibtex.feature
CHANGED
@@ -61,7 +61,7 @@ Feature: Parse BibTeX-style names
|
|
61
61
|
| Dominique {G}alouzeau de Villepin | Dominique | {G}alouzeau de | Villepin | |
|
62
62
|
| Galouzeau {de} Villepin, Dominique | Dominique | | Galouzeau {de} Villepin | |
|
63
63
|
|
64
|
-
|
64
|
+
@names
|
65
65
|
Scenarios: Some actual names
|
66
66
|
| name | first | von | last | jr |
|
67
67
|
| John Paul Jones | John Paul | | Jones | |
|
data/features/lists.feature
CHANGED
@@ -21,14 +21,14 @@ Feature: Parse a list of names
|
|
21
21
|
| Brian | Kernighan |
|
22
22
|
| Dennis | Ritchie |
|
23
23
|
| Donald | Knuth |
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
24
|
+
Given a parser that prefers commas as separators
|
25
|
+
When I parse the names "Kernighan, Brian, Ritchie, Dennis, Knuth, Donald"
|
26
|
+
Then there should be 3 names
|
27
|
+
And the names should be:
|
28
|
+
| given | family |
|
29
|
+
| Brian | Kernighan |
|
30
|
+
| Dennis | Ritchie |
|
31
|
+
| Donald | Knuth |
|
32
32
|
|
33
33
|
|
34
34
|
@list
|
@@ -51,13 +51,13 @@ Feature: Parse a list of names
|
|
51
51
|
| Dennis | Ritchie |
|
52
52
|
| Donald | Knuth |
|
53
53
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
54
|
+
@list
|
55
|
+
Scenario: A list of display-order names separated by commas and 'and'
|
56
|
+
Given a parser that prefers commas as separators
|
57
|
+
When I parse the names "Brian Kernighan, Dennis Ritchie, and Donald Knuth"
|
58
|
+
Then there should be 3 names
|
59
|
+
And the names should be:
|
60
|
+
| given | family |
|
61
|
+
| Brian | Kernighan |
|
62
|
+
| Dennis | Ritchie |
|
63
|
+
| Donald | Knuth |
|
data/features/suffix.feature
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
Feature: Parse names with a suffix
|
2
|
+
As a hacker who works with Namae
|
3
|
+
I want to be able to parse names with a suffix
|
4
|
+
|
5
|
+
@names @suffix
|
6
|
+
Scenario: Names with a suffix BibTeX style
|
7
|
+
When I parse the names "Griffey, Jr., Ken"
|
8
|
+
Then the names should be:
|
9
|
+
| given | family | suffix |
|
10
|
+
| Ken | Griffey | Jr. |
|
11
|
+
|
12
|
+
# @names @suffix
|
13
|
+
# Scenario: Names with a suffix in display-order
|
14
|
+
# When I parse the names "Ken Griffey, Jr."
|
15
|
+
# Then the names should be:
|
16
|
+
# | given | family | suffix |
|
17
|
+
# | Ken | Griffey | Jr. |
|
18
|
+
#
|
19
|
+
# @names @suffix
|
20
|
+
# Scenario: Names with a suffix in sort-order chicago style
|
21
|
+
# When I parse the names "Griffey, Ken, Jr."
|
22
|
+
# Then the names should be:
|
23
|
+
# | given | family | suffix |
|
24
|
+
# | Ken | Griffey | Jr. |
|
25
|
+
#
|
26
|
+
# @names @suffix
|
27
|
+
# Scenario: Names with a suffix in display-order no comma
|
28
|
+
# When I parse the names "Ken Griffey Jr."
|
29
|
+
# Then the names should be:
|
30
|
+
# | given | family | suffix |
|
31
|
+
# | Ken | Griffey | Jr. |
|
32
|
+
#
|
33
|
+
#
|
34
|
+
# @names @suffix @list
|
35
|
+
# Scenario: Names with a suffix
|
36
|
+
# When I parse the names "Griffey, Jr., Ken and Ken Griffey, Jr. and Griffey, Ken, Jr. and Ken Griffey Jr."
|
37
|
+
# Then the names should be:
|
38
|
+
# | given | family | suffix |
|
39
|
+
# | Ken | Griffey | Jr. |
|
40
|
+
# | Ken | Griffey | Jr. |
|
41
|
+
# | Ken | Griffey | Jr. |
|
42
|
+
# | Ken | Griffey | Jr. |
|
data/lib/namae/parser.rb
CHANGED
@@ -92,14 +92,14 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
92
92
|
end
|
93
93
|
|
94
94
|
def consume_separator
|
95
|
-
|
95
|
+
return next_token if seen_separator?
|
96
96
|
@commas, @words = 0, 0
|
97
|
-
[:AND,
|
97
|
+
[:AND, :AND]
|
98
98
|
end
|
99
99
|
|
100
100
|
def consume_comma
|
101
101
|
@commas += 1
|
102
|
-
[:COMMA,
|
102
|
+
[:COMMA, :COMMA]
|
103
103
|
end
|
104
104
|
|
105
105
|
def consume_word(type, word)
|
@@ -108,12 +108,20 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
108
108
|
end
|
109
109
|
|
110
110
|
def seen_separator?
|
111
|
-
!stack.empty? && last_token
|
111
|
+
!stack.empty? && last_token == :AND
|
112
112
|
end
|
113
113
|
|
114
|
+
def suffix?
|
115
|
+
seen_suffix? || will_see_suffix?
|
116
|
+
end
|
117
|
+
|
114
118
|
def seen_suffix?
|
115
119
|
return false unless stack.length > 1
|
116
|
-
last_token ==
|
120
|
+
last_token == :COMMA || last_token =~ suffix
|
121
|
+
end
|
122
|
+
|
123
|
+
def will_see_suffix?
|
124
|
+
input.peek(8).to_s.split(/\s+/)[0] =~ suffix
|
117
125
|
end
|
118
126
|
|
119
127
|
def seen_full_name?
|
@@ -127,7 +135,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
127
135
|
when input.scan(separator)
|
128
136
|
consume_separator
|
129
137
|
when input.scan(/\s*,\s*/)
|
130
|
-
if @commas.zero? && !seen_full_name? || @commas == 1 && seen_suffix?
|
138
|
+
if @commas.zero? && !seen_full_name? || @commas == 1 && suffix?
|
131
139
|
consume_comma
|
132
140
|
else
|
133
141
|
consume_separator
|
data/lib/namae/parser.y
CHANGED
@@ -166,14 +166,14 @@ require 'strscan'
|
|
166
166
|
end
|
167
167
|
|
168
168
|
def consume_separator
|
169
|
-
|
169
|
+
return next_token if seen_separator?
|
170
170
|
@commas, @words = 0, 0
|
171
|
-
[:AND,
|
171
|
+
[:AND, :AND]
|
172
172
|
end
|
173
173
|
|
174
174
|
def consume_comma
|
175
175
|
@commas += 1
|
176
|
-
[:COMMA,
|
176
|
+
[:COMMA, :COMMA]
|
177
177
|
end
|
178
178
|
|
179
179
|
def consume_word(type, word)
|
@@ -182,12 +182,20 @@ require 'strscan'
|
|
182
182
|
end
|
183
183
|
|
184
184
|
def seen_separator?
|
185
|
-
!stack.empty? && last_token
|
185
|
+
!stack.empty? && last_token == :AND
|
186
186
|
end
|
187
187
|
|
188
|
+
def suffix?
|
189
|
+
seen_suffix? || will_see_suffix?
|
190
|
+
end
|
191
|
+
|
188
192
|
def seen_suffix?
|
189
193
|
return false unless stack.length > 1
|
190
|
-
last_token ==
|
194
|
+
last_token == :COMMA || last_token =~ suffix
|
195
|
+
end
|
196
|
+
|
197
|
+
def will_see_suffix?
|
198
|
+
input.peek(8).to_s.split(/\s+/)[0] =~ suffix
|
191
199
|
end
|
192
200
|
|
193
201
|
def seen_full_name?
|
@@ -201,7 +209,7 @@ require 'strscan'
|
|
201
209
|
when input.scan(separator)
|
202
210
|
consume_separator
|
203
211
|
when input.scan(/\s*,\s*/)
|
204
|
-
if @commas.zero? && !seen_full_name? || @commas == 1 && seen_suffix?
|
212
|
+
if @commas.zero? && !seen_full_name? || @commas == 1 && suffix?
|
205
213
|
consume_comma
|
206
214
|
else
|
207
215
|
consume_separator
|
data/lib/namae/version.rb
CHANGED
data/namae.gemspec
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "namae"
|
8
|
-
s.version = "0.5.0"
|
8
|
+
s.version = "0.6.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Sylvester Keil", "Dan Collis-Puro"]
|
@@ -32,6 +32,7 @@ Gem::Specification.new do |s|
|
|
32
32
|
"features/examples.feature",
|
33
33
|
"features/lists.feature",
|
34
34
|
"features/step_definitions/namae_steps.rb",
|
35
|
+
"features/suffix.feature",
|
35
36
|
"features/support/env.rb",
|
36
37
|
"lib/namae.rb",
|
37
38
|
"lib/namae/name.rb",
|
data/spec/namae/parser_spec.rb
CHANGED
@@ -1,52 +1,56 @@
|
|
1
1
|
module Namae
|
2
2
|
describe 'Parser' do
|
3
|
-
|
3
|
+
|
4
4
|
it 'does not respond to .new' do
|
5
5
|
Parser.should_not respond_to(:new)
|
6
6
|
end
|
7
|
-
|
7
|
+
|
8
8
|
describe '.instance' do
|
9
9
|
let(:parser) { Parser.instance }
|
10
|
-
|
10
|
+
|
11
11
|
it 'returns the parser' do
|
12
12
|
parser.should be_a(Parser)
|
13
13
|
end
|
14
|
-
|
14
|
+
|
15
15
|
describe '#next_token' do
|
16
|
+
before(:each) do
|
17
|
+
Parser.instance.reset
|
18
|
+
end
|
19
|
+
|
16
20
|
describe 'when the input is empty' do
|
17
21
|
it 'returns nil' do
|
18
22
|
parser.send(:next_token).should be_nil
|
19
23
|
end
|
20
24
|
end
|
21
|
-
|
25
|
+
|
22
26
|
describe 'when the next input is " and "' do
|
23
27
|
before { parser.send(:input).string = ' and ' }
|
24
28
|
it 'returns an AND token' do
|
25
|
-
parser.send(:next_token).should == [:AND,
|
29
|
+
parser.send(:next_token).should == [:AND, :AND]
|
26
30
|
end
|
27
31
|
end
|
28
32
|
|
29
33
|
describe 'when the next input is " & "' do
|
30
34
|
before { parser.send(:input).string = ' & ' }
|
31
35
|
it 'returns an AND token' do
|
32
|
-
parser.send(:next_token).should == [:AND,
|
36
|
+
parser.send(:next_token).should == [:AND, :AND]
|
33
37
|
end
|
34
38
|
end
|
35
39
|
|
36
40
|
describe 'when the next input is " , "' do
|
37
41
|
before { parser.send(:input).string = ' , ' }
|
38
42
|
it 'returns a COMMA token' do
|
39
|
-
parser.send(:next_token).should == [:COMMA,
|
43
|
+
parser.send(:next_token).should == [:COMMA, :COMMA]
|
40
44
|
end
|
41
45
|
end
|
42
|
-
|
46
|
+
|
43
47
|
describe 'when the next input is " \'foo bar\' "' do
|
44
48
|
before { parser.send(:input).string = " 'foo bar' " }
|
45
49
|
it 'returns a NICK token' do
|
46
50
|
parser.send(:next_token).should == [:NICK, 'foo bar']
|
47
51
|
end
|
48
52
|
end
|
49
|
-
|
53
|
+
|
50
54
|
%w{Mr. Mr Mrs. Ms Herr Frau Miss}.each do |appellation|
|
51
55
|
describe "the next token is #{appellation.inspect}" do
|
52
56
|
before { parser.send(:input).string = appellation }
|
@@ -55,20 +59,20 @@ module Namae
|
|
55
59
|
end
|
56
60
|
end
|
57
61
|
end
|
58
|
-
|
62
|
+
|
59
63
|
end
|
60
|
-
|
64
|
+
|
61
65
|
describe '#parse!' do
|
62
66
|
it 'returns an empty list by default' do
|
63
67
|
parser.parse!('').should be_empty
|
64
68
|
end
|
65
|
-
|
69
|
+
|
66
70
|
it 'returns a list of names' do
|
67
71
|
parser.parse!('foo')[0].should be_a(Name)
|
68
72
|
end
|
69
|
-
|
73
|
+
|
70
74
|
describe 'when parsing a single name' do
|
71
|
-
|
75
|
+
|
72
76
|
it 'treats "Ichiro" as a given name' do
|
73
77
|
parser.parse!('Ichiro')[0].given.should == 'Ichiro'
|
74
78
|
end
|
@@ -76,11 +80,11 @@ module Namae
|
|
76
80
|
it 'treats "Lord Byron" as a title and family name' do
|
77
81
|
parser.parse!('Lord Byron')[0].values_at(:family, :title).should == ['Byron', 'Lord']
|
78
82
|
end
|
79
|
-
|
83
|
+
|
80
84
|
it 'parses given and family part name in "Ichiro Suzuki"' do
|
81
85
|
parser.parse!('Ichiro Suzuki')[0].values_at(:given, :family).should == %w{Ichiro Suzuki}
|
82
86
|
end
|
83
|
-
|
87
|
+
|
84
88
|
it 'parses given, nick and family part name in "Yukihiro \'Matz\' Matsumoto"' do
|
85
89
|
parser.parse!("Yukihiro 'Matz' Matsumoto")[0].values_at(:given, :family, :nick).should == %w{Yukihiro Matsumoto Matz}
|
86
90
|
end
|
@@ -88,20 +92,20 @@ module Namae
|
|
88
92
|
it 'parses given, nick and family part name in \'Yukihiro "Matz" Matsumoto\'' do
|
89
93
|
parser.parse!('Yukihiro "Matz" Matsumoto')[0].values_at(:given, :family, :nick).should == %w{Yukihiro Matsumoto Matz}
|
90
94
|
end
|
91
|
-
|
95
|
+
|
92
96
|
it 'parses given and family name in "Poe, Edgar A."' do
|
93
97
|
parser.parse!('Poe, Edgar A.')[0].values_at(:given, :family).should == ['Edgar A.', 'Poe']
|
94
98
|
end
|
95
|
-
|
99
|
+
|
96
100
|
%w{Mr. Mr Mrs. Ms Herr Frau Miss}.each do |appellation|
|
97
101
|
it "recognizes #{appellation.inspect} as an appellation" do
|
98
102
|
parser.parse!([appellation, 'Edgar A. Poe'].join(' '))[0].appellation.should == appellation
|
99
103
|
end
|
100
104
|
end
|
101
|
-
|
105
|
+
|
102
106
|
end
|
103
107
|
end
|
104
|
-
|
105
|
-
end
|
108
|
+
|
109
|
+
end
|
106
110
|
end
|
107
111
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: namae
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.6.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -119,6 +119,7 @@ files:
|
|
119
119
|
- features/examples.feature
|
120
120
|
- features/lists.feature
|
121
121
|
- features/step_definitions/namae_steps.rb
|
122
|
+
- features/suffix.feature
|
122
123
|
- features/support/env.rb
|
123
124
|
- lib/namae.rb
|
124
125
|
- lib/namae/name.rb
|
@@ -146,7 +147,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
146
147
|
version: '0'
|
147
148
|
segments:
|
148
149
|
- 0
|
149
|
-
hash: -
|
150
|
+
hash: -2694601471904958389
|
150
151
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
151
152
|
none: false
|
152
153
|
requirements:
|