namae 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +0 -1
- data/Rakefile +1 -1
- data/features/lists.feature +20 -0
- data/features/step_definitions/namae_steps.rb +4 -0
- data/lib/namae/parser.rb +63 -37
- data/lib/namae/parser.y +40 -14
- data/lib/namae/version.rb +1 -1
- data/namae.gemspec +1 -4
- data/spec/namae/parser_spec.rb +1 -1
- data/spec/spec_helper.rb +5 -0
- metadata +2 -18
data/Gemfile
CHANGED
data/Rakefile
CHANGED
data/features/lists.feature
CHANGED
@@ -21,6 +21,15 @@ Feature: Parse a list of names
|
|
21
21
|
| Brian | Kernighan |
|
22
22
|
| Dennis | Ritchie |
|
23
23
|
| Donald | Knuth |
|
24
|
+
# Given a parser that prefers commas as separators
|
25
|
+
# When I parse the names "Kernighan, Brian, Ritchie, Dennis, Knuth, Donald"
|
26
|
+
# Then there should be 3 names
|
27
|
+
# And the names should be:
|
28
|
+
# | given | family |
|
29
|
+
# | Brian | Kernighan |
|
30
|
+
# | Dennis | Ritchie |
|
31
|
+
# | Donald | Knuth |
|
32
|
+
|
24
33
|
|
25
34
|
@list
|
26
35
|
Scenario: A list of sort-order names with initials separated by commas
|
@@ -41,3 +50,14 @@ Feature: Parse a list of names
|
|
41
50
|
| Brian | Kernighan |
|
42
51
|
| Dennis | Ritchie |
|
43
52
|
| Donald | Knuth |
|
53
|
+
|
54
|
+
# @list
|
55
|
+
# Scenario: A list of display-order names separated by commas and 'and'
|
56
|
+
# Given a parser that prefers commas as separators
|
57
|
+
# When I parse the names "Brian Kernighan, Dennis Ritchie, and Donald Knuth"
|
58
|
+
# Then there should be 3 names
|
59
|
+
# And the names should be:
|
60
|
+
# | given | family |
|
61
|
+
# | Brian | Kernighan |
|
62
|
+
# | Dennis | Ritchie |
|
63
|
+
# | Donald | Knuth |
|
data/lib/namae/parser.rb
CHANGED
@@ -21,10 +21,11 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
21
21
|
def initialize
|
22
22
|
@input, @options = StringScanner.new(''), {
|
23
23
|
:debug => false,
|
24
|
+
:prefer_comma_as_separator => false,
|
24
25
|
:comma => ',',
|
25
26
|
:separator => /\s*(\band\b|\&)\s*/i,
|
26
27
|
:title => /\s*\b(sir|lord|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
27
|
-
|
28
|
+
:suffix => /\s*\b(jr|sr|[ivx]+)\.?\s*/i,
|
28
29
|
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
29
30
|
}
|
30
31
|
end
|
@@ -53,6 +54,10 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
53
54
|
options[:appellation]
|
54
55
|
end
|
55
56
|
|
57
|
+
def prefer_comma_as_separator?
|
58
|
+
options[:prefer_comma_as_separator]
|
59
|
+
end
|
60
|
+
|
56
61
|
def parse(input)
|
57
62
|
parse!(input)
|
58
63
|
rescue => e
|
@@ -62,37 +67,58 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
62
67
|
|
63
68
|
def parse!(string)
|
64
69
|
input.string = normalize(string)
|
65
|
-
|
70
|
+
reset
|
66
71
|
do_parse
|
67
72
|
end
|
68
73
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
@commas, @yydebug = 0, debug?
|
76
|
-
|
77
|
-
|
74
|
+
def normalize(string)
|
75
|
+
string = string.strip
|
76
|
+
string
|
77
|
+
end
|
78
|
+
|
79
|
+
def reset
|
80
|
+
@commas, @words, @yydebug = 0, 0, debug?
|
81
|
+
self
|
82
|
+
end
|
78
83
|
|
79
84
|
private
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
85
|
+
|
86
|
+
def stack
|
87
|
+
@vstack || @racc_vstack || []
|
88
|
+
end
|
89
|
+
|
90
|
+
def last_token
|
91
|
+
stack[-1]
|
92
|
+
end
|
93
|
+
|
94
|
+
def consume_separator
|
95
|
+
# return next_token if seen_separator?
|
96
|
+
@commas, @words = 0, 0
|
97
|
+
[:AND, nil]
|
98
|
+
end
|
99
|
+
|
100
|
+
def consume_comma
|
101
|
+
@commas += 1
|
102
|
+
[:COMMA, comma]
|
103
|
+
end
|
104
|
+
|
105
|
+
def consume_word(type, word)
|
106
|
+
@words += 1
|
107
|
+
[type, word]
|
108
|
+
end
|
109
|
+
|
110
|
+
def seen_separator?
|
111
|
+
!stack.empty? && last_token.nil?
|
112
|
+
end
|
113
|
+
|
114
|
+
def seen_suffix?
|
115
|
+
return false unless stack.length > 1
|
116
|
+
last_token == comma || last_token =~ suffix
|
117
|
+
end
|
118
|
+
|
119
|
+
def seen_full_name?
|
120
|
+
prefer_comma_as_separator? && @words > 1
|
121
|
+
end
|
96
122
|
|
97
123
|
def next_token
|
98
124
|
case
|
@@ -101,25 +127,25 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
101
127
|
when input.scan(separator)
|
102
128
|
consume_separator
|
103
129
|
when input.scan(/\s*,\s*/)
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
130
|
+
if @commas.zero? && !seen_full_name? || @commas == 1 && seen_suffix?
|
131
|
+
consume_comma
|
132
|
+
else
|
133
|
+
consume_separator
|
134
|
+
end
|
109
135
|
when input.scan(/\s+/)
|
110
136
|
next_token
|
111
137
|
when input.scan(title)
|
112
|
-
|
138
|
+
consume_word(:TITLE, input.matched.strip)
|
113
139
|
when input.scan(appellation)
|
114
140
|
[:APPELLATION, input.matched.strip]
|
115
141
|
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{comma}]*/)
|
116
|
-
|
142
|
+
consume_word(:UWORD, input.matched)
|
117
143
|
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{comma}]*/)
|
118
|
-
|
144
|
+
consume_word(:LWORD, input.matched)
|
119
145
|
when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{comma}]*/)
|
120
|
-
|
146
|
+
consume_word(:PWORD, input.matched)
|
121
147
|
when input.scan(/('[^'\n]+')|("[^"\n]+")/)
|
122
|
-
|
148
|
+
consume_word(:NICK, input.matched[1...-1])
|
123
149
|
else
|
124
150
|
raise ArgumentError,
|
125
151
|
"Failed to parse name #{input.string.inspect}: unmatched data at offset #{input.pos}"
|
data/lib/namae/parser.y
CHANGED
@@ -95,6 +95,7 @@ require 'strscan'
|
|
95
95
|
def initialize
|
96
96
|
@input, @options = StringScanner.new(''), {
|
97
97
|
:debug => false,
|
98
|
+
:prefer_comma_as_separator => false,
|
98
99
|
:comma => ',',
|
99
100
|
:separator => /\s*(\band\b|\&)\s*/i,
|
100
101
|
:title => /\s*\b(sir|lord|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
@@ -127,6 +128,10 @@ require 'strscan'
|
|
127
128
|
options[:appellation]
|
128
129
|
end
|
129
130
|
|
131
|
+
def prefer_comma_as_separator?
|
132
|
+
options[:prefer_comma_as_separator]
|
133
|
+
end
|
134
|
+
|
130
135
|
def parse(input)
|
131
136
|
parse!(input)
|
132
137
|
rescue => e
|
@@ -146,26 +151,47 @@ require 'strscan'
|
|
146
151
|
end
|
147
152
|
|
148
153
|
def reset
|
149
|
-
@commas, @yydebug = 0, debug?
|
154
|
+
@commas, @words, @yydebug = 0, 0, debug?
|
150
155
|
self
|
151
156
|
end
|
152
157
|
|
153
158
|
private
|
154
|
-
|
159
|
+
|
160
|
+
def stack
|
161
|
+
@vstack || @racc_vstack || []
|
162
|
+
end
|
163
|
+
|
164
|
+
def last_token
|
165
|
+
stack[-1]
|
166
|
+
end
|
167
|
+
|
155
168
|
def consume_separator
|
156
|
-
|
169
|
+
# return next_token if seen_separator?
|
170
|
+
@commas, @words = 0, 0
|
157
171
|
[:AND, nil]
|
158
172
|
end
|
159
173
|
|
160
174
|
def consume_comma
|
161
175
|
@commas += 1
|
162
|
-
[:COMMA,
|
176
|
+
[:COMMA, comma]
|
163
177
|
end
|
164
|
-
|
178
|
+
|
179
|
+
def consume_word(type, word)
|
180
|
+
@words += 1
|
181
|
+
[type, word]
|
182
|
+
end
|
183
|
+
|
184
|
+
def seen_separator?
|
185
|
+
!stack.empty? && last_token.nil?
|
186
|
+
end
|
187
|
+
|
165
188
|
def seen_suffix?
|
166
|
-
return false unless
|
167
|
-
|
168
|
-
|
189
|
+
return false unless stack.length > 1
|
190
|
+
last_token == comma || last_token =~ suffix
|
191
|
+
end
|
192
|
+
|
193
|
+
def seen_full_name?
|
194
|
+
prefer_comma_as_separator? && @words > 1
|
169
195
|
end
|
170
196
|
|
171
197
|
def next_token
|
@@ -175,7 +201,7 @@ require 'strscan'
|
|
175
201
|
when input.scan(separator)
|
176
202
|
consume_separator
|
177
203
|
when input.scan(/\s*,\s*/)
|
178
|
-
if @commas.zero? || @commas == 1 && seen_suffix?
|
204
|
+
if @commas.zero? && !seen_full_name? || @commas == 1 && seen_suffix?
|
179
205
|
consume_comma
|
180
206
|
else
|
181
207
|
consume_separator
|
@@ -183,17 +209,17 @@ require 'strscan'
|
|
183
209
|
when input.scan(/\s+/)
|
184
210
|
next_token
|
185
211
|
when input.scan(title)
|
186
|
-
|
212
|
+
consume_word(:TITLE, input.matched.strip)
|
187
213
|
when input.scan(appellation)
|
188
214
|
[:APPELLATION, input.matched.strip]
|
189
215
|
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{comma}]*/)
|
190
|
-
|
216
|
+
consume_word(:UWORD, input.matched)
|
191
217
|
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{comma}]*/)
|
192
|
-
|
218
|
+
consume_word(:LWORD, input.matched)
|
193
219
|
when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{comma}]*/)
|
194
|
-
|
220
|
+
consume_word(:PWORD, input.matched)
|
195
221
|
when input.scan(/('[^'\n]+')|("[^"\n]+")/)
|
196
|
-
|
222
|
+
consume_word(:NICK, input.matched[1...-1])
|
197
223
|
else
|
198
224
|
raise ArgumentError,
|
199
225
|
"Failed to parse name #{input.string.inspect}: unmatched data at offset #{input.pos}"
|
data/lib/namae/version.rb
CHANGED
data/namae.gemspec
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "namae"
|
8
|
-
s.version = "0.4.0"
|
8
|
+
s.version = "0.5.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Sylvester Keil", "Dan Collis-Puro"]
|
@@ -56,14 +56,12 @@ Gem::Specification.new do |s|
|
|
56
56
|
|
57
57
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
58
58
|
s.add_development_dependency(%q<racc>, ["~> 1.4.8"])
|
59
|
-
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
60
59
|
s.add_development_dependency(%q<bundler>, ["~> 1.1"])
|
61
60
|
s.add_development_dependency(%q<simplecov>, [">= 0"])
|
62
61
|
s.add_development_dependency(%q<ZenTest>, ["~> 4.8.0"])
|
63
62
|
s.add_development_dependency(%q<jeweler>, ["~> 1.8.3"])
|
64
63
|
else
|
65
64
|
s.add_dependency(%q<racc>, ["~> 1.4.8"])
|
66
|
-
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
67
65
|
s.add_dependency(%q<bundler>, ["~> 1.1"])
|
68
66
|
s.add_dependency(%q<simplecov>, [">= 0"])
|
69
67
|
s.add_dependency(%q<ZenTest>, ["~> 4.8.0"])
|
@@ -71,7 +69,6 @@ Gem::Specification.new do |s|
|
|
71
69
|
end
|
72
70
|
else
|
73
71
|
s.add_dependency(%q<racc>, ["~> 1.4.8"])
|
74
|
-
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
75
72
|
s.add_dependency(%q<bundler>, ["~> 1.1"])
|
76
73
|
s.add_dependency(%q<simplecov>, [">= 0"])
|
77
74
|
s.add_dependency(%q<ZenTest>, ["~> 4.8.0"])
|
data/spec/namae/parser_spec.rb
CHANGED
@@ -36,7 +36,7 @@ module Namae
|
|
36
36
|
describe 'when the next input is " , "' do
|
37
37
|
before { parser.send(:input).string = ' , ' }
|
38
38
|
it 'returns a COMMA token' do
|
39
|
-
parser.send(:next_token).should == [:COMMA,
|
39
|
+
parser.send(:next_token).should == [:COMMA, ',']
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: namae
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -28,22 +28,6 @@ dependencies:
|
|
28
28
|
- - ~>
|
29
29
|
- !ruby/object:Gem::Version
|
30
30
|
version: 1.4.8
|
31
|
-
- !ruby/object:Gem::Dependency
|
32
|
-
name: rdoc
|
33
|
-
requirement: !ruby/object:Gem::Requirement
|
34
|
-
none: false
|
35
|
-
requirements:
|
36
|
-
- - ~>
|
37
|
-
- !ruby/object:Gem::Version
|
38
|
-
version: '3.12'
|
39
|
-
type: :development
|
40
|
-
prerelease: false
|
41
|
-
version_requirements: !ruby/object:Gem::Requirement
|
42
|
-
none: false
|
43
|
-
requirements:
|
44
|
-
- - ~>
|
45
|
-
- !ruby/object:Gem::Version
|
46
|
-
version: '3.12'
|
47
31
|
- !ruby/object:Gem::Dependency
|
48
32
|
name: bundler
|
49
33
|
requirement: !ruby/object:Gem::Requirement
|
@@ -162,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
162
146
|
version: '0'
|
163
147
|
segments:
|
164
148
|
- 0
|
165
|
-
hash: -
|
149
|
+
hash: -4098302690694526777
|
166
150
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
167
151
|
none: false
|
168
152
|
requirements:
|