namae 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +0 -1
- data/Rakefile +1 -1
- data/features/lists.feature +20 -0
- data/features/step_definitions/namae_steps.rb +4 -0
- data/lib/namae/parser.rb +63 -37
- data/lib/namae/parser.y +40 -14
- data/lib/namae/version.rb +1 -1
- data/namae.gemspec +1 -4
- data/spec/namae/parser_spec.rb +1 -1
- data/spec/spec_helper.rb +5 -0
- metadata +2 -18
data/Gemfile
CHANGED
data/Rakefile
CHANGED
data/features/lists.feature
CHANGED
@@ -21,6 +21,15 @@ Feature: Parse a list of names
|
|
21
21
|
| Brian | Kernighan |
|
22
22
|
| Dennis | Ritchie |
|
23
23
|
| Donald | Knuth |
|
24
|
+
# Given a parser that prefers commas as separators
|
25
|
+
# When I parse the names "Kernighan, Brian, Ritchie, Dennis, Knuth, Donald"
|
26
|
+
# Then there should be 3 names
|
27
|
+
# And the names should be:
|
28
|
+
# | given | family |
|
29
|
+
# | Brian | Kernighan |
|
30
|
+
# | Dennis | Ritchie |
|
31
|
+
# | Donald | Knuth |
|
32
|
+
|
24
33
|
|
25
34
|
@list
|
26
35
|
Scenario: A list of sort-order names with initials separated by commas
|
@@ -41,3 +50,14 @@ Feature: Parse a list of names
|
|
41
50
|
| Brian | Kernighan |
|
42
51
|
| Dennis | Ritchie |
|
43
52
|
| Donald | Knuth |
|
53
|
+
|
54
|
+
# @list
|
55
|
+
# Scenario: A list of display-order names separated by commas and 'and'
|
56
|
+
# Given a parser that prefers commas as separators
|
57
|
+
# When I parse the names "Brian Kernighan, Dennis Ritchie, and Donald Knuth"
|
58
|
+
# Then there should be 3 names
|
59
|
+
# And the names should be:
|
60
|
+
# | given | family |
|
61
|
+
# | Brian | Kernighan |
|
62
|
+
# | Dennis | Ritchie |
|
63
|
+
# | Donald | Knuth |
|
data/lib/namae/parser.rb
CHANGED
@@ -21,10 +21,11 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
21
21
|
def initialize
|
22
22
|
@input, @options = StringScanner.new(''), {
|
23
23
|
:debug => false,
|
24
|
+
:prefer_comma_as_separator => false,
|
24
25
|
:comma => ',',
|
25
26
|
:separator => /\s*(\band\b|\&)\s*/i,
|
26
27
|
:title => /\s*\b(sir|lord|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
27
|
-
|
28
|
+
:suffix => /\s*\b(jr|sr|[ivx]+)\.?\s*/i,
|
28
29
|
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
29
30
|
}
|
30
31
|
end
|
@@ -53,6 +54,10 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
53
54
|
options[:appellation]
|
54
55
|
end
|
55
56
|
|
57
|
+
def prefer_comma_as_separator?
|
58
|
+
options[:prefer_comma_as_separator]
|
59
|
+
end
|
60
|
+
|
56
61
|
def parse(input)
|
57
62
|
parse!(input)
|
58
63
|
rescue => e
|
@@ -62,37 +67,58 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
62
67
|
|
63
68
|
def parse!(string)
|
64
69
|
input.string = normalize(string)
|
65
|
-
|
70
|
+
reset
|
66
71
|
do_parse
|
67
72
|
end
|
68
73
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
@commas, @yydebug = 0, debug?
|
76
|
-
|
77
|
-
|
74
|
+
def normalize(string)
|
75
|
+
string = string.strip
|
76
|
+
string
|
77
|
+
end
|
78
|
+
|
79
|
+
def reset
|
80
|
+
@commas, @words, @yydebug = 0, 0, debug?
|
81
|
+
self
|
82
|
+
end
|
78
83
|
|
79
84
|
private
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
85
|
+
|
86
|
+
def stack
|
87
|
+
@vstack || @racc_vstack || []
|
88
|
+
end
|
89
|
+
|
90
|
+
def last_token
|
91
|
+
stack[-1]
|
92
|
+
end
|
93
|
+
|
94
|
+
def consume_separator
|
95
|
+
# return next_token if seen_separator?
|
96
|
+
@commas, @words = 0, 0
|
97
|
+
[:AND, nil]
|
98
|
+
end
|
99
|
+
|
100
|
+
def consume_comma
|
101
|
+
@commas += 1
|
102
|
+
[:COMMA, comma]
|
103
|
+
end
|
104
|
+
|
105
|
+
def consume_word(type, word)
|
106
|
+
@words += 1
|
107
|
+
[type, word]
|
108
|
+
end
|
109
|
+
|
110
|
+
def seen_separator?
|
111
|
+
!stack.empty? && last_token.nil?
|
112
|
+
end
|
113
|
+
|
114
|
+
def seen_suffix?
|
115
|
+
return false unless stack.length > 1
|
116
|
+
last_token == comma || last_token =~ suffix
|
117
|
+
end
|
118
|
+
|
119
|
+
def seen_full_name?
|
120
|
+
prefer_comma_as_separator? && @words > 1
|
121
|
+
end
|
96
122
|
|
97
123
|
def next_token
|
98
124
|
case
|
@@ -101,25 +127,25 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
101
127
|
when input.scan(separator)
|
102
128
|
consume_separator
|
103
129
|
when input.scan(/\s*,\s*/)
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
130
|
+
if @commas.zero? && !seen_full_name? || @commas == 1 && seen_suffix?
|
131
|
+
consume_comma
|
132
|
+
else
|
133
|
+
consume_separator
|
134
|
+
end
|
109
135
|
when input.scan(/\s+/)
|
110
136
|
next_token
|
111
137
|
when input.scan(title)
|
112
|
-
|
138
|
+
consume_word(:TITLE, input.matched.strip)
|
113
139
|
when input.scan(appellation)
|
114
140
|
[:APPELLATION, input.matched.strip]
|
115
141
|
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{comma}]*/)
|
116
|
-
|
142
|
+
consume_word(:UWORD, input.matched)
|
117
143
|
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{comma}]*/)
|
118
|
-
|
144
|
+
consume_word(:LWORD, input.matched)
|
119
145
|
when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{comma}]*/)
|
120
|
-
|
146
|
+
consume_word(:PWORD, input.matched)
|
121
147
|
when input.scan(/('[^'\n]+')|("[^"\n]+")/)
|
122
|
-
|
148
|
+
consume_word(:NICK, input.matched[1...-1])
|
123
149
|
else
|
124
150
|
raise ArgumentError,
|
125
151
|
"Failed to parse name #{input.string.inspect}: unmatched data at offset #{input.pos}"
|
data/lib/namae/parser.y
CHANGED
@@ -95,6 +95,7 @@ require 'strscan'
|
|
95
95
|
def initialize
|
96
96
|
@input, @options = StringScanner.new(''), {
|
97
97
|
:debug => false,
|
98
|
+
:prefer_comma_as_separator => false,
|
98
99
|
:comma => ',',
|
99
100
|
:separator => /\s*(\band\b|\&)\s*/i,
|
100
101
|
:title => /\s*\b(sir|lord|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
@@ -127,6 +128,10 @@ require 'strscan'
|
|
127
128
|
options[:appellation]
|
128
129
|
end
|
129
130
|
|
131
|
+
def prefer_comma_as_separator?
|
132
|
+
options[:prefer_comma_as_separator]
|
133
|
+
end
|
134
|
+
|
130
135
|
def parse(input)
|
131
136
|
parse!(input)
|
132
137
|
rescue => e
|
@@ -146,26 +151,47 @@ require 'strscan'
|
|
146
151
|
end
|
147
152
|
|
148
153
|
def reset
|
149
|
-
@commas, @yydebug = 0, debug?
|
154
|
+
@commas, @words, @yydebug = 0, 0, debug?
|
150
155
|
self
|
151
156
|
end
|
152
157
|
|
153
158
|
private
|
154
|
-
|
159
|
+
|
160
|
+
def stack
|
161
|
+
@vstack || @racc_vstack || []
|
162
|
+
end
|
163
|
+
|
164
|
+
def last_token
|
165
|
+
stack[-1]
|
166
|
+
end
|
167
|
+
|
155
168
|
def consume_separator
|
156
|
-
|
169
|
+
# return next_token if seen_separator?
|
170
|
+
@commas, @words = 0, 0
|
157
171
|
[:AND, nil]
|
158
172
|
end
|
159
173
|
|
160
174
|
def consume_comma
|
161
175
|
@commas += 1
|
162
|
-
[:COMMA,
|
176
|
+
[:COMMA, comma]
|
163
177
|
end
|
164
|
-
|
178
|
+
|
179
|
+
def consume_word(type, word)
|
180
|
+
@words += 1
|
181
|
+
[type, word]
|
182
|
+
end
|
183
|
+
|
184
|
+
def seen_separator?
|
185
|
+
!stack.empty? && last_token.nil?
|
186
|
+
end
|
187
|
+
|
165
188
|
def seen_suffix?
|
166
|
-
return false unless
|
167
|
-
|
168
|
-
|
189
|
+
return false unless stack.length > 1
|
190
|
+
last_token == comma || last_token =~ suffix
|
191
|
+
end
|
192
|
+
|
193
|
+
def seen_full_name?
|
194
|
+
prefer_comma_as_separator? && @words > 1
|
169
195
|
end
|
170
196
|
|
171
197
|
def next_token
|
@@ -175,7 +201,7 @@ require 'strscan'
|
|
175
201
|
when input.scan(separator)
|
176
202
|
consume_separator
|
177
203
|
when input.scan(/\s*,\s*/)
|
178
|
-
if @commas.zero? || @commas == 1 && seen_suffix?
|
204
|
+
if @commas.zero? && !seen_full_name? || @commas == 1 && seen_suffix?
|
179
205
|
consume_comma
|
180
206
|
else
|
181
207
|
consume_separator
|
@@ -183,17 +209,17 @@ require 'strscan'
|
|
183
209
|
when input.scan(/\s+/)
|
184
210
|
next_token
|
185
211
|
when input.scan(title)
|
186
|
-
|
212
|
+
consume_word(:TITLE, input.matched.strip)
|
187
213
|
when input.scan(appellation)
|
188
214
|
[:APPELLATION, input.matched.strip]
|
189
215
|
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{comma}]*/)
|
190
|
-
|
216
|
+
consume_word(:UWORD, input.matched)
|
191
217
|
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{comma}]*/)
|
192
|
-
|
218
|
+
consume_word(:LWORD, input.matched)
|
193
219
|
when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{comma}]*/)
|
194
|
-
|
220
|
+
consume_word(:PWORD, input.matched)
|
195
221
|
when input.scan(/('[^'\n]+')|("[^"\n]+")/)
|
196
|
-
|
222
|
+
consume_word(:NICK, input.matched[1...-1])
|
197
223
|
else
|
198
224
|
raise ArgumentError,
|
199
225
|
"Failed to parse name #{input.string.inspect}: unmatched data at offset #{input.pos}"
|
data/lib/namae/version.rb
CHANGED
data/namae.gemspec
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "namae"
|
8
|
-
s.version = "0.4.0"
|
8
|
+
s.version = "0.5.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Sylvester Keil", "Dan Collis-Puro"]
|
@@ -56,14 +56,12 @@ Gem::Specification.new do |s|
|
|
56
56
|
|
57
57
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
58
58
|
s.add_development_dependency(%q<racc>, ["~> 1.4.8"])
|
59
|
-
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
60
59
|
s.add_development_dependency(%q<bundler>, ["~> 1.1"])
|
61
60
|
s.add_development_dependency(%q<simplecov>, [">= 0"])
|
62
61
|
s.add_development_dependency(%q<ZenTest>, ["~> 4.8.0"])
|
63
62
|
s.add_development_dependency(%q<jeweler>, ["~> 1.8.3"])
|
64
63
|
else
|
65
64
|
s.add_dependency(%q<racc>, ["~> 1.4.8"])
|
66
|
-
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
67
65
|
s.add_dependency(%q<bundler>, ["~> 1.1"])
|
68
66
|
s.add_dependency(%q<simplecov>, [">= 0"])
|
69
67
|
s.add_dependency(%q<ZenTest>, ["~> 4.8.0"])
|
@@ -71,7 +69,6 @@ Gem::Specification.new do |s|
|
|
71
69
|
end
|
72
70
|
else
|
73
71
|
s.add_dependency(%q<racc>, ["~> 1.4.8"])
|
74
|
-
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
75
72
|
s.add_dependency(%q<bundler>, ["~> 1.1"])
|
76
73
|
s.add_dependency(%q<simplecov>, [">= 0"])
|
77
74
|
s.add_dependency(%q<ZenTest>, ["~> 4.8.0"])
|
data/spec/namae/parser_spec.rb
CHANGED
@@ -36,7 +36,7 @@ module Namae
|
|
36
36
|
describe 'when the next input is " , "' do
|
37
37
|
before { parser.send(:input).string = ' , ' }
|
38
38
|
it 'returns a COMMA token' do
|
39
|
-
parser.send(:next_token).should == [:COMMA,
|
39
|
+
parser.send(:next_token).should == [:COMMA, ',']
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: namae
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -28,22 +28,6 @@ dependencies:
|
|
28
28
|
- - ~>
|
29
29
|
- !ruby/object:Gem::Version
|
30
30
|
version: 1.4.8
|
31
|
-
- !ruby/object:Gem::Dependency
|
32
|
-
name: rdoc
|
33
|
-
requirement: !ruby/object:Gem::Requirement
|
34
|
-
none: false
|
35
|
-
requirements:
|
36
|
-
- - ~>
|
37
|
-
- !ruby/object:Gem::Version
|
38
|
-
version: '3.12'
|
39
|
-
type: :development
|
40
|
-
prerelease: false
|
41
|
-
version_requirements: !ruby/object:Gem::Requirement
|
42
|
-
none: false
|
43
|
-
requirements:
|
44
|
-
- - ~>
|
45
|
-
- !ruby/object:Gem::Version
|
46
|
-
version: '3.12'
|
47
31
|
- !ruby/object:Gem::Dependency
|
48
32
|
name: bundler
|
49
33
|
requirement: !ruby/object:Gem::Requirement
|
@@ -162,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
162
146
|
version: '0'
|
163
147
|
segments:
|
164
148
|
- 0
|
165
|
-
hash: -
|
149
|
+
hash: -4098302690694526777
|
166
150
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
167
151
|
none: false
|
168
152
|
requirements:
|