namae 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -9,7 +9,6 @@ end
9
9
 
10
10
  group :development do
11
11
  gem 'racc', '~> 1.4.8', :platform => [:mri_19, :mri_18, :rbx]
12
- gem 'rdoc', '~> 3.12'
13
12
  gem 'bundler', '~> 1.1'
14
13
  gem 'simplecov', :require => false
15
14
  gem 'ZenTest', '~> 4.8.0'
data/Rakefile CHANGED
@@ -56,7 +56,7 @@ end
56
56
  require 'cucumber/rake/task'
57
57
  Cucumber::Rake::Task.new(:features)
58
58
 
59
- task :default => :spec
59
+ task :default => [:spec, :features]
60
60
 
61
61
  require 'yard'
62
62
  YARD::Rake::YardocTask.new
@@ -21,6 +21,15 @@ Feature: Parse a list of names
21
21
  | Brian | Kernighan |
22
22
  | Dennis | Ritchie |
23
23
  | Donald | Knuth |
24
+ # Given a parser that prefers commas as separators
25
+ # When I parse the names "Kernighan, Brian, Ritchie, Dennis, Knuth, Donald"
26
+ # Then there should be 3 names
27
+ # And the names should be:
28
+ # | given | family |
29
+ # | Brian | Kernighan |
30
+ # | Dennis | Ritchie |
31
+ # | Donald | Knuth |
32
+
24
33
 
25
34
  @list
26
35
  Scenario: A list of sort-order names with initials separated by commas
@@ -41,3 +50,14 @@ Feature: Parse a list of names
41
50
  | Brian | Kernighan |
42
51
  | Dennis | Ritchie |
43
52
  | Donald | Knuth |
53
+
54
+ # @list
55
+ # Scenario: A list of display-order names separated by commas and 'and'
56
+ # Given a parser that prefers commas as separators
57
+ # When I parse the names "Brian Kernighan, Dennis Ritchie, and Donald Knuth"
58
+ # Then there should be 3 names
59
+ # And the names should be:
60
+ # | given | family |
61
+ # | Brian | Kernighan |
62
+ # | Dennis | Ritchie |
63
+ # | Donald | Knuth |
@@ -1,3 +1,7 @@
1
+ Given /^a parser that prefers commas as separators$/ do
2
+ Namae::Parser.instance.options[:prefer_comma_as_separator] = true
3
+ end
4
+
1
5
  When /^I parse the name "(.*)"$/ do |string|
2
6
  @name = Namae.parse!(string)[0]
3
7
  end
data/lib/namae/parser.rb CHANGED
@@ -21,10 +21,11 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
21
21
  def initialize
22
22
  @input, @options = StringScanner.new(''), {
23
23
  :debug => false,
24
+ :prefer_comma_as_separator => false,
24
25
  :comma => ',',
25
26
  :separator => /\s*(\band\b|\&)\s*/i,
26
27
  :title => /\s*\b(sir|lord|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
27
- :suffix => /\s*\b(jr|sr|[ivx]+)\.?\s*/i,
28
+ :suffix => /\s*\b(jr|sr|[ivx]+)\.?\s*/i,
28
29
  :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
29
30
  }
30
31
  end
@@ -53,6 +54,10 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
53
54
  options[:appellation]
54
55
  end
55
56
 
57
+ def prefer_comma_as_separator?
58
+ options[:prefer_comma_as_separator]
59
+ end
60
+
56
61
  def parse(input)
57
62
  parse!(input)
58
63
  rescue => e
@@ -62,37 +67,58 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
62
67
 
63
68
  def parse!(string)
64
69
  input.string = normalize(string)
65
- reset
70
+ reset
66
71
  do_parse
67
72
  end
68
73
 
69
- def normalize(string)
70
- string = string.strip
71
- string
72
- end
73
-
74
- def reset
75
- @commas, @yydebug = 0, debug?
76
- self
77
- end
74
+ def normalize(string)
75
+ string = string.strip
76
+ string
77
+ end
78
+
79
+ def reset
80
+ @commas, @words, @yydebug = 0, 0, debug?
81
+ self
82
+ end
78
83
 
79
84
  private
80
-
81
- def consume_separator
82
- @commas = 0
83
- [:AND, nil]
84
- end
85
-
86
- def consume_comma
87
- @commas += 1
88
- [:COMMA, nil]
89
- end
90
-
91
- def seen_suffix?
92
- return false unless @vstack
93
- return true if @vstack[-1].nil?
94
- @vstack[-1] =~ suffix
95
- end
85
+
86
+ def stack
87
+ @vstack || @racc_vstack || []
88
+ end
89
+
90
+ def last_token
91
+ stack[-1]
92
+ end
93
+
94
+ def consume_separator
95
+ # return next_token if seen_separator?
96
+ @commas, @words = 0, 0
97
+ [:AND, nil]
98
+ end
99
+
100
+ def consume_comma
101
+ @commas += 1
102
+ [:COMMA, comma]
103
+ end
104
+
105
+ def consume_word(type, word)
106
+ @words += 1
107
+ [type, word]
108
+ end
109
+
110
+ def seen_separator?
111
+ !stack.empty? && last_token.nil?
112
+ end
113
+
114
+ def seen_suffix?
115
+ return false unless stack.length > 1
116
+ last_token == comma || last_token =~ suffix
117
+ end
118
+
119
+ def seen_full_name?
120
+ prefer_comma_as_separator? && @words > 1
121
+ end
96
122
 
97
123
  def next_token
98
124
  case
@@ -101,25 +127,25 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
101
127
  when input.scan(separator)
102
128
  consume_separator
103
129
  when input.scan(/\s*,\s*/)
104
- if @commas.zero? || @commas == 1 && seen_suffix?
105
- consume_comma
106
- else
107
- consume_separator
108
- end
130
+ if @commas.zero? && !seen_full_name? || @commas == 1 && seen_suffix?
131
+ consume_comma
132
+ else
133
+ consume_separator
134
+ end
109
135
  when input.scan(/\s+/)
110
136
  next_token
111
137
  when input.scan(title)
112
- [:TITLE, input.matched.strip]
138
+ consume_word(:TITLE, input.matched.strip)
113
139
  when input.scan(appellation)
114
140
  [:APPELLATION, input.matched.strip]
115
141
  when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{comma}]*/)
116
- [:UWORD, input.matched]
142
+ consume_word(:UWORD, input.matched)
117
143
  when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{comma}]*/)
118
- [:LWORD, input.matched]
144
+ consume_word(:LWORD, input.matched)
119
145
  when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{comma}]*/)
120
- [:PWORD, input.matched]
146
+ consume_word(:PWORD, input.matched)
121
147
  when input.scan(/('[^'\n]+')|("[^"\n]+")/)
122
- [:NICK, input.matched[1...-1]]
148
+ consume_word(:NICK, input.matched[1...-1])
123
149
  else
124
150
  raise ArgumentError,
125
151
  "Failed to parse name #{input.string.inspect}: unmatched data at offset #{input.pos}"
data/lib/namae/parser.y CHANGED
@@ -95,6 +95,7 @@ require 'strscan'
95
95
  def initialize
96
96
  @input, @options = StringScanner.new(''), {
97
97
  :debug => false,
98
+ :prefer_comma_as_separator => false,
98
99
  :comma => ',',
99
100
  :separator => /\s*(\band\b|\&)\s*/i,
100
101
  :title => /\s*\b(sir|lord|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
@@ -127,6 +128,10 @@ require 'strscan'
127
128
  options[:appellation]
128
129
  end
129
130
 
131
+ def prefer_comma_as_separator?
132
+ options[:prefer_comma_as_separator]
133
+ end
134
+
130
135
  def parse(input)
131
136
  parse!(input)
132
137
  rescue => e
@@ -146,26 +151,47 @@ require 'strscan'
146
151
  end
147
152
 
148
153
  def reset
149
- @commas, @yydebug = 0, debug?
154
+ @commas, @words, @yydebug = 0, 0, debug?
150
155
  self
151
156
  end
152
157
 
153
158
  private
154
-
159
+
160
+ def stack
161
+ @vstack || @racc_vstack || []
162
+ end
163
+
164
+ def last_token
165
+ stack[-1]
166
+ end
167
+
155
168
  def consume_separator
156
- @commas = 0
169
+ # return next_token if seen_separator?
170
+ @commas, @words = 0, 0
157
171
  [:AND, nil]
158
172
  end
159
173
 
160
174
  def consume_comma
161
175
  @commas += 1
162
- [:COMMA, nil]
176
+ [:COMMA, comma]
163
177
  end
164
-
178
+
179
+ def consume_word(type, word)
180
+ @words += 1
181
+ [type, word]
182
+ end
183
+
184
+ def seen_separator?
185
+ !stack.empty? && last_token.nil?
186
+ end
187
+
165
188
  def seen_suffix?
166
- return false unless @vstack
167
- return true if @vstack[-1].nil?
168
- @vstack[-1] =~ suffix
189
+ return false unless stack.length > 1
190
+ last_token == comma || last_token =~ suffix
191
+ end
192
+
193
+ def seen_full_name?
194
+ prefer_comma_as_separator? && @words > 1
169
195
  end
170
196
 
171
197
  def next_token
@@ -175,7 +201,7 @@ require 'strscan'
175
201
  when input.scan(separator)
176
202
  consume_separator
177
203
  when input.scan(/\s*,\s*/)
178
- if @commas.zero? || @commas == 1 && seen_suffix?
204
+ if @commas.zero? && !seen_full_name? || @commas == 1 && seen_suffix?
179
205
  consume_comma
180
206
  else
181
207
  consume_separator
@@ -183,17 +209,17 @@ require 'strscan'
183
209
  when input.scan(/\s+/)
184
210
  next_token
185
211
  when input.scan(title)
186
- [:TITLE, input.matched.strip]
212
+ consume_word(:TITLE, input.matched.strip)
187
213
  when input.scan(appellation)
188
214
  [:APPELLATION, input.matched.strip]
189
215
  when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{comma}]*/)
190
- [:UWORD, input.matched]
216
+ consume_word(:UWORD, input.matched)
191
217
  when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{comma}]*/)
192
- [:LWORD, input.matched]
218
+ consume_word(:LWORD, input.matched)
193
219
  when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{comma}]*/)
194
- [:PWORD, input.matched]
220
+ consume_word(:PWORD, input.matched)
195
221
  when input.scan(/('[^'\n]+')|("[^"\n]+")/)
196
- [:NICK, input.matched[1...-1]]
222
+ consume_word(:NICK, input.matched[1...-1])
197
223
  else
198
224
  raise ArgumentError,
199
225
  "Failed to parse name #{input.string.inspect}: unmatched data at offset #{input.pos}"
data/lib/namae/version.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  module Namae
2
2
  module Version
3
3
  MAJOR = 0
4
- MINOR = 4
4
+ MINOR = 5
5
5
  PATCH = 0
6
6
  BUILD = nil
7
7
 
data/namae.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "namae"
8
- s.version = "0.4.0"
8
+ s.version = "0.5.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Sylvester Keil", "Dan Collis-Puro"]
@@ -56,14 +56,12 @@ Gem::Specification.new do |s|
56
56
 
57
57
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
58
58
  s.add_development_dependency(%q<racc>, ["~> 1.4.8"])
59
- s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
60
59
  s.add_development_dependency(%q<bundler>, ["~> 1.1"])
61
60
  s.add_development_dependency(%q<simplecov>, [">= 0"])
62
61
  s.add_development_dependency(%q<ZenTest>, ["~> 4.8.0"])
63
62
  s.add_development_dependency(%q<jeweler>, ["~> 1.8.3"])
64
63
  else
65
64
  s.add_dependency(%q<racc>, ["~> 1.4.8"])
66
- s.add_dependency(%q<rdoc>, ["~> 3.12"])
67
65
  s.add_dependency(%q<bundler>, ["~> 1.1"])
68
66
  s.add_dependency(%q<simplecov>, [">= 0"])
69
67
  s.add_dependency(%q<ZenTest>, ["~> 4.8.0"])
@@ -71,7 +69,6 @@ Gem::Specification.new do |s|
71
69
  end
72
70
  else
73
71
  s.add_dependency(%q<racc>, ["~> 1.4.8"])
74
- s.add_dependency(%q<rdoc>, ["~> 3.12"])
75
72
  s.add_dependency(%q<bundler>, ["~> 1.1"])
76
73
  s.add_dependency(%q<simplecov>, [">= 0"])
77
74
  s.add_dependency(%q<ZenTest>, ["~> 4.8.0"])
@@ -36,7 +36,7 @@ module Namae
36
36
  describe 'when the next input is " , "' do
37
37
  before { parser.send(:input).string = ' , ' }
38
38
  it 'returns a COMMA token' do
39
- parser.send(:next_token).should == [:COMMA, nil]
39
+ parser.send(:next_token).should == [:COMMA, ',']
40
40
  end
41
41
  end
42
42
 
data/spec/spec_helper.rb CHANGED
@@ -1,5 +1,10 @@
1
1
  begin
2
2
  require 'simplecov'
3
+ rescue LoadError
4
+ # ignore
5
+ end
6
+
7
+ begin
3
8
  require 'debugger'
4
9
  rescue LoadError
5
10
  # ignore
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: namae
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -28,22 +28,6 @@ dependencies:
28
28
  - - ~>
29
29
  - !ruby/object:Gem::Version
30
30
  version: 1.4.8
31
- - !ruby/object:Gem::Dependency
32
- name: rdoc
33
- requirement: !ruby/object:Gem::Requirement
34
- none: false
35
- requirements:
36
- - - ~>
37
- - !ruby/object:Gem::Version
38
- version: '3.12'
39
- type: :development
40
- prerelease: false
41
- version_requirements: !ruby/object:Gem::Requirement
42
- none: false
43
- requirements:
44
- - - ~>
45
- - !ruby/object:Gem::Version
46
- version: '3.12'
47
31
  - !ruby/object:Gem::Dependency
48
32
  name: bundler
49
33
  requirement: !ruby/object:Gem::Requirement
@@ -162,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
162
146
  version: '0'
163
147
  segments:
164
148
  - 0
165
- hash: -4370560385267353354
149
+ hash: -4098302690694526777
166
150
  required_rubygems_version: !ruby/object:Gem::Requirement
167
151
  none: false
168
152
  requirements: