namae 0.4.0 → 0.5.0

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -9,7 +9,6 @@ end
9
9
 
10
10
  group :development do
11
11
  gem 'racc', '~> 1.4.8', :platform => [:mri_19, :mri_18, :rbx]
12
- gem 'rdoc', '~> 3.12'
13
12
  gem 'bundler', '~> 1.1'
14
13
  gem 'simplecov', :require => false
15
14
  gem 'ZenTest', '~> 4.8.0'
data/Rakefile CHANGED
@@ -56,7 +56,7 @@ end
56
56
  require 'cucumber/rake/task'
57
57
  Cucumber::Rake::Task.new(:features)
58
58
 
59
- task :default => :spec
59
+ task :default => [:spec, :features]
60
60
 
61
61
  require 'yard'
62
62
  YARD::Rake::YardocTask.new
@@ -21,6 +21,15 @@ Feature: Parse a list of names
21
21
  | Brian | Kernighan |
22
22
  | Dennis | Ritchie |
23
23
  | Donald | Knuth |
24
+ # Given a parser that prefers commas as separators
25
+ # When I parse the names "Kernighan, Brian, Ritchie, Dennis, Knuth, Donald"
26
+ # Then there should be 3 names
27
+ # And the names should be:
28
+ # | given | family |
29
+ # | Brian | Kernighan |
30
+ # | Dennis | Ritchie |
31
+ # | Donald | Knuth |
32
+
24
33
 
25
34
  @list
26
35
  Scenario: A list of sort-order names with initials separated by commas
@@ -41,3 +50,14 @@ Feature: Parse a list of names
41
50
  | Brian | Kernighan |
42
51
  | Dennis | Ritchie |
43
52
  | Donald | Knuth |
53
+
54
+ # @list
55
+ # Scenario: A list of display-order names separated by commas and 'and'
56
+ # Given a parser that prefers commas as separators
57
+ # When I parse the names "Brian Kernighan, Dennis Ritchie, and Donald Knuth"
58
+ # Then there should be 3 names
59
+ # And the names should be:
60
+ # | given | family |
61
+ # | Brian | Kernighan |
62
+ # | Dennis | Ritchie |
63
+ # | Donald | Knuth |
@@ -1,3 +1,7 @@
1
+ Given /^a parser that prefers commas as separators$/ do
2
+ Namae::Parser.instance.options[:prefer_comma_as_separator] = true
3
+ end
4
+
1
5
  When /^I parse the name "(.*)"$/ do |string|
2
6
  @name = Namae.parse!(string)[0]
3
7
  end
data/lib/namae/parser.rb CHANGED
@@ -21,10 +21,11 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
21
21
  def initialize
22
22
  @input, @options = StringScanner.new(''), {
23
23
  :debug => false,
24
+ :prefer_comma_as_separator => false,
24
25
  :comma => ',',
25
26
  :separator => /\s*(\band\b|\&)\s*/i,
26
27
  :title => /\s*\b(sir|lord|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
27
- :suffix => /\s*\b(jr|sr|[ivx]+)\.?\s*/i,
28
+ :suffix => /\s*\b(jr|sr|[ivx]+)\.?\s*/i,
28
29
  :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
29
30
  }
30
31
  end
@@ -53,6 +54,10 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
53
54
  options[:appellation]
54
55
  end
55
56
 
57
+ def prefer_comma_as_separator?
58
+ options[:prefer_comma_as_separator]
59
+ end
60
+
56
61
  def parse(input)
57
62
  parse!(input)
58
63
  rescue => e
@@ -62,37 +67,58 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
62
67
 
63
68
  def parse!(string)
64
69
  input.string = normalize(string)
65
- reset
70
+ reset
66
71
  do_parse
67
72
  end
68
73
 
69
- def normalize(string)
70
- string = string.strip
71
- string
72
- end
73
-
74
- def reset
75
- @commas, @yydebug = 0, debug?
76
- self
77
- end
74
+ def normalize(string)
75
+ string = string.strip
76
+ string
77
+ end
78
+
79
+ def reset
80
+ @commas, @words, @yydebug = 0, 0, debug?
81
+ self
82
+ end
78
83
 
79
84
  private
80
-
81
- def consume_separator
82
- @commas = 0
83
- [:AND, nil]
84
- end
85
-
86
- def consume_comma
87
- @commas += 1
88
- [:COMMA, nil]
89
- end
90
-
91
- def seen_suffix?
92
- return false unless @vstack
93
- return true if @vstack[-1].nil?
94
- @vstack[-1] =~ suffix
95
- end
85
+
86
+ def stack
87
+ @vstack || @racc_vstack || []
88
+ end
89
+
90
+ def last_token
91
+ stack[-1]
92
+ end
93
+
94
+ def consume_separator
95
+ # return next_token if seen_separator?
96
+ @commas, @words = 0, 0
97
+ [:AND, nil]
98
+ end
99
+
100
+ def consume_comma
101
+ @commas += 1
102
+ [:COMMA, comma]
103
+ end
104
+
105
+ def consume_word(type, word)
106
+ @words += 1
107
+ [type, word]
108
+ end
109
+
110
+ def seen_separator?
111
+ !stack.empty? && last_token.nil?
112
+ end
113
+
114
+ def seen_suffix?
115
+ return false unless stack.length > 1
116
+ last_token == comma || last_token =~ suffix
117
+ end
118
+
119
+ def seen_full_name?
120
+ prefer_comma_as_separator? && @words > 1
121
+ end
96
122
 
97
123
  def next_token
98
124
  case
@@ -101,25 +127,25 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
101
127
  when input.scan(separator)
102
128
  consume_separator
103
129
  when input.scan(/\s*,\s*/)
104
- if @commas.zero? || @commas == 1 && seen_suffix?
105
- consume_comma
106
- else
107
- consume_separator
108
- end
130
+ if @commas.zero? && !seen_full_name? || @commas == 1 && seen_suffix?
131
+ consume_comma
132
+ else
133
+ consume_separator
134
+ end
109
135
  when input.scan(/\s+/)
110
136
  next_token
111
137
  when input.scan(title)
112
- [:TITLE, input.matched.strip]
138
+ consume_word(:TITLE, input.matched.strip)
113
139
  when input.scan(appellation)
114
140
  [:APPELLATION, input.matched.strip]
115
141
  when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{comma}]*/)
116
- [:UWORD, input.matched]
142
+ consume_word(:UWORD, input.matched)
117
143
  when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{comma}]*/)
118
- [:LWORD, input.matched]
144
+ consume_word(:LWORD, input.matched)
119
145
  when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{comma}]*/)
120
- [:PWORD, input.matched]
146
+ consume_word(:PWORD, input.matched)
121
147
  when input.scan(/('[^'\n]+')|("[^"\n]+")/)
122
- [:NICK, input.matched[1...-1]]
148
+ consume_word(:NICK, input.matched[1...-1])
123
149
  else
124
150
  raise ArgumentError,
125
151
  "Failed to parse name #{input.string.inspect}: unmatched data at offset #{input.pos}"
data/lib/namae/parser.y CHANGED
@@ -95,6 +95,7 @@ require 'strscan'
95
95
  def initialize
96
96
  @input, @options = StringScanner.new(''), {
97
97
  :debug => false,
98
+ :prefer_comma_as_separator => false,
98
99
  :comma => ',',
99
100
  :separator => /\s*(\band\b|\&)\s*/i,
100
101
  :title => /\s*\b(sir|lord|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
@@ -127,6 +128,10 @@ require 'strscan'
127
128
  options[:appellation]
128
129
  end
129
130
 
131
+ def prefer_comma_as_separator?
132
+ options[:prefer_comma_as_separator]
133
+ end
134
+
130
135
  def parse(input)
131
136
  parse!(input)
132
137
  rescue => e
@@ -146,26 +151,47 @@ require 'strscan'
146
151
  end
147
152
 
148
153
  def reset
149
- @commas, @yydebug = 0, debug?
154
+ @commas, @words, @yydebug = 0, 0, debug?
150
155
  self
151
156
  end
152
157
 
153
158
  private
154
-
159
+
160
+ def stack
161
+ @vstack || @racc_vstack || []
162
+ end
163
+
164
+ def last_token
165
+ stack[-1]
166
+ end
167
+
155
168
  def consume_separator
156
- @commas = 0
169
+ # return next_token if seen_separator?
170
+ @commas, @words = 0, 0
157
171
  [:AND, nil]
158
172
  end
159
173
 
160
174
  def consume_comma
161
175
  @commas += 1
162
- [:COMMA, nil]
176
+ [:COMMA, comma]
163
177
  end
164
-
178
+
179
+ def consume_word(type, word)
180
+ @words += 1
181
+ [type, word]
182
+ end
183
+
184
+ def seen_separator?
185
+ !stack.empty? && last_token.nil?
186
+ end
187
+
165
188
  def seen_suffix?
166
- return false unless @vstack
167
- return true if @vstack[-1].nil?
168
- @vstack[-1] =~ suffix
189
+ return false unless stack.length > 1
190
+ last_token == comma || last_token =~ suffix
191
+ end
192
+
193
+ def seen_full_name?
194
+ prefer_comma_as_separator? && @words > 1
169
195
  end
170
196
 
171
197
  def next_token
@@ -175,7 +201,7 @@ require 'strscan'
175
201
  when input.scan(separator)
176
202
  consume_separator
177
203
  when input.scan(/\s*,\s*/)
178
- if @commas.zero? || @commas == 1 && seen_suffix?
204
+ if @commas.zero? && !seen_full_name? || @commas == 1 && seen_suffix?
179
205
  consume_comma
180
206
  else
181
207
  consume_separator
@@ -183,17 +209,17 @@ require 'strscan'
183
209
  when input.scan(/\s+/)
184
210
  next_token
185
211
  when input.scan(title)
186
- [:TITLE, input.matched.strip]
212
+ consume_word(:TITLE, input.matched.strip)
187
213
  when input.scan(appellation)
188
214
  [:APPELLATION, input.matched.strip]
189
215
  when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{comma}]*/)
190
- [:UWORD, input.matched]
216
+ consume_word(:UWORD, input.matched)
191
217
  when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{comma}]*/)
192
- [:LWORD, input.matched]
218
+ consume_word(:LWORD, input.matched)
193
219
  when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{comma}]*/)
194
- [:PWORD, input.matched]
220
+ consume_word(:PWORD, input.matched)
195
221
  when input.scan(/('[^'\n]+')|("[^"\n]+")/)
196
- [:NICK, input.matched[1...-1]]
222
+ consume_word(:NICK, input.matched[1...-1])
197
223
  else
198
224
  raise ArgumentError,
199
225
  "Failed to parse name #{input.string.inspect}: unmatched data at offset #{input.pos}"
data/lib/namae/version.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  module Namae
2
2
  module Version
3
3
  MAJOR = 0
4
- MINOR = 4
4
+ MINOR = 5
5
5
  PATCH = 0
6
6
  BUILD = nil
7
7
 
data/namae.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "namae"
8
- s.version = "0.4.0"
8
+ s.version = "0.5.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Sylvester Keil", "Dan Collis-Puro"]
@@ -56,14 +56,12 @@ Gem::Specification.new do |s|
56
56
 
57
57
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
58
58
  s.add_development_dependency(%q<racc>, ["~> 1.4.8"])
59
- s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
60
59
  s.add_development_dependency(%q<bundler>, ["~> 1.1"])
61
60
  s.add_development_dependency(%q<simplecov>, [">= 0"])
62
61
  s.add_development_dependency(%q<ZenTest>, ["~> 4.8.0"])
63
62
  s.add_development_dependency(%q<jeweler>, ["~> 1.8.3"])
64
63
  else
65
64
  s.add_dependency(%q<racc>, ["~> 1.4.8"])
66
- s.add_dependency(%q<rdoc>, ["~> 3.12"])
67
65
  s.add_dependency(%q<bundler>, ["~> 1.1"])
68
66
  s.add_dependency(%q<simplecov>, [">= 0"])
69
67
  s.add_dependency(%q<ZenTest>, ["~> 4.8.0"])
@@ -71,7 +69,6 @@ Gem::Specification.new do |s|
71
69
  end
72
70
  else
73
71
  s.add_dependency(%q<racc>, ["~> 1.4.8"])
74
- s.add_dependency(%q<rdoc>, ["~> 3.12"])
75
72
  s.add_dependency(%q<bundler>, ["~> 1.1"])
76
73
  s.add_dependency(%q<simplecov>, [">= 0"])
77
74
  s.add_dependency(%q<ZenTest>, ["~> 4.8.0"])
@@ -36,7 +36,7 @@ module Namae
36
36
  describe 'when the next input is " , "' do
37
37
  before { parser.send(:input).string = ' , ' }
38
38
  it 'returns a COMMA token' do
39
- parser.send(:next_token).should == [:COMMA, nil]
39
+ parser.send(:next_token).should == [:COMMA, ',']
40
40
  end
41
41
  end
42
42
 
data/spec/spec_helper.rb CHANGED
@@ -1,5 +1,10 @@
1
1
  begin
2
2
  require 'simplecov'
3
+ rescue LoadError
4
+ # ignore
5
+ end
6
+
7
+ begin
3
8
  require 'debugger'
4
9
  rescue LoadError
5
10
  # ignore
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: namae
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -28,22 +28,6 @@ dependencies:
28
28
  - - ~>
29
29
  - !ruby/object:Gem::Version
30
30
  version: 1.4.8
31
- - !ruby/object:Gem::Dependency
32
- name: rdoc
33
- requirement: !ruby/object:Gem::Requirement
34
- none: false
35
- requirements:
36
- - - ~>
37
- - !ruby/object:Gem::Version
38
- version: '3.12'
39
- type: :development
40
- prerelease: false
41
- version_requirements: !ruby/object:Gem::Requirement
42
- none: false
43
- requirements:
44
- - - ~>
45
- - !ruby/object:Gem::Version
46
- version: '3.12'
47
31
  - !ruby/object:Gem::Dependency
48
32
  name: bundler
49
33
  requirement: !ruby/object:Gem::Requirement
@@ -162,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
162
146
  version: '0'
163
147
  segments:
164
148
  - 0
165
- hash: -4370560385267353354
149
+ hash: -4098302690694526777
166
150
  required_rubygems_version: !ruby/object:Gem::Requirement
167
151
  none: false
168
152
  requirements: