namae 0.7.0 → 0.7.1

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -61,3 +61,15 @@ Feature: Parse a list of names
61
61
  | Brian | Kernighan |
62
62
  | Dennis | Ritchie |
63
63
  | Donald | Knuth |
64
+
65
+ @list @wip
66
+ Scenario: A list of names separated by commas
67
+ Given a parser that prefers commas as separators
68
+ When I parse the names "G. Proctor, M. Cooper, P. Sanders & B. Malcom"
69
+ # Then there should be 4 names
70
+ Then the names should be:
71
+ | given | family |
72
+ | G. | Proctor |
73
+ | M. | Cooper |
74
+ | P. | Sanders |
75
+ | B. | Malcom |
data/lib/namae/parser.rb CHANGED
@@ -25,7 +25,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
25
25
  :comma => ',',
26
26
  :separator => /\s*(\band\b|\&)\s*/i,
27
27
  :title => /\s*\b(sir|lord|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
28
- :suffix => /\s*\b(jr|sr|[ivx]+)\.?\s*/i,
28
+ :suffix => /\s*\b(jr|sr|[ivx]{2,})\.?\s*/i,
29
29
  :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
30
30
  }
31
31
  end
@@ -77,7 +77,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
77
77
  end
78
78
 
79
79
  def reset
80
- @commas, @words, @yydebug = 0, 0, debug?
80
+ @commas, @words, @initials, @yydebug = 0, 0, 0, debug?
81
81
  self
82
82
  end
83
83
 
@@ -93,7 +93,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
93
93
 
94
94
  def consume_separator
95
95
  return next_token if seen_separator?
96
- @commas, @words = 0, 0
96
+ @commas, @words, @initials = 0, 0, 0
97
97
  [:AND, :AND]
98
98
  end
99
99
 
@@ -104,6 +104,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
104
104
 
105
105
  def consume_word(type, word)
106
106
  @words += 1
107
+ @initials += 1 if type == :UWORD && word =~ /^\s*[[:alpha:]]\.\s*$/
107
108
  [type, word]
108
109
  end
109
110
 
@@ -123,13 +124,13 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
123
124
  def will_see_suffix?
124
125
  input.peek(8).to_s.strip.split(/\s+/)[0] =~ suffix
125
126
  end
126
-
127
+
127
128
  def will_see_initial?
128
129
  input.peek(6).to_s.strip.split(/\s+/)[0] =~ /[[:alpha:]]\./
129
130
  end
130
131
 
131
132
  def seen_full_name?
132
- prefer_comma_as_separator? && @words > 1 && !will_see_initial?
133
+ prefer_comma_as_separator? && @words > 1 && (@initials > 0 || !will_see_initial?)
133
134
  end
134
135
 
135
136
  def next_token
data/lib/namae/parser.y CHANGED
@@ -99,7 +99,7 @@ require 'strscan'
99
99
  :comma => ',',
100
100
  :separator => /\s*(\band\b|\&)\s*/i,
101
101
  :title => /\s*\b(sir|lord|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
102
- :suffix => /\s*\b(jr|sr|[ivx]+)\.?\s*/i,
102
+ :suffix => /\s*\b(jr|sr|[ivx]{2,})\.?\s*/i,
103
103
  :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
104
104
  }
105
105
  end
@@ -151,7 +151,7 @@ require 'strscan'
151
151
  end
152
152
 
153
153
  def reset
154
- @commas, @words, @yydebug = 0, 0, debug?
154
+ @commas, @words, @initials, @yydebug = 0, 0, 0, debug?
155
155
  self
156
156
  end
157
157
 
@@ -167,7 +167,7 @@ require 'strscan'
167
167
 
168
168
  def consume_separator
169
169
  return next_token if seen_separator?
170
- @commas, @words = 0, 0
170
+ @commas, @words, @initials = 0, 0, 0
171
171
  [:AND, :AND]
172
172
  end
173
173
 
@@ -178,6 +178,7 @@ require 'strscan'
178
178
 
179
179
  def consume_word(type, word)
180
180
  @words += 1
181
+ @initials += 1 if type == :UWORD && word =~ /^\s*[[:alpha:]]\.\s*$/
181
182
  [type, word]
182
183
  end
183
184
 
@@ -197,13 +198,13 @@ require 'strscan'
197
198
  def will_see_suffix?
198
199
  input.peek(8).to_s.strip.split(/\s+/)[0] =~ suffix
199
200
  end
200
-
201
+
201
202
  def will_see_initial?
202
203
  input.peek(6).to_s.strip.split(/\s+/)[0] =~ /[[:alpha:]]\./
203
204
  end
204
205
 
205
206
  def seen_full_name?
206
- prefer_comma_as_separator? && @words > 1 && !will_see_initial?
207
+ prefer_comma_as_separator? && @words > 1 && (@initials > 0 || !will_see_initial?)
207
208
  end
208
209
 
209
210
  def next_token
data/lib/namae/version.rb CHANGED
@@ -2,7 +2,7 @@ module Namae
2
2
  module Version
3
3
  MAJOR = 0
4
4
  MINOR = 7
5
- PATCH = 0
5
+ PATCH = 1
6
6
  BUILD = nil
7
7
 
8
8
  STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.').freeze
data/namae.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "namae"
8
- s.version = "0.7.0"
8
+ s.version = "0.7.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Sylvester Keil", "Dan Collis-Puro"]
12
- s.date = "2012-10-29"
12
+ s.date = "2012-11-08"
13
13
  s.description = " Namae (\u{540d}\u{524d}) is a parser for human names. It recognizes personal names of various cultural backgrounds and tries to split them into their component parts (e.g., given and family names, honorifics etc.). "
14
14
  s.email = ["sylvester@keil.or.at", "dan@collispuro.com"]
15
15
  s.extra_rdoc_files = [
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: namae
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.7.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2012-10-29 00:00:00.000000000 Z
13
+ date: 2012-11-08 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: racc
@@ -147,7 +147,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
147
147
  version: '0'
148
148
  segments:
149
149
  - 0
150
- hash: -219042018557465799
150
+ hash: -3595872270413772674
151
151
  required_rubygems_version: !ruby/object:Gem::Requirement
152
152
  none: false
153
153
  requirements: