namae 0.7.0 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -61,3 +61,15 @@ Feature: Parse a list of names
61
61
  | Brian | Kernighan |
62
62
  | Dennis | Ritchie |
63
63
  | Donald | Knuth |
64
+
65
+ @list @wip
66
+ Scenario: A list of names separated by commas
67
+ Given a parser that prefers commas as separators
68
+ When I parse the names "G. Proctor, M. Cooper, P. Sanders & B. Malcom"
69
+ # Then there should be 4 names
70
+ Then the names should be:
71
+ | given | family |
72
+ | G. | Proctor |
73
+ | M. | Cooper |
74
+ | P. | Sanders |
75
+ | B. | Malcom |
data/lib/namae/parser.rb CHANGED
@@ -25,7 +25,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
25
25
  :comma => ',',
26
26
  :separator => /\s*(\band\b|\&)\s*/i,
27
27
  :title => /\s*\b(sir|lord|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
28
- :suffix => /\s*\b(jr|sr|[ivx]+)\.?\s*/i,
28
+ :suffix => /\s*\b(jr|sr|[ivx]{2,})\.?\s*/i,
29
29
  :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
30
30
  }
31
31
  end
@@ -77,7 +77,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
77
77
  end
78
78
 
79
79
  def reset
80
- @commas, @words, @yydebug = 0, 0, debug?
80
+ @commas, @words, @initials, @yydebug = 0, 0, 0, debug?
81
81
  self
82
82
  end
83
83
 
@@ -93,7 +93,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
93
93
 
94
94
  def consume_separator
95
95
  return next_token if seen_separator?
96
- @commas, @words = 0, 0
96
+ @commas, @words, @initials = 0, 0, 0
97
97
  [:AND, :AND]
98
98
  end
99
99
 
@@ -104,6 +104,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
104
104
 
105
105
  def consume_word(type, word)
106
106
  @words += 1
107
+ @initials += 1 if type == :UWORD && word =~ /^\s*[[:alpha:]]\.\s*$/
107
108
  [type, word]
108
109
  end
109
110
 
@@ -123,13 +124,13 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
123
124
  def will_see_suffix?
124
125
  input.peek(8).to_s.strip.split(/\s+/)[0] =~ suffix
125
126
  end
126
-
127
+
127
128
  def will_see_initial?
128
129
  input.peek(6).to_s.strip.split(/\s+/)[0] =~ /[[:alpha:]]\./
129
130
  end
130
131
 
131
132
  def seen_full_name?
132
- prefer_comma_as_separator? && @words > 1 && !will_see_initial?
133
+ prefer_comma_as_separator? && @words > 1 && (@initials > 0 || !will_see_initial?)
133
134
  end
134
135
 
135
136
  def next_token
data/lib/namae/parser.y CHANGED
@@ -99,7 +99,7 @@ require 'strscan'
99
99
  :comma => ',',
100
100
  :separator => /\s*(\band\b|\&)\s*/i,
101
101
  :title => /\s*\b(sir|lord|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
102
- :suffix => /\s*\b(jr|sr|[ivx]+)\.?\s*/i,
102
+ :suffix => /\s*\b(jr|sr|[ivx]{2,})\.?\s*/i,
103
103
  :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
104
104
  }
105
105
  end
@@ -151,7 +151,7 @@ require 'strscan'
151
151
  end
152
152
 
153
153
  def reset
154
- @commas, @words, @yydebug = 0, 0, debug?
154
+ @commas, @words, @initials, @yydebug = 0, 0, 0, debug?
155
155
  self
156
156
  end
157
157
 
@@ -167,7 +167,7 @@ require 'strscan'
167
167
 
168
168
  def consume_separator
169
169
  return next_token if seen_separator?
170
- @commas, @words = 0, 0
170
+ @commas, @words, @initials = 0, 0, 0
171
171
  [:AND, :AND]
172
172
  end
173
173
 
@@ -178,6 +178,7 @@ require 'strscan'
178
178
 
179
179
  def consume_word(type, word)
180
180
  @words += 1
181
+ @initials += 1 if type == :UWORD && word =~ /^\s*[[:alpha:]]\.\s*$/
181
182
  [type, word]
182
183
  end
183
184
 
@@ -197,13 +198,13 @@ require 'strscan'
197
198
  def will_see_suffix?
198
199
  input.peek(8).to_s.strip.split(/\s+/)[0] =~ suffix
199
200
  end
200
-
201
+
201
202
  def will_see_initial?
202
203
  input.peek(6).to_s.strip.split(/\s+/)[0] =~ /[[:alpha:]]\./
203
204
  end
204
205
 
205
206
  def seen_full_name?
206
- prefer_comma_as_separator? && @words > 1 && !will_see_initial?
207
+ prefer_comma_as_separator? && @words > 1 && (@initials > 0 || !will_see_initial?)
207
208
  end
208
209
 
209
210
  def next_token
data/lib/namae/version.rb CHANGED
@@ -2,7 +2,7 @@ module Namae
2
2
  module Version
3
3
  MAJOR = 0
4
4
  MINOR = 7
5
- PATCH = 0
5
+ PATCH = 1
6
6
  BUILD = nil
7
7
 
8
8
  STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.').freeze
data/namae.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "namae"
8
- s.version = "0.7.0"
8
+ s.version = "0.7.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Sylvester Keil", "Dan Collis-Puro"]
12
- s.date = "2012-10-29"
12
+ s.date = "2012-11-08"
13
13
  s.description = " Namae (\u{540d}\u{524d}) is a parser for human names. It recognizes personal names of various cultural backgrounds and tries to split them into their component parts (e.g., given and family names, honorifics etc.). "
14
14
  s.email = ["sylvester@keil.or.at", "dan@collispuro.com"]
15
15
  s.extra_rdoc_files = [
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: namae
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.7.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2012-10-29 00:00:00.000000000 Z
13
+ date: 2012-11-08 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: racc
@@ -147,7 +147,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
147
147
  version: '0'
148
148
  segments:
149
149
  - 0
150
- hash: -219042018557465799
150
+ hash: -3595872270413772674
151
151
  required_rubygems_version: !ruby/object:Gem::Requirement
152
152
  none: false
153
153
  requirements: