namae 0.7.0 → 0.7.1
Sign up to get free protection for your applications and to get access to all the features.
- data/features/lists.feature +12 -0
- data/lib/namae/parser.rb +6 -5
- data/lib/namae/parser.y +6 -5
- data/lib/namae/version.rb +1 -1
- data/namae.gemspec +2 -2
- metadata +3 -3
data/features/lists.feature
CHANGED
@@ -61,3 +61,15 @@ Feature: Parse a list of names
|
|
61
61
|
| Brian | Kernighan |
|
62
62
|
| Dennis | Ritchie |
|
63
63
|
| Donald | Knuth |
|
64
|
+
|
65
|
+
@list @wip
|
66
|
+
Scenario: A list of names separated by commas
|
67
|
+
Given a parser that prefers commas as separators
|
68
|
+
When I parse the names "G. Proctor, M. Cooper, P. Sanders & B. Malcom"
|
69
|
+
# Then there should be 4 names
|
70
|
+
Then the names should be:
|
71
|
+
| given | family |
|
72
|
+
| G. | Proctor |
|
73
|
+
| M. | Cooper |
|
74
|
+
| P. | Sanders |
|
75
|
+
| B. | Malcom |
|
data/lib/namae/parser.rb
CHANGED
@@ -25,7 +25,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
25
25
|
:comma => ',',
|
26
26
|
:separator => /\s*(\band\b|\&)\s*/i,
|
27
27
|
:title => /\s*\b(sir|lord|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
28
|
-
:suffix => /\s*\b(jr|sr|[ivx]
|
28
|
+
:suffix => /\s*\b(jr|sr|[ivx]{2,})\.?\s*/i,
|
29
29
|
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
30
30
|
}
|
31
31
|
end
|
@@ -77,7 +77,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
77
77
|
end
|
78
78
|
|
79
79
|
def reset
|
80
|
-
@commas, @words, @yydebug = 0, 0, debug?
|
80
|
+
@commas, @words, @initials, @yydebug = 0, 0, 0, debug?
|
81
81
|
self
|
82
82
|
end
|
83
83
|
|
@@ -93,7 +93,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
93
93
|
|
94
94
|
def consume_separator
|
95
95
|
return next_token if seen_separator?
|
96
|
-
@commas, @words = 0, 0
|
96
|
+
@commas, @words, @initials = 0, 0, 0
|
97
97
|
[:AND, :AND]
|
98
98
|
end
|
99
99
|
|
@@ -104,6 +104,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
104
104
|
|
105
105
|
def consume_word(type, word)
|
106
106
|
@words += 1
|
107
|
+
@initials += 1 if type == :UWORD && word =~ /^\s*[[:alpha:]]\.\s*$/
|
107
108
|
[type, word]
|
108
109
|
end
|
109
110
|
|
@@ -123,13 +124,13 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
123
124
|
def will_see_suffix?
|
124
125
|
input.peek(8).to_s.strip.split(/\s+/)[0] =~ suffix
|
125
126
|
end
|
126
|
-
|
127
|
+
|
127
128
|
def will_see_initial?
|
128
129
|
input.peek(6).to_s.strip.split(/\s+/)[0] =~ /[[:alpha:]]\./
|
129
130
|
end
|
130
131
|
|
131
132
|
def seen_full_name?
|
132
|
-
prefer_comma_as_separator? && @words > 1 && !will_see_initial?
|
133
|
+
prefer_comma_as_separator? && @words > 1 && (@initials > 0 || !will_see_initial?)
|
133
134
|
end
|
134
135
|
|
135
136
|
def next_token
|
data/lib/namae/parser.y
CHANGED
@@ -99,7 +99,7 @@ require 'strscan'
|
|
99
99
|
:comma => ',',
|
100
100
|
:separator => /\s*(\band\b|\&)\s*/i,
|
101
101
|
:title => /\s*\b(sir|lord|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
102
|
-
:suffix => /\s*\b(jr|sr|[ivx]
|
102
|
+
:suffix => /\s*\b(jr|sr|[ivx]{2,})\.?\s*/i,
|
103
103
|
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
104
104
|
}
|
105
105
|
end
|
@@ -151,7 +151,7 @@ require 'strscan'
|
|
151
151
|
end
|
152
152
|
|
153
153
|
def reset
|
154
|
-
@commas, @words, @yydebug = 0, 0, debug?
|
154
|
+
@commas, @words, @initials, @yydebug = 0, 0, 0, debug?
|
155
155
|
self
|
156
156
|
end
|
157
157
|
|
@@ -167,7 +167,7 @@ require 'strscan'
|
|
167
167
|
|
168
168
|
def consume_separator
|
169
169
|
return next_token if seen_separator?
|
170
|
-
@commas, @words = 0, 0
|
170
|
+
@commas, @words, @initials = 0, 0, 0
|
171
171
|
[:AND, :AND]
|
172
172
|
end
|
173
173
|
|
@@ -178,6 +178,7 @@ require 'strscan'
|
|
178
178
|
|
179
179
|
def consume_word(type, word)
|
180
180
|
@words += 1
|
181
|
+
@initials += 1 if type == :UWORD && word =~ /^\s*[[:alpha:]]\.\s*$/
|
181
182
|
[type, word]
|
182
183
|
end
|
183
184
|
|
@@ -197,13 +198,13 @@ require 'strscan'
|
|
197
198
|
def will_see_suffix?
|
198
199
|
input.peek(8).to_s.strip.split(/\s+/)[0] =~ suffix
|
199
200
|
end
|
200
|
-
|
201
|
+
|
201
202
|
def will_see_initial?
|
202
203
|
input.peek(6).to_s.strip.split(/\s+/)[0] =~ /[[:alpha:]]\./
|
203
204
|
end
|
204
205
|
|
205
206
|
def seen_full_name?
|
206
|
-
prefer_comma_as_separator? && @words > 1 && !will_see_initial?
|
207
|
+
prefer_comma_as_separator? && @words > 1 && (@initials > 0 || !will_see_initial?)
|
207
208
|
end
|
208
209
|
|
209
210
|
def next_token
|
data/lib/namae/version.rb
CHANGED
data/namae.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "namae"
|
8
|
-
s.version = "0.7.0"
|
8
|
+
s.version = "0.7.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Sylvester Keil", "Dan Collis-Puro"]
|
12
|
-
s.date = "2012-
|
12
|
+
s.date = "2012-11-08"
|
13
13
|
s.description = " Namae (\u{540d}\u{524d}) is a parser for human names. It recognizes personal names of various cultural backgrounds and tries to split them into their component parts (e.g., given and family names, honorifics etc.). "
|
14
14
|
s.email = ["sylvester@keil.or.at", "dan@collispuro.com"]
|
15
15
|
s.extra_rdoc_files = [
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: namae
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.0
|
4
|
+
version: 0.7.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2012-
|
13
|
+
date: 2012-11-08 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: racc
|
@@ -147,7 +147,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
147
147
|
version: '0'
|
148
148
|
segments:
|
149
149
|
- 0
|
150
|
-
hash: -
|
150
|
+
hash: -3595872270413772674
|
151
151
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
152
152
|
none: false
|
153
153
|
requirements:
|