namae 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/features/lists.feature +12 -0
- data/lib/namae/parser.rb +6 -5
- data/lib/namae/parser.y +6 -5
- data/lib/namae/version.rb +1 -1
- data/namae.gemspec +2 -2
- metadata +3 -3
data/features/lists.feature
CHANGED
@@ -61,3 +61,15 @@ Feature: Parse a list of names
|
|
61
61
|
| Brian | Kernighan |
|
62
62
|
| Dennis | Ritchie |
|
63
63
|
| Donald | Knuth |
|
64
|
+
|
65
|
+
@list @wip
|
66
|
+
Scenario: A list of names separated by commas
|
67
|
+
Given a parser that prefers commas as separators
|
68
|
+
When I parse the names "G. Proctor, M. Cooper, P. Sanders & B. Malcom"
|
69
|
+
# Then there should be 4 names
|
70
|
+
Then the names should be:
|
71
|
+
| given | family |
|
72
|
+
| G. | Proctor |
|
73
|
+
| M. | Cooper |
|
74
|
+
| P. | Sanders |
|
75
|
+
| B. | Malcom |
|
data/lib/namae/parser.rb
CHANGED
@@ -25,7 +25,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
25
25
|
:comma => ',',
|
26
26
|
:separator => /\s*(\band\b|\&)\s*/i,
|
27
27
|
:title => /\s*\b(sir|lord|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
28
|
-
:suffix => /\s*\b(jr|sr|[ivx]
|
28
|
+
:suffix => /\s*\b(jr|sr|[ivx]{2,})\.?\s*/i,
|
29
29
|
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
30
30
|
}
|
31
31
|
end
|
@@ -77,7 +77,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
77
77
|
end
|
78
78
|
|
79
79
|
def reset
|
80
|
-
@commas, @words, @yydebug = 0, 0, debug?
|
80
|
+
@commas, @words, @initials, @yydebug = 0, 0, 0, debug?
|
81
81
|
self
|
82
82
|
end
|
83
83
|
|
@@ -93,7 +93,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
93
93
|
|
94
94
|
def consume_separator
|
95
95
|
return next_token if seen_separator?
|
96
|
-
@commas, @words = 0, 0
|
96
|
+
@commas, @words, @initials = 0, 0, 0
|
97
97
|
[:AND, :AND]
|
98
98
|
end
|
99
99
|
|
@@ -104,6 +104,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
104
104
|
|
105
105
|
def consume_word(type, word)
|
106
106
|
@words += 1
|
107
|
+
@initials += 1 if type == :UWORD && word =~ /^\s*[[:alpha:]]\.\s*$/
|
107
108
|
[type, word]
|
108
109
|
end
|
109
110
|
|
@@ -123,13 +124,13 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
123
124
|
def will_see_suffix?
|
124
125
|
input.peek(8).to_s.strip.split(/\s+/)[0] =~ suffix
|
125
126
|
end
|
126
|
-
|
127
|
+
|
127
128
|
def will_see_initial?
|
128
129
|
input.peek(6).to_s.strip.split(/\s+/)[0] =~ /[[:alpha:]]\./
|
129
130
|
end
|
130
131
|
|
131
132
|
def seen_full_name?
|
132
|
-
prefer_comma_as_separator? && @words > 1 && !will_see_initial?
|
133
|
+
prefer_comma_as_separator? && @words > 1 && (@initials > 0 || !will_see_initial?)
|
133
134
|
end
|
134
135
|
|
135
136
|
def next_token
|
data/lib/namae/parser.y
CHANGED
@@ -99,7 +99,7 @@ require 'strscan'
|
|
99
99
|
:comma => ',',
|
100
100
|
:separator => /\s*(\band\b|\&)\s*/i,
|
101
101
|
:title => /\s*\b(sir|lord|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
102
|
-
:suffix => /\s*\b(jr|sr|[ivx]
|
102
|
+
:suffix => /\s*\b(jr|sr|[ivx]{2,})\.?\s*/i,
|
103
103
|
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
104
104
|
}
|
105
105
|
end
|
@@ -151,7 +151,7 @@ require 'strscan'
|
|
151
151
|
end
|
152
152
|
|
153
153
|
def reset
|
154
|
-
@commas, @words, @yydebug = 0, 0, debug?
|
154
|
+
@commas, @words, @initials, @yydebug = 0, 0, 0, debug?
|
155
155
|
self
|
156
156
|
end
|
157
157
|
|
@@ -167,7 +167,7 @@ require 'strscan'
|
|
167
167
|
|
168
168
|
def consume_separator
|
169
169
|
return next_token if seen_separator?
|
170
|
-
@commas, @words = 0, 0
|
170
|
+
@commas, @words, @initials = 0, 0, 0
|
171
171
|
[:AND, :AND]
|
172
172
|
end
|
173
173
|
|
@@ -178,6 +178,7 @@ require 'strscan'
|
|
178
178
|
|
179
179
|
def consume_word(type, word)
|
180
180
|
@words += 1
|
181
|
+
@initials += 1 if type == :UWORD && word =~ /^\s*[[:alpha:]]\.\s*$/
|
181
182
|
[type, word]
|
182
183
|
end
|
183
184
|
|
@@ -197,13 +198,13 @@ require 'strscan'
|
|
197
198
|
def will_see_suffix?
|
198
199
|
input.peek(8).to_s.strip.split(/\s+/)[0] =~ suffix
|
199
200
|
end
|
200
|
-
|
201
|
+
|
201
202
|
def will_see_initial?
|
202
203
|
input.peek(6).to_s.strip.split(/\s+/)[0] =~ /[[:alpha:]]\./
|
203
204
|
end
|
204
205
|
|
205
206
|
def seen_full_name?
|
206
|
-
prefer_comma_as_separator? && @words > 1 && !will_see_initial?
|
207
|
+
prefer_comma_as_separator? && @words > 1 && (@initials > 0 || !will_see_initial?)
|
207
208
|
end
|
208
209
|
|
209
210
|
def next_token
|
data/lib/namae/version.rb
CHANGED
data/namae.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "namae"
|
8
|
-
s.version = "0.7.0"
|
8
|
+
s.version = "0.7.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Sylvester Keil", "Dan Collis-Puro"]
|
12
|
-
s.date = "2012-
|
12
|
+
s.date = "2012-11-08"
|
13
13
|
s.description = " Namae (\u{540d}\u{524d}) is a parser for human names. It recognizes personal names of various cultural backgrounds and tries to split them into their component parts (e.g., given and family names, honorifics etc.). "
|
14
14
|
s.email = ["sylvester@keil.or.at", "dan@collispuro.com"]
|
15
15
|
s.extra_rdoc_files = [
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: namae
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.0
|
4
|
+
version: 0.7.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2012-
|
13
|
+
date: 2012-11-08 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: racc
|
@@ -147,7 +147,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
147
147
|
version: '0'
|
148
148
|
segments:
|
149
149
|
- 0
|
150
|
-
hash: -
|
150
|
+
hash: -3595872270413772674
|
151
151
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
152
152
|
none: false
|
153
153
|
requirements:
|