namae 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/features/lists.feature +43 -0
- data/features/step_definitions/namae_steps.rb +10 -0
- data/features/support/env.rb +6 -0
- data/lib/namae/parser.rb +42 -7
- data/lib/namae/parser.y +40 -5
- data/lib/namae/version.rb +1 -1
- data/namae.gemspec +4 -3
- metadata +47 -16
@@ -0,0 +1,43 @@
|
|
1
|
+
Feature: Parse a list of names
|
2
|
+
As a hacker who works with Namae
|
3
|
+
I want to be able to parse multiple names in a list
|
4
|
+
|
5
|
+
@list
|
6
|
+
Scenario: A list of names separated by 'and'
|
7
|
+
When I parse the names "Plato and Archimedes and Publius Ovidius Naso"
|
8
|
+
Then there should be 3 names
|
9
|
+
And the names should be:
|
10
|
+
| given | family |
|
11
|
+
| Plato | |
|
12
|
+
| Archimedes | |
|
13
|
+
| Publius Ovidius | Naso |
|
14
|
+
|
15
|
+
@list
|
16
|
+
Scenario: A list of sort-order names separated by commas
|
17
|
+
When I parse the names "Kernighan, Brian, Ritchie, Dennis, Knuth, Donald"
|
18
|
+
Then there should be 3 names
|
19
|
+
And the names should be:
|
20
|
+
| given | family |
|
21
|
+
| Brian | Kernighan |
|
22
|
+
| Dennis | Ritchie |
|
23
|
+
| Donald | Knuth |
|
24
|
+
|
25
|
+
@list
|
26
|
+
Scenario: A list of sort-order names with initials separated by commas
|
27
|
+
When I parse the names "Kernighan, B., Ritchie, D., Knuth, D."
|
28
|
+
Then there should be 3 names
|
29
|
+
And the names should be:
|
30
|
+
| given | family |
|
31
|
+
| B. | Kernighan |
|
32
|
+
| D. | Ritchie |
|
33
|
+
| D. | Knuth |
|
34
|
+
|
35
|
+
@list
|
36
|
+
Scenario: A list of mixed names separated by commas and 'and'
|
37
|
+
When I parse the names "Kernighan, Brian, Ritchie, Dennis and Donald Knuth"
|
38
|
+
Then there should be 3 names
|
39
|
+
And the names should be:
|
40
|
+
| given | family |
|
41
|
+
| Brian | Kernighan |
|
42
|
+
| Dennis | Ritchie |
|
43
|
+
| Donald | Knuth |
|
@@ -20,3 +20,13 @@ Then /^the parts should be:$/ do |table|
|
|
20
20
|
row.values_at('given', 'particle', 'family', 'suffix', 'title', 'appellation', 'nick')
|
21
21
|
end
|
22
22
|
end
|
23
|
+
|
24
|
+
Then /^there should be (\d+) names$/ do |count|
|
25
|
+
@names.length.should == count.to_i
|
26
|
+
end
|
27
|
+
|
28
|
+
Then /^the names should be:$/ do |table|
|
29
|
+
table.hashes.each_with_index do |row, i|
|
30
|
+
@names[i].values_at(*row.keys.map(&:to_sym)).map(&:to_s).should == row.values
|
31
|
+
end
|
32
|
+
end
|
data/features/support/env.rb
CHANGED
data/lib/namae/parser.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
#
|
2
2
|
# DO NOT MODIFY!!!!
|
3
|
-
# This file is automatically generated by Racc 1.4.
|
3
|
+
# This file is automatically generated by Racc 1.4.9
|
4
4
|
# from Racc grammer file "".
|
5
5
|
#
|
6
6
|
|
@@ -24,6 +24,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
24
24
|
:comma => ',',
|
25
25
|
:separator => /\s*(\band\b|\&)\s*/i,
|
26
26
|
:title => /\s*\b(sir|lord|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
27
|
+
:suffix => /\s*\b(jr|sr|[ivx]+)\.?\s*/i,
|
27
28
|
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
28
29
|
}
|
29
30
|
end
|
@@ -44,6 +45,10 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
44
45
|
options[:title]
|
45
46
|
end
|
46
47
|
|
48
|
+
def suffix
|
49
|
+
options[:suffix]
|
50
|
+
end
|
51
|
+
|
47
52
|
def appellation
|
48
53
|
options[:appellation]
|
49
54
|
end
|
@@ -56,21 +61,51 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
|
56
61
|
end
|
57
62
|
|
58
63
|
def parse!(string)
|
59
|
-
|
60
|
-
|
64
|
+
input.string = normalize(string)
|
65
|
+
reset
|
61
66
|
do_parse
|
62
67
|
end
|
63
|
-
|
64
|
-
private
|
65
68
|
|
69
|
+
def normalize(string)
|
70
|
+
string = string.strip
|
71
|
+
string
|
72
|
+
end
|
73
|
+
|
74
|
+
def reset
|
75
|
+
@commas, @yydebug = 0, debug?
|
76
|
+
self
|
77
|
+
end
|
78
|
+
|
79
|
+
private
|
80
|
+
|
81
|
+
def consume_separator
|
82
|
+
@commas = 0
|
83
|
+
[:AND, nil]
|
84
|
+
end
|
85
|
+
|
86
|
+
def consume_comma
|
87
|
+
@commas += 1
|
88
|
+
[:COMMA, nil]
|
89
|
+
end
|
90
|
+
|
91
|
+
def seen_suffix?
|
92
|
+
return false unless @vstack
|
93
|
+
return true if @vstack[-1].nil?
|
94
|
+
@vstack[-1] =~ suffix
|
95
|
+
end
|
96
|
+
|
66
97
|
def next_token
|
67
98
|
case
|
68
99
|
when input.nil?, input.eos?
|
69
100
|
nil
|
70
101
|
when input.scan(separator)
|
71
|
-
|
102
|
+
consume_separator
|
72
103
|
when input.scan(/\s*,\s*/)
|
73
|
-
|
104
|
+
if @commas.zero? || @commas == 1 && seen_suffix?
|
105
|
+
consume_comma
|
106
|
+
else
|
107
|
+
consume_separator
|
108
|
+
end
|
74
109
|
when input.scan(/\s+/)
|
75
110
|
next_token
|
76
111
|
when input.scan(title)
|
data/lib/namae/parser.y
CHANGED
@@ -98,6 +98,7 @@ require 'strscan'
|
|
98
98
|
:comma => ',',
|
99
99
|
:separator => /\s*(\band\b|\&)\s*/i,
|
100
100
|
:title => /\s*\b(sir|lord|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
101
|
+
:suffix => /\s*\b(jr|sr|[ivx]+)\.?\s*/i,
|
101
102
|
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
102
103
|
}
|
103
104
|
end
|
@@ -118,6 +119,10 @@ require 'strscan'
|
|
118
119
|
options[:title]
|
119
120
|
end
|
120
121
|
|
122
|
+
def suffix
|
123
|
+
options[:suffix]
|
124
|
+
end
|
125
|
+
|
121
126
|
def appellation
|
122
127
|
options[:appellation]
|
123
128
|
end
|
@@ -130,21 +135,51 @@ require 'strscan'
|
|
130
135
|
end
|
131
136
|
|
132
137
|
def parse!(string)
|
133
|
-
|
134
|
-
|
138
|
+
input.string = normalize(string)
|
139
|
+
reset
|
135
140
|
do_parse
|
136
141
|
end
|
137
|
-
|
142
|
+
|
143
|
+
def normalize(string)
|
144
|
+
string = string.strip
|
145
|
+
string
|
146
|
+
end
|
147
|
+
|
148
|
+
def reset
|
149
|
+
@commas, @yydebug = 0, debug?
|
150
|
+
self
|
151
|
+
end
|
152
|
+
|
138
153
|
private
|
154
|
+
|
155
|
+
def consume_separator
|
156
|
+
@commas = 0
|
157
|
+
[:AND, nil]
|
158
|
+
end
|
139
159
|
|
160
|
+
def consume_comma
|
161
|
+
@commas += 1
|
162
|
+
[:COMMA, nil]
|
163
|
+
end
|
164
|
+
|
165
|
+
def seen_suffix?
|
166
|
+
return false unless @vstack
|
167
|
+
return true if @vstack[-1].nil?
|
168
|
+
@vstack[-1] =~ suffix
|
169
|
+
end
|
170
|
+
|
140
171
|
def next_token
|
141
172
|
case
|
142
173
|
when input.nil?, input.eos?
|
143
174
|
nil
|
144
175
|
when input.scan(separator)
|
145
|
-
|
176
|
+
consume_separator
|
146
177
|
when input.scan(/\s*,\s*/)
|
147
|
-
|
178
|
+
if @commas.zero? || @commas == 1 && seen_suffix?
|
179
|
+
consume_comma
|
180
|
+
else
|
181
|
+
consume_separator
|
182
|
+
end
|
148
183
|
when input.scan(/\s+/)
|
149
184
|
next_token
|
150
185
|
when input.scan(title)
|
data/lib/namae/version.rb
CHANGED
data/namae.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "namae"
|
8
|
-
s.version = "0.3.0"
|
8
|
+
s.version = "0.4.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Sylvester Keil", "Dan Collis-Puro"]
|
12
|
-
s.date = "2012-
|
12
|
+
s.date = "2012-10-29"
|
13
13
|
s.description = " Namae (\u{540d}\u{524d}) is a parser for human names. It recognizes personal names of various cultural backgrounds and tries to split them into their component parts (e.g., given and family names, honorifics etc.). "
|
14
14
|
s.email = ["sylvester@keil.or.at", "dan@collispuro.com"]
|
15
15
|
s.extra_rdoc_files = [
|
@@ -30,6 +30,7 @@ Gem::Specification.new do |s|
|
|
30
30
|
"cucumber.yml",
|
31
31
|
"features/bibtex.feature",
|
32
32
|
"features/examples.feature",
|
33
|
+
"features/lists.feature",
|
33
34
|
"features/step_definitions/namae_steps.rb",
|
34
35
|
"features/support/env.rb",
|
35
36
|
"lib/namae.rb",
|
@@ -47,7 +48,7 @@ Gem::Specification.new do |s|
|
|
47
48
|
s.homepage = "https://github.com/berkmancenter/namae"
|
48
49
|
s.licenses = ["AGPL"]
|
49
50
|
s.require_paths = ["lib"]
|
50
|
-
s.rubygems_version = "1.8.
|
51
|
+
s.rubygems_version = "1.8.24"
|
51
52
|
s.summary = "Namae (\u{540d}\u{524d}) parses personal names and splits them into their component parts."
|
52
53
|
|
53
54
|
if s.respond_to? :specification_version then
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: namae
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,11 +10,11 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2012-
|
13
|
+
date: 2012-10-29 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: racc
|
17
|
-
requirement:
|
17
|
+
requirement: !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ~>
|
@@ -22,10 +22,15 @@ dependencies:
|
|
22
22
|
version: 1.4.8
|
23
23
|
type: :development
|
24
24
|
prerelease: false
|
25
|
-
version_requirements:
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
none: false
|
27
|
+
requirements:
|
28
|
+
- - ~>
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
version: 1.4.8
|
26
31
|
- !ruby/object:Gem::Dependency
|
27
32
|
name: rdoc
|
28
|
-
requirement:
|
33
|
+
requirement: !ruby/object:Gem::Requirement
|
29
34
|
none: false
|
30
35
|
requirements:
|
31
36
|
- - ~>
|
@@ -33,10 +38,15 @@ dependencies:
|
|
33
38
|
version: '3.12'
|
34
39
|
type: :development
|
35
40
|
prerelease: false
|
36
|
-
version_requirements:
|
41
|
+
version_requirements: !ruby/object:Gem::Requirement
|
42
|
+
none: false
|
43
|
+
requirements:
|
44
|
+
- - ~>
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '3.12'
|
37
47
|
- !ruby/object:Gem::Dependency
|
38
48
|
name: bundler
|
39
|
-
requirement:
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
40
50
|
none: false
|
41
51
|
requirements:
|
42
52
|
- - ~>
|
@@ -44,10 +54,15 @@ dependencies:
|
|
44
54
|
version: '1.1'
|
45
55
|
type: :development
|
46
56
|
prerelease: false
|
47
|
-
version_requirements:
|
57
|
+
version_requirements: !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
59
|
+
requirements:
|
60
|
+
- - ~>
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '1.1'
|
48
63
|
- !ruby/object:Gem::Dependency
|
49
64
|
name: simplecov
|
50
|
-
requirement:
|
65
|
+
requirement: !ruby/object:Gem::Requirement
|
51
66
|
none: false
|
52
67
|
requirements:
|
53
68
|
- - ! '>='
|
@@ -55,10 +70,15 @@ dependencies:
|
|
55
70
|
version: '0'
|
56
71
|
type: :development
|
57
72
|
prerelease: false
|
58
|
-
version_requirements:
|
73
|
+
version_requirements: !ruby/object:Gem::Requirement
|
74
|
+
none: false
|
75
|
+
requirements:
|
76
|
+
- - ! '>='
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '0'
|
59
79
|
- !ruby/object:Gem::Dependency
|
60
80
|
name: ZenTest
|
61
|
-
requirement:
|
81
|
+
requirement: !ruby/object:Gem::Requirement
|
62
82
|
none: false
|
63
83
|
requirements:
|
64
84
|
- - ~>
|
@@ -66,10 +86,15 @@ dependencies:
|
|
66
86
|
version: 4.8.0
|
67
87
|
type: :development
|
68
88
|
prerelease: false
|
69
|
-
version_requirements:
|
89
|
+
version_requirements: !ruby/object:Gem::Requirement
|
90
|
+
none: false
|
91
|
+
requirements:
|
92
|
+
- - ~>
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
version: 4.8.0
|
70
95
|
- !ruby/object:Gem::Dependency
|
71
96
|
name: jeweler
|
72
|
-
requirement:
|
97
|
+
requirement: !ruby/object:Gem::Requirement
|
73
98
|
none: false
|
74
99
|
requirements:
|
75
100
|
- - ~>
|
@@ -77,7 +102,12 @@ dependencies:
|
|
77
102
|
version: 1.8.3
|
78
103
|
type: :development
|
79
104
|
prerelease: false
|
80
|
-
version_requirements:
|
105
|
+
version_requirements: !ruby/object:Gem::Requirement
|
106
|
+
none: false
|
107
|
+
requirements:
|
108
|
+
- - ~>
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 1.8.3
|
81
111
|
description: ! ' Namae (名前) is a parser for human names. It recognizes personal names
|
82
112
|
of various cultural backgrounds and tries to split them into their component parts
|
83
113
|
(e.g., given and family names, honorifics etc.). '
|
@@ -103,6 +133,7 @@ files:
|
|
103
133
|
- cucumber.yml
|
104
134
|
- features/bibtex.feature
|
105
135
|
- features/examples.feature
|
136
|
+
- features/lists.feature
|
106
137
|
- features/step_definitions/namae_steps.rb
|
107
138
|
- features/support/env.rb
|
108
139
|
- lib/namae.rb
|
@@ -131,7 +162,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
131
162
|
version: '0'
|
132
163
|
segments:
|
133
164
|
- 0
|
134
|
-
hash:
|
165
|
+
hash: -4370560385267353354
|
135
166
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
136
167
|
none: false
|
137
168
|
requirements:
|
@@ -140,7 +171,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
140
171
|
version: '0'
|
141
172
|
requirements: []
|
142
173
|
rubyforge_project:
|
143
|
-
rubygems_version: 1.8.
|
174
|
+
rubygems_version: 1.8.24
|
144
175
|
signing_key:
|
145
176
|
specification_version: 3
|
146
177
|
summary: Namae (名前) parses personal names and splits them into their component parts.
|