namae 0.9.3 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +5 -0
- data/Gemfile +2 -2
- data/features/lists.feature +36 -2
- data/lib/namae/parser.rb +10 -5
- data/lib/namae/parser.y +10 -5
- data/lib/namae/version.rb +2 -2
- data/namae.gemspec +3 -3
- data/spec/namae/parser_spec.rb +21 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7e3498b418afe338293a9fe9c7b4c8747ee014b2
|
4
|
+
data.tar.gz: cac2386cd67b385330b50a8d722429100020b2e6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a2001bf45ca48aa8d9d1a8f31143a3687f5013ffa4e8be7837c7efa08a5acb9257cd8e7685a0ee19f89a49d8e4bf841a596cc99e734087d6bb32828c025b0e34
|
7
|
+
data.tar.gz: e7b62fc3a1278938f42e5fcee152cf47ba06e5378f3bedef6bc238d3983a11d2ffc25bc7ec6c8dd90655dc6ca29f976d62e2c3dd710d6cd0700056481244f1d3
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
@@ -21,8 +21,8 @@ group :optional do
|
|
21
21
|
end
|
22
22
|
|
23
23
|
group :debug do
|
24
|
-
gem 'debugger', '~>1.6', :platform => [:
|
25
|
-
gem 'byebug', '~>3.5', :platform =>
|
24
|
+
gem 'debugger', '~>1.6', :platform => [:mri_19]
|
25
|
+
gem 'byebug', '~>3.5', :platform => :mri if RUBY_VERSION > '2.0'
|
26
26
|
gem 'rubinius-compiler', '~>2.0', :platform => :rbx
|
27
27
|
gem 'rubinius-debugger', '~>2.0', :platform => :rbx
|
28
28
|
end
|
data/features/lists.feature
CHANGED
@@ -30,6 +30,22 @@ Feature: Parse a list of names
|
|
30
30
|
| Dennis | Ritchie |
|
31
31
|
| Donald | Knuth |
|
32
32
|
|
33
|
+
@list
|
34
|
+
Scenario: A list of names separated by semicolons
|
35
|
+
When I parse the names "John D. Smith; Jack R. Johnson; Emily Tanner"
|
36
|
+
Then there should be 3 names
|
37
|
+
And the names should be:
|
38
|
+
| given | family |
|
39
|
+
| John D. | Smith |
|
40
|
+
| Jack R. | Johnson |
|
41
|
+
| Emily | Tanner |
|
42
|
+
When I parse the names "Smith, John D.; Johnson, Jack R.; Tanner, Emily"
|
43
|
+
Then there should be 3 names
|
44
|
+
And the names should be:
|
45
|
+
| given | family |
|
46
|
+
| John D. | Smith |
|
47
|
+
| Jack R. | Johnson |
|
48
|
+
| Emily | Tanner |
|
33
49
|
|
34
50
|
@list
|
35
51
|
Scenario: A list of sort-order names with initials separated by commas
|
@@ -51,6 +67,24 @@ Feature: Parse a list of names
|
|
51
67
|
| Dennis | Ritchie |
|
52
68
|
| Donald | Knuth |
|
53
69
|
|
70
|
+
@list
|
71
|
+
Scenario: A list of mixed names separated by semicolons, commas and 'and'
|
72
|
+
Given a parser that prefers commas as separators
|
73
|
+
When I parse the names "John D. Smith, Jack R. Johnson & Emily Tanner"
|
74
|
+
Then there should be 3 names
|
75
|
+
And the names should be:
|
76
|
+
| given | family |
|
77
|
+
| John D. | Smith |
|
78
|
+
| Jack R. | Johnson |
|
79
|
+
| Emily | Tanner |
|
80
|
+
When I parse the names "C. Foster; C. Hamel, C. Desroches"
|
81
|
+
Then there should be 3 names
|
82
|
+
And the names should be:
|
83
|
+
| given | family |
|
84
|
+
| C. | Foster |
|
85
|
+
| C. | Hamel |
|
86
|
+
| C. | Desroches |
|
87
|
+
|
54
88
|
@list
|
55
89
|
Scenario: A list of display-order names separated by commas and 'and'
|
56
90
|
Given a parser that prefers commas as separators
|
@@ -64,7 +98,7 @@ Feature: Parse a list of names
|
|
64
98
|
|
65
99
|
@list @wip
|
66
100
|
Scenario: A list of names separated by commas
|
67
|
-
|
101
|
+
Given a parser that prefers commas as separators
|
68
102
|
When I parse the names "G. Proctor, M. Cooper, P. Sanders & B. Malcom"
|
69
103
|
Then the names should be:
|
70
104
|
| given | family |
|
@@ -81,7 +115,7 @@ Feature: Parse a list of names
|
|
81
115
|
| B | Malcom |
|
82
116
|
|
83
117
|
Scenario: A list of names with particles separated by commas
|
84
|
-
|
118
|
+
Given a parser that prefers commas as separators
|
85
119
|
When I parse the names "Di Proctor, M., von Cooper, P."
|
86
120
|
Then the names should be:
|
87
121
|
| given | family |
|
data/lib/namae/parser.rb
CHANGED
@@ -23,7 +23,8 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 107)
|
|
23
23
|
:debug => false,
|
24
24
|
:prefer_comma_as_separator => false,
|
25
25
|
:comma => ',',
|
26
|
-
:
|
26
|
+
:stops => ',;',
|
27
|
+
:separator => /\s*(\band\b|\&|;)\s*/i,
|
27
28
|
:title => /\s*\b(sir|lord|count(ess)?|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
28
29
|
:suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
|
29
30
|
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
@@ -42,6 +43,10 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 107)
|
|
42
43
|
options[:comma]
|
43
44
|
end
|
44
45
|
|
46
|
+
def stops
|
47
|
+
options[:stops]
|
48
|
+
end
|
49
|
+
|
45
50
|
def title
|
46
51
|
options[:title]
|
47
52
|
end
|
@@ -142,7 +147,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 107)
|
|
142
147
|
nil
|
143
148
|
when input.scan(separator)
|
144
149
|
consume_separator
|
145
|
-
when input.scan(/\s
|
150
|
+
when input.scan(/\s*#{comma}\s*/)
|
146
151
|
if @commas.zero? && !seen_full_name? || @commas == 1 && suffix?
|
147
152
|
consume_comma
|
148
153
|
else
|
@@ -156,11 +161,11 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 107)
|
|
156
161
|
consume_word(:SUFFIX, input.matched.strip)
|
157
162
|
when input.scan(appellation)
|
158
163
|
[:APPELLATION, input.matched.strip]
|
159
|
-
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{
|
164
|
+
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{stops}]*/)
|
160
165
|
consume_word(:UWORD, input.matched)
|
161
|
-
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{
|
166
|
+
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{stops}]*/)
|
162
167
|
consume_word(:LWORD, input.matched)
|
163
|
-
when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{
|
168
|
+
when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{stops}]*/)
|
164
169
|
consume_word(:PWORD, input.matched)
|
165
170
|
when input.scan(/('[^'\n]+')|("[^"\n]+")/)
|
166
171
|
consume_word(:NICK, input.matched[1...-1])
|
data/lib/namae/parser.y
CHANGED
@@ -114,7 +114,8 @@ require 'strscan'
|
|
114
114
|
:debug => false,
|
115
115
|
:prefer_comma_as_separator => false,
|
116
116
|
:comma => ',',
|
117
|
-
:
|
117
|
+
:stops => ',;',
|
118
|
+
:separator => /\s*(\band\b|\&|;)\s*/i,
|
118
119
|
:title => /\s*\b(sir|lord|count(ess)?|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
119
120
|
:suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
|
120
121
|
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
@@ -133,6 +134,10 @@ require 'strscan'
|
|
133
134
|
options[:comma]
|
134
135
|
end
|
135
136
|
|
137
|
+
def stops
|
138
|
+
options[:stops]
|
139
|
+
end
|
140
|
+
|
136
141
|
def title
|
137
142
|
options[:title]
|
138
143
|
end
|
@@ -233,7 +238,7 @@ require 'strscan'
|
|
233
238
|
nil
|
234
239
|
when input.scan(separator)
|
235
240
|
consume_separator
|
236
|
-
when input.scan(/\s
|
241
|
+
when input.scan(/\s*#{comma}\s*/)
|
237
242
|
if @commas.zero? && !seen_full_name? || @commas == 1 && suffix?
|
238
243
|
consume_comma
|
239
244
|
else
|
@@ -247,11 +252,11 @@ require 'strscan'
|
|
247
252
|
consume_word(:SUFFIX, input.matched.strip)
|
248
253
|
when input.scan(appellation)
|
249
254
|
[:APPELLATION, input.matched.strip]
|
250
|
-
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{
|
255
|
+
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{stops}]*/)
|
251
256
|
consume_word(:UWORD, input.matched)
|
252
|
-
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{
|
257
|
+
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{stops}]*/)
|
253
258
|
consume_word(:LWORD, input.matched)
|
254
|
-
when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{
|
259
|
+
when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{stops}]*/)
|
255
260
|
consume_word(:PWORD, input.matched)
|
256
261
|
when input.scan(/('[^'\n]+')|("[^"\n]+")/)
|
257
262
|
consume_word(:NICK, input.matched[1...-1])
|
data/lib/namae/version.rb
CHANGED
data/namae.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: namae 0.9.3 ruby lib
|
5
|
+
# stub: namae 0.10.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "namae"
|
9
|
-
s.version = "0.9.3"
|
9
|
+
s.version = "0.10.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Sylvester Keil", "Dan Collis-Puro"]
|
14
|
-
s.date = "2015-
|
14
|
+
s.date = "2015-04-26"
|
15
15
|
s.description = " Namae (\u{540d}\u{524d}) is a parser for human names. It recognizes personal names of various cultural backgrounds and tries to split them into their component parts (e.g., given and family names, honorifics etc.). "
|
16
16
|
s.email = ["sylvester@keil.or.at", "dan@collispuro.com"]
|
17
17
|
s.extra_rdoc_files = [
|
data/spec/namae/parser_spec.rb
CHANGED
@@ -44,6 +44,27 @@ module Namae
|
|
44
44
|
end
|
45
45
|
end
|
46
46
|
|
47
|
+
describe 'when the next input is ", "' do
|
48
|
+
before { parser.send(:input).string = ', ' }
|
49
|
+
it 'returns a COMMA token' do
|
50
|
+
expect(parser.send(:next_token)).to eq([:COMMA, :COMMA])
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
describe 'when the next input is "; "' do
|
55
|
+
before { parser.send(:input).string = '; ' }
|
56
|
+
it 'returns an AND token' do
|
57
|
+
expect(parser.send(:next_token)).to eq([:AND, :AND])
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
describe 'when the next input is "foo;"' do
|
62
|
+
before { parser.send(:input).string = 'foo;' }
|
63
|
+
it 'returns an LWORD token "foo"' do
|
64
|
+
expect(parser.send(:next_token)).to eq([:LWORD, 'foo'])
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
47
68
|
describe 'when the next input is " \'foo bar\' "' do
|
48
69
|
before { parser.send(:input).string = " 'foo bar' " }
|
49
70
|
it 'returns a NICK token' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: namae
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.3
|
4
|
+
version: 0.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sylvester Keil
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-04-26 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: simplecov
|
@@ -134,3 +134,4 @@ signing_key:
|
|
134
134
|
specification_version: 4
|
135
135
|
summary: Namae (名前) parses personal names and splits them into their component parts.
|
136
136
|
test_files: []
|
137
|
+
has_rdoc:
|