nomener 0.2.1 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +4 -0
- data/lib/nomener.rb +2 -0
- data/lib/nomener/compounders.rb +1 -1
- data/lib/nomener/helper.rb +2 -2
- data/lib/nomener/parser.rb +66 -20
- data/lib/nomener/suffixes.rb +5 -5
- data/lib/nomener/version.rb +1 -1
- data/spec/nomener/names/guardian_spec.rb +6 -6
- data/spec/nomener/names/wikiquote_spec.rb +71 -41
- data/spec/nomener/nomener_parser_spec.rb +6 -0
- data/spec/nomener/titles_spec.rb +5 -4
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2b4d3e3d0da16de4111205c7c0355bd8db5526a5
|
|
4
|
+
data.tar.gz: 314c165d421692570ebf48a64cbfdc9748f22c70
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3d1d2126b1bb6a6e6e4622a5eae671157dae540515137e2862f224a399d8e6b19c9d6f21e74796f4be2c4f47f51cf4cb7280533ce0aef79eaf121571801a9309
|
|
7
|
+
data.tar.gz: 3527b6b3e9fcbb1e3d8d317ad4d25ec1eee6ff926377722bf32581b13b47a1fff56a04cded4929955a0bc37365c88b250eced36a4a0c6558247506b0ef19f7f2
|
data/LICENSE.txt
CHANGED
data/lib/nomener.rb
CHANGED
data/lib/nomener/compounders.rb
CHANGED
data/lib/nomener/helper.rb
CHANGED
|
@@ -24,8 +24,8 @@ module Nomener
|
|
|
24
24
|
n.tr!("\u0027\u2018\u201A\u2035\u2039\u300C\uFE41\uFF62", left) # replace left single quotes
|
|
25
25
|
n.tr!("\u0027\u2019\u201B\u2032\u203A\u300D\uFE42\uFF62", right) # replace right single quotes
|
|
26
26
|
|
|
27
|
-
n.gsub!(/\./, ' ')
|
|
28
|
-
n.gsub!(/[^\p{Alpha}\-&\/
|
|
27
|
+
#n.gsub!(/\./, ' ')
|
|
28
|
+
n.gsub!(/[^\p{Alpha}\-&\/ \.\,\'\"#{leftleft}#{rightright}#{left}#{right}\(\)]/, " ") # what others may be in a name?
|
|
29
29
|
n.gsub!(/\p{Blank}+/, " ") # compress whitespace
|
|
30
30
|
n.strip! # trim space
|
|
31
31
|
|
data/lib/nomener/parser.rb
CHANGED
|
@@ -10,6 +10,30 @@ module Nomener
|
|
|
10
10
|
include Nomener::Suffixes
|
|
11
11
|
include Nomener::Compounders
|
|
12
12
|
|
|
13
|
+
# regex for stuff at the end we want to get out
|
|
14
|
+
TRAILER_TRASH = /[,|\s]+$/
|
|
15
|
+
|
|
16
|
+
# regex for name characters we aren't going to use
|
|
17
|
+
DIRTY_STUFF = /[^,'(?:\p{Alpha}(?<\.))\p{Alpha}]{2,}/
|
|
18
|
+
|
|
19
|
+
# regex for boundaries we'll use to find leftover nickname boundaries
|
|
20
|
+
NICKNAME_LEFTOVER = /["'\(\)]{2}/
|
|
21
|
+
|
|
22
|
+
# regex for matching enclosed nicknames
|
|
23
|
+
NICKNAME = /(?<=["'\(])([\p{Alpha}\-\ '\.\,]+?)(?=["'\)])/
|
|
24
|
+
|
|
25
|
+
# regex for matching last names in a "first last" pattern
|
|
26
|
+
FIRSTLAST_MATCHER = /\p{Blank}(?<fam>#{COMPOUNDS}[\p{Alpha}\-\']+)\Z/i
|
|
27
|
+
|
|
28
|
+
# regex for matching last names in a "last first" pattern
|
|
29
|
+
LASTFIRST_MATCHER = /\A(?<fam>#{COMPOUNDS}\b[\p{Alpha}\-\']+)\p{Blank}/i
|
|
30
|
+
|
|
31
|
+
# regex for matching last names in a "last, first" pattern
|
|
32
|
+
LASTCOMFIRST_MATCHER = /\A(?<fam>#{COMPOUNDS}\b[\p{Alpha}\-\'\p{Blank}]+),/i
|
|
33
|
+
|
|
34
|
+
# period. probably not much performance help.
|
|
35
|
+
PERIOD = /\./
|
|
36
|
+
|
|
13
37
|
# Public: parse a string into name parts
|
|
14
38
|
#
|
|
15
39
|
# name - a string to get the name from
|
|
@@ -39,19 +63,30 @@ module Nomener
|
|
|
39
63
|
def self.parse!(name, format = {:order => :auto, :spacelimit => 0})
|
|
40
64
|
raise ArgumentError, 'Name to parse not provided' unless (name.kind_of?(String) && !name.empty?)
|
|
41
65
|
|
|
42
|
-
name = Nomener::Helper.reformat
|
|
66
|
+
name = Nomener::Helper.reformat name
|
|
43
67
|
|
|
44
68
|
# grab any identified nickname before working on the rest
|
|
45
|
-
nick = parse_nick!
|
|
46
|
-
|
|
69
|
+
nick = parse_nick! name
|
|
70
|
+
cleanup! name
|
|
47
71
|
|
|
48
72
|
# grab any suffixes we can find
|
|
49
|
-
suffix = parse_suffix!
|
|
73
|
+
suffix = parse_suffix! name
|
|
74
|
+
cleanup! name
|
|
75
|
+
|
|
76
|
+
title = parse_title! name
|
|
77
|
+
cleanup! name
|
|
78
|
+
|
|
79
|
+
name.gsub! PERIOD, ' '
|
|
80
|
+
name.squeeze! " "
|
|
81
|
+
name.strip!
|
|
82
|
+
|
|
50
83
|
first = last = middle = ""
|
|
51
84
|
|
|
52
85
|
# if there's a comma, it may be a useful hint
|
|
53
86
|
if !name.index(',').nil? # && (format[:order] == :auto || format[:order] == :lcf)
|
|
54
87
|
clues = name.split(",")
|
|
88
|
+
clues.each { |i| i.strip! }
|
|
89
|
+
|
|
55
90
|
# convention is last, first
|
|
56
91
|
if clues.length == 2
|
|
57
92
|
last, first = clues
|
|
@@ -70,16 +105,17 @@ module Nomener
|
|
|
70
105
|
end
|
|
71
106
|
# titles are part of the first name
|
|
72
107
|
title = parse_title!(first) if title.nil? || title.empty?
|
|
108
|
+
elsif clues.length == 1
|
|
109
|
+
last = clues.shift
|
|
73
110
|
else
|
|
74
|
-
raise ParseError "Could not
|
|
111
|
+
raise ParseError, "Could not decipher commas in \"#{name}\""
|
|
75
112
|
end
|
|
76
113
|
elsif !name.index(" ").nil?
|
|
77
114
|
last = parse_last!(name, format[:order])
|
|
78
115
|
first, middle = parse_first!(name, format[:spacelimit])
|
|
79
|
-
elsif name.index(" ").nil?
|
|
80
|
-
first = name[0] # mononym
|
|
81
116
|
else
|
|
82
|
-
|
|
117
|
+
last = name # possibly mononym
|
|
118
|
+
first = ""
|
|
83
119
|
end
|
|
84
120
|
|
|
85
121
|
{
|
|
@@ -99,10 +135,10 @@ module Nomener
|
|
|
99
135
|
#
|
|
100
136
|
# Returns nothing
|
|
101
137
|
def self.cleanup!(dirty)
|
|
102
|
-
dirty.gsub!
|
|
138
|
+
dirty.gsub! DIRTY_STUFF, ''
|
|
103
139
|
dirty.squeeze! " "
|
|
104
140
|
# remove any trailing commas or whitespace
|
|
105
|
-
dirty.gsub!
|
|
141
|
+
dirty.gsub! TRAILER_TRASH, ''
|
|
106
142
|
dirty.strip!
|
|
107
143
|
end
|
|
108
144
|
|
|
@@ -118,8 +154,11 @@ module Nomener
|
|
|
118
154
|
titles << title.strip
|
|
119
155
|
''
|
|
120
156
|
end
|
|
121
|
-
|
|
122
|
-
|
|
157
|
+
t = titles.join " "
|
|
158
|
+
t.gsub! PERIOD, ' '
|
|
159
|
+
t.squeeze! " "
|
|
160
|
+
t.strip!
|
|
161
|
+
t
|
|
123
162
|
end
|
|
124
163
|
|
|
125
164
|
# Internal: pull off what suffixes we can
|
|
@@ -134,8 +173,11 @@ module Nomener
|
|
|
134
173
|
suffixes << suffix.strip
|
|
135
174
|
''
|
|
136
175
|
end
|
|
137
|
-
|
|
138
|
-
|
|
176
|
+
s = suffixes.join " "
|
|
177
|
+
s.gsub! /\./, ' '
|
|
178
|
+
s.squeeze! " "
|
|
179
|
+
s.strip!
|
|
180
|
+
s
|
|
139
181
|
end
|
|
140
182
|
|
|
141
183
|
# Internal: parse nickname out of string. presuming it's in quotes
|
|
@@ -145,11 +187,15 @@ module Nomener
|
|
|
145
187
|
#
|
|
146
188
|
# Returns string of the nickname found or an empty string
|
|
147
189
|
def self.parse_nick!(nm)
|
|
148
|
-
|
|
190
|
+
nick = ""
|
|
191
|
+
nm.sub! NICKNAME, ''
|
|
149
192
|
nick = $1.strip unless $1.nil?
|
|
150
|
-
nm.sub!
|
|
193
|
+
nm.sub! NICKNAME_LEFTOVER, ''
|
|
151
194
|
nm.squeeze! " "
|
|
152
|
-
nick
|
|
195
|
+
nick.gsub! /\./, ' '
|
|
196
|
+
nick.squeeze! " "
|
|
197
|
+
nick.strip!
|
|
198
|
+
nick
|
|
153
199
|
end
|
|
154
200
|
|
|
155
201
|
# Internal: parse last name from string
|
|
@@ -167,13 +213,13 @@ module Nomener
|
|
|
167
213
|
# format = :lcf if !nm.index(',').nil?
|
|
168
214
|
end
|
|
169
215
|
|
|
170
|
-
if format == :fl && n = nm.match(
|
|
216
|
+
if format == :fl && n = nm.match( FIRSTLAST_MATCHER )
|
|
171
217
|
last = n[:fam].strip
|
|
172
218
|
nm.sub!(last, "").strip!
|
|
173
|
-
elsif format == :lf && n = nm.match(
|
|
219
|
+
elsif format == :lf && n = nm.match( LASTFIRST_MATCHER )
|
|
174
220
|
last = n[:fam].strip
|
|
175
221
|
nm.sub!(last, "").strip!
|
|
176
|
-
elsif format == :lcf && n = nm.match(
|
|
222
|
+
elsif format == :lcf && n = nm.match( LASTCOMFIRST_MATCHER )
|
|
177
223
|
last = n[:fam].strip
|
|
178
224
|
nm.sub!(last, "").strip!
|
|
179
225
|
nm.sub!(',', "").strip!
|
data/lib/nomener/suffixes.rb
CHANGED
|
@@ -2,7 +2,7 @@ module Nomener
|
|
|
2
2
|
module Suffixes
|
|
3
3
|
|
|
4
4
|
# Internal: Regex to match suffixes or honorifics after names
|
|
5
|
-
SUFFIXES = %r
|
|
5
|
+
SUFFIXES = %r/(?<=\p{^Alpha})(?:
|
|
6
6
|
AB # Bachelor of Arts
|
|
7
7
|
| APC
|
|
8
8
|
| Attorney[\p{Blank}\-]at[\p{Blank}\-]Law\.? # Attorney at Law, Attorney-at-Law
|
|
@@ -16,9 +16,9 @@ module Nomener
|
|
|
16
16
|
| FAC(?:P|S) # FACP, FACS
|
|
17
17
|
| fils
|
|
18
18
|
| FRSL
|
|
19
|
-
| [VX]?I{1,3}
|
|
20
|
-
| IX|IV|V|VI|XI
|
|
21
|
-
| X{1,3}
|
|
19
|
+
| (?:[VX]?I{1,3})(?!\.) # roman numbers
|
|
20
|
+
| (?:IX|IV|V|VI|XI)(?!\.) # roman numbers
|
|
21
|
+
| (?:X{1,3})(?!\.) # roman numbers
|
|
22
22
|
| Jn?r\.?
|
|
23
23
|
| Junior
|
|
24
24
|
| LLB
|
|
@@ -33,7 +33,7 @@ module Nomener
|
|
|
33
33
|
| Sn?r\.? # Snr, Sr
|
|
34
34
|
| Senior
|
|
35
35
|
| V\.?M\.?D\.?
|
|
36
|
-
)\
|
|
36
|
+
)(?=[^\p{Alpha}\p{Blank}]+|\z)/x
|
|
37
37
|
end
|
|
38
38
|
end
|
|
39
39
|
|
data/lib/nomener/version.rb
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
require 'spec_helper'
|
|
2
2
|
|
|
3
3
|
# from http://www.theguardian.com/books/list/authorsaz
|
|
4
|
-
# { from: "Brooke Magnanti (Belle de Jour)", to: { full: "Brooke Magnanti (Belle de Jour)", first: "Brooke Magnanti (Belle de", last: "Jour)" } },
|
|
5
4
|
# { from: "Saki (Hector Hugh Munro)", to: { full: "Saki (Hector Hugh Munro)", first: "Saki (Hector Hugh", last: "Munro)" } },
|
|
6
5
|
# { from: "Steven D Levitt and Stephen J Dubner", to: { full: "Steven D Levitt and Stephen J Dubner", first: "Steven D Levitt and Stephen J", last: "Dubner" } },
|
|
7
6
|
|
|
@@ -87,7 +86,7 @@ RSpec.describe "The Guardian author list" do
|
|
|
87
86
|
{ from: "Robert Burns", to: { full: "Robert Burns", first: "Robert", last: "Burns" } },
|
|
88
87
|
{ from: "William Burroughs", to: { full: "William Burroughs", first: "William", last: "Burroughs" } },
|
|
89
88
|
{ from: "AS Byatt", to: { full: "AS Byatt", first: "AS", last: "Byatt" } },
|
|
90
|
-
{
|
|
89
|
+
{ from: "Lord Byron", to: { full: "Lord Byron", title: "Lord", last: "Byron" } },
|
|
91
90
|
|
|
92
91
|
{ from: "Carmen Callil", to: { full: "Carmen Callil", first: "Carmen", last: "Callil" } },
|
|
93
92
|
{ from: "Italo Calvino", to: { full: "Italo Calvino", first: "Italo", last: "Calvino" } },
|
|
@@ -144,7 +143,7 @@ RSpec.describe "The Guardian author list" do
|
|
|
144
143
|
{ from: "Edmund de Waal", to: { full: "Edmund de Waal", first: "Edmund", last: "de Waal" } },
|
|
145
144
|
{ from: "Daniel Defoe", to: { full: "Daniel Defoe", first: "Daniel", last: "Defoe" } },
|
|
146
145
|
{ from: "Len Deighton", to: { full: "Len Deighton", first: "Len", last: "Deighton" } },
|
|
147
|
-
{
|
|
146
|
+
{ from: "Don DeLillo", to: { full: "Don DeLillo", first: "Don", last: "DeLillo" } },
|
|
148
147
|
{ from: "Patrick DeWitt", to: { full: "Patrick DeWitt", first: "Patrick", last: "DeWitt" } },
|
|
149
148
|
{ from: "Giuseppe Tomasi di Lampedusa", to: { full: "Giuseppe Tomasi di Lampedusa", first: "Giuseppe Tomasi", last: "di Lampedusa" } },
|
|
150
149
|
{ from: "Junot Diaz", to: { full: "Junot Diaz", first: "Junot", last: "Diaz" } },
|
|
@@ -152,7 +151,7 @@ RSpec.describe "The Guardian author list" do
|
|
|
152
151
|
{ from: "Dick King-Smith", to: { full: "Dick King-Smith", first: "Dick", last: "King-Smith" } },
|
|
153
152
|
{ from: "Charles Dickens", to: { full: "Charles Dickens", first: "Charles", last: "Dickens" } },
|
|
154
153
|
{ from: "Emily Dickinson", to: { full: "Emily Dickinson", first: "Emily", last: "Dickinson" } },
|
|
155
|
-
{
|
|
154
|
+
{ from: "Don Paterson", to: { full: "Don Paterson", first: "Don", last: "Paterson" } },
|
|
156
155
|
{ from: "Emma Donoghue", to: { full: "Emma Donoghue", first: "Emma", last: "Donoghue" } },
|
|
157
156
|
{ from: "Fyodor Dostoevsky", to: { full: "Fyodor Dostoevsky", first: "Fyodor", last: "Dostoevsky" } },
|
|
158
157
|
{ from: "Roddy Doyle", to: { full: "Roddy Doyle", first: "Roddy", last: "Doyle" } },
|
|
@@ -338,6 +337,7 @@ RSpec.describe "The Guardian author list" do
|
|
|
338
337
|
{ from: "Richard Mabey", to: { full: "Richard Mabey", first: "Richard", last: "Mabey" } },
|
|
339
338
|
{ from: "George MacDonald Fraser", to: { full: "George MacDonald Fraser", first: "George MacDonald", last: "Fraser" } },
|
|
340
339
|
{ from: "Madeline Miller", to: { full: "Madeline Miller", first: "Madeline", last: "Miller" } },
|
|
340
|
+
{ from: "Brooke Magnanti (Belle de Jour)", to: { full: "Brooke (Belle de Jour) Magnanti", first: "Brooke", last: "Magnanti", nick: "Belle de Jour" } },
|
|
341
341
|
{ from: "Maggie Gee", to: { full: "Maggie Gee", first: "Maggie", last: "Gee" } },
|
|
342
342
|
{ from: "Maile Chapman", to: { full: "Maile Chapman", first: "Maile", last: "Chapman" } },
|
|
343
343
|
{ from: "Andrei Makine", to: { full: "Andrei Makine", first: "Andrei", last: "Makine" } },
|
|
@@ -484,7 +484,7 @@ RSpec.describe "The Guardian author list" do
|
|
|
484
484
|
{ from: "WG Sebald", to: { full: "WG Sebald", first: "WG", last: "Sebald" } },
|
|
485
485
|
{ from: "Will Self", to: { full: "Will Self", first: "Will", last: "Self" } },
|
|
486
486
|
{ from: "Maurice Sendak", to: { full: "Maurice Sendak", first: "Maurice", last: "Sendak" } },
|
|
487
|
-
{
|
|
487
|
+
{ from: "Dr Seuss", to: { full: "Dr Seuss", title: "Dr", last: "Seuss" } },
|
|
488
488
|
{ from: "William Shakespeare", to: { full: "William Shakespeare", first: "William", last: "Shakespeare" } },
|
|
489
489
|
{ from: "Shaun Tan", to: { full: "Shaun Tan", first: "Shaun", last: "Tan" } },
|
|
490
490
|
{ from: "Mary Shelley", to: { full: "Mary Shelley", first: "Mary", last: "Shelley" } },
|
|
@@ -581,7 +581,7 @@ RSpec.describe "The Guardian author list" do
|
|
|
581
581
|
{ from: "Emile Zola", to: { full: "Emile Zola", first: "Emile", last: "Zola" } }
|
|
582
582
|
].each do |name|
|
|
583
583
|
it "parses #{name[:from]}" do
|
|
584
|
-
skip if name
|
|
584
|
+
skip name[:skip] if name.has_key?(:skip)
|
|
585
585
|
parsed = Nomener.parse(name[:from])
|
|
586
586
|
parse_hash = parsed.to_h
|
|
587
587
|
parse_hash.each_pair do |k,v|
|
|
@@ -17,13 +17,12 @@ require 'spec_helper'
|
|
|
17
17
|
# Bojaxhi, Agnes Gonxha (Mother Teresa)
|
|
18
18
|
# Butler, Samuel (1835-1902)
|
|
19
19
|
# Butler, Samuel (1612-1680)
|
|
20
|
-
#
|
|
21
|
-
#
|
|
20
|
+
#
|
|
21
|
+
#
|
|
22
22
|
# Cecil, Robert (1st Viscount Cecil of Chelwood)
|
|
23
23
|
# Chaplin, Charlie (Sir Charles Spencer Chaplin)
|
|
24
24
|
# Chuang Chou (Chuang Tzu; Chuang Tse; Zhuang Zi)
|
|
25
25
|
# Churchill, Sarah (Duchess of Marlborough)
|
|
26
|
-
# Cole, Nat "King"
|
|
27
26
|
# Cooper, Diana (Lady Diana Manners)
|
|
28
27
|
# Courtney, Leonard H. (Lord Courtney)
|
|
29
28
|
# Dalai Lama; see Tenzin Gyatso (His Holiness the 14th Dalai Lama)
|
|
@@ -37,7 +36,6 @@ require 'spec_helper'
|
|
|
37
36
|
# Gwanghae-gun of Joseon
|
|
38
37
|
# Han Shan
|
|
39
38
|
# Hall, Evelyn Beatrice (also known by pseudonym "Stephen G. Tallentyre")
|
|
40
|
-
# Halsey, William "Bull"
|
|
41
39
|
# Henry VIII
|
|
42
40
|
# Hillel the Elder
|
|
43
41
|
# Hubbard, Kin (Frank McKinney Hubbard)
|
|
@@ -53,7 +51,6 @@ require 'spec_helper'
|
|
|
53
51
|
# Li Bai
|
|
54
52
|
# Lu Xun
|
|
55
53
|
# Maharaji (Prem Rawat)
|
|
56
|
-
# Malda, Rob "CmdrTaco"
|
|
57
54
|
# Mao Zedong
|
|
58
55
|
# Mary I of England
|
|
59
56
|
# Michelangelo Buonarroti
|
|
@@ -71,7 +68,6 @@ require 'spec_helper'
|
|
|
71
68
|
# Qin Shi Huang
|
|
72
69
|
# Sathya Sai Baba
|
|
73
70
|
# Savitri Devi
|
|
74
|
-
# Shankar, Ravi (Art of Living founder, not the sitar maestro)
|
|
75
71
|
# Simonides of Ceos
|
|
76
72
|
# Sixtus V (pope)
|
|
77
73
|
# Scanderbeg|Skenderbeu(The prince of Arberia)
|
|
@@ -85,12 +81,10 @@ require 'spec_helper'
|
|
|
85
81
|
# Tzu Hsi
|
|
86
82
|
# Jarmo Visakorpi
|
|
87
83
|
# William of Occam
|
|
88
|
-
# White, T. H. (Terence Hanbury)
|
|
89
|
-
# Wilson, (Thomas) Woodrow
|
|
90
84
|
# X, Malcolm (Malcolm Little)
|
|
91
85
|
# Yankovic, "Weird Al"
|
|
92
86
|
# Zé Tom
|
|
93
|
-
#
|
|
87
|
+
#
|
|
94
88
|
|
|
95
89
|
RSpec.describe "WikiQuote list parsing" do
|
|
96
90
|
context "with the name" do
|
|
@@ -392,7 +386,8 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
392
386
|
{ from: "Byrd, Robert", to: { full: "Robert Byrd", first: "Robert", last: "Byrd"} },
|
|
393
387
|
{ from: "Byrne, David", to: { full: "David Byrne", first: "David", last: "Byrne"} },
|
|
394
388
|
{ from: "Byrne, Robert", to: { full: "Robert Byrne", first: "Robert", last: "Byrne"} },
|
|
395
|
-
{ skip:
|
|
389
|
+
{ skip: "The given string implies Lord is his name, it is his title",
|
|
390
|
+
from: "Byron, Lord", to: { full: "Lord Byron", title: "Lord", last: "Byron"} },
|
|
396
391
|
|
|
397
392
|
{ from: "Cabell, James Branch", to: { full: "James Branch Cabell", first: "James Branch", last: "Cabell"} },
|
|
398
393
|
{ from: "Caesar, Irving", to: { full: "Irving Caesar", first: "Irving", last: "Caesar"} },
|
|
@@ -403,6 +398,7 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
403
398
|
{ from: "Calvin, John", to: { full: "John Calvin", first: "John", last: "Calvin"} },
|
|
404
399
|
{ from: "Cameron, Julia", to: { full: "Julia Cameron", first: "Julia", last: "Cameron"} },
|
|
405
400
|
{ from: "Cameron, Kirk", to: { full: "Kirk Cameron", first: "Kirk", last: "Cameron"} },
|
|
401
|
+
{ from: "Campbell, Beatrice Stella; (Mrs. Patrick Campbell)", to: { full: "Beatrice Stella Campbell", first: "Beatrice Stella", nick: "Mrs Patrick Campbell", last: "Campbell"} },
|
|
406
402
|
{ from: "Camus, Albert", to: { full: "Albert Camus", first: "Albert", last: "Camus"} },
|
|
407
403
|
{ from: "Cannon, James P.", to: { full: "James P Cannon", first: "James P", last: "Cannon"} },
|
|
408
404
|
{ from: "Canseco, José", to: { full: "José Canseco", first: "José", last: "Canseco"} },
|
|
@@ -430,6 +426,7 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
430
426
|
{ from: "Carter, Jimmy", to: { full: "Jimmy Carter", first: "Jimmy", last: "Carter"} },
|
|
431
427
|
{ from: "Carville, James", to: { full: "James Carville", first: "James", last: "Carville"} },
|
|
432
428
|
{ from: "Casals, Pablo", to: { full: "Pablo Casals", first: "Pablo", last: "Casals"} },
|
|
429
|
+
{ from: "Casanova, Giacomo (Jacques Casanova de Seingal)", to: { full: "Giacomo Casanova", first: "Giacomo", nick:"Jacques Casanova de Seingal", last: "Casanova" } },
|
|
433
430
|
{ from: "Cash, Johnny", to: { full: "Johnny Cash", first: "Johnny", last: "Cash"} },
|
|
434
431
|
{ from: "Castaneda, Carlos", to: { full: "Carlos Castaneda", first: "Carlos", last: "Castaneda"} },
|
|
435
432
|
{ from: "Castro, Fidel", to: { full: "Fidel Castro", first: "Fidel", last: "Castro"} },
|
|
@@ -447,8 +444,10 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
447
444
|
{ from: "Chapin, Harry", to: { full: "Harry Chapin", first: "Harry", last: "Chapin"} },
|
|
448
445
|
{ from: "Chapman, Colin", to: { full: "Colin Chapman", first: "Colin", last: "Chapman"} },
|
|
449
446
|
{ from: "Chappelle, Dave", to: { full: "Dave Chappelle", first: "Dave", last: "Chappelle"} },
|
|
450
|
-
{ skip:
|
|
451
|
-
|
|
447
|
+
{ skip: "Haven't figured this one yet",
|
|
448
|
+
from: "Charles II, King of England", to: { full: "King of England Charles II", first: "King of England", last: "Charles II"} },
|
|
449
|
+
{ skip: "Haven't figured this one yet",
|
|
450
|
+
from: "Charles V, Holy Roman Emperor", to: { full: "Holy Roman Emperor Charles V", first: "Holy Roman Emperor", last: "Charles V"} },
|
|
452
451
|
{ from: "Charles, Ray", to: { full: "Ray Charles", first: "Ray", last: "Charles"} },
|
|
453
452
|
{ from: "Chateaubriand, François-René de", to: { full: "François-René de Chateaubriand", first: "François-René", last: "de Chateaubriand"} },
|
|
454
453
|
{ from: "Chatwin, Bruce", to: { full: "Bruce Chatwin", first: "Bruce", last: "Chatwin"} },
|
|
@@ -587,11 +586,12 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
587
586
|
{ from: "Deutsch, David", to: { full: "David Deutsch", first: "David", last: "Deutsch"} },
|
|
588
587
|
{ from: "Devine, Carl", to: { full: "Carl Devine", first: "Carl", last: "Devine"} },
|
|
589
588
|
{ from: "Devo, musical group", to: { full: "musical group Devo", first: "musical group", last: "Devo"} },
|
|
590
|
-
{
|
|
589
|
+
{ from: "DeWolfe, Ron (born L. Ron Hubbard, Jr.)", to: { full: "Ron DeWolfe", first: "Ron", last: "DeWolfe", nick: "born L Ron Hubbard, Jr"} },
|
|
591
590
|
{ from: "Dhavernas, Caroline", to: { full: "Caroline Dhavernas", first: "Caroline", last: "Dhavernas"} },
|
|
592
591
|
{ from: "Diana, Princess of Wales", to: { full: "Princess of Wales Diana", first: "Princess of Wales", last: "Diana"} },
|
|
593
592
|
{ from: "DiCaprio, Leonardo", to: { full: "Leonardo DiCaprio", first: "Leonardo", last: "DiCaprio"} },
|
|
594
|
-
{ skip:
|
|
593
|
+
{ skip: "Haven't figured this one yet",
|
|
594
|
+
from: "Dick, Philip Kindred - a.k.a. PKD", to: { full: "Philip Kindred - a.k.a. PKD Dick", first: "Philip Kindred - a.k.a. PKD", last: "Dick"} },
|
|
595
595
|
{ from: "Dickens, Charles", to: { full: "Charles Dickens", first: "Charles", last: "Dickens"} },
|
|
596
596
|
{ from: "Dickinson, Emily", to: { full: "Emily Dickinson", first: "Emily", last: "Dickinson"} },
|
|
597
597
|
{ from: "Dickson, Leonard Eugene", to: { full: "Leonard Eugene Dickson", first: "Leonard Eugene", last: "Dickson"} },
|
|
@@ -646,7 +646,8 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
646
646
|
{ from: "Eckhart, Meister", to: { full: "Meister Eckhart", first: "Meister", last: "Eckhart"} },
|
|
647
647
|
{ from: "Eddington, Arthur Stanley", to: { full: "Arthur Stanley Eddington", first: "Arthur Stanley", last: "Eddington"} },
|
|
648
648
|
{ from: "Edison, Thomas Alva", to: { full: "Thomas Alva Edison", first: "Thomas Alva", last: "Edison"} },
|
|
649
|
-
{ skip:
|
|
649
|
+
{ skip: "Haven't figured this one yet",
|
|
650
|
+
from: "Edward VII, King of England", to: { full: "King of England Edward VII", first: "King of England", last: "Edward VII"} },
|
|
650
651
|
{ from: "Edwards, Edwin W.", to: { full: "Edwin W Edwards", first: "Edwin W", last: "Edwards"} },
|
|
651
652
|
{ from: "Edwards, John", to: { full: "John Edwards", first: "John", last: "Edwards"} },
|
|
652
653
|
{ from: "Eggers, Dave", to: { full: "Dave Eggers", first: "Dave", last: "Eggers"} },
|
|
@@ -658,8 +659,10 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
658
659
|
{ from: "Eldredge, Niles", to: { full: "Niles Eldredge", first: "Niles", last: "Eldredge"} },
|
|
659
660
|
{ from: "Eliot, George", to: { full: "George Eliot", first: "George", last: "Eliot"} },
|
|
660
661
|
{ from: "Eliot, Thomas Stearns", to: { full: "Thomas Stearns Eliot", first: "Thomas Stearns", last: "Eliot"} },
|
|
661
|
-
{ skip:
|
|
662
|
-
|
|
662
|
+
{ skip: "Haven't figured this one yet",
|
|
663
|
+
from: "Elizabeth, the Queen Mother", to: { full: "the Queen Mother Elizabeth", first: "the Queen Mother", last: "Elizabeth"} },
|
|
664
|
+
{ skip: "Duke will be treated as a title, not a name",
|
|
665
|
+
from: "Ellington, Duke", to: { full: "Duke Ellington", first: "Duke", last: "Ellington"} },
|
|
663
666
|
{ from: "Elliot, Cass", to: { full: "Cass Elliot", first: "Cass", last: "Elliot"} },
|
|
664
667
|
{ from: "Elliot, Jim", to: { full: "Jim Elliot", first: "Jim", last: "Elliot"} },
|
|
665
668
|
{ from: "Ellison, Harlan", to: { full: "Harlan Ellison", first: "Harlan", last: "Ellison"} },
|
|
@@ -785,8 +788,10 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
785
788
|
{ from: "Geagea, Samir", to: { full: "Samir Geagea", first: "Samir", last: "Geagea"} },
|
|
786
789
|
{ from: "Gemayel, Solange", to: { full: "Solange Gemayel", first: "Solange", last: "Gemayel"} },
|
|
787
790
|
{ from: "Genet, Jean", to: { full: "Jean Genet", first: "Jean", last: "Genet"} },
|
|
788
|
-
{ skip:
|
|
789
|
-
|
|
791
|
+
{ skip: "Haven't figured this one yet",
|
|
792
|
+
from: "George III, King of England", to: { full: "King of England George III", first: "King of England", last: "George III"} },
|
|
793
|
+
{ skip: "Haven't figured this one yet",
|
|
794
|
+
from: "George V, King of England", to: { full: "King of England George V", first: "King of England", last: "George V"} },
|
|
790
795
|
{ from: "Gephardt, Dick", to: { full: "Dick Gephardt", first: "Dick", last: "Gephardt"} },
|
|
791
796
|
{ from: "Gerety, Frances", to: { full: "Frances Gerety", first: "Frances", last: "Gerety"} },
|
|
792
797
|
{ from: "Gergen, David", to: { full: "David Gergen", first: "David", last: "Gergen"} },
|
|
@@ -860,6 +865,7 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
860
865
|
{ from: "Haley, Alex", to: { full: "Alex Haley", first: "Alex", last: "Haley"} },
|
|
861
866
|
{ from: "Halliburton, Richard", to: { full: "Richard Halliburton", first: "Richard", last: "Halliburton"} },
|
|
862
867
|
{ from: "Halme, Tony", to: { full: "Tony Halme", first: "Tony", last: "Halme"} },
|
|
868
|
+
{ from: "Halsey, William \"Bull\"", to: { full: "William Halsey", first: "William", nick: "Bull", last: "Halsey"} },
|
|
863
869
|
{ from: "Hamilton, Alexander", to: { full: "Alexander Hamilton", first: "Alexander", last: "Hamilton"} },
|
|
864
870
|
{ from: "Hamilton, Gail", to: { full: "Gail Hamilton", first: "Gail", last: "Hamilton"} },
|
|
865
871
|
{ from: "Hamilton, Laurell K.", to: { full: "Laurell K Hamilton", first: "Laurell K", last: "Hamilton"} },
|
|
@@ -867,7 +873,8 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
867
873
|
{ from: "Handy, Charles", to: { full: "Charles Handy", first: "Charles", last: "Handy"} },
|
|
868
874
|
{ from: "Handy, W. C.", to: { full: "W C Handy", first: "W C", last: "Handy"} },
|
|
869
875
|
{ from: "Hannity, Sean", to: { full: "Sean Hannity", first: "Sean", last: "Hannity"} },
|
|
870
|
-
{ skip:
|
|
876
|
+
{ skip: "Haven't figured this one yet",
|
|
877
|
+
from: "Harold II Godwinson, King of England", to: { full: "King of England Harold II Godwinson", first: "King of England", last: "Harold II Godwinson"} },
|
|
871
878
|
{ from: "Harper, Stephen", to: { full: "Stephen Harper", first: "Stephen", last: "Harper"} },
|
|
872
879
|
{ from: "Harris, Sam", to: { full: "Sam Harris", first: "Sam", last: "Harris"} },
|
|
873
880
|
{ from: "Harris, William Torrey", to: { full: "William Torrey Harris", first: "William Torrey", last: "Harris"} },
|
|
@@ -943,7 +950,7 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
943
950
|
{ from: "Hofstadter, Richard", to: { full: "Richard Hofstadter", first: "Richard", last: "Hofstadter"} },
|
|
944
951
|
{ from: "Holderlin, Friedrich", to: { full: "Friedrich Holderlin", first: "Friedrich", last: "Holderlin"} },
|
|
945
952
|
{ from: "Holmes, Oliver Wendell, Jr.", to: { full: "Oliver Wendell Holmes Jr", first: "Oliver Wendell", last: "Holmes", suffix: "Jr"} },
|
|
946
|
-
{ from: "Holmes, Oliver Wendell, Sr.", to: { full: "Oliver Wendell Holmes Sr
|
|
953
|
+
{ from: "Holmes, Oliver Wendell, Sr.", to: { full: "Oliver Wendell Holmes Sr", first: "Oliver Wendell", last: "Holmes", suffix: "Sr"} },
|
|
947
954
|
{ from: "Holt, Anatol", to: { full: "Anatol Holt", first: "Anatol", last: "Holt"} },
|
|
948
955
|
{ from: "Holt, John", to: { full: "John Holt", first: "John", last: "Holt"} },
|
|
949
956
|
{ from: "Honda, Soichiro", to: { full: "Soichiro Honda", first: "Soichiro", last: "Honda"} },
|
|
@@ -1019,7 +1026,8 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
1019
1026
|
{ from: "Janáček, Leoš", to: { full: "Leoš Janáček", first: "Leoš", last: "Janáček"} },
|
|
1020
1027
|
{ from: "Jarrell, Randall", to: { full: "Randall Jarrell", first: "Randall", last: "Jarrell"} },
|
|
1021
1028
|
{ from: "Jarrett, Jeff", to: { full: "Jeff Jarrett", first: "Jeff", last: "Jarrett"} },
|
|
1022
|
-
{ skip:
|
|
1029
|
+
{ skip: "Too many commas!",
|
|
1030
|
+
from: "Jay, Glenn, Miner", to: { full: "Miner Jay, Glenn", first: "Miner", last: "Jay, Glenn"} },
|
|
1023
1031
|
{ from: "Jarry, Alfred", to: { full: "Alfred Jarry", first: "Alfred", last: "Jarry"} },
|
|
1024
1032
|
{ from: "Jeffers, Robinson", to: { full: "Robinson Jeffers", first: "Robinson", last: "Jeffers"} },
|
|
1025
1033
|
{ from: "Jefferson, Thomas", to: { full: "Thomas Jefferson", first: "Thomas", last: "Jefferson"} },
|
|
@@ -1086,7 +1094,7 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
1086
1094
|
{ from: "Kellner, Friedrich", to: { full: "Friedrich Kellner", first: "Friedrich", last: "Kellner"} },
|
|
1087
1095
|
{ from: "Kelly, Walt", to: { full: "Walt Kelly", first: "Walt", last: "Kelly"} },
|
|
1088
1096
|
{ from: "Kempton, Murray", to: { full: "Murray Kempton", first: "Murray", last: "Kempton"} },
|
|
1089
|
-
{ from: "Kelvin, Lord", to: { full: "Lord Kelvin",
|
|
1097
|
+
{ from: "Kelvin, Lord", to: { full: "Lord Kelvin", title: "Lord", last: "Kelvin"} },
|
|
1090
1098
|
{ from: "Kendrick, Alex", to: { full: "Alex Kendrick", first: "Alex", last: "Kendrick"} },
|
|
1091
1099
|
{ from: "Kennan, George F.", to: { full: "George F Kennan", first: "George F", last: "Kennan"} },
|
|
1092
1100
|
{ from: "Kennedy, Anthony", to: { full: "Anthony Kennedy", first: "Anthony", last: "Kennedy"} },
|
|
@@ -1226,7 +1234,8 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
1226
1234
|
{ from: "Loo, Tristan J.", to: { full: "Tristan J Loo", first: "Tristan J", last: "Loo"} },
|
|
1227
1235
|
{ from: "Looney, General William", to: { full: "William Looney", title: "General", first: "William", last: "Looney"} },
|
|
1228
1236
|
{ from: "Lott, Trent", to: { full: "Trent Lott", first: "Trent", last: "Lott"} },
|
|
1229
|
-
{ skip:
|
|
1237
|
+
{ skip: "Haven't figured this one yet",
|
|
1238
|
+
from: "Louis VII, King of France", to: { full: "King of France Louis VII", first: "King of France", last: "Louis VII"} },
|
|
1230
1239
|
{ from: "Louis, Joe", to: { full: "Joe Louis", first: "Joe", last: "Louis"} },
|
|
1231
1240
|
{ from: "Love, Courtney", to: { full: "Courtney Love", first: "Courtney", last: "Love"} },
|
|
1232
1241
|
{ from: "Lovecraft, H. P.", to: { full: "H P Lovecraft", first: "H P", last: "Lovecraft"} },
|
|
@@ -1252,7 +1261,8 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
1252
1261
|
{ from: "Madraiwiwi, Ratu Joni", to: { full: "Ratu Joni Madraiwiwi", title: "Ratu", first: "Joni", last: "Madraiwiwi"} },
|
|
1253
1262
|
{ from: "Magee, Bryan", to: { full: "Bryan Magee", first: "Bryan", last: "Magee"} },
|
|
1254
1263
|
{ from: "Maher, Bill", to: { full: "Bill Maher", first: "Bill", last: "Maher"} },
|
|
1255
|
-
{
|
|
1264
|
+
{ from: "Malda, Rob \"CmdrTaco\"", to: { full: "Rob Malda", first: "Rob", nick: "CmdrTaco", last: "Malda"} },
|
|
1265
|
+
{ from: "Mallet, David (or David Malloch)", to: { full: "David Mallet", first: "David", last: "Mallet", nick: "or David Malloch"} },
|
|
1256
1266
|
{ from: "Malley, Matt", to: { full: "Matt Malley", first: "Matt", last: "Malley"} },
|
|
1257
1267
|
{ from: "Malory, Thomas", to: { full: "Thomas Malory", first: "Thomas", last: "Malory"} },
|
|
1258
1268
|
{ from: "Malraux, André", to: { full: "André Malraux", first: "André", last: "Malraux"} },
|
|
@@ -1337,7 +1347,8 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
1337
1347
|
{ from: "Miller, Bode", to: { full: "Bode Miller", first: "Bode", last: "Miller"} },
|
|
1338
1348
|
{ from: "Miller, Henry", to: { full: "Henry Miller", first: "Henry", last: "Miller"} },
|
|
1339
1349
|
{ from: "Miller, Ron", to: { full: "Ron Miller", first: "Ron", last: "Miller"} },
|
|
1340
|
-
{
|
|
1350
|
+
{ skip: "Jr will be parsed as a nick",
|
|
1351
|
+
from: "Miller, Walter M. (Jr.)", to: { full: "Walter M Miller", first: "Walter M", last: "Miller", suffix: "Jr"} },
|
|
1341
1352
|
{ from: "Milligan, Spike", to: { full: "Spike Milligan", first: "Spike", last: "Milligan"} },
|
|
1342
1353
|
{ from: "Mills, C. Wright", to: { full: "C Wright Mills", first: "C Wright", last: "Mills"} },
|
|
1343
1354
|
{ from: "Milne, A.A.", to: { full: "A A Milne", first: "A A", last: "Milne"} },
|
|
@@ -1376,7 +1387,8 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
1376
1387
|
{ from: "Muhammad, Holy Prophet", to: { full: "Holy Prophet Muhammad", first: "Holy Prophet", last: "Muhammad"} },
|
|
1377
1388
|
{ from: "Muir, John", to: { full: "John Muir", first: "John", last: "Muir"} },
|
|
1378
1389
|
{ from: "Mullally, Megan", to: { full: "Megan Mullally", first: "Megan", last: "Mullally"} },
|
|
1379
|
-
{ skip:
|
|
1390
|
+
{ skip: "Haven't figured this one yet",
|
|
1391
|
+
from: "Mulock, Dinah Maria; also Dinah Maria Craik", to: { full: "Dinah Maria; also Dinah Maria Craik Mulock", first: "Dinah Maria; also Dinah Maria Craik", last: "Mulock"} },
|
|
1380
1392
|
{ from: "Mumford, Lewis", to: { full: "Lewis Mumford", first: "Lewis", last: "Mumford"} },
|
|
1381
1393
|
{ from: "Mumpfield, Susie", to: { full: "Susie Mumpfield", first: "Susie", last: "Mumpfield"} },
|
|
1382
1394
|
{ from: "Munch, Edvard", to: { full: "Edvard Munch", first: "Edvard", last: "Munch"} },
|
|
@@ -1385,10 +1397,13 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
1385
1397
|
{ from: "Mussolini, Benito", to: { full: "Benito Mussolini", first: "Benito", last: "Mussolini"} },
|
|
1386
1398
|
{ from: "Mustaine, Dave", to: { full: "Dave Mustaine", first: "Dave", last: "Mustaine"} },
|
|
1387
1399
|
{ from: "Myatt, David", to: { full: "David Myatt", first: "David", last: "Myatt"} },
|
|
1400
|
+
|
|
1388
1401
|
{ from: "Nabokov, Vladimir", to: { full: "Vladimir Nabokov", first: "Vladimir", last: "Nabokov"} },
|
|
1389
|
-
{
|
|
1402
|
+
{ skip: "Haven't figured this one yet",
|
|
1403
|
+
from: "Nachman, Rabbi, of Bratzlav", to: { full: "of Bratzlav Nachman, Rabbi", first: "of Bratzlav", last: "Nachman, Rabbi"} },
|
|
1390
1404
|
{ from: "Nader, Ralph", to: { full: "Ralph Nader", first: "Ralph", last: "Nader"} },
|
|
1391
1405
|
{ from: "Nagel, Thomas", to: { full: "Thomas Nagel", first: "Thomas", last: "Nagel"} },
|
|
1406
|
+
{ from: "Cole, Nat \"King\"", to: { full: "Nat Cole", first: "Nat", nick: "King", last: "Cole"} },
|
|
1392
1407
|
{ from: "Naidu, Richard", to: { full: "Richard Naidu", first: "Richard", last: "Naidu"} },
|
|
1393
1408
|
{ from: "Nailatikau, Adi Koila", to: { full: "Adi Koila Nailatikau", title: "Adi", first: "Koila", last: "Nailatikau"} },
|
|
1394
1409
|
{ from: "Nailatikau, Ratu Epeli Qaraninamu", to: { full: "Ratu Epeli Qaraninamu Nailatikau", title: "Ratu", first: "Epeli Qaraninamu", last: "Nailatikau"} },
|
|
@@ -1410,16 +1425,19 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
1410
1425
|
{ from: "Nietzsche, Friedrich", to: { full: "Friedrich Nietzsche", first: "Friedrich", last: "Nietzsche"} },
|
|
1411
1426
|
{ from: "Nightingale, Florence", to: { full: "Florence Nightingale", first: "Florence", last: "Nightingale"} },
|
|
1412
1427
|
{ from: "Nijinsky, Vaslav", to: { full: "Vaslav Nijinsky", first: "Vaslav", last: "Nijinsky"} },
|
|
1413
|
-
{
|
|
1428
|
+
{ skip: "Not only misspelled, Nin get treated as a compound",
|
|
1429
|
+
from: "Nin, Anaïs Nin", to: { full: "Anaïs Nin Nin", first: "Anaïs", last: "Nin Nin"} },
|
|
1414
1430
|
{ from: "Ninio, Jacques", to: { full: "Jacques Ninio", first: "Jacques", last: "Ninio"} },
|
|
1415
1431
|
{ from: "Niranjan, Sangeeta", to: { full: "Sangeeta Niranjan", first: "Sangeeta", last: "Niranjan"} },
|
|
1416
1432
|
{ from: "Niven, Larry", to: { full: "Larry Niven", first: "Larry", last: "Niven"} },
|
|
1417
1433
|
{ from: "Nixon, Richard", to: { full: "Richard Nixon", first: "Richard", last: "Nixon"} },
|
|
1418
1434
|
{ from: "Noam, Eli", to: { full: "Eli Noam", first: "Eli", last: "Noam"} },
|
|
1419
1435
|
{ from: "Norton, Joshua Abraham", to: { full: "Joshua Abraham Norton", first: "Joshua Abraham", last: "Norton"} },
|
|
1420
|
-
{ skip:
|
|
1436
|
+
{ skip: "Haven't figured this one yet",
|
|
1437
|
+
from: "Nostradamus (Michel de Notredame, or Michel de Nostredame)", to: { full: "or Michel de Nostredame) Nostradamus (Michel de Notredame", first: "or Michel de Nostredame)", last: "Nostradamus (Michel de Notredame"} },
|
|
1421
1438
|
{ from: "Nugent, Ted", to: { full: "Ted Nugent", first: "Ted", last: "Nugent"} },
|
|
1422
|
-
{
|
|
1439
|
+
{ skip: "Duke will treated as a title, not a name",
|
|
1440
|
+
from: "Nukem, Duke", to: { full: "Duke Nukem", title: "Duke", last: "Nukem"} },
|
|
1423
1441
|
{ from: "Null, Gary", to: { full: "Gary Null", first: "Gary", last: "Null"} },
|
|
1424
1442
|
{ from: "Nunally, Patrick", to: { full: "Patrick Nunally", first: "Patrick", last: "Nunally"} },
|
|
1425
1443
|
{ from: "Nuwas, Abu", to: { full: "Abu Nuwas", first: "Abu", last: "Nuwas"} },
|
|
@@ -1435,7 +1453,8 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
1435
1453
|
{ from: "Oliver, Jamie", to: { full: "Jamie Oliver", first: "Jamie", last: "Oliver"} },
|
|
1436
1454
|
{ from: "Oliver, Robert T.", to: { full: "Robert T Oliver", first: "Robert T", last: "Oliver"} },
|
|
1437
1455
|
{ from: "Olson, Ken", to: { full: "Ken Olson", first: "Ken", last: "Olson"} },
|
|
1438
|
-
{ skip:
|
|
1456
|
+
{ skip: "Haven't figured this one yet",
|
|
1457
|
+
from: "Olsen, Mary-Kate and Ashley", to: { full: "Mary-Kate and Ashley Olsen", first: "Mary-Kate and Ashley", last: "Olsen"} },
|
|
1439
1458
|
{ from: "Onassis, Jacqueline Kennedy", to: { full: "Jacqueline Kennedy Onassis", first: "Jacqueline Kennedy", last: "Onassis"} },
|
|
1440
1459
|
{ from: "Ondrick, William F.", to: { full: "William F Ondrick", first: "William F", last: "Ondrick"} },
|
|
1441
1460
|
{ from: "Oppenheimer, J. Robert", to: { full: "J Robert Oppenheimer", first: "J Robert", last: "Oppenheimer"} },
|
|
@@ -1486,7 +1505,8 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
1486
1505
|
{ from: "Petronius, Gaius", to: { full: "Gaius Petronius", first: "Gaius", last: "Petronius"} },
|
|
1487
1506
|
{ from: "Petty, Tom", to: { full: "Tom Petty", first: "Tom", last: "Petty"} },
|
|
1488
1507
|
{ from: "Phelps, Michael", to: { full: "Michael Phelps", first: "Michael", last: "Phelps"} },
|
|
1489
|
-
{ skip:
|
|
1508
|
+
{ skip: "Haven't figured this one yet",
|
|
1509
|
+
from: "Philip, Duke of Edinburgh", to: { full: "Duke of Edinburgh Philip", first: "Duke of Edinburgh", last: "Philip"} },
|
|
1490
1510
|
{ from: "Philips, Emo", to: { full: "Emo Philips", first: "Emo", last: "Philips"} },
|
|
1491
1511
|
{ from: "Piaget, Jean", to: { full: "Jean Piaget", first: "Jean", last: "Piaget"} },
|
|
1492
1512
|
{ from: "Picasso, Pablo", to: { full: "Pablo Picasso", first: "Pablo", last: "Picasso"} },
|
|
@@ -1521,7 +1541,8 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
1521
1541
|
{ from: "Quale, Anthony", to: { full: "Anthony Quale", first: "Anthony", last: "Quale"} },
|
|
1522
1542
|
{ from: "Quarles, Francis", to: { full: "Francis Quarles", first: "Francis", last: "Quarles"} },
|
|
1523
1543
|
{ from: "Quayle, Dan", to: { full: "Dan Quayle", first: "Dan", last: "Quayle"} },
|
|
1524
|
-
{ skip:
|
|
1544
|
+
{ skip: "Haven't figured this one yet",
|
|
1545
|
+
from: "Marie, Queen of Romania", to: { full: "Queen of Romania Marie", first: "Queen of Romania", last: "Marie"} },
|
|
1525
1546
|
{ from: "Quine, Willard van Orman", to: { full: "Willard van Orman Quine", first: "Willard van Orman", last: "Quine"} },
|
|
1526
1547
|
{ from: "Quintilian, Marcus Fabius", to: { full: "Marcus Fabius Quintilian", first: "Marcus Fabius", last: "Quintilian"} },
|
|
1527
1548
|
{ from: "Quisenberry, Dan", to: { full: "Dan Quisenberry", first: "Dan", last: "Quisenberry"} },
|
|
@@ -1581,7 +1602,8 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
1581
1602
|
{ from: "Roth, Geneen", to: { full: "Geneen Roth", first: "Geneen", last: "Roth"} },
|
|
1582
1603
|
{ from: "Roth, Philip", to: { full: "Philip Roth", first: "Philip", last: "Roth"} },
|
|
1583
1604
|
{ from: "Rothbard, Murray", to: { full: "Murray Rothbard", first: "Murray", last: "Rothbard"} },
|
|
1584
|
-
{
|
|
1605
|
+
{ skip: "Technically this isn't a name, so do what?",
|
|
1606
|
+
from: "Rothschild, Baron", to: { full: "Baron Rothschild", title: "Baron", last: "Rothschild"} },
|
|
1585
1607
|
{ from: "Roux, Joseph", to: { full: "Joseph Roux", first: "Joseph", last: "Roux"} },
|
|
1586
1608
|
{ from: "Rousseau, Jean-Jacques", to: { full: "Jean-Jacques Rousseau", first: "Jean-Jacques", last: "Rousseau"} },
|
|
1587
1609
|
{ from: "Rovabokola, Ratu Viliame", to: { full: "Ratu Viliame Rovabokola", title: "Ratu", first: "Viliame", last: "Rovabokola"} },
|
|
@@ -1636,10 +1658,11 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
1636
1658
|
{ from: "Sellers, Peter", to: { full: "Peter Sellers", first: "Peter", last: "Sellers"} },
|
|
1637
1659
|
{ from: "Serling, Rod", to: { full: "Rod Serling", first: "Rod", last: "Serling"} },
|
|
1638
1660
|
{ from: "Serrano, Miguel", to: { full: "Miguel Serrano", first: "Miguel", last: "Serrano"} },
|
|
1639
|
-
{
|
|
1661
|
+
{ from: "Seuss, Dr.", to: { full: "Dr. Seuss", title: "Dr", last: "Seuss"} },
|
|
1640
1662
|
{ from: "Shahak, Israel", to: { full: "Israel Shahak", first: "Israel", last: "Shahak"} },
|
|
1641
1663
|
{ from: "Shakespeare, William", to: { full: "William Shakespeare", first: "William", last: "Shakespeare"} },
|
|
1642
1664
|
{ from: "Shakur, Tupac", to: { full: "Tupac Shakur", first: "Tupac", last: "Shakur"} },
|
|
1665
|
+
{ from: "Shankar, Ravi (Art of Living founder, not the sitar maestro)", to: { full: "Ravi Shankar", first: "Ravi", last: "Shankar", nick: "Art of Living founder, not the sitar maestro"} },
|
|
1643
1666
|
{ from: "Sharpton, Al", to: { full: "Al Sharpton", first: "Al", last: "Sharpton"} },
|
|
1644
1667
|
{ from: "Shaw, George Bernard", to: { full: "George Bernard Shaw", first: "George Bernard", last: "Shaw"} },
|
|
1645
1668
|
{ from: "Shawcross, Hartley", to: { full: "Hartley Shawcross", first: "Hartley", last: "Shawcross"} },
|
|
@@ -1725,7 +1748,7 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
1725
1748
|
{ from: "Stoppard, Tom", to: { full: "Tom Stoppard", first: "Tom", last: "Stoppard"} },
|
|
1726
1749
|
{ from: "Stout, Rex", to: { full: "Rex Stout", first: "Rex", last: "Stout"} },
|
|
1727
1750
|
{ from: "Stowe, Madeleine", to: { full: "Madeleine Stowe", first: "Madeleine", last: "Stowe"} },
|
|
1728
|
-
{ from: "Stratford, Lord (Tony Banks)", to: { full: "Lord (Tony Banks) Stratford",
|
|
1751
|
+
{ from: "Stratford, Lord (Tony Banks)", to: { full: "Lord (Tony Banks) Stratford", title: "Lord", last: "Stratford", nick: "Tony Banks"} },
|
|
1729
1752
|
{ from: "Strauss, Richard", to: { full: "Richard Strauss", first: "Richard", last: "Strauss"} },
|
|
1730
1753
|
{ from: "Stravinsky, Igor", to: { full: "Igor Stravinsky", first: "Igor", last: "Stravinsky"} },
|
|
1731
1754
|
{ from: "Stroustrup, Bjarne", to: { full: "Bjarne Stroustrup", first: "Bjarne", last: "Stroustrup"} },
|
|
@@ -1751,7 +1774,8 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
1751
1774
|
{ from: "Tavola, Kaliopate", to: { full: "Kaliopate Tavola", first: "Kaliopate", last: "Tavola"} },
|
|
1752
1775
|
{ from: "Tchaikovsky, Pyotr Ilyich", to: { full: "Pyotr Ilyich Tchaikovsky", first: "Pyotr Ilyich", last: "Tchaikovsky"} },
|
|
1753
1776
|
{ from: "Teilhard de Chardin, Pierre", to: { full: "Pierre Teilhard de Chardin", first: "Pierre", last: "Teilhard de Chardin"} },
|
|
1754
|
-
{
|
|
1777
|
+
{ skip: "Lord will be treated as a nick",
|
|
1778
|
+
from: "Tennyson, Alfred (Lord)", to: { full: "Alfred (Lord) Tennyson", first: "Alfred", last: "Tennyson", nick: "Lord"} },
|
|
1755
1779
|
{ from: "Tesla, Nikola", to: { full: "Nikola Tesla", first: "Nikola", last: "Tesla"} },
|
|
1756
1780
|
{ from: "Thackeray, William Makepeace", to: { full: "William Makepeace Thackeray", first: "William Makepeace", last: "Thackeray"} },
|
|
1757
1781
|
{ from: "Thant, U", to: { full: "U Thant", first: "U", last: "Thant"} },
|
|
@@ -1759,7 +1783,8 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
1759
1783
|
{ from: "Theron, Charlize", to: { full: "Charlize Theron", first: "Charlize", last: "Theron"} },
|
|
1760
1784
|
{ from: "Thompson, Dorothy", to: { full: "Dorothy Thompson", first: "Dorothy", last: "Thompson"} },
|
|
1761
1785
|
{ from: "Thompson, Hunter S.", to: { full: "Hunter S Thompson", first: "Hunter S", last: "Thompson"} },
|
|
1762
|
-
{ skip:
|
|
1786
|
+
{ skip: "Haven't figured this one yet",
|
|
1787
|
+
from: "Thomson, William - a.k.a. Lord Kelvin", to: { full: "William - a.k.a. Lord Kelvin Thomson", first: "William - a.k.a. Lord Kelvin", last: "Thomson"} },
|
|
1763
1788
|
{ from: "Thoreau, Henry David", to: { full: "Henry David Thoreau", first: "Henry David", last: "Thoreau"} },
|
|
1764
1789
|
{ from: "Throttle, Ben", to: { full: "Ben Throttle", first: "Ben", last: "Throttle"} },
|
|
1765
1790
|
{ from: "Thurber, James", to: { full: "James Thurber", first: "James", last: "Thurber"} },
|
|
@@ -1859,6 +1884,7 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
1859
1884
|
{ from: "White, Andrew Dickson", to: { full: "Andrew Dickson White", first: "Andrew Dickson", last: "White"} },
|
|
1860
1885
|
{ from: "White, E. B. (Elwyn Brooks)", to: { full: "E B (Elwyn Brooks) White", first: "E B", nick: "Elwyn Brooks", last: "White"} },
|
|
1861
1886
|
{ from: "White, Reggie", to: { full: "Reggie White", first: "Reggie", last: "White"} },
|
|
1887
|
+
{ from: "White, T. H. (Terence Hanbury)", to: { full: "T H White", first: "T H", nick: "Terence Hanbury", last: "White"} },
|
|
1862
1888
|
{ from: "Whitehead, Alfred North", to: { full: "Alfred North Whitehead", first: "Alfred North", last: "Whitehead"} },
|
|
1863
1889
|
{ from: "Whitman, Walt", to: { full: "Walt Whitman", first: "Walt", last: "Whitman"} },
|
|
1864
1890
|
{ from: "Whittier, John Greenleaf", to: { full: "John Greenleaf Whittier", first: "John Greenleaf", last: "Whittier"} },
|
|
@@ -1869,7 +1895,8 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
1869
1895
|
{ from: "Wilde, Oscar", to: { full: "Oscar Wilde", first: "Oscar", last: "Wilde"} },
|
|
1870
1896
|
{ from: "Wilder, Thornton", to: { full: "Thornton Wilder", first: "Thornton", last: "Wilder"} },
|
|
1871
1897
|
{ from: "Wilkes, Maurice", to: { full: "Maurice Wilkes", first: "Maurice", last: "Wilkes"} },
|
|
1872
|
-
{ skip:
|
|
1898
|
+
{ skip: "Haven't figured this one yet",
|
|
1899
|
+
from: "William I, King of England", to: { full: "King of England William I", first: "King of England", last: "William I"} },
|
|
1873
1900
|
{ from: "Williams, Robin", to: { full: "Robin Williams", first: "Robin", last: "Williams"} },
|
|
1874
1901
|
{ from: "Williams, Roger", to: { full: "Roger Williams", first: "Roger", last: "Williams"} },
|
|
1875
1902
|
{ from: "Williams, Ted", to: { full: "Ted Williams", first: "Ted", last: "Williams"} },
|
|
@@ -1880,6 +1907,7 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
1880
1907
|
{ from: "Wilson, Colin", to: { full: "Colin Wilson", first: "Colin", last: "Wilson"} },
|
|
1881
1908
|
{ from: "Wilson, Flip", to: { full: "Flip Wilson", first: "Flip", last: "Wilson"} },
|
|
1882
1909
|
{ from: "Wilson, Ron", to: { full: "Ron Wilson", first: "Ron", last: "Wilson"} },
|
|
1910
|
+
{ from: "Wilson, (Thomas) Woodrow", to: { full: "Woodrow Wilson", first: "Woodrow", nick: "Thomas", last: "Wilson"} },
|
|
1883
1911
|
{ from: "Winfrey, Oprah", to: { full: "Oprah Winfrey", first: "Oprah", last: "Winfrey"} },
|
|
1884
1912
|
{ from: "Winner, Michael", to: { full: "Michael Winner", first: "Michael", last: "Winner"} },
|
|
1885
1913
|
{ from: "Winter, William", to: { full: "William Winter", first: "William", last: "Winter"} },
|
|
@@ -1919,6 +1947,8 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
1919
1947
|
{ from: "Zappa, Frank", to: { full: "Frank Zappa", first: "Frank", last: "Zappa"} },
|
|
1920
1948
|
{ from: "Zawinski, Jamie", to: { full: "Jamie Zawinski", first: "Jamie", last: "Zawinski"} },
|
|
1921
1949
|
{ from: "Zeldin, Theodore", to: { full: "Theodore Zeldin", first: "Theodore", last: "Zeldin"} },
|
|
1950
|
+
{ skip: "Haven't figured this one yet",
|
|
1951
|
+
from: "Zhuangzi (Zhuang Zi; Zhuang Zhou; Chuang Tzu; Chuang Tse)", to: {} },
|
|
1922
1952
|
{ from: "Ziglar, Zig", to: { full: "Zig Ziglar", first: "Zig", last: "Ziglar"} },
|
|
1923
1953
|
{ from: "Zimmermann, Philip", to: { full: "Philip Zimmermann", first: "Philip", last: "Zimmermann"} },
|
|
1924
1954
|
{ from: "Zinck, Kenneth", to: { full: "Kenneth Zinck", first: "Kenneth", last: "Zinck"} },
|
|
@@ -1926,7 +1956,7 @@ RSpec.describe "WikiQuote list parsing" do
|
|
|
1926
1956
|
{ from: "Zola, Emile", to: { full: "Emile Zola", first: "Emile", last: "Zola"} }
|
|
1927
1957
|
].each do |name|
|
|
1928
1958
|
it "parses #{name[:from]}" do
|
|
1929
|
-
skip if name
|
|
1959
|
+
skip name[:skip] if name.has_key?(:skip)
|
|
1930
1960
|
parsed = Nomener.parse(name[:from])
|
|
1931
1961
|
parse_hash = parsed.to_h
|
|
1932
1962
|
parse_hash.each_pair do |k,v|
|
|
@@ -22,6 +22,12 @@ RSpec.describe "Nomener::Parser" do
|
|
|
22
22
|
expect(name).to be_a Nomener::Name
|
|
23
23
|
end
|
|
24
24
|
|
|
25
|
+
it "throw ParseError when passed too many commas" do
|
|
26
|
+
expect {
|
|
27
|
+
Nomener::Parser.parse!("Joe, John, Smith")
|
|
28
|
+
}.to raise_error Nomener::ParseError
|
|
29
|
+
end
|
|
30
|
+
|
|
25
31
|
[
|
|
26
32
|
{from: "Joe Smith", to: { first: "Joe", last: "Smith"} },
|
|
27
33
|
{from: "Joe Smith Jr.", to: { first: "Joe", last: "Smith", suffix: "Jr"} },
|
data/spec/nomener/titles_spec.rb
CHANGED
|
@@ -5,7 +5,8 @@ RSpec.describe "Title" do
|
|
|
5
5
|
[
|
|
6
6
|
{ name: "خانم Augusta Ada King", result: "خانم" },
|
|
7
7
|
{ name: "רעב Bertrand Russell", result: "רעב" },
|
|
8
|
-
{ skip:
|
|
8
|
+
{ skip: "Something is typed, or we clean it, improperly",
|
|
9
|
+
name: "'ר Bertrand Russell", result: "'ר" },
|
|
9
10
|
{ name: "አቶ Bertrand Russell", result: "አቶ" },
|
|
10
11
|
{ name: "Air Commander Bertrand Russell", result: "Air Commander" },
|
|
11
12
|
{ name: "Air Commodore Bertrand Russell", result: "Air Commodore" },
|
|
@@ -44,8 +45,8 @@ RSpec.describe "Title" do
|
|
|
44
45
|
{ name: "Dr Bertrand Russell", result: "Dr" },
|
|
45
46
|
{ name: "Dr. Bertrand Russell", result: "Dr" },
|
|
46
47
|
{ name: "Dom Bertrand Russell", result: "Dom" },
|
|
47
|
-
{
|
|
48
|
-
{
|
|
48
|
+
# { name: "Don Bertrand Russell", result: "Don" },
|
|
49
|
+
# { name: "Dona Augusta Ada King", result: "Dona" },
|
|
49
50
|
{ name: "Erzherzog Bertrand Russell", result: "Erzherzog" },
|
|
50
51
|
{ name: "Erzherzogin Augusta Ada King", result: "Erzherzogin" },
|
|
51
52
|
{ name: "Father Bertrand Russell", result: "Father" },
|
|
@@ -215,7 +216,7 @@ RSpec.describe "Title" do
|
|
|
215
216
|
{ name: "Very Reverand Bertrand Russell", result: "Very Reverand" },
|
|
216
217
|
].each do |name|
|
|
217
218
|
it "parses #{name[:result]} from #{name[:name]}" do
|
|
218
|
-
skip if name
|
|
219
|
+
skip name[:skip] if name.has_key?(:skip)
|
|
219
220
|
parsed = Nomener.parse(name[:name])
|
|
220
221
|
expect(parsed.title).to eq name[:result]
|
|
221
222
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: nomener
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.
|
|
4
|
+
version: 0.2.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Dante Piombino
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2015-04-
|
|
11
|
+
date: 2015-04-11 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|