nomener 0.2.1 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0b78306c6b080208d8379ccf779b4478c9ebc470
4
- data.tar.gz: b1f532bab036ba0d6e22ebdf8722cb607f8fed60
3
+ metadata.gz: 2b4d3e3d0da16de4111205c7c0355bd8db5526a5
4
+ data.tar.gz: 314c165d421692570ebf48a64cbfdc9748f22c70
5
5
  SHA512:
6
- metadata.gz: 8968422e564d9a8165b3773c60ac13dc0ea4ab28483235ee37dea32b5922ab4e398bf68d9bb39e84c248b2600f6d7820a7f304d1836d04a3c8614fdb5e541ffb
7
- data.tar.gz: b9d47e737959cc409c7d4f92f373420975815c143428f79b11c54694d94208ea7c18431a202080966244ade9346c5fda127ca6666eb0a3fc903c298cca66f615
6
+ metadata.gz: 3d1d2126b1bb6a6e6e4622a5eae671157dae540515137e2862f224a399d8e6b19c9d6f21e74796f4be2c4f47f51cf4cb7280533ce0aef79eaf121571801a9309
7
+ data.tar.gz: 3527b6b3e9fcbb1e3d8d317ad4d25ec1eee6ff926377722bf32581b13b47a1fff56a04cded4929955a0bc37365c88b250eced36a4a0c6558247506b0ef19f7f2
data/LICENSE.txt CHANGED
@@ -1,5 +1,9 @@
1
1
  Copyright (c) 2015 Dante Piombino
2
2
 
3
+ Listen to the Wreckids (www.thewreckids.com)
4
+
5
+ Thanks: Alice, Tom Waits, Mogwai, and Tim Berners-Lee
6
+
3
7
  MIT License
4
8
 
5
9
  Permission is hereby granted, free of charge, to any person obtaining
data/lib/nomener.rb CHANGED
@@ -4,6 +4,8 @@ require "nomener/parser"
4
4
 
5
5
  module Nomener
6
6
 
7
+ class ParseError < StandardError; end
8
+
7
9
  # Public: Convenience method to parse a name
8
10
  #
9
11
  # name - a string of a name to parse
@@ -73,6 +73,6 @@ module Nomener
73
73
  | Zu
74
74
  | (?-i:y)
75
75
  | 't
76
- )\p{Blank}?\g<part>*)*!xi
76
+ )\b\p{Blank}?\g<part>*)*!xi
77
77
  end
78
78
  end
@@ -24,8 +24,8 @@ module Nomener
24
24
  n.tr!("\u0027\u2018\u201A\u2035\u2039\u300C\uFE41\uFF62", left) # replace left single quotes
25
25
  n.tr!("\u0027\u2019\u201B\u2032\u203A\u300D\uFE42\uFF62", right) # replace left single quotes
26
26
 
27
- n.gsub!(/\./, ' ')
28
- n.gsub!(/[^\p{Alpha}\-&\/ \,\'\"#{leftleft}#{rightright}#{left}#{right}\(\)]/, "") # what others may be in a name?
27
+ #n.gsub!(/\./, ' ')
28
+ n.gsub!(/[^\p{Alpha}\-&\/ \.\,\'\"#{leftleft}#{rightright}#{left}#{right}\(\)]/, " ") # what others may be in a name?
29
29
  n.gsub!(/\p{Blank}+/, " ") # compress whitespace
30
30
  n.strip! # trim space
31
31
 
@@ -10,6 +10,30 @@ module Nomener
10
10
  include Nomener::Suffixes
11
11
  include Nomener::Compounders
12
12
 
13
+ # regex for stuff at the end we want to get out
14
+ TRAILER_TRASH = /[,|\s]+$/
15
+
16
+ # regex for name characters we aren't going to use
17
+ DIRTY_STUFF = /[^,'(?:\p{Alpha}(?<\.))\p{Alpha}]{2,}/
18
+
19
+ # regex for boundaries we'll use to find leftover nickname boundaries
20
+ NICKNAME_LEFTOVER = /["'\(\)]{2}/
21
+
22
+ # regex for matching enclosed nicknames
23
+ NICKNAME = /(?<=["'\(])([\p{Alpha}\-\ '\.\,]+?)(?=["'\)])/
24
+
25
+ # regex for matching last names in a "first last" pattern
26
+ FIRSTLAST_MATCHER = /\p{Blank}(?<fam>#{COMPOUNDS}[\p{Alpha}\-\']+)\Z/i
27
+
28
+ # regex for matching last names in a "last first" pattern
29
+ LASTFIRST_MATCHER = /\A(?<fam>#{COMPOUNDS}\b[\p{Alpha}\-\']+)\p{Blank}/i
30
+
31
+ # regex for matching last names in a "last, first" pattern
32
+ LASTCOMFIRST_MATCHER = /\A(?<fam>#{COMPOUNDS}\b[\p{Alpha}\-\'\p{Blank}]+),/i
33
+
34
+ # period. probably not much performance help.
35
+ PERIOD = /\./
36
+
13
37
  # Public: parse a string into name parts
14
38
  #
15
39
  # name - a string to get the name from
@@ -39,19 +63,30 @@ module Nomener
39
63
  def self.parse!(name, format = {:order => :auto, :spacelimit => 0})
40
64
  raise ArgumentError, 'Name to parse not provided' unless (name.kind_of?(String) && !name.empty?)
41
65
 
42
- name = Nomener::Helper.reformat(name)
66
+ name = Nomener::Helper.reformat name
43
67
 
44
68
  # grab any identified nickname before working on the rest
45
- nick = parse_nick!(name)
46
- title = parse_title!(name)
69
+ nick = parse_nick! name
70
+ cleanup! name
47
71
 
48
72
  # grab any suffix' we can find
49
- suffix = parse_suffix!(name)
73
+ suffix = parse_suffix! name
74
+ cleanup! name
75
+
76
+ title = parse_title! name
77
+ cleanup! name
78
+
79
+ name.gsub! PERIOD, ' '
80
+ name.squeeze! " "
81
+ name.strip!
82
+
50
83
  first = last = middle = ""
51
84
 
52
85
  # if there's a comma, it may be a useful hint
53
86
  if !name.index(',').nil? # && (format[:order] == :auto || format[:order] == :lcf)
54
87
  clues = name.split(",")
88
+ clues.each { |i| i.strip! }
89
+
55
90
  # convention is last, first
56
91
  if clues.length == 2
57
92
  last, first = clues
@@ -70,16 +105,17 @@ module Nomener
70
105
  end
71
106
  # titles are part of the first name
72
107
  title = parse_title!(first) if title.nil? || title.empty?
108
+ elsif clues.length == 1
109
+ last = clues.shift
73
110
  else
74
- raise ParseError "Could not understand #{rename}"
111
+ raise ParseError, "Could not decipher commas in \"#{name}\""
75
112
  end
76
113
  elsif !name.index(" ").nil?
77
114
  last = parse_last!(name, format[:order])
78
115
  first, middle = parse_first!(name, format[:spacelimit])
79
- elsif name.index(" ").nil?
80
- first = name[0] # mononym
81
116
  else
82
- raise ParseError "Could not understand #{rename}"
117
+ last = name # possibly mononym
118
+ first = ""
83
119
  end
84
120
 
85
121
  {
@@ -99,10 +135,10 @@ module Nomener
99
135
  #
100
136
  # Returns nothing
101
137
  def self.cleanup!(dirty)
102
- dirty.gsub! /[^,'\p{Alpha}]{2,}/, ''
138
+ dirty.gsub! DIRTY_STUFF, ''
103
139
  dirty.squeeze! " "
104
140
  # remove any trailing commas or whitespace
105
- dirty.gsub! /[,|\s]+$/, ''
141
+ dirty.gsub! TRAILER_TRASH, ''
106
142
  dirty.strip!
107
143
  end
108
144
 
@@ -118,8 +154,11 @@ module Nomener
118
154
  titles << title.strip
119
155
  ''
120
156
  end
121
- cleanup!(nm)
122
- titles.join " "
157
+ t = titles.join " "
158
+ t.gsub! PERIOD, ' '
159
+ t.squeeze! " "
160
+ t.strip!
161
+ t
123
162
  end
124
163
 
125
164
  # Internal: pull off what suffixes we can
@@ -134,8 +173,11 @@ module Nomener
134
173
  suffixes << suffix.strip
135
174
  ''
136
175
  end
137
- cleanup!(nm)
138
- suffixes.join " "
176
+ s = suffixes.join " "
177
+ s.gsub! /\./, ' '
178
+ s.squeeze! " "
179
+ s.strip!
180
+ s
139
181
  end
140
182
 
141
183
  # Internal: parse nickname out of string. presuming it's in quotes
@@ -145,11 +187,15 @@ module Nomener
145
187
  #
146
188
  # Returns string of the nickname found or and empty string
147
189
  def self.parse_nick!(nm)
148
- nm.sub!(/(?<=["'\(])([\p{Alpha}\-\ ']+?)(?=["'\)])/, '')
190
+ nick = ""
191
+ nm.sub! NICKNAME, ''
149
192
  nick = $1.strip unless $1.nil?
150
- nm.sub! /["'\(\)]{2}/, ''
193
+ nm.sub! NICKNAME_LEFTOVER, ''
151
194
  nm.squeeze! " "
152
- nick || ""
195
+ nick.gsub! /\./, ' '
196
+ nick.squeeze! " "
197
+ nick.strip!
198
+ nick
153
199
  end
154
200
 
155
201
  # Internal: parse last name from string
@@ -167,13 +213,13 @@ module Nomener
167
213
  # format = :lcf if !nm.index(',').nil?
168
214
  end
169
215
 
170
- if format == :fl && n = nm.match(/\p{Blank}(?<fam>#{COMPOUNDS}[\p{Alpha}\-\']+)\Z/i)
216
+ if format == :fl && n = nm.match( FIRSTLAST_MATCHER )
171
217
  last = n[:fam].strip
172
218
  nm.sub!(last, "").strip!
173
- elsif format == :lf && n = nm.match(/\A(?<fam>#{COMPOUNDS}\b[\p{Alpha}\-\']+)\p{Blank}/i)
219
+ elsif format == :lf && n = nm.match( LASTFIRST_MATCHER )
174
220
  last = n[:fam].strip
175
221
  nm.sub!(last, "").strip!
176
- elsif format == :lcf && n = nm.match(/\A(?<fam>#{COMPOUNDS}\b[\p{Alpha}\-\'\p{Blank}]+),/i)
222
+ elsif format == :lcf && n = nm.match( LASTCOMFIRST_MATCHER )
177
223
  last = n[:fam].strip
178
224
  nm.sub!(last, "").strip!
179
225
  nm.sub!(',', "").strip!
@@ -2,7 +2,7 @@ module Nomener
2
2
  module Suffixes
3
3
 
4
4
  # Internal: Regex to match suffixes or honorifics after names
5
- SUFFIXES = %r!\b(?:
5
+ SUFFIXES = %r/(?<=\p{^Alpha})(?:
6
6
  AB # Bachelor of Arts
7
7
  | APC
8
8
  | Attorney[\p{Blank}\-]at[\p{Blank}\-]Law\.? # Attorney at Law, Attorney-at-Law
@@ -16,9 +16,9 @@ module Nomener
16
16
  | FAC(?:P|S) # FACP, FACS
17
17
  | fils
18
18
  | FRSL
19
- | [VX]?I{1,3} # roman numbers
20
- | IX|IV|V|VI|XI # roman numbers
21
- | X{1,3} # roman numbers
19
+ | (?:[VX]?I{1,3})(?!\.) # roman numbers
20
+ | (?:IX|IV|V|VI|XI)(?!\.) # roman numbers
21
+ | (?:X{1,3})(?!\.) # roman numbers
22
22
  | Jn?r\.?
23
23
  | Junior
24
24
  | LLB
@@ -33,7 +33,7 @@ module Nomener
33
33
  | Sn?r\.? # Snr, Sr
34
34
  | Senior
35
35
  | V\.?M\.?D\.?
36
- )\b!x
36
+ )(?=[^\p{Alpha}\p{Blank}]+|\z)/x
37
37
  end
38
38
  end
39
39
 
@@ -1,3 +1,3 @@
1
1
  module Nomener
2
- VERSION = "0.2.1"
2
+ VERSION = "0.2.4"
3
3
  end
@@ -1,7 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  # from http://www.theguardian.com/books/list/authorsaz
4
- # { from: "Brooke Magnanti (Belle de Jour)", to: { full: "Brooke Magnanti (Belle de Jour)", first: "Brooke Magnanti (Belle de", last: "Jour)" } },
5
4
  # { from: "Saki (Hector Hugh Munro)", to: { full: "Saki (Hector Hugh Munro)", first: "Saki (Hector Hugh", last: "Munro)" } },
6
5
  # { from: "Steven D Levitt and Stephen J Dubner", to: { full: "Steven D Levitt and Stephen J Dubner", first: "Steven D Levitt and Stephen J", last: "Dubner" } },
7
6
 
@@ -87,7 +86,7 @@ RSpec.describe "The Guardian author list" do
87
86
  { from: "Robert Burns", to: { full: "Robert Burns", first: "Robert", last: "Burns" } },
88
87
  { from: "William Burroughs", to: { full: "William Burroughs", first: "William", last: "Burroughs" } },
89
88
  { from: "AS Byatt", to: { full: "AS Byatt", first: "AS", last: "Byatt" } },
90
- { skip: true, from: "Lord Byron", to: { full: "Lord Byron", first: "Lord", last: "Byron" } },
89
+ { from: "Lord Byron", to: { full: "Lord Byron", title: "Lord", last: "Byron" } },
91
90
 
92
91
  { from: "Carmen Callil", to: { full: "Carmen Callil", first: "Carmen", last: "Callil" } },
93
92
  { from: "Italo Calvino", to: { full: "Italo Calvino", first: "Italo", last: "Calvino" } },
@@ -144,7 +143,7 @@ RSpec.describe "The Guardian author list" do
144
143
  { from: "Edmund de Waal", to: { full: "Edmund de Waal", first: "Edmund", last: "de Waal" } },
145
144
  { from: "Daniel Defoe", to: { full: "Daniel Defoe", first: "Daniel", last: "Defoe" } },
146
145
  { from: "Len Deighton", to: { full: "Len Deighton", first: "Len", last: "Deighton" } },
147
- { skip: true, from: "Don DeLillo", to: { full: "Don DeLillo", first: "Don", last: "DeLillo" } },
146
+ { from: "Don DeLillo", to: { full: "Don DeLillo", first: "Don", last: "DeLillo" } },
148
147
  { from: "Patrick DeWitt", to: { full: "Patrick DeWitt", first: "Patrick", last: "DeWitt" } },
149
148
  { from: "Giuseppe Tomasi di Lampedusa", to: { full: "Giuseppe Tomasi di Lampedusa", first: "Giuseppe Tomasi", last: "di Lampedusa" } },
150
149
  { from: "Junot Diaz", to: { full: "Junot Diaz", first: "Junot", last: "Diaz" } },
@@ -152,7 +151,7 @@ RSpec.describe "The Guardian author list" do
152
151
  { from: "Dick King-Smith", to: { full: "Dick King-Smith", first: "Dick", last: "King-Smith" } },
153
152
  { from: "Charles Dickens", to: { full: "Charles Dickens", first: "Charles", last: "Dickens" } },
154
153
  { from: "Emily Dickinson", to: { full: "Emily Dickinson", first: "Emily", last: "Dickinson" } },
155
- { skip: true, from: "Don Paterson", to: { full: "Don Paterson", first: "Don", last: "Paterson" } },
154
+ { from: "Don Paterson", to: { full: "Don Paterson", first: "Don", last: "Paterson" } },
156
155
  { from: "Emma Donoghue", to: { full: "Emma Donoghue", first: "Emma", last: "Donoghue" } },
157
156
  { from: "Fyodor Dostoevsky", to: { full: "Fyodor Dostoevsky", first: "Fyodor", last: "Dostoevsky" } },
158
157
  { from: "Roddy Doyle", to: { full: "Roddy Doyle", first: "Roddy", last: "Doyle" } },
@@ -338,6 +337,7 @@ RSpec.describe "The Guardian author list" do
338
337
  { from: "Richard Mabey", to: { full: "Richard Mabey", first: "Richard", last: "Mabey" } },
339
338
  { from: "George MacDonald Fraser", to: { full: "George MacDonald Fraser", first: "George MacDonald", last: "Fraser" } },
340
339
  { from: "Madeline Miller", to: { full: "Madeline Miller", first: "Madeline", last: "Miller" } },
340
+ { from: "Brooke Magnanti (Belle de Jour)", to: { full: "Brooke (Belle de Jour) Magnanti", first: "Brooke", last: "Magnanti", nick: "Belle de Jour" } },
341
341
  { from: "Maggie Gee", to: { full: "Maggie Gee", first: "Maggie", last: "Gee" } },
342
342
  { from: "Maile Chapman", to: { full: "Maile Chapman", first: "Maile", last: "Chapman" } },
343
343
  { from: "Andrei Makine", to: { full: "Andrei Makine", first: "Andrei", last: "Makine" } },
@@ -484,7 +484,7 @@ RSpec.describe "The Guardian author list" do
484
484
  { from: "WG Sebald", to: { full: "WG Sebald", first: "WG", last: "Sebald" } },
485
485
  { from: "Will Self", to: { full: "Will Self", first: "Will", last: "Self" } },
486
486
  { from: "Maurice Sendak", to: { full: "Maurice Sendak", first: "Maurice", last: "Sendak" } },
487
- { skip: true, from: "Dr Seuss", to: { full: "Dr Seuss", title: "Dr", last: "Seuss" } },
487
+ { from: "Dr Seuss", to: { full: "Dr Seuss", title: "Dr", last: "Seuss" } },
488
488
  { from: "William Shakespeare", to: { full: "William Shakespeare", first: "William", last: "Shakespeare" } },
489
489
  { from: "Shaun Tan", to: { full: "Shaun Tan", first: "Shaun", last: "Tan" } },
490
490
  { from: "Mary Shelley", to: { full: "Mary Shelley", first: "Mary", last: "Shelley" } },
@@ -581,7 +581,7 @@ RSpec.describe "The Guardian author list" do
581
581
  { from: "Emile Zola", to: { full: "Emile Zola", first: "Emile", last: "Zola" } }
582
582
  ].each do |name|
583
583
  it "parses #{name[:from]}" do
584
- skip if name[:skip]
584
+ skip name[:skip] if name.has_key?(:skip)
585
585
  parsed = Nomener.parse(name[:from])
586
586
  parse_hash = parsed.to_h
587
587
  parse_hash.each_pair do |k,v|
@@ -17,13 +17,12 @@ require 'spec_helper'
17
17
  # Bojaxhi, Agnes Gonxha (Mother Teresa)
18
18
  # Butler, Samuel (1835-1902)
19
19
  # Butler, Samuel (1612-1680)
20
- # Campbell, Beatrice Stella; (Mrs. Patrick Campbell)
21
- # Casanova, Giacomo (Jacques Casanova de Seingal)
20
+ #
21
+ #
22
22
  # Cecil, Robert (1st Viscount Cecil of Chelwood)
23
23
  # Chaplin, Charlie (Sir Charles Spencer Chaplin)
24
24
  # Chuang Chou (Chuang Tzu; Chuang Tse; Zhuang Zi)
25
25
  # Churchill, Sarah (Duchess of Marlborough)
26
- # Cole, Nat "King"
27
26
  # Cooper, Diana (Lady Diana Manners)
28
27
  # Courtney, Leonard H. (Lord Courtney)
29
28
  # Dalai Lama; see Tenzin Gyatso (His Holiness the 14th Dalai Lama)
@@ -37,7 +36,6 @@ require 'spec_helper'
37
36
  # Gwanghae-gun of Joseon
38
37
  # Han Shan
39
38
  # Hall, Evelyn Beatrice (also known by pseudonym "Stephen G. Tallentyre")
40
- # Halsey, William "Bull"
41
39
  # Henry VIII
42
40
  # Hillel the Elder
43
41
  # Hubbard, Kin (Frank McKinney Hubbard)
@@ -53,7 +51,6 @@ require 'spec_helper'
53
51
  # Li Bai
54
52
  # Lu Xun
55
53
  # Maharaji (Prem Rawat)
56
- # Malda, Rob "CmdrTaco"
57
54
  # Mao Zedong
58
55
  # Mary I of England
59
56
  # Michelangelo Buonarroti
@@ -71,7 +68,6 @@ require 'spec_helper'
71
68
  # Qin Shi Huang
72
69
  # Sathya Sai Baba
73
70
  # Savitri Devi
74
- # Shankar, Ravi (Art of Living founder, not the sitar maestro)
75
71
  # Simonides of Ceos
76
72
  # Sixtus V (pope)
77
73
  # Scanderbeg|Skenderbeu(The prince of Arberia)
@@ -85,12 +81,10 @@ require 'spec_helper'
85
81
  # Tzu Hsi
86
82
  # Jarmo Visakorpi
87
83
  # William of Occam
88
- # White, T. H. (Terence Hanbury)
89
- # Wilson, (Thomas) Woodrow
90
84
  # X, Malcolm (Malcolm Little)
91
85
  # Yankovic, "Weird Al"
92
86
  # Zé Tom
93
- # Zhuangzi (Zhuang Zi; Zhuang Zhou; Chuang Tzu; Chuang Tse)
87
+ #
94
88
 
95
89
  RSpec.describe "WikiQuote list parsing" do
96
90
  context "with the name" do
@@ -392,7 +386,8 @@ RSpec.describe "WikiQuote list parsing" do
392
386
  { from: "Byrd, Robert", to: { full: "Robert Byrd", first: "Robert", last: "Byrd"} },
393
387
  { from: "Byrne, David", to: { full: "David Byrne", first: "David", last: "Byrne"} },
394
388
  { from: "Byrne, Robert", to: { full: "Robert Byrne", first: "Robert", last: "Byrne"} },
395
- { skip: true, from: "Byron, Lord", to: { full: "Lord Byron", first: "Lord", last: "Byron"} },
389
+ { skip: "The given string implies Lord is his name, it is his title",
390
+ from: "Byron, Lord", to: { full: "Lord Byron", title: "Lord", last: "Byron"} },
396
391
 
397
392
  { from: "Cabell, James Branch", to: { full: "James Branch Cabell", first: "James Branch", last: "Cabell"} },
398
393
  { from: "Caesar, Irving", to: { full: "Irving Caesar", first: "Irving", last: "Caesar"} },
@@ -403,6 +398,7 @@ RSpec.describe "WikiQuote list parsing" do
403
398
  { from: "Calvin, John", to: { full: "John Calvin", first: "John", last: "Calvin"} },
404
399
  { from: "Cameron, Julia", to: { full: "Julia Cameron", first: "Julia", last: "Cameron"} },
405
400
  { from: "Cameron, Kirk", to: { full: "Kirk Cameron", first: "Kirk", last: "Cameron"} },
401
+ { from: "Campbell, Beatrice Stella; (Mrs. Patrick Campbell)", to: { full: "Beatrice Stella Campbell", first: "Beatrice Stella", nick: "Mrs Patrick Campbell", last: "Campbell"} },
406
402
  { from: "Camus, Albert", to: { full: "Albert Camus", first: "Albert", last: "Camus"} },
407
403
  { from: "Cannon, James P.", to: { full: "James P Cannon", first: "James P", last: "Cannon"} },
408
404
  { from: "Canseco, José", to: { full: "José Canseco", first: "José", last: "Canseco"} },
@@ -430,6 +426,7 @@ RSpec.describe "WikiQuote list parsing" do
430
426
  { from: "Carter, Jimmy", to: { full: "Jimmy Carter", first: "Jimmy", last: "Carter"} },
431
427
  { from: "Carville, James", to: { full: "James Carville", first: "James", last: "Carville"} },
432
428
  { from: "Casals, Pablo", to: { full: "Pablo Casals", first: "Pablo", last: "Casals"} },
429
+ { from: "Casanova, Giacomo (Jacques Casanova de Seingal)", to: { full: "Giacomo Casanova", first: "Giacomo", nick:"Jacques Casanova de Seingal", last: "Casanova" } },
433
430
  { from: "Cash, Johnny", to: { full: "Johnny Cash", first: "Johnny", last: "Cash"} },
434
431
  { from: "Castaneda, Carlos", to: { full: "Carlos Castaneda", first: "Carlos", last: "Castaneda"} },
435
432
  { from: "Castro, Fidel", to: { full: "Fidel Castro", first: "Fidel", last: "Castro"} },
@@ -447,8 +444,10 @@ RSpec.describe "WikiQuote list parsing" do
447
444
  { from: "Chapin, Harry", to: { full: "Harry Chapin", first: "Harry", last: "Chapin"} },
448
445
  { from: "Chapman, Colin", to: { full: "Colin Chapman", first: "Colin", last: "Chapman"} },
449
446
  { from: "Chappelle, Dave", to: { full: "Dave Chappelle", first: "Dave", last: "Chappelle"} },
450
- { skip: true, from: "Charles II, King of England", to: { full: "King of England Charles II", first: "King of England", last: "Charles II"} },
451
- { skip: true, from: "Charles V, Holy Roman Emperor", to: { full: "Holy Roman Emperor Charles V", first: "Holy Roman Emperor", last: "Charles V"} },
447
+ { skip: "Haven't figured this one yet",
448
+ from: "Charles II, King of England", to: { full: "King of England Charles II", first: "King of England", last: "Charles II"} },
449
+ { skip: "Haven't figured this one yet",
450
+ from: "Charles V, Holy Roman Emperor", to: { full: "Holy Roman Emperor Charles V", first: "Holy Roman Emperor", last: "Charles V"} },
452
451
  { from: "Charles, Ray", to: { full: "Ray Charles", first: "Ray", last: "Charles"} },
453
452
  { from: "Chateaubriand, François-René de", to: { full: "François-René de Chateaubriand", first: "François-René", last: "de Chateaubriand"} },
454
453
  { from: "Chatwin, Bruce", to: { full: "Bruce Chatwin", first: "Bruce", last: "Chatwin"} },
@@ -587,11 +586,12 @@ RSpec.describe "WikiQuote list parsing" do
587
586
  { from: "Deutsch, David", to: { full: "David Deutsch", first: "David", last: "Deutsch"} },
588
587
  { from: "Devine, Carl", to: { full: "Carl Devine", first: "Carl", last: "Devine"} },
589
588
  { from: "Devo, musical group", to: { full: "musical group Devo", first: "musical group", last: "Devo"} },
590
- { skip: true, from: "DeWolfe, Ron (born L. Ron Hubbard, Jr.)", to: { full: "Jr.) DeWolfe, Ron (born L Ron Hubbard", first: "Jr.)", last: "DeWolfe, Ron (born L Ron Hubbard"} },
589
+ { from: "DeWolfe, Ron (born L. Ron Hubbard, Jr.)", to: { full: "Ron DeWolfe", first: "Ron", last: "DeWolfe", nick: "born L Ron Hubbard, Jr"} },
591
590
  { from: "Dhavernas, Caroline", to: { full: "Caroline Dhavernas", first: "Caroline", last: "Dhavernas"} },
592
591
  { from: "Diana, Princess of Wales", to: { full: "Princess of Wales Diana", first: "Princess of Wales", last: "Diana"} },
593
592
  { from: "DiCaprio, Leonardo", to: { full: "Leonardo DiCaprio", first: "Leonardo", last: "DiCaprio"} },
594
- { skip: true, from: "Dick, Philip Kindred - a.k.a. PKD", to: { full: "Philip Kindred - a.k.a. PKD Dick", first: "Philip Kindred - a.k.a. PKD", last: "Dick"} },
593
+ { skip: "Haven't figured this one yet",
594
+ from: "Dick, Philip Kindred - a.k.a. PKD", to: { full: "Philip Kindred - a.k.a. PKD Dick", first: "Philip Kindred - a.k.a. PKD", last: "Dick"} },
595
595
  { from: "Dickens, Charles", to: { full: "Charles Dickens", first: "Charles", last: "Dickens"} },
596
596
  { from: "Dickinson, Emily", to: { full: "Emily Dickinson", first: "Emily", last: "Dickinson"} },
597
597
  { from: "Dickson, Leonard Eugene", to: { full: "Leonard Eugene Dickson", first: "Leonard Eugene", last: "Dickson"} },
@@ -646,7 +646,8 @@ RSpec.describe "WikiQuote list parsing" do
646
646
  { from: "Eckhart, Meister", to: { full: "Meister Eckhart", first: "Meister", last: "Eckhart"} },
647
647
  { from: "Eddington, Arthur Stanley", to: { full: "Arthur Stanley Eddington", first: "Arthur Stanley", last: "Eddington"} },
648
648
  { from: "Edison, Thomas Alva", to: { full: "Thomas Alva Edison", first: "Thomas Alva", last: "Edison"} },
649
- { skip: true, from: "Edward VII, King of England", to: { full: "King of England Edward VII", first: "King of England", last: "Edward VII"} },
649
+ { skip: "Haven't figured this one yet",
650
+ from: "Edward VII, King of England", to: { full: "King of England Edward VII", first: "King of England", last: "Edward VII"} },
650
651
  { from: "Edwards, Edwin W.", to: { full: "Edwin W Edwards", first: "Edwin W", last: "Edwards"} },
651
652
  { from: "Edwards, John", to: { full: "John Edwards", first: "John", last: "Edwards"} },
652
653
  { from: "Eggers, Dave", to: { full: "Dave Eggers", first: "Dave", last: "Eggers"} },
@@ -658,8 +659,10 @@ RSpec.describe "WikiQuote list parsing" do
658
659
  { from: "Eldredge, Niles", to: { full: "Niles Eldredge", first: "Niles", last: "Eldredge"} },
659
660
  { from: "Eliot, George", to: { full: "George Eliot", first: "George", last: "Eliot"} },
660
661
  { from: "Eliot, Thomas Stearns", to: { full: "Thomas Stearns Eliot", first: "Thomas Stearns", last: "Eliot"} },
661
- { skip: true, from: "Elizabeth, the Queen Mother", to: { full: "the Queen Mother Elizabeth", first: "the Queen Mother", last: "Elizabeth"} },
662
- { from: "Ellington, Duke", to: { full: "Duke Ellington", first: "Duke", last: "Ellington"} },
662
+ { skip: "Haven't figured this one yet",
663
+ from: "Elizabeth, the Queen Mother", to: { full: "the Queen Mother Elizabeth", first: "the Queen Mother", last: "Elizabeth"} },
664
+ { skip: "Duke will be treated as a title, not a name",
665
+ from: "Ellington, Duke", to: { full: "Duke Ellington", first: "Duke", last: "Ellington"} },
663
666
  { from: "Elliot, Cass", to: { full: "Cass Elliot", first: "Cass", last: "Elliot"} },
664
667
  { from: "Elliot, Jim", to: { full: "Jim Elliot", first: "Jim", last: "Elliot"} },
665
668
  { from: "Ellison, Harlan", to: { full: "Harlan Ellison", first: "Harlan", last: "Ellison"} },
@@ -785,8 +788,10 @@ RSpec.describe "WikiQuote list parsing" do
785
788
  { from: "Geagea, Samir", to: { full: "Samir Geagea", first: "Samir", last: "Geagea"} },
786
789
  { from: "Gemayel, Solange", to: { full: "Solange Gemayel", first: "Solange", last: "Gemayel"} },
787
790
  { from: "Genet, Jean", to: { full: "Jean Genet", first: "Jean", last: "Genet"} },
788
- { skip: true, from: "George III, King of England", to: { full: "King of England George III", first: "King of England", last: "George III"} },
789
- { skip: true, from: "George V, King of England", to: { full: "King of England George V", first: "King of England", last: "George V"} },
791
+ { skip: "Haven't figured this one yet",
792
+ from: "George III, King of England", to: { full: "King of England George III", first: "King of England", last: "George III"} },
793
+ { skip: "Haven't figured this one yet",
794
+ from: "George V, King of England", to: { full: "King of England George V", first: "King of England", last: "George V"} },
790
795
  { from: "Gephardt, Dick", to: { full: "Dick Gephardt", first: "Dick", last: "Gephardt"} },
791
796
  { from: "Gerety, Frances", to: { full: "Frances Gerety", first: "Frances", last: "Gerety"} },
792
797
  { from: "Gergen, David", to: { full: "David Gergen", first: "David", last: "Gergen"} },
@@ -860,6 +865,7 @@ RSpec.describe "WikiQuote list parsing" do
860
865
  { from: "Haley, Alex", to: { full: "Alex Haley", first: "Alex", last: "Haley"} },
861
866
  { from: "Halliburton, Richard", to: { full: "Richard Halliburton", first: "Richard", last: "Halliburton"} },
862
867
  { from: "Halme, Tony", to: { full: "Tony Halme", first: "Tony", last: "Halme"} },
868
+ { from: "Halsey, William \"Bull\"", to: { full: "William Halsey", first: "William", nick: "Bull", last: "Halsey"} },
863
869
  { from: "Hamilton, Alexander", to: { full: "Alexander Hamilton", first: "Alexander", last: "Hamilton"} },
864
870
  { from: "Hamilton, Gail", to: { full: "Gail Hamilton", first: "Gail", last: "Hamilton"} },
865
871
  { from: "Hamilton, Laurell K.", to: { full: "Laurell K Hamilton", first: "Laurell K", last: "Hamilton"} },
@@ -867,7 +873,8 @@ RSpec.describe "WikiQuote list parsing" do
867
873
  { from: "Handy, Charles", to: { full: "Charles Handy", first: "Charles", last: "Handy"} },
868
874
  { from: "Handy, W. C.", to: { full: "W C Handy", first: "W C", last: "Handy"} },
869
875
  { from: "Hannity, Sean", to: { full: "Sean Hannity", first: "Sean", last: "Hannity"} },
870
- { skip: true, from: "Harold II Godwinson, King of England", to: { full: "King of England Harold II Godwinson", first: "King of England", last: "Harold II Godwinson"} },
876
+ { skip: "Haven't figured this one yet",
877
+ from: "Harold II Godwinson, King of England", to: { full: "King of England Harold II Godwinson", first: "King of England", last: "Harold II Godwinson"} },
871
878
  { from: "Harper, Stephen", to: { full: "Stephen Harper", first: "Stephen", last: "Harper"} },
872
879
  { from: "Harris, Sam", to: { full: "Sam Harris", first: "Sam", last: "Harris"} },
873
880
  { from: "Harris, William Torrey", to: { full: "William Torrey Harris", first: "William Torrey", last: "Harris"} },
@@ -943,7 +950,7 @@ RSpec.describe "WikiQuote list parsing" do
943
950
  { from: "Hofstadter, Richard", to: { full: "Richard Hofstadter", first: "Richard", last: "Hofstadter"} },
944
951
  { from: "Holderlin, Friedrich", to: { full: "Friedrich Holderlin", first: "Friedrich", last: "Holderlin"} },
945
952
  { from: "Holmes, Oliver Wendell, Jr.", to: { full: "Oliver Wendell Holmes Jr", first: "Oliver Wendell", last: "Holmes", suffix: "Jr"} },
946
- { from: "Holmes, Oliver Wendell, Sr.", to: { full: "Oliver Wendell Holmes Sr, ", first: "Oliver Wendell", last: "Holmes", suffix: "Sr"} },
953
+ { from: "Holmes, Oliver Wendell, Sr.", to: { full: "Oliver Wendell Holmes Sr", first: "Oliver Wendell", last: "Holmes", suffix: "Sr"} },
947
954
  { from: "Holt, Anatol", to: { full: "Anatol Holt", first: "Anatol", last: "Holt"} },
948
955
  { from: "Holt, John", to: { full: "John Holt", first: "John", last: "Holt"} },
949
956
  { from: "Honda, Soichiro", to: { full: "Soichiro Honda", first: "Soichiro", last: "Honda"} },
@@ -1019,7 +1026,8 @@ RSpec.describe "WikiQuote list parsing" do
1019
1026
  { from: "Janáček, Leoš", to: { full: "Leoš Janáček", first: "Leoš", last: "Janáček"} },
1020
1027
  { from: "Jarrell, Randall", to: { full: "Randall Jarrell", first: "Randall", last: "Jarrell"} },
1021
1028
  { from: "Jarrett, Jeff", to: { full: "Jeff Jarrett", first: "Jeff", last: "Jarrett"} },
1022
- { skip: true, from: "Jay, Glenn, Miner", to: { full: "Miner Jay, Glenn", first: "Miner", last: "Jay, Glenn"} },
1029
+ { skip: "Too many commas!",
1030
+ from: "Jay, Glenn, Miner", to: { full: "Miner Jay, Glenn", first: "Miner", last: "Jay, Glenn"} },
1023
1031
  { from: "Jarry, Alfred", to: { full: "Alfred Jarry", first: "Alfred", last: "Jarry"} },
1024
1032
  { from: "Jeffers, Robinson", to: { full: "Robinson Jeffers", first: "Robinson", last: "Jeffers"} },
1025
1033
  { from: "Jefferson, Thomas", to: { full: "Thomas Jefferson", first: "Thomas", last: "Jefferson"} },
@@ -1086,7 +1094,7 @@ RSpec.describe "WikiQuote list parsing" do
1086
1094
  { from: "Kellner, Friedrich", to: { full: "Friedrich Kellner", first: "Friedrich", last: "Kellner"} },
1087
1095
  { from: "Kelly, Walt", to: { full: "Walt Kelly", first: "Walt", last: "Kelly"} },
1088
1096
  { from: "Kempton, Murray", to: { full: "Murray Kempton", first: "Murray", last: "Kempton"} },
1089
- { from: "Kelvin, Lord", to: { full: "Lord Kelvin", first: "Lord", last: "Kelvin"} },
1097
+ { from: "Kelvin, Lord", to: { full: "Lord Kelvin", title: "Lord", last: "Kelvin"} },
1090
1098
  { from: "Kendrick, Alex", to: { full: "Alex Kendrick", first: "Alex", last: "Kendrick"} },
1091
1099
  { from: "Kennan, George F.", to: { full: "George F Kennan", first: "George F", last: "Kennan"} },
1092
1100
  { from: "Kennedy, Anthony", to: { full: "Anthony Kennedy", first: "Anthony", last: "Kennedy"} },
@@ -1226,7 +1234,8 @@ RSpec.describe "WikiQuote list parsing" do
1226
1234
  { from: "Loo, Tristan J.", to: { full: "Tristan J Loo", first: "Tristan J", last: "Loo"} },
1227
1235
  { from: "Looney, General William", to: { full: "William Looney", title: "General", first: "William", last: "Looney"} },
1228
1236
  { from: "Lott, Trent", to: { full: "Trent Lott", first: "Trent", last: "Lott"} },
1229
- { skip: true, from: "Louis VII, King of France", to: { full: "King of France Louis VII", first: "King of France", last: "Louis VII"} },
1237
+ { skip: "Haven't figured this one yet",
1238
+ from: "Louis VII, King of France", to: { full: "King of France Louis VII", first: "King of France", last: "Louis VII"} },
1230
1239
  { from: "Louis, Joe", to: { full: "Joe Louis", first: "Joe", last: "Louis"} },
1231
1240
  { from: "Love, Courtney", to: { full: "Courtney Love", first: "Courtney", last: "Love"} },
1232
1241
  { from: "Lovecraft, H. P.", to: { full: "H P Lovecraft", first: "H P", last: "Lovecraft"} },
@@ -1252,7 +1261,8 @@ RSpec.describe "WikiQuote list parsing" do
1252
1261
  { from: "Madraiwiwi, Ratu Joni", to: { full: "Ratu Joni Madraiwiwi", title: "Ratu", first: "Joni", last: "Madraiwiwi"} },
1253
1262
  { from: "Magee, Bryan", to: { full: "Bryan Magee", first: "Bryan", last: "Magee"} },
1254
1263
  { from: "Maher, Bill", to: { full: "Bill Maher", first: "Bill", last: "Maher"} },
1255
- { skip: true, from: "Mallet, David (or David Malloch)", to: { full: "David (or David Malloch) Mallet", first: "David (or David Malloch)", last: "Mallet"} },
1264
+ { from: "Malda, Rob \"CmdrTaco\"", to: { full: "Rob Malda", first: "Rob", nick: "CmdrTaco", last: "Malda"} },
1265
+ { from: "Mallet, David (or David Malloch)", to: { full: "David Mallet", first: "David", last: "Mallet", nick: "or David Malloch"} },
1256
1266
  { from: "Malley, Matt", to: { full: "Matt Malley", first: "Matt", last: "Malley"} },
1257
1267
  { from: "Malory, Thomas", to: { full: "Thomas Malory", first: "Thomas", last: "Malory"} },
1258
1268
  { from: "Malraux, André", to: { full: "André Malraux", first: "André", last: "Malraux"} },
@@ -1337,7 +1347,8 @@ RSpec.describe "WikiQuote list parsing" do
1337
1347
  { from: "Miller, Bode", to: { full: "Bode Miller", first: "Bode", last: "Miller"} },
1338
1348
  { from: "Miller, Henry", to: { full: "Henry Miller", first: "Henry", last: "Miller"} },
1339
1349
  { from: "Miller, Ron", to: { full: "Ron Miller", first: "Ron", last: "Miller"} },
1340
- { from: "Miller, Walter M. (Jr.)", to: { full: "Walter M Miller", first: "Walter M", last: "Miller", suffix: "Jr"} },
1350
+ { skip: "Jr will be parsed as a nick",
1351
+ from: "Miller, Walter M. (Jr.)", to: { full: "Walter M Miller", first: "Walter M", last: "Miller", suffix: "Jr"} },
1341
1352
  { from: "Milligan, Spike", to: { full: "Spike Milligan", first: "Spike", last: "Milligan"} },
1342
1353
  { from: "Mills, C. Wright", to: { full: "C Wright Mills", first: "C Wright", last: "Mills"} },
1343
1354
  { from: "Milne, A.A.", to: { full: "A A Milne", first: "A A", last: "Milne"} },
@@ -1376,7 +1387,8 @@ RSpec.describe "WikiQuote list parsing" do
1376
1387
  { from: "Muhammad, Holy Prophet", to: { full: "Holy Prophet Muhammad", first: "Holy Prophet", last: "Muhammad"} },
1377
1388
  { from: "Muir, John", to: { full: "John Muir", first: "John", last: "Muir"} },
1378
1389
  { from: "Mullally, Megan", to: { full: "Megan Mullally", first: "Megan", last: "Mullally"} },
1379
- { skip: true, from: "Mulock, Dinah Maria; also Dinah Maria Craik", to: { full: "Dinah Maria; also Dinah Maria Craik Mulock", first: "Dinah Maria; also Dinah Maria Craik", last: "Mulock"} },
1390
+ { skip: "Haven't figured this one yet",
1391
+ from: "Mulock, Dinah Maria; also Dinah Maria Craik", to: { full: "Dinah Maria; also Dinah Maria Craik Mulock", first: "Dinah Maria; also Dinah Maria Craik", last: "Mulock"} },
1380
1392
  { from: "Mumford, Lewis", to: { full: "Lewis Mumford", first: "Lewis", last: "Mumford"} },
1381
1393
  { from: "Mumpfield, Susie", to: { full: "Susie Mumpfield", first: "Susie", last: "Mumpfield"} },
1382
1394
  { from: "Munch, Edvard", to: { full: "Edvard Munch", first: "Edvard", last: "Munch"} },
@@ -1385,10 +1397,13 @@ RSpec.describe "WikiQuote list parsing" do
1385
1397
  { from: "Mussolini, Benito", to: { full: "Benito Mussolini", first: "Benito", last: "Mussolini"} },
1386
1398
  { from: "Mustaine, Dave", to: { full: "Dave Mustaine", first: "Dave", last: "Mustaine"} },
1387
1399
  { from: "Myatt, David", to: { full: "David Myatt", first: "David", last: "Myatt"} },
1400
+
1388
1401
  { from: "Nabokov, Vladimir", to: { full: "Vladimir Nabokov", first: "Vladimir", last: "Nabokov"} },
1389
- { from: "Nachman, Rabbi, of Bratzlav", to: { full: "of Bratzlav Nachman, Rabbi", first: "of Bratzlav", last: "Nachman, Rabbi"} },
1402
+ { skip: "Haven't figured this one yet",
1403
+ from: "Nachman, Rabbi, of Bratzlav", to: { full: "of Bratzlav Nachman, Rabbi", first: "of Bratzlav", last: "Nachman, Rabbi"} },
1390
1404
  { from: "Nader, Ralph", to: { full: "Ralph Nader", first: "Ralph", last: "Nader"} },
1391
1405
  { from: "Nagel, Thomas", to: { full: "Thomas Nagel", first: "Thomas", last: "Nagel"} },
1406
+ { from: "Cole, Nat \"King\"", to: { full: "Nat Cole", first: "Nat", nick: "King", last: "Cole"} },
1392
1407
  { from: "Naidu, Richard", to: { full: "Richard Naidu", first: "Richard", last: "Naidu"} },
1393
1408
  { from: "Nailatikau, Adi Koila", to: { full: "Adi Koila Nailatikau", title: "Adi", first: "Koila", last: "Nailatikau"} },
1394
1409
  { from: "Nailatikau, Ratu Epeli Qaraninamu", to: { full: "Ratu Epeli Qaraninamu Nailatikau", title: "Ratu", first: "Epeli Qaraninamu", last: "Nailatikau"} },
@@ -1410,16 +1425,19 @@ RSpec.describe "WikiQuote list parsing" do
1410
1425
  { from: "Nietzsche, Friedrich", to: { full: "Friedrich Nietzsche", first: "Friedrich", last: "Nietzsche"} },
1411
1426
  { from: "Nightingale, Florence", to: { full: "Florence Nightingale", first: "Florence", last: "Nightingale"} },
1412
1427
  { from: "Nijinsky, Vaslav", to: { full: "Vaslav Nijinsky", first: "Vaslav", last: "Nijinsky"} },
1413
- { from: "Nin, Anaïs Nin", to: { full: "Anaïs Nin Nin", first: "Anaïs Nin", last: "Nin"} },
1428
+ { skip: "Not only misspelled, Nin get treated as a compound",
1429
+ from: "Nin, Anaïs Nin", to: { full: "Anaïs Nin Nin", first: "Anaïs", last: "Nin Nin"} },
1414
1430
  { from: "Ninio, Jacques", to: { full: "Jacques Ninio", first: "Jacques", last: "Ninio"} },
1415
1431
  { from: "Niranjan, Sangeeta", to: { full: "Sangeeta Niranjan", first: "Sangeeta", last: "Niranjan"} },
1416
1432
  { from: "Niven, Larry", to: { full: "Larry Niven", first: "Larry", last: "Niven"} },
1417
1433
  { from: "Nixon, Richard", to: { full: "Richard Nixon", first: "Richard", last: "Nixon"} },
1418
1434
  { from: "Noam, Eli", to: { full: "Eli Noam", first: "Eli", last: "Noam"} },
1419
1435
  { from: "Norton, Joshua Abraham", to: { full: "Joshua Abraham Norton", first: "Joshua Abraham", last: "Norton"} },
1420
- { skip: true, from: "Nostradamus (Michel de Notredame, or Michel de Nostredame)", to: { full: "or Michel de Nostredame) Nostradamus (Michel de Notredame", first: "or Michel de Nostredame)", last: "Nostradamus (Michel de Notredame"} },
1436
+ { skip: "Haven't figured this one yet",
1437
+ from: "Nostradamus (Michel de Notredame, or Michel de Nostredame)", to: { full: "or Michel de Nostredame) Nostradamus (Michel de Notredame", first: "or Michel de Nostredame)", last: "Nostradamus (Michel de Notredame"} },
1421
1438
  { from: "Nugent, Ted", to: { full: "Ted Nugent", first: "Ted", last: "Nugent"} },
1422
- { from: "Nukem, Duke", to: { full: "Duke Nukem", first: "Duke", last: "Nukem"} },
1439
+ { skip: "Duke will treated as a title, not a name",
1440
+ from: "Nukem, Duke", to: { full: "Duke Nukem", title: "Duke", last: "Nukem"} },
1423
1441
  { from: "Null, Gary", to: { full: "Gary Null", first: "Gary", last: "Null"} },
1424
1442
  { from: "Nunally, Patrick", to: { full: "Patrick Nunally", first: "Patrick", last: "Nunally"} },
1425
1443
  { from: "Nuwas, Abu", to: { full: "Abu Nuwas", first: "Abu", last: "Nuwas"} },
@@ -1435,7 +1453,8 @@ RSpec.describe "WikiQuote list parsing" do
1435
1453
  { from: "Oliver, Jamie", to: { full: "Jamie Oliver", first: "Jamie", last: "Oliver"} },
1436
1454
  { from: "Oliver, Robert T.", to: { full: "Robert T Oliver", first: "Robert T", last: "Oliver"} },
1437
1455
  { from: "Olson, Ken", to: { full: "Ken Olson", first: "Ken", last: "Olson"} },
1438
- { skip: true, from: "Olsen, Mary-Kate and Ashley", to: { full: "Mary-Kate and Ashley Olsen", first: "Mary-Kate and Ashley", last: "Olsen"} },
1456
+ { skip: "Haven't figured this one yet",
1457
+ from: "Olsen, Mary-Kate and Ashley", to: { full: "Mary-Kate and Ashley Olsen", first: "Mary-Kate and Ashley", last: "Olsen"} },
1439
1458
  { from: "Onassis, Jacqueline Kennedy", to: { full: "Jacqueline Kennedy Onassis", first: "Jacqueline Kennedy", last: "Onassis"} },
1440
1459
  { from: "Ondrick, William F.", to: { full: "William F Ondrick", first: "William F", last: "Ondrick"} },
1441
1460
  { from: "Oppenheimer, J. Robert", to: { full: "J Robert Oppenheimer", first: "J Robert", last: "Oppenheimer"} },
@@ -1486,7 +1505,8 @@ RSpec.describe "WikiQuote list parsing" do
1486
1505
  { from: "Petronius, Gaius", to: { full: "Gaius Petronius", first: "Gaius", last: "Petronius"} },
1487
1506
  { from: "Petty, Tom", to: { full: "Tom Petty", first: "Tom", last: "Petty"} },
1488
1507
  { from: "Phelps, Michael", to: { full: "Michael Phelps", first: "Michael", last: "Phelps"} },
1489
- { skip: true, from: "Philip, Duke of Edinburgh", to: { full: "Duke of Edinburgh Philip", first: "Duke of Edinburgh", last: "Philip"} },
1508
+ { skip: "Haven't figured this one yet",
1509
+ from: "Philip, Duke of Edinburgh", to: { full: "Duke of Edinburgh Philip", first: "Duke of Edinburgh", last: "Philip"} },
1490
1510
  { from: "Philips, Emo", to: { full: "Emo Philips", first: "Emo", last: "Philips"} },
1491
1511
  { from: "Piaget, Jean", to: { full: "Jean Piaget", first: "Jean", last: "Piaget"} },
1492
1512
  { from: "Picasso, Pablo", to: { full: "Pablo Picasso", first: "Pablo", last: "Picasso"} },
@@ -1521,7 +1541,8 @@ RSpec.describe "WikiQuote list parsing" do
1521
1541
  { from: "Quale, Anthony", to: { full: "Anthony Quale", first: "Anthony", last: "Quale"} },
1522
1542
  { from: "Quarles, Francis", to: { full: "Francis Quarles", first: "Francis", last: "Quarles"} },
1523
1543
  { from: "Quayle, Dan", to: { full: "Dan Quayle", first: "Dan", last: "Quayle"} },
1524
- { skip: true, from: "Marie, Queen of Romania", to: { full: "Queen of Romania Marie", first: "Queen of Romania", last: "Marie"} },
1544
+ { skip: "Haven't figured this one yet",
1545
+ from: "Marie, Queen of Romania", to: { full: "Queen of Romania Marie", first: "Queen of Romania", last: "Marie"} },
1525
1546
  { from: "Quine, Willard van Orman", to: { full: "Willard van Orman Quine", first: "Willard van Orman", last: "Quine"} },
1526
1547
  { from: "Quintilian, Marcus Fabius", to: { full: "Marcus Fabius Quintilian", first: "Marcus Fabius", last: "Quintilian"} },
1527
1548
  { from: "Quisenberry, Dan", to: { full: "Dan Quisenberry", first: "Dan", last: "Quisenberry"} },
@@ -1581,7 +1602,8 @@ RSpec.describe "WikiQuote list parsing" do
1581
1602
  { from: "Roth, Geneen", to: { full: "Geneen Roth", first: "Geneen", last: "Roth"} },
1582
1603
  { from: "Roth, Philip", to: { full: "Philip Roth", first: "Philip", last: "Roth"} },
1583
1604
  { from: "Rothbard, Murray", to: { full: "Murray Rothbard", first: "Murray", last: "Rothbard"} },
1584
- { from: "Rothschild, Baron", to: { full: "Baron Rothschild", first: "Baron", last: "Rothschild"} },
1605
+ { skip: "Technically this isn't a name, so do what?",
1606
+ from: "Rothschild, Baron", to: { full: "Baron Rothschild", title: "Baron", last: "Rothschild"} },
1585
1607
  { from: "Roux, Joseph", to: { full: "Joseph Roux", first: "Joseph", last: "Roux"} },
1586
1608
  { from: "Rousseau, Jean-Jacques", to: { full: "Jean-Jacques Rousseau", first: "Jean-Jacques", last: "Rousseau"} },
1587
1609
  { from: "Rovabokola, Ratu Viliame", to: { full: "Ratu Viliame Rovabokola", title: "Ratu", first: "Viliame", last: "Rovabokola"} },
@@ -1636,10 +1658,11 @@ RSpec.describe "WikiQuote list parsing" do
1636
1658
  { from: "Sellers, Peter", to: { full: "Peter Sellers", first: "Peter", last: "Sellers"} },
1637
1659
  { from: "Serling, Rod", to: { full: "Rod Serling", first: "Rod", last: "Serling"} },
1638
1660
  { from: "Serrano, Miguel", to: { full: "Miguel Serrano", first: "Miguel", last: "Serrano"} },
1639
- { skip: true, from: "Seuss, Dr.", to: { full: "Dr. Seuss", first: "Dr.", last: "Seuss"} },
1661
+ { from: "Seuss, Dr.", to: { full: "Dr. Seuss", title: "Dr", last: "Seuss"} },
1640
1662
  { from: "Shahak, Israel", to: { full: "Israel Shahak", first: "Israel", last: "Shahak"} },
1641
1663
  { from: "Shakespeare, William", to: { full: "William Shakespeare", first: "William", last: "Shakespeare"} },
1642
1664
  { from: "Shakur, Tupac", to: { full: "Tupac Shakur", first: "Tupac", last: "Shakur"} },
1665
+ { from: "Shankar, Ravi (Art of Living founder, not the sitar maestro)", to: { full: "Ravi Shankar", first: "Ravi", last: "Shankar", nick: "Art of Living founder, not the sitar maestro"} },
1643
1666
  { from: "Sharpton, Al", to: { full: "Al Sharpton", first: "Al", last: "Sharpton"} },
1644
1667
  { from: "Shaw, George Bernard", to: { full: "George Bernard Shaw", first: "George Bernard", last: "Shaw"} },
1645
1668
  { from: "Shawcross, Hartley", to: { full: "Hartley Shawcross", first: "Hartley", last: "Shawcross"} },
@@ -1725,7 +1748,7 @@ RSpec.describe "WikiQuote list parsing" do
1725
1748
  { from: "Stoppard, Tom", to: { full: "Tom Stoppard", first: "Tom", last: "Stoppard"} },
1726
1749
  { from: "Stout, Rex", to: { full: "Rex Stout", first: "Rex", last: "Stout"} },
1727
1750
  { from: "Stowe, Madeleine", to: { full: "Madeleine Stowe", first: "Madeleine", last: "Stowe"} },
1728
- { from: "Stratford, Lord (Tony Banks)", to: { full: "Lord (Tony Banks) Stratford", first: "Lord (Tony Banks)", last: "Stratford"} },
1751
+ { from: "Stratford, Lord (Tony Banks)", to: { full: "Lord (Tony Banks) Stratford", title: "Lord", last: "Stratford", nick: "Tony Banks"} },
1729
1752
  { from: "Strauss, Richard", to: { full: "Richard Strauss", first: "Richard", last: "Strauss"} },
1730
1753
  { from: "Stravinsky, Igor", to: { full: "Igor Stravinsky", first: "Igor", last: "Stravinsky"} },
1731
1754
  { from: "Stroustrup, Bjarne", to: { full: "Bjarne Stroustrup", first: "Bjarne", last: "Stroustrup"} },
@@ -1751,7 +1774,8 @@ RSpec.describe "WikiQuote list parsing" do
1751
1774
  { from: "Tavola, Kaliopate", to: { full: "Kaliopate Tavola", first: "Kaliopate", last: "Tavola"} },
1752
1775
  { from: "Tchaikovsky, Pyotr Ilyich", to: { full: "Pyotr Ilyich Tchaikovsky", first: "Pyotr Ilyich", last: "Tchaikovsky"} },
1753
1776
  { from: "Teilhard de Chardin, Pierre", to: { full: "Pierre Teilhard de Chardin", first: "Pierre", last: "Teilhard de Chardin"} },
1754
- { from: "Tennyson, Alfred (Lord)", to: { full: "Alfred (Lord) Tennyson", first: "Alfred (Lord)", last: "Tennyson"} },
1777
+ { skip: "Lord will be treated as a nick",
1778
+ from: "Tennyson, Alfred (Lord)", to: { full: "Alfred (Lord) Tennyson", first: "Alfred", last: "Tennyson", nick: "Lord"} },
1755
1779
  { from: "Tesla, Nikola", to: { full: "Nikola Tesla", first: "Nikola", last: "Tesla"} },
1756
1780
  { from: "Thackeray, William Makepeace", to: { full: "William Makepeace Thackeray", first: "William Makepeace", last: "Thackeray"} },
1757
1781
  { from: "Thant, U", to: { full: "U Thant", first: "U", last: "Thant"} },
@@ -1759,7 +1783,8 @@ RSpec.describe "WikiQuote list parsing" do
1759
1783
  { from: "Theron, Charlize", to: { full: "Charlize Theron", first: "Charlize", last: "Theron"} },
1760
1784
  { from: "Thompson, Dorothy", to: { full: "Dorothy Thompson", first: "Dorothy", last: "Thompson"} },
1761
1785
  { from: "Thompson, Hunter S.", to: { full: "Hunter S Thompson", first: "Hunter S", last: "Thompson"} },
1762
- { skip: true, from: "Thomson, William - a.k.a. Lord Kelvin", to: { full: "William - a.k.a. Lord Kelvin Thomson", first: "William - a.k.a. Lord Kelvin", last: "Thomson"} },
1786
+ { skip: "Haven't figured this one yet",
1787
+ from: "Thomson, William - a.k.a. Lord Kelvin", to: { full: "William - a.k.a. Lord Kelvin Thomson", first: "William - a.k.a. Lord Kelvin", last: "Thomson"} },
1763
1788
  { from: "Thoreau, Henry David", to: { full: "Henry David Thoreau", first: "Henry David", last: "Thoreau"} },
1764
1789
  { from: "Throttle, Ben", to: { full: "Ben Throttle", first: "Ben", last: "Throttle"} },
1765
1790
  { from: "Thurber, James", to: { full: "James Thurber", first: "James", last: "Thurber"} },
@@ -1859,6 +1884,7 @@ RSpec.describe "WikiQuote list parsing" do
1859
1884
  { from: "White, Andrew Dickson", to: { full: "Andrew Dickson White", first: "Andrew Dickson", last: "White"} },
1860
1885
  { from: "White, E. B. (Elwyn Brooks)", to: { full: "E B (Elwyn Brooks) White", first: "E B", nick: "Elwyn Brooks", last: "White"} },
1861
1886
  { from: "White, Reggie", to: { full: "Reggie White", first: "Reggie", last: "White"} },
1887
+ { from: "White, T. H. (Terence Hanbury)", to: { full: "T H White", first: "T H", nick: "Terence Hanbury", last: "White"} },
1862
1888
  { from: "Whitehead, Alfred North", to: { full: "Alfred North Whitehead", first: "Alfred North", last: "Whitehead"} },
1863
1889
  { from: "Whitman, Walt", to: { full: "Walt Whitman", first: "Walt", last: "Whitman"} },
1864
1890
  { from: "Whittier, John Greenleaf", to: { full: "John Greenleaf Whittier", first: "John Greenleaf", last: "Whittier"} },
@@ -1869,7 +1895,8 @@ RSpec.describe "WikiQuote list parsing" do
1869
1895
  { from: "Wilde, Oscar", to: { full: "Oscar Wilde", first: "Oscar", last: "Wilde"} },
1870
1896
  { from: "Wilder, Thornton", to: { full: "Thornton Wilder", first: "Thornton", last: "Wilder"} },
1871
1897
  { from: "Wilkes, Maurice", to: { full: "Maurice Wilkes", first: "Maurice", last: "Wilkes"} },
1872
- { skip: true, from: "William I, King of England", to: { full: "King of England William I", first: "King of England", last: "William I"} },
1898
+ { skip: "Haven't figured this one yet",
1899
+ from: "William I, King of England", to: { full: "King of England William I", first: "King of England", last: "William I"} },
1873
1900
  { from: "Williams, Robin", to: { full: "Robin Williams", first: "Robin", last: "Williams"} },
1874
1901
  { from: "Williams, Roger", to: { full: "Roger Williams", first: "Roger", last: "Williams"} },
1875
1902
  { from: "Williams, Ted", to: { full: "Ted Williams", first: "Ted", last: "Williams"} },
@@ -1880,6 +1907,7 @@ RSpec.describe "WikiQuote list parsing" do
1880
1907
  { from: "Wilson, Colin", to: { full: "Colin Wilson", first: "Colin", last: "Wilson"} },
1881
1908
  { from: "Wilson, Flip", to: { full: "Flip Wilson", first: "Flip", last: "Wilson"} },
1882
1909
  { from: "Wilson, Ron", to: { full: "Ron Wilson", first: "Ron", last: "Wilson"} },
1910
+ { from: "Wilson, (Thomas) Woodrow", to: { full: "Woodrow Wilson", first: "Woodrow", nick: "Thomas", last: "Wilson"} },
1883
1911
  { from: "Winfrey, Oprah", to: { full: "Oprah Winfrey", first: "Oprah", last: "Winfrey"} },
1884
1912
  { from: "Winner, Michael", to: { full: "Michael Winner", first: "Michael", last: "Winner"} },
1885
1913
  { from: "Winter, William", to: { full: "William Winter", first: "William", last: "Winter"} },
@@ -1919,6 +1947,8 @@ RSpec.describe "WikiQuote list parsing" do
1919
1947
  { from: "Zappa, Frank", to: { full: "Frank Zappa", first: "Frank", last: "Zappa"} },
1920
1948
  { from: "Zawinski, Jamie", to: { full: "Jamie Zawinski", first: "Jamie", last: "Zawinski"} },
1921
1949
  { from: "Zeldin, Theodore", to: { full: "Theodore Zeldin", first: "Theodore", last: "Zeldin"} },
1950
+ { skip: "Haven't figured this one yet",
1951
+ from: "Zhuangzi (Zhuang Zi; Zhuang Zhou; Chuang Tzu; Chuang Tse)", to: {} },
1922
1952
  { from: "Ziglar, Zig", to: { full: "Zig Ziglar", first: "Zig", last: "Ziglar"} },
1923
1953
  { from: "Zimmermann, Philip", to: { full: "Philip Zimmermann", first: "Philip", last: "Zimmermann"} },
1924
1954
  { from: "Zinck, Kenneth", to: { full: "Kenneth Zinck", first: "Kenneth", last: "Zinck"} },
@@ -1926,7 +1956,7 @@ RSpec.describe "WikiQuote list parsing" do
1926
1956
  { from: "Zola, Emile", to: { full: "Emile Zola", first: "Emile", last: "Zola"} }
1927
1957
  ].each do |name|
1928
1958
  it "parses #{name[:from]}" do
1929
- skip if name[:skip]
1959
+ skip name[:skip] if name.has_key?(:skip)
1930
1960
  parsed = Nomener.parse(name[:from])
1931
1961
  parse_hash = parsed.to_h
1932
1962
  parse_hash.each_pair do |k,v|
@@ -22,6 +22,12 @@ RSpec.describe "Nomener::Parser" do
22
22
  expect(name).to be_a Nomener::Name
23
23
  end
24
24
 
25
+ it "throw ParseError when passed too many commas" do
26
+ expect {
27
+ Nomener::Parser.parse!("Joe, John, Smith")
28
+ }.to raise_error Nomener::ParseError
29
+ end
30
+
25
31
  [
26
32
  {from: "Joe Smith", to: { first: "Joe", last: "Smith"} },
27
33
  {from: "Joe Smith Jr.", to: { first: "Joe", last: "Smith", suffix: "Jr"} },
@@ -5,7 +5,8 @@ RSpec.describe "Title" do
5
5
  [
6
6
  { name: "خانم Augusta Ada King", result: "خانم" },
7
7
  { name: "רעב Bertrand Russell", result: "רעב" },
8
- { skip: true, name: "'ר Bertrand Russell", result: "'ר" },
8
+ { skip: "Something is typed, or we clean it, improperly",
9
+ name: "'ר Bertrand Russell", result: "'ר" },
9
10
  { name: "አቶ Bertrand Russell", result: "አቶ" },
10
11
  { name: "Air Commander Bertrand Russell", result: "Air Commander" },
11
12
  { name: "Air Commodore Bertrand Russell", result: "Air Commodore" },
@@ -44,8 +45,8 @@ RSpec.describe "Title" do
44
45
  { name: "Dr Bertrand Russell", result: "Dr" },
45
46
  { name: "Dr. Bertrand Russell", result: "Dr" },
46
47
  { name: "Dom Bertrand Russell", result: "Dom" },
47
- { skip: true, name: "Don Bertrand Russell", result: "Don" },
48
- { skip: true, name: "Dona Augusta Ada King", result: "Dona" },
48
+ # { name: "Don Bertrand Russell", result: "Don" },
49
+ # { name: "Dona Augusta Ada King", result: "Dona" },
49
50
  { name: "Erzherzog Bertrand Russell", result: "Erzherzog" },
50
51
  { name: "Erzherzogin Augusta Ada King", result: "Erzherzogin" },
51
52
  { name: "Father Bertrand Russell", result: "Father" },
@@ -215,7 +216,7 @@ RSpec.describe "Title" do
215
216
  { name: "Very Reverand Bertrand Russell", result: "Very Reverand" },
216
217
  ].each do |name|
217
218
  it "parses #{name[:result]} from #{name[:name]}" do
218
- skip if name[:skip]
219
+ skip name[:skip] if name.has_key?(:skip)
219
220
  parsed = Nomener.parse(name[:name])
220
221
  expect(parsed.title).to eq name[:result]
221
222
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nomener
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dante Piombino
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-09 00:00:00.000000000 Z
11
+ date: 2015-04-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler