namae 0.11.2 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/namae/parser.y CHANGED
@@ -3,7 +3,7 @@
3
3
 
4
4
  class Namae::Parser
5
5
 
6
- token COMMA UWORD LWORD PWORD NICK AND APPELLATION TITLE SUFFIX
6
+ token COMMA UWORD LWORD PWORD NICK AND APPELLATION TITLE SUFFIX UPARTICLE
7
7
 
8
8
  expect 0
9
9
 
@@ -20,7 +20,7 @@ rule
20
20
  | sort_order
21
21
 
22
22
  honorific : APPELLATION { result = Name.new(:appellation => val[0]) }
23
- | TITLE { result = Name.new(:title => val[0]) }
23
+ | titles { result = Name.new(:title => val[0]) }
24
24
 
25
25
  display_order : u_words word opt_suffices opt_titles
26
26
  {
@@ -43,6 +43,14 @@ rule
43
43
  result = Name.new(:given => val[0], :particle => val[1],
44
44
  :family => val[2])
45
45
  }
46
+ | u_words UPARTICLE last
47
+ {
48
+ result = if include_particle_in_family?
49
+ Name.new(:given => val[0], :family => val[1,2].join(' '))
50
+ else
51
+ Name.new(:given => val[0], :particle => val[1], :family => val[2])
52
+ end
53
+ }
46
54
  | von last
47
55
  {
48
56
  result = Name.new(:particle => val[0], :family => val[1])
@@ -53,6 +61,14 @@ rule
53
61
  result = Name.new({ :family => val[0], :suffix => val[2][0],
54
62
  :given => val[2][1] }, !!val[2][0])
55
63
  }
64
+ | UPARTICLE last COMMA first
65
+ {
66
+ result = if include_particle_in_family?
67
+ Name.new({ :family => val[0,2].join(' '), :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
68
+ else
69
+ Name.new({ :particle => val[0], :family => val[1], :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
70
+ end
71
+ }
56
72
  | von last COMMA first
57
73
  {
58
74
  result = Name.new({ :particle => val[0], :family => val[1],
@@ -100,26 +116,35 @@ rule
100
116
  | titles TITLE { result = val.join(' ') }
101
117
 
102
118
  ---- header
103
- require 'singleton'
104
119
  require 'strscan'
105
120
 
106
121
  ---- inner
107
122
 
108
- include Singleton
123
+ @defaults = {
124
+ :debug => false,
125
+ :prefer_comma_as_separator => false,
126
+ :include_particle_in_family => false,
127
+ :comma => ',',
128
+ :stops => ',;',
129
+ :separator => /\s*(\band\b|\&|;)\s*/i,
130
+ :title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|pastor|pr|reverend|rev|elder|deacon|deaconess|father|fr|rabbi|cantor|vicar|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
131
+ :suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
132
+ :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i,
133
+ :uppercase_particle => /\s*\b((Da|De|Di|De\sLa|Du|Der|Des|Da|St|Saint|Les|Van)\.?)(\s+|$)/
134
+ }
135
+
136
+ class << self
137
+ attr_reader :defaults
138
+
139
+ def instance
140
+ Thread.current[:namae] ||= new
141
+ end
142
+ end
109
143
 
110
144
  attr_reader :options, :input
111
145
 
112
- def initialize
113
- @input, @options = StringScanner.new(''), {
114
- :debug => false,
115
- :prefer_comma_as_separator => false,
116
- :comma => ',',
117
- :stops => ',;',
118
- :separator => /\s*(\band\b|\&|;)\s*/i,
119
- :title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|pastor|pr|reverend|rev|elder|deacon|deaconess|father|fr|vicar|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
120
- :suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
121
- :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
122
- }
146
+ def initialize(options = {})
147
+ @options = self.class.defaults.merge(options)
123
148
  end
124
149
 
125
150
  def debug?
@@ -134,6 +159,10 @@ require 'strscan'
134
159
  options[:comma]
135
160
  end
136
161
 
162
+ def include_particle_in_family?
163
+ options[:include_particle_in_family]
164
+ end
165
+
137
166
  def stops
138
167
  options[:stops]
139
168
  end
@@ -150,26 +179,29 @@ require 'strscan'
150
179
  options[:appellation]
151
180
  end
152
181
 
182
+ def uppercase_particle
183
+ options[:uppercase_particle]
184
+ end
185
+
153
186
  def prefer_comma_as_separator?
154
187
  options[:prefer_comma_as_separator]
155
188
  end
156
189
 
157
- def parse(input)
158
- parse!(input)
190
+ def parse(string)
191
+ parse!(string)
159
192
  rescue => e
160
193
  warn e.message if debug?
161
194
  []
162
195
  end
163
196
 
164
197
  def parse!(string)
165
- input.string = normalize(string)
198
+ @input = StringScanner.new(normalize(string))
166
199
  reset
167
200
  do_parse
168
201
  end
169
202
 
170
203
  def normalize(string)
171
- string = string.strip
172
- string
204
+ string.scrub.strip
173
205
  end
174
206
 
175
207
  def reset
@@ -220,11 +252,11 @@ require 'strscan'
220
252
  end
221
253
 
222
254
  def will_see_suffix?
223
- input.peek(8).to_s.strip.split(/\s+/)[0] =~ suffix
255
+ input.rest.strip.split(/\s+/)[0] =~ suffix
224
256
  end
225
257
 
226
258
  def will_see_initial?
227
- input.peek(6).to_s.strip.split(/\s+/)[0] =~ /^[[:upper:]]+\b/
259
+ input.rest.strip.split(/\s+/)[0] =~ /^[[:upper:]]+\b/
228
260
  end
229
261
 
230
262
  def seen_full_name?
@@ -256,6 +288,8 @@ require 'strscan'
256
288
  else
257
289
  consume_word(:UWORD, input.matched)
258
290
  end
291
+ when input.scan(uppercase_particle)
292
+ consume_word(:UPARTICLE, input.matched.strip)
259
293
  when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{stops}]*/)
260
294
  consume_word(:UWORD, input.matched)
261
295
  when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{stops}]*/)
data/lib/namae/utility.rb CHANGED
@@ -44,4 +44,8 @@ module Namae
44
44
  Parser.instance.options
45
45
  end
46
46
 
47
+ # @yield [Hash] the parser's default configuration.
48
+ def configure
49
+ yield Parser.defaults
50
+ end
47
51
  end
data/lib/namae/version.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  module Namae
2
2
  module Version
3
- MAJOR = 0
4
- MINOR = 11
5
- PATCH = 2
3
+ MAJOR = 1
4
+ MINOR = 1
5
+ PATCH = 0
6
6
  BUILD = nil
7
7
 
8
8
  STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.').freeze
data/namae.gemspec CHANGED
@@ -2,23 +2,22 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: namae 0.11.2 ruby lib
5
+ # stub: namae 1.1.0 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "namae".freeze
9
- s.version = "0.11.2"
9
+ s.version = "1.1.0"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib".freeze]
13
13
  s.authors = ["Sylvester Keil".freeze, "Dan Collis-Puro".freeze]
14
- s.date = "2016-11-12"
15
- s.description = " Namae (\u{540d}\u{524d}) is a parser for human names. It recognizes personal names of various cultural backgrounds and tries to split them into their component parts (e.g., given and family names, honorifics etc.). ".freeze
14
+ s.date = "2021-03-12"
15
+ s.description = " Namae (\u540D\u524D) is a parser for human names. It recognizes personal names of various cultural backgrounds and tries to split them into their component parts (e.g., given and family names, honorifics etc.). ".freeze
16
16
  s.email = ["sylvester@keil.or.at".freeze, "dan@collispuro.com".freeze]
17
17
  s.extra_rdoc_files = [
18
18
  "README.md"
19
19
  ]
20
20
  s.files = [
21
- ".autotest",
22
21
  ".codeclimate.yml",
23
22
  ".coveralls.yml",
24
23
  ".document",
@@ -50,23 +49,22 @@ Gem::Specification.new do |s|
50
49
  "spec/namae/name_spec.rb",
51
50
  "spec/namae/parser_spec.rb",
52
51
  "spec/namae/utility_spec.rb",
53
- "spec/spec_helper.rb"
52
+ "spec/spec_helper.rb",
53
+ "spec/thread_safety_spec.rb"
54
54
  ]
55
55
  s.homepage = "https://github.com/berkmancenter/namae".freeze
56
56
  s.licenses = ["AGPL-3.0".freeze]
57
- s.rubygems_version = "2.6.3".freeze
58
- s.summary = "Namae (\u{540d}\u{524d}) parses personal names and splits them into their component parts.".freeze
57
+ s.rubygems_version = "3.2.3".freeze
58
+ s.summary = "Namae (\u540D\u524D) parses personal names and splits them into their component parts.".freeze
59
59
 
60
60
  if s.respond_to? :specification_version then
61
61
  s.specification_version = 4
62
+ end
62
63
 
63
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
64
- s.add_development_dependency(%q<racc>.freeze, ["= 1.4.9"])
65
- else
66
- s.add_dependency(%q<racc>.freeze, ["= 1.4.9"])
67
- end
64
+ if s.respond_to? :add_runtime_dependency then
65
+ s.add_development_dependency(%q<racc>.freeze, ["~> 1.4"])
68
66
  else
69
- s.add_dependency(%q<racc>.freeze, ["= 1.4.9"])
67
+ s.add_dependency(%q<racc>.freeze, ["~> 1.4"])
70
68
  end
71
69
  end
72
70
 
@@ -1,10 +1,5 @@
1
1
  module Namae
2
2
  describe 'Parser' do
3
-
4
- it 'does not respond to .new' do
5
- expect(Parser).not_to respond_to(:new)
6
- end
7
-
8
3
  describe '.instance' do
9
4
  let(:parser) { Parser.instance }
10
5
 
@@ -120,7 +115,7 @@ module Namae
120
115
  end
121
116
  end
122
117
 
123
- %w{Pastor Pr. Reverend Rev. Elder Deacon Deaconess Father Fr. Vicar}.each do |title|
118
+ %w{Pastor Pr. Reverend Rev. Elder Deacon Deaconess Father Fr. Vicar Rabbi Cantor}.each do |title|
124
119
  describe "the next token is #{title.inspect}" do
125
120
  before { parser.send(:input).string = title }
126
121
  it 'returns a TITLE token' do
@@ -149,10 +144,6 @@ module Namae
149
144
  expect(parser.parse!('Ichiro')[0].given).to eq('Ichiro')
150
145
  end
151
146
 
152
- it 'removes numbers' do
153
- expect(parser.parse!('Ichiro 20156')[0].given).to eq('Ichiro')
154
- end
155
-
156
147
  it 'treats "Lord Byron" as a title and family name' do
157
148
  expect(parser.parse!('Lord Byron')[0].values_at(:family, :title)).to eq(['Byron', 'Lord'])
158
149
  end
@@ -200,6 +191,66 @@ module Namae
200
191
  expect(parser.parse!('Bernado Franecki Ph.D.')[0].values_at(:given, :family, :title)).to eq(['Bernado', 'Franecki', 'Ph.D.'])
201
192
  #expect(parser.parse!('Bernado Franecki, Ph.D.')[0].values_at(:given, :family, :title)).to eq(['Bernado', 'Franecki', 'Ph.D.'])
202
193
  end
194
+
195
+ it 'parses consecutive titles in display order' do
196
+ expect(parser.parse!('Lt. Col. Bernado Franecki')[0].values_at(:given, :family, :title)).to eq(['Bernado', 'Franecki', 'Lt. Col.'])
197
+ end
198
+
199
+ context 'when include_particle_in_family is false' do
200
+ let(:parser) { Parser.new(include_particle_in_family: false) }
201
+
202
+ it 'parses common capitalized particles as the family name in display order' do
203
+ expect(parser.parse!('Carlos De Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'De'])
204
+ end
205
+
206
+ it 'parses common capitalized particles with punctuation as the family name in display order' do
207
+ expect(parser.parse!('Matt St. Hilaire')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'Hilaire', 'St.'])
208
+ end
209
+
210
+ it 'parses common lowercase particles as a particle, not family name in display order' do
211
+ expect(parser.parse!('Carlos de Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'de'])
212
+ end
213
+
214
+ it 'parses common capitalized particles as the family name in sort order' do
215
+ expect(parser.parse!('De Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'De'])
216
+ end
217
+
218
+ it 'parses common lowercase particles as a particle, not family name in sort order' do
219
+ expect(parser.parse!('de Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'de'])
220
+ end
221
+
222
+ it 'parses common capitalized particles with punctuation as the family name in display order' do
223
+ expect(parser.parse!('St. Hilaire, Matt')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'Hilaire', 'St.'])
224
+ end
225
+ end
226
+
227
+ context 'when include_particle_in_family is true' do
228
+ let(:parser) { Parser.new(include_particle_in_family: true) }
229
+
230
+ it 'parses common capitalized particles as the family name in display order' do
231
+ expect(parser.parse!('Carlos De Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'De Silva', nil])
232
+ end
233
+
234
+ it 'parses common capitalized particles with punctuation as the family name in display order' do
235
+ expect(parser.parse!('Matt St. Hilaire')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'St. Hilaire', nil])
236
+ end
237
+
238
+ it 'parses common lowercase particles as a particle, not family name in display order' do
239
+ expect(parser.parse!('Carlos de Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'de'])
240
+ end
241
+
242
+ it 'parses common capitalized particles as the family name in sort order' do
243
+ expect(parser.parse!('De Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'De Silva', nil])
244
+ end
245
+
246
+ it 'parses common lowercase particles as a particle, not family name in sort order' do
247
+ expect(parser.parse!('de Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'de'])
248
+ end
249
+
250
+ it 'parses common capitalized particles with punctuation as the family name in display order' do
251
+ expect(parser.parse!('St. Hilaire, Matt')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'St. Hilaire', nil])
252
+ end
253
+ end
203
254
  end
204
255
  end
205
256
 
@@ -0,0 +1,25 @@
1
+ module Namae
2
+ describe 'Parser using threads' do
3
+ let(:name_1_str) { "Foo Bar" }
4
+ let(:name_2_str) { "Baz" }
5
+ let(:name_1) { Namae.parse(name_1_str).first }
6
+ let(:name_2) { Namae.parse(name_2_str).first }
7
+
8
+ def compare(string, expectation)
9
+ name = Namae.parse(string).first
10
+ given_name_match = expectation.given == name.given
11
+ family_name_match = expectation.family == name.family
12
+ raise unless given_name_match && family_name_match
13
+ end
14
+
15
+ it 'has no conflicts' do
16
+ [[name_1_str, name_1], [name_2_str, name_2]].map do |string, expectation|
17
+ Thread.new do
18
+ 1000.times do
19
+ compare(string, expectation)
20
+ end
21
+ end
22
+ end.each(&:join)
23
+ end
24
+ end
25
+ end
metadata CHANGED
@@ -1,30 +1,30 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: namae
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.2
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sylvester Keil
8
8
  - Dan Collis-Puro
9
- autorequire:
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-11-12 00:00:00.000000000 Z
12
+ date: 2021-03-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: racc
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
- - - '='
18
+ - - "~>"
19
19
  - !ruby/object:Gem::Version
20
- version: 1.4.9
20
+ version: '1.4'
21
21
  type: :development
22
22
  prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
25
- - - '='
25
+ - - "~>"
26
26
  - !ruby/object:Gem::Version
27
- version: 1.4.9
27
+ version: '1.4'
28
28
  description: " Namae (名前) is a parser for human names. It recognizes personal names
29
29
  of various cultural backgrounds and tries to split them into their component parts
30
30
  (e.g., given and family names, honorifics etc.). "
@@ -36,7 +36,6 @@ extensions: []
36
36
  extra_rdoc_files:
37
37
  - README.md
38
38
  files:
39
- - ".autotest"
40
39
  - ".codeclimate.yml"
41
40
  - ".coveralls.yml"
42
41
  - ".document"
@@ -69,11 +68,12 @@ files:
69
68
  - spec/namae/parser_spec.rb
70
69
  - spec/namae/utility_spec.rb
71
70
  - spec/spec_helper.rb
71
+ - spec/thread_safety_spec.rb
72
72
  homepage: https://github.com/berkmancenter/namae
73
73
  licenses:
74
74
  - AGPL-3.0
75
75
  metadata: {}
76
- post_install_message:
76
+ post_install_message:
77
77
  rdoc_options: []
78
78
  require_paths:
79
79
  - lib
@@ -88,9 +88,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
88
88
  - !ruby/object:Gem::Version
89
89
  version: '0'
90
90
  requirements: []
91
- rubyforge_project:
92
- rubygems_version: 2.6.3
93
- signing_key:
91
+ rubygems_version: 3.2.3
92
+ signing_key:
94
93
  specification_version: 4
95
94
  summary: Namae (名前) parses personal names and splits them into their component parts.
96
95
  test_files: []
data/.autotest DELETED
@@ -1,21 +0,0 @@
1
- require 'bundler'
2
- begin
3
- if RUBY_PLATFORM =~ /darwin/
4
- Bundler.setup(:default, :development, :debug, :test, :osx)
5
- require 'autotest/fsevent'
6
- else
7
- Bundler.setup(:default, :development, :debug, :test)
8
- end
9
- rescue Bundler::BundlerError => e
10
- $stderr.puts e.message
11
- $stderr.puts "Run `bundle install` to install missing gems"
12
- exit e.status_code
13
- end
14
-
15
-
16
- Autotest.add_hook :initialize do |at|
17
- at.add_mapping(/.+\.y$/) do |f,_|
18
- system 'rake clean racc'
19
- end
20
- end
21
-