namae 0.11.3 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/namae/parser.y CHANGED
@@ -3,7 +3,7 @@
3
3
 
4
4
  class Namae::Parser
5
5
 
6
- token COMMA UWORD LWORD PWORD NICK AND APPELLATION TITLE SUFFIX
6
+ token COMMA UWORD LWORD PWORD NICK AND APPELLATION TITLE SUFFIX UPARTICLE
7
7
 
8
8
  expect 0
9
9
 
@@ -20,28 +20,28 @@ rule
20
20
  | sort_order
21
21
 
22
22
  honorific : APPELLATION { result = Name.new(:appellation => val[0]) }
23
- | TITLE { result = Name.new(:title => val[0]) }
23
+ | titles { result = Name.new(:title => val[0]) }
24
24
 
25
25
  display_order : u_words word opt_suffices opt_titles
26
26
  {
27
- result = Name.new(:given => val[0], :family => val[1],
28
- :suffix => val[2], :title => val[3])
27
+ result = Name.new(
28
+ :given => val[0], :family => val[1], :suffix => val[2], :title => val[3]
29
+ )
29
30
  }
30
31
  | u_words NICK last opt_suffices opt_titles
31
32
  {
32
- result = Name.new(:given => val[0], :nick => val[1],
33
- :family => val[2], :suffix => val[3], :title => val[4])
33
+ result = Name.new(
34
+ :given => val[0], :nick => val[1], :family => val[2], :suffix => val[3], :title => val[4]
35
+ )
34
36
  }
35
37
  | u_words NICK von last opt_suffices opt_titles
36
38
  {
37
- result = Name.new(:given => val[0], :nick => val[1],
38
- :particle => val[2], :family => val[3],
39
- :suffix => val[4], :title => val[5])
39
+ result = Name.new(
40
+ :given => val[0], :nick => val[1], :particle => val[2], :family => val[3], :suffix => val[4], :title => val[5])
40
41
  }
41
42
  | u_words von last
42
43
  {
43
- result = Name.new(:given => val[0], :particle => val[1],
44
- :family => val[2])
44
+ result = Name.new(:given => val[0], :particle => val[1], :family => val[2])
45
45
  }
46
46
  | von last
47
47
  {
@@ -50,24 +50,29 @@ rule
50
50
 
51
51
  sort_order : last COMMA first
52
52
  {
53
- result = Name.new({ :family => val[0], :suffix => val[2][0],
54
- :given => val[2][1] }, !!val[2][0])
53
+ result = Name.new({
54
+ :family => val[0], :suffix => val[2][0], :given => val[2][1]
55
+ }, !!val[2][0])
55
56
  }
56
57
  | von last COMMA first
57
58
  {
58
- result = Name.new({ :particle => val[0], :family => val[1],
59
- :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
59
+ result = Name.new({
60
+ :particle => val[0], :family => val[1], :suffix => val[3][0], :given => val[3][1]
61
+ }, !!val[3][0])
60
62
  }
61
63
  | u_words von last COMMA first
62
64
  {
63
- result = Name.new({ :particle => val[0,2].join(' '), :family => val[2],
64
- :suffix => val[4][0], :given => val[4][1] }, !!val[4][0])
65
+ result = Name.new({
66
+ :particle => val[0,2].join(' '), :family => val[2], :suffix => val[4][0], :given => val[4][1]
67
+ }, !!val[4][0])
65
68
  }
66
69
  ;
67
70
 
68
- von : LWORD
69
- | von LWORD { result = val.join(' ') }
70
- | von u_words LWORD { result = val.join(' ') }
71
+ von : particle
72
+ | von particle { result = val.join(' ') }
73
+ | von u_words particle { result = val.join(' ') }
74
+
75
+ particle : LWORD | UPARTICLE
71
76
 
72
77
  last : LWORD | u_words
73
78
 
@@ -87,7 +92,7 @@ rule
87
92
  opt_comma : /* empty */ | COMMA
88
93
  opt_words : /* empty */ | words
89
94
 
90
- word : LWORD | UWORD | PWORD
95
+ word : LWORD | UWORD | PWORD | UPARTICLE
91
96
 
92
97
  opt_suffices : /* empty */ | suffices
93
98
 
@@ -100,26 +105,35 @@ rule
100
105
  | titles TITLE { result = val.join(' ') }
101
106
 
102
107
  ---- header
103
- require 'singleton'
104
108
  require 'strscan'
105
109
 
106
110
  ---- inner
107
111
 
108
- include Singleton
112
+ @defaults = {
113
+ :debug => false,
114
+ :prefer_comma_as_separator => false,
115
+ :include_particle_in_family => false,
116
+ :comma => ',',
117
+ :stops => ',;',
118
+ :separator => /\s*(\band\b|\&|;)\s*/i,
119
+ :title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|pastor|pr|reverend|rev|elder|deacon|deaconess|father|fr|rabbi|cantor|vicar|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
120
+ :suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
121
+ :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i,
122
+ :uppercase_particle => /\s*\b(D[aiu]|De[rs]?|St\.?|Saint|La|Les|V[ao]n)(\s+|$)/
123
+ }
124
+
125
+ class << self
126
+ attr_reader :defaults
127
+
128
+ def instance
129
+ Thread.current[:namae] ||= new
130
+ end
131
+ end
109
132
 
110
133
  attr_reader :options, :input
111
134
 
112
- def initialize
113
- @input, @options = StringScanner.new(''), {
114
- :debug => false,
115
- :prefer_comma_as_separator => false,
116
- :comma => ',',
117
- :stops => ',;',
118
- :separator => /\s*(\band\b|\&|;)\s*/i,
119
- :title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|pastor|pr|reverend|rev|elder|deacon|deaconess|father|fr|vicar|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
120
- :suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
121
- :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
122
- }
135
+ def initialize(options = {})
136
+ @options = self.class.defaults.merge(options)
123
137
  end
124
138
 
125
139
  def debug?
@@ -134,6 +148,10 @@ require 'strscan'
134
148
  options[:comma]
135
149
  end
136
150
 
151
+ def include_particle_in_family?
152
+ options[:include_particle_in_family]
153
+ end
154
+
137
155
  def stops
138
156
  options[:stops]
139
157
  end
@@ -150,26 +168,31 @@ require 'strscan'
150
168
  options[:appellation]
151
169
  end
152
170
 
171
+ def uppercase_particle
172
+ options[:uppercase_particle]
173
+ end
174
+
153
175
  def prefer_comma_as_separator?
154
176
  options[:prefer_comma_as_separator]
155
177
  end
156
178
 
157
- def parse(input)
158
- parse!(input)
179
+ def parse(string)
180
+ parse!(string)
159
181
  rescue => e
160
182
  warn e.message if debug?
161
183
  []
162
184
  end
163
185
 
164
186
  def parse!(string)
165
- input.string = normalize(string)
187
+ @input = StringScanner.new(normalize(string))
166
188
  reset
167
- do_parse
189
+ names = do_parse
190
+ names.map(&:merge_particles!) if include_particle_in_family?
191
+ names
168
192
  end
169
193
 
170
194
  def normalize(string)
171
- string = string.strip
172
- string
195
+ string.scrub.strip
173
196
  end
174
197
 
175
198
  def reset
@@ -220,11 +243,11 @@ require 'strscan'
220
243
  end
221
244
 
222
245
  def will_see_suffix?
223
- input.peek(8).to_s.strip.split(/\s+/)[0] =~ suffix
246
+ input.rest.strip.split(/\s+/)[0] =~ suffix
224
247
  end
225
248
 
226
249
  def will_see_initial?
227
- input.peek(6).to_s.strip.split(/\s+/)[0] =~ /^[[:upper:]]+\b/
250
+ input.rest.strip.split(/\s+/)[0] =~ /^[[:upper:]]+\b/
228
251
  end
229
252
 
230
253
  def seen_full_name?
@@ -256,6 +279,8 @@ require 'strscan'
256
279
  else
257
280
  consume_word(:UWORD, input.matched)
258
281
  end
282
+ when input.scan(uppercase_particle)
283
+ consume_word(:UPARTICLE, input.matched.strip)
259
284
  when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{stops}]*/)
260
285
  consume_word(:UWORD, input.matched)
261
286
  when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{stops}]*/)
data/lib/namae/utility.rb CHANGED
@@ -44,4 +44,8 @@ module Namae
44
44
  Parser.instance.options
45
45
  end
46
46
 
47
+ # @yield [Hash] the parser's default configuration.
48
+ def configure
49
+ yield Parser.defaults
50
+ end
47
51
  end
data/lib/namae/version.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  module Namae
2
2
  module Version
3
- MAJOR = 0
4
- MINOR = 11
5
- PATCH = 3
3
+ MAJOR = 1
4
+ MINOR = 1
5
+ PATCH = 1
6
6
  BUILD = nil
7
7
 
8
8
  STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.').freeze
data/namae.gemspec CHANGED
@@ -2,23 +2,22 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: namae 0.11.3 ruby lib
5
+ # stub: namae 1.1.1 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "namae".freeze
9
- s.version = "0.11.3"
9
+ s.version = "1.1.1"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib".freeze]
13
13
  s.authors = ["Sylvester Keil".freeze, "Dan Collis-Puro".freeze]
14
- s.date = "2016-11-12"
15
- s.description = " Namae (\u{540d}\u{524d}) is a parser for human names. It recognizes personal names of various cultural backgrounds and tries to split them into their component parts (e.g., given and family names, honorifics etc.). ".freeze
14
+ s.date = "2021-03-14"
15
+ s.description = " Namae (\u540D\u524D) is a parser for human names. It recognizes personal names of various cultural backgrounds and tries to split them into their component parts (e.g., given and family names, honorifics etc.). ".freeze
16
16
  s.email = ["sylvester@keil.or.at".freeze, "dan@collispuro.com".freeze]
17
17
  s.extra_rdoc_files = [
18
18
  "README.md"
19
19
  ]
20
20
  s.files = [
21
- ".autotest",
22
21
  ".codeclimate.yml",
23
22
  ".coveralls.yml",
24
23
  ".document",
@@ -50,23 +49,22 @@ Gem::Specification.new do |s|
50
49
  "spec/namae/name_spec.rb",
51
50
  "spec/namae/parser_spec.rb",
52
51
  "spec/namae/utility_spec.rb",
53
- "spec/spec_helper.rb"
52
+ "spec/spec_helper.rb",
53
+ "spec/thread_safety_spec.rb"
54
54
  ]
55
55
  s.homepage = "https://github.com/berkmancenter/namae".freeze
56
56
  s.licenses = ["AGPL-3.0".freeze]
57
- s.rubygems_version = "2.6.3".freeze
58
- s.summary = "Namae (\u{540d}\u{524d}) parses personal names and splits them into their component parts.".freeze
57
+ s.rubygems_version = "3.2.3".freeze
58
+ s.summary = "Namae (\u540D\u524D) parses personal names and splits them into their component parts.".freeze
59
59
 
60
60
  if s.respond_to? :specification_version then
61
61
  s.specification_version = 4
62
+ end
62
63
 
63
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
64
- s.add_development_dependency(%q<racc>.freeze, ["= 1.4.9"])
65
- else
66
- s.add_dependency(%q<racc>.freeze, ["= 1.4.9"])
67
- end
64
+ if s.respond_to? :add_runtime_dependency then
65
+ s.add_development_dependency(%q<racc>.freeze, ["~> 1.4"])
68
66
  else
69
- s.add_dependency(%q<racc>.freeze, ["= 1.4.9"])
67
+ s.add_dependency(%q<racc>.freeze, ["~> 1.4"])
70
68
  end
71
69
  end
72
70
 
@@ -1,10 +1,5 @@
1
1
  module Namae
2
2
  describe 'Parser' do
3
-
4
- it 'does not respond to .new' do
5
- expect(Parser).not_to respond_to(:new)
6
- end
7
-
8
3
  describe '.instance' do
9
4
  let(:parser) { Parser.instance }
10
5
 
@@ -120,7 +115,7 @@ module Namae
120
115
  end
121
116
  end
122
117
 
123
- %w{Pastor Pr. Reverend Rev. Elder Deacon Deaconess Father Fr. Vicar}.each do |title|
118
+ %w{Pastor Pr. Reverend Rev. Elder Deacon Deaconess Father Fr. Vicar Rabbi Cantor}.each do |title|
124
119
  describe "the next token is #{title.inspect}" do
125
120
  before { parser.send(:input).string = title }
126
121
  it 'returns a TITLE token' do
@@ -196,6 +191,70 @@ module Namae
196
191
  expect(parser.parse!('Bernado Franecki Ph.D.')[0].values_at(:given, :family, :title)).to eq(['Bernado', 'Franecki', 'Ph.D.'])
197
192
  #expect(parser.parse!('Bernado Franecki, Ph.D.')[0].values_at(:given, :family, :title)).to eq(['Bernado', 'Franecki', 'Ph.D.'])
198
193
  end
194
+
195
+ it 'parses consecutive titles in display order' do
196
+ expect(parser.parse!('Lt. Col. Bernado Franecki')[0].values_at(:given, :family, :title)).to eq(['Bernado', 'Franecki', 'Lt. Col.'])
197
+ end
198
+
199
+ context 'when include_particle_in_family is false' do
200
+ let(:parser) { Parser.new(include_particle_in_family: false) }
201
+
202
+ it 'parses common capitalized particles as the family name in display order' do
203
+ expect(parser.parse!('Carlos De Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'De'])
204
+ end
205
+
206
+ it 'parses common capitalized particles with punctuation as the family name in display order' do
207
+ expect(parser.parse!('Matt St. Hilaire')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'Hilaire', 'St.'])
208
+ end
209
+
210
+ it 'parses multiple common capitalized particles as the family name in display order' do
211
+ expect(parser.parse!('Tom Van De Weghe')[0].values_at(:given, :family, :particle)).to eq(['Tom', 'Weghe', 'Van De'])
212
+ end
213
+
214
+ it 'parses common lowercase particles as a particle, not family name in display order' do
215
+ expect(parser.parse!('Carlos de Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'de'])
216
+ end
217
+
218
+ it 'parses common capitalized particles as the family name in sort order' do
219
+ expect(parser.parse!('De Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'De'])
220
+ end
221
+
222
+ it 'parses common lowercase particles as a particle, not family name in sort order' do
223
+ expect(parser.parse!('de Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'de'])
224
+ end
225
+
226
+ it 'parses common capitalized particles with punctuation as the family name in display order' do
227
+ expect(parser.parse!('St. Hilaire, Matt')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'Hilaire', 'St.'])
228
+ end
229
+ end
230
+
231
+ context 'when include_particle_in_family is true' do
232
+ let(:parser) { Parser.new(include_particle_in_family: true) }
233
+
234
+ it 'parses common capitalized particles as the family name in display order' do
235
+ expect(parser.parse!('Carlos De Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'De Silva', nil])
236
+ end
237
+
238
+ it 'parses common capitalized particles with punctuation as the family name in display order' do
239
+ expect(parser.parse!('Matt St. Hilaire')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'St. Hilaire', nil])
240
+ end
241
+
242
+ it 'parses common lowercase particles as family name in display order' do
243
+ expect(parser.parse!('Carlos de Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'de Silva', nil])
244
+ end
245
+
246
+ it 'parses common capitalized particles as the family name in sort order' do
247
+ expect(parser.parse!('De Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'De Silva', nil])
248
+ end
249
+
250
+ it 'parses common lowercase particles as family name in sort order' do
251
+ expect(parser.parse!('de Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'de Silva', nil])
252
+ end
253
+
254
+ it 'parses common capitalized particles with punctuation as the family name in display order' do
255
+ expect(parser.parse!('St. Hilaire, Matt')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'St. Hilaire', nil])
256
+ end
257
+ end
199
258
  end
200
259
  end
201
260
 
@@ -0,0 +1,25 @@
1
+ module Namae
2
+ describe 'Parser using threads' do
3
+ let(:name_1_str) { "Foo Bar" }
4
+ let(:name_2_str) { "Baz" }
5
+ let(:name_1) { Namae.parse(name_1_str).first }
6
+ let(:name_2) { Namae.parse(name_2_str).first }
7
+
8
+ def compare(string, expectation)
9
+ name = Namae.parse(string).first
10
+ given_name_match = expectation.given == name.given
11
+ family_name_match = expectation.family == name.family
12
+ raise unless given_name_match && family_name_match
13
+ end
14
+
15
+ it 'has no conflicts' do
16
+ [[name_1_str, name_1], [name_2_str, name_2]].map do |string, expectation|
17
+ Thread.new do
18
+ 1000.times do
19
+ compare(string, expectation)
20
+ end
21
+ end
22
+ end.each(&:join)
23
+ end
24
+ end
25
+ end
metadata CHANGED
@@ -1,30 +1,30 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: namae
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.3
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sylvester Keil
8
8
  - Dan Collis-Puro
9
- autorequire:
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-11-12 00:00:00.000000000 Z
12
+ date: 2021-03-14 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: racc
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
- - - '='
18
+ - - "~>"
19
19
  - !ruby/object:Gem::Version
20
- version: 1.4.9
20
+ version: '1.4'
21
21
  type: :development
22
22
  prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
25
- - - '='
25
+ - - "~>"
26
26
  - !ruby/object:Gem::Version
27
- version: 1.4.9
27
+ version: '1.4'
28
28
  description: " Namae (名前) is a parser for human names. It recognizes personal names
29
29
  of various cultural backgrounds and tries to split them into their component parts
30
30
  (e.g., given and family names, honorifics etc.). "
@@ -36,7 +36,6 @@ extensions: []
36
36
  extra_rdoc_files:
37
37
  - README.md
38
38
  files:
39
- - ".autotest"
40
39
  - ".codeclimate.yml"
41
40
  - ".coveralls.yml"
42
41
  - ".document"
@@ -69,11 +68,12 @@ files:
69
68
  - spec/namae/parser_spec.rb
70
69
  - spec/namae/utility_spec.rb
71
70
  - spec/spec_helper.rb
71
+ - spec/thread_safety_spec.rb
72
72
  homepage: https://github.com/berkmancenter/namae
73
73
  licenses:
74
74
  - AGPL-3.0
75
75
  metadata: {}
76
- post_install_message:
76
+ post_install_message:
77
77
  rdoc_options: []
78
78
  require_paths:
79
79
  - lib
@@ -88,9 +88,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
88
88
  - !ruby/object:Gem::Version
89
89
  version: '0'
90
90
  requirements: []
91
- rubyforge_project:
92
- rubygems_version: 2.6.3
93
- signing_key:
91
+ rubygems_version: 3.2.3
92
+ signing_key:
94
93
  specification_version: 4
95
94
  summary: Namae (名前) parses personal names and splits them into their component parts.
96
95
  test_files: []