namae 0.11.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +5 -11
- data/BSDL +1 -1
- data/Gemfile +10 -24
- data/README.md +22 -4
- data/features/support/env.rb +0 -9
- data/lib/namae/parser.rb +53 -47
- data/lib/namae/parser.y +24 -18
- data/lib/namae/utility.rb +4 -0
- data/lib/namae/version.rb +3 -3
- data/namae.gemspec +11 -11
- data/spec/namae/parser_spec.rb +0 -5
- data/spec/thread_safety_spec.rb +25 -0
- metadata +8 -8
- data/.autotest +0 -21
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 29ef835cd7ed974e0c9c83f2a843cb0889c39f68
|
4
|
+
data.tar.gz: b0acc0482f22c4c7b2fd5353d41132ebb1380b7f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d6fca3ad7d0eb3ef0329ef8a8b5f794501f99519162307076dd1b7b0135703d7cb92dd63841091943040d220f682eb76cbbb5782f435c56e50472fbd71435f08
|
7
|
+
data.tar.gz: 7d815f130df7b0fd1f629b914e1104349aafb0a02780bb36e7020474e397632eb801aa079a0871e442e96653c7afba16f60a76cd40531243d50ccd17282713fa
|
data/.travis.yml
CHANGED
@@ -6,26 +6,20 @@ cache: bundler
|
|
6
6
|
matrix:
|
7
7
|
fast_finish: true
|
8
8
|
include:
|
9
|
-
- rvm: 2.
|
9
|
+
- rvm: 2.4
|
10
10
|
env: WITH_COVERALLS=true
|
11
|
-
- rvm: 2.
|
12
|
-
env: WITH_COVERALLS=false
|
13
|
-
- rvm: 2.1
|
14
|
-
env: WITH_COVERALLS=false
|
15
|
-
- rvm: 2.0
|
11
|
+
- rvm: 2.3
|
16
12
|
env: WITH_COVERALLS=false
|
17
|
-
- rvm:
|
13
|
+
- rvm: 2.2
|
18
14
|
env: WITH_COVERALLS=false
|
19
15
|
- rvm: jruby-19mode
|
20
16
|
env: WITH_COVERALLS=false
|
21
|
-
- rvm: rbx-2
|
22
|
-
env: WITH_COVERALLS=false
|
23
17
|
|
24
18
|
install:
|
25
19
|
- if [[ $WITH_COVERALLS = "true" ]]; then
|
26
|
-
bundle install --without debug
|
20
|
+
bundle install --without debug optional;
|
27
21
|
else
|
28
|
-
bundle install --without debug
|
22
|
+
bundle install --without debug optional coverage;
|
29
23
|
fi
|
30
24
|
|
31
25
|
script:
|
data/BSDL
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Namae. A personal name parser.
|
2
2
|
Copyright (C) 2012 President and Fellows of Harvard College
|
3
|
-
Copyright (C) 2013-
|
3
|
+
Copyright (C) 2013-2017 Sylvester Keil
|
4
4
|
|
5
5
|
Redistribution and use in source and binary forms, with or without
|
6
6
|
modification, are permitted provided that the following conditions are met:
|
data/Gemfile
CHANGED
@@ -1,40 +1,26 @@
|
|
1
1
|
source 'https://rubygems.org'
|
2
2
|
|
3
3
|
group :test do
|
4
|
-
gem 'rspec', '~>3.
|
5
|
-
gem 'rake'
|
6
|
-
gem 'cucumber', '~>1
|
4
|
+
gem 'rspec', '~> 3.7'
|
5
|
+
gem 'rake'
|
6
|
+
gem 'cucumber', '~> 3.1'
|
7
7
|
end
|
8
8
|
|
9
|
-
|
10
9
|
group :development do
|
11
|
-
gem 'racc', '1.4
|
10
|
+
gem 'racc', '~> 1.4', :platform => :ruby
|
12
11
|
end
|
13
12
|
|
14
13
|
group :coverage do
|
15
|
-
gem 'simplecov',
|
16
|
-
gem '
|
17
|
-
gem 'coveralls', '~>0.7', :require => false
|
14
|
+
gem 'simplecov', :require => false, :platforms => :ruby
|
15
|
+
gem 'coveralls', :require => false if ENV['CI']
|
18
16
|
end
|
19
17
|
|
20
18
|
group :optional do
|
21
|
-
gem '
|
22
|
-
gem '
|
23
|
-
gem 'yard', '~>0.8'
|
19
|
+
gem 'jeweler'
|
20
|
+
gem 'yard'
|
24
21
|
end
|
25
22
|
|
26
23
|
group :debug do
|
27
|
-
gem 'debugger',
|
28
|
-
gem 'byebug',
|
29
|
-
gem 'rubinius-compiler', '~>2.0', :platform => :rbx
|
30
|
-
gem 'rubinius-debugger', '~>2.0', :platform => :rbx
|
31
|
-
end
|
32
|
-
|
33
|
-
group :osx do
|
34
|
-
gem 'autotest-fsevent'
|
35
|
-
end
|
36
|
-
|
37
|
-
group :rbx do
|
38
|
-
gem 'rubysl', '~>2.0', :platform => :rbx
|
39
|
-
gem 'json', '~>1.8', :platform => :rbx
|
24
|
+
gem 'debugger', :platform => [:mri_19]
|
25
|
+
gem 'byebug', :platform => :mri if RUBY_VERSION > '2.0'
|
40
26
|
end
|
data/README.md
CHANGED
@@ -4,9 +4,9 @@ Namae is a parser for human names. It recognizes personal names of various
|
|
4
4
|
cultural backgrounds and tries to split them into their component parts
|
5
5
|
(e.g., given and family names, honorifics etc.).
|
6
6
|
|
7
|
-
[![Build Status](https://
|
8
|
-
[![Coverage Status](https://coveralls.io/repos/
|
9
|
-
[![Gem Version](https://badge.fury.io/rb/namae.
|
7
|
+
[![Build Status](https://travis-ci.org/berkmancenter/namae.svg?branch=master)](https://travis-ci.org/berkmancenter/namae)
|
8
|
+
[![Coverage Status](https://coveralls.io/repos/github/berkmancenter/namae/badge.svg?branch=master)](https://coveralls.io/github/berkmancenter/namae?branch=master)
|
9
|
+
[![Gem Version](https://badge.fury.io/rb/namae.svg)](http://badge.fury.io/rb/namae)
|
10
10
|
[![Code Climate](https://codeclimate.com/github/berkmancenter/namae/badges/gpa.svg)](https://codeclimate.com/github/berkmancenter/namae)
|
11
11
|
|
12
12
|
Quickstart
|
@@ -121,6 +121,23 @@ ambiguous. For example, multiple family names are always possible in sort-order:
|
|
121
121
|
Whilst in display-order, multiple family names are only supported when the
|
122
122
|
name contains a particle or a nickname.
|
123
123
|
|
124
|
+
Configuration
|
125
|
+
-------------
|
126
|
+
You can tweak some of Namae's parse rules by configuring the parser's
|
127
|
+
options. Take a look at `Namae.options` to see your current settings.
|
128
|
+
If you want to change the default settings for all parsers, you can run
|
129
|
+
`Namae.configure` which will yield the default options (make sure to
|
130
|
+
change the configuration before using the parser).
|
131
|
+
|
132
|
+
A Note On Thread Safety
|
133
|
+
-----------------------
|
134
|
+
When using the top-level parse functions, Namae will re-use a thread-local
|
135
|
+
parser instance (`Namae::Parser.instance`); the instance is created using
|
136
|
+
the current default options (`Namae::Parser.defaults`). If you need more
|
137
|
+
control, you are encouraged to create individual parser instances using
|
138
|
+
`Namae::Parser.new`.
|
139
|
+
|
140
|
+
|
124
141
|
Rationale
|
125
142
|
---------
|
126
143
|
Parsing human names is at once too easy and too hard. When working in the
|
@@ -170,7 +187,8 @@ Namae was written as a part of a Google Summer of Code project. Thanks Google!
|
|
170
187
|
|
171
188
|
Copyright
|
172
189
|
---------
|
190
|
+
Copyright (c) 2013-2017 Sylvester Keil
|
191
|
+
|
173
192
|
Copyright (c) 2012 President and Fellows of Harvard College.
|
174
|
-
Copyright (c) 2013-2014 Sylvester Keil
|
175
193
|
|
176
194
|
Namae is dual licensed under the AGPL and a BSD-style license.
|
data/features/support/env.rb
CHANGED
@@ -1,12 +1,3 @@
|
|
1
|
-
require 'bundler'
|
2
|
-
begin
|
3
|
-
Bundler.setup(:default, :development)
|
4
|
-
rescue Bundler::BundlerError => e
|
5
|
-
$stderr.puts e.message
|
6
|
-
$stderr.puts "Run `bundle install` to install missing gems"
|
7
|
-
exit e.status_code
|
8
|
-
end
|
9
|
-
|
10
1
|
begin
|
11
2
|
require 'simplecov'
|
12
3
|
require 'coveralls' if ENV['CI']
|
data/lib/namae/parser.rb
CHANGED
@@ -1,34 +1,41 @@
|
|
1
1
|
#
|
2
2
|
# DO NOT MODIFY!!!!
|
3
|
-
# This file is automatically generated by Racc 1.4.
|
3
|
+
# This file is automatically generated by Racc 1.4.14
|
4
4
|
# from Racc grammer file "".
|
5
5
|
#
|
6
6
|
|
7
7
|
require 'racc/parser.rb'
|
8
8
|
|
9
|
-
require 'singleton'
|
10
9
|
require 'strscan'
|
11
10
|
|
12
11
|
module Namae
|
13
12
|
class Parser < Racc::Parser
|
14
13
|
|
15
|
-
module_eval(<<'...end parser.y/module_eval...', 'parser.y',
|
14
|
+
module_eval(<<'...end parser.y/module_eval...', 'parser.y', 106)
|
16
15
|
|
17
|
-
|
16
|
+
@defaults = {
|
17
|
+
:debug => false,
|
18
|
+
:prefer_comma_as_separator => false,
|
19
|
+
:comma => ',',
|
20
|
+
:stops => ',;',
|
21
|
+
:separator => /\s*(\band\b|\&|;)\s*/i,
|
22
|
+
:title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|pastor|pr|reverend|rev|elder|deacon|deaconess|father|fr|vicar|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
23
|
+
:suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
|
24
|
+
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
25
|
+
}
|
26
|
+
|
27
|
+
class << self
|
28
|
+
attr_reader :defaults
|
29
|
+
|
30
|
+
def instance
|
31
|
+
Thread.current[:namae] ||= new
|
32
|
+
end
|
33
|
+
end
|
18
34
|
|
19
35
|
attr_reader :options, :input
|
20
36
|
|
21
|
-
def initialize
|
22
|
-
@
|
23
|
-
:debug => false,
|
24
|
-
:prefer_comma_as_separator => false,
|
25
|
-
:comma => ',',
|
26
|
-
:stops => ',;',
|
27
|
-
:separator => /\s*(\band\b|\&|;)\s*/i,
|
28
|
-
:title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|pastor|pr|reverend|rev|elder|deacon|deaconess|father|fr|vicar|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
29
|
-
:suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
|
30
|
-
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
31
|
-
}
|
37
|
+
def initialize(options = {})
|
38
|
+
@options = self.class.defaults.merge(options)
|
32
39
|
end
|
33
40
|
|
34
41
|
def debug?
|
@@ -63,22 +70,21 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 107)
|
|
63
70
|
options[:prefer_comma_as_separator]
|
64
71
|
end
|
65
72
|
|
66
|
-
def parse(
|
67
|
-
parse!(
|
73
|
+
def parse(string)
|
74
|
+
parse!(string)
|
68
75
|
rescue => e
|
69
76
|
warn e.message if debug?
|
70
77
|
[]
|
71
78
|
end
|
72
79
|
|
73
80
|
def parse!(string)
|
74
|
-
input
|
81
|
+
@input = StringScanner.new(normalize(string))
|
75
82
|
reset
|
76
83
|
do_parse
|
77
84
|
end
|
78
85
|
|
79
86
|
def normalize(string)
|
80
|
-
string
|
81
|
-
string
|
87
|
+
string.scrub.strip
|
82
88
|
end
|
83
89
|
|
84
90
|
def reset
|
@@ -390,28 +396,28 @@ Racc_debug_parser = false
|
|
390
396
|
|
391
397
|
module_eval(<<'.,.,', 'parser.y', 11)
|
392
398
|
def _reduce_1(val, _values, result)
|
393
|
-
result = []
|
399
|
+
result = []
|
394
400
|
result
|
395
401
|
end
|
396
402
|
.,.,
|
397
403
|
|
398
404
|
module_eval(<<'.,.,', 'parser.y', 12)
|
399
405
|
def _reduce_2(val, _values, result)
|
400
|
-
result = [val[0]]
|
406
|
+
result = [val[0]]
|
401
407
|
result
|
402
408
|
end
|
403
409
|
.,.,
|
404
410
|
|
405
411
|
module_eval(<<'.,.,', 'parser.y', 13)
|
406
412
|
def _reduce_3(val, _values, result)
|
407
|
-
result = val[0] << val[2]
|
413
|
+
result = val[0] << val[2]
|
408
414
|
result
|
409
415
|
end
|
410
416
|
.,.,
|
411
417
|
|
412
418
|
module_eval(<<'.,.,', 'parser.y', 15)
|
413
419
|
def _reduce_4(val, _values, result)
|
414
|
-
result = Name.new(:given => val[0])
|
420
|
+
result = Name.new(:given => val[0])
|
415
421
|
result
|
416
422
|
end
|
417
423
|
.,.,
|
@@ -420,14 +426,14 @@ module_eval(<<'.,.,', 'parser.y', 15)
|
|
420
426
|
|
421
427
|
module_eval(<<'.,.,', 'parser.y', 17)
|
422
428
|
def _reduce_6(val, _values, result)
|
423
|
-
result = val[0].merge(:family => val[1])
|
429
|
+
result = val[0].merge(:family => val[1])
|
424
430
|
result
|
425
431
|
end
|
426
432
|
.,.,
|
427
433
|
|
428
434
|
module_eval(<<'.,.,', 'parser.y', 18)
|
429
435
|
def _reduce_7(val, _values, result)
|
430
|
-
result = val[1].merge(val[0])
|
436
|
+
result = val[1].merge(val[0])
|
431
437
|
result
|
432
438
|
end
|
433
439
|
.,.,
|
@@ -436,14 +442,14 @@ module_eval(<<'.,.,', 'parser.y', 18)
|
|
436
442
|
|
437
443
|
module_eval(<<'.,.,', 'parser.y', 21)
|
438
444
|
def _reduce_9(val, _values, result)
|
439
|
-
result = Name.new(:appellation => val[0])
|
445
|
+
result = Name.new(:appellation => val[0])
|
440
446
|
result
|
441
447
|
end
|
442
448
|
.,.,
|
443
449
|
|
444
450
|
module_eval(<<'.,.,', 'parser.y', 22)
|
445
451
|
def _reduce_10(val, _values, result)
|
446
|
-
result = Name.new(:title => val[0])
|
452
|
+
result = Name.new(:title => val[0])
|
447
453
|
result
|
448
454
|
end
|
449
455
|
.,.,
|
@@ -452,7 +458,7 @@ module_eval(<<'.,.,', 'parser.y', 26)
|
|
452
458
|
def _reduce_11(val, _values, result)
|
453
459
|
result = Name.new(:given => val[0], :family => val[1],
|
454
460
|
:suffix => val[2], :title => val[3])
|
455
|
-
|
461
|
+
|
456
462
|
result
|
457
463
|
end
|
458
464
|
.,.,
|
@@ -461,7 +467,7 @@ module_eval(<<'.,.,', 'parser.y', 31)
|
|
461
467
|
def _reduce_12(val, _values, result)
|
462
468
|
result = Name.new(:given => val[0], :nick => val[1],
|
463
469
|
:family => val[2], :suffix => val[3], :title => val[4])
|
464
|
-
|
470
|
+
|
465
471
|
result
|
466
472
|
end
|
467
473
|
.,.,
|
@@ -471,7 +477,7 @@ module_eval(<<'.,.,', 'parser.y', 36)
|
|
471
477
|
result = Name.new(:given => val[0], :nick => val[1],
|
472
478
|
:particle => val[2], :family => val[3],
|
473
479
|
:suffix => val[4], :title => val[5])
|
474
|
-
|
480
|
+
|
475
481
|
result
|
476
482
|
end
|
477
483
|
.,.,
|
@@ -480,7 +486,7 @@ module_eval(<<'.,.,', 'parser.y', 42)
|
|
480
486
|
def _reduce_14(val, _values, result)
|
481
487
|
result = Name.new(:given => val[0], :particle => val[1],
|
482
488
|
:family => val[2])
|
483
|
-
|
489
|
+
|
484
490
|
result
|
485
491
|
end
|
486
492
|
.,.,
|
@@ -488,7 +494,7 @@ module_eval(<<'.,.,', 'parser.y', 42)
|
|
488
494
|
module_eval(<<'.,.,', 'parser.y', 47)
|
489
495
|
def _reduce_15(val, _values, result)
|
490
496
|
result = Name.new(:particle => val[0], :family => val[1])
|
491
|
-
|
497
|
+
|
492
498
|
result
|
493
499
|
end
|
494
500
|
.,.,
|
@@ -497,7 +503,7 @@ module_eval(<<'.,.,', 'parser.y', 52)
|
|
497
503
|
def _reduce_16(val, _values, result)
|
498
504
|
result = Name.new({ :family => val[0], :suffix => val[2][0],
|
499
505
|
:given => val[2][1] }, !!val[2][0])
|
500
|
-
|
506
|
+
|
501
507
|
result
|
502
508
|
end
|
503
509
|
.,.,
|
@@ -506,7 +512,7 @@ module_eval(<<'.,.,', 'parser.y', 57)
|
|
506
512
|
def _reduce_17(val, _values, result)
|
507
513
|
result = Name.new({ :particle => val[0], :family => val[1],
|
508
514
|
:suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
|
509
|
-
|
515
|
+
|
510
516
|
result
|
511
517
|
end
|
512
518
|
.,.,
|
@@ -515,7 +521,7 @@ module_eval(<<'.,.,', 'parser.y', 62)
|
|
515
521
|
def _reduce_18(val, _values, result)
|
516
522
|
result = Name.new({ :particle => val[0,2].join(' '), :family => val[2],
|
517
523
|
:suffix => val[4][0], :given => val[4][1] }, !!val[4][0])
|
518
|
-
|
524
|
+
|
519
525
|
result
|
520
526
|
end
|
521
527
|
.,.,
|
@@ -524,14 +530,14 @@ module_eval(<<'.,.,', 'parser.y', 62)
|
|
524
530
|
|
525
531
|
module_eval(<<'.,.,', 'parser.y', 68)
|
526
532
|
def _reduce_20(val, _values, result)
|
527
|
-
result = val.join(' ')
|
533
|
+
result = val.join(' ')
|
528
534
|
result
|
529
535
|
end
|
530
536
|
.,.,
|
531
537
|
|
532
538
|
module_eval(<<'.,.,', 'parser.y', 69)
|
533
539
|
def _reduce_21(val, _values, result)
|
534
|
-
result = val.join(' ')
|
540
|
+
result = val.join(' ')
|
535
541
|
result
|
536
542
|
end
|
537
543
|
.,.,
|
@@ -542,28 +548,28 @@ module_eval(<<'.,.,', 'parser.y', 69)
|
|
542
548
|
|
543
549
|
module_eval(<<'.,.,', 'parser.y', 73)
|
544
550
|
def _reduce_24(val, _values, result)
|
545
|
-
result = [nil,val[0]]
|
551
|
+
result = [nil,val[0]]
|
546
552
|
result
|
547
553
|
end
|
548
554
|
.,.,
|
549
555
|
|
550
556
|
module_eval(<<'.,.,', 'parser.y', 74)
|
551
557
|
def _reduce_25(val, _values, result)
|
552
|
-
result = [val[2],val[0]]
|
558
|
+
result = [val[2],val[0]]
|
553
559
|
result
|
554
560
|
end
|
555
561
|
.,.,
|
556
562
|
|
557
563
|
module_eval(<<'.,.,', 'parser.y', 75)
|
558
564
|
def _reduce_26(val, _values, result)
|
559
|
-
result = [val[0],nil]
|
565
|
+
result = [val[0],nil]
|
560
566
|
result
|
561
567
|
end
|
562
568
|
.,.,
|
563
569
|
|
564
570
|
module_eval(<<'.,.,', 'parser.y', 76)
|
565
571
|
def _reduce_27(val, _values, result)
|
566
|
-
result = [val[0],val[2]]
|
572
|
+
result = [val[0],val[2]]
|
567
573
|
result
|
568
574
|
end
|
569
575
|
.,.,
|
@@ -572,7 +578,7 @@ module_eval(<<'.,.,', 'parser.y', 76)
|
|
572
578
|
|
573
579
|
module_eval(<<'.,.,', 'parser.y', 79)
|
574
580
|
def _reduce_29(val, _values, result)
|
575
|
-
result = val.join(' ')
|
581
|
+
result = val.join(' ')
|
576
582
|
result
|
577
583
|
end
|
578
584
|
.,.,
|
@@ -585,7 +591,7 @@ module_eval(<<'.,.,', 'parser.y', 79)
|
|
585
591
|
|
586
592
|
module_eval(<<'.,.,', 'parser.y', 84)
|
587
593
|
def _reduce_33(val, _values, result)
|
588
|
-
result = val.join(' ')
|
594
|
+
result = val.join(' ')
|
589
595
|
result
|
590
596
|
end
|
591
597
|
.,.,
|
@@ -612,7 +618,7 @@ module_eval(<<'.,.,', 'parser.y', 84)
|
|
612
618
|
|
613
619
|
module_eval(<<'.,.,', 'parser.y', 94)
|
614
620
|
def _reduce_44(val, _values, result)
|
615
|
-
result = val.join(' ')
|
621
|
+
result = val.join(' ')
|
616
622
|
result
|
617
623
|
end
|
618
624
|
.,.,
|
@@ -625,7 +631,7 @@ module_eval(<<'.,.,', 'parser.y', 94)
|
|
625
631
|
|
626
632
|
module_eval(<<'.,.,', 'parser.y', 99)
|
627
633
|
def _reduce_48(val, _values, result)
|
628
|
-
result = val.join(' ')
|
634
|
+
result = val.join(' ')
|
629
635
|
result
|
630
636
|
end
|
631
637
|
.,.,
|
@@ -635,4 +641,4 @@ def _reduce_none(val, _values, result)
|
|
635
641
|
end
|
636
642
|
|
637
643
|
end # class Parser
|
638
|
-
end # module Namae
|
644
|
+
end # module Namae
|
data/lib/namae/parser.y
CHANGED
@@ -100,26 +100,33 @@ rule
|
|
100
100
|
| titles TITLE { result = val.join(' ') }
|
101
101
|
|
102
102
|
---- header
|
103
|
-
require 'singleton'
|
104
103
|
require 'strscan'
|
105
104
|
|
106
105
|
---- inner
|
107
106
|
|
108
|
-
|
107
|
+
@defaults = {
|
108
|
+
:debug => false,
|
109
|
+
:prefer_comma_as_separator => false,
|
110
|
+
:comma => ',',
|
111
|
+
:stops => ',;',
|
112
|
+
:separator => /\s*(\band\b|\&|;)\s*/i,
|
113
|
+
:title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|pastor|pr|reverend|rev|elder|deacon|deaconess|father|fr|vicar|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
114
|
+
:suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
|
115
|
+
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
116
|
+
}
|
117
|
+
|
118
|
+
class << self
|
119
|
+
attr_reader :defaults
|
120
|
+
|
121
|
+
def instance
|
122
|
+
Thread.current[:namae] ||= new
|
123
|
+
end
|
124
|
+
end
|
109
125
|
|
110
126
|
attr_reader :options, :input
|
111
127
|
|
112
|
-
def initialize
|
113
|
-
@
|
114
|
-
:debug => false,
|
115
|
-
:prefer_comma_as_separator => false,
|
116
|
-
:comma => ',',
|
117
|
-
:stops => ',;',
|
118
|
-
:separator => /\s*(\band\b|\&|;)\s*/i,
|
119
|
-
:title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|pastor|pr|reverend|rev|elder|deacon|deaconess|father|fr|vicar|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
120
|
-
:suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
|
121
|
-
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
122
|
-
}
|
128
|
+
def initialize(options = {})
|
129
|
+
@options = self.class.defaults.merge(options)
|
123
130
|
end
|
124
131
|
|
125
132
|
def debug?
|
@@ -154,22 +161,21 @@ require 'strscan'
|
|
154
161
|
options[:prefer_comma_as_separator]
|
155
162
|
end
|
156
163
|
|
157
|
-
def parse(
|
158
|
-
parse!(
|
164
|
+
def parse(string)
|
165
|
+
parse!(string)
|
159
166
|
rescue => e
|
160
167
|
warn e.message if debug?
|
161
168
|
[]
|
162
169
|
end
|
163
170
|
|
164
171
|
def parse!(string)
|
165
|
-
input
|
172
|
+
@input = StringScanner.new(normalize(string))
|
166
173
|
reset
|
167
174
|
do_parse
|
168
175
|
end
|
169
176
|
|
170
177
|
def normalize(string)
|
171
|
-
string
|
172
|
-
string
|
178
|
+
string.scrub.strip
|
173
179
|
end
|
174
180
|
|
175
181
|
def reset
|
data/lib/namae/utility.rb
CHANGED
data/lib/namae/version.rb
CHANGED
data/namae.gemspec
CHANGED
@@ -2,23 +2,22 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: namae 0.
|
5
|
+
# stub: namae 1.0.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "namae".freeze
|
9
|
-
s.version = "0.
|
9
|
+
s.version = "1.0.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib".freeze]
|
13
13
|
s.authors = ["Sylvester Keil".freeze, "Dan Collis-Puro".freeze]
|
14
|
-
s.date = "
|
15
|
-
s.description = " Namae (\
|
14
|
+
s.date = "2017-11-30"
|
15
|
+
s.description = " Namae (\u540D\u524D) is a parser for human names. It recognizes personal names of various cultural backgrounds and tries to split them into their component parts (e.g., given and family names, honorifics etc.). ".freeze
|
16
16
|
s.email = ["sylvester@keil.or.at".freeze, "dan@collispuro.com".freeze]
|
17
17
|
s.extra_rdoc_files = [
|
18
18
|
"README.md"
|
19
19
|
]
|
20
20
|
s.files = [
|
21
|
-
".autotest",
|
22
21
|
".codeclimate.yml",
|
23
22
|
".coveralls.yml",
|
24
23
|
".document",
|
@@ -50,23 +49,24 @@ Gem::Specification.new do |s|
|
|
50
49
|
"spec/namae/name_spec.rb",
|
51
50
|
"spec/namae/parser_spec.rb",
|
52
51
|
"spec/namae/utility_spec.rb",
|
53
|
-
"spec/spec_helper.rb"
|
52
|
+
"spec/spec_helper.rb",
|
53
|
+
"spec/thread_safety_spec.rb"
|
54
54
|
]
|
55
55
|
s.homepage = "https://github.com/berkmancenter/namae".freeze
|
56
56
|
s.licenses = ["AGPL-3.0".freeze]
|
57
|
-
s.rubygems_version = "2.6.
|
58
|
-
s.summary = "Namae (\
|
57
|
+
s.rubygems_version = "2.6.13".freeze
|
58
|
+
s.summary = "Namae (\u540D\u524D) parses personal names and splits them into their component parts.".freeze
|
59
59
|
|
60
60
|
if s.respond_to? :specification_version then
|
61
61
|
s.specification_version = 4
|
62
62
|
|
63
63
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
64
|
-
s.add_development_dependency(%q<racc>.freeze, ["
|
64
|
+
s.add_development_dependency(%q<racc>.freeze, ["~> 1.4"])
|
65
65
|
else
|
66
|
-
s.add_dependency(%q<racc>.freeze, ["
|
66
|
+
s.add_dependency(%q<racc>.freeze, ["~> 1.4"])
|
67
67
|
end
|
68
68
|
else
|
69
|
-
s.add_dependency(%q<racc>.freeze, ["
|
69
|
+
s.add_dependency(%q<racc>.freeze, ["~> 1.4"])
|
70
70
|
end
|
71
71
|
end
|
72
72
|
|
data/spec/namae/parser_spec.rb
CHANGED
data/spec/thread_safety_spec.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
module Namae
|
2
|
+
describe 'Parser using threads' do
|
3
|
+
let(:name_1_str) { "Foo Bar" }
|
4
|
+
let(:name_2_str) { "Baz" }
|
5
|
+
let(:name_1) { Namae.parse(name_1_str).first }
|
6
|
+
let(:name_2) { Namae.parse(name_2_str).first }
|
7
|
+
|
8
|
+
def compare(string, expectation)
|
9
|
+
name = Namae.parse(string).first
|
10
|
+
given_name_match = expectation.given == name.given
|
11
|
+
family_name_match = expectation.family == name.family
|
12
|
+
raise unless given_name_match && family_name_match
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'has no conflicts' do
|
16
|
+
[[name_1_str, name_1], [name_2_str, name_2]].map do |string, expectation|
|
17
|
+
Thread.new do
|
18
|
+
1000.times do
|
19
|
+
compare(string, expectation)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end.each(&:join)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: namae
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sylvester Keil
|
@@ -9,22 +9,22 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2017-11-30 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: racc
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
|
-
- -
|
18
|
+
- - "~>"
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version: 1.4
|
20
|
+
version: '1.4'
|
21
21
|
type: :development
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
|
-
- -
|
25
|
+
- - "~>"
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version: 1.4
|
27
|
+
version: '1.4'
|
28
28
|
description: " Namae (名前) is a parser for human names. It recognizes personal names
|
29
29
|
of various cultural backgrounds and tries to split them into their component parts
|
30
30
|
(e.g., given and family names, honorifics etc.). "
|
@@ -36,7 +36,6 @@ extensions: []
|
|
36
36
|
extra_rdoc_files:
|
37
37
|
- README.md
|
38
38
|
files:
|
39
|
-
- ".autotest"
|
40
39
|
- ".codeclimate.yml"
|
41
40
|
- ".coveralls.yml"
|
42
41
|
- ".document"
|
@@ -69,6 +68,7 @@ files:
|
|
69
68
|
- spec/namae/parser_spec.rb
|
70
69
|
- spec/namae/utility_spec.rb
|
71
70
|
- spec/spec_helper.rb
|
71
|
+
- spec/thread_safety_spec.rb
|
72
72
|
homepage: https://github.com/berkmancenter/namae
|
73
73
|
licenses:
|
74
74
|
- AGPL-3.0
|
@@ -89,7 +89,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
89
89
|
version: '0'
|
90
90
|
requirements: []
|
91
91
|
rubyforge_project:
|
92
|
-
rubygems_version: 2.6.
|
92
|
+
rubygems_version: 2.6.13
|
93
93
|
signing_key:
|
94
94
|
specification_version: 4
|
95
95
|
summary: Namae (名前) parses personal names and splits them into their component parts.
|
data/.autotest
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
require 'bundler'
|
2
|
-
begin
|
3
|
-
if RUBY_PLATFORM =~ /darwin/
|
4
|
-
Bundler.setup(:default, :development, :debug, :test, :osx)
|
5
|
-
require 'autotest/fsevent'
|
6
|
-
else
|
7
|
-
Bundler.setup(:default, :development, :debug, :test)
|
8
|
-
end
|
9
|
-
rescue Bundler::BundlerError => e
|
10
|
-
$stderr.puts e.message
|
11
|
-
$stderr.puts "Run `bundle install` to install missing gems"
|
12
|
-
exit e.status_code
|
13
|
-
end
|
14
|
-
|
15
|
-
|
16
|
-
Autotest.add_hook :initialize do |at|
|
17
|
-
at.add_mapping(/.+\.y$/) do |f,_|
|
18
|
-
system 'rake clean racc'
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|