namae 0.8.1 → 0.8.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.simplecov +2 -0
- data/.travis.yml +2 -1
- data/Gemfile +8 -6
- data/Rakefile +4 -0
- data/features/support/env.rb +7 -2
- data/lib/namae/parser.rb +19 -21
- data/lib/namae/parser.y +22 -24
- data/lib/namae/version.rb +1 -1
- data/namae.gemspec +12 -15
- data/spec/spec_helper.rb +4 -3
- metadata +18 -33
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b2f817b8e6424d7ca383930ce0e7972067a6739b
|
4
|
+
data.tar.gz: 70dabec7ddfa545b6bcda9c3d7dc00bc8c0879c3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 768ae4032f5da7b9212e0fb3c4e3830209a7fb95d0b8ac7f5a4cf0c233fd47d3eb9c5bc1ba5d2e58b5d1f6ab81c7a19220a046808265bbfc9c505f42eda2b94a
|
7
|
+
data.tar.gz: 5fcca10db251ec01f7f9f1d1e73794eedad46fc5684b5c90535518f262270e77207bf9a705cbe424cdcce4d05ff11f0f08efc643c09c57097055e73a9c2f42ac
|
data/.simplecov
CHANGED
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
@@ -6,10 +6,15 @@ group :test do
|
|
6
6
|
gem 'cucumber', '~>1.3'
|
7
7
|
end
|
8
8
|
|
9
|
+
|
9
10
|
group :development do
|
10
|
-
gem 'racc', '1.4.9', :platform => [:ruby]
|
11
11
|
gem 'simplecov', '~>0.8', :require => false, :platforms => [:ruby_19, :ruby_20, :ruby_21]
|
12
12
|
gem 'rubinius-coverage', '~>2.0', :platform => :rbx
|
13
|
+
gem 'coveralls', :require => false
|
14
|
+
gem 'racc', '1.4.9', :platform => [:ruby]
|
15
|
+
end
|
16
|
+
|
17
|
+
group :optional do
|
13
18
|
gem 'ZenTest', '~>4.9'
|
14
19
|
gem 'jeweler', '~> 2.0'
|
15
20
|
gem 'yard', '~>0.8'
|
@@ -25,9 +30,6 @@ group :osx do
|
|
25
30
|
gem 'autotest-fsevent'
|
26
31
|
end
|
27
32
|
|
28
|
-
|
29
|
-
|
30
|
-
gem 'rubysl', '~>2.0'
|
31
|
-
gem 'racc', '1.4.9'
|
32
|
-
end
|
33
|
+
platform :rbx do
|
34
|
+
gem 'rubysl', '~>2.0'
|
33
35
|
end
|
data/Rakefile
CHANGED
@@ -58,6 +58,10 @@ Cucumber::Rake::Task.new(:features)
|
|
58
58
|
|
59
59
|
task :default => [:spec, :features]
|
60
60
|
|
61
|
+
require 'coveralls/rake/task'
|
62
|
+
Coveralls::RakeTask.new
|
63
|
+
task :test_with_coveralls => [:spec, :features, 'coveralls:push']
|
64
|
+
|
61
65
|
begin
|
62
66
|
require 'yard'
|
63
67
|
YARD::Rake::YardocTask.new
|
data/features/support/env.rb
CHANGED
@@ -9,12 +9,17 @@ end
|
|
9
9
|
|
10
10
|
begin
|
11
11
|
require 'simplecov'
|
12
|
+
require 'coveralls' if ENV['CI']
|
12
13
|
rescue LoadError
|
13
14
|
# ignore
|
14
|
-
end
|
15
|
+
end unless RUBY_VERSION < '1.9'
|
15
16
|
|
16
17
|
begin
|
17
|
-
|
18
|
+
if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'rbx'
|
19
|
+
require 'rubinius/debugger'
|
20
|
+
else
|
21
|
+
require 'debugger'
|
22
|
+
end
|
18
23
|
rescue LoadError
|
19
24
|
# ignore
|
20
25
|
end
|
data/lib/namae/parser.rb
CHANGED
@@ -15,9 +15,9 @@ module Namae
|
|
15
15
|
module_eval(<<'...end parser.y/module_eval...', 'parser.y', 97)
|
16
16
|
|
17
17
|
include Singleton
|
18
|
-
|
19
|
-
attr_reader :options
|
20
|
-
|
18
|
+
|
19
|
+
attr_reader :options, :input
|
20
|
+
|
21
21
|
def initialize
|
22
22
|
@input, @options = StringScanner.new(''), {
|
23
23
|
:debug => false,
|
@@ -29,15 +29,15 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 97)
|
|
29
29
|
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
30
30
|
}
|
31
31
|
end
|
32
|
-
|
32
|
+
|
33
33
|
def debug?
|
34
34
|
options[:debug] || ENV['DEBUG']
|
35
35
|
end
|
36
|
-
|
36
|
+
|
37
37
|
def separator
|
38
38
|
options[:separator]
|
39
39
|
end
|
40
|
-
|
40
|
+
|
41
41
|
def comma
|
42
42
|
options[:comma]
|
43
43
|
end
|
@@ -53,7 +53,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 97)
|
|
53
53
|
def appellation
|
54
54
|
options[:appellation]
|
55
55
|
end
|
56
|
-
|
56
|
+
|
57
57
|
def prefer_comma_as_separator?
|
58
58
|
options[:prefer_comma_as_separator]
|
59
59
|
end
|
@@ -64,39 +64,39 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 97)
|
|
64
64
|
warn e.message if debug?
|
65
65
|
[]
|
66
66
|
end
|
67
|
-
|
67
|
+
|
68
68
|
def parse!(string)
|
69
69
|
input.string = normalize(string)
|
70
70
|
reset
|
71
71
|
do_parse
|
72
72
|
end
|
73
|
-
|
73
|
+
|
74
74
|
def normalize(string)
|
75
75
|
string = string.strip
|
76
76
|
string
|
77
77
|
end
|
78
|
-
|
78
|
+
|
79
79
|
def reset
|
80
|
-
@commas, @words, @initials, @suffices, @yydebug = 0, 0, 0, 0, debug?
|
80
|
+
@commas, @words, @initials, @suffices, @yydebug = 0, 0, 0, 0, debug?
|
81
81
|
self
|
82
82
|
end
|
83
83
|
|
84
84
|
private
|
85
|
-
|
85
|
+
|
86
86
|
def stack
|
87
87
|
@vstack || @racc_vstack || []
|
88
88
|
end
|
89
|
-
|
89
|
+
|
90
90
|
def last_token
|
91
91
|
stack[-1]
|
92
92
|
end
|
93
|
-
|
93
|
+
|
94
94
|
def consume_separator
|
95
95
|
return next_token if seen_separator?
|
96
96
|
@commas, @words, @initials, @suffices = 0, 0, 0, 0
|
97
97
|
[:AND, :AND]
|
98
98
|
end
|
99
|
-
|
99
|
+
|
100
100
|
def consume_comma
|
101
101
|
@commas += 1
|
102
102
|
[:COMMA, :COMMA]
|
@@ -122,11 +122,11 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 97)
|
|
122
122
|
def suffix?
|
123
123
|
!@suffices.zero? || will_see_suffix?
|
124
124
|
end
|
125
|
-
|
125
|
+
|
126
126
|
def will_see_suffix?
|
127
127
|
input.peek(8).to_s.strip.split(/\s+/)[0] =~ suffix
|
128
128
|
end
|
129
|
-
|
129
|
+
|
130
130
|
def will_see_initial?
|
131
131
|
input.peek(6).to_s.strip.split(/\s+/)[0] =~ /[[:alpha:]]\./
|
132
132
|
end
|
@@ -169,13 +169,11 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 97)
|
|
169
169
|
"Failed to parse name #{input.string.inspect}: unmatched data at offset #{input.pos}"
|
170
170
|
end
|
171
171
|
end
|
172
|
-
|
172
|
+
|
173
173
|
def on_error(tid, value, stack)
|
174
174
|
raise ArgumentError,
|
175
175
|
"Failed to parse name: unexpected '#{value}' at #{stack.inspect}"
|
176
176
|
end
|
177
|
-
|
178
|
-
attr_reader :input
|
179
177
|
|
180
178
|
# -*- racc -*-
|
181
179
|
...end parser.y/module_eval...
|
@@ -593,4 +591,4 @@ def _reduce_none(val, _values, result)
|
|
593
591
|
end
|
594
592
|
|
595
593
|
end # class Parser
|
596
|
-
|
594
|
+
end # module Namae
|
data/lib/namae/parser.y
CHANGED
@@ -17,10 +17,10 @@ rule
|
|
17
17
|
| honorific word { result = val[0].merge(:family => val[1]) }
|
18
18
|
| honorific display_order { result = val[1].merge(val[0]) }
|
19
19
|
| sort_order
|
20
|
-
|
20
|
+
|
21
21
|
honorific : APPELLATION { result = Name.new(:appellation => val[0]) }
|
22
22
|
| TITLE { result = Name.new(:title => val[0]) }
|
23
|
-
|
23
|
+
|
24
24
|
display_order : u_words word opt_suffices
|
25
25
|
{
|
26
26
|
result = Name.new(:given => val[0], :family => val[1], :suffix => val[2])
|
@@ -43,7 +43,7 @@ rule
|
|
43
43
|
{
|
44
44
|
result = Name.new(:particle => val[0], :family => val[1])
|
45
45
|
}
|
46
|
-
|
46
|
+
|
47
47
|
sort_order : last COMMA first
|
48
48
|
{
|
49
49
|
result = Name.new({ :family => val[0], :suffix => val[2][0],
|
@@ -79,7 +79,7 @@ rule
|
|
79
79
|
|
80
80
|
words : word
|
81
81
|
| words word { result = val.join(' ') }
|
82
|
-
|
82
|
+
|
83
83
|
opt_words : /* empty */ | words
|
84
84
|
|
85
85
|
word : LWORD | UWORD | PWORD
|
@@ -96,9 +96,9 @@ require 'strscan'
|
|
96
96
|
---- inner
|
97
97
|
|
98
98
|
include Singleton
|
99
|
-
|
100
|
-
attr_reader :options
|
101
|
-
|
99
|
+
|
100
|
+
attr_reader :options, :input
|
101
|
+
|
102
102
|
def initialize
|
103
103
|
@input, @options = StringScanner.new(''), {
|
104
104
|
:debug => false,
|
@@ -110,15 +110,15 @@ require 'strscan'
|
|
110
110
|
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
111
111
|
}
|
112
112
|
end
|
113
|
-
|
113
|
+
|
114
114
|
def debug?
|
115
115
|
options[:debug] || ENV['DEBUG']
|
116
116
|
end
|
117
|
-
|
117
|
+
|
118
118
|
def separator
|
119
119
|
options[:separator]
|
120
120
|
end
|
121
|
-
|
121
|
+
|
122
122
|
def comma
|
123
123
|
options[:comma]
|
124
124
|
end
|
@@ -134,7 +134,7 @@ require 'strscan'
|
|
134
134
|
def appellation
|
135
135
|
options[:appellation]
|
136
136
|
end
|
137
|
-
|
137
|
+
|
138
138
|
def prefer_comma_as_separator?
|
139
139
|
options[:prefer_comma_as_separator]
|
140
140
|
end
|
@@ -145,39 +145,39 @@ require 'strscan'
|
|
145
145
|
warn e.message if debug?
|
146
146
|
[]
|
147
147
|
end
|
148
|
-
|
148
|
+
|
149
149
|
def parse!(string)
|
150
150
|
input.string = normalize(string)
|
151
151
|
reset
|
152
152
|
do_parse
|
153
153
|
end
|
154
|
-
|
154
|
+
|
155
155
|
def normalize(string)
|
156
156
|
string = string.strip
|
157
157
|
string
|
158
158
|
end
|
159
|
-
|
159
|
+
|
160
160
|
def reset
|
161
|
-
@commas, @words, @initials, @suffices, @yydebug = 0, 0, 0, 0, debug?
|
161
|
+
@commas, @words, @initials, @suffices, @yydebug = 0, 0, 0, 0, debug?
|
162
162
|
self
|
163
163
|
end
|
164
164
|
|
165
165
|
private
|
166
|
-
|
166
|
+
|
167
167
|
def stack
|
168
168
|
@vstack || @racc_vstack || []
|
169
169
|
end
|
170
|
-
|
170
|
+
|
171
171
|
def last_token
|
172
172
|
stack[-1]
|
173
173
|
end
|
174
|
-
|
174
|
+
|
175
175
|
def consume_separator
|
176
176
|
return next_token if seen_separator?
|
177
177
|
@commas, @words, @initials, @suffices = 0, 0, 0, 0
|
178
178
|
[:AND, :AND]
|
179
179
|
end
|
180
|
-
|
180
|
+
|
181
181
|
def consume_comma
|
182
182
|
@commas += 1
|
183
183
|
[:COMMA, :COMMA]
|
@@ -203,11 +203,11 @@ require 'strscan'
|
|
203
203
|
def suffix?
|
204
204
|
!@suffices.zero? || will_see_suffix?
|
205
205
|
end
|
206
|
-
|
206
|
+
|
207
207
|
def will_see_suffix?
|
208
208
|
input.peek(8).to_s.strip.split(/\s+/)[0] =~ suffix
|
209
209
|
end
|
210
|
-
|
210
|
+
|
211
211
|
def will_see_initial?
|
212
212
|
input.peek(6).to_s.strip.split(/\s+/)[0] =~ /[[:alpha:]]\./
|
213
213
|
end
|
@@ -250,12 +250,10 @@ require 'strscan'
|
|
250
250
|
"Failed to parse name #{input.string.inspect}: unmatched data at offset #{input.pos}"
|
251
251
|
end
|
252
252
|
end
|
253
|
-
|
253
|
+
|
254
254
|
def on_error(tid, value, stack)
|
255
255
|
raise ArgumentError,
|
256
256
|
"Failed to parse name: unexpected '#{value}' at #{stack.inspect}"
|
257
257
|
end
|
258
|
-
|
259
|
-
attr_reader :input
|
260
258
|
|
261
259
|
# -*- racc -*-
|
data/lib/namae/version.rb
CHANGED
data/namae.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: namae 0.8.1
|
5
|
+
# stub: namae 0.8.2 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "namae"
|
9
|
-
s.version = "0.8.1"
|
9
|
+
s.version = "0.8.2"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Sylvester Keil", "Dan Collis-Puro"]
|
14
|
-
s.date = "2014-01-
|
14
|
+
s.date = "2014-01-28"
|
15
15
|
s.description = " Namae (\u{540d}\u{524d}) is a parser for human names. It recognizes personal names of various cultural backgrounds and tries to split them into their component parts (e.g., given and family names, honorifics etc.). "
|
16
16
|
s.email = ["sylvester@keil.or.at", "dan@collispuro.com"]
|
17
17
|
s.extra_rdoc_files = [
|
@@ -57,27 +57,24 @@ Gem::Specification.new do |s|
|
|
57
57
|
s.specification_version = 4
|
58
58
|
|
59
59
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
60
|
-
s.
|
60
|
+
s.add_runtime_dependency(%q<rubysl>, ["~> 2.0"])
|
61
61
|
s.add_development_dependency(%q<simplecov>, ["~> 0.8"])
|
62
62
|
s.add_development_dependency(%q<rubinius-coverage>, ["~> 2.0"])
|
63
|
-
s.add_development_dependency(%q<
|
64
|
-
s.add_development_dependency(%q<
|
65
|
-
s.add_development_dependency(%q<yard>, ["~> 0.8"])
|
63
|
+
s.add_development_dependency(%q<coveralls>, [">= 0"])
|
64
|
+
s.add_development_dependency(%q<racc>, ["= 1.4.9"])
|
66
65
|
else
|
67
|
-
s.add_dependency(%q<
|
66
|
+
s.add_dependency(%q<rubysl>, ["~> 2.0"])
|
68
67
|
s.add_dependency(%q<simplecov>, ["~> 0.8"])
|
69
68
|
s.add_dependency(%q<rubinius-coverage>, ["~> 2.0"])
|
70
|
-
s.add_dependency(%q<
|
71
|
-
s.add_dependency(%q<
|
72
|
-
s.add_dependency(%q<yard>, ["~> 0.8"])
|
69
|
+
s.add_dependency(%q<coveralls>, [">= 0"])
|
70
|
+
s.add_dependency(%q<racc>, ["= 1.4.9"])
|
73
71
|
end
|
74
72
|
else
|
75
|
-
s.add_dependency(%q<
|
73
|
+
s.add_dependency(%q<rubysl>, ["~> 2.0"])
|
76
74
|
s.add_dependency(%q<simplecov>, ["~> 0.8"])
|
77
75
|
s.add_dependency(%q<rubinius-coverage>, ["~> 2.0"])
|
78
|
-
s.add_dependency(%q<
|
79
|
-
s.add_dependency(%q<
|
80
|
-
s.add_dependency(%q<yard>, ["~> 0.8"])
|
76
|
+
s.add_dependency(%q<coveralls>, [">= 0"])
|
77
|
+
s.add_dependency(%q<racc>, ["= 1.4.9"])
|
81
78
|
end
|
82
79
|
end
|
83
80
|
|
data/spec/spec_helper.rb
CHANGED
@@ -1,16 +1,17 @@
|
|
1
1
|
begin
|
2
|
-
require 'simplecov'
|
2
|
+
require 'simplecov'
|
3
|
+
require 'coveralls' if ENV['CI']
|
3
4
|
rescue LoadError
|
4
5
|
# ignore
|
5
6
|
end unless RUBY_VERSION < '1.9'
|
6
7
|
|
7
8
|
begin
|
8
|
-
if RUBY_ENGINE == 'rbx'
|
9
|
+
if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'rbx'
|
9
10
|
require 'rubinius/debugger'
|
10
11
|
else
|
11
12
|
require 'debugger'
|
12
13
|
end
|
13
|
-
rescue LoadError
|
14
|
+
rescue LoadError
|
14
15
|
# ignore
|
15
16
|
end
|
16
17
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: namae
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.1
|
4
|
+
version: 0.8.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sylvester Keil
|
@@ -9,22 +9,22 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-01-
|
12
|
+
date: 2014-01-28 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
15
|
+
name: rubysl
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
|
-
- -
|
18
|
+
- - "~>"
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version:
|
21
|
-
type: :
|
20
|
+
version: '2.0'
|
21
|
+
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
|
-
- -
|
25
|
+
- - "~>"
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version:
|
27
|
+
version: '2.0'
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: simplecov
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
@@ -54,47 +54,33 @@ dependencies:
|
|
54
54
|
- !ruby/object:Gem::Version
|
55
55
|
version: '2.0'
|
56
56
|
- !ruby/object:Gem::Dependency
|
57
|
-
name:
|
58
|
-
requirement: !ruby/object:Gem::Requirement
|
59
|
-
requirements:
|
60
|
-
- - "~>"
|
61
|
-
- !ruby/object:Gem::Version
|
62
|
-
version: '4.9'
|
63
|
-
type: :development
|
64
|
-
prerelease: false
|
65
|
-
version_requirements: !ruby/object:Gem::Requirement
|
66
|
-
requirements:
|
67
|
-
- - "~>"
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
version: '4.9'
|
70
|
-
- !ruby/object:Gem::Dependency
|
71
|
-
name: jeweler
|
57
|
+
name: coveralls
|
72
58
|
requirement: !ruby/object:Gem::Requirement
|
73
59
|
requirements:
|
74
|
-
- - "
|
60
|
+
- - ">="
|
75
61
|
- !ruby/object:Gem::Version
|
76
|
-
version: '
|
62
|
+
version: '0'
|
77
63
|
type: :development
|
78
64
|
prerelease: false
|
79
65
|
version_requirements: !ruby/object:Gem::Requirement
|
80
66
|
requirements:
|
81
|
-
- - "
|
67
|
+
- - ">="
|
82
68
|
- !ruby/object:Gem::Version
|
83
|
-
version: '
|
69
|
+
version: '0'
|
84
70
|
- !ruby/object:Gem::Dependency
|
85
|
-
name:
|
71
|
+
name: racc
|
86
72
|
requirement: !ruby/object:Gem::Requirement
|
87
73
|
requirements:
|
88
|
-
- -
|
74
|
+
- - '='
|
89
75
|
- !ruby/object:Gem::Version
|
90
|
-
version:
|
76
|
+
version: 1.4.9
|
91
77
|
type: :development
|
92
78
|
prerelease: false
|
93
79
|
version_requirements: !ruby/object:Gem::Requirement
|
94
80
|
requirements:
|
95
|
-
- -
|
81
|
+
- - '='
|
96
82
|
- !ruby/object:Gem::Version
|
97
|
-
version:
|
83
|
+
version: 1.4.9
|
98
84
|
description: " Namae (名前) is a parser for human names. It recognizes personal names
|
99
85
|
of various cultural backgrounds and tries to split them into their component parts
|
100
86
|
(e.g., given and family names, honorifics etc.). "
|
@@ -160,4 +146,3 @@ signing_key:
|
|
160
146
|
specification_version: 4
|
161
147
|
summary: Namae (名前) parses personal names and splits them into their component parts.
|
162
148
|
test_files: []
|
163
|
-
has_rdoc:
|