namae 0.9.3 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9c35acb2595cb60e9256141f209a97ebe717c5fd
4
- data.tar.gz: ce3c39facdf1b12051b56edb8330376f777eb2d8
3
+ metadata.gz: 7e3498b418afe338293a9fe9c7b4c8747ee014b2
4
+ data.tar.gz: cac2386cd67b385330b50a8d722429100020b2e6
5
5
  SHA512:
6
- metadata.gz: e3e158d44f41686c75eadb19b9a4ef984db05b57d7e2be63aa4db7b8c62bb119216ddbd6a7a1c2bc28d6068f6a7df7a7cbb2cb8f176903b43e54e2af07df21f5
7
- data.tar.gz: e440237a7f090dc85cbb1e68f1388831147823794b305fcc0e6ae131fe4cb26692d841ca0f9a2facf3bcf65f5ab6ac79fe0ec492a2adeb9e8cb6eac427435eb8
6
+ metadata.gz: a2001bf45ca48aa8d9d1a8f31143a3687f5013ffa4e8be7837c7efa08a5acb9257cd8e7685a0ee19f89a49d8e4bf841a596cc99e734087d6bb32828c025b0e34
7
+ data.tar.gz: e7b62fc3a1278938f42e5fcee152cf47ba06e5378f3bedef6bc238d3983a11d2ffc25bc7ec6c8dd90655dc6ca29f976d62e2c3dd710d6cd0700056481244f1d3
data/.travis.yml CHANGED
@@ -1,7 +1,12 @@
1
1
  language: ruby
2
2
  bundler_args: --without debug osx optional
3
3
  script: bundle exec rake test_with_coveralls
4
+
5
+ sudo: false
6
+ cache: bundler
7
+
4
8
  rvm:
9
+ - 2.2.0
5
10
  - 2.1.0
6
11
  - 2.0.0
7
12
  - 1.9.3
data/Gemfile CHANGED
@@ -21,8 +21,8 @@ group :optional do
21
21
  end
22
22
 
23
23
  group :debug do
24
- gem 'debugger', '~>1.6', :platform => [:mri_20, :mri_19]
25
- gem 'byebug', '~>3.5', :platform => [:mri_21, :mri_22]
24
+ gem 'debugger', '~>1.6', :platform => [:mri_19]
25
+ gem 'byebug', '~>3.5', :platform => :mri if RUBY_VERSION > '2.0'
26
26
  gem 'rubinius-compiler', '~>2.0', :platform => :rbx
27
27
  gem 'rubinius-debugger', '~>2.0', :platform => :rbx
28
28
  end
@@ -30,6 +30,22 @@ Feature: Parse a list of names
30
30
  | Dennis | Ritchie |
31
31
  | Donald | Knuth |
32
32
 
33
+ @list
34
+ Scenario: A list of names separated by semicolons
35
+ When I parse the names "John D. Smith; Jack R. Johnson; Emily Tanner"
36
+ Then there should be 3 names
37
+ And the names should be:
38
+ | given | family |
39
+ | John D. | Smith |
40
+ | Jack R. | Johnson |
41
+ | Emily | Tanner |
42
+ When I parse the names "Smith, John D.; Johnson, Jack R.; Tanner, Emily"
43
+ Then there should be 3 names
44
+ And the names should be:
45
+ | given | family |
46
+ | John D. | Smith |
47
+ | Jack R. | Johnson |
48
+ | Emily | Tanner |
33
49
 
34
50
  @list
35
51
  Scenario: A list of sort-order names with initials separated by commas
@@ -51,6 +67,24 @@ Feature: Parse a list of names
51
67
  | Dennis | Ritchie |
52
68
  | Donald | Knuth |
53
69
 
70
+ @list
71
+ Scenario: A list of mixed names separated by semicolons, commas and 'and'
72
+ Given a parser that prefers commas as separators
73
+ When I parse the names "John D. Smith, Jack R. Johnson & Emily Tanner"
74
+ Then there should be 3 names
75
+ And the names should be:
76
+ | given | family |
77
+ | John D. | Smith |
78
+ | Jack R. | Johnson |
79
+ | Emily | Tanner |
80
+ When I parse the names "C. Foster; C. Hamel, C. Desroches"
81
+ Then there should be 3 names
82
+ And the names should be:
83
+ | given | family |
84
+ | C. | Foster |
85
+ | C. | Hamel |
86
+ | C. | Desroches |
87
+
54
88
  @list
55
89
  Scenario: A list of display-order names separated by commas and 'and'
56
90
  Given a parser that prefers commas as separators
@@ -64,7 +98,7 @@ Feature: Parse a list of names
64
98
 
65
99
  @list @wip
66
100
  Scenario: A list of names separated by commas
67
- Given a parser that prefers commas as separators
101
+ Given a parser that prefers commas as separators
68
102
  When I parse the names "G. Proctor, M. Cooper, P. Sanders & B. Malcom"
69
103
  Then the names should be:
70
104
  | given | family |
@@ -81,7 +115,7 @@ Feature: Parse a list of names
81
115
  | B | Malcom |
82
116
 
83
117
  Scenario: A list of names with particles separated by commas
84
- Given a parser that prefers commas as separators
118
+ Given a parser that prefers commas as separators
85
119
  When I parse the names "Di Proctor, M., von Cooper, P."
86
120
  Then the names should be:
87
121
  | given | family |
data/lib/namae/parser.rb CHANGED
@@ -23,7 +23,8 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 107)
23
23
  :debug => false,
24
24
  :prefer_comma_as_separator => false,
25
25
  :comma => ',',
26
- :separator => /\s*(\band\b|\&)\s*/i,
26
+ :stops => ',;',
27
+ :separator => /\s*(\band\b|\&|;)\s*/i,
27
28
  :title => /\s*\b(sir|lord|count(ess)?|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
28
29
  :suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
29
30
  :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
@@ -42,6 +43,10 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 107)
42
43
  options[:comma]
43
44
  end
44
45
 
46
+ def stops
47
+ options[:stops]
48
+ end
49
+
45
50
  def title
46
51
  options[:title]
47
52
  end
@@ -142,7 +147,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 107)
142
147
  nil
143
148
  when input.scan(separator)
144
149
  consume_separator
145
- when input.scan(/\s*,\s*/)
150
+ when input.scan(/\s*#{comma}\s*/)
146
151
  if @commas.zero? && !seen_full_name? || @commas == 1 && suffix?
147
152
  consume_comma
148
153
  else
@@ -156,11 +161,11 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 107)
156
161
  consume_word(:SUFFIX, input.matched.strip)
157
162
  when input.scan(appellation)
158
163
  [:APPELLATION, input.matched.strip]
159
- when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{comma}]*/)
164
+ when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{stops}]*/)
160
165
  consume_word(:UWORD, input.matched)
161
- when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{comma}]*/)
166
+ when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{stops}]*/)
162
167
  consume_word(:LWORD, input.matched)
163
- when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{comma}]*/)
168
+ when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{stops}]*/)
164
169
  consume_word(:PWORD, input.matched)
165
170
  when input.scan(/('[^'\n]+')|("[^"\n]+")/)
166
171
  consume_word(:NICK, input.matched[1...-1])
data/lib/namae/parser.y CHANGED
@@ -114,7 +114,8 @@ require 'strscan'
114
114
  :debug => false,
115
115
  :prefer_comma_as_separator => false,
116
116
  :comma => ',',
117
- :separator => /\s*(\band\b|\&)\s*/i,
117
+ :stops => ',;',
118
+ :separator => /\s*(\band\b|\&|;)\s*/i,
118
119
  :title => /\s*\b(sir|lord|count(ess)?|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
119
120
  :suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
120
121
  :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
@@ -133,6 +134,10 @@ require 'strscan'
133
134
  options[:comma]
134
135
  end
135
136
 
137
+ def stops
138
+ options[:stops]
139
+ end
140
+
136
141
  def title
137
142
  options[:title]
138
143
  end
@@ -233,7 +238,7 @@ require 'strscan'
233
238
  nil
234
239
  when input.scan(separator)
235
240
  consume_separator
236
- when input.scan(/\s*,\s*/)
241
+ when input.scan(/\s*#{comma}\s*/)
237
242
  if @commas.zero? && !seen_full_name? || @commas == 1 && suffix?
238
243
  consume_comma
239
244
  else
@@ -247,11 +252,11 @@ require 'strscan'
247
252
  consume_word(:SUFFIX, input.matched.strip)
248
253
  when input.scan(appellation)
249
254
  [:APPELLATION, input.matched.strip]
250
- when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{comma}]*/)
255
+ when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{stops}]*/)
251
256
  consume_word(:UWORD, input.matched)
252
- when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{comma}]*/)
257
+ when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{stops}]*/)
253
258
  consume_word(:LWORD, input.matched)
254
- when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{comma}]*/)
259
+ when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{stops}]*/)
255
260
  consume_word(:PWORD, input.matched)
256
261
  when input.scan(/('[^'\n]+')|("[^"\n]+")/)
257
262
  consume_word(:NICK, input.matched[1...-1])
data/lib/namae/version.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  module Namae
2
2
  module Version
3
3
  MAJOR = 0
4
- MINOR = 9
5
- PATCH = 3
4
+ MINOR = 10
5
+ PATCH = 0
6
6
  BUILD = nil
7
7
 
8
8
  STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.').freeze
data/namae.gemspec CHANGED
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: namae 0.9.3 ruby lib
5
+ # stub: namae 0.10.0 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "namae"
9
- s.version = "0.9.3"
9
+ s.version = "0.10.0"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib"]
13
13
  s.authors = ["Sylvester Keil", "Dan Collis-Puro"]
14
- s.date = "2015-01-19"
14
+ s.date = "2015-04-26"
15
15
  s.description = " Namae (\u{540d}\u{524d}) is a parser for human names. It recognizes personal names of various cultural backgrounds and tries to split them into their component parts (e.g., given and family names, honorifics etc.). "
16
16
  s.email = ["sylvester@keil.or.at", "dan@collispuro.com"]
17
17
  s.extra_rdoc_files = [
@@ -44,6 +44,27 @@ module Namae
44
44
  end
45
45
  end
46
46
 
47
+ describe 'when the next input is ", "' do
48
+ before { parser.send(:input).string = ', ' }
49
+ it 'returns a COMMA token' do
50
+ expect(parser.send(:next_token)).to eq([:COMMA, :COMMA])
51
+ end
52
+ end
53
+
54
+ describe 'when the next input is "; "' do
55
+ before { parser.send(:input).string = '; ' }
56
+ it 'returns an AND token' do
57
+ expect(parser.send(:next_token)).to eq([:AND, :AND])
58
+ end
59
+ end
60
+
61
+ describe 'when the next input is "foo;"' do
62
+ before { parser.send(:input).string = 'foo;' }
63
+ it 'returns an LWORD token "foo"' do
64
+ expect(parser.send(:next_token)).to eq([:LWORD, 'foo'])
65
+ end
66
+ end
67
+
47
68
  describe 'when the next input is " \'foo bar\' "' do
48
69
  before { parser.send(:input).string = " 'foo bar' " }
49
70
  it 'returns a NICK token' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: namae
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.3
4
+ version: 0.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sylvester Keil
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-01-19 00:00:00.000000000 Z
12
+ date: 2015-04-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: simplecov
@@ -134,3 +134,4 @@ signing_key:
134
134
  specification_version: 4
135
135
  summary: Namae (名前) parses personal names and splits them into their component parts.
136
136
  test_files: []
137
+ has_rdoc: