nomener 0.2.7 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +33 -5
- data/lib/nomener/parser.rb +30 -25
- data/lib/nomener/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 68e937013b06402708e9f33b7105bfa96838b310
|
|
4
|
+
data.tar.gz: 52555ad53e9a6dceac49750a28f4eecba8463e51
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 92fe2644e1408afdfb36cfbd24bc9d9d2d5d2cf2d9b405bd875cf3e8990c7253a449106c4338e6fcf6670f0780b64a4b5ca9011426bc75cabf2faf36a9b59f43
|
|
7
|
+
data.tar.gz: dde1cdf91eac5b6ed6e0dbd9a15c077f8c3457848eead726df0a8a5838a83f1b4ac81c06f92edaedd5a3d5b7112e4734c76152489e7da9e59fb223c3b940954d
|
data/README.md
CHANGED
|
@@ -2,11 +2,14 @@
|
|
|
2
2
|
[](http://badge.fury.io/rb/nomener)
|
|
3
3
|
[](https://travis-ci.org/dan-ding/nomener)
|
|
4
4
|
[](https://codeclimate.com/github/dan-ding/nomener)
|
|
5
|
+
[](https://github.com/dan-ding/nomener/blob/master/LICENSE.txt)
|
|
5
6
|
|
|
6
7
|
Nomener assists with parsing people's names that they give themselves (or other people). Nomener ~~is~~ was a fork of [People](https://github.com/dan-ding/people) as it uses some code contributed there. It's currently geared towards western style name formatting; however, other cultural name formatting is (or would like to be) supported. Currently it attempts to parse names through pattern matching without using large(r) dictionary/library/data files (except for name decorations and suffixes, see usage). It may not be possible to do without such in all languages.
|
|
7
8
|
|
|
8
9
|
If you didn't know, parsing names can be much more difficult than it seems it should be.
|
|
9
10
|
|
|
11
|
+
Two of the main goals are to support (a) UX in a similar manner as described by [not splitting form fields](http://www.w3.org/International/questions/qa-personal-names#singlefield) and (b) an easy manner to handle parsing of strings which contain a name.
|
|
12
|
+
|
|
10
13
|
## Requirements
|
|
11
14
|
|
|
12
15
|
Requires Ruby 1.9.3 or higher (or equivalent).
|
|
@@ -36,16 +39,39 @@ name = Nomener.parse "Joe Smith" # <Nomener::Name first="Joe" last="Smith">
|
|
|
36
39
|
Create a new instance:
|
|
37
40
|
```ruby
|
|
38
41
|
name = Nomener::Name.new "Duke Joe (Henry) Smith Jr."
|
|
39
|
-
|
|
42
|
+
#<Nomener::Name title="Duke" first="Joe" nick="Henry" last="Smith" suffix="Jr">
|
|
43
|
+
```
|
|
44
|
+
returns the same as:
|
|
45
|
+
```ruby
|
|
46
|
+
name = Nomener.parse "Duke Joe (Henry) Smith Jr."
|
|
40
47
|
|
|
41
|
-
name.first # Joe
|
|
42
|
-
name.name # Joe Smith
|
|
43
|
-
"Hi #{name}!" # Hi Joe Smith!
|
|
44
|
-
name.last # Smith
|
|
48
|
+
name.first # "Joe"
|
|
49
|
+
name.name # "Joe Smith"
|
|
50
|
+
"Hi #{name}!" # "Hi Joe Smith!"
|
|
51
|
+
name.last # "Smith"
|
|
45
52
|
name.title # "Duke"
|
|
46
53
|
name.suffix # "Jr"
|
|
47
54
|
name.nick # "Henry"
|
|
48
55
|
```
|
|
56
|
+
## Formatting
|
|
57
|
+
|
|
58
|
+
The .name method accepts a string to format the name however you may like.
|
|
59
|
+
It defaults to "%f %l", which is the first and last name.
|
|
60
|
+
|
|
61
|
+
Other options are:
|
|
62
|
+
- %f # first name
|
|
63
|
+
- %l # last/surname/family name
|
|
64
|
+
- %m # middle name
|
|
65
|
+
- %n # nick name
|
|
66
|
+
- %m # middle name
|
|
67
|
+
- %s # suffix
|
|
68
|
+
- %t # title/prefix
|
|
69
|
+
|
|
70
|
+
```ruby
|
|
71
|
+
name = Nomener::Name.new "Duke Joe (Henry) Smith Jr."
|
|
72
|
+
name.name "%l, %f" # "Smith, Joe"
|
|
73
|
+
name.name "%t %l" # "Duke Smith"
|
|
74
|
+
```
|
|
49
75
|
|
|
50
76
|
## TODO
|
|
51
77
|
* optionally use web service api data to assist (and create the web service!)
|
|
@@ -54,6 +80,8 @@ name.nick # "Henry"
|
|
|
54
80
|
* specifying formats to parse by
|
|
55
81
|
* many other things
|
|
56
82
|
* better non-english support
|
|
83
|
+
* translations
|
|
84
|
+
* NLP?
|
|
57
85
|
|
|
58
86
|
## References
|
|
59
87
|
* [http://en.wikipedia.org/wiki/Personal_name](http://en.wikipedia.org/wiki/Personal_name)
|
data/lib/nomener/parser.rb
CHANGED
|
@@ -69,6 +69,7 @@ module Nomener
|
|
|
69
69
|
|
|
70
70
|
# grab any identified nickname before working on the rest
|
|
71
71
|
newname[:nick] = parse_nick! name
|
|
72
|
+
name.sub! NICKNAME_LEFTOVER, ""
|
|
72
73
|
cleanup! name
|
|
73
74
|
|
|
74
75
|
# grab any suffixes we can find
|
|
@@ -123,12 +124,7 @@ module Nomener
|
|
|
123
124
|
#
|
|
124
125
|
# Returns string of the title found or an empty string
|
|
125
126
|
def self.parse_title!(nm)
|
|
126
|
-
|
|
127
|
-
nm.gsub! TITLES do |title|
|
|
128
|
-
titles << title.strip
|
|
129
|
-
""
|
|
130
|
-
end
|
|
131
|
-
dustoff titles.join(" ")
|
|
127
|
+
dustoff gut!(nm, TITLES)
|
|
132
128
|
end
|
|
133
129
|
|
|
134
130
|
# Internal: pull off what suffixes we can
|
|
@@ -138,12 +134,7 @@ module Nomener
|
|
|
138
134
|
#
|
|
139
135
|
# Returns string of the suffixes found or an empty string
|
|
140
136
|
def self.parse_suffix!(nm)
|
|
141
|
-
|
|
142
|
-
nm.gsub! SUFFIXES do |suffix|
|
|
143
|
-
suffixes << suffix.strip
|
|
144
|
-
""
|
|
145
|
-
end
|
|
146
|
-
dustoff suffixes.join(" ")
|
|
137
|
+
dustoff gut!(nm, SUFFIXES)
|
|
147
138
|
end
|
|
148
139
|
|
|
149
140
|
# Internal: parse nickname out of string. presuming it's in quotes
|
|
@@ -153,13 +144,7 @@ module Nomener
|
|
|
153
144
|
#
|
|
154
145
|
# Returns string of the nickname found or an empty string
|
|
155
146
|
def self.parse_nick!(nm)
|
|
156
|
-
|
|
157
|
-
nm.sub! NICKNAME do |z|
|
|
158
|
-
nick = $1.strip
|
|
159
|
-
""
|
|
160
|
-
end
|
|
161
|
-
nm.sub! NICKNAME_LEFTOVER, ""
|
|
162
|
-
dustoff nick
|
|
147
|
+
dustoff gut!(nm, NICKNAME)
|
|
163
148
|
end
|
|
164
149
|
|
|
165
150
|
# Internal: parse last name from string
|
|
@@ -176,12 +161,17 @@ module Nomener
|
|
|
176
161
|
format = :lcf if (format == :auto && nm.index(","))
|
|
177
162
|
|
|
178
163
|
# these constants should have the named match :fam
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
164
|
+
nomen = case format
|
|
165
|
+
when :fl
|
|
166
|
+
nm.match( FIRSTLAST_MATCHER )
|
|
167
|
+
when :lf
|
|
168
|
+
nm.match( LASTFIRST_MATCHER )
|
|
169
|
+
when :lcf
|
|
170
|
+
nm.match( LASTCOMFIRST_MATCHER )
|
|
171
|
+
end
|
|
182
172
|
|
|
183
|
-
unless
|
|
184
|
-
last =
|
|
173
|
+
unless nomen.nil? || nomen[:fam].nil?
|
|
174
|
+
last = nomen[:fam].strip
|
|
185
175
|
nm.sub!(last, "")
|
|
186
176
|
nm.sub!(",", "")
|
|
187
177
|
end
|
|
@@ -233,5 +223,20 @@ module Nomener
|
|
|
233
223
|
str = str.squeeze " "
|
|
234
224
|
str = str.strip
|
|
235
225
|
end
|
|
226
|
+
|
|
227
|
+
# Internal: clean out a given string with a given pattern
|
|
228
|
+
# Modifies the given string
|
|
229
|
+
# str - the string to gut
|
|
230
|
+
# pattern - the regex to cut with
|
|
231
|
+
#
|
|
232
|
+
# Returns the removed matches, each stripped, joined by single spaces
|
|
233
|
+
def self.gut!(str = "", pattern = / /)
|
|
234
|
+
found = []
|
|
235
|
+
str.gsub! pattern do |pat|
|
|
236
|
+
found << pat.strip
|
|
237
|
+
""
|
|
238
|
+
end
|
|
239
|
+
found.join " "
|
|
240
|
+
end
|
|
236
241
|
end
|
|
237
|
-
end
|
|
242
|
+
end
|
data/lib/nomener/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: nomener
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.
|
|
4
|
+
version: 0.2.8
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Dante Piombino
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2015-04-
|
|
11
|
+
date: 2015-04-14 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|