icu_name 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +13 -7
- data/lib/icu_name/name.rb +1 -1
- data/lib/icu_name/version.rb +1 -1
- data/spec/name_spec.rb +6 -5
- metadata +3 -3
data/README.rdoc
CHANGED
@@ -26,24 +26,30 @@ Capitalisation, white space and punctuation will all be automatically corrected:
|
|
26
26
|
robert.name # => 'Robert J. Fischer'
|
27
27
|
robert.rname # => 'Fischer, Robert J.' (reversed name)
|
28
28
|
|
29
|
-
The input text, without any changes apart from white-space cleanup
|
29
|
+
The input text, without any changes apart from white-space cleanup and the insertion of a comma
|
30
|
+
(to separate the two names), is returned by the _original_ method:
|
30
31
|
|
31
|
-
robert.original # => 'robert j FISCHER'
|
32
|
+
robert.original # => 'FISCHER, robert j'
|
32
33
|
|
33
34
|
To avoid ambiguity when either the first or second names consist of multiple words, it is better to
|
34
35
|
supply the two separately, if known. However, the full name can be supplied alone to the constructor
|
35
|
-
and a guess will be made as to the first and last names.
|
36
|
+
and a guess will be made as to the first and last names (the last distinct word becomes the last name).
|
36
37
|
|
37
|
-
bobby = ICU::Name.new(' bobby
|
38
|
+
bobby = ICU::Name.new(' bobby fischer ')
|
38
39
|
|
39
40
|
bobby.first # => 'Bobby'
|
40
41
|
bobby.last # => 'Fischer'
|
41
42
|
|
43
|
+
But in this case, since the names were not supplied separately, the _original_ text will not contain a comma:
|
44
|
+
|
45
|
+
bobby.original # => 'bobby fischer'
|
46
|
+
|
42
47
|
Names will match even if one is missing middle initials or if a nickname is used for one of the first names.
|
43
48
|
|
44
49
|
bobby.match('Robert J.', 'Fischer') # => true
|
45
50
|
|
46
|
-
Note that the class is aware of only common nicknames (e.g. _Bobby_ and _Robert_, _Bill_ and _William_, etc)
|
51
|
+
Note that the class is aware of only common nicknames (e.g. _Bobby_ and _Robert_, _Bill_ and _William_, etc)
|
52
|
+
and not all possibilities.
|
47
53
|
|
48
54
|
Supplying the _match_ method with strings is equivalent to instantiating a Name instance with the same
|
49
55
|
strings and then matching it. So, for example the following are equivalent:
|
@@ -80,14 +86,14 @@ strings encoded in UTF-8, no matter what the input encoding.
|
|
80
86
|
eric = ICU::Name.new('éric'.encode("ISO-8859-1"), 'PRIÉ'.force_encoding("ASCII-8BIT"))
|
81
87
|
eric.rname # => "Prié, Éric"
|
82
88
|
eric.rname.encoding.name # => "UTF-8"
|
83
|
-
eric.original # => "éric PRIÉ"
|
89
|
+
eric.original # => "PRIÉ, éric"
|
84
90
|
eric.original.encoding.name # => "UTF-8"
|
85
91
|
|
86
92
|
Accented letters can be transliterated into their US-ASCII counterparts by setting the
|
87
93
|
_chars_ option, which is available in all accessors. For example:
|
88
94
|
|
89
95
|
eric.rname(:chars => "US-ASCII") # => "Prie, Eric"
|
90
|
-
eric.original(:chars => "US-ASCII") # => "eric PRIE"
|
96
|
+
eric.original(:chars => "US-ASCII") # => "PRIE, eric"
|
91
97
|
|
92
98
|
Also possible is the preservation of ISO-8859-1 characters, but the transliteration of
|
93
99
|
all other accented characters:
|
data/lib/icu_name/name.rb
CHANGED
@@ -66,7 +66,7 @@ module ICU
|
|
66
66
|
|
67
67
|
# Save the original inputs without any cleanup other than whitespace.
|
68
68
|
def originalize
|
69
|
-
@original = "#{@name1} #{@name2}"
|
69
|
+
@original = @name2 == '' ? @name1.clone : "#{@name2.strip}, #{@name1.strip}"
|
70
70
|
@original.strip!
|
71
71
|
@original.gsub!(/\s+/, ' ')
|
72
72
|
end
|
data/lib/icu_name/version.rb
CHANGED
data/spec/name_spec.rb
CHANGED
@@ -29,7 +29,7 @@ module ICU
|
|
29
29
|
end
|
30
30
|
|
31
31
|
it "#original returns the original data" do
|
32
|
-
@simple.original.should == 'mark j l ORR'
|
32
|
+
@simple.original.should == 'ORR, mark j l'
|
33
33
|
end
|
34
34
|
|
35
35
|
it "#match returns true if and only if two names match" do
|
@@ -72,10 +72,10 @@ module ICU
|
|
72
72
|
eric = Name.new('éric'.encode("ISO-8859-1"), 'PRIÉ'.force_encoding("ASCII-8BIT"))
|
73
73
|
eric.rname.should == "Prié, Éric"
|
74
74
|
eric.rname.encoding.name.should == "UTF-8"
|
75
|
-
eric.original.should == "éric PRIÉ"
|
75
|
+
eric.original.should == "PRIÉ, éric"
|
76
76
|
eric.original.encoding.name.should == "UTF-8"
|
77
77
|
eric.rname(:chars => "US-ASCII").should == "Prie, Eric"
|
78
|
-
eric.original(:chars => "US-ASCII").should == "eric PRIE"
|
78
|
+
eric.original(:chars => "US-ASCII").should == "PRIE, eric"
|
79
79
|
joe = Name.new('Józef', 'Żabiński')
|
80
80
|
joe.rname.should == "Żabiński, Józef"
|
81
81
|
joe.rname(:chars => "ISO-8859-1").should == "Zabinski, Józef"
|
@@ -183,8 +183,9 @@ module ICU
|
|
183
183
|
|
184
184
|
context "the original input" do
|
185
185
|
it "should be the original text unaltered except for white space" do
|
186
|
-
Name.new(' Mark j l ', ' ORR ').original.should == 'Mark j l ORR'
|
187
|
-
Name.new('
|
186
|
+
Name.new(' Mark j l ', ' ORR ').original.should == 'ORR, Mark j l'
|
187
|
+
Name.new(' Mark J. L. Orr ').original.should == 'Mark J. L. Orr'
|
188
|
+
Name.new('Józef', 'Żabiński').original.should == 'Żabiński, Józef'
|
188
189
|
Name.new('Ui Laigleis,Gearoidin').original.should == 'Ui Laigleis,Gearoidin'
|
189
190
|
end
|
190
191
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 1
|
8
|
-
- 2
|
9
|
-
version: 0.1.2
|
8
|
+
- 3
|
9
|
+
version: 0.1.3
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Mark Orr
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-02-
|
17
|
+
date: 2011-02-19 00:00:00 +00:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|