icu_name 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +13 -7
- data/lib/icu_name/name.rb +1 -1
- data/lib/icu_name/version.rb +1 -1
- data/spec/name_spec.rb +6 -5
- metadata +3 -3
data/README.rdoc
CHANGED
@@ -26,24 +26,30 @@ Capitalisation, white space and punctuation will all be automatically corrected:
|
|
26
26
|
robert.name # => 'Robert J. Fischer'
|
27
27
|
robert.rname # => 'Fischer, Robert J.' (reversed name)
|
28
28
|
|
29
|
-
The input text, without any changes apart from white-space cleanup
|
29
|
+
The input text, without any changes apart from white-space cleanup and the insertion of a comma
|
30
|
+
(to separate the two names), is returned by the _original_ method:
|
30
31
|
|
31
|
-
robert.original # => 'robert j
|
32
|
+
robert.original # => 'FISCHER, robert j'
|
32
33
|
|
33
34
|
To avoid ambiguity when either the first or second names consist of multiple words, it is better to
|
34
35
|
supply the two separately, if known. However, the full name can be supplied alone to the constructor
|
35
|
-
and a guess will be made as to the first and last names.
|
36
|
+
and a guess will be made as to the first and last names (the last distinct word becomes the last name).
|
36
37
|
|
37
|
-
bobby = ICU::Name.new(' bobby
|
38
|
+
bobby = ICU::Name.new(' bobby fischer ')
|
38
39
|
|
39
40
|
bobby.first # => 'Bobby'
|
40
41
|
bobby.last # => 'Fischer'
|
41
42
|
|
43
|
+
But in this case, since the names were not supplied separately, the _original_ text will not contain a comma:
|
44
|
+
|
45
|
+
bobby.original # => 'bobby fischer'
|
46
|
+
|
42
47
|
Names will match even if one is missing middle initials or if a nickname is used for one of the first names.
|
43
48
|
|
44
49
|
bobby.match('Robert J.', 'Fischer') # => true
|
45
50
|
|
46
|
-
Note that the class is aware of only common nicknames (e.g. _Bobby_ and _Robert_, _Bill_ and _William_, etc)
|
51
|
+
Note that the class is aware of only common nicknames (e.g. _Bobby_ and _Robert_, _Bill_ and _William_, etc.)
|
52
|
+
and not all possibilities.
|
47
53
|
|
48
54
|
Supplying the _match_ method with strings is equivalent to instantiating a Name instance with the same
|
49
55
|
strings and then matching it. So, for example, the following are equivalent:
|
@@ -80,14 +86,14 @@ strings encoded in UTF-8, no matter what the input encoding.
|
|
80
86
|
eric = ICU::Name.new('éric'.encode("ISO-8859-1"), 'PRIÉ'.force_encoding("ASCII-8BIT"))
|
81
87
|
eric.rname # => "Prié, Éric"
|
82
88
|
eric.rname.encoding.name # => "UTF-8"
|
83
|
-
eric.original # => "éric
|
89
|
+
eric.original # => "PRIÉ, éric"
|
84
90
|
eric.original.encoding.name # => "UTF-8"
|
85
91
|
|
86
92
|
Accented letters can be transliterated into their US-ASCII counterparts by setting the
|
87
93
|
_chars_ option, which is available in all accessors. For example:
|
88
94
|
|
89
95
|
eric.rname(:chars => "US-ASCII") # => "Prie, Eric"
|
90
|
-
eric.original(:chars => "US-ASCII") # => "eric
|
96
|
+
eric.original(:chars => "US-ASCII") # => "PRIE, eric"
|
91
97
|
|
92
98
|
Also possible is the preservation of ISO-8859-1 characters, but the transliteration of
|
93
99
|
all other accented characters:
|
data/lib/icu_name/name.rb
CHANGED
@@ -66,7 +66,7 @@ module ICU
|
|
66
66
|
|
67
67
|
# Save the original inputs without any cleanup other than whitespace and, when both names were supplied, joining them as "last, first".
|
68
68
|
def originalize
|
69
|
-
@original = "#{@
|
69
|
+
@original = @name2 == '' ? @name1.clone : "#{@name2.strip}, #{@name1.strip}"
|
70
70
|
@original.strip!
|
71
71
|
@original.gsub!(/\s+/, ' ')
|
72
72
|
end
|
data/lib/icu_name/version.rb
CHANGED
data/spec/name_spec.rb
CHANGED
@@ -29,7 +29,7 @@ module ICU
|
|
29
29
|
end
|
30
30
|
|
31
31
|
it "#original returns the original data" do
|
32
|
-
@simple.original.should == 'mark j l
|
32
|
+
@simple.original.should == 'ORR, mark j l'
|
33
33
|
end
|
34
34
|
|
35
35
|
it "#match returns true if and only if two names match" do
|
@@ -72,10 +72,10 @@ module ICU
|
|
72
72
|
eric = Name.new('éric'.encode("ISO-8859-1"), 'PRIÉ'.force_encoding("ASCII-8BIT"))
|
73
73
|
eric.rname.should == "Prié, Éric"
|
74
74
|
eric.rname.encoding.name.should == "UTF-8"
|
75
|
-
eric.original.should == "éric
|
75
|
+
eric.original.should == "PRIÉ, éric"
|
76
76
|
eric.original.encoding.name.should == "UTF-8"
|
77
77
|
eric.rname(:chars => "US-ASCII").should == "Prie, Eric"
|
78
|
-
eric.original(:chars => "US-ASCII").should == "eric
|
78
|
+
eric.original(:chars => "US-ASCII").should == "PRIE, eric"
|
79
79
|
joe = Name.new('Józef', 'Żabiński')
|
80
80
|
joe.rname.should == "Żabiński, Józef"
|
81
81
|
joe.rname(:chars => "ISO-8859-1").should == "Zabinski, Józef"
|
@@ -183,8 +183,9 @@ module ICU
|
|
183
183
|
|
184
184
|
context "the original input" do
|
185
185
|
it "should be the original text unaltered except for white space" do
|
186
|
-
Name.new(' Mark j l ', ' ORR ').original.should == 'Mark j l
|
187
|
-
Name.new('
|
186
|
+
Name.new(' Mark j l ', ' ORR ').original.should == 'ORR, Mark j l'
|
187
|
+
Name.new(' Mark J. L. Orr ').original.should == 'Mark J. L. Orr'
|
188
|
+
Name.new('Józef', 'Żabiński').original.should == 'Żabiński, Józef'
|
188
189
|
Name.new('Ui Laigleis,Gearoidin').original.should == 'Ui Laigleis,Gearoidin'
|
189
190
|
end
|
190
191
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 1
|
8
|
-
-
|
9
|
-
version: 0.1.
|
8
|
+
- 3
|
9
|
+
version: 0.1.3
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Mark Orr
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-02-
|
17
|
+
date: 2011-02-19 00:00:00 +00:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|