name-tamer 0.1.9 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b556d5a36fcb89c56e435fd67a0f159987b7f8b9
4
- data.tar.gz: 7ac8e5b948e6edb607f367d4d8303bb3c97c0d9d
3
+ metadata.gz: 3532b7472b3daecb0bb11863268531c229771639
4
+ data.tar.gz: 0096dd16106d480f6c5e1e043dbf54896f787599
5
5
  SHA512:
6
- metadata.gz: 215966db363f5630a1b53671c95792057deb29306a07e1cf93c1772700be4b3fc3e4f9cab40a1ba6d39f58813eddead3226d1faa2583aa21e3d73f41eb2c1403
7
- data.tar.gz: 4d2bcd7d0f9b8556c548235c4d12ae0c92643399444a2ce70c5e9b8ef984577d54915827a86f4b243d3434775a1ddb37ccd1d0055b8645c56ab770bdba558613
6
+ metadata.gz: 4ee6d017e93b54acd10791f44a2920c46fe76faaafcb0171ab59c582f7d07c34036bc64610c5298ae363a33fa26fbedd1711800771acc29620beb6967adea10e
7
+ data.tar.gz: 23741e994c62fc8c746f826e3124824a29e3a5ecff22930a1ec350db0363b5b6d20d67bc5104d546f25c9301850a8944e936cb5e41c6320524d766303a2be69c
data/.rubocop.yml CHANGED
@@ -7,4 +7,4 @@ CyclomaticComplexity:
7
7
  ClassLength:
8
8
  Description: 'Avoid classes longer than 100 lines of code.'
9
9
  CountComments: false # count full line comments?
10
- Max: 316
10
+ Max: 321
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- name-tamer (0.1.8)
4
+ name-tamer (0.1.9)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/lib/name-tamer.rb CHANGED
@@ -23,11 +23,12 @@ class NameTamer
23
23
 
24
24
  def tidy_name
25
25
  unless @tidy_name
26
- @tidy_name = name.dup # Start with the name we've received
26
+ @tidy_name = name.dup # Start with the name we've received
27
27
 
28
- tidy_spacing # " John Smith " -> "John Smith"
29
- fix_encoding_errors # "Ren\u00c3\u00a9 Descartes" -> "Ren\u00e9 Descartes"
30
- consolidate_initials # "I. B. M." -> "I.B.M."
28
+ ensure_safe # Invalid byte sequence in UTF-8, for example
29
+ tidy_spacing # " John Smith " -> "John Smith"
30
+ fix_encoding_errors # "Ren\u00c3\u00a9 Descartes" -> "Ren\u00e9 Descartes"
31
+ consolidate_initials # "I. B. M." -> "I.B.M."
31
32
  end
32
33
 
33
34
  @tidy_name
@@ -111,6 +112,10 @@ class NameTamer
111
112
  # Tidy up the name we've received
112
113
  #--------------------------------------------------------
113
114
 
115
+ def ensure_safe
116
+ @tidy_name.ensure_safe
117
+ end
118
+
114
119
  def tidy_spacing
115
120
  @tidy_name
116
121
  .space_after_comma!
@@ -295,6 +300,7 @@ class NameTamer
295
300
  @contact_type = ct
296
301
  end
297
302
 
303
+ @tidy_name = nil
298
304
  @nice_name = nil
299
305
  @simple_name = nil
300
306
  @slug = nil
@@ -1,3 +1,3 @@
1
1
  class NameTamer
2
- VERSION = '0.1.9'
2
+ VERSION = '0.2.0'
3
3
  end
data/lib/string_extras.rb CHANGED
@@ -2,83 +2,74 @@
2
2
  class String
3
3
  # Strip illegal characters out completely
4
4
  def strip_unwanted!(filter)
5
- self.gsub!(filter, '')
6
- self # Allows chaining
5
+ substitute!(filter, '')
7
6
  end
8
7
 
9
8
  def strip_or_self!
10
- self.strip!
11
- self # Allows chaining
9
+ strip! || self
12
10
  end
13
11
 
14
12
  # Change any whitespace into our separator character
15
13
  def whitespace_to!(separator)
16
- self.gsub!(/[[:space:]]+/, separator)
17
- self # Allows chaining
14
+ substitute!(/[[:space:]]+/, separator)
18
15
  end
19
16
 
20
17
  # Ensure commas have exactly one space after them
21
18
  def space_after_comma!
22
- self.gsub!(/,[[:space:]]*/, ', ')
23
- self # Allows chaining
19
+ substitute!(/,[[:space:]]*/, ', ')
24
20
  end
25
21
 
26
22
  # Change some characters embedded in words to our separator character
27
23
  # e.g. example.com -> example-com
28
24
  def invalid_chars_to!(separator)
29
- self.gsub!(/(?<![[:space:]])[\.\/](?![[:space:]])/, separator)
30
- self # Allows chaining
25
+ substitute!(/(?<![[:space:]])[\.\/](?![[:space:]])/, separator)
31
26
  end
32
27
 
33
28
  # Make sure separators are not where they shouldn't be
34
29
  def fix_separators!(separator)
35
- unless separator.nil? || separator.empty?
36
- r = Regexp.escape(separator)
37
- # No more than one of the separator in a row.
38
- self.gsub!(/#{r}{2,}/, separator)
39
- # Remove leading/trailing separator.
40
- self.gsub!(/^#{r}|#{r}$/i, '')
41
- end
30
+ return self if separator.nil? || separator.empty?
42
31
 
43
- self # Allows chaining
32
+ r = Regexp.escape(separator)
33
+
34
+ # No more than one of the separator in a row.
35
+ substitute!(/#{r}{2,}/, separator)
36
+
37
+ # Remove leading/trailing separator.
38
+ substitute!(/^#{r}|#{r}$/i, '')
44
39
  end
45
40
 
46
41
  # Any characters that resemble latin characters might usefully be
47
42
  # transliterated into ones that are easy to type on an anglophone
48
43
  # keyboard.
49
44
  def approximate_latin_chars!
50
- self.gsub!(/[^\x00-\x7f]/u) { |char| APPROXIMATIONS[char] || char }
51
- self # Allows chaining
45
+ gsub!(/[^\x00-\x7f]/u) { |char| APPROXIMATIONS[char] || char } || self
52
46
  end
53
47
 
54
48
  # Strings that were wrongly encoded with single-byte encodings sometimes have
55
49
  # tell-tale substrings that we can put back into the correct UTF-8 character
56
50
  def fix_encoding_errors!
57
- self.gsub!(BAD_ENCODING_PATTERNS) { |substring| BAD_ENCODING[substring] || substring }
58
- self # Allows chaining
51
+ gsub!(BAD_ENCODING_PATTERNS) { |substring| BAD_ENCODING[substring] || substring } || self
59
52
  end
60
53
 
61
54
  def upcase_first_letter!
62
- self.gsub!(/\b\w/) { |first| first.upcase }
63
- self # Allows chaining
55
+ gsub!(/\b\w/) { |first| first.upcase } || self
64
56
  end
65
57
 
66
58
  def downcase_after_apostrophe!
67
- self.gsub!(/\'\w\b/) { |c| c.downcase } # Lowercase 's
68
- self # Allows chaining
59
+ gsub!(/\'\w\b/) { |c| c.downcase } || self # Lowercase 's
69
60
  end
70
61
 
71
62
  # Our list of terminal characters that indicate a non-celtic name used
72
63
  # to include o but we removed it because of MacMurdo.
73
64
  def fix_mac!
74
65
  if self =~ /\bMac[A-Za-z]{2,}[^acizj]\b/ || self =~ /\bMc/
75
- self.gsub!(/\b(Ma?c)([A-Za-z]+)/) { |_| Regexp.last_match[1] + Regexp.last_match[2].capitalize }
66
+ gsub!(/\b(Ma?c)([A-Za-z]+)/) { |_| Regexp.last_match[1] + Regexp.last_match[2].capitalize }
76
67
 
77
68
  # Fix Mac exceptions
78
69
  %w(
79
70
  MacEdo MacEvicius MacHado MacHar MacHin MacHlin MacIas MacIulis MacKie
80
71
  MacKle MacKlin MacKmin MacKmurdo MacQuarie MacLise MacKenzie
81
- ).each { |mac_name| self.gsub!(/\b#{mac_name}/, mac_name.capitalize) }
72
+ ).each { |mac_name| substitute!(/\b#{mac_name}/, mac_name.capitalize) }
82
73
  end
83
74
 
84
75
  self # Allows chaining
@@ -88,7 +79,7 @@ class String
88
79
  def fix_ff!
89
80
  %w(
90
81
  Fforbes Fforde Ffinch Ffrench Ffoulkes
91
- ).each { |ff_name| self.gsub!(ff_name, ff_name.downcase) }
82
+ ).each { |ff_name| substitute!(ff_name, ff_name.downcase) }
92
83
 
93
84
  self # Allows chaining
94
85
  end
@@ -98,13 +89,13 @@ class String
98
89
  # Fixes for name modifiers followed by an apostrophe, e.g. d'Artagnan, Commedia dell'Arte
99
90
  def fix_name_modifiers!
100
91
  NAME_MODIFIERS.each do |modifier|
101
- self.gsub!(/((?:[[:space:]]|^)#{modifier})([[:space:]]+|-)/) do |_|
92
+ gsub!(/((?:[[:space:]]|^)#{modifier})([[:space:]]+|-)/) do |_|
102
93
  "#{Regexp.last_match[1].rstrip.downcase}#{Regexp.last_match[2].tr(ASCII_SPACE, NONBREAKING_SPACE)}"
103
94
  end
104
95
  end
105
96
 
106
97
  %w(Dell D).each do |modifier|
107
- self.gsub!(/(.#{modifier}')(\w)/) { |_| "#{Regexp.last_match[1].rstrip.downcase}#{Regexp.last_match[2]}" }
98
+ gsub!(/(.#{modifier}')(\w)/) { |_| "#{Regexp.last_match[1].rstrip.downcase}#{Regexp.last_match[2]}" }
108
99
  end
109
100
 
110
101
  self # Allows chaining
@@ -113,16 +104,14 @@ class String
113
104
  # Upcase words with no vowels, e.g JPR Williams
114
105
  # Except Ng
115
106
  def upcase_initials!
116
- self.gsub!(/\b([bcdfghjklmnpqrstvwxz]+)\b/i) { |_| Regexp.last_match[1].upcase }
117
- self.gsub!(/\b(NG)\b/i) { |_| Regexp.last_match[1].capitalize } # http://en.wikipedia.org/wiki/Ng
118
-
119
- self # Allows chaining
107
+ gsub!(/\b([bcdfghjklmnpqrstvwxz]+)\b/i) { |_| Regexp.last_match[1].upcase }
108
+ gsub!(/\b(NG)\b/i) { |_| Regexp.last_match[1].capitalize } || self # http://en.wikipedia.org/wiki/Ng
120
109
  end
121
110
 
122
111
  # Fix known last names that have spaces (not hyphens!)
123
112
  def nbsp_in_compound_name!
124
113
  COMPOUND_NAMES.each do |compound_name|
125
- self.gsub!(compound_name, compound_name.tr(ASCII_SPACE, NONBREAKING_SPACE))
114
+ substitute!(compound_name, compound_name.tr(ASCII_SPACE, NONBREAKING_SPACE))
126
115
  end
127
116
 
128
117
  self # Allows chaining
@@ -130,25 +119,33 @@ class String
130
119
 
131
120
  def nbsp_in_name_modifier!
132
121
  NAME_MODIFIERS.each do |modifier|
133
- self.gsub!(/([[:space:]]#{modifier})([[:space:]])/i) { |_| "#{Regexp.last_match[1]}#{NONBREAKING_SPACE}" }
122
+ gsub!(/([[:space:]]#{modifier})([[:space:]])/i) { |_| "#{Regexp.last_match[1]}#{NONBREAKING_SPACE}" }
134
123
  end
135
124
 
136
125
  self # Allows chaining
137
126
  end
138
127
 
139
128
  def remove_periods_from_initials!
140
- self.gsub!(/\b([a-z])\./i) { |_| Regexp.last_match[1] }
141
- self # Allows chaining
129
+ gsub!(/\b([a-z])\./i) { |_| Regexp.last_match[1] } || self
142
130
  end
143
131
 
144
132
  def remove_spaces_from_initials!
145
- self.gsub!(/\b([a-z])(\.)* \b(?![a-z0-9']{2,})/i) { |_| "#{Regexp.last_match[1]}#{Regexp.last_match[2]}" }
146
- self # Allows chaining
133
+ gsub!(/\b([a-z])(\.)* \b(?![a-z0-9'\u00C0-\u00FF]{2,})/i) do |_|
134
+ "#{Regexp.last_match[1]}#{Regexp.last_match[2]}"
135
+ end || self
147
136
  end
148
137
 
149
138
  def ensure_space_after_initials!
150
- self.gsub!(/\b([a-z]\.)(?=[a-z0-9]{2,})/i) { |_| "#{Regexp.last_match[1]} " }
151
- self # Allows chaining
139
+ gsub!(/\b([a-z]\.)(?=[a-z0-9]{2,})/i) { |_| "#{Regexp.last_match[1]} " } || self
140
+ end
141
+
142
+ def ensure_safe
143
+ return if valid_encoding?
144
+ encode!('UTF-8', invalid: :replace, undef: :replace, replace: '')
145
+ end
146
+
147
+ def substitute!(pattern, replacement)
148
+ gsub!(pattern, replacement) || self
152
149
  end
153
150
 
154
151
  NONBREAKING_SPACE = "\u00a0"
@@ -183,12 +183,9 @@ describe NameTamer do
183
183
  sn: 'Scout Loyalty Optimizer',
184
184
  s: 'scout-loyalty-optimizer'
185
185
  },
186
- { n: 'René Descartes',
187
- t: :person,
188
- nn: 'René Descartes',
189
- sn: 'René Descartes',
190
- s: 'rene-descartes'
191
- }
186
+ { n: 'René Descartes', t: :person, nn: 'René Descartes', sn: 'René Descartes', s: 'rene-descartes' },
187
+ { n: 'Pablo M Sánchez', t: :person, nn: 'Pablo M Sánchez', sn: 'Pablo Sánchez', s: 'pablo-sanchez' },
188
+ { n: "\xc3\x28", t: :person, nn: '()', sn: '()', s: '_' } # Invalid byte sequence in UTF-8
192
189
  ]
193
190
  end
194
191
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: name-tamer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Xenapto
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-30 00:00:00.000000000 Z
11
+ date: 2014-07-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler