name-tamer 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3532b7472b3daecb0bb11863268531c229771639
4
- data.tar.gz: 0096dd16106d480f6c5e1e043dbf54896f787599
3
+ metadata.gz: 0a8134129faabc9dc73dec3bd107873adf6a8f21
4
+ data.tar.gz: 6c2c5f68c2e3e9a41c6ccc0ddba78a896952c725
5
5
  SHA512:
6
- metadata.gz: 4ee6d017e93b54acd10791f44a2920c46fe76faaafcb0171ab59c582f7d07c34036bc64610c5298ae363a33fa26fbedd1711800771acc29620beb6967adea10e
7
- data.tar.gz: 23741e994c62fc8c746f826e3124824a29e3a5ecff22930a1ec350db0363b5b6d20d67bc5104d546f25c9301850a8944e936cb5e41c6320524d766303a2be69c
6
+ metadata.gz: 199a1fb93b68757fd0a9bc6657c1595c655da6ad5ad0ba87ae6cbf43dea88a15e3ecc9cb7317ed3edff415cac3288e26f012653e2432e222a4497aed41d21534
7
+ data.tar.gz: ae987673dfb5a693706a470765a3d7ac300dd8ff14c76d796659e97c5db79085fa735d1c2b1e218baf2da829fba047690cddbe640b02758b63a6fe49e503d01b
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- name-tamer (0.1.9)
4
+ name-tamer (0.2.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -19,13 +19,35 @@ class NameTamer
19
19
  def [](name, args = {})
20
20
  new name, args
21
21
  end
22
+
23
+ # Make a slug from a string
24
+ def parameterize(string, args = {})
25
+ sep = args[:sep] || SLUG_DELIMITER
26
+ rfc3987 = args[:rfc3987] || false
27
+ filter = args[:filter] || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)
28
+
29
+ new_string = string.dup
30
+
31
+ new_string
32
+ .whitespace_to!(sep)
33
+ .invalid_chars_to!(sep)
34
+ .strip_unwanted!(filter)
35
+ .fix_separators!(sep)
36
+ .approximate_latin_chars!
37
+
38
+ # Have we got anything left?
39
+ new_string = '_' if new_string.empty?
40
+
41
+ # downcase any latin characters
42
+ new_string.downcase
43
+ end
22
44
  end
23
45
 
24
46
  def tidy_name
25
47
  unless @tidy_name
26
48
  @tidy_name = name.dup # Start with the name we've received
27
49
 
28
- ensure_safe # Invalid byte sequence in UTF-8, for example
50
+ unescape # Unescape percent-encoded characters and fix UTF-8 encoding
29
51
  tidy_spacing # " John Smith " -> "John Smith"
30
52
  fix_encoding_errors # "Ren\u00c3\u00a9 Descartes" -> "Ren\u00e9 Descartes"
31
53
  consolidate_initials # "I. B. M." -> "I.B.M."
@@ -65,12 +87,7 @@ class NameTamer
65
87
  end
66
88
 
67
89
  def slug
68
- unless @slug
69
- @slug = simple_name.dup # Start with search name
70
- slugify # "John Doe" -> "john-doe"
71
- end
72
-
73
- @slug
90
+ @slug ||= NameTamer.parameterize simple_name.dup # "John Doe" -> "john-doe"
74
91
  end
75
92
 
76
93
  def contact_type
@@ -112,8 +129,8 @@ class NameTamer
112
129
  # Tidy up the name we've received
113
130
  #--------------------------------------------------------
114
131
 
115
- def ensure_safe
116
- @tidy_name.ensure_safe
132
+ def unescape
133
+ @tidy_name.ensure_safe!.safe_unescape!
117
134
  end
118
135
 
119
136
  def tidy_spacing
@@ -272,18 +289,6 @@ class NameTamer
272
289
  @simple_name.strip_unwanted!(/["“”™℠®©℗]/) # remove quotes and commercial decoration
273
290
  end
274
291
 
275
- #--------------------------------------------------------
276
- # Make slug from search name
277
- #--------------------------------------------------------
278
-
279
- def slugify
280
- # Inflector::parameterize just gives up with non-latin characters so...
281
- # @slug = @slug.parameterize # Can't use this
282
-
283
- # Instead we'll do it ourselves
284
- @slug = parameterize @slug
285
- end
286
-
287
292
  #--------------------------------------------------------
288
293
  # Initialization and utilities
289
294
  #--------------------------------------------------------
@@ -378,29 +383,6 @@ class NameTamer
378
383
  .upcase_initials!
379
384
  end
380
385
 
381
- def parameterize(string, args = {})
382
- sep = args[:sep] || SLUG_DELIMITER
383
- rfc3987 = args[:rfc3987] || false
384
- filter = args[:filter] || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)
385
-
386
- # First we unescape any pct-encoded characters. These might turn into
387
- # things we want to alter for the slug, like whitespace (e.g. %20)
388
- new_string = URI.unescape(string)
389
-
390
- new_string
391
- .whitespace_to!(sep)
392
- .invalid_chars_to!(sep)
393
- .strip_unwanted!(filter)
394
- .fix_separators!(sep)
395
- .approximate_latin_chars!
396
-
397
- # Have we got anything left?
398
- new_string = '_' if new_string.empty?
399
-
400
- # downcase any latin characters
401
- new_string.downcase
402
- end
403
-
404
386
  #--------------------------------------------------------
405
387
  # Constants
406
388
  #--------------------------------------------------------
@@ -1,3 +1,3 @@
1
1
  class NameTamer
2
- VERSION = '0.2.0'
2
+ VERSION = '0.2.1'
3
3
  end
@@ -25,6 +25,16 @@ class String
25
25
  substitute!(/(?<![[:space:]])[\.\/](?![[:space:]])/, separator)
26
26
  end
27
27
 
28
+ # Unescape percent-encoded characters
29
+ # This might introduce UTF-8 invalid byte sequence
30
+ # so we take precautions
31
+ def safe_unescape!
32
+ string = URI.unescape(self)
33
+ return self if self == string
34
+ replace string
35
+ ensure_safe!
36
+ end
37
+
28
38
  # Make sure separators are not where they shouldn't be
29
39
  def fix_separators!(separator)
30
40
  return self if separator.nil? || separator.empty?
@@ -139,8 +149,7 @@ class String
139
149
  gsub!(/\b([a-z]\.)(?=[a-z0-9]{2,})/i) { |_| "#{Regexp.last_match[1]} " } || self
140
150
  end
141
151
 
142
- def ensure_safe
143
- return if valid_encoding?
152
+ def ensure_safe!
144
153
  encode!('UTF-8', invalid: :replace, undef: :replace, replace: '')
145
154
  end
146
155
 
@@ -122,7 +122,7 @@ describe NameTamer do
122
122
  { n: 'AGUSTA DO ROMEIRO', t: :person, nn: 'Agusta do Romeiro', sn: 'Agusta do Romeiro', s: 'agusta-do-romeiro' },
123
123
  { n: 'CARLOS DOS SANTOS', t: :person, nn: 'Carlos dos Santos', sn: 'Carlos dos Santos', s: 'carlos-dos-santos' },
124
124
  { n: '유정 신', t: :organization, nn: '유정 신', sn: '유정 신', s: '유정-신' },
125
- { n: 'xxx%52zzz', t: :organization, nn: 'xxx%52zzz', sn: 'xxx%52zzz', s: 'xxxrzzz' },
125
+ { n: 'xxx%52zzz', t: :organization, nn: 'xxxRzzz', sn: 'xxxRzzz', s: 'xxxrzzz' },
126
126
  { n: 'Евгений Болотнов', t: :organization, nn: 'Евгений Болотнов', sn: 'Евгений Болотнов',
127
127
  s: 'Евгений-Болотнов' },
128
128
  { n: '김태성', t: :organization, nn: '김태성', sn: '김태성', s: '김태성' },
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: name-tamer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Xenapto
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-04 00:00:00.000000000 Z
11
+ date: 2014-07-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler