name-tamer 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3532b7472b3daecb0bb11863268531c229771639
4
- data.tar.gz: 0096dd16106d480f6c5e1e043dbf54896f787599
3
+ metadata.gz: 0a8134129faabc9dc73dec3bd107873adf6a8f21
4
+ data.tar.gz: 6c2c5f68c2e3e9a41c6ccc0ddba78a896952c725
5
5
  SHA512:
6
- metadata.gz: 4ee6d017e93b54acd10791f44a2920c46fe76faaafcb0171ab59c582f7d07c34036bc64610c5298ae363a33fa26fbedd1711800771acc29620beb6967adea10e
7
- data.tar.gz: 23741e994c62fc8c746f826e3124824a29e3a5ecff22930a1ec350db0363b5b6d20d67bc5104d546f25c9301850a8944e936cb5e41c6320524d766303a2be69c
6
+ metadata.gz: 199a1fb93b68757fd0a9bc6657c1595c655da6ad5ad0ba87ae6cbf43dea88a15e3ecc9cb7317ed3edff415cac3288e26f012653e2432e222a4497aed41d21534
7
+ data.tar.gz: ae987673dfb5a693706a470765a3d7ac300dd8ff14c76d796659e97c5db79085fa735d1c2b1e218baf2da829fba047690cddbe640b02758b63a6fe49e503d01b
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- name-tamer (0.1.9)
4
+ name-tamer (0.2.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -19,13 +19,35 @@ class NameTamer
19
19
  def [](name, args = {})
20
20
  new name, args
21
21
  end
22
+
23
+ # Make a slug from a string
24
+ def parameterize(string, args = {})
25
+ sep = args[:sep] || SLUG_DELIMITER
26
+ rfc3987 = args[:rfc3987] || false
27
+ filter = args[:filter] || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)
28
+
29
+ new_string = string.dup
30
+
31
+ new_string
32
+ .whitespace_to!(sep)
33
+ .invalid_chars_to!(sep)
34
+ .strip_unwanted!(filter)
35
+ .fix_separators!(sep)
36
+ .approximate_latin_chars!
37
+
38
+ # Have we got anything left?
39
+ new_string = '_' if new_string.empty?
40
+
41
+ # downcase any latin characters
42
+ new_string.downcase
43
+ end
22
44
  end
23
45
 
24
46
  def tidy_name
25
47
  unless @tidy_name
26
48
  @tidy_name = name.dup # Start with the name we've received
27
49
 
28
- ensure_safe # Invalid byte sequence in UTF-8, for example
50
+ unescape # Unescape percent-encoded characters and fix UTF-8 encoding
29
51
  tidy_spacing # " John Smith " -> "John Smith"
30
52
  fix_encoding_errors # "Ren\u00c3\u00a9 Descartes" -> "Ren\u00e9 Descartes"
31
53
  consolidate_initials # "I. B. M." -> "I.B.M."
@@ -65,12 +87,7 @@ class NameTamer
65
87
  end
66
88
 
67
89
  def slug
68
- unless @slug
69
- @slug = simple_name.dup # Start with search name
70
- slugify # "John Doe" -> "john-doe"
71
- end
72
-
73
- @slug
90
+ @slug ||= NameTamer.parameterize simple_name.dup # "John Doe" -> "john-doe"
74
91
  end
75
92
 
76
93
  def contact_type
@@ -112,8 +129,8 @@ class NameTamer
112
129
  # Tidy up the name we've received
113
130
  #--------------------------------------------------------
114
131
 
115
- def ensure_safe
116
- @tidy_name.ensure_safe
132
+ def unescape
133
+ @tidy_name.ensure_safe!.safe_unescape!
117
134
  end
118
135
 
119
136
  def tidy_spacing
@@ -272,18 +289,6 @@ class NameTamer
272
289
  @simple_name.strip_unwanted!(/["“”™℠®©℗]/) # remove quotes and commercial decoration
273
290
  end
274
291
 
275
- #--------------------------------------------------------
276
- # Make slug from search name
277
- #--------------------------------------------------------
278
-
279
- def slugify
280
- # Inflector::parameterize just gives up with non-latin characters so...
281
- # @slug = @slug.parameterize # Can't use this
282
-
283
- # Instead we'll do it ourselves
284
- @slug = parameterize @slug
285
- end
286
-
287
292
  #--------------------------------------------------------
288
293
  # Initialization and utilities
289
294
  #--------------------------------------------------------
@@ -378,29 +383,6 @@ class NameTamer
378
383
  .upcase_initials!
379
384
  end
380
385
 
381
- def parameterize(string, args = {})
382
- sep = args[:sep] || SLUG_DELIMITER
383
- rfc3987 = args[:rfc3987] || false
384
- filter = args[:filter] || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)
385
-
386
- # First we unescape any pct-encoded characters. These might turn into
387
- # things we want to alter for the slug, like whitespace (e.g. %20)
388
- new_string = URI.unescape(string)
389
-
390
- new_string
391
- .whitespace_to!(sep)
392
- .invalid_chars_to!(sep)
393
- .strip_unwanted!(filter)
394
- .fix_separators!(sep)
395
- .approximate_latin_chars!
396
-
397
- # Have we got anything left?
398
- new_string = '_' if new_string.empty?
399
-
400
- # downcase any latin characters
401
- new_string.downcase
402
- end
403
-
404
386
  #--------------------------------------------------------
405
387
  # Constants
406
388
  #--------------------------------------------------------
@@ -1,3 +1,3 @@
1
1
  class NameTamer
2
- VERSION = '0.2.0'
2
+ VERSION = '0.2.1'
3
3
  end
@@ -25,6 +25,16 @@ class String
25
25
  substitute!(/(?<![[:space:]])[\.\/](?![[:space:]])/, separator)
26
26
  end
27
27
 
28
+ # Unescape percent-encoded characters
29
+ # This might introduce UTF-8 invalid byte sequence
30
+ # so we take precautions
31
+ def safe_unescape!
32
+ string = URI.unescape(self)
33
+ return self if self == string
34
+ replace string
35
+ ensure_safe!
36
+ end
37
+
28
38
  # Make sure separators are not where they shouldn't be
29
39
  def fix_separators!(separator)
30
40
  return self if separator.nil? || separator.empty?
@@ -139,8 +149,7 @@ class String
139
149
  gsub!(/\b([a-z]\.)(?=[a-z0-9]{2,})/i) { |_| "#{Regexp.last_match[1]} " } || self
140
150
  end
141
151
 
142
- def ensure_safe
143
- return if valid_encoding?
152
+ def ensure_safe!
144
153
  encode!('UTF-8', invalid: :replace, undef: :replace, replace: '')
145
154
  end
146
155
 
@@ -122,7 +122,7 @@ describe NameTamer do
122
122
  { n: 'AGUSTA DO ROMEIRO', t: :person, nn: 'Agusta do Romeiro', sn: 'Agusta do Romeiro', s: 'agusta-do-romeiro' },
123
123
  { n: 'CARLOS DOS SANTOS', t: :person, nn: 'Carlos dos Santos', sn: 'Carlos dos Santos', s: 'carlos-dos-santos' },
124
124
  { n: '유정 신', t: :organization, nn: '유정 신', sn: '유정 신', s: '유정-신' },
125
- { n: 'xxx%52zzz', t: :organization, nn: 'xxx%52zzz', sn: 'xxx%52zzz', s: 'xxxrzzz' },
125
+ { n: 'xxx%52zzz', t: :organization, nn: 'xxxRzzz', sn: 'xxxRzzz', s: 'xxxrzzz' },
126
126
  { n: 'Евгений Болотнов', t: :organization, nn: 'Евгений Болотнов', sn: 'Евгений Болотнов',
127
127
  s: 'Евгений-Болотнов' },
128
128
  { n: '김태성', t: :organization, nn: '김태성', sn: '김태성', s: '김태성' },
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: name-tamer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Xenapto
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-04 00:00:00.000000000 Z
11
+ date: 2014-07-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler