sterile 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,43 @@
1
+ # encoding: UTF-8
2
+
3
+ module Sterile
4
+
5
+ class << self
6
+
7
+ # Trim whitespace from start and end of string and remove any redundant
8
+ # whitespace in between.
9
+ #
10
+ # " Hello world! ".trim_whitespace # => "Hello world!"
11
+ #
12
+ def trim_whitespace(string)
13
+ string.gsub(/\s+/, " ").strip
14
+ end
15
+
16
+
17
+ # Transliterate to ASCII and strip out any HTML/XML tags.
18
+ #
19
+ # "<b>nåsty</b>".sterilize # => "nasty"
20
+ #
21
+ def sterilize(string)
22
+ strip_tags(transliterate(string))
23
+ end
24
+
25
+
26
+ # Transliterate to ASCII, downcase and format for URL permalink/slug
27
+ # by stripping out all non-alphanumeric characters and replacing spaces
28
+ # with a delimiter (defaults to '-').
29
+ #
30
+ # "Hello World!".sluggerize # => "hello-world"
31
+ #
32
+ def sluggerize(string, options = {})
33
+ options = {
34
+ :delimiter => "-"
35
+ }.merge!(options)
36
+
37
+ sterilize(string).strip.gsub(/\s+/, "-").gsub(/[^a-zA-Z0-9\-]/, "").gsub(/-+/, options[:delimiter]).downcase
38
+ end
39
+ alias_method :to_slug, :sluggerize
40
+
41
+ end # class << self
42
+
43
+ end # module Sterile
@@ -1,5 +1,5 @@
1
1
  # encoding: UTF-8
2
2
 
3
3
  module Sterile
4
- VERSION = "1.0.1"
4
+ VERSION = "1.0.2"
5
5
  end
data/lib/sterile.rb CHANGED
@@ -21,319 +21,12 @@
21
21
  # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
22
  # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
23
 
24
- require "sterile/codepoints"
25
- require "sterile/html_entities"
26
- require "sterile/smart_format_rules"
27
24
 
25
+ require "sterile/transliterate"
26
+ require "sterile/smart_format"
27
+ require "sterile/titlecase"
28
+ require "sterile/utilities"
29
+ require "sterile/entities"
30
+ require "sterile/tags"
28
31
 
29
- module Sterile
30
-
31
- class << self
32
-
33
- def transmogrify(string, &block)
34
- raise "No block given" unless block_given?
35
-
36
- result = ""
37
- string.unpack("U*").each do |codepoint|
38
- cg = codepoint >> 8
39
- cp = codepoint & 0xFF
40
- begin
41
- mapping = CODEPOINTS[cg][cp]
42
- result << yield(mapping, codepoint)
43
- rescue
44
- end
45
- end
46
-
47
- result
48
- end
49
-
50
- # Transliterate Unicode [and accented ASCII] characters to their plain-text
51
- # ASCII equivalents. This is based on data from the stringex gem (https://github.com/rsl/stringex)
52
- # which is in turn a port of Perl's Unidecode and ostensibly provides
53
- # superior results to iconv. The optical conversion data is based on work
54
- # by Eric Boehs at https://github.com/ericboehs/to_slug
55
- # Passing an option of :optical => true will prefer optical mapping instead
56
- # of more pedantic matches.
57
- #
58
- # "ýůçký".transliterate # => "yucky"
59
- #
60
- def transliterate(string, options = {})
61
- options = {
62
- :optical => false
63
- }.merge!(options)
64
-
65
- if options[:optical]
66
- transmogrify(string) do |mapping, codepoint|
67
- mapping[1] || mapping[0] || ""
68
- end
69
- else
70
- transmogrify(string) do |mapping, codepoint|
71
- mapping[0] || mapping[1] || ""
72
- end
73
- end
74
- end
75
- alias_method :to_ascii, :transliterate
76
-
77
-
78
- # Trim whitespace from start and end of string and remove any redundant
79
- # whitespace in between.
80
- #
81
- # " Hello world! ".trim_whitespace # => "Hello world!"
82
- #
83
- def trim_whitespace(string)
84
- string.gsub(/\s+/, " ").strip
85
- end
86
-
87
-
88
- # Transliterate to ASCII and strip out any HTML/XML tags.
89
- #
90
- # "<b>nåsty</b>".sterilize # => "nasty"
91
- #
92
- def sterilize(string)
93
- strip_tags(transliterate(string))
94
- end
95
-
96
-
97
- # Transliterate to ASCII, downcase and format for URL permalink/slug
98
- # by stripping out all non-alphanumeric characters and replacing spaces
99
- # with a delimiter (defaults to '-').
100
- #
101
- # "Hello World!".sluggerize # => "hello-world"
102
- #
103
- def sluggerize(string, options = {})
104
- options = {
105
- :delimiter => "-"
106
- }.merge!(options)
107
-
108
- sterilize(string).strip.gsub(/\s+/, "-").gsub(/[^a-zA-Z0-9\-]/, "").gsub(/-+/, options[:delimiter]).downcase
109
- end
110
- alias_method :to_slug, :sluggerize
111
-
112
-
113
- # Format text with proper "curly" quotes, m-dashes, copyright, trademark, etc.
114
- #
115
- # q{"He said, 'Away with you, Drake!'"}.smart_format # => “He said, ‘Away with you, Drake!’”
116
- #
117
- def smart_format(string)
118
- SMART_FORMAT_RULES.each do |rule|
119
- string.gsub!(rule[0], rule[1])
120
- end
121
- string
122
- end
123
-
124
-
125
- # Turn Unicode characters into their HTML equivalents.
126
- # If a valid HTML entity is not possible, it will create a numeric entity.
127
- #
128
- # q{“Economy Hits Bottom,” ran the headline}.encode_entities # => &ldquo;Economy Hits Bottom,&rdquo; ran the headline
129
- #
130
- def encode_entities(string)
131
- transmogrify(string) do |mapping, codepoint|
132
- if (32..126).include?(codepoint)
133
- mapping[0]
134
- else
135
- "&" + (mapping[2] || "#" + codepoint.to_s) + ";"
136
- end
137
- end
138
- end
139
-
140
-
141
- # The reverse of +encode_entities+. Turns HTML or numeric entities into
142
- # their Unicode counterparts.
143
- #
144
- def decode_entities(string)
145
- string.gsub!(/&#(\d{1,4});/) { [$1.to_i].pack("U") }
146
- string.gsub(/&([a-zA-Z0-9]+);/) do
147
- codepoint = HTML_ENTITIES[$1]
148
- codepoint ? [codepoint].pack("U") : $&
149
- end
150
- end
151
-
152
-
153
- # Remove HTML/XML tags from text. Also strips out comments, PHP and ERB style tags.
154
- # CDATA is considered text unless :keep_cdata => false is specified.
155
- # Redundant whitespace will be removed unless :keep_whitespace => true is specified.
156
- #
157
- def strip_tags(string, options = {})
158
- options = {
159
- :keep_whitespace => false,
160
- :keep_cdata => true
161
- }.merge!(options)
162
-
163
- string.gsub!(/<[%?](php)?[^>]*>/, '') # strip php, erb et al
164
- string.gsub!(/<!--[^-]*-->/, '') # strip comments
165
-
166
- string.gsub!(
167
- /
168
- <!\[CDATA\[
169
- ([^\]]*)
170
- \]\]>
171
- /xi,
172
- options[:keep_cdata] ? '\\1' : ''
173
- )
174
-
175
- html_name = /[\w:-]+/
176
- html_data = /([A-Za-z0-9]+|('[^']*?'|"[^"]*?"))/
177
- html_attr = /(#{html_name}(\s*=\s*#{html_data})?)/
178
-
179
- string.gsub!(
180
- /
181
- <
182
- [\/]?
183
- #{html_name}
184
- (\s+(#{html_attr}(\s+#{html_attr})*))?
185
- \s*
186
- [\/]?
187
- >
188
- /xi,
189
- ''
190
- )
191
-
192
- options[:keep_whitespace] ? string : trim_whitespace(string)
193
- end
194
-
195
-
196
- # Similar to +gsub+, except it works in between HTML/XML tags and
197
- # yields text to a block. Text will be replaced by what the block
198
- # returns.
199
- # Warning: does not work in some degenerate cases.
200
- #
201
- def gsub_tags(string, &block)
202
- raise "No block given" unless block_given?
203
-
204
- string.gsub!(/(<[^>]*>)|([^<]+)/) do |match|
205
- $2 ? yield($2) : $1
206
- end
207
- end
208
-
209
-
210
- # Iterates over all text in between HTML/XML tags and yields
211
- # it to a block.
212
- # Warning: does not work in some degenerate cases.
213
- #
214
- def scan_tags(string, &block)
215
- raise "No block given" unless block_given?
216
-
217
- string.scan(/(<[^>]*>)|([^<]+)/) do |match|
218
- yield($2) unless $2.nil?
219
- end
220
- end
221
-
222
-
223
- # Like +smart_format+, but works with HTML/XML (somewhat).
224
- #
225
- def smart_format_tags(string)
226
- string.gsub_tags do |text|
227
- text.smart_format.encode_entities
228
- end
229
- end
230
-
231
-
232
- # Format text appropriately for titles. This method is much smarter
233
- # than ActiveSupport's +titlecase+. The algorithm is based on work done
234
- # by John Gruber et al (http://daringfireball.net/2008/08/title_case_update)
235
- #
236
- def titlecase(string)
237
- string.strip!
238
- string.gsub!(/\s+/, " ")
239
- string.downcase! unless string =~ /[[:lower:]]/
240
-
241
- small_words = %w{ a an and as at(?!&t) but by en for if in nor of on or the to v[.]? via vs[.]? }.join("|")
242
- apos = / (?: ['’] [[:lower:]]* )? /xu
243
-
244
- string.gsub!(
245
- /
246
- \b
247
- ([_\*]*)
248
- (?:
249
- ( [-\+\w]+ [@.\:\/] [-\w@.\:\/]+ #{apos} ) # URL, domain, or email
250
- |
251
- ( (?i: #{small_words} ) #{apos} ) # or small word, case-insensitive
252
- |
253
- ( [[:alpha:]] [[:lower:]'’()\[\]{}]* #{apos} ) # or word without internal caps
254
- |
255
- ( [[:alpha:]] [[:alpha:]'’()\[\]{}]* #{apos} ) # or some other word
256
- )
257
- ([_\*]*)
258
- \b
259
- /xu
260
- ) do
261
- ($1 ? $1 : "") +
262
- ($2 ? $2 : ($3 ? $3.downcase : ($4 ? $4.downcase.capitalize : $5))) +
263
- ($6 ? $6 : "")
264
- end
265
-
266
- if RUBY_VERSION < "1.9.0"
267
- string.gsub!(
268
- /
269
- \b
270
- ([:alpha:]+)
271
- (‑)
272
- ([:alpha:]+)
273
- \b
274
- /xu
275
- ) do
276
- $1.downcase.capitalize + $2 + $1.downcase.capitalize
277
- end
278
- end
279
-
280
- string.gsub!(
281
- /
282
- (
283
- \A [[:punct:]]* # start of title
284
- | [:.;?!][ ]+ # or of subsentence
285
- | [ ]['"“‘(\[][ ]* # or of inserted subphrase
286
- )
287
- ( #{small_words} ) # followed by a small-word
288
- \b
289
- /xiu
290
- ) do
291
- $1 + $2.downcase.capitalize
292
- end
293
-
294
- string.gsub!(
295
- /
296
- \b
297
- ( #{small_words} ) # small-word
298
- (?=
299
- [[:punct:]]* \Z # at the end of the title
300
- |
301
- ['"’”)\]] [ ] # or of an inserted subphrase
302
- )
303
- /xu
304
- ) do
305
- $1.downcase.capitalize
306
- end
307
-
308
- string.gsub!(
309
- /
310
- (
311
- \b
312
- [[:alpha:]] # single first letter
313
- [\-‑] # followed by a dash
314
- )
315
- ( [[:alpha:]] ) # followed by a letter
316
- /xu
317
- ) do
318
- $1 + $2.downcase
319
- end
320
-
321
- string.gsub!(/q&a/i, 'Q&A')
322
-
323
- string
324
- end
325
-
326
- end
327
-
328
- end
329
-
330
-
331
- # Add extensions to String
332
- #
333
- class String
334
- Sterile.methods(false).each do |method|
335
- eval("def #{method}(*args, &block); Sterile.#{method}(self, *args, &block); end")
336
- eval("def #{method}!(*args, &block); replace Sterile.#{method}(self, *args, &block); end")
337
- end
338
- end
339
-
32
+ require "sterile/string_extensions"
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: sterile
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.0.1
5
+ version: 1.0.2
6
6
  platform: ruby
7
7
  authors:
8
8
  - Patrick Hogan
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-06-02 00:00:00 -05:00
13
+ date: 2011-06-03 00:00:00 -05:00
14
14
  default_executable:
15
15
  dependencies: []
16
16
 
@@ -27,14 +27,22 @@ files:
27
27
  - .autotest
28
28
  - .gitignore
29
29
  - .rvmrc
30
+ - .yaropts
30
31
  - Gemfile
31
32
  - Gemfile.lock
32
33
  - README.markdown
33
34
  - Rakefile
34
35
  - lib/sterile.rb
35
- - lib/sterile/codepoints.rb
36
- - lib/sterile/html_entities.rb
37
- - lib/sterile/smart_format_rules.rb
36
+ - lib/sterile/data/codepoints_data.rb
37
+ - lib/sterile/data/html_entities_data.rb
38
+ - lib/sterile/data/smart_format_rules.rb
39
+ - lib/sterile/entities.rb
40
+ - lib/sterile/smart_format.rb
41
+ - lib/sterile/string_extensions.rb
42
+ - lib/sterile/tags.rb
43
+ - lib/sterile/titlecase.rb
44
+ - lib/sterile/transliterate.rb
45
+ - lib/sterile/utilities.rb
38
46
  - lib/sterile/version.rb
39
47
  - sterile.gemspec
40
48
  has_rdoc: true