rubypants 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 80fce7b0286c4b7268c4bbd6d73cc702acee77d5
4
- data.tar.gz: b2206216e8645e82ad3705a54064e5465430eab3
3
+ metadata.gz: 8dc4efc96c1fe85653d5cfb4f6d0291a94edcdbc
4
+ data.tar.gz: b17cda053df199efe900caed483404b903345f9c
5
5
  SHA512:
6
- metadata.gz: 395f50ffc524bdf2e193dea148e1eec494453b4ac7a062220257d8c7514a65a7377c63ee7ecdb081fd0e625169263be6b32f1b9e0d592bb7a4537ee776fca7e0
7
- data.tar.gz: 665a1f5b1d81ef0f95afaf6ad4db71ba684dfdd2b0fc2658f2628ba11d3aaec9fb41a9b50e5053ff96fc8f48ad685741f52f6d671bde8eedd10d4a5eb6c74bc2
6
+ metadata.gz: eaebe94dfee7f1ef9295910593ff64e0b10a5147ab96e802e7ce98031a15ee4e6c78ae7ee45ccadccaeb30d2c86d96692a2625733bb6e3f7edcda3ecd995c1a6
7
+ data.tar.gz: 3eabe14d2d081f67239047c29d7d71e93025622cdd3f562b1f3fd2b83a26cddea359e2fbc8a20d3f9ca9f0c855c12c4fe3f76e16442e6f2898a6dc346f2e6f4e
@@ -10,9 +10,3 @@ rvm:
10
10
  # and https://github.com/travis-ci/travis-ci/issues/5239
11
11
  before_install:
12
12
  - gem install bundler
13
-
14
- # For now override the default `bundle exec rake` which fails because of an
15
- # unresolved superclass mismatch in `core.rb` vs. `version.rb`. This should
16
- # eventually be fixed in rubypants.
17
- script:
18
- - rake
@@ -1,2 +1,473 @@
1
- require_relative 'rubypants/core'
2
- require_relative 'rubypants/version'
1
+ require_relative 'version'
2
+
3
+ class RubyPants < String
4
+ extend RubyPantsVersion
5
+
6
+ # Create a new RubyPants instance with the text in +string+.
7
+ #
8
+ # Allowed elements in the options array:
9
+ #
10
+ # 0 :: do nothing
11
+ # 1 :: enable all, using only em-dash shortcuts
12
+ # 2 :: enable all, using old school en- and em-dash shortcuts (*default*)
13
+ # 3 :: enable all, using inverted old school en and em-dash shortcuts
14
+ # -1 :: stupefy (translate HTML entities to their ASCII-counterparts)
15
+ #
16
+ # If you don't like any of these defaults, you can pass symbols to change
17
+ # RubyPants' behavior:
18
+ #
19
+ # <tt>:quotes</tt> :: quotes
20
+ # <tt>:backticks</tt> :: backtick quotes (``double'' only)
21
+ # <tt>:allbackticks</tt> :: backtick quotes (``double'' and `single')
22
+ # <tt>:dashes</tt> :: dashes
23
+ # <tt>:oldschool</tt> :: old school dashes
24
+ # <tt>:inverted</tt> :: inverted old school dashes
25
+ # <tt>:ellipses</tt> :: ellipses
26
+ # <tt>:prevent_breaks</tt> :: use nbsp and word-joiner to avoid breaking
27
+ # before dashes and ellipses
28
+ # <tt>:named_entities</tt> :: use named entities instead of the default
29
+ # decimal entities (see below)
30
+ # <tt>:convertquotes</tt> :: convert <tt>&quot;</tt> entities to
31
+ # <tt>"</tt>
32
+ # <tt>:stupefy</tt> :: translate RubyPants HTML entities
33
+ # to their ASCII counterparts.
34
+ #
35
+ # In addition, you can customize the HTML entities that will be injected by
36
+ # passing in a hash for the final argument. The defaults for these entities
37
+ # are as follows:
38
+ #
39
+ # <tt>:single_left_quote</tt> :: <tt>&#8216;</tt>
40
+ # <tt>:double_left_quote</tt> :: <tt>&#8220;</tt>
41
+ # <tt>:single_right_quote</tt> :: <tt>&#8217;</tt>
42
+ # <tt>:double_right_quote</tt> :: <tt>&#8221;</tt>
43
+ # <tt>:em_dash</tt> :: <tt>&#8212;</tt>
44
+ # <tt>:en_dash</tt> :: <tt>&#8211;</tt>
45
+ # <tt>:ellipsis</tt> :: <tt>&#8230;</tt>
46
+ # <tt>:non_breaking_space</tt> :: <tt>&nbsp;</tt>
47
+ # <tt>:word_joiner</tt> :: <tt>&#8288;</tt>
48
+ #
49
+ # If the <tt>:named_entities</tt> option is used, the default entities are
50
+ # as follows:
51
+ #
52
+ # <tt>:single_left_quote</tt> :: <tt>&lsquo;</tt>
53
+ # <tt>:double_left_quote</tt> :: <tt>&ldquo;</tt>
54
+ # <tt>:single_right_quote</tt> :: <tt>&rsquo;</tt>
55
+ # <tt>:double_right_quote</tt> :: <tt>&rdquo;</tt>
56
+ # <tt>:em_dash</tt> :: <tt>&mdash;</tt>
57
+ # <tt>:en_dash</tt> :: <tt>&ndash;</tt>
58
+ # <tt>:ellipsis</tt> :: <tt>&hellip;</tt>
59
+ # <tt>:non_breaking_space</tt> :: <tt>&nbsp;</tt>
60
+ # <tt>:word_joiner</tt> :: <tt>&#8288;</tt>
61
+ #
62
+ # If the <tt>:character_entities</tt> option is used, RubyPants will
63
+ # emit Unicode characters directly, rather than HTML entities. By default
64
+ # this excludes the space characters (non-breaking space and
65
+ # word-joiner). To additionally emit Unicode space characters, use the
66
+ # <tt>:character_spaces</tt> option.
67
+ #
68
+ def initialize(string, options=[2], entities = {})
69
+ super string
70
+
71
+ @options = [*options]
72
+ @entities = default_entities
73
+ @entities.merge!(named_entities) if @options.include?(:named_entities)
74
+ @entities.merge!(character_entities) if @options.include?(:character_entities)
75
+ @entities.merge!(character_spaces) if @options.include?(:character_spaces)
76
+ @entities.merge!(entities)
77
+ end
78
+
79
+ # Apply SmartyPants transformations.
80
+ def to_html
81
+ do_quotes = do_backticks = do_dashes = do_ellipses = do_stupify = nil
82
+ convert_quotes = prevent_breaks = nil
83
+
84
+ if @options.include?(0)
85
+ # Do nothing.
86
+ return self
87
+ elsif @options.include?(1)
88
+ # Do everything, turn all options on.
89
+ do_quotes = do_backticks = do_ellipses = true
90
+ do_dashes = :normal
91
+ elsif @options.include?(2)
92
+ # Do everything, turn all options on, use old school dash shorthand.
93
+ do_quotes = do_backticks = do_ellipses = true
94
+ do_dashes = :oldschool
95
+ elsif @options.include?(3)
96
+ # Do everything, turn all options on, use inverted old school
97
+ # dash shorthand.
98
+ do_quotes = do_backticks = do_ellipses = true
99
+ do_dashes = :inverted
100
+ elsif @options.include?(-1)
101
+ do_stupefy = true
102
+ end
103
+
104
+ # Explicit flags override numeric flag groups.
105
+ do_quotes = true if @options.include?(:quotes)
106
+ do_backticks = true if @options.include?(:backticks)
107
+ do_backticks = :both if @options.include?(:allbackticks)
108
+ do_dashes = :normal if @options.include?(:dashes)
109
+ do_dashes = :oldschool if @options.include?(:oldschool)
110
+ do_dashes = :inverted if @options.include?(:inverted)
111
+ prevent_breaks = true if @options.include?(:prevent_breaks)
112
+ do_ellipses = true if @options.include?(:ellipses)
113
+ convert_quotes = true if @options.include?(:convertquotes)
114
+ do_stupefy = true if @options.include?(:stupefy)
115
+
116
+ # Parse the HTML
117
+ tokens = tokenize
118
+
119
+ # Keep track of when we're inside <pre> or <code> tags.
120
+ in_pre = nil
121
+
122
+ # The result is accumulated here.
123
+ result = ""
124
+
125
+ # This is a cheat, used to get some context for one-character
126
+ # tokens that consist of just a quote char. What we do is remember
127
+ # the last character of the previous text token, to use as context
128
+ # to curl single-character quote tokens correctly.
129
+ prev_token_last_char = nil
130
+
131
+ tokens.each do |token|
132
+ if token.first == :tag
133
+ result << token[1]
134
+ if token[1].end_with? '/>'
135
+ # ignore self-closing tags
136
+ elsif token[1] =~ %r!\A<(/?)(pre|code|kbd|script|style|math)[\s>]!
137
+ if $1 == '' && ! in_pre
138
+ in_pre = $2
139
+ elsif $1 == '/' && $2 == in_pre
140
+ in_pre = nil
141
+ end
142
+ end
143
+ else
144
+ t = token[1]
145
+
146
+ # Remember last char of this token before processing.
147
+ last_char = t[-1].chr
148
+
149
+ unless in_pre
150
+ t = process_escapes t
151
+
152
+ t.gsub!(/&quot;/, '"') if convert_quotes
153
+
154
+ if do_dashes
155
+ t = educate_dashes t, prevent_breaks if do_dashes == :normal
156
+ t = educate_dashes_oldschool t, prevent_breaks if do_dashes == :oldschool
157
+ t = educate_dashes_inverted t, prevent_breaks if do_dashes == :inverted
158
+ end
159
+
160
+ t = educate_ellipses t, prevent_breaks if do_ellipses
161
+
162
+ # Note: backticks need to be processed before quotes.
163
+ if do_backticks
164
+ t = educate_backticks t
165
+ t = educate_single_backticks t if do_backticks == :both
166
+ end
167
+
168
+ if do_quotes
169
+ if t == "'"
170
+ # Special case: single-character ' token
171
+ if prev_token_last_char =~ /\S/
172
+ t = entity(:single_right_quote)
173
+ else
174
+ t = entity(:single_left_quote)
175
+ end
176
+ elsif t == '"'
177
+ # Special case: single-character " token
178
+ if prev_token_last_char =~ /\S/
179
+ t = entity(:double_right_quote)
180
+ else
181
+ t = entity(:double_left_quote)
182
+ end
183
+ else
184
+ # Normal case:
185
+ t = educate_quotes t
186
+ end
187
+ end
188
+
189
+ t = stupefy_entities t if do_stupefy
190
+ end
191
+
192
+ prev_token_last_char = last_char
193
+ result << t
194
+ end
195
+ end
196
+
197
+ # Done
198
+ result
199
+ end
200
+
201
+ protected
202
+
203
+ # Return the string after processing the following backslash
204
+ # escape sequences. This is useful if you want to force a "dumb" quote
205
+ # or other character to appear.
206
+ #
207
+ # Escaped are:
208
+ # \\ \" \' \. \- \`
209
+ #
210
+ def process_escapes(str)
211
+ str.
212
+ gsub('\\\\', '&#92;').
213
+ gsub('\"', '&#34;').
214
+ gsub("\\\'", '&#39;').
215
+ gsub('\.', '&#46;').
216
+ gsub('\-', '&#45;').
217
+ gsub('\`', '&#96;')
218
+ end
219
+
220
+ def self.n_of(n, x)
221
+ x = Regexp.escape(x)
222
+ /(?<!#{x}) # not preceded by x
223
+ #{x}{#{n}} # n of x
224
+ (?!#{x}) # not followed by x
225
+ /x
226
+ end
227
+
228
+ DOUBLE_DASH = n_of(2, '-')
229
+ TRIPLE_DASH = n_of(3, '-')
230
+
231
+ # Return +str+ replacing all +patt+ with +repl+. If +prevent_breaks+ is true,
232
+ # then replace spaces preceding +patt+ with a non-breaking space, and if there
233
+ # are no spaces, then insert a word-joiner.
234
+ #
235
+ def educate(str, patt, repl, prevent_breaks)
236
+ patt = /(?<spaces>[[:space:]]*)#{patt}/
237
+ str.gsub(patt) do
238
+ spaces = if prevent_breaks && $~['spaces'].length > 0
239
+ entity(:non_breaking_space) # * $~['spaces'].length
240
+ elsif prevent_breaks
241
+ entity(:word_joiner)
242
+ else
243
+ $~['spaces']
244
+ end
245
+ spaces + repl
246
+ end
247
+ end
248
+
249
+ # Return the string, with each instance of "<tt>--</tt>" translated to an
250
+ # em-dash HTML entity.
251
+ #
252
+ def educate_dashes(str, prevent_breaks=false)
253
+ educate(str, DOUBLE_DASH, entity(:em_dash), prevent_breaks)
254
+ end
255
+
256
+ # Return the string, with each instance of "<tt>--</tt>" translated to an
257
+ # en-dash HTML entity, and each "<tt>---</tt>" translated to an
258
+ # em-dash HTML entity.
259
+ #
260
+ def educate_dashes_oldschool(str, prevent_breaks=false)
261
+ str = educate(str, TRIPLE_DASH, entity(:em_dash), prevent_breaks)
262
+ educate(str, DOUBLE_DASH, entity(:en_dash), prevent_breaks)
263
+ end
264
+
265
+ # Return the string, with each instance of "<tt>--</tt>" translated
266
+ # to an em-dash HTML entity, and each "<tt>---</tt>" translated to
267
+ # an en-dash HTML entity. Two reasons why: First, unlike the en- and
268
+ # em-dash syntax supported by +educate_dashes_oldschool+, it's
269
+ # compatible with existing entries written before SmartyPants 1.1,
270
+ # back when "<tt>--</tt>" was only used for em-dashes. Second,
271
+ # em-dashes are more common than en-dashes, and so it sort of makes
272
+ # sense that the shortcut should be shorter to type. (Thanks to
273
+ # Aaron Swartz for the idea.)
274
+ #
275
+ def educate_dashes_inverted(str, prevent_breaks=false)
276
+ str = educate(str, TRIPLE_DASH, entity(:en_dash), prevent_breaks)
277
+ educate(str, DOUBLE_DASH, entity(:em_dash), prevent_breaks)
278
+ end
279
+
280
+ # Return the string, with each instance of "<tt>...</tt>" translated
281
+ # to an ellipsis HTML entity. Also converts the case where there are
282
+ # spaces between the dots.
283
+ #
284
+ def educate_ellipses(str, prevent_breaks=false)
285
+ str = educate(str, RubyPants.n_of(3, '.'), entity(:ellipsis), prevent_breaks)
286
+ educate(str, /(?<!\.|\.[[:space:]])\.[[:space:]]\.[[:space:]]\.(?!\.|[[:space:]]\.)/,
287
+ entity(:ellipsis), prevent_breaks)
288
+ end
289
+
290
+ # Return the string, with "<tt>``backticks''</tt>"-style double quotes
291
+ # translated into HTML curly quote entities.
292
+ #
293
+ def educate_backticks(str)
294
+ str.
295
+ gsub("``", entity(:double_left_quote)).
296
+ gsub("''", entity(:double_right_quote))
297
+ end
298
+
299
+ # Return the string, with "<tt>`backticks'</tt>"-style single quotes
300
+ # translated into HTML curly quote entities.
301
+ #
302
+ def educate_single_backticks(str)
303
+ str.
304
+ gsub("`", entity(:single_left_quote)).
305
+ gsub("'", entity(:single_right_quote))
306
+ end
307
+
308
+ # Return the string, with "educated" curly quote HTML entities.
309
+ #
310
+ def educate_quotes(str)
311
+ punct_class = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'
312
+
313
+ str = str.dup
314
+
315
+ # Special case if the very first character is a quote followed by
316
+ # punctuation at a non-word-break. Close the quotes by brute
317
+ # force:
318
+ str.gsub!(/^'(?=#{punct_class}\B)/,
319
+ entity(:single_right_quote))
320
+ str.gsub!(/^"(?=#{punct_class}\B)/,
321
+ entity(:double_right_quote))
322
+
323
+ # Special case for double sets of quotes, e.g.:
324
+ # <p>He said, "'Quoted' words in a larger quote."</p>
325
+ str.gsub!(/"'(?=\w)/,
326
+ "#{entity(:double_left_quote)}#{entity(:single_left_quote)}")
327
+ str.gsub!(/'"(?=\w)/,
328
+ "#{entity(:single_left_quote)}#{entity(:double_left_quote)}")
329
+
330
+ # Special case for decade abbreviations (the '80s):
331
+ str.gsub!(/'(?=\d\ds)/,
332
+ entity(:single_right_quote))
333
+
334
+ close_class = %![^\ \t\r\n\\[\{\(\-]!
335
+ dec_dashes = "#{entity(:en_dash)}|#{entity(:em_dash)}"
336
+
337
+ # Get most opening single quotes:
338
+ str.gsub!(/([[:space:]]|&nbsp;|--|&[mn]dash;|#{dec_dashes}|&#x201[34];)'(?=\w)/,
339
+ '\1' + entity(:single_left_quote))
340
+
341
+ # Single closing quotes:
342
+ str.gsub!(/(#{close_class})'/,
343
+ '\1' + entity(:single_right_quote))
344
+ str.gsub!(/'(\s|s\b|$)/,
345
+ entity(:single_right_quote) + '\1')
346
+
347
+ # Any remaining single quotes should be opening ones:
348
+ str.gsub!(/'/,
349
+ entity(:single_left_quote))
350
+
351
+ # Get most opening double quotes:
352
+ str.gsub!(/([[:space:]]|&nbsp;|--|&[mn]dash;|#{dec_dashes}|&#x201[34];)"(?=\w)/,
353
+ '\1' + entity(:double_left_quote))
354
+
355
+ # Double closing quotes:
356
+ str.gsub!(/(#{close_class})"/,
357
+ '\1' + entity(:double_right_quote))
358
+ str.gsub!(/"(\s|s\b|$)/,
359
+ entity(:double_right_quote) + '\1')
360
+
361
+ # Any remaining quotes should be opening ones:
362
+ str.gsub!(/"/,
363
+ entity(:double_left_quote))
364
+
365
+ str
366
+ end
367
+
368
+ # Return the string, with each RubyPants HTML entity translated to
369
+ # its ASCII counterpart.
370
+ #
371
+ # Note: This is not reversible (but exactly the same as in SmartyPants)
372
+ #
373
+ def stupefy_entities(str)
374
+ new_str = str.dup
375
+
376
+ {
377
+ :en_dash => '-',
378
+ :em_dash => '--',
379
+ :single_left_quote => "'",
380
+ :single_right_quote => "'",
381
+ :double_left_quote => '"',
382
+ :double_right_quote => '"',
383
+ :ellipsis => '...'
384
+ }.each do |k,v|
385
+ new_str.gsub!(/#{entity(k)}/, v)
386
+ end
387
+
388
+ new_str
389
+ end
390
+
391
+ # Return an array of the tokens comprising the string. Each token is
392
+ # either a tag (possibly with nested tags contained therein, such
393
+ # as <tt><a href="<MTFoo>"></tt>), or a run of text between
394
+ # tags. Each element of the array is a two-element array; the first
395
+ # is either :tag or :text; the second is the actual value.
396
+ #
397
+ # Based on the <tt>_tokenize()</tt> subroutine from Brad Choate's
398
+ # MTRegex plugin. <http://www.bradchoate.com/past/mtregex.php>
399
+ #
400
+ # This is actually the easier variant using tag_soup, as used by
401
+ # Chad Miller in the Python port of SmartyPants.
402
+ #
403
+ def tokenize
404
+ tag_soup = /([^<]*)(<!--.*?-->|<[^>]*>)/m
405
+
406
+ tokens = []
407
+
408
+ prev_end = 0
409
+
410
+ scan(tag_soup) do
411
+ tokens << [:text, $1] if $1 != ""
412
+ tokens << [:tag, $2]
413
+ prev_end = $~.end(0)
414
+ end
415
+
416
+ if prev_end < size
417
+ tokens << [:text, self[prev_end..-1]]
418
+ end
419
+
420
+ tokens
421
+ end
422
+
423
+ def default_entities
424
+ {
425
+ :single_left_quote => "&#8216;",
426
+ :double_left_quote => "&#8220;",
427
+ :single_right_quote => "&#8217;",
428
+ :double_right_quote => "&#8221;",
429
+ :em_dash => "&#8212;",
430
+ :en_dash => "&#8211;",
431
+ :ellipsis => "&#8230;",
432
+ :non_breaking_space => "&nbsp;",
433
+ :word_joiner => "&#8288;",
434
+ }
435
+ end
436
+
437
+ def named_entities
438
+ {
439
+ :single_left_quote => '&lsquo;',
440
+ :double_left_quote => "&ldquo;",
441
+ :single_right_quote => "&rsquo;",
442
+ :double_right_quote => "&rdquo;",
443
+ :em_dash => "&mdash;",
444
+ :en_dash => "&ndash;",
445
+ :ellipsis => "&hellip;",
446
+ :non_breaking_space => "&nbsp;",
447
+ # :word_joiner => N/A,
448
+ }
449
+ end
450
+
451
+ def character_entities
452
+ {
453
+ :single_left_quote => "\u2018",
454
+ :double_left_quote => "\u201C",
455
+ :single_right_quote => "\u2019",
456
+ :double_right_quote => "\u201D",
457
+ :em_dash => "\u2014",
458
+ :en_dash => "\u2013",
459
+ :ellipsis => "\u2026",
460
+ }
461
+ end
462
+
463
+ def character_spaces
464
+ {
465
+ :non_breaking_space => "\u00A0",
466
+ :word_joiner => "\u2060",
467
+ }
468
+ end
469
+
470
+ def entity(key)
471
+ @entities[key]
472
+ end
473
+ end
@@ -0,0 +1,3 @@
1
+ module RubyPantsVersion
2
+ VERSION = "0.7.0"
3
+ end
@@ -2,11 +2,11 @@
2
2
 
3
3
  lib = File.expand_path('../lib', __FILE__)
4
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
- require "rubypants/version"
5
+ require 'version'
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = 'rubypants'
9
- s.version = RubyPants::VERSION
9
+ s.version = RubyPantsVersion::VERSION
10
10
  s.summary = "RubyPants is a Ruby port of the smart-quotes library SmartyPants."
11
11
  s.description = <<-EOF
12
12
  The original "SmartyPants" is a free web publishing plug-in for
@@ -235,4 +235,11 @@ EOF
235
235
  def test_named_entities
236
236
  assert_rp_equal "Testing 'FOO!'", "Testing &lsquo;FOO!&rsquo;", [2, :named_entities]
237
237
  end
238
+
239
+ def test_character_entities
240
+ assert_rp_equal "Testing 'FOO!'", "Testing ‘FOO!’", [2, :character_entities]
241
+ assert_rp_equal "foo---bar", "foo&#8288;—bar", [2, :character_entities, :prevent_breaks]
242
+ assert_rp_equal "foo ---bar", "foo&nbsp;—bar", [2, :character_entities, :prevent_breaks]
243
+ assert_rp_equal "foo ---bar", "foo\u00A0—bar", [2, :character_entities, :character_spaces, :prevent_breaks]
244
+ end
238
245
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rubypants
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Gruber
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2016-10-17 00:00:00.000000000 Z
15
+ date: 2018-02-26 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: minitest
@@ -45,8 +45,7 @@ files:
45
45
  - README.rdoc
46
46
  - Rakefile
47
47
  - lib/rubypants.rb
48
- - lib/rubypants/core.rb
49
- - lib/rubypants/version.rb
48
+ - lib/version.rb
50
49
  - rubypants.gemspec
51
50
  - test/helper.rb
52
51
  - test/rubypants_test.rb
@@ -70,7 +69,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
70
69
  version: '0'
71
70
  requirements: []
72
71
  rubyforge_project:
73
- rubygems_version: 2.5.1
72
+ rubygems_version: 2.6.13
74
73
  signing_key:
75
74
  specification_version: 4
76
75
  summary: RubyPants is a Ruby port of the smart-quotes library SmartyPants.
@@ -1,447 +0,0 @@
1
- class RubyPants < String
2
-
3
- # Create a new RubyPants instance with the text in +string+.
4
- #
5
- # Allowed elements in the options array:
6
- #
7
- # 0 :: do nothing
8
- # 1 :: enable all, using only em-dash shortcuts
9
- # 2 :: enable all, using old school en- and em-dash shortcuts (*default*)
10
- # 3 :: enable all, using inverted old school en and em-dash shortcuts
11
- # -1 :: stupefy (translate HTML entities to their ASCII-counterparts)
12
- #
13
- # If you don't like any of these defaults, you can pass symbols to change
14
- # RubyPants' behavior:
15
- #
16
- # <tt>:quotes</tt> :: quotes
17
- # <tt>:backticks</tt> :: backtick quotes (``double'' only)
18
- # <tt>:allbackticks</tt> :: backtick quotes (``double'' and `single')
19
- # <tt>:dashes</tt> :: dashes
20
- # <tt>:oldschool</tt> :: old school dashes
21
- # <tt>:inverted</tt> :: inverted old school dashes
22
- # <tt>:ellipses</tt> :: ellipses
23
- # <tt>:prevent_breaks</tt> :: use nbsp and word-joiner to avoid breaking
24
- # before dashes and ellipses
25
- # <tt>:named_entities</tt> :: used named entities instead of the default
26
- # decimal entities (see below)
27
- # <tt>:convertquotes</tt> :: convert <tt>&quot;</tt> entities to
28
- # <tt>"</tt>
29
- # <tt>:stupefy</tt> :: translate RubyPants HTML entities
30
- # to their ASCII counterparts.
31
- #
32
- # In addition, you can customize the HTML entities that will be injected by
33
- # passing in a hash for the final argument. The defaults for these entities
34
- # are as follows:
35
- #
36
- # <tt>:single_left_quote</tt> :: <tt>&#8216;</tt>
37
- # <tt>:double_left_quote</tt> :: <tt>&#8220;</tt>
38
- # <tt>:single_right_quote</tt> :: <tt>&#8217;</tt>
39
- # <tt>:double_right_quote</tt> :: <tt>&#8221;</tt>
40
- # <tt>:em_dash</tt> :: <tt>&#8212;</tt>
41
- # <tt>:en_dash</tt> :: <tt>&#8211;</tt>
42
- # <tt>:ellipsis</tt> :: <tt>&#8230;</tt>
43
- # <tt>:html_quote</tt> :: <tt>&quot;</tt>
44
- # <tt>:non_breaking_space</tt> :: <tt>&nbsp;</tt>
45
- # <tt>:word_joiner</tt> :: <tt>&#8288;</tt>
46
- #
47
- # If the <tt>:named_entities</tt> option is used, the default entities are
48
- # as follows:
49
- #
50
- # <tt>:single_left_quote</tt> :: <tt>&lsquo;</tt>
51
- # <tt>:double_left_quote</tt> :: <tt>&ldquo;</tt>
52
- # <tt>:single_right_quote</tt> :: <tt>&rsquo;</tt>
53
- # <tt>:double_right_quote</tt> :: <tt>&rdquo;</tt>
54
- # <tt>:em_dash</tt> :: <tt>&mdash;</tt>
55
- # <tt>:en_dash</tt> :: <tt>&ndash;</tt>
56
- # <tt>:ellipsis</tt> :: <tt>&hellip;</tt>
57
- # <tt>:html_quote</tt> :: <tt>&quot;</tt>
58
- # <tt>:non_breaking_space</tt> :: <tt>&nbsp;</tt>
59
- # <tt>:word_joiner</tt> :: <tt>&#8288;</tt>
60
- #
61
- def initialize(string, options=[2], entities = {})
62
- super string
63
-
64
- @options = [*options]
65
- @entities = default_entities
66
- @entities.merge!(named_entities) if @options.include?(:named_entities)
67
- @entities.merge!(entities)
68
- end
69
-
70
- # Apply SmartyPants transformations.
71
- def to_html
72
- do_quotes = do_backticks = do_dashes = do_ellipses = do_stupify = nil
73
- convert_quotes = prevent_breaks = nil
74
-
75
- if @options.include?(0)
76
- # Do nothing.
77
- return self
78
- elsif @options.include?(1)
79
- # Do everything, turn all options on.
80
- do_quotes = do_backticks = do_ellipses = true
81
- do_dashes = :normal
82
- elsif @options.include?(2)
83
- # Do everything, turn all options on, use old school dash shorthand.
84
- do_quotes = do_backticks = do_ellipses = true
85
- do_dashes = :oldschool
86
- elsif @options.include?(3)
87
- # Do everything, turn all options on, use inverted old school
88
- # dash shorthand.
89
- do_quotes = do_backticks = do_ellipses = true
90
- do_dashes = :inverted
91
- elsif @options.include?(-1)
92
- do_stupefy = true
93
- end
94
-
95
- # Explicit flags override numeric flag groups.
96
- do_quotes = true if @options.include?(:quotes)
97
- do_backticks = true if @options.include?(:backticks)
98
- do_backticks = :both if @options.include?(:allbackticks)
99
- do_dashes = :normal if @options.include?(:dashes)
100
- do_dashes = :oldschool if @options.include?(:oldschool)
101
- do_dashes = :inverted if @options.include?(:inverted)
102
- prevent_breaks = true if @options.include?(:prevent_breaks)
103
- do_ellipses = true if @options.include?(:ellipses)
104
- convert_quotes = true if @options.include?(:convertquotes)
105
- do_stupefy = true if @options.include?(:stupefy)
106
-
107
- # Parse the HTML
108
- tokens = tokenize
109
-
110
- # Keep track of when we're inside <pre> or <code> tags.
111
- in_pre = nil
112
-
113
- # Here is the result stored in.
114
- result = ""
115
-
116
- # This is a cheat, used to get some context for one-character
117
- # tokens that consist of just a quote char. What we do is remember
118
- # the last character of the previous text token, to use as context
119
- # to curl single- character quote tokens correctly.
120
- prev_token_last_char = nil
121
-
122
- tokens.each do |token|
123
- if token.first == :tag
124
- result << token[1]
125
- if token[1].end_with? '/>'
126
- # ignore self-closing tags
127
- elsif token[1] =~ %r!\A<(/?)(pre|code|kbd|script|style|math)[\s>]!
128
- if $1 == '' && ! in_pre
129
- in_pre = $2
130
- elsif $1 == '/' && $2 == in_pre
131
- in_pre = nil
132
- end
133
- end
134
- else
135
- t = token[1]
136
-
137
- # Remember last char of this token before processing.
138
- last_char = t[-1].chr
139
-
140
- unless in_pre
141
- t = process_escapes t
142
-
143
- t.gsub!(/&quot;/, '"') if convert_quotes
144
-
145
- if do_dashes
146
- t = educate_dashes t, prevent_breaks if do_dashes == :normal
147
- t = educate_dashes_oldschool t, prevent_breaks if do_dashes == :oldschool
148
- t = educate_dashes_inverted t, prevent_breaks if do_dashes == :inverted
149
- end
150
-
151
- t = educate_ellipses t, prevent_breaks if do_ellipses
152
-
153
- # Note: backticks need to be processed before quotes.
154
- if do_backticks
155
- t = educate_backticks t
156
- t = educate_single_backticks t if do_backticks == :both
157
- end
158
-
159
- if do_quotes
160
- if t == "'"
161
- # Special case: single-character ' token
162
- if prev_token_last_char =~ /\S/
163
- t = entity(:single_right_quote)
164
- else
165
- t = entity(:single_left_quote)
166
- end
167
- elsif t == '"'
168
- # Special case: single-character " token
169
- if prev_token_last_char =~ /\S/
170
- t = entity(:double_right_quote)
171
- else
172
- t = entity(:double_left_quote)
173
- end
174
- else
175
- # Normal case:
176
- t = educate_quotes t
177
- end
178
- end
179
-
180
- t = stupefy_entities t if do_stupefy
181
- end
182
-
183
- prev_token_last_char = last_char
184
- result << t
185
- end
186
- end
187
-
188
- # Done
189
- result
190
- end
191
-
192
- protected
193
-
194
- # Return the string, with after processing the following backslash
195
- # escape sequences. This is useful if you want to force a "dumb" quote
196
- # or other character to appear.
197
- #
198
- # Escaped are:
199
- # \\ \" \' \. \- \`
200
- #
201
- def process_escapes(str)
202
- str.
203
- gsub('\\\\', '&#92;').
204
- gsub('\"', '&#34;').
205
- gsub("\\\'", '&#39;').
206
- gsub('\.', '&#46;').
207
- gsub('\-', '&#45;').
208
- gsub('\`', '&#96;')
209
- end
210
-
211
- def self.n_of(n, x)
212
- x = Regexp.escape(x)
213
- /(?<!#{x}) # not preceded by x
214
- #{x}{#{n}} # n of x
215
- (?!#{x}) # not followed by x
216
- /x
217
- end
218
-
219
- DOUBLE_DASH = n_of(2, '-')
220
- TRIPLE_DASH = n_of(3, '-')
221
-
222
- # Return +str+ replacing all +patt+ with +repl+. If +prevent_breaks+ is true,
223
- # then replace spaces preceding +patt+ with a non-breaking space, and if there
224
- # are no spaces, then insert a word-joiner.
225
- #
226
- def educate(str, patt, repl, prevent_breaks)
227
- patt = /(?<spaces>[[:space:]]*)#{patt}/
228
- str.gsub(patt) do
229
- spaces = if prevent_breaks && $~['spaces'].length > 0
230
- entity(:non_breaking_space) # * $~['spaces'].length
231
- elsif prevent_breaks
232
- entity(:word_joiner)
233
- else
234
- $~['spaces']
235
- end
236
- spaces + repl
237
- end
238
- end
239
-
240
- # Return the string, with each instance of "<tt>--</tt>" translated to an
241
- # em-dash HTML entity.
242
- #
243
- def educate_dashes(str, prevent_breaks=false)
244
- educate(str, DOUBLE_DASH, entity(:em_dash), prevent_breaks)
245
- end
246
-
247
- # Return the string, with each instance of "<tt>--</tt>" translated to an
248
- # en-dash HTML entity, and each "<tt>---</tt>" translated to an
249
- # em-dash HTML entity.
250
- #
251
- def educate_dashes_oldschool(str, prevent_breaks=false)
252
- str = educate(str, TRIPLE_DASH, entity(:em_dash), prevent_breaks)
253
- educate(str, DOUBLE_DASH, entity(:en_dash), prevent_breaks)
254
- end
255
-
256
- # Return the string, with each instance of "<tt>--</tt>" translated
257
- # to an em-dash HTML entity, and each "<tt>---</tt>" translated to
258
- # an en-dash HTML entity. Two reasons why: First, unlike the en- and
259
- # em-dash syntax supported by +educate_dashes_oldschool+, it's
260
- # compatible with existing entries written before SmartyPants 1.1,
261
- # back when "<tt>--</tt>" was only used for em-dashes. Second,
262
- # em-dashes are more common than en-dashes, and so it sort of makes
263
- # sense that the shortcut should be shorter to type. (Thanks to
264
- # Aaron Swartz for the idea.)
265
- #
266
- def educate_dashes_inverted(str, prevent_breaks=false)
267
- str = educate(str, TRIPLE_DASH, entity(:en_dash), prevent_breaks)
268
- educate(str, DOUBLE_DASH, entity(:em_dash), prevent_breaks)
269
- end
270
-
271
- # Return the string, with each instance of "<tt>...</tt>" translated
272
- # to an ellipsis HTML entity. Also converts the case where there are
273
- # spaces between the dots.
274
- #
275
- def educate_ellipses(str, prevent_breaks=false)
276
- str = educate(str, RubyPants.n_of(3, '.'), entity(:ellipsis), prevent_breaks)
277
- educate(str, /(?<!\.|\.[[:space:]])\.[[:space:]]\.[[:space:]]\.(?!\.|[[:space:]]\.)/,
278
- entity(:ellipsis), prevent_breaks)
279
- end
280
-
281
- # Return the string, with "<tt>``backticks''</tt>"-style single quotes
282
- # translated into HTML curly quote entities.
283
- #
284
- def educate_backticks(str)
285
- str.
286
- gsub("``", entity(:double_left_quote)).
287
- gsub("''", entity(:double_right_quote))
288
- end
289
-
290
- # Return the string, with "<tt>`backticks'</tt>"-style single quotes
291
- # translated into HTML curly quote entities.
292
- #
293
- def educate_single_backticks(str)
294
- str.
295
- gsub("`", entity(:single_left_quote)).
296
- gsub("'", entity(:single_right_quote))
297
- end
298
-
299
- # Return the string, with "educated" curly quote HTML entities.
300
- #
301
- def educate_quotes(str)
302
- punct_class = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'
303
-
304
- str = str.dup
305
-
306
- # Special case if the very first character is a quote followed by
307
- # punctuation at a non-word-break. Close the quotes by brute
308
- # force:
309
- str.gsub!(/^'(?=#{punct_class}\B)/,
310
- entity(:single_right_quote))
311
- str.gsub!(/^"(?=#{punct_class}\B)/,
312
- entity(:double_right_quote))
313
-
314
- # Special case for double sets of quotes, e.g.:
315
- # <p>He said, "'Quoted' words in a larger quote."</p>
316
- str.gsub!(/"'(?=\w)/,
317
- "#{entity(:double_left_quote)}#{entity(:single_left_quote)}")
318
- str.gsub!(/'"(?=\w)/,
319
- "#{entity(:single_left_quote)}#{entity(:double_left_quote)}")
320
-
321
- # Special case for decade abbreviations (the '80s):
322
- str.gsub!(/'(?=\d\ds)/,
323
- entity(:single_right_quote))
324
-
325
- close_class = %![^\ \t\r\n\\[\{\(\-]!
326
- dec_dashes = "#{entity(:en_dash)}|#{entity(:em_dash)}"
327
-
328
- # Get most opening single quotes:
329
- str.gsub!(/([[:space:]]|&nbsp;|--|&[mn]dash;|#{dec_dashes}|&#x201[34];)'(?=\w)/,
330
- '\1' + entity(:single_left_quote))
331
-
332
- # Single closing quotes:
333
- str.gsub!(/(#{close_class})'/,
334
- '\1' + entity(:single_right_quote))
335
- str.gsub!(/'(\s|s\b|$)/,
336
- entity(:single_right_quote) + '\1')
337
-
338
- # Any remaining single quotes should be opening ones:
339
- str.gsub!(/'/,
340
- entity(:single_left_quote))
341
-
342
- # Get most opening double quotes:
343
- str.gsub!(/([[:space:]]|&nbsp;|--|&[mn]dash;|#{dec_dashes}|&#x201[34];)"(?=\w)/,
344
- '\1' + entity(:double_left_quote))
345
-
346
- # Double closing quotes:
347
- str.gsub!(/(#{close_class})"/,
348
- '\1' + entity(:double_right_quote))
349
- str.gsub!(/"(\s|s\b|$)/,
350
- entity(:double_right_quote) + '\1')
351
-
352
- # Any remaining quotes should be opening ones:
353
- str.gsub!(/"/,
354
- entity(:double_left_quote))
355
-
356
- str
357
- end
358
-
359
- # Return the string, with each RubyPants HTML entity translated to
360
- # its ASCII counterpart.
361
- #
362
- # Note: This is not reversible (but exactly the same as in SmartyPants)
363
- #
364
- def stupefy_entities(str)
365
- new_str = str.dup
366
-
367
- {
368
- :en_dash => '-',
369
- :em_dash => '--',
370
- :single_left_quote => "'",
371
- :single_right_quote => "'",
372
- :double_left_quote => '"',
373
- :double_right_quote => '"',
374
- :ellipsis => '...'
375
- }.each do |k,v|
376
- new_str.gsub!(/#{entity(k)}/, v)
377
- end
378
-
379
- new_str
380
- end
381
-
382
- # Return an array of the tokens comprising the string. Each token is
383
- # either a tag (possibly with nested, tags contained therein, such
384
- # as <tt><a href="<MTFoo>"></tt>, or a run of text between
385
- # tags. Each element of the array is a two-element array; the first
386
- # is either :tag or :text; the second is the actual value.
387
- #
388
- # Based on the <tt>_tokenize()</tt> subroutine from Brad Choate's
389
- # MTRegex plugin. <http://www.bradchoate.com/past/mtregex.php>
390
- #
391
- # This is actually the easier variant using tag_soup, as used by
392
- # Chad Miller in the Python port of SmartyPants.
393
- #
394
- def tokenize
395
- tag_soup = /([^<]*)(<!--.*?-->|<[^>]*>)/m
396
-
397
- tokens = []
398
-
399
- prev_end = 0
400
-
401
- scan(tag_soup) do
402
- tokens << [:text, $1] if $1 != ""
403
- tokens << [:tag, $2]
404
- prev_end = $~.end(0)
405
- end
406
-
407
- if prev_end < size
408
- tokens << [:text, self[prev_end..-1]]
409
- end
410
-
411
- tokens
412
- end
413
-
414
- def default_entities
415
- {
416
- :single_left_quote => "&#8216;",
417
- :double_left_quote => "&#8220;",
418
- :single_right_quote => "&#8217;",
419
- :double_right_quote => "&#8221;",
420
- :em_dash => "&#8212;",
421
- :en_dash => "&#8211;",
422
- :ellipsis => "&#8230;",
423
- :html_quote => "&quot;",
424
- :non_breaking_space => "&nbsp;",
425
- :word_joiner => "&#8288;",
426
- }
427
- end
428
-
429
- def named_entities
430
- {
431
- :single_left_quote => '&lsquo;',
432
- :double_left_quote => "&ldquo;",
433
- :single_right_quote => "&rsquo;",
434
- :double_right_quote => "&rdquo;",
435
- :em_dash => "&mdash;",
436
- :en_dash => "&ndash;",
437
- :ellipsis => "&hellip;",
438
- :html_quote => "&quot;",
439
- :non_breaking_space => "&nbsp;",
440
- # :word_joiner => N/A,
441
- }
442
- end
443
-
444
- def entity(key)
445
- @entities[key]
446
- end
447
- end
@@ -1,3 +0,0 @@
1
- class RubyPants
2
- VERSION = "0.6.0"
3
- end