rubypants 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +0 -6
- data/lib/rubypants.rb +473 -2
- data/lib/version.rb +3 -0
- data/rubypants.gemspec +2 -2
- data/test/rubypants_test.rb +7 -0
- metadata +4 -5
- data/lib/rubypants/core.rb +0 -447
- data/lib/rubypants/version.rb +0 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8dc4efc96c1fe85653d5cfb4f6d0291a94edcdbc
|
4
|
+
data.tar.gz: b17cda053df199efe900caed483404b903345f9c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eaebe94dfee7f1ef9295910593ff64e0b10a5147ab96e802e7ce98031a15ee4e6c78ae7ee45ccadccaeb30d2c86d96692a2625733bb6e3f7edcda3ecd995c1a6
|
7
|
+
data.tar.gz: 3eabe14d2d081f67239047c29d7d71e93025622cdd3f562b1f3fd2b83a26cddea359e2fbc8a20d3f9ca9f0c855c12c4fe3f76e16442e6f2898a6dc346f2e6f4e
|
data/.travis.yml
CHANGED
@@ -10,9 +10,3 @@ rvm:
|
|
10
10
|
# and https://github.com/travis-ci/travis-ci/issues/5239
|
11
11
|
before_install:
|
12
12
|
- gem install bundler
|
13
|
-
|
14
|
-
# For now override the default `bundle exec rake` which fails because of an
|
15
|
-
# unresolved superclass mismatch in `core.rb` vs. `version.rb`. This should
|
16
|
-
# eventually be fixed in rubypants.
|
17
|
-
script:
|
18
|
-
- rake
|
data/lib/rubypants.rb
CHANGED
@@ -1,2 +1,473 @@
|
|
1
|
-
require_relative '
|
2
|
-
|
1
|
+
require_relative 'version'
|
2
|
+
|
3
|
+
class RubyPants < String
|
4
|
+
extend RubyPantsVersion
|
5
|
+
|
6
|
+
# Create a new RubyPants instance with the text in +string+.
|
7
|
+
#
|
8
|
+
# Allowed elements in the options array:
|
9
|
+
#
|
10
|
+
# 0 :: do nothing
|
11
|
+
# 1 :: enable all, using only em-dash shortcuts
|
12
|
+
# 2 :: enable all, using old school en- and em-dash shortcuts (*default*)
|
13
|
+
# 3 :: enable all, using inverted old school en and em-dash shortcuts
|
14
|
+
# -1 :: stupefy (translate HTML entities to their ASCII-counterparts)
|
15
|
+
#
|
16
|
+
# If you don't like any of these defaults, you can pass symbols to change
|
17
|
+
# RubyPants' behavior:
|
18
|
+
#
|
19
|
+
# <tt>:quotes</tt> :: quotes
|
20
|
+
# <tt>:backticks</tt> :: backtick quotes (``double'' only)
|
21
|
+
# <tt>:allbackticks</tt> :: backtick quotes (``double'' and `single')
|
22
|
+
# <tt>:dashes</tt> :: dashes
|
23
|
+
# <tt>:oldschool</tt> :: old school dashes
|
24
|
+
# <tt>:inverted</tt> :: inverted old school dashes
|
25
|
+
# <tt>:ellipses</tt> :: ellipses
|
26
|
+
# <tt>:prevent_breaks</tt> :: use nbsp and word-joiner to avoid breaking
|
27
|
+
# before dashes and ellipses
|
28
|
+
# <tt>:named_entities</tt> :: used named entities instead of the default
|
29
|
+
# decimal entities (see below)
|
30
|
+
# <tt>:convertquotes</tt> :: convert <tt>&quot;</tt> entities to
|
31
|
+
# <tt>"</tt>
|
32
|
+
# <tt>:stupefy</tt> :: translate RubyPants HTML entities
|
33
|
+
# to their ASCII counterparts.
|
34
|
+
#
|
35
|
+
# In addition, you can customize the HTML entities that will be injected by
|
36
|
+
# passing in a hash for the final argument. The defaults for these entities
|
37
|
+
# are as follows:
|
38
|
+
#
|
39
|
+
# <tt>:single_left_quote</tt> :: <tt>&#8216;</tt>
|
40
|
+
# <tt>:double_left_quote</tt> :: <tt>&#8220;</tt>
|
41
|
+
# <tt>:single_right_quote</tt> :: <tt>&#8217;</tt>
|
42
|
+
# <tt>:double_right_quote</tt> :: <tt>&#8221;</tt>
|
43
|
+
# <tt>:em_dash</tt> :: <tt>&#8212;</tt>
|
44
|
+
# <tt>:en_dash</tt> :: <tt>&#8211;</tt>
|
45
|
+
# <tt>:ellipsis</tt> :: <tt>&#8230;</tt>
|
46
|
+
# <tt>:non_breaking_space</tt> :: <tt>&nbsp;</tt>
|
47
|
+
# <tt>:word_joiner</tt> :: <tt>&#8288;</tt>
|
48
|
+
#
|
49
|
+
# If the <tt>:named_entities</tt> option is used, the default entities are
|
50
|
+
# as follows:
|
51
|
+
#
|
52
|
+
# <tt>:single_left_quote</tt> :: <tt>&lsquo;</tt>
|
53
|
+
# <tt>:double_left_quote</tt> :: <tt>&ldquo;</tt>
|
54
|
+
# <tt>:single_right_quote</tt> :: <tt>&rsquo;</tt>
|
55
|
+
# <tt>:double_right_quote</tt> :: <tt>&rdquo;</tt>
|
56
|
+
# <tt>:em_dash</tt> :: <tt>&mdash;</tt>
|
57
|
+
# <tt>:en_dash</tt> :: <tt>&ndash;</tt>
|
58
|
+
# <tt>:ellipsis</tt> :: <tt>&hellip;</tt>
|
59
|
+
# <tt>:non_breaking_space</tt> :: <tt>&nbsp;</tt>
|
60
|
+
# <tt>:word_joiner</tt> :: <tt>&#8288;</tt>
|
61
|
+
#
|
62
|
+
# If the <tt>:character_entities</tt> option is used, RubyPants will
|
63
|
+
# emit Unicode characters directly, rather than HTML entities. By default
|
64
|
+
# this excludes the space characters (non-breaking space and
|
65
|
+
# word-joiner). To additionally emit Unicode space characters, use the
|
66
|
+
# <tt>:character_spaces</tt> option.
|
67
|
+
#
|
68
|
+
def initialize(string, options=[2], entities = {})
|
69
|
+
super string
|
70
|
+
|
71
|
+
@options = [*options]
|
72
|
+
@entities = default_entities
|
73
|
+
@entities.merge!(named_entities) if @options.include?(:named_entities)
|
74
|
+
@entities.merge!(character_entities) if @options.include?(:character_entities)
|
75
|
+
@entities.merge!(character_spaces) if @options.include?(:character_spaces)
|
76
|
+
@entities.merge!(entities)
|
77
|
+
end
|
78
|
+
|
79
|
+
# Apply SmartyPants transformations.
|
80
|
+
def to_html
|
81
|
+
do_quotes = do_backticks = do_dashes = do_ellipses = do_stupify = nil
|
82
|
+
convert_quotes = prevent_breaks = nil
|
83
|
+
|
84
|
+
if @options.include?(0)
|
85
|
+
# Do nothing.
|
86
|
+
return self
|
87
|
+
elsif @options.include?(1)
|
88
|
+
# Do everything, turn all options on.
|
89
|
+
do_quotes = do_backticks = do_ellipses = true
|
90
|
+
do_dashes = :normal
|
91
|
+
elsif @options.include?(2)
|
92
|
+
# Do everything, turn all options on, use old school dash shorthand.
|
93
|
+
do_quotes = do_backticks = do_ellipses = true
|
94
|
+
do_dashes = :oldschool
|
95
|
+
elsif @options.include?(3)
|
96
|
+
# Do everything, turn all options on, use inverted old school
|
97
|
+
# dash shorthand.
|
98
|
+
do_quotes = do_backticks = do_ellipses = true
|
99
|
+
do_dashes = :inverted
|
100
|
+
elsif @options.include?(-1)
|
101
|
+
do_stupefy = true
|
102
|
+
end
|
103
|
+
|
104
|
+
# Explicit flags override numeric flag groups.
|
105
|
+
do_quotes = true if @options.include?(:quotes)
|
106
|
+
do_backticks = true if @options.include?(:backticks)
|
107
|
+
do_backticks = :both if @options.include?(:allbackticks)
|
108
|
+
do_dashes = :normal if @options.include?(:dashes)
|
109
|
+
do_dashes = :oldschool if @options.include?(:oldschool)
|
110
|
+
do_dashes = :inverted if @options.include?(:inverted)
|
111
|
+
prevent_breaks = true if @options.include?(:prevent_breaks)
|
112
|
+
do_ellipses = true if @options.include?(:ellipses)
|
113
|
+
convert_quotes = true if @options.include?(:convertquotes)
|
114
|
+
do_stupefy = true if @options.include?(:stupefy)
|
115
|
+
|
116
|
+
# Parse the HTML
|
117
|
+
tokens = tokenize
|
118
|
+
|
119
|
+
# Keep track of when we're inside <pre> or <code> tags.
|
120
|
+
in_pre = nil
|
121
|
+
|
122
|
+
# Here is the result stored in.
|
123
|
+
result = ""
|
124
|
+
|
125
|
+
# This is a cheat, used to get some context for one-character
|
126
|
+
# tokens that consist of just a quote char. What we do is remember
|
127
|
+
# the last character of the previous text token, to use as context
|
128
|
+
# to curl single- character quote tokens correctly.
|
129
|
+
prev_token_last_char = nil
|
130
|
+
|
131
|
+
tokens.each do |token|
|
132
|
+
if token.first == :tag
|
133
|
+
result << token[1]
|
134
|
+
if token[1].end_with? '/>'
|
135
|
+
# ignore self-closing tags
|
136
|
+
elsif token[1] =~ %r!\A<(/?)(pre|code|kbd|script|style|math)[\s>]!
|
137
|
+
if $1 == '' && ! in_pre
|
138
|
+
in_pre = $2
|
139
|
+
elsif $1 == '/' && $2 == in_pre
|
140
|
+
in_pre = nil
|
141
|
+
end
|
142
|
+
end
|
143
|
+
else
|
144
|
+
t = token[1]
|
145
|
+
|
146
|
+
# Remember last char of this token before processing.
|
147
|
+
last_char = t[-1].chr
|
148
|
+
|
149
|
+
unless in_pre
|
150
|
+
t = process_escapes t
|
151
|
+
|
152
|
+
t.gsub!(/&quot;/, '"') if convert_quotes
|
153
|
+
|
154
|
+
if do_dashes
|
155
|
+
t = educate_dashes t, prevent_breaks if do_dashes == :normal
|
156
|
+
t = educate_dashes_oldschool t, prevent_breaks if do_dashes == :oldschool
|
157
|
+
t = educate_dashes_inverted t, prevent_breaks if do_dashes == :inverted
|
158
|
+
end
|
159
|
+
|
160
|
+
t = educate_ellipses t, prevent_breaks if do_ellipses
|
161
|
+
|
162
|
+
# Note: backticks need to be processed before quotes.
|
163
|
+
if do_backticks
|
164
|
+
t = educate_backticks t
|
165
|
+
t = educate_single_backticks t if do_backticks == :both
|
166
|
+
end
|
167
|
+
|
168
|
+
if do_quotes
|
169
|
+
if t == "'"
|
170
|
+
# Special case: single-character ' token
|
171
|
+
if prev_token_last_char =~ /\S/
|
172
|
+
t = entity(:single_right_quote)
|
173
|
+
else
|
174
|
+
t = entity(:single_left_quote)
|
175
|
+
end
|
176
|
+
elsif t == '"'
|
177
|
+
# Special case: single-character " token
|
178
|
+
if prev_token_last_char =~ /\S/
|
179
|
+
t = entity(:double_right_quote)
|
180
|
+
else
|
181
|
+
t = entity(:double_left_quote)
|
182
|
+
end
|
183
|
+
else
|
184
|
+
# Normal case:
|
185
|
+
t = educate_quotes t
|
186
|
+
end
|
187
|
+
end
|
188
|
+
|
189
|
+
t = stupefy_entities t if do_stupefy
|
190
|
+
end
|
191
|
+
|
192
|
+
prev_token_last_char = last_char
|
193
|
+
result << t
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
# Done
|
198
|
+
result
|
199
|
+
end
|
200
|
+
|
201
|
+
protected
|
202
|
+
|
203
|
+
# Return the string, with after processing the following backslash
|
204
|
+
# escape sequences. This is useful if you want to force a "dumb" quote
|
205
|
+
# or other character to appear.
|
206
|
+
#
|
207
|
+
# Escaped are:
|
208
|
+
# \\ \" \' \. \- \`
|
209
|
+
#
|
210
|
+
def process_escapes(str)
|
211
|
+
str.
|
212
|
+
gsub('\\\\', '&#92;').
|
213
|
+
gsub('\"', '&#34;').
|
214
|
+
gsub("\\\'", '&#39;').
|
215
|
+
gsub('\.', '&#46;').
|
216
|
+
gsub('\-', '&#45;').
|
217
|
+
gsub('\`', '&#96;')
|
218
|
+
end
|
219
|
+
|
220
|
+
def self.n_of(n, x)
|
221
|
+
x = Regexp.escape(x)
|
222
|
+
/(?<!#{x}) # not preceded by x
|
223
|
+
#{x}{#{n}} # n of x
|
224
|
+
(?!#{x}) # not followed by x
|
225
|
+
/x
|
226
|
+
end
|
227
|
+
|
228
|
+
DOUBLE_DASH = n_of(2, '-')
|
229
|
+
TRIPLE_DASH = n_of(3, '-')
|
230
|
+
|
231
|
+
# Return +str+ replacing all +patt+ with +repl+. If +prevent_breaks+ is true,
|
232
|
+
# then replace spaces preceding +patt+ with a non-breaking space, and if there
|
233
|
+
# are no spaces, then insert a word-joiner.
|
234
|
+
#
|
235
|
+
def educate(str, patt, repl, prevent_breaks)
|
236
|
+
patt = /(?<spaces>[[:space:]]*)#{patt}/
|
237
|
+
str.gsub(patt) do
|
238
|
+
spaces = if prevent_breaks && $~['spaces'].length > 0
|
239
|
+
entity(:non_breaking_space) # * $~['spaces'].length
|
240
|
+
elsif prevent_breaks
|
241
|
+
entity(:word_joiner)
|
242
|
+
else
|
243
|
+
$~['spaces']
|
244
|
+
end
|
245
|
+
spaces + repl
|
246
|
+
end
|
247
|
+
end
|
248
|
+
|
249
|
+
# Return the string, with each instance of "<tt>--</tt>" translated to an
|
250
|
+
# em-dash HTML entity.
|
251
|
+
#
|
252
|
+
def educate_dashes(str, prevent_breaks=false)
|
253
|
+
educate(str, DOUBLE_DASH, entity(:em_dash), prevent_breaks)
|
254
|
+
end
|
255
|
+
|
256
|
+
# Return the string, with each instance of "<tt>--</tt>" translated to an
|
257
|
+
# en-dash HTML entity, and each "<tt>---</tt>" translated to an
|
258
|
+
# em-dash HTML entity.
|
259
|
+
#
|
260
|
+
def educate_dashes_oldschool(str, prevent_breaks=false)
|
261
|
+
str = educate(str, TRIPLE_DASH, entity(:em_dash), prevent_breaks)
|
262
|
+
educate(str, DOUBLE_DASH, entity(:en_dash), prevent_breaks)
|
263
|
+
end
|
264
|
+
|
265
|
+
# Return the string, with each instance of "<tt>--</tt>" translated
|
266
|
+
# to an em-dash HTML entity, and each "<tt>---</tt>" translated to
|
267
|
+
# an en-dash HTML entity. Two reasons why: First, unlike the en- and
|
268
|
+
# em-dash syntax supported by +educate_dashes_oldschool+, it's
|
269
|
+
# compatible with existing entries written before SmartyPants 1.1,
|
270
|
+
# back when "<tt>--</tt>" was only used for em-dashes. Second,
|
271
|
+
# em-dashes are more common than en-dashes, and so it sort of makes
|
272
|
+
# sense that the shortcut should be shorter to type. (Thanks to
|
273
|
+
# Aaron Swartz for the idea.)
|
274
|
+
#
|
275
|
+
def educate_dashes_inverted(str, prevent_breaks=false)
|
276
|
+
str = educate(str, TRIPLE_DASH, entity(:en_dash), prevent_breaks)
|
277
|
+
educate(str, DOUBLE_DASH, entity(:em_dash), prevent_breaks)
|
278
|
+
end
|
279
|
+
|
280
|
+
# Return the string, with each instance of "<tt>...</tt>" translated
|
281
|
+
# to an ellipsis HTML entity. Also converts the case where there are
|
282
|
+
# spaces between the dots.
|
283
|
+
#
|
284
|
+
def educate_ellipses(str, prevent_breaks=false)
|
285
|
+
str = educate(str, RubyPants.n_of(3, '.'), entity(:ellipsis), prevent_breaks)
|
286
|
+
educate(str, /(?<!\.|\.[[:space:]])\.[[:space:]]\.[[:space:]]\.(?!\.|[[:space:]]\.)/,
|
287
|
+
entity(:ellipsis), prevent_breaks)
|
288
|
+
end
|
289
|
+
|
290
|
+
# Return the string, with "<tt>``backticks''</tt>"-style single quotes
|
291
|
+
# translated into HTML curly quote entities.
|
292
|
+
#
|
293
|
+
def educate_backticks(str)
|
294
|
+
str.
|
295
|
+
gsub("``", entity(:double_left_quote)).
|
296
|
+
gsub("''", entity(:double_right_quote))
|
297
|
+
end
|
298
|
+
|
299
|
+
# Return the string, with "<tt>`backticks'</tt>"-style single quotes
|
300
|
+
# translated into HTML curly quote entities.
|
301
|
+
#
|
302
|
+
def educate_single_backticks(str)
|
303
|
+
str.
|
304
|
+
gsub("`", entity(:single_left_quote)).
|
305
|
+
gsub("'", entity(:single_right_quote))
|
306
|
+
end
|
307
|
+
|
308
|
+
# Return the string, with "educated" curly quote HTML entities.
|
309
|
+
#
|
310
|
+
def educate_quotes(str)
|
311
|
+
punct_class = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'
|
312
|
+
|
313
|
+
str = str.dup
|
314
|
+
|
315
|
+
# Special case if the very first character is a quote followed by
|
316
|
+
# punctuation at a non-word-break. Close the quotes by brute
|
317
|
+
# force:
|
318
|
+
str.gsub!(/^'(?=#{punct_class}\B)/,
|
319
|
+
entity(:single_right_quote))
|
320
|
+
str.gsub!(/^"(?=#{punct_class}\B)/,
|
321
|
+
entity(:double_right_quote))
|
322
|
+
|
323
|
+
# Special case for double sets of quotes, e.g.:
|
324
|
+
# <p>He said, "'Quoted' words in a larger quote."</p>
|
325
|
+
str.gsub!(/"'(?=\w)/,
|
326
|
+
"#{entity(:double_left_quote)}#{entity(:single_left_quote)}")
|
327
|
+
str.gsub!(/'"(?=\w)/,
|
328
|
+
"#{entity(:single_left_quote)}#{entity(:double_left_quote)}")
|
329
|
+
|
330
|
+
# Special case for decade abbreviations (the '80s):
|
331
|
+
str.gsub!(/'(?=\d\ds)/,
|
332
|
+
entity(:single_right_quote))
|
333
|
+
|
334
|
+
close_class = %![^\ \t\r\n\\[\{\(\-]!
|
335
|
+
dec_dashes = "#{entity(:en_dash)}|#{entity(:em_dash)}"
|
336
|
+
|
337
|
+
# Get most opening single quotes:
|
338
|
+
str.gsub!(/([[:space:]]|&nbsp;|--|&[mn]dash;|#{dec_dashes}|&#x201[34];)'(?=\w)/,
|
339
|
+
'\1' + entity(:single_left_quote))
|
340
|
+
|
341
|
+
# Single closing quotes:
|
342
|
+
str.gsub!(/(#{close_class})'/,
|
343
|
+
'\1' + entity(:single_right_quote))
|
344
|
+
str.gsub!(/'(\s|s\b|$)/,
|
345
|
+
entity(:single_right_quote) + '\1')
|
346
|
+
|
347
|
+
# Any remaining single quotes should be opening ones:
|
348
|
+
str.gsub!(/'/,
|
349
|
+
entity(:single_left_quote))
|
350
|
+
|
351
|
+
# Get most opening double quotes:
|
352
|
+
str.gsub!(/([[:space:]]|&nbsp;|--|&[mn]dash;|#{dec_dashes}|&#x201[34];)"(?=\w)/,
|
353
|
+
'\1' + entity(:double_left_quote))
|
354
|
+
|
355
|
+
# Double closing quotes:
|
356
|
+
str.gsub!(/(#{close_class})"/,
|
357
|
+
'\1' + entity(:double_right_quote))
|
358
|
+
str.gsub!(/"(\s|s\b|$)/,
|
359
|
+
entity(:double_right_quote) + '\1')
|
360
|
+
|
361
|
+
# Any remaining quotes should be opening ones:
|
362
|
+
str.gsub!(/"/,
|
363
|
+
entity(:double_left_quote))
|
364
|
+
|
365
|
+
str
|
366
|
+
end
|
367
|
+
|
368
|
+
# Return the string, with each RubyPants HTML entity translated to
|
369
|
+
# its ASCII counterpart.
|
370
|
+
#
|
371
|
+
# Note: This is not reversible (but exactly the same as in SmartyPants)
|
372
|
+
#
|
373
|
+
def stupefy_entities(str)
|
374
|
+
new_str = str.dup
|
375
|
+
|
376
|
+
{
|
377
|
+
:en_dash => '-',
|
378
|
+
:em_dash => '--',
|
379
|
+
:single_left_quote => "'",
|
380
|
+
:single_right_quote => "'",
|
381
|
+
:double_left_quote => '"',
|
382
|
+
:double_right_quote => '"',
|
383
|
+
:ellipsis => '...'
|
384
|
+
}.each do |k,v|
|
385
|
+
new_str.gsub!(/#{entity(k)}/, v)
|
386
|
+
end
|
387
|
+
|
388
|
+
new_str
|
389
|
+
end
|
390
|
+
|
391
|
+
# Return an array of the tokens comprising the string. Each token is
|
392
|
+
# either a tag (possibly with nested, tags contained therein, such
|
393
|
+
# as <tt><a href="<MTFoo>"></tt>, or a run of text between
|
394
|
+
# tags. Each element of the array is a two-element array; the first
|
395
|
+
# is either :tag or :text; the second is the actual value.
|
396
|
+
#
|
397
|
+
# Based on the <tt>_tokenize()</tt> subroutine from Brad Choate's
|
398
|
+
# MTRegex plugin. <http://www.bradchoate.com/past/mtregex.php>
|
399
|
+
#
|
400
|
+
# This is actually the easier variant using tag_soup, as used by
|
401
|
+
# Chad Miller in the Python port of SmartyPants.
|
402
|
+
#
|
403
|
+
def tokenize
|
404
|
+
tag_soup = /([^<]*)(<!--.*?-->|<[^>]*>)/m
|
405
|
+
|
406
|
+
tokens = []
|
407
|
+
|
408
|
+
prev_end = 0
|
409
|
+
|
410
|
+
scan(tag_soup) do
|
411
|
+
tokens << [:text, $1] if $1 != ""
|
412
|
+
tokens << [:tag, $2]
|
413
|
+
prev_end = $~.end(0)
|
414
|
+
end
|
415
|
+
|
416
|
+
if prev_end < size
|
417
|
+
tokens << [:text, self[prev_end..-1]]
|
418
|
+
end
|
419
|
+
|
420
|
+
tokens
|
421
|
+
end
|
422
|
+
|
423
|
+
def default_entities
|
424
|
+
{
|
425
|
+
:single_left_quote => "&#8216;",
|
426
|
+
:double_left_quote => "&#8220;",
|
427
|
+
:single_right_quote => "&#8217;",
|
428
|
+
:double_right_quote => "&#8221;",
|
429
|
+
:em_dash => "&#8212;",
|
430
|
+
:en_dash => "&#8211;",
|
431
|
+
:ellipsis => "&#8230;",
|
432
|
+
:non_breaking_space => "&nbsp;",
|
433
|
+
:word_joiner => "&#8288;",
|
434
|
+
}
|
435
|
+
end
|
436
|
+
|
437
|
+
def named_entities
|
438
|
+
{
|
439
|
+
:single_left_quote => '&lsquo;',
|
440
|
+
:double_left_quote => "&ldquo;",
|
441
|
+
:single_right_quote => "&rsquo;",
|
442
|
+
:double_right_quote => "&rdquo;",
|
443
|
+
:em_dash => "&mdash;",
|
444
|
+
:en_dash => "&ndash;",
|
445
|
+
:ellipsis => "&hellip;",
|
446
|
+
:non_breaking_space => "&nbsp;",
|
447
|
+
# :word_joiner => N/A,
|
448
|
+
}
|
449
|
+
end
|
450
|
+
|
451
|
+
def character_entities
|
452
|
+
{
|
453
|
+
:single_left_quote => "\u2018",
|
454
|
+
:double_left_quote => "\u201C",
|
455
|
+
:single_right_quote => "\u2019",
|
456
|
+
:double_right_quote => "\u201D",
|
457
|
+
:em_dash => "\u2014",
|
458
|
+
:en_dash => "\u2013",
|
459
|
+
:ellipsis => "\u2026",
|
460
|
+
}
|
461
|
+
end
|
462
|
+
|
463
|
+
def character_spaces
|
464
|
+
{
|
465
|
+
:non_breaking_space => "\u00A0",
|
466
|
+
:word_joiner => "\u2060",
|
467
|
+
}
|
468
|
+
end
|
469
|
+
|
470
|
+
def entity(key)
|
471
|
+
@entities[key]
|
472
|
+
end
|
473
|
+
end
|
data/lib/version.rb
ADDED
data/rubypants.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
lib = File.expand_path('../lib', __FILE__)
|
4
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
|
-
require
|
5
|
+
require 'version'
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = 'rubypants'
|
9
|
-
s.version =
|
9
|
+
s.version = RubyPantsVersion::VERSION
|
10
10
|
s.summary = "RubyPants is a Ruby port of the smart-quotes library SmartyPants."
|
11
11
|
s.description = <<-EOF
|
12
12
|
The original "SmartyPants" is a free web publishing plug-in for
|
data/test/rubypants_test.rb
CHANGED
@@ -235,4 +235,11 @@ EOF
|
|
235
235
|
def test_named_entities
|
236
236
|
assert_rp_equal "Testing 'FOO!'", "Testing &lsquo;FOO!&rsquo;", [2, :named_entities]
|
237
237
|
end
|
238
|
+
|
239
|
+
def test_character_entities
|
240
|
+
assert_rp_equal "Testing 'FOO!'", "Testing ‘FOO!’", [2, :character_entities]
|
241
|
+
assert_rp_equal "foo---bar", "foo&#8288;—bar", [2, :character_entities, :prevent_breaks]
|
242
|
+
assert_rp_equal "foo ---bar", "foo&nbsp;—bar", [2, :character_entities, :prevent_breaks]
|
243
|
+
assert_rp_equal "foo ---bar", "foo\u00A0—bar", [2, :character_entities, :character_spaces, :prevent_breaks]
|
244
|
+
end
|
238
245
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rubypants
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Gruber
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date:
|
15
|
+
date: 2018-02-26 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: minitest
|
@@ -45,8 +45,7 @@ files:
|
|
45
45
|
- README.rdoc
|
46
46
|
- Rakefile
|
47
47
|
- lib/rubypants.rb
|
48
|
-
- lib/rubypants/core.rb
|
49
|
-
- lib/rubypants/version.rb
|
48
|
+
- lib/version.rb
|
50
49
|
- rubypants.gemspec
|
51
50
|
- test/helper.rb
|
52
51
|
- test/rubypants_test.rb
|
@@ -70,7 +69,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
70
69
|
version: '0'
|
71
70
|
requirements: []
|
72
71
|
rubyforge_project:
|
73
|
-
rubygems_version: 2.
|
72
|
+
rubygems_version: 2.6.13
|
74
73
|
signing_key:
|
75
74
|
specification_version: 4
|
76
75
|
summary: RubyPants is a Ruby port of the smart-quotes library SmartyPants.
|
data/lib/rubypants/core.rb
DELETED
@@ -1,447 +0,0 @@
|
|
1
|
-
class RubyPants < String
|
2
|
-
|
3
|
-
# Create a new RubyPants instance with the text in +string+.
|
4
|
-
#
|
5
|
-
# Allowed elements in the options array:
|
6
|
-
#
|
7
|
-
# 0 :: do nothing
|
8
|
-
# 1 :: enable all, using only em-dash shortcuts
|
9
|
-
# 2 :: enable all, using old school en- and em-dash shortcuts (*default*)
|
10
|
-
# 3 :: enable all, using inverted old school en and em-dash shortcuts
|
11
|
-
# -1 :: stupefy (translate HTML entities to their ASCII-counterparts)
|
12
|
-
#
|
13
|
-
# If you don't like any of these defaults, you can pass symbols to change
|
14
|
-
# RubyPants' behavior:
|
15
|
-
#
|
16
|
-
# <tt>:quotes</tt> :: quotes
|
17
|
-
# <tt>:backticks</tt> :: backtick quotes (``double'' only)
|
18
|
-
# <tt>:allbackticks</tt> :: backtick quotes (``double'' and `single')
|
19
|
-
# <tt>:dashes</tt> :: dashes
|
20
|
-
# <tt>:oldschool</tt> :: old school dashes
|
21
|
-
# <tt>:inverted</tt> :: inverted old school dashes
|
22
|
-
# <tt>:ellipses</tt> :: ellipses
|
23
|
-
# <tt>:prevent_breaks</tt> :: use nbsp and word-joiner to avoid breaking
|
24
|
-
# before dashes and ellipses
|
25
|
-
# <tt>:named_entities</tt> :: used named entities instead of the default
|
26
|
-
# decimal entities (see below)
|
27
|
-
# <tt>:convertquotes</tt> :: convert <tt>&quot;</tt> entities to
|
28
|
-
# <tt>"</tt>
|
29
|
-
# <tt>:stupefy</tt> :: translate RubyPants HTML entities
|
30
|
-
# to their ASCII counterparts.
|
31
|
-
#
|
32
|
-
# In addition, you can customize the HTML entities that will be injected by
|
33
|
-
# passing in a hash for the final argument. The defaults for these entities
|
34
|
-
# are as follows:
|
35
|
-
#
|
36
|
-
# <tt>:single_left_quote</tt> :: <tt>&#8216;</tt>
|
37
|
-
# <tt>:double_left_quote</tt> :: <tt>&#8220;</tt>
|
38
|
-
# <tt>:single_right_quote</tt> :: <tt>&#8217;</tt>
|
39
|
-
# <tt>:double_right_quote</tt> :: <tt>&#8221;</tt>
|
40
|
-
# <tt>:em_dash</tt> :: <tt>&#8212;</tt>
|
41
|
-
# <tt>:en_dash</tt> :: <tt>&#8211;</tt>
|
42
|
-
# <tt>:ellipsis</tt> :: <tt>&#8230;</tt>
|
43
|
-
# <tt>:html_quote</tt> :: <tt>&quot;</tt>
|
44
|
-
# <tt>:non_breaking_space</tt> :: <tt>&nbsp;</tt>
|
45
|
-
# <tt>:word_joiner</tt> :: <tt>&#8288;</tt>
|
46
|
-
#
|
47
|
-
# If the <tt>:named_entities</tt> option is used, the default entities are
|
48
|
-
# as follows:
|
49
|
-
#
|
50
|
-
# <tt>:single_left_quote</tt> :: <tt>&lsquo;</tt>
|
51
|
-
# <tt>:double_left_quote</tt> :: <tt>&ldquo;</tt>
|
52
|
-
# <tt>:single_right_quote</tt> :: <tt>&rsquo;</tt>
|
53
|
-
# <tt>:double_right_quote</tt> :: <tt>&rdquo;</tt>
|
54
|
-
# <tt>:em_dash</tt> :: <tt>&mdash;</tt>
|
55
|
-
# <tt>:en_dash</tt> :: <tt>&ndash;</tt>
|
56
|
-
# <tt>:ellipsis</tt> :: <tt>&hellip;</tt>
|
57
|
-
# <tt>:html_quote</tt> :: <tt>&quot;</tt>
|
58
|
-
# <tt>:non_breaking_space</tt> :: <tt>&nbsp;</tt>
|
59
|
-
# <tt>:word_joiner</tt> :: <tt>&#8288;</tt>
|
60
|
-
#
|
61
|
-
def initialize(string, options=[2], entities = {})
|
62
|
-
super string
|
63
|
-
|
64
|
-
@options = [*options]
|
65
|
-
@entities = default_entities
|
66
|
-
@entities.merge!(named_entities) if @options.include?(:named_entities)
|
67
|
-
@entities.merge!(entities)
|
68
|
-
end
|
69
|
-
|
70
|
-
# Apply SmartyPants transformations.
|
71
|
-
def to_html
|
72
|
-
do_quotes = do_backticks = do_dashes = do_ellipses = do_stupify = nil
|
73
|
-
convert_quotes = prevent_breaks = nil
|
74
|
-
|
75
|
-
if @options.include?(0)
|
76
|
-
# Do nothing.
|
77
|
-
return self
|
78
|
-
elsif @options.include?(1)
|
79
|
-
# Do everything, turn all options on.
|
80
|
-
do_quotes = do_backticks = do_ellipses = true
|
81
|
-
do_dashes = :normal
|
82
|
-
elsif @options.include?(2)
|
83
|
-
# Do everything, turn all options on, use old school dash shorthand.
|
84
|
-
do_quotes = do_backticks = do_ellipses = true
|
85
|
-
do_dashes = :oldschool
|
86
|
-
elsif @options.include?(3)
|
87
|
-
# Do everything, turn all options on, use inverted old school
|
88
|
-
# dash shorthand.
|
89
|
-
do_quotes = do_backticks = do_ellipses = true
|
90
|
-
do_dashes = :inverted
|
91
|
-
elsif @options.include?(-1)
|
92
|
-
do_stupefy = true
|
93
|
-
end
|
94
|
-
|
95
|
-
# Explicit flags override numeric flag groups.
|
96
|
-
do_quotes = true if @options.include?(:quotes)
|
97
|
-
do_backticks = true if @options.include?(:backticks)
|
98
|
-
do_backticks = :both if @options.include?(:allbackticks)
|
99
|
-
do_dashes = :normal if @options.include?(:dashes)
|
100
|
-
do_dashes = :oldschool if @options.include?(:oldschool)
|
101
|
-
do_dashes = :inverted if @options.include?(:inverted)
|
102
|
-
prevent_breaks = true if @options.include?(:prevent_breaks)
|
103
|
-
do_ellipses = true if @options.include?(:ellipses)
|
104
|
-
convert_quotes = true if @options.include?(:convertquotes)
|
105
|
-
do_stupefy = true if @options.include?(:stupefy)
|
106
|
-
|
107
|
-
# Parse the HTML
|
108
|
-
tokens = tokenize
|
109
|
-
|
110
|
-
# Keep track of when we're inside <pre> or <code> tags.
|
111
|
-
in_pre = nil
|
112
|
-
|
113
|
-
# Here is the result stored in.
|
114
|
-
result = ""
|
115
|
-
|
116
|
-
# This is a cheat, used to get some context for one-character
|
117
|
-
# tokens that consist of just a quote char. What we do is remember
|
118
|
-
# the last character of the previous text token, to use as context
|
119
|
-
# to curl single- character quote tokens correctly.
|
120
|
-
prev_token_last_char = nil
|
121
|
-
|
122
|
-
tokens.each do |token|
|
123
|
-
if token.first == :tag
|
124
|
-
result << token[1]
|
125
|
-
if token[1].end_with? '/>'
|
126
|
-
# ignore self-closing tags
|
127
|
-
elsif token[1] =~ %r!\A<(/?)(pre|code|kbd|script|style|math)[\s>]!
|
128
|
-
if $1 == '' && ! in_pre
|
129
|
-
in_pre = $2
|
130
|
-
elsif $1 == '/' && $2 == in_pre
|
131
|
-
in_pre = nil
|
132
|
-
end
|
133
|
-
end
|
134
|
-
else
|
135
|
-
t = token[1]
|
136
|
-
|
137
|
-
# Remember last char of this token before processing.
|
138
|
-
last_char = t[-1].chr
|
139
|
-
|
140
|
-
unless in_pre
|
141
|
-
t = process_escapes t
|
142
|
-
|
143
|
-
t.gsub!(/&quot;/, '"') if convert_quotes
|
144
|
-
|
145
|
-
if do_dashes
|
146
|
-
t = educate_dashes t, prevent_breaks if do_dashes == :normal
|
147
|
-
t = educate_dashes_oldschool t, prevent_breaks if do_dashes == :oldschool
|
148
|
-
t = educate_dashes_inverted t, prevent_breaks if do_dashes == :inverted
|
149
|
-
end
|
150
|
-
|
151
|
-
t = educate_ellipses t, prevent_breaks if do_ellipses
|
152
|
-
|
153
|
-
# Note: backticks need to be processed before quotes.
|
154
|
-
if do_backticks
|
155
|
-
t = educate_backticks t
|
156
|
-
t = educate_single_backticks t if do_backticks == :both
|
157
|
-
end
|
158
|
-
|
159
|
-
if do_quotes
|
160
|
-
if t == "'"
|
161
|
-
# Special case: single-character ' token
|
162
|
-
if prev_token_last_char =~ /\S/
|
163
|
-
t = entity(:single_right_quote)
|
164
|
-
else
|
165
|
-
t = entity(:single_left_quote)
|
166
|
-
end
|
167
|
-
elsif t == '"'
|
168
|
-
# Special case: single-character " token
|
169
|
-
if prev_token_last_char =~ /\S/
|
170
|
-
t = entity(:double_right_quote)
|
171
|
-
else
|
172
|
-
t = entity(:double_left_quote)
|
173
|
-
end
|
174
|
-
else
|
175
|
-
# Normal case:
|
176
|
-
t = educate_quotes t
|
177
|
-
end
|
178
|
-
end
|
179
|
-
|
180
|
-
t = stupefy_entities t if do_stupefy
|
181
|
-
end
|
182
|
-
|
183
|
-
prev_token_last_char = last_char
|
184
|
-
result << t
|
185
|
-
end
|
186
|
-
end
|
187
|
-
|
188
|
-
# Done
|
189
|
-
result
|
190
|
-
end
|
191
|
-
|
192
|
-
protected
|
193
|
-
|
194
|
-
# Return the string, with after processing the following backslash
|
195
|
-
# escape sequences. This is useful if you want to force a "dumb" quote
|
196
|
-
# or other character to appear.
|
197
|
-
#
|
198
|
-
# Escaped are:
|
199
|
-
# \\ \" \' \. \- \`
|
200
|
-
#
|
201
|
-
def process_escapes(str)
|
202
|
-
str.
|
203
|
-
gsub('\\\\', '&#92;').
|
204
|
-
gsub('\"', '&#34;').
|
205
|
-
gsub("\\\'", '&#39;').
|
206
|
-
gsub('\.', '&#46;').
|
207
|
-
gsub('\-', '&#45;').
|
208
|
-
gsub('\`', '&#96;')
|
209
|
-
end
|
210
|
-
|
211
|
-
def self.n_of(n, x)
|
212
|
-
x = Regexp.escape(x)
|
213
|
-
/(?<!#{x}) # not preceded by x
|
214
|
-
#{x}{#{n}} # n of x
|
215
|
-
(?!#{x}) # not followed by x
|
216
|
-
/x
|
217
|
-
end
|
218
|
-
|
219
|
-
DOUBLE_DASH = n_of(2, '-')
|
220
|
-
TRIPLE_DASH = n_of(3, '-')
|
221
|
-
|
222
|
-
# Return +str+ replacing all +patt+ with +repl+. If +prevent_breaks+ is true,
|
223
|
-
# then replace spaces preceding +patt+ with a non-breaking space, and if there
|
224
|
-
# are no spaces, then insert a word-joiner.
|
225
|
-
#
|
226
|
-
def educate(str, patt, repl, prevent_breaks)
|
227
|
-
patt = /(?<spaces>[[:space:]]*)#{patt}/
|
228
|
-
str.gsub(patt) do
|
229
|
-
spaces = if prevent_breaks && $~['spaces'].length > 0
|
230
|
-
entity(:non_breaking_space) # * $~['spaces'].length
|
231
|
-
elsif prevent_breaks
|
232
|
-
entity(:word_joiner)
|
233
|
-
else
|
234
|
-
$~['spaces']
|
235
|
-
end
|
236
|
-
spaces + repl
|
237
|
-
end
|
238
|
-
end
|
239
|
-
|
240
|
-
# Return the string, with each instance of "<tt>--</tt>" translated to an
|
241
|
-
# em-dash HTML entity.
|
242
|
-
#
|
243
|
-
def educate_dashes(str, prevent_breaks=false)
|
244
|
-
educate(str, DOUBLE_DASH, entity(:em_dash), prevent_breaks)
|
245
|
-
end
|
246
|
-
|
247
|
-
# Return the string, with each instance of "<tt>--</tt>" translated to an
|
248
|
-
# en-dash HTML entity, and each "<tt>---</tt>" translated to an
|
249
|
-
# em-dash HTML entity.
|
250
|
-
#
|
251
|
-
def educate_dashes_oldschool(str, prevent_breaks=false)
|
252
|
-
str = educate(str, TRIPLE_DASH, entity(:em_dash), prevent_breaks)
|
253
|
-
educate(str, DOUBLE_DASH, entity(:en_dash), prevent_breaks)
|
254
|
-
end
|
255
|
-
|
256
|
-
# Return the string, with each instance of "<tt>--</tt>" translated
|
257
|
-
# to an em-dash HTML entity, and each "<tt>---</tt>" translated to
|
258
|
-
# an en-dash HTML entity. Two reasons why: First, unlike the en- and
|
259
|
-
# em-dash syntax supported by +educate_dashes_oldschool+, it's
|
260
|
-
# compatible with existing entries written before SmartyPants 1.1,
|
261
|
-
# back when "<tt>--</tt>" was only used for em-dashes. Second,
|
262
|
-
# em-dashes are more common than en-dashes, and so it sort of makes
|
263
|
-
# sense that the shortcut should be shorter to type. (Thanks to
|
264
|
-
# Aaron Swartz for the idea.)
|
265
|
-
#
|
266
|
-
# "Inverted" dash handling: triple dashes (TRIPLE_DASH) become the en-dash
# entity, and the remaining double dashes (DOUBLE_DASH) become the em-dash
# entity, so the shorter shortcut produces the longer dash.
#
# str::            text to process.
# prevent_breaks:: forwarded to +educate+ (defaults to false).
#
# Returns the result of the second +educate+ pass.
def educate_dashes_inverted(str, prevent_breaks=false)
  # Triple dashes go first; otherwise the double-dash pass would consume
  # part of each triple.
  passes = [[TRIPLE_DASH, :en_dash], [DOUBLE_DASH, :em_dash]]
  passes.reduce(str) do |text, (pattern, name)|
    educate(text, pattern, entity(name), prevent_breaks)
  end
end
|
270
|
-
|
271
|
-
# Return the string, with each instance of "<tt>...</tt>" translated
|
272
|
-
# to an ellipsis HTML entity. Also converts the case where there are
|
273
|
-
# spaces between the dots.
|
274
|
-
#
|
275
|
-
# Convert ellipses in +str+ into the ellipsis entity. Two spellings are
# recognised: three consecutive dots (as matched by RubyPants.n_of(3, '.'))
# and three dots separated by single whitespace characters (". . .").
#
# str::            text to process.
# prevent_breaks:: forwarded to +educate+ (defaults to false).
#
# Returns the result of the second +educate+ pass.
def educate_ellipses(str, prevent_breaks=false)
  ellipsis = entity(:ellipsis)
  tight_dots = RubyPants.n_of(3, '.')
  # Exactly three whitespace-separated dots: the lookarounds reject runs that
  # continue with another (optionally spaced) dot on either side.
  spaced_dots = /(?<!\.|\.[[:space:]])\.[[:space:]]\.[[:space:]]\.(?!\.|[[:space:]]\.)/

  compacted = educate(str, tight_dots, ellipsis, prevent_breaks)
  educate(compacted, spaced_dots, ellipsis, prevent_breaks)
end
|
280
|
-
|
281
|
-
# Return the string, with "<tt>``backticks''</tt>"-style double quotes
|
282
|
-
# translated into HTML curly quote entities.
|
283
|
-
#
|
284
|
-
# Convert ``double backtick'' quoting in +str+: every "``" becomes the
# double-left-quote entity and every "''" becomes the double-right-quote
# entity.
#
# Returns a new string.
def educate_backticks(str)
  with_openers = str.gsub("``", entity(:double_left_quote))
  with_openers.gsub("''", entity(:double_right_quote))
end
|
289
|
-
|
290
|
-
# Return the string, with "<tt>`backticks'</tt>"-style single quotes
|
291
|
-
# translated into HTML curly quote entities.
|
292
|
-
#
|
293
|
-
# Convert `single backtick' quoting in +str+: every "`" becomes the
# single-left-quote entity and every "'" becomes the single-right-quote
# entity.
#
# Returns a new string.
def educate_single_backticks(str)
  with_openers = str.gsub("`", entity(:single_left_quote))
  with_openers.gsub("'", entity(:single_right_quote))
end
|
298
|
-
|
299
|
-
# Return the string, with "educated" curly quote HTML entities.
|
300
|
-
#
|
301
|
-
# Return a copy of +str+ in which straight single and double quotes are
# replaced by the "educated" (curly) quote entities supplied by +entity+.
#
# The rules are applied in a fixed order: brute-force special cases first,
# then, for single quotes and afterwards for double quotes, openers,
# closers, and finally "anything left is an opener".
#
# str:: text to process (left unmodified).
#
# Returns a new string.
def educate_quotes(str)
  punct_class = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'
  # A quote directly after any character outside this set is a closer.
  close_class = %![^\ \t\r\n\\[\{\(\-]!
  # The configured dash entities, escaped so they match literally.
  dec_dashes = Regexp.union(entity(:en_dash), entity(:em_dash))
  # What may stand directly in front of an opening quote: whitespace, a
  # non-breaking space entity, a dash in any of its spellings, or a hex
  # en/em-dash reference.
  opener_context = /[[:space:]]|&nbsp;|--|&[mn]dash;|#{dec_dashes}|&#x201[34];/

  str = str.dup

  # Special case: a quote in first position followed by punctuation at a
  # non-word-break is closed by brute force. Note that in Ruby +^+ anchors
  # at the start of every line, not only at the start of the string.
  str.gsub!(/^'(?=#{punct_class}\B)/, entity(:single_right_quote))
  str.gsub!(/^"(?=#{punct_class}\B)/, entity(:double_right_quote))

  # Special case for double sets of quotes, e.g.:
  #   <p>He said, "'Quoted' words in a larger quote."</p>
  str.gsub!(/"'(?=\w)/,
            "#{entity(:double_left_quote)}#{entity(:single_left_quote)}")
  str.gsub!(/'"(?=\w)/,
            "#{entity(:single_left_quote)}#{entity(:double_left_quote)}")

  # Special case for decade abbreviations (the '80s):
  str.gsub!(/'(?=\d\ds)/, entity(:single_right_quote))

  # Single quotes are fully resolved before double quotes are touched.
  [["'", :single_left_quote, :single_right_quote],
   ['"', :double_left_quote, :double_right_quote]].each do |quote, left, right|
    # Most opening quotes:
    str.gsub!(/(#{opener_context})#{quote}(?=\w)/, '\1' + entity(left))

    # Closing quotes:
    str.gsub!(/(#{close_class})#{quote}/, '\1' + entity(right))
    str.gsub!(/#{quote}(\s|s\b|$)/, entity(right) + '\1')

    # Any remaining quotes should be opening ones:
    str.gsub!(quote, entity(left))
  end

  str
end
|
358
|
-
|
359
|
-
# Return the string, with each RubyPants HTML entity translated to
|
360
|
-
# its ASCII counterpart.
|
361
|
-
#
|
362
|
-
# Note: This is not reversible (but exactly the same as in SmartyPants)
|
363
|
-
#
|
364
|
-
# Return a copy of +str+ in which each quote, dash and ellipsis entity (as
# supplied by +entity+) is replaced by its plain ASCII counterpart.
#
# The conversion is lossy: left and right quotes map to the same character,
# so the result cannot be turned back into the original.
#
# str:: text to process (left unmodified).
#
# Returns a new string.
def stupefy_entities(str)
  ascii_for = {
    :en_dash            => '-',
    :em_dash            => '--',
    :single_left_quote  => "'",
    :single_right_quote => "'",
    :double_left_quote  => '"',
    :double_right_quote => '"',
    :ellipsis           => '...'
  }

  new_str = str.dup

  ascii_for.each do |name, ascii|
    encoded = entity(name).to_s
    # An absent or empty entity would otherwise match at every position.
    next if encoded.empty?

    # Match the entity text literally; a String pattern is never interpreted
    # as a regular expression, so metacharacters in a custom entity are safe.
    new_str.gsub!(encoded, ascii)
  end

  new_str
end
|
381
|
-
|
382
|
-
# Return an array of the tokens comprising the string. Each token is
|
383
|
-
# either a tag (possibly with nested tags contained therein, such
|
384
|
-
# as <tt><a href="<MTFoo>"></tt>), or a run of text between
|
385
|
-
# tags. Each element of the array is a two-element array; the first
|
386
|
-
# is either :tag or :text; the second is the actual value.
|
387
|
-
#
|
388
|
-
# Based on the <tt>_tokenize()</tt> subroutine from Brad Choate's
|
389
|
-
# MTRegex plugin. <http://www.bradchoate.com/past/mtregex.php>
|
390
|
-
#
|
391
|
-
# This is actually the easier variant using tag_soup, as used by
|
392
|
-
# Chad Miller in the Python port of SmartyPants.
|
393
|
-
#
|
394
|
-
# Split the receiver into an array of <tt>[kind, value]</tt> pairs, where
# +kind+ is +:tag+ for an HTML comment or anything between "<" and ">", and
# +:text+ for the run of characters between tags. Empty text runs are
# omitted; text after the last tag is returned as a final +:text+ token.
def tokenize
  tag_soup = /([^<]*)(<!--.*?-->|<[^>]*>)/m
  tokens = []
  consumed = 0

  scan(tag_soup) do |text, tag|
    tokens.push([:text, text]) unless text == ""
    tokens.push([:tag, tag])
    # Remember where this match stopped so the tail can be picked up below.
    consumed = Regexp.last_match.end(0)
  end

  tokens.push([:text, self[consumed..-1]]) if consumed < size
  tokens
end
|
413
|
-
|
414
|
-
# Return a new Hash mapping each entity name (a Symbol) to the HTML markup
# emitted for it. The typographic characters are given as numeric character
# references; +:html_quote+ and +:non_breaking_space+ use their named
# references.
#
# The values must stay HTML references rather than raw characters: the
# callers in this file emit and match these strings as entities.
def default_entities
  {
    :single_left_quote  => "&#8216;",
    :double_left_quote  => "&#8220;",
    :single_right_quote => "&#8217;",
    :double_right_quote => "&#8221;",
    :em_dash            => "&#8212;",
    :en_dash            => "&#8211;",
    :ellipsis           => "&#8230;",
    :html_quote         => "&quot;",
    :non_breaking_space => "&nbsp;",
    :word_joiner        => "&#8288;",
  }
end
|
428
|
-
|
429
|
-
# Return a new Hash mapping each entity name (a Symbol) to the named HTML
# character reference emitted for it. There is deliberately no
# +:word_joiner+ entry, because no named reference is used for it.
def named_entities
  {
    :single_left_quote  => '&lsquo;',
    :double_left_quote  => "&ldquo;",
    :single_right_quote => "&rsquo;",
    :double_right_quote => "&rdquo;",
    :em_dash            => "&mdash;",
    :en_dash            => "&ndash;",
    :ellipsis           => "&hellip;",
    :html_quote         => "&quot;",
    :non_breaking_space => "&nbsp;",
    # :word_joiner      => N/A,
  }
end
|
443
|
-
|
444
|
-
# Look up the markup configured for the entity named +key+ in the
# <tt>@entities</tt> table.
#
# key:: entity name, e.g. +:em_dash+.
#
# Returns whatever <tt>@entities[key]</tt> yields for that key.
def entity(key)
  table = @entities
  table[key]
end
|
447
|
-
end
|
data/lib/rubypants/version.rb
DELETED