rubypants 0.6.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +0 -6
- data/lib/rubypants.rb +473 -2
- data/lib/version.rb +3 -0
- data/rubypants.gemspec +2 -2
- data/test/rubypants_test.rb +7 -0
- metadata +4 -5
- data/lib/rubypants/core.rb +0 -447
- data/lib/rubypants/version.rb +0 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8dc4efc96c1fe85653d5cfb4f6d0291a94edcdbc
|
4
|
+
data.tar.gz: b17cda053df199efe900caed483404b903345f9c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eaebe94dfee7f1ef9295910593ff64e0b10a5147ab96e802e7ce98031a15ee4e6c78ae7ee45ccadccaeb30d2c86d96692a2625733bb6e3f7edcda3ecd995c1a6
|
7
|
+
data.tar.gz: 3eabe14d2d081f67239047c29d7d71e93025622cdd3f562b1f3fd2b83a26cddea359e2fbc8a20d3f9ca9f0c855c12c4fe3f76e16442e6f2898a6dc346f2e6f4e
|
data/.travis.yml
CHANGED
@@ -10,9 +10,3 @@ rvm:
|
|
10
10
|
# and https://github.com/travis-ci/travis-ci/issues/5239
|
11
11
|
before_install:
|
12
12
|
- gem install bundler
|
13
|
-
|
14
|
-
# For now override the default `bundle exec rake` which fails because of an
|
15
|
-
# unresolved superclass mismatch in `core.rb` vs. `version.rb`. This should
|
16
|
-
# eventually be fixed in rubypants.
|
17
|
-
script:
|
18
|
-
- rake
|
data/lib/rubypants.rb
CHANGED
@@ -1,2 +1,473 @@
|
|
1
|
-
require_relative '
|
2
|
-
|
1
|
+
require_relative 'version'
|
2
|
+
|
3
|
+
class RubyPants < String
|
4
|
+
extend RubyPantsVersion
|
5
|
+
|
6
|
+
# Create a new RubyPants instance with the text in +string+.
|
7
|
+
#
|
8
|
+
# Allowed elements in the options array:
|
9
|
+
#
|
10
|
+
# 0 :: do nothing
|
11
|
+
# 1 :: enable all, using only em-dash shortcuts
|
12
|
+
# 2 :: enable all, using old school en- and em-dash shortcuts (*default*)
|
13
|
+
# 3 :: enable all, using inverted old school en and em-dash shortcuts
|
14
|
+
# -1 :: stupefy (translate HTML entities to their ASCII-counterparts)
|
15
|
+
#
|
16
|
+
# If you don't like any of these defaults, you can pass symbols to change
|
17
|
+
# RubyPants' behavior:
|
18
|
+
#
|
19
|
+
# <tt>:quotes</tt> :: quotes
|
20
|
+
# <tt>:backticks</tt> :: backtick quotes (``double'' only)
|
21
|
+
# <tt>:allbackticks</tt> :: backtick quotes (``double'' and `single')
|
22
|
+
# <tt>:dashes</tt> :: dashes
|
23
|
+
# <tt>:oldschool</tt> :: old school dashes
|
24
|
+
# <tt>:inverted</tt> :: inverted old school dashes
|
25
|
+
# <tt>:ellipses</tt> :: ellipses
|
26
|
+
# <tt>:prevent_breaks</tt> :: use nbsp and word-joiner to avoid breaking
|
27
|
+
# before dashes and ellipses
|
28
|
+
# <tt>:named_entities</tt> :: use named entities instead of the default
|
29
|
+
# decimal entities (see below)
|
30
|
+
# <tt>:convertquotes</tt> :: convert <tt>&quot;</tt> entities to
|
31
|
+
# <tt>"</tt>
|
32
|
+
# <tt>:stupefy</tt> :: translate RubyPants HTML entities
|
33
|
+
# to their ASCII counterparts.
|
34
|
+
#
|
35
|
+
# In addition, you can customize the HTML entities that will be injected by
|
36
|
+
# passing in a hash for the final argument. The defaults for these entities
|
37
|
+
# are as follows:
|
38
|
+
#
|
39
|
+
# <tt>:single_left_quote</tt> :: <tt>&#8216;</tt>
|
40
|
+
# <tt>:double_left_quote</tt> :: <tt>&#8220;</tt>
|
41
|
+
# <tt>:single_right_quote</tt> :: <tt>&#8217;</tt>
|
42
|
+
# <tt>:double_right_quote</tt> :: <tt>&#8221;</tt>
|
43
|
+
# <tt>:em_dash</tt> :: <tt>&#8212;</tt>
|
44
|
+
# <tt>:en_dash</tt> :: <tt>&#8211;</tt>
|
45
|
+
# <tt>:ellipsis</tt> :: <tt>&#8230;</tt>
|
46
|
+
# <tt>:non_breaking_space</tt> :: <tt>&nbsp;</tt>
|
47
|
+
# <tt>:word_joiner</tt> :: <tt>&#8288;</tt>
|
48
|
+
#
|
49
|
+
# If the <tt>:named_entities</tt> option is used, the default entities are
|
50
|
+
# as follows:
|
51
|
+
#
|
52
|
+
# <tt>:single_left_quote</tt> :: <tt>&lsquo;</tt>
|
53
|
+
# <tt>:double_left_quote</tt> :: <tt>&ldquo;</tt>
|
54
|
+
# <tt>:single_right_quote</tt> :: <tt>&rsquo;</tt>
|
55
|
+
# <tt>:double_right_quote</tt> :: <tt>&rdquo;</tt>
|
56
|
+
# <tt>:em_dash</tt> :: <tt>&mdash;</tt>
|
57
|
+
# <tt>:en_dash</tt> :: <tt>&ndash;</tt>
|
58
|
+
# <tt>:ellipsis</tt> :: <tt>&hellip;</tt>
|
59
|
+
# <tt>:non_breaking_space</tt> :: <tt>&nbsp;</tt>
|
60
|
+
# <tt>:word_joiner</tt> :: <tt>&#8288;</tt>
|
61
|
+
#
|
62
|
+
# If the <tt>:character_entities</tt> option is used, RubyPants will
|
63
|
+
# emit Unicode characters directly, rather than HTML entities. By default
|
64
|
+
# this excludes the space characters (non-breaking space and
|
65
|
+
# word-joiner). To additionally emit Unicode space characters, use the
|
66
|
+
# <tt>:character_spaces</tt> option.
|
67
|
+
#
|
68
|
+
def initialize(string, options=[2], entities = {})
|
69
|
+
super string
|
70
|
+
|
71
|
+
@options = [*options]
|
72
|
+
@entities = default_entities
|
73
|
+
@entities.merge!(named_entities) if @options.include?(:named_entities)
|
74
|
+
@entities.merge!(character_entities) if @options.include?(:character_entities)
|
75
|
+
@entities.merge!(character_spaces) if @options.include?(:character_spaces)
|
76
|
+
@entities.merge!(entities)
|
77
|
+
end
|
78
|
+
|
79
|
+
# Apply SmartyPants transformations.
|
80
|
+
def to_html
|
81
|
+
do_quotes = do_backticks = do_dashes = do_ellipses = do_stupify = nil
|
82
|
+
convert_quotes = prevent_breaks = nil
|
83
|
+
|
84
|
+
if @options.include?(0)
|
85
|
+
# Do nothing.
|
86
|
+
return self
|
87
|
+
elsif @options.include?(1)
|
88
|
+
# Do everything, turn all options on.
|
89
|
+
do_quotes = do_backticks = do_ellipses = true
|
90
|
+
do_dashes = :normal
|
91
|
+
elsif @options.include?(2)
|
92
|
+
# Do everything, turn all options on, use old school dash shorthand.
|
93
|
+
do_quotes = do_backticks = do_ellipses = true
|
94
|
+
do_dashes = :oldschool
|
95
|
+
elsif @options.include?(3)
|
96
|
+
# Do everything, turn all options on, use inverted old school
|
97
|
+
# dash shorthand.
|
98
|
+
do_quotes = do_backticks = do_ellipses = true
|
99
|
+
do_dashes = :inverted
|
100
|
+
elsif @options.include?(-1)
|
101
|
+
do_stupefy = true
|
102
|
+
end
|
103
|
+
|
104
|
+
# Explicit flags override numeric flag groups.
|
105
|
+
do_quotes = true if @options.include?(:quotes)
|
106
|
+
do_backticks = true if @options.include?(:backticks)
|
107
|
+
do_backticks = :both if @options.include?(:allbackticks)
|
108
|
+
do_dashes = :normal if @options.include?(:dashes)
|
109
|
+
do_dashes = :oldschool if @options.include?(:oldschool)
|
110
|
+
do_dashes = :inverted if @options.include?(:inverted)
|
111
|
+
prevent_breaks = true if @options.include?(:prevent_breaks)
|
112
|
+
do_ellipses = true if @options.include?(:ellipses)
|
113
|
+
convert_quotes = true if @options.include?(:convertquotes)
|
114
|
+
do_stupefy = true if @options.include?(:stupefy)
|
115
|
+
|
116
|
+
# Parse the HTML
|
117
|
+
tokens = tokenize
|
118
|
+
|
119
|
+
# Keep track of when we're inside <pre>, <code>, <kbd>, <script>, <style> or <math> tags.
|
120
|
+
in_pre = nil
|
121
|
+
|
122
|
+
# The result is accumulated here.
|
123
|
+
result = ""
|
124
|
+
|
125
|
+
# This is a cheat, used to get some context for one-character
|
126
|
+
# tokens that consist of just a quote char. What we do is remember
|
127
|
+
# the last character of the previous text token, to use as context
|
128
|
+
# to curl single-character quote tokens correctly.
|
129
|
+
prev_token_last_char = nil
|
130
|
+
|
131
|
+
tokens.each do |token|
|
132
|
+
if token.first == :tag
|
133
|
+
result << token[1]
|
134
|
+
if token[1].end_with? '/>'
|
135
|
+
# ignore self-closing tags
|
136
|
+
elsif token[1] =~ %r!\A<(/?)(pre|code|kbd|script|style|math)[\s>]!
|
137
|
+
if $1 == '' && ! in_pre
|
138
|
+
in_pre = $2
|
139
|
+
elsif $1 == '/' && $2 == in_pre
|
140
|
+
in_pre = nil
|
141
|
+
end
|
142
|
+
end
|
143
|
+
else
|
144
|
+
t = token[1]
|
145
|
+
|
146
|
+
# Remember last char of this token before processing.
|
147
|
+
last_char = t[-1].chr
|
148
|
+
|
149
|
+
unless in_pre
|
150
|
+
t = process_escapes t
|
151
|
+
|
152
|
+
t.gsub!(/&quot;/, '"') if convert_quotes
|
153
|
+
|
154
|
+
if do_dashes
|
155
|
+
t = educate_dashes t, prevent_breaks if do_dashes == :normal
|
156
|
+
t = educate_dashes_oldschool t, prevent_breaks if do_dashes == :oldschool
|
157
|
+
t = educate_dashes_inverted t, prevent_breaks if do_dashes == :inverted
|
158
|
+
end
|
159
|
+
|
160
|
+
t = educate_ellipses t, prevent_breaks if do_ellipses
|
161
|
+
|
162
|
+
# Note: backticks need to be processed before quotes.
|
163
|
+
if do_backticks
|
164
|
+
t = educate_backticks t
|
165
|
+
t = educate_single_backticks t if do_backticks == :both
|
166
|
+
end
|
167
|
+
|
168
|
+
if do_quotes
|
169
|
+
if t == "'"
|
170
|
+
# Special case: single-character ' token
|
171
|
+
if prev_token_last_char =~ /\S/
|
172
|
+
t = entity(:single_right_quote)
|
173
|
+
else
|
174
|
+
t = entity(:single_left_quote)
|
175
|
+
end
|
176
|
+
elsif t == '"'
|
177
|
+
# Special case: single-character " token
|
178
|
+
if prev_token_last_char =~ /\S/
|
179
|
+
t = entity(:double_right_quote)
|
180
|
+
else
|
181
|
+
t = entity(:double_left_quote)
|
182
|
+
end
|
183
|
+
else
|
184
|
+
# Normal case:
|
185
|
+
t = educate_quotes t
|
186
|
+
end
|
187
|
+
end
|
188
|
+
|
189
|
+
t = stupefy_entities t if do_stupefy
|
190
|
+
end
|
191
|
+
|
192
|
+
prev_token_last_char = last_char
|
193
|
+
result << t
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
# Done
|
198
|
+
result
|
199
|
+
end
|
200
|
+
|
201
|
+
protected
|
202
|
+
|
203
|
+
# Return the string after processing the following backslash
|
204
|
+
# escape sequences. This is useful if you want to force a "dumb" quote
|
205
|
+
# or other character to appear.
|
206
|
+
#
|
207
|
+
# Escaped are:
|
208
|
+
# \\ \" \' \. \- \`
|
209
|
+
#
|
210
|
+
def process_escapes(str)
|
211
|
+
str.
|
212
|
+
gsub('\\\\', '&#92;').
|
213
|
+
gsub('\"', '&quot;').
|
214
|
+
gsub("\\\'", '&#39;').
|
215
|
+
gsub('\.', '&#46;').
|
216
|
+
gsub('\-', '&#45;').
|
217
|
+
gsub('\`', '&#96;')
|
218
|
+
end
|
219
|
+
|
220
|
+
def self.n_of(n, x)
|
221
|
+
x = Regexp.escape(x)
|
222
|
+
/(?<!#{x}) # not preceded by x
|
223
|
+
#{x}{#{n}} # n of x
|
224
|
+
(?!#{x}) # not followed by x
|
225
|
+
/x
|
226
|
+
end
|
227
|
+
|
228
|
+
DOUBLE_DASH = n_of(2, '-')
|
229
|
+
TRIPLE_DASH = n_of(3, '-')
|
230
|
+
|
231
|
+
# Return +str+ replacing all +patt+ with +repl+. If +prevent_breaks+ is true,
|
232
|
+
# then replace spaces preceding +patt+ with a non-breaking space, and if there
|
233
|
+
# are no spaces, then insert a word-joiner.
|
234
|
+
#
|
235
|
+
def educate(str, patt, repl, prevent_breaks)
|
236
|
+
patt = /(?<spaces>[[:space:]]*)#{patt}/
|
237
|
+
str.gsub(patt) do
|
238
|
+
spaces = if prevent_breaks && $~['spaces'].length > 0
|
239
|
+
entity(:non_breaking_space) # * $~['spaces'].length
|
240
|
+
elsif prevent_breaks
|
241
|
+
entity(:word_joiner)
|
242
|
+
else
|
243
|
+
$~['spaces']
|
244
|
+
end
|
245
|
+
spaces + repl
|
246
|
+
end
|
247
|
+
end
|
248
|
+
|
249
|
+
# Return the string, with each instance of "<tt>--</tt>" translated to an
|
250
|
+
# em-dash HTML entity.
|
251
|
+
#
|
252
|
+
def educate_dashes(str, prevent_breaks=false)
|
253
|
+
educate(str, DOUBLE_DASH, entity(:em_dash), prevent_breaks)
|
254
|
+
end
|
255
|
+
|
256
|
+
# Return the string, with each instance of "<tt>--</tt>" translated to an
|
257
|
+
# en-dash HTML entity, and each "<tt>---</tt>" translated to an
|
258
|
+
# em-dash HTML entity.
|
259
|
+
#
|
260
|
+
def educate_dashes_oldschool(str, prevent_breaks=false)
|
261
|
+
str = educate(str, TRIPLE_DASH, entity(:em_dash), prevent_breaks)
|
262
|
+
educate(str, DOUBLE_DASH, entity(:en_dash), prevent_breaks)
|
263
|
+
end
|
264
|
+
|
265
|
+
# Return the string, with each instance of "<tt>--</tt>" translated
|
266
|
+
# to an em-dash HTML entity, and each "<tt>---</tt>" translated to
|
267
|
+
# an en-dash HTML entity. Two reasons why: First, unlike the en- and
|
268
|
+
# em-dash syntax supported by +educate_dashes_oldschool+, it's
|
269
|
+
# compatible with existing entries written before SmartyPants 1.1,
|
270
|
+
# back when "<tt>--</tt>" was only used for em-dashes. Second,
|
271
|
+
# em-dashes are more common than en-dashes, and so it sort of makes
|
272
|
+
# sense that the shortcut should be shorter to type. (Thanks to
|
273
|
+
# Aaron Swartz for the idea.)
|
274
|
+
#
|
275
|
+
def educate_dashes_inverted(str, prevent_breaks=false)
|
276
|
+
str = educate(str, TRIPLE_DASH, entity(:en_dash), prevent_breaks)
|
277
|
+
educate(str, DOUBLE_DASH, entity(:em_dash), prevent_breaks)
|
278
|
+
end
|
279
|
+
|
280
|
+
# Return the string, with each instance of "<tt>...</tt>" translated
|
281
|
+
# to an ellipsis HTML entity. Also converts the case where there are
|
282
|
+
# spaces between the dots.
|
283
|
+
#
|
284
|
+
def educate_ellipses(str, prevent_breaks=false)
|
285
|
+
str = educate(str, RubyPants.n_of(3, '.'), entity(:ellipsis), prevent_breaks)
|
286
|
+
educate(str, /(?<!\.|\.[[:space:]])\.[[:space:]]\.[[:space:]]\.(?!\.|[[:space:]]\.)/,
|
287
|
+
entity(:ellipsis), prevent_breaks)
|
288
|
+
end
|
289
|
+
|
290
|
+
# Return the string, with "<tt>``backticks''</tt>"-style double quotes
|
291
|
+
# translated into HTML curly quote entities.
|
292
|
+
#
|
293
|
+
def educate_backticks(str)
|
294
|
+
str.
|
295
|
+
gsub("``", entity(:double_left_quote)).
|
296
|
+
gsub("''", entity(:double_right_quote))
|
297
|
+
end
|
298
|
+
|
299
|
+
# Return the string, with "<tt>`backticks'</tt>"-style single quotes
|
300
|
+
# translated into HTML curly quote entities.
|
301
|
+
#
|
302
|
+
def educate_single_backticks(str)
|
303
|
+
str.
|
304
|
+
gsub("`", entity(:single_left_quote)).
|
305
|
+
gsub("'", entity(:single_right_quote))
|
306
|
+
end
|
307
|
+
|
308
|
+
# Return the string, with "educated" curly quote HTML entities.
|
309
|
+
#
|
310
|
+
def educate_quotes(str)
|
311
|
+
punct_class = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'
|
312
|
+
|
313
|
+
str = str.dup
|
314
|
+
|
315
|
+
# Special case if the very first character is a quote followed by
|
316
|
+
# punctuation at a non-word-break. Close the quotes by brute
|
317
|
+
# force:
|
318
|
+
str.gsub!(/^'(?=#{punct_class}\B)/,
|
319
|
+
entity(:single_right_quote))
|
320
|
+
str.gsub!(/^"(?=#{punct_class}\B)/,
|
321
|
+
entity(:double_right_quote))
|
322
|
+
|
323
|
+
# Special case for double sets of quotes, e.g.:
|
324
|
+
# <p>He said, "'Quoted' words in a larger quote."</p>
|
325
|
+
str.gsub!(/"'(?=\w)/,
|
326
|
+
"#{entity(:double_left_quote)}#{entity(:single_left_quote)}")
|
327
|
+
str.gsub!(/'"(?=\w)/,
|
328
|
+
"#{entity(:single_left_quote)}#{entity(:double_left_quote)}")
|
329
|
+
|
330
|
+
# Special case for decade abbreviations (the '80s):
|
331
|
+
str.gsub!(/'(?=\d\ds)/,
|
332
|
+
entity(:single_right_quote))
|
333
|
+
|
334
|
+
close_class = %![^\ \t\r\n\\[\{\(\-]!
|
335
|
+
dec_dashes = "#{entity(:en_dash)}|#{entity(:em_dash)}"
|
336
|
+
|
337
|
+
# Get most opening single quotes:
|
338
|
+
str.gsub!(/([[:space:]]|&nbsp;|--|&[mn]dash;|#{dec_dashes}|&#x201[34];)'(?=\w)/,
|
339
|
+
'\1' + entity(:single_left_quote))
|
340
|
+
|
341
|
+
# Single closing quotes:
|
342
|
+
str.gsub!(/(#{close_class})'/,
|
343
|
+
'\1' + entity(:single_right_quote))
|
344
|
+
str.gsub!(/'(\s|s\b|$)/,
|
345
|
+
entity(:single_right_quote) + '\1')
|
346
|
+
|
347
|
+
# Any remaining single quotes should be opening ones:
|
348
|
+
str.gsub!(/'/,
|
349
|
+
entity(:single_left_quote))
|
350
|
+
|
351
|
+
# Get most opening double quotes:
|
352
|
+
str.gsub!(/([[:space:]]|&nbsp;|--|&[mn]dash;|#{dec_dashes}|&#x201[34];)"(?=\w)/,
|
353
|
+
'\1' + entity(:double_left_quote))
|
354
|
+
|
355
|
+
# Double closing quotes:
|
356
|
+
str.gsub!(/(#{close_class})"/,
|
357
|
+
'\1' + entity(:double_right_quote))
|
358
|
+
str.gsub!(/"(\s|s\b|$)/,
|
359
|
+
entity(:double_right_quote) + '\1')
|
360
|
+
|
361
|
+
# Any remaining quotes should be opening ones:
|
362
|
+
str.gsub!(/"/,
|
363
|
+
entity(:double_left_quote))
|
364
|
+
|
365
|
+
str
|
366
|
+
end
|
367
|
+
|
368
|
+
# Return the string, with each RubyPants HTML entity translated to
|
369
|
+
# its ASCII counterpart.
|
370
|
+
#
|
371
|
+
# Note: This is not reversible (but exactly the same as in SmartyPants)
|
372
|
+
#
|
373
|
+
def stupefy_entities(str)
|
374
|
+
new_str = str.dup
|
375
|
+
|
376
|
+
{
|
377
|
+
:en_dash => '-',
|
378
|
+
:em_dash => '--',
|
379
|
+
:single_left_quote => "'",
|
380
|
+
:single_right_quote => "'",
|
381
|
+
:double_left_quote => '"',
|
382
|
+
:double_right_quote => '"',
|
383
|
+
:ellipsis => '...'
|
384
|
+
}.each do |k,v|
|
385
|
+
new_str.gsub!(/#{entity(k)}/, v)
|
386
|
+
end
|
387
|
+
|
388
|
+
new_str
|
389
|
+
end
|
390
|
+
|
391
|
+
# Return an array of the tokens comprising the string. Each token is
|
392
|
+
# either a tag (possibly with nested tags contained therein, such
|
393
|
+
# as <tt><a href="<MTFoo>"></tt>), or a run of text between
|
394
|
+
# tags. Each element of the array is a two-element array; the first
|
395
|
+
# is either :tag or :text; the second is the actual value.
|
396
|
+
#
|
397
|
+
# Based on the <tt>_tokenize()</tt> subroutine from Brad Choate's
|
398
|
+
# MTRegex plugin. <http://www.bradchoate.com/past/mtregex.php>
|
399
|
+
#
|
400
|
+
# This is actually the easier variant using tag_soup, as used by
|
401
|
+
# Chad Miller in the Python port of SmartyPants.
|
402
|
+
#
|
403
|
+
def tokenize
|
404
|
+
tag_soup = /([^<]*)(<!--.*?-->|<[^>]*>)/m
|
405
|
+
|
406
|
+
tokens = []
|
407
|
+
|
408
|
+
prev_end = 0
|
409
|
+
|
410
|
+
scan(tag_soup) do
|
411
|
+
tokens << [:text, $1] if $1 != ""
|
412
|
+
tokens << [:tag, $2]
|
413
|
+
prev_end = $~.end(0)
|
414
|
+
end
|
415
|
+
|
416
|
+
if prev_end < size
|
417
|
+
tokens << [:text, self[prev_end..-1]]
|
418
|
+
end
|
419
|
+
|
420
|
+
tokens
|
421
|
+
end
|
422
|
+
|
423
|
+
def default_entities
|
424
|
+
{
|
425
|
+
:single_left_quote => "&#8216;",
|
426
|
+
:double_left_quote => "&#8220;",
|
427
|
+
:single_right_quote => "&#8217;",
|
428
|
+
:double_right_quote => "&#8221;",
|
429
|
+
:em_dash => "&#8212;",
|
430
|
+
:en_dash => "&#8211;",
|
431
|
+
:ellipsis => "&#8230;",
|
432
|
+
:non_breaking_space => "&nbsp;",
|
433
|
+
:word_joiner => "&#8288;",
|
434
|
+
}
|
435
|
+
end
|
436
|
+
|
437
|
+
def named_entities
|
438
|
+
{
|
439
|
+
:single_left_quote => '&lsquo;',
|
440
|
+
:double_left_quote => "&ldquo;",
|
441
|
+
:single_right_quote => "&rsquo;",
|
442
|
+
:double_right_quote => "&rdquo;",
|
443
|
+
:em_dash => "&mdash;",
|
444
|
+
:en_dash => "&ndash;",
|
445
|
+
:ellipsis => "&hellip;",
|
446
|
+
:non_breaking_space => "&nbsp;",
|
447
|
+
# :word_joiner => N/A,
|
448
|
+
}
|
449
|
+
end
|
450
|
+
|
451
|
+
def character_entities
|
452
|
+
{
|
453
|
+
:single_left_quote => "\u2018",
|
454
|
+
:double_left_quote => "\u201C",
|
455
|
+
:single_right_quote => "\u2019",
|
456
|
+
:double_right_quote => "\u201D",
|
457
|
+
:em_dash => "\u2014",
|
458
|
+
:en_dash => "\u2013",
|
459
|
+
:ellipsis => "\u2026",
|
460
|
+
}
|
461
|
+
end
|
462
|
+
|
463
|
+
def character_spaces
|
464
|
+
{
|
465
|
+
:non_breaking_space => "\u00A0",
|
466
|
+
:word_joiner => "\u2060",
|
467
|
+
}
|
468
|
+
end
|
469
|
+
|
470
|
+
def entity(key)
|
471
|
+
@entities[key]
|
472
|
+
end
|
473
|
+
end
|
data/lib/version.rb
ADDED
data/rubypants.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
lib = File.expand_path('../lib', __FILE__)
|
4
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
|
-
require
|
5
|
+
require 'version'
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = 'rubypants'
|
9
|
-
s.version =
|
9
|
+
s.version = RubyPantsVersion::VERSION
|
10
10
|
s.summary = "RubyPants is a Ruby port of the smart-quotes library SmartyPants."
|
11
11
|
s.description = <<-EOF
|
12
12
|
The original "SmartyPants" is a free web publishing plug-in for
|
data/test/rubypants_test.rb
CHANGED
@@ -235,4 +235,11 @@ EOF
|
|
235
235
|
def test_named_entities
|
236
236
|
assert_rp_equal "Testing 'FOO!'", "Testing &lsquo;FOO!&rsquo;", [2, :named_entities]
|
237
237
|
end
|
238
|
+
|
239
|
+
def test_character_entities
|
240
|
+
assert_rp_equal "Testing 'FOO!'", "Testing ‘FOO!’", [2, :character_entities]
|
241
|
+
assert_rp_equal "foo---bar", "foo&#8288;—bar", [2, :character_entities, :prevent_breaks]
|
242
|
+
assert_rp_equal "foo ---bar", "foo&nbsp;—bar", [2, :character_entities, :prevent_breaks]
|
243
|
+
assert_rp_equal "foo ---bar", "foo\u00A0—bar", [2, :character_entities, :character_spaces, :prevent_breaks]
|
244
|
+
end
|
238
245
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rubypants
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Gruber
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date:
|
15
|
+
date: 2018-02-26 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: minitest
|
@@ -45,8 +45,7 @@ files:
|
|
45
45
|
- README.rdoc
|
46
46
|
- Rakefile
|
47
47
|
- lib/rubypants.rb
|
48
|
-
- lib/rubypants/core.rb
|
49
|
-
- lib/rubypants/version.rb
|
48
|
+
- lib/version.rb
|
50
49
|
- rubypants.gemspec
|
51
50
|
- test/helper.rb
|
52
51
|
- test/rubypants_test.rb
|
@@ -70,7 +69,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
70
69
|
version: '0'
|
71
70
|
requirements: []
|
72
71
|
rubyforge_project:
|
73
|
-
rubygems_version: 2.
|
72
|
+
rubygems_version: 2.6.13
|
74
73
|
signing_key:
|
75
74
|
specification_version: 4
|
76
75
|
summary: RubyPants is a Ruby port of the smart-quotes library SmartyPants.
|
data/lib/rubypants/core.rb
DELETED
@@ -1,447 +0,0 @@
|
|
1
|
-
class RubyPants < String
|
2
|
-
|
3
|
-
# Create a new RubyPants instance with the text in +string+.
|
4
|
-
#
|
5
|
-
# Allowed elements in the options array:
|
6
|
-
#
|
7
|
-
# 0 :: do nothing
|
8
|
-
# 1 :: enable all, using only em-dash shortcuts
|
9
|
-
# 2 :: enable all, using old school en- and em-dash shortcuts (*default*)
|
10
|
-
# 3 :: enable all, using inverted old school en and em-dash shortcuts
|
11
|
-
# -1 :: stupefy (translate HTML entities to their ASCII-counterparts)
|
12
|
-
#
|
13
|
-
# If you don't like any of these defaults, you can pass symbols to change
|
14
|
-
# RubyPants' behavior:
|
15
|
-
#
|
16
|
-
# <tt>:quotes</tt> :: quotes
|
17
|
-
# <tt>:backticks</tt> :: backtick quotes (``double'' only)
|
18
|
-
# <tt>:allbackticks</tt> :: backtick quotes (``double'' and `single')
|
19
|
-
# <tt>:dashes</tt> :: dashes
|
20
|
-
# <tt>:oldschool</tt> :: old school dashes
|
21
|
-
# <tt>:inverted</tt> :: inverted old school dashes
|
22
|
-
# <tt>:ellipses</tt> :: ellipses
|
23
|
-
# <tt>:prevent_breaks</tt> :: use nbsp and word-joiner to avoid breaking
|
24
|
-
# before dashes and ellipses
|
25
|
-
# <tt>:named_entities</tt> :: used named entities instead of the default
|
26
|
-
# decimal entities (see below)
|
27
|
-
# <tt>:convertquotes</tt> :: convert <tt>"</tt> entities to
|
28
|
-
# <tt>"</tt>
|
29
|
-
# <tt>:stupefy</tt> :: translate RubyPants HTML entities
|
30
|
-
# to their ASCII counterparts.
|
31
|
-
#
|
32
|
-
# In addition, you can customize the HTML entities that will be injected by
|
33
|
-
# passing in a hash for the final argument. The defaults for these entities
|
34
|
-
# are as follows:
|
35
|
-
#
|
36
|
-
# <tt>:single_left_quote</tt> :: <tt>‘</tt>
|
37
|
-
# <tt>:double_left_quote</tt> :: <tt>“</tt>
|
38
|
-
# <tt>:single_right_quote</tt> :: <tt>’</tt>
|
39
|
-
# <tt>:double_right_quote</tt> :: <tt>”</tt>
|
40
|
-
# <tt>:em_dash</tt> :: <tt>—</tt>
|
41
|
-
# <tt>:en_dash</tt> :: <tt>–</tt>
|
42
|
-
# <tt>:ellipsis</tt> :: <tt>…</tt>
|
43
|
-
# <tt>:html_quote</tt> :: <tt>"</tt>
|
44
|
-
# <tt>:non_breaking_space</tt> :: <tt> </tt>
|
45
|
-
# <tt>:word_joiner</tt> :: <tt>⁠</tt>
|
46
|
-
#
|
47
|
-
# If the <tt>:named_entities</tt> option is used, the default entities are
|
48
|
-
# as follows:
|
49
|
-
#
|
50
|
-
# <tt>:single_left_quote</tt> :: <tt>‘</tt>
|
51
|
-
# <tt>:double_left_quote</tt> :: <tt>“</tt>
|
52
|
-
# <tt>:single_right_quote</tt> :: <tt>’</tt>
|
53
|
-
# <tt>:double_right_quote</tt> :: <tt>”</tt>
|
54
|
-
# <tt>:em_dash</tt> :: <tt>—</tt>
|
55
|
-
# <tt>:en_dash</tt> :: <tt>–</tt>
|
56
|
-
# <tt>:ellipsis</tt> :: <tt>…</tt>
|
57
|
-
# <tt>:html_quote</tt> :: <tt>"</tt>
|
58
|
-
# <tt>:non_breaking_space</tt> :: <tt> </tt>
|
59
|
-
# <tt>:word_joiner</tt> :: <tt>⁠</tt>
|
60
|
-
#
|
61
|
-
def initialize(string, options=[2], entities = {})
|
62
|
-
super string
|
63
|
-
|
64
|
-
@options = [*options]
|
65
|
-
@entities = default_entities
|
66
|
-
@entities.merge!(named_entities) if @options.include?(:named_entities)
|
67
|
-
@entities.merge!(entities)
|
68
|
-
end
|
69
|
-
|
70
|
-
# Apply SmartyPants transformations.
|
71
|
-
def to_html
|
72
|
-
do_quotes = do_backticks = do_dashes = do_ellipses = do_stupify = nil
|
73
|
-
convert_quotes = prevent_breaks = nil
|
74
|
-
|
75
|
-
if @options.include?(0)
|
76
|
-
# Do nothing.
|
77
|
-
return self
|
78
|
-
elsif @options.include?(1)
|
79
|
-
# Do everything, turn all options on.
|
80
|
-
do_quotes = do_backticks = do_ellipses = true
|
81
|
-
do_dashes = :normal
|
82
|
-
elsif @options.include?(2)
|
83
|
-
# Do everything, turn all options on, use old school dash shorthand.
|
84
|
-
do_quotes = do_backticks = do_ellipses = true
|
85
|
-
do_dashes = :oldschool
|
86
|
-
elsif @options.include?(3)
|
87
|
-
# Do everything, turn all options on, use inverted old school
|
88
|
-
# dash shorthand.
|
89
|
-
do_quotes = do_backticks = do_ellipses = true
|
90
|
-
do_dashes = :inverted
|
91
|
-
elsif @options.include?(-1)
|
92
|
-
do_stupefy = true
|
93
|
-
end
|
94
|
-
|
95
|
-
# Explicit flags override numeric flag groups.
|
96
|
-
do_quotes = true if @options.include?(:quotes)
|
97
|
-
do_backticks = true if @options.include?(:backticks)
|
98
|
-
do_backticks = :both if @options.include?(:allbackticks)
|
99
|
-
do_dashes = :normal if @options.include?(:dashes)
|
100
|
-
do_dashes = :oldschool if @options.include?(:oldschool)
|
101
|
-
do_dashes = :inverted if @options.include?(:inverted)
|
102
|
-
prevent_breaks = true if @options.include?(:prevent_breaks)
|
103
|
-
do_ellipses = true if @options.include?(:ellipses)
|
104
|
-
convert_quotes = true if @options.include?(:convertquotes)
|
105
|
-
do_stupefy = true if @options.include?(:stupefy)
|
106
|
-
|
107
|
-
# Parse the HTML
|
108
|
-
tokens = tokenize
|
109
|
-
|
110
|
-
# Keep track of when we're inside <pre> or <code> tags.
|
111
|
-
in_pre = nil
|
112
|
-
|
113
|
-
# Here is the result stored in.
|
114
|
-
result = ""
|
115
|
-
|
116
|
-
# This is a cheat, used to get some context for one-character
|
117
|
-
# tokens that consist of just a quote char. What we do is remember
|
118
|
-
# the last character of the previous text token, to use as context
|
119
|
-
# to curl single- character quote tokens correctly.
|
120
|
-
prev_token_last_char = nil
|
121
|
-
|
122
|
-
tokens.each do |token|
|
123
|
-
if token.first == :tag
|
124
|
-
result << token[1]
|
125
|
-
if token[1].end_with? '/>'
|
126
|
-
# ignore self-closing tags
|
127
|
-
elsif token[1] =~ %r!\A<(/?)(pre|code|kbd|script|style|math)[\s>]!
|
128
|
-
if $1 == '' && ! in_pre
|
129
|
-
in_pre = $2
|
130
|
-
elsif $1 == '/' && $2 == in_pre
|
131
|
-
in_pre = nil
|
132
|
-
end
|
133
|
-
end
|
134
|
-
else
|
135
|
-
t = token[1]
|
136
|
-
|
137
|
-
# Remember last char of this token before processing.
|
138
|
-
last_char = t[-1].chr
|
139
|
-
|
140
|
-
unless in_pre
|
141
|
-
t = process_escapes t
|
142
|
-
|
143
|
-
t.gsub!(/&quot;/, '"') if convert_quotes
|
144
|
-
|
145
|
-
if do_dashes
|
146
|
-
t = educate_dashes t, prevent_breaks if do_dashes == :normal
|
147
|
-
t = educate_dashes_oldschool t, prevent_breaks if do_dashes == :oldschool
|
148
|
-
t = educate_dashes_inverted t, prevent_breaks if do_dashes == :inverted
|
149
|
-
end
|
150
|
-
|
151
|
-
t = educate_ellipses t, prevent_breaks if do_ellipses
|
152
|
-
|
153
|
-
# Note: backticks need to be processed before quotes.
|
154
|
-
if do_backticks
|
155
|
-
t = educate_backticks t
|
156
|
-
t = educate_single_backticks t if do_backticks == :both
|
157
|
-
end
|
158
|
-
|
159
|
-
if do_quotes
|
160
|
-
if t == "'"
|
161
|
-
# Special case: single-character ' token
|
162
|
-
if prev_token_last_char =~ /\S/
|
163
|
-
t = entity(:single_right_quote)
|
164
|
-
else
|
165
|
-
t = entity(:single_left_quote)
|
166
|
-
end
|
167
|
-
elsif t == '"'
|
168
|
-
# Special case: single-character " token
|
169
|
-
if prev_token_last_char =~ /\S/
|
170
|
-
t = entity(:double_right_quote)
|
171
|
-
else
|
172
|
-
t = entity(:double_left_quote)
|
173
|
-
end
|
174
|
-
else
|
175
|
-
# Normal case:
|
176
|
-
t = educate_quotes t
|
177
|
-
end
|
178
|
-
end
|
179
|
-
|
180
|
-
t = stupefy_entities t if do_stupefy
|
181
|
-
end
|
182
|
-
|
183
|
-
prev_token_last_char = last_char
|
184
|
-
result << t
|
185
|
-
end
|
186
|
-
end
|
187
|
-
|
188
|
-
# Done
|
189
|
-
result
|
190
|
-
end
|
191
|
-
|
192
|
-
protected
|
193
|
-
|
194
|
-
# Return the string, with after processing the following backslash
|
195
|
-
# escape sequences. This is useful if you want to force a "dumb" quote
|
196
|
-
# or other character to appear.
|
197
|
-
#
|
198
|
-
# Escaped are:
|
199
|
-
# \\ \" \' \. \- \`
|
200
|
-
#
|
201
|
-
def process_escapes(str)
|
202
|
-
str.
|
203
|
-
gsub('\\\\', '&#92;').
|
204
|
-
gsub('\"', '&quot;').
|
205
|
-
gsub("\\\'", '&#39;').
|
206
|
-
gsub('\.', '&#46;').
|
207
|
-
gsub('\-', '&#45;').
|
208
|
-
gsub('\`', '&#96;')
|
209
|
-
end
|
210
|
-
|
211
|
-
def self.n_of(n, x)
|
212
|
-
x = Regexp.escape(x)
|
213
|
-
/(?<!#{x}) # not preceded by x
|
214
|
-
#{x}{#{n}} # n of x
|
215
|
-
(?!#{x}) # not followed by x
|
216
|
-
/x
|
217
|
-
end
|
218
|
-
|
219
|
-
DOUBLE_DASH = n_of(2, '-')
|
220
|
-
TRIPLE_DASH = n_of(3, '-')
|
221
|
-
|
222
|
-
# Return +str+ replacing all +patt+ with +repl+. If +prevent_breaks+ is true,
|
223
|
-
# then replace spaces preceding +patt+ with a non-breaking space, and if there
|
224
|
-
# are no spaces, then insert a word-joiner.
|
225
|
-
#
|
226
|
-
def educate(str, patt, repl, prevent_breaks)
|
227
|
-
patt = /(?<spaces>[[:space:]]*)#{patt}/
|
228
|
-
str.gsub(patt) do
|
229
|
-
spaces = if prevent_breaks && $~['spaces'].length > 0
|
230
|
-
entity(:non_breaking_space) # * $~['spaces'].length
|
231
|
-
elsif prevent_breaks
|
232
|
-
entity(:word_joiner)
|
233
|
-
else
|
234
|
-
$~['spaces']
|
235
|
-
end
|
236
|
-
spaces + repl
|
237
|
-
end
|
238
|
-
end
|
239
|
-
|
240
|
-
# Return the string, with each instance of "<tt>--</tt>" translated to an
|
241
|
-
# em-dash HTML entity.
|
242
|
-
#
|
243
|
-
def educate_dashes(str, prevent_breaks=false)
|
244
|
-
educate(str, DOUBLE_DASH, entity(:em_dash), prevent_breaks)
|
245
|
-
end
|
246
|
-
|
247
|
-
# Return the string, with each instance of "<tt>--</tt>" translated to an
|
248
|
-
# en-dash HTML entity, and each "<tt>---</tt>" translated to an
|
249
|
-
# em-dash HTML entity.
|
250
|
-
#
|
251
|
-
def educate_dashes_oldschool(str, prevent_breaks=false)
|
252
|
-
str = educate(str, TRIPLE_DASH, entity(:em_dash), prevent_breaks)
|
253
|
-
educate(str, DOUBLE_DASH, entity(:en_dash), prevent_breaks)
|
254
|
-
end
|
255
|
-
|
256
|
-
# Return the string, with each instance of "<tt>--</tt>" translated
|
257
|
-
# to an em-dash HTML entity, and each "<tt>---</tt>" translated to
|
258
|
-
# an en-dash HTML entity. Two reasons why: First, unlike the en- and
|
259
|
-
# em-dash syntax supported by +educate_dashes_oldschool+, it's
|
260
|
-
# compatible with existing entries written before SmartyPants 1.1,
|
261
|
-
# back when "<tt>--</tt>" was only used for em-dashes. Second,
|
262
|
-
# em-dashes are more common than en-dashes, and so it sort of makes
|
263
|
-
# sense that the shortcut should be shorter to type. (Thanks to
|
264
|
-
# Aaron Swartz for the idea.)
|
265
|
-
#
|
266
|
-
def educate_dashes_inverted(str, prevent_breaks = false)
  # Same two passes as the old-school variant, with the entities swapped:
  # TRIPLE_DASH => en-dash first, then DOUBLE_DASH => em-dash.
  with_en_dashes = educate(str, TRIPLE_DASH, entity(:en_dash), prevent_breaks)
  educate(with_en_dashes, DOUBLE_DASH, entity(:em_dash), prevent_breaks)
end
|
270
|
-
|
271
|
-
# Return the string, with each instance of "<tt>...</tt>" translated
|
272
|
-
# to an ellipsis HTML entity. Also converts the case where there are
|
273
|
-
# spaces between the dots.
|
274
|
-
#
|
275
|
-
def educate_ellipses(str, prevent_breaks = false)
  # Second-pass pattern: three dots separated by single whitespace characters.
  # The look-behind and look-ahead reject a match that is preceded or followed
  # by another dot (directly or after one whitespace character).
  spaced_dots = /(?<!\.|\.[[:space:]])\.[[:space:]]\.[[:space:]]\.(?!\.|[[:space:]]\.)/

  # First pass uses the pattern built by RubyPants.n_of(3, '.').
  compact_done = educate(str, RubyPants.n_of(3, '.'), entity(:ellipsis), prevent_breaks)
  educate(compact_done, spaced_dots, entity(:ellipsis), prevent_breaks)
end
|
280
|
-
|
281
|
-
# Return the string, with "<tt>``backticks''</tt>"-style single quotes
|
282
|
-
# translated into HTML curly quote entities.
|
283
|
-
#
|
284
|
-
def educate_backticks(str)
  # Literal (non-regexp) replacements: two backticks open a double quote,
  # two apostrophes close it. Returns a new string.
  str
    .gsub("``", entity(:double_left_quote))
    .gsub("''", entity(:double_right_quote))
end
|
289
|
-
|
290
|
-
# Return the string, with "<tt>`backticks'</tt>"-style single quotes
|
291
|
-
# translated into HTML curly quote entities.
|
292
|
-
#
|
293
|
-
def educate_single_backticks(str)
  # Literal replacements: every backtick opens a single quote and every
  # apostrophe closes one. Returns a new string.
  str
    .gsub("`", entity(:single_left_quote))
    .gsub("'", entity(:single_right_quote))
end
|
298
|
-
|
299
|
-
# Return the string, with "educated" curly quote HTML entities.
|
300
|
-
#
|
301
|
-
def educate_quotes(str)
  # Works on a copy of +str+ and returns it with straight quotes replaced by
  # the configured curly-quote entities. The substitutions are order
  # dependent: special cases first, then opening quotes, then closing quotes,
  # and finally anything left over is treated as an opening quote.
  punct_class = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'

  str = str.dup

  # Special case if the very first character is a quote followed by
  # punctuation at a non-word-break. Close the quotes by brute force.
  # NOTE(review): in Ruby `^` anchors at the start of every line, not only at
  # the start of the string; left unchanged here.
  str.gsub!(/^'(?=#{punct_class}\B)/,
            entity(:single_right_quote))
  str.gsub!(/^"(?=#{punct_class}\B)/,
            entity(:double_right_quote))

  # Special case for double sets of quotes, e.g.:
  #   <p>He said, "'Quoted' words in a larger quote."</p>
  str.gsub!(/"'(?=\w)/,
            "#{entity(:double_left_quote)}#{entity(:single_left_quote)}")
  str.gsub!(/'"(?=\w)/,
            "#{entity(:single_left_quote)}#{entity(:double_left_quote)}")

  # Special case for decade abbreviations (the '80s):
  str.gsub!(/'(?=\d\ds)/,
            entity(:single_right_quote))

  # A quote directly after any character in this class is a closing quote.
  close_class = %![^\ \t\r\n\\[\{\(\-]!
  dec_dashes = "#{entity(:en_dash)}|#{entity(:em_dash)}"

  # Contexts after which a quote followed by a word character is an opening
  # quote: whitespace, a non-breaking-space entity, a literal "--", named or
  # configured dash entities, or a hexadecimal dash entity.
  opening_context = "[[:space:]]|&nbsp;|--|&[mn]dash;|#{dec_dashes}|&#x201[34];"

  # Get most opening single quotes:
  str.gsub!(/(#{opening_context})'(?=\w)/,
            '\1' + entity(:single_left_quote))

  # Single closing quotes:
  str.gsub!(/(#{close_class})'/,
            '\1' + entity(:single_right_quote))
  str.gsub!(/'(\s|s\b|$)/,
            entity(:single_right_quote) + '\1')

  # Any remaining single quotes should be opening ones:
  str.gsub!(/'/,
            entity(:single_left_quote))

  # Get most opening double quotes:
  str.gsub!(/(#{opening_context})"(?=\w)/,
            '\1' + entity(:double_left_quote))

  # Double closing quotes:
  str.gsub!(/(#{close_class})"/,
            '\1' + entity(:double_right_quote))
  str.gsub!(/"(\s|s\b|$)/,
            entity(:double_right_quote) + '\1')

  # Any remaining quotes should be opening ones:
  str.gsub!(/"/,
            entity(:double_left_quote))

  str
end
|
358
|
-
|
359
|
-
# Return the string, with each RubyPants HTML entity translated to
|
360
|
-
# its ASCII counterpart.
|
361
|
-
#
|
362
|
-
# Note: This is not reversible (but exactly the same as in SmartyPants)
|
363
|
-
#
|
364
|
-
def stupefy_entities(str)
  # Works on a copy of +str+ and returns it with every configured entity
  # replaced by its ASCII stand-in. Left and right quotes collapse onto the
  # same character, so the result cannot be converted back.
  ascii_for = {
    :en_dash            => '-',
    :em_dash            => '--',
    :single_left_quote  => "'",
    :single_right_quote => "'",
    :double_left_quote  => '"',
    :double_right_quote => '"',
    :ellipsis           => '...'
  }

  new_str = str.dup

  ascii_for.each do |key, ascii|
    encoded = entity(key)
    # An empty pattern would match between every character; skip it.
    next if encoded.nil? || encoded.empty?

    # String pattern => literal match, so entity text that happens to contain
    # regexp metacharacters is not interpreted as a pattern.
    new_str.gsub!(encoded, ascii)
  end

  new_str
end
|
381
|
-
|
382
|
-
# Return an array of the tokens comprising the string. Each token is
|
383
|
-
# either a tag (possibly with nested, tags contained therein, such
|
384
|
-
# as <tt><a href="<MTFoo>"></tt>, or a run of text between
|
385
|
-
# tags. Each element of the array is a two-element array; the first
|
386
|
-
# is either :tag or :text; the second is the actual value.
|
387
|
-
#
|
388
|
-
# Based on the <tt>_tokenize()</tt> subroutine from Brad Choate's
|
389
|
-
# MTRegex plugin. <http://www.bradchoate.com/past/mtregex.php>
|
390
|
-
#
|
391
|
-
# This is actually the easier variant using tag_soup, as used by
|
392
|
-
# Chad Miller in the Python port of SmartyPants.
|
393
|
-
#
|
394
|
-
def tokenize
  # Splits the receiver into [[:text, "..."], [:tag, "..."], ...] pairs.
  #
  # Each match is a (possibly empty) run of non-"<" text followed by either
  # an HTML comment or a "<...>" tag; /m lets comments span lines.
  tag_soup = /([^<]*)(<!--.*?-->|<[^>]*>)/m

  tokens = []
  prev_end = 0

  scan(tag_soup) do |text, tag|
    tokens << [:text, text] unless text.empty?
    tokens << [:tag, tag]
    # Remember where the last tag ended so trailing text can be picked up.
    prev_end = Regexp.last_match.end(0)
  end

  # Whatever follows the final tag (or the whole string when there were no
  # tags at all) is one last text token.
  tokens << [:text, self[prev_end..-1]] if prev_end < size

  tokens
end
|
413
|
-
|
414
|
-
def default_entities
  # Entity table using numeric character references (plus &quot; and
  # &nbsp;). Values are kept as ASCII entity text rather than literal
  # characters so the output does not depend on the document encoding.
  {
    :single_left_quote  => "&#8216;",
    :double_left_quote  => "&#8220;",
    :single_right_quote => "&#8217;",
    :double_right_quote => "&#8221;",
    :em_dash            => "&#8212;",
    :en_dash            => "&#8211;",
    :ellipsis           => "&#8230;",
    :html_quote         => "&quot;",
    :non_breaking_space => "&nbsp;",
    :word_joiner        => "&#8288;",
  }
end
|
428
|
-
|
429
|
-
def named_entities
  # Entity table using named HTML entities. There is no named entity for the
  # word joiner, so that key is intentionally absent and looks up as nil.
  {
    :single_left_quote  => '&lsquo;',
    :double_left_quote  => "&ldquo;",
    :single_right_quote => "&rsquo;",
    :double_right_quote => "&rdquo;",
    :em_dash            => "&mdash;",
    :en_dash            => "&ndash;",
    :ellipsis           => "&hellip;",
    :html_quote         => "&quot;",
    :non_breaking_space => "&nbsp;",
    # :word_joiner      => N/A,
  }
end
|
443
|
-
|
444
|
-
def entity(key)
  # Looks +key+ (e.g. :em_dash) up in the @entities table held by this
  # instance and returns whatever is stored there.
  @entities[key]
end
|
447
|
-
end
|
data/lib/rubypants/version.rb
DELETED