rubypants-unicode 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +119 -0
- data/Rakefile +56 -0
- data/install.rb +9 -0
- data/rubypants.rb +491 -0
- data/test_rubypants.rb +164 -0
- metadata +73 -0
data/README.md
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
# RubyPants Unicode — SmartyPants ported to Ruby
|
2
|
+
|
3
|
+
Switched to unicode output (UTF-8) instead of HTML entities by Chris Chapman
|
4
|
+
Copyright © 2012 Chris Chapman
|
5
|
+
|
6
|
+
Ported by Christian Neukirchen <mailto:chneukirchen@gmail.com>
|
7
|
+
Copyright © 2004 Christian Neukirchen
|
8
|
+
|
9
|
+
Incorporates ideas, comments and documentation by Chad Miller
|
10
|
+
Copyright © 2004 Chad Miller
|
11
|
+
|
12
|
+
Original SmartyPants by John Gruber
|
13
|
+
Copyright © 2003 John Gruber
|
14
|
+
|
15
|
+
|
16
|
+
## RubyPants
|
17
|
+
|
18
|
+
RubyPants is a Ruby port of the smart-quotes library SmartyPants.
|
19
|
+
|
20
|
+
The original “SmartyPants” is a free web publishing plug-in for
|
21
|
+
Movable Type, Blosxom, and BBEdit that easily translates plain ASCII
|
22
|
+
punctuation characters into “smart” typographic punctuation HTML
|
23
|
+
entities.
|
24
|
+
|
25
|
+
See rubypants.rb for more information.
|
26
|
+
|
27
|
+
|
28
|
+
## Incompatibilities
|
29
|
+
|
30
|
+
RubyPants uses a different API than SmartyPants; it is compatible with
|
31
|
+
Red- and BlueCloth. Usually, you call RubyPants like this:
|
32
|
+
|
33
|
+
```ruby
|
34
|
+
nicehtml = RubyPants.new(uglyhtml, options).to_html
|
35
|
+
```
|
36
|
+
|
37
|
+
where +options+ is an Array of Integers and/or Symbols (if you don’t
|
38
|
+
pass any options, RubyPants will use <tt>[2]</tt> as default.)
|
39
|
+
|
40
|
+
*Note*:: This is incompatible with SmartyPants, which uses <tt>[1]</tt>
|
41
|
+
for default.
|
42
|
+
|
43
|
+
The exact meaning of numbers and symbols is documented at RubyPants#new.
|
44
|
+
|
45
|
+
|
46
|
+
## SmartyPants license:
|
47
|
+
|
48
|
+
Copyright © 2003 John Gruber
|
49
|
+
(http://daringfireball.net)
|
50
|
+
All rights reserved.
|
51
|
+
|
52
|
+
Redistribution and use in source and binary forms, with or without
|
53
|
+
modification, are permitted provided that the following conditions
|
54
|
+
are met:
|
55
|
+
|
56
|
+
* Redistributions of source code must retain the above copyright
|
57
|
+
notice, this list of conditions and the following disclaimer.
|
58
|
+
|
59
|
+
* Redistributions in binary form must reproduce the above copyright
|
60
|
+
notice, this list of conditions and the following disclaimer in
|
61
|
+
the documentation and/or other materials provided with the
|
62
|
+
distribution.
|
63
|
+
|
64
|
+
* Neither the name “SmartyPants” nor the names of its contributors
|
65
|
+
may be used to endorse or promote products derived from this
|
66
|
+
software without specific prior written permission.
|
67
|
+
|
68
|
+
This software is provided by the copyright holders and contributors
|
69
|
+
“as is” and any express or implied warranties, including, but not
|
70
|
+
limited to, the implied warranties of merchantability and fitness
|
71
|
+
for a particular purpose are disclaimed. In no event shall the
|
72
|
+
copyright owner or contributors be liable for any direct, indirect,
|
73
|
+
incidental, special, exemplary, or consequential damages (including,
|
74
|
+
but not limited to, procurement of substitute goods or services;
|
75
|
+
loss of use, data, or profits; or business interruption) however
|
76
|
+
caused and on any theory of liability, whether in contract, strict
|
77
|
+
liability, or tort (including negligence or otherwise) arising in
|
78
|
+
any way out of the use of this software, even if advised of the
|
79
|
+
possibility of such damage.
|
80
|
+
|
81
|
+
|
82
|
+
## RubyPants license
|
83
|
+
|
84
|
+
Copyright © 2004 Christian Neukirchen
|
85
|
+
|
86
|
+
RubyPants is a derivative work of SmartyPants and smartypants.py.
|
87
|
+
|
88
|
+
Redistribution and use in source and binary forms, with or without
|
89
|
+
modification, are permitted provided that the following conditions
|
90
|
+
are met:
|
91
|
+
|
92
|
+
* Redistributions of source code must retain the above copyright
|
93
|
+
notice, this list of conditions and the following disclaimer.
|
94
|
+
|
95
|
+
* Redistributions in binary form must reproduce the above copyright
|
96
|
+
notice, this list of conditions and the following disclaimer in
|
97
|
+
the documentation and/or other materials provided with the
|
98
|
+
distribution.
|
99
|
+
|
100
|
+
This software is provided by the copyright holders and contributors
|
101
|
+
“as is” and any express or implied warranties, including, but not
|
102
|
+
limited to, the implied warranties of merchantability and fitness
|
103
|
+
for a particular purpose are disclaimed. In no event shall the
|
104
|
+
copyright owner or contributors be liable for any direct, indirect,
|
105
|
+
incidental, special, exemplary, or consequential damages (including,
|
106
|
+
but not limited to, procurement of substitute goods or services;
|
107
|
+
loss of use, data, or profits; or business interruption) however
|
108
|
+
caused and on any theory of liability, whether in contract, strict
|
109
|
+
liability, or tort (including negligence or otherwise) arising in
|
110
|
+
any way out of the use of this software, even if advised of the
|
111
|
+
possibility of such damage.
|
112
|
+
|
113
|
+
|
114
|
+
## Links
|
115
|
+
|
116
|
+
- John Gruber:: http://daringfireball.net
|
117
|
+
- SmartyPants:: http://daringfireball.net/projects/smartypants
|
118
|
+
- Chad Miller:: http://web.chad.org
|
119
|
+
- Christian Neukirchen:: http://kronavita.de/chris
|
data/Rakefile
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# Rakefile for rubypants -*-ruby-*-
|
2
|
+
require 'rake/rdoctask'
|
3
|
+
require 'rake/gempackagetask'
|
4
|
+
|
5
|
+
|
6
|
+
desc "Run all the tests"
|
7
|
+
task :default => [:test]
|
8
|
+
|
9
|
+
desc "Do predistribution stuff"
|
10
|
+
task :predist => [:doc]
|
11
|
+
|
12
|
+
|
13
|
+
desc "Run all the tests"
|
14
|
+
task :test do
|
15
|
+
ruby 'test_rubypants.rb'
|
16
|
+
end
|
17
|
+
|
18
|
+
desc "Make an archive as .tar.gz"
|
19
|
+
task :dist => :test do
|
20
|
+
system "darcs dist -d rubypants#{get_darcs_tree_version}"
|
21
|
+
end
|
22
|
+
|
23
|
+
|
24
|
+
desc "Generate RDoc documentation"
|
25
|
+
Rake::RDocTask.new(:doc) do |rdoc|
|
26
|
+
rdoc.options << '--line-numbers --inline-source --all'
|
27
|
+
rdoc.rdoc_files.include 'README'
|
28
|
+
rdoc.rdoc_files.include 'rubypants.rb'
|
29
|
+
end
|
30
|
+
|
31
|
+
|
32
|
+
# Helper to retrieve the "revision number" of the darcs tree.
#
# Returns "" when the current directory has no "_darcs" directory.
# Otherwise runs `darcs changes` and walks the blank-line-separated
# entries from the top: ordinary changes are counted until the first
# "tagged <name>" entry is met. The result is "-<tag>.<count>", with the
# tag defaulting to "0.0" when no tag entry is found.
def get_darcs_tree_version
  return "" unless File.directory? "_darcs"

  changes = `darcs changes`
  count = 0
  tag = "0.0"

  # String#each is gone since Ruby 1.9; each_line accepts the same
  # separator argument and yields the same chunks.
  changes.each_line("\n\n") do |change|
    _head, title, _desc = change.split("\n", 3)

    # NOTE(review): the indentation width of the title line is assumed;
    # one or more leading spaces are accepted -- confirm against real
    # `darcs changes` output.
    if title =~ /^ +\*/
      # Normal change.
      count += 1
    elsif title =~ /tagged (.*)/
      # Tag. We look for these.
      tag = $1
      break
    else
      warn "Unparsable change: #{change}"
    end
  end

  "-" + tag + "." + count.to_s
end
|
data/install.rb
ADDED
data/rubypants.rb
ADDED
@@ -0,0 +1,491 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
# = RubyPants -- SmartyPants ported to Ruby
|
4
|
+
#
|
5
|
+
# Ported by Christian Neukirchen <mailto:chneukirchen@gmail.com>
|
6
|
+
# Copyright (C) 2004 Christian Neukirchen
|
7
|
+
#
|
8
|
+
# Incorporates ideas, comments and documentation by Chad Miller
|
9
|
+
# Copyright (C) 2004 Chad Miller
|
10
|
+
#
|
11
|
+
# Original SmartyPants by John Gruber
|
12
|
+
# Copyright (C) 2003 John Gruber
|
13
|
+
#
|
14
|
+
|
15
|
+
#
|
16
|
+
# = RubyPants -- SmartyPants ported to Ruby
|
17
|
+
#
|
18
|
+
# == Synopsis
|
19
|
+
#
|
20
|
+
# RubyPants is a Ruby port of the smart-quotes library SmartyPants.
|
21
|
+
#
|
22
|
+
# The original "SmartyPants" is a free web publishing plug-in for
|
23
|
+
# Movable Type, Blosxom, and BBEdit that easily translates plain ASCII
|
24
|
+
# punctuation characters into "smart" typographic punctuation HTML
|
25
|
+
# entities.
|
26
|
+
#
|
27
|
+
#
|
28
|
+
# == Description
|
29
|
+
#
|
30
|
+
# RubyPants can perform the following transformations:
|
31
|
+
#
|
32
|
+
# * Straight quotes (<tt>"</tt> and <tt>'</tt>) into "curly" quote
|
33
|
+
# HTML entities
|
34
|
+
# * Backticks-style quotes (<tt>``like this''</tt>) into "curly" quote
|
35
|
+
# HTML entities
|
36
|
+
# * Dashes (<tt>--</tt> and <tt>---</tt>) into en- and em-dash
|
37
|
+
# entities
|
38
|
+
# * Three consecutive dots (<tt>...</tt> or <tt>. . .</tt>) into an
|
39
|
+
# ellipsis entity
|
40
|
+
#
|
41
|
+
# This means you can write, edit, and save your posts using plain old
|
42
|
+
# ASCII straight quotes, plain dashes, and plain dots, but your
|
43
|
+
# published posts (and final HTML output) will appear with smart
|
44
|
+
# quotes, em-dashes, and proper ellipses.
|
45
|
+
#
|
46
|
+
# RubyPants does not modify characters within <tt><pre></tt>,
|
47
|
+
# <tt><code></tt>, <tt><kbd></tt>, <tt><math></tt> or
|
48
|
+
# <tt><script></tt> tag blocks. Typically, these tags are used to
|
49
|
+
# display text where smart quotes and other "smart punctuation" would
|
50
|
+
# not be appropriate, such as source code or example markup.
|
51
|
+
#
|
52
|
+
#
|
53
|
+
# == Backslash Escapes
|
54
|
+
#
|
55
|
+
# If you need to use literal straight quotes (or plain hyphens and
|
56
|
+
# periods), RubyPants accepts the following backslash escape sequences
|
57
|
+
# to force non-smart punctuation. It does so by transforming the
|
58
|
+
# escape sequence into a decimal-encoded HTML entity:
|
59
|
+
#
|
60
|
+
# \\ \" \' \. \- \`
|
61
|
+
#
|
62
|
+
# This is useful, for example, when you want to use straight quotes as
|
63
|
+
# foot and inch marks: 6'2" tall; a 17" iMac. (Use <tt>6\'2\"</tt>
|
64
|
+
# resp. <tt>17\"</tt>.)
|
65
|
+
#
|
66
|
+
#
|
67
|
+
# == Algorithmic Shortcomings
|
68
|
+
#
|
69
|
+
# One situation in which quotes will get curled the wrong way is when
|
70
|
+
# apostrophes are used at the start of leading contractions. For
|
71
|
+
# example:
|
72
|
+
#
|
73
|
+
# 'Twas the night before Christmas.
|
74
|
+
#
|
75
|
+
# In the case above, RubyPants will turn the apostrophe into an
|
76
|
+
# opening single-quote, when in fact it should be a closing one. I
|
77
|
+
# don't think this problem can be solved in the general case--every
|
78
|
+
# word processor I've tried gets this wrong as well. In such cases,
|
79
|
+
# it's best to use the proper HTML entity for closing single-quotes
|
80
|
+
# ("<tt>&#8217;</tt>") by hand.
|
81
|
+
#
|
82
|
+
#
|
83
|
+
# == Bugs
|
84
|
+
#
|
85
|
+
# To file bug reports or feature requests (except see above) please
|
86
|
+
# send email to: mailto:chneukirchen@gmail.com
|
87
|
+
#
|
88
|
+
# If the bug involves quotes being curled the wrong way, please send
|
89
|
+
# example text to illustrate.
|
90
|
+
#
|
91
|
+
#
|
92
|
+
# == Authors
|
93
|
+
#
|
94
|
+
# John Gruber did all of the hard work of writing this software in
|
95
|
+
# Perl for Movable Type and almost all of this useful documentation.
|
96
|
+
# Chad Miller ported it to Python to use with Pyblosxom.
|
97
|
+
#
|
98
|
+
# Christian Neukirchen provided the Ruby port, as a general-purpose
|
99
|
+
# library that follows the *Cloth API.
|
100
|
+
#
|
101
|
+
#
|
102
|
+
# == Copyright and License
|
103
|
+
#
|
104
|
+
# === SmartyPants license:
|
105
|
+
#
|
106
|
+
# Copyright (c) 2003 John Gruber
|
107
|
+
# (http://daringfireball.net)
|
108
|
+
# All rights reserved.
|
109
|
+
#
|
110
|
+
# Redistribution and use in source and binary forms, with or without
|
111
|
+
# modification, are permitted provided that the following conditions
|
112
|
+
# are met:
|
113
|
+
#
|
114
|
+
# * Redistributions of source code must retain the above copyright
|
115
|
+
# notice, this list of conditions and the following disclaimer.
|
116
|
+
#
|
117
|
+
# * Redistributions in binary form must reproduce the above copyright
|
118
|
+
# notice, this list of conditions and the following disclaimer in
|
119
|
+
# the documentation and/or other materials provided with the
|
120
|
+
# distribution.
|
121
|
+
#
|
122
|
+
# * Neither the name "SmartyPants" nor the names of its contributors
|
123
|
+
# may be used to endorse or promote products derived from this
|
124
|
+
# software without specific prior written permission.
|
125
|
+
#
|
126
|
+
# This software is provided by the copyright holders and contributors
|
127
|
+
# "as is" and any express or implied warranties, including, but not
|
128
|
+
# limited to, the implied warranties of merchantability and fitness
|
129
|
+
# for a particular purpose are disclaimed. In no event shall the
|
130
|
+
# copyright owner or contributors be liable for any direct, indirect,
|
131
|
+
# incidental, special, exemplary, or consequential damages (including,
|
132
|
+
# but not limited to, procurement of substitute goods or services;
|
133
|
+
# loss of use, data, or profits; or business interruption) however
|
134
|
+
# caused and on any theory of liability, whether in contract, strict
|
135
|
+
# liability, or tort (including negligence or otherwise) arising in
|
136
|
+
# any way out of the use of this software, even if advised of the
|
137
|
+
# possibility of such damage.
|
138
|
+
#
|
139
|
+
# === RubyPants license
|
140
|
+
#
|
141
|
+
# RubyPants is a derivative work of SmartyPants and smartypants.py.
|
142
|
+
#
|
143
|
+
# Redistribution and use in source and binary forms, with or without
|
144
|
+
# modification, are permitted provided that the following conditions
|
145
|
+
# are met:
|
146
|
+
#
|
147
|
+
# * Redistributions of source code must retain the above copyright
|
148
|
+
# notice, this list of conditions and the following disclaimer.
|
149
|
+
#
|
150
|
+
# * Redistributions in binary form must reproduce the above copyright
|
151
|
+
# notice, this list of conditions and the following disclaimer in
|
152
|
+
# the documentation and/or other materials provided with the
|
153
|
+
# distribution.
|
154
|
+
#
|
155
|
+
# This software is provided by the copyright holders and contributors
|
156
|
+
# "as is" and any express or implied warranties, including, but not
|
157
|
+
# limited to, the implied warranties of merchantability and fitness
|
158
|
+
# for a particular purpose are disclaimed. In no event shall the
|
159
|
+
# copyright owner or contributors be liable for any direct, indirect,
|
160
|
+
# incidental, special, exemplary, or consequential damages (including,
|
161
|
+
# but not limited to, procurement of substitute goods or services;
|
162
|
+
# loss of use, data, or profits; or business interruption) however
|
163
|
+
# caused and on any theory of liability, whether in contract, strict
|
164
|
+
# liability, or tort (including negligence or otherwise) arising in
|
165
|
+
# any way out of the use of this software, even if advised of the
|
166
|
+
# possibility of such damage.
|
167
|
+
#
|
168
|
+
#
|
169
|
+
# == Links
|
170
|
+
#
|
171
|
+
# John Gruber:: http://daringfireball.net
|
172
|
+
# SmartyPants:: http://daringfireball.net/projects/smartypants
|
173
|
+
#
|
174
|
+
# Chad Miller:: http://web.chad.org
|
175
|
+
#
|
176
|
+
# Christian Neukirchen:: http://kronavita.de/chris
|
177
|
+
#
|
178
|
+
|
179
|
+
|
180
|
+
class RubyPants < String
|
181
|
+
VERSION = "0.2"
|
182
|
+
|
183
|
+
# Create a new RubyPants instance with the text in +string+.
|
184
|
+
#
|
185
|
+
# Allowed elements in the options array:
|
186
|
+
#
|
187
|
+
# 0 :: do nothing
|
188
|
+
# 1 :: enable all, using only em-dash shortcuts
|
189
|
+
# 2 :: enable all, using old school en- and em-dash shortcuts (*default*)
|
190
|
+
# 3 :: enable all, using inverted old school en and em-dash shortcuts
|
191
|
+
# -1 :: stupefy (translate HTML entities to their ASCII-counterparts)
|
192
|
+
#
|
193
|
+
# If you don't like any of these defaults, you can pass symbols to change
|
194
|
+
# RubyPants' behavior:
|
195
|
+
#
|
196
|
+
# <tt>:quotes</tt> :: quotes
|
197
|
+
# <tt>:backticks</tt> :: backtick quotes (``double'' only)
|
198
|
+
# <tt>:allbackticks</tt> :: backtick quotes (``double'' and `single')
|
199
|
+
# <tt>:dashes</tt> :: dashes
|
200
|
+
# <tt>:oldschool</tt> :: old school dashes
|
201
|
+
# <tt>:inverted</tt> :: inverted old school dashes
|
202
|
+
# <tt>:ellipses</tt> :: ellipses
|
203
|
+
# <tt>:convertquotes</tt> :: convert <tt>&quot;</tt> entities to
|
204
|
+
# <tt>"</tt> for Dreamweaver users
|
205
|
+
# <tt>:stupefy</tt> :: translate RubyPants HTML entities
|
206
|
+
# to their ASCII counterparts.
|
207
|
+
#
|
208
|
+
def initialize(string, options=[2])
  # The text itself becomes the String value of this object.
  super(string)

  # Accept a single option as well as a list; the splat always yields a
  # fresh Array, so the caller's list is never shared.
  @options = [*options]
end
|
212
|
+
|
213
|
+
# Apply the SmartyPants transformations selected by the options passed to
# RubyPants#new and return the converted text.
#
# With option 0 the receiver itself is returned untouched. Otherwise the
# text is tokenized into tags and text runs: tags are copied verbatim,
# text runs are transformed unless they are inside a <pre>, <code>,
# <kbd>, <script> or <math> element.
def to_html
  # Every switch starts out disabled. do_stupefy is initialised here too,
  # so it is defined on every path through the option handling below.
  do_quotes = do_backticks = do_dashes = do_ellipses = do_stupefy = nil
  convert_quotes = false

  if @options.include? 0
    # Do nothing.
    return self
  elsif @options.include? 1
    # Do everything, turn all options on.
    do_quotes = do_backticks = do_ellipses = true
    do_dashes = :normal
  elsif @options.include? 2
    # Do everything, turn all options on, use old school dash shorthand.
    do_quotes = do_backticks = do_ellipses = true
    do_dashes = :oldschool
  elsif @options.include? 3
    # Do everything, turn all options on, use inverted old school
    # dash shorthand.
    do_quotes = do_backticks = do_ellipses = true
    do_dashes = :inverted
  elsif @options.include?(-1)
    do_stupefy = true
  else
    # No numeric preset given: switch features on one symbol at a time.
    do_quotes      = @options.include? :quotes
    do_backticks   = @options.include? :backticks
    do_backticks   = :both if @options.include? :allbackticks
    do_dashes      = :normal if @options.include? :dashes
    do_dashes      = :oldschool if @options.include? :oldschool
    do_dashes      = :inverted if @options.include? :inverted
    do_ellipses    = @options.include? :ellipses
    convert_quotes = @options.include? :convertquotes
    do_stupefy     = @options.include? :stupefy
  end

  # Parse the HTML
  tokens = tokenize

  # Keep track of when we're inside <pre> or <code> tags.
  in_pre = false

  # The result is accumulated here.
  result = ""

  # This is a cheat, used to get some context for one-character
  # tokens that consist of just a quote char. What we do is remember
  # the last character of the previous text token, to use as context
  # to curl single-character quote tokens correctly.
  prev_token_last_char = nil

  tokens.each do |token|
    if token.first == :tag
      result << token[1]
      if token[1] =~ %r!<(/?)(?:pre|code|kbd|script|math)[\s>]!
        in_pre = ($1 != "/")  # Opening or closing tag?
      end
    else
      t = token[1]

      # Remember last char of this token before processing.
      last_char = t[-1].chr

      unless in_pre
        t = process_escapes t

        # Turn &quot; entities back into plain double quotes so that they
        # take part in quote education.
        t.gsub!(/&quot;/, '"') if convert_quotes

        if do_dashes
          t = educate_dashes t            if do_dashes == :normal
          t = educate_dashes_oldschool t  if do_dashes == :oldschool
          t = educate_dashes_inverted t   if do_dashes == :inverted
        end

        t = educate_ellipses t if do_ellipses

        # Note: backticks need to be processed before quotes.
        if do_backticks
          t = educate_backticks t
          t = educate_single_backticks t if do_backticks == :both
        end

        if do_quotes
          if t == "'"
            # Special case: single-character ' token
            if prev_token_last_char =~ /\S/
              t = "’"
            else
              t = "‘"
            end
          elsif t == '"'
            # Special case: single-character " token
            if prev_token_last_char =~ /\S/
              t = "”"
            else
              t = "“"
            end
          else
            # Normal case:
            t = educate_quotes t
          end
        end

        t = stupefy_entities t if do_stupefy
      end

      prev_token_last_char = last_char
      result << t
    end
  end

  # Done
  result
end
|
326
|
+
|
327
|
+
protected
|
328
|
+
|
329
|
+
# Return the string after processing the following backslash escape
# sequences. This is useful if you want to force a "dumb" quote or other
# character to appear.
#
# Escaped are:
#      \\    \"    \'    \.    \-    \`
#
# Each sequence is replaced by the decimal HTML entity of the escaped
# character, so that the later educate_* passes leave it alone. A
# backslash in front of any other character is kept as is.
#
def process_escapes(str)
  entities = {
    '\\\\' => '&#92;',
    '\"'   => '&#34;',
    "\\'"  => '&#39;',
    '\.'   => '&#46;',
    '\-'   => '&#45;',
    '\`'   => '&#96;'
  }

  # One left-to-right pass: a doubled backslash is consumed as a pair
  # before the backslash can be taken as the start of another escape.
  str.gsub(/\\[\\"'.\-`]/) { |escape| entities[escape] }
end
|
344
|
+
|
345
|
+
# Return a copy of the string in which every "<tt>--</tt>" has been
# replaced by an em-dash character.
#
def educate_dashes(str)
  em_dash = '—'
  str.gsub('--', em_dash)
end
|
351
|
+
|
352
|
+
# Return a copy of the string using the "old school" dash shorthand:
# every "<tt>---</tt>" becomes an em-dash and every remaining
# "<tt>--</tt>" becomes an en-dash.
#
def educate_dashes_oldschool(str)
  # Greedy match: three hyphens are taken when available, otherwise two.
  str.gsub(/---?/) { |run| run.length == 3 ? '—' : '–' }
end
|
359
|
+
|
360
|
+
# Return a copy of the string using the inverted "old school" shorthand:
# every "<tt>---</tt>" becomes an en-dash and every remaining
# "<tt>--</tt>" becomes an em-dash.
#
# Two reasons for offering this: first, unlike the syntax supported by
# +educate_dashes_oldschool+, it is compatible with entries written
# before SmartyPants 1.1, back when "<tt>--</tt>" was only used for
# em-dashes. Second, em-dashes are more common than en-dashes, so it
# makes some sense for their shortcut to be the shorter one to type.
# (Thanks to Aaron Swartz for the idea.)
#
def educate_dashes_inverted(str)
  # Greedy match: three hyphens are taken when available, otherwise two.
  str.gsub(/---?/) { |run| run.length == 3 ? '–' : '—' }
end
|
373
|
+
|
374
|
+
# Return a copy of the string in which every "<tt>...</tt>" has been
# replaced by an ellipsis character. The spaced-out form
# "<tt>. . .</tt>" is converted as well.
#
def educate_ellipses(str)
  ellipsis = '…'

  # The tight form is handled first; the spaced form is then looked for
  # in what is left over. The order matters for inputs such as ". . ...".
  without_tight = str.gsub('...', ellipsis)
  without_tight.gsub('. . .', ellipsis)
end
|
381
|
+
|
382
|
+
# Return a copy of the string with "<tt>``backticks''</tt>"-style double
# quotes replaced by curly double quote characters.
#
def educate_backticks(str)
  curly = { "``" => '“', "''" => '”' }

  # The two patterns share no characters, so a single pass suffices.
  str.gsub(/``|''/) { |pair| curly[pair] }
end
|
388
|
+
|
389
|
+
# Return a copy of the string with "<tt>`backticks'</tt>"-style single
# quotes replaced by curly single quote characters: each backtick becomes
# an opening quote, each apostrophe a closing quote.
#
def educate_single_backticks(str)
  str.tr("`'", '‘’')
end
|
395
|
+
|
396
|
+
# Return a copy of the string with straight single and double quotes
# "educated" into curly quote characters. The argument is not modified.
#
# The substitutions run in a fixed order: a few special cases first, then
# opening quotes recognised by what precedes them, then closing quotes,
# and finally whatever is left is treated as an opening quote.
#
def educate_quotes(str)
  punct_class = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'

  str = str.dup

  # Special case if the very first character is a quote followed by
  # punctuation at a non-word-break. Close the quotes by brute
  # force:
  str.gsub!(/^'(?=#{punct_class}\B)/, '’')
  str.gsub!(/^"(?=#{punct_class}\B)/, '”')

  # Special case for double sets of quotes, e.g.:
  #   <p>He said, "'Quoted' words in a larger quote."</p>
  str.gsub!(/"'(?=\w)/, '“‘')
  str.gsub!(/'"(?=\w)/, '‘“')

  # Special case for decade abbreviations (the '80s):
  str.gsub!(/'(?=\d\ds)/, '’')

  close_class = %![^\ \t\r\n\\[\{\(\-]!

  # What may stand directly in front of an opening quote: whitespace, a
  # non-breaking space (as entity or as character), "--", or an en-/em-dash
  # written as named entity, literal character or hexadecimal entity.
  open_context = '\s|&nbsp;|\u00A0|--|&[mn]dash;|–|—|&#x201[34];'

  # Get most opening single quotes:
  str.gsub!(/(#{open_context})'(?=\w)/, '\1‘')
  # Single closing quotes:
  str.gsub!(/(#{close_class})'/, '\1’')
  str.gsub!(/'(\s|s\b|$)/, '’\1')
  # Any remaining single quotes should be opening ones:
  str.gsub!(/'/, '‘')

  # Get most opening double quotes:
  str.gsub!(/(#{open_context})"(?=\w)/, '\1“')
  # Double closing quotes:
  str.gsub!(/(#{close_class})"/, '\1”')
  str.gsub!(/"(\s|s\b|$)/, '”\1')
  # Any remaining quotes should be opening ones:
  str.gsub!(/"/, '“')

  str
end
|
440
|
+
|
441
|
+
# Return a copy of the string in which each typographic character
# produced by RubyPants has been replaced by its ASCII counterpart.
#
# Note: This is not reversible (but exactly the same as in SmartyPants)
#
def stupefy_entities(str)
  ascii_for = {
    '–' => '-',    # en-dash
    '—' => '--',   # em-dash
    '‘' => "'",    # open single quote
    '’' => "'",    # close single quote
    '“' => '"',    # open double quote
    '”' => '"',    # close double quote
    '…' => '...'   # ellipsis
  }

  # Every pattern is a distinct single character and no replacement
  # contains one of them, so one pass gives the same result as a chain.
  str.gsub(/[–—‘’“”…]/) { |char| ascii_for[char] }
end
|
459
|
+
|
460
|
+
# Return an array of the tokens comprising the string. Each token is
# either a tag (possibly with nested tags contained therein, such as
# <tt><a href="<MTFoo>"></tt>), or a run of text between tags. Each
# element of the array is a two-element array; the first is either :tag
# or :text; the second is the actual value.
#
# Based on the <tt>_tokenize()</tt> subroutine from Brad Choate's
# MTRegex plugin.  <http://www.bradchoate.com/past/mtregex.php>
#
# This is actually the easier variant using tag_soup, as used by
# Chad Miller in the Python port of SmartyPants.
#
def tokenize
  pieces = []

  # Offset just behind the last tag seen so far.
  cursor = 0

  # Each match is an optional run of text followed by one tag.
  scan(/([^<]*)(<[^>]*>)/) do |text, tag|
    pieces << [:text, text] unless text == ""
    pieces << [:tag, tag]

    cursor = Regexp.last_match.end(0)
  end

  # Whatever follows the last tag is one final text token.
  pieces << [:text, self[cursor..-1]] if cursor < size

  pieces
end
|
491
|
+
end
|
data/test_rubypants.rb
ADDED
@@ -0,0 +1,164 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require './rubypants'
|
5
|
+
|
6
|
+
# Test EVERYTHING against SmartyPants.pl output!
|
7
|
+
|
8
|
+
|
9
|
+
class TestRubyPants < Test::Unit::TestCase
|
10
|
+
def assert_rp_equal(str, orig, options=[2])
|
11
|
+
assert_equal orig, RubyPants.new(str, options).to_html
|
12
|
+
end
|
13
|
+
|
14
|
+
def assert_verbatim(str)
|
15
|
+
assert_rp_equal str, str
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_verbatim
|
19
|
+
assert_verbatim "foo!"
|
20
|
+
assert_verbatim "<div>This is HTML</div>"
|
21
|
+
assert_verbatim "<div>This is HTML with <crap </div> tags>"
|
22
|
+
assert_verbatim <<EOF
|
23
|
+
multiline
|
24
|
+
|
25
|
+
<b>html</b>
|
26
|
+
|
27
|
+
code
|
28
|
+
|
29
|
+
EOF
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_quotes
|
33
|
+
assert_rp_equal '"A first example"', '“A first example”'
|
34
|
+
assert_rp_equal '"A first "nested" example"',
|
35
|
+
'“A first “nested” example”'
|
36
|
+
|
37
|
+
assert_rp_equal '".', '”.'
|
38
|
+
assert_rp_equal '"a', '“a'
|
39
|
+
|
40
|
+
assert_rp_equal "'.", '’.'
|
41
|
+
assert_rp_equal "'a", '‘a'
|
42
|
+
|
43
|
+
assert_rp_equal %{<p>He said, "'Quoted' words in a larger quote."</p>},
|
44
|
+
"<p>He said, “‘Quoted’ words in a larger quote.”</p>"
|
45
|
+
|
46
|
+
assert_rp_equal %{"I like the 70's"}, '“I like the 70’s”'
|
47
|
+
assert_rp_equal %{"I like the '70s"}, '“I like the ’70s”'
|
48
|
+
assert_rp_equal %{"I like the '70!"}, '“I like the ‘70!”'
|
49
|
+
|
50
|
+
assert_rp_equal 'pre"post', 'pre”post'
|
51
|
+
assert_rp_equal 'pre "post', 'pre “post'
|
52
|
+
assert_rp_equal 'pre "post', 'pre “post'
|
53
|
+
assert_rp_equal 'pre--"post', 'pre–“post'
|
54
|
+
assert_rp_equal 'pre--"!', 'pre–”!'
|
55
|
+
|
56
|
+
assert_rp_equal "pre'post", 'pre’post'
|
57
|
+
assert_rp_equal "pre 'post", 'pre ‘post'
|
58
|
+
assert_rp_equal "pre 'post", 'pre ‘post'
|
59
|
+
assert_rp_equal "pre--'post", 'pre–‘post'
|
60
|
+
assert_rp_equal "pre--'!", 'pre–’!'
|
61
|
+
|
62
|
+
assert_rp_equal "<b>'</b>", "<b>‘</b>"
|
63
|
+
assert_rp_equal "foo<b>'</b>", "foo<b>’</b>"
|
64
|
+
|
65
|
+
assert_rp_equal '<b>"</b>', "<b>“</b>"
|
66
|
+
assert_rp_equal 'foo<b>"</b>', "foo<b>”</b>"
|
67
|
+
end
|
68
|
+
|
69
|
+
def test_dashes
|
70
|
+
assert_rp_equal "foo--bar", 'foo—bar', 1
|
71
|
+
assert_rp_equal "foo---bar", 'foo—-bar', 1
|
72
|
+
assert_rp_equal "foo----bar", 'foo——bar', 1
|
73
|
+
assert_rp_equal "foo-----bar", 'foo——-bar', 1
|
74
|
+
assert_rp_equal "--foo--bar--quux--",
|
75
|
+
'—foo—bar—quux—', 1
|
76
|
+
|
77
|
+
assert_rp_equal "foo--bar", 'foo–bar', 2
|
78
|
+
assert_rp_equal "foo---bar", 'foo—bar', 2
|
79
|
+
assert_rp_equal "foo----bar", 'foo—-bar', 2
|
80
|
+
assert_rp_equal "foo-----bar", 'foo—–bar', 2
|
81
|
+
assert_rp_equal "--foo--bar--quux--",
|
82
|
+
'–foo–bar–quux–', 2
|
83
|
+
|
84
|
+
assert_rp_equal "foo--bar", 'foo—bar', 3
|
85
|
+
assert_rp_equal "foo---bar", 'foo–bar', 3
|
86
|
+
assert_rp_equal "foo----bar", 'foo–-bar', 3
|
87
|
+
assert_rp_equal "foo-----bar", 'foo–—bar', 3
|
88
|
+
assert_rp_equal "--foo--bar--quux--",
|
89
|
+
'—foo—bar—quux—', 3
|
90
|
+
end
|
91
|
+
|
92
|
+
def test_ellipses
|
93
|
+
assert_rp_equal "foo..bar", 'foo..bar'
|
94
|
+
assert_rp_equal "foo...bar", 'foo…bar'
|
95
|
+
assert_rp_equal "foo....bar", 'foo….bar'
|
96
|
+
|
97
|
+
# Nasty ones
|
98
|
+
assert_rp_equal "foo. . ..bar", 'foo….bar'
|
99
|
+
assert_rp_equal "foo. . ...bar", 'foo. . …bar'
|
100
|
+
assert_rp_equal "foo. . ....bar", 'foo. . ….bar'
|
101
|
+
end
|
102
|
+
|
103
|
+
def test_backticks
|
104
|
+
assert_rp_equal "pre``post", 'pre“post'
|
105
|
+
assert_rp_equal "pre ``post", 'pre “post'
|
106
|
+
assert_rp_equal "pre ``post", 'pre “post'
|
107
|
+
assert_rp_equal "pre--``post", 'pre–“post'
|
108
|
+
assert_rp_equal "pre--``!", 'pre–“!'
|
109
|
+
|
110
|
+
assert_rp_equal "pre''post", 'pre”post'
|
111
|
+
assert_rp_equal "pre ''post", 'pre ”post'
|
112
|
+
assert_rp_equal "pre ''post", 'pre ”post'
|
113
|
+
assert_rp_equal "pre--''post", 'pre–”post'
|
114
|
+
assert_rp_equal "pre--''!", 'pre–”!'
|
115
|
+
end
|
116
|
+
|
117
|
+
def test_single_backticks
|
118
|
+
o = [:oldschool, :allbackticks]
|
119
|
+
|
120
|
+
assert_rp_equal "`foo'", "‘foo’", o
|
121
|
+
|
122
|
+
assert_rp_equal "pre`post", 'pre‘post', o
|
123
|
+
assert_rp_equal "pre `post", 'pre ‘post', o
|
124
|
+
assert_rp_equal "pre `post", 'pre ‘post', o
|
125
|
+
assert_rp_equal "pre--`post", 'pre–‘post', o
|
126
|
+
assert_rp_equal "pre--`!", 'pre–‘!', o
|
127
|
+
|
128
|
+
assert_rp_equal "pre'post", 'pre’post', o
|
129
|
+
assert_rp_equal "pre 'post", 'pre ’post', o
|
130
|
+
assert_rp_equal "pre 'post", 'pre ’post', o
|
131
|
+
assert_rp_equal "pre--'post", 'pre–’post', o
|
132
|
+
assert_rp_equal "pre--'!", 'pre–’!', o
|
133
|
+
end
|
134
|
+
|
135
|
+
def test_stupefy
|
136
|
+
o = [:stupefy]
|
137
|
+
|
138
|
+
assert_rp_equal "<p>He said, “‘Quoted’ words " +
|
139
|
+
"in a larger quote.”</p>",
|
140
|
+
%{<p>He said, "'Quoted' words in a larger quote."</p>}, o
|
141
|
+
|
142
|
+
assert_rp_equal "– — ‘’ “” …",
|
143
|
+
%{- -- '' "" ...}, o
|
144
|
+
|
145
|
+
assert_rp_equal %{- -- '' "" ...}, %{- -- '' "" ...}, o
|
146
|
+
end
|
147
|
+
|
148
|
+
# Backslash escapes must come out as decimal HTML entities, so that the
# escaped character is not touched by the later transformations; a
# backslash in front of any other character is passed through unchanged.
def test_process_escapes
  assert_rp_equal %q{foo\bar}, "foo\\bar"
  assert_rp_equal %q{foo\\\bar}, "foo&#92;bar"
  assert_rp_equal %q{foo\\\\\bar}, "foo&#92;\\bar"
  assert_rp_equal %q{foo\...bar}, "foo&#46;..bar"
  assert_rp_equal %q{foo\.\.\.bar}, "foo&#46;&#46;&#46;bar"

  assert_rp_equal %q{foo\'bar}, "foo&#39;bar"
  assert_rp_equal %q{foo\"bar}, "foo&#34;bar"
  assert_rp_equal %q{foo\-bar}, "foo&#45;bar"
  assert_rp_equal %q{foo\`bar}, "foo&#96;bar"

  # Not escape sequences: the backslash stays.
  assert_rp_equal %q{foo\#bar}, "foo\\#bar"
  assert_rp_equal %q{foo\*bar}, "foo\\*bar"
  assert_rp_equal %q{foo\&bar}, "foo\\&bar"
end
|
164
|
+
end
|
metadata
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rubypants-unicode
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Chris Chapman
|
9
|
+
- Christian Neukirchen
|
10
|
+
autorequire:
|
11
|
+
bindir: bin
|
12
|
+
cert_chain: []
|
13
|
+
date: 2012-07-30 00:00:00.000000000 Z
|
14
|
+
dependencies: []
|
15
|
+
description: ! 'RubyPants-Unicode is a Ruby port of the smart-quotes library SmartyPants
|
16
|
+
that outputs
|
17
|
+
|
18
|
+
unicode characters (UTF-8) instead of HTML entities.
|
19
|
+
|
20
|
+
|
21
|
+
The original "SmartyPants" is a free web publishing plug-in for
|
22
|
+
|
23
|
+
Movable Type, Blosxom, and BBEdit that easily translates plain ASCII
|
24
|
+
|
25
|
+
punctuation characters into "smart" typographic punctuation HTML
|
26
|
+
|
27
|
+
entities.
|
28
|
+
|
29
|
+
'
|
30
|
+
email: chris.chapman@aggiemail.usu.edu
|
31
|
+
executables: []
|
32
|
+
extensions: []
|
33
|
+
extra_rdoc_files:
|
34
|
+
- README.md
|
35
|
+
files:
|
36
|
+
- install.rb
|
37
|
+
- rubypants.rb
|
38
|
+
- test_rubypants.rb
|
39
|
+
- README.md
|
40
|
+
- Rakefile
|
41
|
+
homepage: https://github.com/cdchapman/rubypants-unicode
|
42
|
+
licenses: []
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options:
|
45
|
+
- --main
|
46
|
+
- README.md
|
47
|
+
- --line-numbers
|
48
|
+
- --inline-source
|
49
|
+
- --all
|
50
|
+
- --exclude
|
51
|
+
- test_rubypants.rb
|
52
|
+
require_paths:
|
53
|
+
- .
|
54
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
55
|
+
none: false
|
56
|
+
requirements:
|
57
|
+
- - ! '>='
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0'
|
60
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ! '>='
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
66
|
+
requirements: []
|
67
|
+
rubyforge_project:
|
68
|
+
rubygems_version: 1.8.24
|
69
|
+
signing_key:
|
70
|
+
specification_version: 3
|
71
|
+
summary: RubyPants-Unicode is a Ruby port of the smart-quotes library SmartyPants.
|
72
|
+
test_files:
|
73
|
+
- test_rubypants.rb
|