rubypants 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +0 -0
- data/{README → LICENSE.rdoc} +8 -46
- data/README.rdoc +119 -0
- data/Rakefile +1 -78
- data/lib/rubypants.rb +2 -0
- data/lib/rubypants/core.rb +374 -0
- data/lib/rubypants/version.rb +3 -0
- data/rubypants.gemspec +32 -0
- data/{test_rubypants.rb → test/rubypants_test.rb} +21 -5
- metadata +69 -46
- data/install.rb +0 -9
- data/rubypants.rb +0 -490
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d9822934b5585190847395099959027911beb2f3
|
4
|
+
data.tar.gz: f12b3db11d8f4c77e3c48fadfc6a1360895b4f8e
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3cdf93d5b4c9bfc7d3343d5b55ca8474955b15f689529db1d639dfba292785db6c99b985c4456ec3c025735344d4ca27b4cc9e5febd70b53825182b95758c5d0
|
7
|
+
data.tar.gz: c9488f204e20f28cd8e7913a8bbda9b0e912c62fd167c873c6d7b24a7a5b9863c015f28f1a9d3c7fbc06bcd2c446da6f3b1c734eb926b23c3c849ff8c3a85b06
|
data/.gitignore
ADDED
File without changes
|
data/{README → LICENSE.rdoc}
RENAMED
@@ -1,42 +1,15 @@
|
|
1
|
-
=
|
1
|
+
= Copyright and License
|
2
|
+
|
3
|
+
== Copyright
|
2
4
|
|
3
5
|
Ported by Christian Neukirchen <mailto:chneukirchen@gmail.com>
|
4
|
-
|
6
|
+
Copyright (C) 2004 Christian Neukirchen
|
5
7
|
|
6
|
-
|
7
|
-
|
8
|
+
Incorporates ideas, comments and documentation by Chad Miller
|
9
|
+
Copyright (C) 2004 Chad Miller
|
8
10
|
|
9
11
|
Original SmartyPants by John Gruber
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
== RubyPants
|
14
|
-
|
15
|
-
RubyPants is a Ruby port of the smart-quotes library SmartyPants.
|
16
|
-
|
17
|
-
The original "SmartyPants" is a free web publishing plug-in for
|
18
|
-
Movable Type, Blosxom, and BBEdit that easily translates plain ASCII
|
19
|
-
punctuation characters into "smart" typographic punctuation HTML
|
20
|
-
entities.
|
21
|
-
|
22
|
-
See rubypants.rb for more information.
|
23
|
-
|
24
|
-
|
25
|
-
== Incompatibilities
|
26
|
-
|
27
|
-
RubyPants uses a different API than SmartyPants; it is compatible to
|
28
|
-
Red- and BlueCloth. Usually, you call RubyPants like this:
|
29
|
-
|
30
|
-
nicehtml = RubyPants.new(uglyhtml, options).to_html
|
31
|
-
|
32
|
-
where +options+ is an Array of Integers and/or Symbols (if you don't
|
33
|
-
pass any options, RubyPants will use <tt>[2]</tt> as default.)
|
34
|
-
|
35
|
-
*Note*:: This is incompatible to SmartyPants, which uses <tt>[1]</tt>
|
36
|
-
for default.
|
37
|
-
|
38
|
-
The exact meaning of numbers and symbols is documented at RubyPants#new.
|
39
|
-
|
12
|
+
Copyright (C) 2003 John Gruber
|
40
13
|
|
41
14
|
== SmartyPants license:
|
42
15
|
|
@@ -73,10 +46,7 @@ liability, or tort (including negligence or otherwise) arising in
|
|
73
46
|
any way out of the use of this software, even if advised of the
|
74
47
|
possibility of such damage.
|
75
48
|
|
76
|
-
|
77
|
-
== RubyPants license
|
78
|
-
|
79
|
-
Copyright (C) 2004 Christian Neukirchen
|
49
|
+
=== RubyPants license
|
80
50
|
|
81
51
|
RubyPants is a derivative work of SmartyPants and smartypants.py.
|
82
52
|
|
@@ -104,11 +74,3 @@ caused and on any theory of liability, whether in contract, strict
|
|
104
74
|
liability, or tort (including negligence or otherwise) arising in
|
105
75
|
any way out of the use of this software, even if advised of the
|
106
76
|
possibility of such damage.
|
107
|
-
|
108
|
-
|
109
|
-
== Links
|
110
|
-
|
111
|
-
John Gruber:: http://daringfireball.net
|
112
|
-
SmartyPants:: http://daringfireball.net/projects/smartypants
|
113
|
-
Chad Miller:: http://web.chad.org
|
114
|
-
Christian Neukirchen:: http://kronavita.de/chris
|
data/README.rdoc
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
= RubyPants: SmartyPants for Ruby
|
2
|
+
|
3
|
+
{<img src="https://img.shields.io/gem/v/rubypants.png?style=plastic">}[https://rubygems.org/gems/rubypants]
|
4
|
+
|
5
|
+
== Synopsis
|
6
|
+
|
7
|
+
RubyPants is a Ruby port of the smart-quotes library SmartyPants.
|
8
|
+
|
9
|
+
The original "SmartyPants" is a free web publishing plug-in for
|
10
|
+
Movable Type, Blosxom, and BBEdit that easily translates plain ASCII
|
11
|
+
punctuation characters into "smart" typographic punctuation HTML
|
12
|
+
entities.
|
13
|
+
|
14
|
+
|
15
|
+
== Description
|
16
|
+
|
17
|
+
RubyPants can perform the following transformations:
|
18
|
+
|
19
|
+
* Straight quotes (<tt>"</tt> and <tt>'</tt>) into "curly" quote
|
20
|
+
HTML entities
|
21
|
+
* Backticks-style quotes (<tt>``like this''</tt>) into "curly" quote
|
22
|
+
HTML entities
|
23
|
+
* Dashes (<tt>--</tt> and <tt>---</tt>) into en- and em-dash
|
24
|
+
entities
|
25
|
+
* Three consecutive dots (<tt>...</tt> or <tt>. . .</tt>) into an
|
26
|
+
ellipsis entity
|
27
|
+
|
28
|
+
This means you can write, edit, and save your posts using plain old
|
29
|
+
ASCII straight quotes, plain dashes, and plain dots, but your
|
30
|
+
published posts (and final HTML output) will appear with smart
|
31
|
+
quotes, em-dashes, and proper ellipses.
|
32
|
+
|
33
|
+
RubyPants does not modify characters within <tt><pre></tt>,
|
34
|
+
<tt><code></tt>, <tt><kbd></tt>, <tt><math></tt>, <tt><style></tt> or
|
35
|
+
<tt><script></tt> tag blocks. Typically, these tags are used to
|
36
|
+
display text where smart quotes and other "smart punctuation" would
|
37
|
+
not be appropriate, such as source code or example markup.
|
38
|
+
|
39
|
+
|
40
|
+
== Installation
|
41
|
+
|
42
|
+
gem install rubypants
|
43
|
+
|
44
|
+
Or, in your application's Gemfile:
|
45
|
+
|
46
|
+
gem 'rubypants'
|
47
|
+
|
48
|
+
|
49
|
+
== Example of Usage
|
50
|
+
|
51
|
+
RubyPants.new("String with 'dumb' quotes.").to_html
|
52
|
+
|
53
|
+
For additional options, consult the documentation in
|
54
|
+
<tt>lib/rubypants/core.rb</tt>.
|
55
|
+
|
56
|
+
|
57
|
+
== Backslash Escapes
|
58
|
+
|
59
|
+
If you need to use literal straight quotes (or plain hyphens and
|
60
|
+
periods), RubyPants accepts the following backslash escape sequences
|
61
|
+
to force non-smart punctuation. It does so by transforming the
|
62
|
+
escape sequence into a decimal-encoded HTML entity:
|
63
|
+
|
64
|
+
\\ \" \' \. \- \`
|
65
|
+
|
66
|
+
This is useful, for example, when you want to use straight quotes as
|
67
|
+
foot and inch marks: 6'2" tall; a 17" iMac. (Use <tt>6\'2\"</tt>
|
68
|
+
resp. <tt>17\"</tt>.)
|
69
|
+
|
70
|
+
|
71
|
+
== Algorithmic Shortcomings
|
72
|
+
|
73
|
+
One situation in which quotes will get curled the wrong way is when
|
74
|
+
apostrophes are used at the start of leading contractions. For
|
75
|
+
example:
|
76
|
+
|
77
|
+
'Twas the night before Christmas.
|
78
|
+
|
79
|
+
In the case above, RubyPants will turn the apostrophe into an
|
80
|
+
opening single-quote, when in fact it should be a closing one. I
|
81
|
+
don't think this problem can be solved in the general case--every
|
82
|
+
word processor I've tried gets this wrong as well. In such cases,
|
83
|
+
it's best to use the proper HTML entity for closing single-quotes
|
84
|
+
("<tt>&#8217;</tt>") by hand.
|
85
|
+
|
86
|
+
|
87
|
+
== Bugs
|
88
|
+
|
89
|
+
To file bug reports or feature requests, please create an issue in this gem's
|
90
|
+
GitHub repository.
|
91
|
+
|
92
|
+
If the bug involves quotes being curled the wrong way, please send
|
93
|
+
example text to illustrate.
|
94
|
+
|
95
|
+
|
96
|
+
== Authors
|
97
|
+
|
98
|
+
John Gruber did all of the hard work of writing this software in
|
99
|
+
Perl for Movable Type and almost all of this useful documentation.
|
100
|
+
Chad Miller ported it to Python to use with Pyblosxom.
|
101
|
+
|
102
|
+
Christian Neukirchen provided the Ruby port, as a general-purpose
|
103
|
+
library that follows the *Cloth API.
|
104
|
+
|
105
|
+
Jeremy McNevin posted this code to GitHub ages ago, but has recently
|
106
|
+
been trying to improve it where possible.
|
107
|
+
|
108
|
+
Aron Griffis made jekyll-pants[https://github.com/scampersand/jekyll-pants]
|
109
|
+
which depends on RubyPants, and consequently jumped in to help out with
|
110
|
+
issues and pull requests.
|
111
|
+
|
112
|
+
|
113
|
+
== Links
|
114
|
+
|
115
|
+
John Gruber :: http://daringfireball.net
|
116
|
+
SmartyPants :: http://daringfireball.net/projects/smartypants
|
117
|
+
Chad Miller :: http://web.chad.org
|
118
|
+
Christian Neukirchen :: http://kronavita.de/chris
|
119
|
+
Aron Griffis :: https://arongriffis.com
|
data/Rakefile
CHANGED
@@ -1,84 +1,7 @@
|
|
1
|
-
# Rakefile for rubypants -*-ruby-*-
|
2
|
-
require 'rake/rdoctask'
|
3
|
-
require 'rake/gempackagetask'
|
4
|
-
|
5
|
-
|
6
1
|
desc "Run all the tests"
|
7
2
|
task :default => [:test]
|
8
3
|
|
9
|
-
desc "Do predistribution stuff"
|
10
|
-
task :predist => [:doc]
|
11
|
-
|
12
|
-
|
13
4
|
desc "Run all the tests"
|
14
5
|
task :test do
|
15
|
-
ruby '
|
16
|
-
end
|
17
|
-
|
18
|
-
desc "Make an archive as .tar.gz"
|
19
|
-
task :dist => :test do
|
20
|
-
system "darcs dist -d rubypants#{get_darcs_tree_version}"
|
21
|
-
end
|
22
|
-
|
23
|
-
|
24
|
-
desc "Generate RDoc documentation"
|
25
|
-
Rake::RDocTask.new(:doc) do |rdoc|
|
26
|
-
rdoc.options << '--line-numbers --inline-source --all'
|
27
|
-
rdoc.rdoc_files.include 'README'
|
28
|
-
rdoc.rdoc_files.include 'rubypants.rb'
|
29
|
-
end
|
30
|
-
|
31
|
-
|
32
|
-
spec = Gem::Specification.new do |s|
|
33
|
-
s.name = 'rubypants'
|
34
|
-
s.version = '0.2.0'
|
35
|
-
s.summary = "RubyPants is a Ruby port of the smart-quotes library SmartyPants."
|
36
|
-
s.description = <<-EOF
|
37
|
-
RubyPants is a Ruby port of the smart-quotes library SmartyPants.
|
38
|
-
|
39
|
-
The original "SmartyPants" is a free web publishing plug-in for
|
40
|
-
Movable Type, Blosxom, and BBEdit that easily translates plain ASCII
|
41
|
-
punctuation characters into "smart" typographic punctuation HTML
|
42
|
-
entities.
|
43
|
-
EOF
|
44
|
-
s.files = FileList['**/*rb', 'README', 'Rakefile'].to_a
|
45
|
-
s.test_file = "test_rubypants.rb"
|
46
|
-
s.extra_rdoc_files = ["README"]
|
47
|
-
s.rdoc_options = ["--main", "README"]
|
48
|
-
s.rdoc_options.concat ['--line-numbers', '--inline-source', '--all']
|
49
|
-
s.rdoc_options.concat ['--exclude', 'test_rubypants.rb']
|
50
|
-
s.require_path = '.'
|
51
|
-
s.author = "Christian Neukirchen"
|
52
|
-
s.email = "chneukirchen@gmail.com"
|
53
|
-
s.homepage = "http://www.kronavita.de/chris/blog/projects/rubypants.html"
|
54
|
-
end
|
55
|
-
|
56
|
-
Rake::GemPackageTask.new(spec) do |pkg|
|
57
|
-
end
|
58
|
-
|
59
|
-
|
60
|
-
# Helper to retrieve the "revision number" of the darcs tree.
|
61
|
-
def get_darcs_tree_version
|
62
|
-
return "" unless File.directory? "_darcs"
|
63
|
-
|
64
|
-
changes = `darcs changes`
|
65
|
-
count = 0
|
66
|
-
tag = "0.0"
|
67
|
-
|
68
|
-
changes.each("\n\n") { |change|
|
69
|
-
head, title, desc = change.split("\n", 3)
|
70
|
-
|
71
|
-
if title =~ /^ \*/
|
72
|
-
# Normal change.
|
73
|
-
count += 1
|
74
|
-
elsif title =~ /tagged (.*)/
|
75
|
-
# Tag. We look for these.
|
76
|
-
tag = $1
|
77
|
-
break
|
78
|
-
else
|
79
|
-
warn "Unparsable change: #{change}"
|
80
|
-
end
|
81
|
-
}
|
82
|
-
|
83
|
-
"-" + tag + "." + count.to_s
|
6
|
+
ruby 'test/rubypants_test.rb'
|
84
7
|
end
|
data/lib/rubypants.rb
ADDED
@@ -0,0 +1,374 @@
|
|
1
|
+
# RubyPants is a Ruby port of the smart-quotes library SmartyPants: it
# translates plain ASCII punctuation (straight quotes, backtick quotes,
# dashes, and runs of dots) into "smart" typographic HTML entities.
#
# The instance *is* the input text (it subclasses String); call #to_html
# to obtain the transformed copy.
class RubyPants < String

  # Create a new RubyPants instance with the text in +string+.
  #
  # Allowed elements in the options array:
  #
  # 0  :: do nothing
  # 1  :: enable all, using only em-dash shortcuts
  # 2  :: enable all, using old school en- and em-dash shortcuts (*default*)
  # 3  :: enable all, using inverted old school en and em-dash shortcuts
  # -1 :: stupefy (translate HTML entities to their ASCII-counterparts)
  #
  # If you don't like any of these defaults, you can pass symbols to change
  # RubyPants' behavior:
  #
  # <tt>:quotes</tt>        :: quotes
  # <tt>:backticks</tt>     :: backtick quotes (``double'' only)
  # <tt>:allbackticks</tt>  :: backtick quotes (``double'' and `single')
  # <tt>:dashes</tt>        :: dashes
  # <tt>:oldschool</tt>     :: old school dashes
  # <tt>:inverted</tt>      :: inverted old school dashes
  # <tt>:ellipses</tt>      :: ellipses
  # <tt>:convertquotes</tt> :: convert <tt>&quot;</tt> entities to
  #                            <tt>"</tt>
  # <tt>:stupefy</tt>       :: translate RubyPants HTML entities
  #                            to their ASCII counterparts.
  #
  # In addition, you can customize the HTML entities that will be injected by
  # passing in a hash for the final argument.  The defaults for these entities
  # are as follows:
  #
  # <tt>:single_left_quote</tt>  :: <tt>&#8216;</tt>
  # <tt>:double_left_quote</tt>  :: <tt>&#8220;</tt>
  # <tt>:single_right_quote</tt> :: <tt>&#8217;</tt>
  # <tt>:double_right_quote</tt> :: <tt>&#8221;</tt>
  # <tt>:em_dash</tt>            :: <tt>&#8212;</tt>
  # <tt>:en_dash</tt>            :: <tt>&#8211;</tt>
  # <tt>:ellipsis</tt>           :: <tt>&#8230;</tt>
  # <tt>:html_quote</tt>         :: <tt>&quot;</tt>
  #
  def initialize(string, options=[2], entities = {})
    super string

    # Accept a single option as well as an array of options.
    @options  = [*options]
    @entities = default_entities.update(entities)
  end

  # Apply SmartyPants transformations.
  #
  # Returns the transformed text as a new String.  When the options include
  # +0+ ("do nothing") the receiver itself is returned unchanged.
  def to_html
    do_quotes = do_backticks = do_dashes = do_ellipses = do_stupefy = nil
    convert_quotes = false

    if @options.include?(0)
      # Do nothing.
      return self
    elsif @options.include?(1)
      # Do everything, turn all options on.
      do_quotes = do_backticks = do_ellipses = true
      do_dashes = :normal
    elsif @options.include?(2)
      # Do everything, turn all options on, use old school dash shorthand.
      do_quotes = do_backticks = do_ellipses = true
      do_dashes = :oldschool
    elsif @options.include?(3)
      # Do everything, turn all options on, use inverted old school
      # dash shorthand.
      do_quotes = do_backticks = do_ellipses = true
      do_dashes = :inverted
    elsif @options.include?(-1)
      do_stupefy = true
    else
      # No numeric preset given: honour the individual symbol switches.
      do_quotes      = @options.include?(:quotes)
      do_backticks   = @options.include?(:backticks)
      do_backticks   = :both      if @options.include?(:allbackticks)
      do_dashes      = :normal    if @options.include?(:dashes)
      do_dashes      = :oldschool if @options.include?(:oldschool)
      do_dashes      = :inverted  if @options.include?(:inverted)
      do_ellipses    = @options.include?(:ellipses)
      convert_quotes = @options.include?(:convertquotes)
      do_stupefy     = @options.include?(:stupefy)
    end

    # Parse the HTML
    tokens = tokenize

    # Keep track of when we're inside <pre>, <code> and similar tags,
    # whose content must be left untouched.
    in_pre = false

    # The result is accumulated here.
    result = String.new("")

    # This is a cheat, used to get some context for one-character
    # tokens that consist of just a quote char. What we do is remember
    # the last character of the previous text token, to use as context
    # to curl single-character quote tokens correctly.
    prev_token_last_char = nil

    tokens.each do |token|
      if token.first == :tag
        result << token[1]
        if token[1] =~ %r!<(/?)(?:pre|code|kbd|script|style|math)[\s>]!
          in_pre = ($1 != "/")  # Opening or closing tag?
        end
      else
        t = token[1]

        # Remember last char of this token before processing.
        last_char = t[-1].chr

        unless in_pre
          # process_escapes returns a fresh string, so the in-place
          # substitution below never touches the receiver's own text.
          t = process_escapes t

          t.gsub!(/&quot;/, '"')  if convert_quotes

          case do_dashes
          when :normal    then t = educate_dashes t
          when :oldschool then t = educate_dashes_oldschool t
          when :inverted  then t = educate_dashes_inverted t
          end

          t = educate_ellipses t  if do_ellipses

          # Note: backticks need to be processed before quotes.
          if do_backticks
            t = educate_backticks t
            t = educate_single_backticks t  if do_backticks == :both
          end

          if do_quotes
            if t == "'"
              # Special case: single-character ' token
              if prev_token_last_char =~ /\S/
                t = entity(:single_right_quote)
              else
                t = entity(:single_left_quote)
              end
            elsif t == '"'
              # Special case: single-character " token
              if prev_token_last_char =~ /\S/
                t = entity(:double_right_quote)
              else
                t = entity(:double_left_quote)
              end
            else
              # Normal case:
              t = educate_quotes t
            end
          end

          t = stupefy_entities t  if do_stupefy
        end

        prev_token_last_char = last_char
        result << t
      end
    end

    # Done
    result
  end

  protected

  # Return the string after processing the following backslash
  # escape sequences. This is useful if you want to force a "dumb" quote
  # or other character to appear.
  #
  # Escaped are:
  #      \\    \"    \'    \.    \-    \`
  #
  # Each sequence is replaced by the decimal HTML entity of the escaped
  # character, which the later "educate" passes do not recognise.
  #
  def process_escapes(str)
    str.
      gsub('\\\\', '&#92;').
      gsub('\"',   '&#34;').
      gsub("\\\'", '&#39;').
      gsub('\.',   '&#46;').
      gsub('\-',   '&#45;').
      gsub('\`',   '&#96;')
  end

  # The string, with each instance of "<tt>--</tt>" translated to an
  # em-dash HTML entity.
  #
  def educate_dashes(str)
    str.
      gsub(/--/, entity(:em_dash))
  end

  # The string, with each instance of "<tt>--</tt>" translated to an
  # en-dash HTML entity, and each "<tt>---</tt>" translated to an
  # em-dash HTML entity.
  #
  def educate_dashes_oldschool(str)
    str.
      gsub(/---/, entity(:em_dash)).
      gsub(/--/,  entity(:en_dash))
  end

  # Return the string, with each instance of "<tt>--</tt>" translated
  # to an em-dash HTML entity, and each "<tt>---</tt>" translated to
  # an en-dash HTML entity. Two reasons why: First, unlike the en- and
  # em-dash syntax supported by +educate_dashes_oldschool+, it's
  # compatible with existing entries written before SmartyPants 1.1,
  # back when "<tt>--</tt>" was only used for em-dashes.  Second,
  # em-dashes are more common than en-dashes, and so it sort of makes
  # sense that the shortcut should be shorter to type. (Thanks to
  # Aaron Swartz for the idea.)
  #
  def educate_dashes_inverted(str)
    str.
      gsub(/---/, entity(:en_dash)).
      gsub(/--/,  entity(:em_dash))
  end

  # Return the string, with each instance of "<tt>...</tt>" translated
  # to an ellipsis HTML entity. Also converts the case where there are
  # spaces between the dots.
  #
  def educate_ellipses(str)
    str.
      gsub('...',   entity(:ellipsis)).
      gsub('. . .', entity(:ellipsis))
  end

  # Return the string, with "<tt>``backticks''</tt>"-style double quotes
  # translated into HTML curly quote entities.
  #
  def educate_backticks(str)
    str.
      gsub("``", entity(:double_left_quote)).
      gsub("''", entity(:double_right_quote))
  end

  # Return the string, with "<tt>`backticks'</tt>"-style single quotes
  # translated into HTML curly quote entities.
  #
  def educate_single_backticks(str)
    str.
      gsub("`", entity(:single_left_quote)).
      gsub("'", entity(:single_right_quote))
  end

  # Return the string, with "educated" curly quote HTML entities.
  #
  # The substitutions are order-dependent: the special cases run first,
  # then opening quotes recognised by their left context, then closing
  # quotes, and whatever is left over is treated as an opening quote.
  #
  def educate_quotes(str)
    punct_class = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'

    str = str.dup

    # Special case if the very first character is a quote followed by
    # punctuation at a non-word-break. Close the quotes by brute
    # force:
    str.gsub!(/^'(?=#{punct_class}\B)/,
              entity(:single_right_quote))
    str.gsub!(/^"(?=#{punct_class}\B)/,
              entity(:double_right_quote))

    # Special case for double sets of quotes, e.g.:
    #   <p>He said, "'Quoted' words in a larger quote."</p>
    str.gsub!(/"'(?=\w)/,
              "#{entity(:double_left_quote)}#{entity(:single_left_quote)}")
    str.gsub!(/'"(?=\w)/,
              "#{entity(:single_left_quote)}#{entity(:double_left_quote)}")

    # Special case for decade abbreviations (the '80s):
    str.gsub!(/'(?=\d\ds)/,
              entity(:single_right_quote))

    close_class = %![^\ \t\r\n\\[\{\(\-]!
    dec_dashes = "#{entity(:en_dash)}|#{entity(:em_dash)}"

    # Left contexts after which a quote is an opening one: whitespace,
    # a non-breaking space entity, or any spelling of a dash.
    open_context =
      "[[:space:]]|&nbsp;|--|&[mn]dash;|#{dec_dashes}|&#x201[34];"

    # Get most opening single quotes:
    str.gsub!(/(#{open_context})'(?=\w)/,
              '\1' + entity(:single_left_quote))

    # Single closing quotes:
    str.gsub!(/(#{close_class})'/,
              '\1' + entity(:single_right_quote))
    str.gsub!(/'(\s|s\b|$)/,
              entity(:single_right_quote) + '\1')

    # Any remaining single quotes should be opening ones:
    str.gsub!(/'/,
              entity(:single_left_quote))

    # Get most opening double quotes:
    str.gsub!(/(#{open_context})"(?=\w)/,
              '\1' + entity(:double_left_quote))

    # Double closing quotes:
    str.gsub!(/(#{close_class})"/,
              '\1' + entity(:double_right_quote))
    str.gsub!(/"(\s|s\b|$)/,
              entity(:double_right_quote) + '\1')

    # Any remaining quotes should be opening ones:
    str.gsub!(/"/,
              entity(:double_left_quote))

    str
  end

  # Return the string, with each RubyPants HTML entity translated to
  # its ASCII counterpart.
  #
  # Note: This is not reversible (but exactly the same as in SmartyPants)
  #
  def stupefy_entities(str)
    new_str = str.dup

    {
      :en_dash            => '-',
      :em_dash            => '--',
      :single_left_quote  => "'",
      :single_right_quote => "'",
      :double_left_quote  => '"',
      :double_right_quote => '"',
      :ellipsis           => '...'
    }.each do |key, ascii|
      # Match the entity as literal text: entities can be overridden by
      # the caller and may contain regex metacharacters.
      new_str.gsub!(entity(key), ascii)
    end

    new_str
  end

  # Return an array of the tokens comprising the string. Each token is
  # either a tag (possibly with nested tags contained therein, such
  # as <tt><a href="<MTFoo>"></tt>), or a run of text between
  # tags. Each element of the array is a two-element array; the first
  # is either :tag or :text; the second is the actual value.
  #
  # Based on the <tt>_tokenize()</tt> subroutine from Brad Choate's
  # MTRegex plugin.  <http://www.bradchoate.com/past/mtregex.php>
  #
  # This is actually the easier variant using tag_soup, as used by
  # Chad Miller in the Python port of SmartyPants.
  #
  def tokenize
    tag_soup = /([^<]*)(<!--.*?-->|<[^>]*>)/

    tokens = []

    # Offset just past the last tag matched; anything after it is
    # trailing text.
    prev_end = 0

    scan(tag_soup) do
      tokens << [:text, $1]  if $1 != ""
      tokens << [:tag, $2]
      prev_end = $~.end(0)
    end

    if prev_end < size
      tokens << [:text, self[prev_end..-1]]
    end

    tokens
  end

  # The built-in entity table; entries are overridden by the +entities+
  # hash given to #initialize.
  def default_entities
    {
      :single_left_quote  => "&#8216;",
      :double_left_quote  => "&#8220;",
      :single_right_quote => "&#8217;",
      :double_right_quote => "&#8221;",
      :em_dash            => "&#8212;",
      :en_dash            => "&#8211;",
      :ellipsis           => "&#8230;",
      :html_quote         => "&quot;"
    }
  end

  # Look up the configured replacement text for +key+.
  def entity(key)
    @entities[key]
  end
end
|