BlueCloth 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES +506 -0
- data/LICENSE +340 -0
- data/README +99 -0
- data/bin/bluecloth +83 -0
- data/install.rb +185 -0
- data/lib/bluecloth.rb +1144 -0
- data/test.rb +117 -0
- data/tests/00_Class.tests.rb +71 -0
- data/tests/05_Markdown.tests.rb +1541 -0
- data/tests/10_Bug.tests.rb +67 -0
- data/tests/15_Contrib.tests.rb +132 -0
- data/tests/bctestcase.rb +283 -0
- data/tests/data/antsugar.txt +34 -0
- data/tests/data/hr-dos.txt +4 -0
- data/tests/data/ml-announce.txt +17 -0
- data/tests/data/re-overflow.txt +67 -0
- data/tests/data/re-overflow2.txt +281 -0
- data/utils.rb +739 -0
- metadata +74 -0
data/install.rb
ADDED
@@ -0,0 +1,185 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
#
|
3
|
+
# Module Install Script
|
4
|
+
# $Id: install.rb 11 2005-08-07 03:30:22Z ged $
|
5
|
+
#
|
6
|
+
# Thanks to Masatoshi SEKI for ideas found in his install.rb.
|
7
|
+
#
|
8
|
+
# Copyright (c) 2001-2005 The FaerieMUD Consortium.
|
9
|
+
#
|
10
|
+
# This is free software. You may use, modify, and/or redistribute this
|
11
|
+
# software under the terms of the Perl Artistic License. (See
|
12
|
+
# http://language.perl.com/misc/Artistic.html)
|
13
|
+
#
|
14
|
+
|
15
|
+
require './utils.rb'
|
16
|
+
include UtilityFunctions
|
17
|
+
|
18
|
+
require 'rbconfig'
|
19
|
+
include Config
|
20
|
+
|
21
|
+
require 'find'
|
22
|
+
require 'ftools'
|
23
|
+
require 'optparse'
|
24
|
+
|
25
|
+
$version = %q$Revision: 11 $
|
26
|
+
$rcsId = %q$Id: install.rb 11 2005-08-07 03:30:22Z ged $
|
27
|
+
|
28
|
+
# Define required libraries
|
29
|
+
RequiredLibraries = [
|
30
|
+
# libraryname, nice name, RAA URL, Download URL, e.g.,
|
31
|
+
#[ 'strscan', "Strscan",
|
32
|
+
# 'http://www.ruby-lang.org/en/raa-list.rhtml?name=strscan',
|
33
|
+
# 'http://i.loveruby.net/archive/strscan/strscan-0.6.7.tar.gz',
|
34
|
+
#],
|
35
|
+
]
|
36
|
+
|
37
|
+
# Copies a file or directory tree into a destination directory. When created
# in testing (dry-run) mode it only reports on $stderr what it would do.
class Installer

  # A path matching any of these patterns is passed to Find.prune by
  # #installFiles.
  @@PrunePatterns = [
    /CVS/,
    /~$/,
    %r:(^|/)\.:,
    /\.tpl$/,
  ]

  ### Create a new Installer. If +testing+ is true, the installer itself
  ### stands in for the file tools and nothing is written to disk.
  def initialize( testing=false )
    @ftools = (testing) ? self : File
  end

  ### Make the specified dirs (which can be a String or an Array of Strings)
  ### with the specified mode. In dry-run mode the paths are only printed.
  def makedirs( dirs, mode=0755, verbose=false )
    dirs = [ dirs ] unless dirs.is_a?( Array )

    oldumask = File::umask
    File::umask( 0777 - mode )

    begin
      dirs.each do |dir|
        if @ftools == File
          # Pass the caller's flag; the original passed the global $verbose,
          # which this script never sets.
          File::mkpath( dir, verbose )
        else
          $stderr.puts "Make path %s with mode %o" % [ dir, mode ]
        end
      end
    ensure
      # Put the process umask back even if creating a directory raised.
      File::umask( oldumask )
    end
  end

  ### Dry-run stand-in for the file tools' install: report whether +srcfile+
  ### would be copied to +dstfile+ or skipped because it is unchanged.
  ### +mode+ and +verbose+ are accepted for signature compatibility only.
  def install( srcfile, dstfile, mode=nil, verbose=false )
    dstfile = File.catname( srcfile, dstfile )
    if FileTest.exist?( dstfile ) && File.cmp( srcfile, dstfile )
      $stderr.puts " skipping #{dstfile}: unchanged"
    else
      $stderr.puts " install #{srcfile} -> #{dstfile}"
    end
  end

  public

  ### Install +src+ (a single file, or a directory that is walked
  ### recursively) under +dstDir+, creating the needed directories first.
  ### Files are installed with +mode+; if +verbose+ is true each install is
  ### announced on $stderr.
  def installFiles( src, dstDir, mode=0444, verbose=false )
    directories = []
    files = []

    if File.directory?( src )
      # Anchored, escaped pattern for the "src/" prefix, so that directory
      # names containing regexp metacharacters are stripped correctly.
      separator = Regexp.escape( File::Separator )
      prefix = /\A#{Regexp.escape( src.chomp(File::Separator) )}#{separator}+/

      Find.find( src ) do |f|
        Find.prune if @@PrunePatterns.any? {|pat| pat =~ f }
        next if f == src

        if FileTest.directory?( f )
          directories << f.sub( prefix, '' )
        elsif FileTest.file?( f )
          files << f.sub( prefix, '' )
        else
          Find.prune
        end
      end
    else
      files << File.basename( src )
      src = File.dirname( src )
    end

    dirs = [ dstDir ]
    dirs |= directories.collect {|d| File.join( dstDir, d ) }
    makedirs( dirs, 0755, verbose )

    files.each do |f|
      srcfile = File.join( src, f )
      # The destination is the directory the file lands in, not a file name.
      dstfile = File.dirname( File.join(dstDir, f) )

      if verbose
        # Paths go in as format arguments so a '%' in a path is harmless.
        if mode
          $stderr.puts "Install %s -> %s (mode %o)" % [ srcfile, dstfile, mode ]
        else
          $stderr.puts "Install %s -> %s" % [ srcfile, dstfile ]
        end
      end

      @ftools.install( srcfile, dstfile, mode, verbose )
    end
  end

end
|
125
|
+
|
126
|
+
|
127
|
+
if $0 == __FILE__
|
128
|
+
dryrun = false
|
129
|
+
|
130
|
+
# Parse command-line switches
|
131
|
+
ARGV.options {|oparser|
|
132
|
+
oparser.banner = "Usage: #$0 [options]\n"
|
133
|
+
|
134
|
+
oparser.on( "--verbose", "-v", TrueClass, "Make progress verbose" ) {
|
135
|
+
$VERBOSE = true
|
136
|
+
debugMsg "Turned verbose on."
|
137
|
+
}
|
138
|
+
|
139
|
+
oparser.on( "--dry-run", "-n", TrueClass, "Don't really install anything" ) {
|
140
|
+
debugMsg "Turned dry-run on."
|
141
|
+
dryrun = true
|
142
|
+
}
|
143
|
+
|
144
|
+
# Handle the 'help' option
|
145
|
+
oparser.on( "--help", "-h", "Display this text." ) {
|
146
|
+
$stderr.puts oparser
|
147
|
+
exit!(0)
|
148
|
+
}
|
149
|
+
|
150
|
+
oparser.parse!
|
151
|
+
}
|
152
|
+
|
153
|
+
# Don't do anything if they expect this to be the three-step install script
|
154
|
+
# and they aren't doing the 'install' step.
|
155
|
+
if ARGV.include?( "config" )
|
156
|
+
for lib in RequiredLibraries
|
157
|
+
testForRequiredLibrary( *lib )
|
158
|
+
end
|
159
|
+
puts "Done."
|
160
|
+
elsif ARGV.include?( "setup" )
|
161
|
+
puts "Done."
|
162
|
+
elsif ARGV.empty?
|
163
|
+
for lib in RequiredLibraries
|
164
|
+
testForRequiredLibrary( *lib )
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
if ARGV.empty? || ARGV.include?( "install" )
|
169
|
+
debugMsg "Sitelibdir = '#{CONFIG['sitelibdir']}'"
|
170
|
+
sitelibdir = CONFIG['sitelibdir']
|
171
|
+
debugMsg "Sitearchdir = '#{CONFIG['sitearchdir']}'"
|
172
|
+
sitearchdir = CONFIG['sitearchdir']
|
173
|
+
|
174
|
+
message "Installing..."
|
175
|
+
i = Installer.new( dryrun )
|
176
|
+
#i.installFiles( "redist", sitelibdir, 0444, verbose )
|
177
|
+
i.installFiles( "lib", sitelibdir, 0444, $VERBOSE )
|
178
|
+
|
179
|
+
message "done.\n"
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
|
184
|
+
|
185
|
+
|
data/lib/bluecloth.rb
ADDED
@@ -0,0 +1,1144 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'digest/md5'
|
4
|
+
require 'logger'
|
5
|
+
require 'strscan'
|
6
|
+
|
7
|
+
# Bluecloth is a Ruby implementation of Markdown, a text-to-HTML conversion
|
8
|
+
# tool.
|
9
|
+
#
|
10
|
+
# == Synopsis
|
11
|
+
#
|
12
|
+
# doc = BlueCloth::new "
|
13
|
+
# ## Test document ##
|
14
|
+
#
|
15
|
+
# Just a simple test.
|
16
|
+
# "
|
17
|
+
#
|
18
|
+
# puts doc.to_html
|
19
|
+
#
|
20
|
+
# == Authors
|
21
|
+
#
|
22
|
+
# * Michael Granger <ged@FaerieMUD.org>
|
23
|
+
#
|
24
|
+
# == Contributors
|
25
|
+
#
|
26
|
+
# * Martin Chase <stillflame@FaerieMUD.org> - Peer review, helpful suggestions
|
27
|
+
# * Florian Gross <flgr@ccan.de> - Filter options, suggestions
|
28
|
+
#
|
29
|
+
# == Copyright
|
30
|
+
#
|
31
|
+
# Original version:
|
32
|
+
# Copyright (c) 2003-2004 John Gruber
|
33
|
+
# <http://daringfireball.net/>
|
34
|
+
# All rights reserved.
|
35
|
+
#
|
36
|
+
# Ruby port:
|
37
|
+
# Copyright (c) 2004 The FaerieMUD Consortium.
|
38
|
+
#
|
39
|
+
# BlueCloth is free software; you can redistribute it and/or modify it under the
|
40
|
+
# terms of the GNU General Public License as published by the Free Software
|
41
|
+
# Foundation; either version 2 of the License, or (at your option) any later
|
42
|
+
# version.
|
43
|
+
#
|
44
|
+
# BlueCloth is distributed in the hope that it will be useful, but WITHOUT ANY
|
45
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
|
46
|
+
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
47
|
+
#
|
48
|
+
# == To-do
|
49
|
+
#
|
50
|
+
# * Refactor some of the larger uglier methods that have to do their own
|
51
|
+
# brute-force scanning because of lack of Perl features in Ruby's Regexp
|
52
|
+
# class. Alternately, could add a dependency on 'pcre' and use most Perl
|
53
|
+
# regexps.
|
54
|
+
#
|
55
|
+
# * Put the StringScanner in the render state for thread-safety.
|
56
|
+
#
|
57
|
+
# == Version
|
58
|
+
#
|
59
|
+
# $Id: bluecloth.rb 130 2009-07-16 00:08:36Z deveiant $
|
60
|
+
#
|
61
|
+
class BlueCloth < String
|
62
|
+
|
63
|
+
### Exception class for formatting errors.
|
64
|
+
class FormatError < RuntimeError
|
65
|
+
|
66
|
+
### Create a new FormatError with the given source +str+ and an optional
|
67
|
+
### message about the +specific+ error.
|
68
|
+
def initialize( str, specific=nil )
|
69
|
+
if specific
|
70
|
+
msg = "Bad markdown format near %p: %s" % [ str, specific ]
|
71
|
+
else
|
72
|
+
msg = "Bad markdown format near %p" % str
|
73
|
+
end
|
74
|
+
|
75
|
+
super( msg )
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
|
80
|
+
# Release Version
|
81
|
+
Version = VERSION = '1.0.1'
|
82
|
+
|
83
|
+
# SVN Revision
|
84
|
+
SvnRev = %q$Rev: 130 $
|
85
|
+
|
86
|
+
# SVN Id tag
|
87
|
+
SvnId = %q$Id: bluecloth.rb 130 2009-07-16 00:08:36Z deveiant $
|
88
|
+
|
89
|
+
# SVN URL
|
90
|
+
SvnUrl = %q$URL: svn+ssh://deveiate/svn/BlueCloth/releases/1.0.0/lib/bluecloth.rb $
|
91
|
+
|
92
|
+
|
93
|
+
# Rendering state struct. Keeps track of URLs, titles, and HTML blocks
|
94
|
+
# midway through a render. I prefer this to the globals of the Perl version
|
95
|
+
# because globals make me break out in hives. Or something.
|
96
|
+
RenderState = Struct::new( "RenderState", :urls, :titles, :html_blocks, :log )
|
97
|
+
|
98
|
+
# Tab width for #detab! if none is specified
|
99
|
+
TabWidth = 4
|
100
|
+
|
101
|
+
# The tag-closing string -- set to '>' for HTML
|
102
|
+
EmptyElementSuffix = "/>";
|
103
|
+
|
104
|
+
# Table of MD5 sums for escaped characters
|
105
|
+
EscapeTable = {}
|
106
|
+
'\\`*_{}[]()#.!'.split(//).each {|char|
|
107
|
+
hash = Digest::MD5::hexdigest( char )
|
108
|
+
|
109
|
+
EscapeTable[ char ] = {
|
110
|
+
:md5 => hash,
|
111
|
+
:md5re => Regexp::new( hash ),
|
112
|
+
:re => Regexp::new( '\\\\' + Regexp::escape(char) ),
|
113
|
+
}
|
114
|
+
}
|
115
|
+
|
116
|
+
|
117
|
+
#################################################################
|
118
|
+
### I N S T A N C E M E T H O D S
|
119
|
+
#################################################################
|
120
|
+
|
121
|
+
### Create a new BlueCloth string.
|
122
|
+
def initialize( content="", *restrictions )
  # +content+ is the Markdown source text. Each entry of +restrictions+
  # (nested Arrays are flattened) names an attribute that is switched on by
  # calling its "<name>=" writer with true, e.g. :filter_html.

  # Log to standard error. The original passed $deferr, which is nil on
  # Ruby 1.9 and later, so the logger had no device and dropped everything.
  @log = Logger::new( $stderr )
  @log.level =
    if $DEBUG then Logger::DEBUG
    elsif $VERBOSE then Logger::INFO
    else Logger::WARN
    end
  @scanner = nil

  # Add any restrictions, and set the line-folding attribute to reflect
  # what happens by default.
  @filter_html = nil
  @filter_styles = nil
  # NOTE(review): names are dispatched dynamically, so an unknown restriction
  # raises NoMethodError; do not pass untrusted values here.
  restrictions.flatten.each {|r| __send__( "#{r}=", true ) }
  @fold_lines = true

  super( content )

  @log.debug "String is: %p" % self
end
|
140
|
+
|
141
|
+
|
142
|
+
######
|
143
|
+
public
|
144
|
+
######
|
145
|
+
|
146
|
+
# Filters for controlling what gets output for untrusted input. (But really,
|
147
|
+
# you're filtering bad stuff out of untrusted input at submission-time via
|
148
|
+
# untainting, aren't you?)
|
149
|
+
attr_accessor :filter_html, :filter_styles
|
150
|
+
|
151
|
+
# RedCloth-compatibility accessor. Line-folding is part of Markdown syntax,
|
152
|
+
# so this isn't used by anything.
|
153
|
+
attr_accessor :fold_lines
|
154
|
+
|
155
|
+
|
156
|
+
### Render Markdown-formatted text in this string object as HTML and return
|
157
|
+
### it. The parameter is for compatibility with RedCloth, and is currently
|
158
|
+
### unused, though that may change in the future.
|
159
|
+
def to_html( lite=false )
  # Returns the HTML rendering of this string as a new String; the receiver
  # is not modified. +lite+ is accepted for RedCloth compatibility and is
  # not used.

  # Create a StringScanner we can reuse for various lexing tasks
  @scanner = StringScanner::new( '' )

  # Make a structure to carry around stuff that gets placeholdered out of
  # the source.
  rs = RenderState::new( {}, {}, {} )

  # Make a copy of the string with normalized line endings, tabs turned to
  # spaces, and a couple of guaranteed newlines at the end. The copy is made
  # with #dup so it is still a BlueCloth and responds to #detab!; since
  # Ruby 3.0, String#gsub on a subclass returns a plain String.
  text = self.dup
  text.gsub!( /\r\n?/, "\n" )
  text.detab!
  text += "\n\n"
  @log.debug "Normalized line-endings: %p" % text

  # Filter HTML if we're asked to do so: neutralize every angle bracket by
  # turning it into the corresponding entity.
  if self.filter_html
    text.gsub!( "<", "&lt;" )
    text.gsub!( ">", "&gt;" )
    @log.debug "Filtered HTML: %p" % text
  end

  # Simplify blank lines
  text.gsub!( /^ +$/, '' )
  @log.debug "Tabs -> spaces/blank lines stripped: %p" % text

  # Replace HTML blocks with placeholders
  text = hide_html_blocks( text, rs )
  @log.debug "Hid HTML blocks: %p" % text
  @log.debug "Render state: %p" % rs

  # Strip link definitions, store in render state
  text = strip_link_definitions( text, rs )
  @log.debug "Stripped link definitions: %p" % text
  @log.debug "Render state: %p" % rs

  # Escape meta-characters
  text = escape_special_chars( text )
  @log.debug "Escaped special characters: %p" % text

  # Transform block-level constructs
  text = apply_block_transforms( text, rs )
  @log.debug "After block-level transforms: %p" % text

  # Now swap back in all the escaped characters
  text = unescape_special_chars( text )
  @log.debug "After unescaping special characters: %p" % text

  return text
end
|
209
|
+
|
210
|
+
|
211
|
+
### Convert tabs in +str+ to spaces.
|
212
|
+
def detab( tabwidth=TabWidth )
|
213
|
+
copy = self.dup
|
214
|
+
copy.detab!( tabwidth )
|
215
|
+
return copy
|
216
|
+
end
|
217
|
+
|
218
|
+
|
219
|
+
### Convert tabs to spaces in place and return self if any were converted.
|
220
|
+
def detab!( tabwidth=TabWidth )
  # Expands every tab to the next multiple of +tabwidth+ columns, replaces
  # the receiver's contents with the result and returns the receiver.

  # A limit of -1 keeps trailing empty fields, so newlines at the end of the
  # text survive the split/join round trip (a plain split drops them).
  expanded = self.split( /\n/, -1 ).collect {|line|
    # $1 is the text between the previous tab stop and this tab; earlier
    # expansions always end on a tab stop, so its length gives the column.
    line.gsub( /(.*?)\t/ ) do
      $1 + ' ' * (tabwidth - $1.length % tabwidth)
    end
  }.join( "\n" )

  self.replace( expanded )
end
|
228
|
+
|
229
|
+
|
230
|
+
#######
|
231
|
+
#private
|
232
|
+
#######
|
233
|
+
|
234
|
+
### Do block-level transforms on a copy of +str+ using the specified render
|
235
|
+
### state +rs+ and return the results.
|
236
|
+
def apply_block_transforms( str, rs )
|
237
|
+
# Port: This was called '_runBlockGamut' in the original
|
238
|
+
|
239
|
+
@log.debug "Applying block transforms to:\n %p" % str
|
240
|
+
text = transform_headers( str, rs )
|
241
|
+
text = transform_hrules( text, rs )
|
242
|
+
text = transform_lists( text, rs )
|
243
|
+
text = transform_code_blocks( text, rs )
|
244
|
+
text = transform_block_quotes( text, rs )
|
245
|
+
text = transform_auto_links( text, rs )
|
246
|
+
text = hide_html_blocks( text, rs )
|
247
|
+
|
248
|
+
text = form_paragraphs( text, rs )
|
249
|
+
|
250
|
+
@log.debug "Done with block transforms:\n %p" % text
|
251
|
+
return text
|
252
|
+
end
|
253
|
+
|
254
|
+
|
255
|
+
### Apply Markdown span transforms to a copy of the specified +str+ with the
|
256
|
+
### given render state +rs+ and return it.
|
257
|
+
def apply_span_transforms( str, rs )
|
258
|
+
@log.debug "Applying span transforms to:\n %p" % str
|
259
|
+
|
260
|
+
str = transform_code_spans( str, rs )
|
261
|
+
str = encode_html( str )
|
262
|
+
str = transform_images( str, rs )
|
263
|
+
str = transform_anchors( str, rs )
|
264
|
+
str = transform_italic_and_bold( str, rs )
|
265
|
+
|
266
|
+
# Hard breaks
|
267
|
+
str.gsub!( / {2,}\n/, "<br#{EmptyElementSuffix}\n" )
|
268
|
+
|
269
|
+
@log.debug "Done with span transforms:\n %p" % str
|
270
|
+
return str
|
271
|
+
end
|
272
|
+
|
273
|
+
|
274
|
+
# The list of tags which are considered block-level constructs and an
|
275
|
+
# alternation pattern suitable for use in regexps made from the list
|
276
|
+
StrictBlockTags = %w[ p div h[1-6] blockquote pre table dl ol ul script noscript
|
277
|
+
form fieldset iframe math ins del ]
|
278
|
+
StrictTagPattern = StrictBlockTags.join('|')
|
279
|
+
|
280
|
+
LooseBlockTags = StrictBlockTags - %w[ins del]
|
281
|
+
LooseTagPattern = LooseBlockTags.join('|')
|
282
|
+
|
283
|
+
# Nested blocks:
|
284
|
+
# <div>
|
285
|
+
# <div>
|
286
|
+
# tags for inner block must be indented.
|
287
|
+
# </div>
|
288
|
+
# </div>
|
289
|
+
StrictBlockRegex = %r{
|
290
|
+
^ # Start of line
|
291
|
+
<(#{StrictTagPattern}) # Start tag: \2
|
292
|
+
\b # word break
|
293
|
+
(.*\n)*? # Any number of lines, minimal match
|
294
|
+
</\1> # Matching end tag
|
295
|
+
[ ]* # trailing spaces
|
296
|
+
$ # End of line or document
|
297
|
+
}ix
|
298
|
+
|
299
|
+
# More-liberal block-matching
|
300
|
+
LooseBlockRegex = %r{
|
301
|
+
^ # Start of line
|
302
|
+
<(#{LooseTagPattern}) # start tag: \2
|
303
|
+
\b # word break
|
304
|
+
(.*\n)*? # Any number of lines, minimal match
|
305
|
+
.*</\1> # Anything + Matching end tag
|
306
|
+
[ ]* # trailing spaces
|
307
|
+
$ # End of line or document
|
308
|
+
}ix
|
309
|
+
|
310
|
+
# Special case for <hr />.
|
311
|
+
HruleBlockRegex = %r{
|
312
|
+
( # $1
|
313
|
+
\A\n? # Start of doc + optional \n
|
314
|
+
| # or
|
315
|
+
.*\n\n # anything + blank line
|
316
|
+
)
|
317
|
+
( # save in $2
|
318
|
+
[ ]* # Any spaces
|
319
|
+
<hr # Tag open
|
320
|
+
\b # Word break
|
321
|
+
([^<>])*? # Attributes
|
322
|
+
/?> # Tag close
|
323
|
+
$ # followed by a blank line or end of document
|
324
|
+
)
|
325
|
+
}ix
|
326
|
+
|
327
|
+
### Replace all blocks of HTML in +str+ that start in the left margin with
|
328
|
+
### tokens.
|
329
|
+
def hide_html_blocks( str, rs )
|
330
|
+
@log.debug "Hiding HTML blocks in %p" % str
|
331
|
+
|
332
|
+
# Tokenizer proc to pass to gsub
|
333
|
+
tokenize = lambda {|match|
|
334
|
+
key = Digest::MD5::hexdigest( match )
|
335
|
+
rs.html_blocks[ key ] = match
|
336
|
+
@log.debug "Replacing %p with %p" % [ match, key ]
|
337
|
+
"\n\n#{key}\n\n"
|
338
|
+
}
|
339
|
+
|
340
|
+
rval = str.dup
|
341
|
+
|
342
|
+
@log.debug "Finding blocks with the strict regex..."
|
343
|
+
rval.gsub!( StrictBlockRegex, &tokenize )
|
344
|
+
|
345
|
+
@log.debug "Finding blocks with the loose regex..."
|
346
|
+
rval.gsub!( LooseBlockRegex, &tokenize )
|
347
|
+
|
348
|
+
@log.debug "Finding hrules..."
|
349
|
+
rval.gsub!( HruleBlockRegex ) {|match| $1 + tokenize[$2] }
|
350
|
+
|
351
|
+
return rval
|
352
|
+
end
|
353
|
+
|
354
|
+
|
355
|
+
# Link defs are in the form: ^[id]: url "optional title"
|
356
|
+
LinkRegex = %r{
|
357
|
+
^[ ]*\[(.+)\]: # id = $1
|
358
|
+
[ ]*
|
359
|
+
\n? # maybe *one* newline
|
360
|
+
[ ]*
|
361
|
+
<?(\S+?)>? # url = $2
|
362
|
+
[ ]*
|
363
|
+
\n? # maybe one newline
|
364
|
+
[ ]*
|
365
|
+
(?:
|
366
|
+
# Titles are delimited by "quotes" or (parens).
|
367
|
+
["(]
|
368
|
+
(.+?) # title = $3
|
369
|
+
[")] # Matching ) or "
|
370
|
+
[ ]*
|
371
|
+
)? # title is optional
|
372
|
+
(?:\n+|\Z)
|
373
|
+
}x
|
374
|
+
|
375
|
+
### Strip link definitions from +str+, storing them in the given RenderState
|
376
|
+
### +rs+.
|
377
|
+
def strip_link_definitions( str, rs )
  # Returns a copy of +str+ with every match of LinkRegex removed. For each
  # one, the encoded URL is stored in rs.urls and the title (when present) in
  # rs.titles, both keyed by the downcased link id.
  str.gsub( LinkRegex ) {|match|
    # Copy the captures before calling anything else that might run a regexp.
    id, url, title = $1, $2, $3
    key = id.downcase

    rs.urls[ key ] = encode_html( url )
    # Double quotes become entities so the title can sit inside a
    # double-quoted HTML attribute.
    rs.titles[ key ] = title.gsub( /"/, "&quot;" ) unless title.nil?

    ""
  }
end
|
388
|
+
|
389
|
+
|
390
|
+
### Escape special characters in the given +str+
|
391
|
+
def escape_special_chars( str )
  # Returns a new string assembled from the tokens tokenize_html yields for
  # +str+: inside :tag tokens '*' and '_' are replaced by their EscapeTable
  # placeholders, :text tokens go through encode_backslash_escapes. Any other
  # token type raises TypeError.
  @log.debug " Escaping special characters"
  text = ''.dup

  # The original Markdown source has something called '$tags_to_skip'
  # declared here, but it's never used, so I don't define it.

  # The block parameter has its own name so it neither shadows nor (on
  # Ruby 1.8) overwrites the method's +str+ argument.
  tokenize_html( str ) {|token, chunk|
    @log.debug " Adding %p token %p" % [ token, chunk ]
    case token

    # Within tags, encode * and _
    when :tag
      text << chunk.
        gsub( /\*/, EscapeTable['*'][:md5] ).
        gsub( /_/, EscapeTable['_'][:md5] )

    # Encode backslashed stuff in regular text
    when :text
      text << encode_backslash_escapes( chunk )
    else
      raise TypeError, "Unknown token type %p" % [ token ]
    end
  }

  @log.debug " Text with escapes is now: %p" % text
  return text
end
|
419
|
+
|
420
|
+
|
421
|
+
### Swap escaped special characters in a copy of the given +str+ and return
|
422
|
+
### it.
|
423
|
+
def unescape_special_chars( str )
  # Returns a copy of +str+ in which every EscapeTable placeholder has been
  # replaced by the character it stands for. +str+ itself is left untouched
  # (the original modified it in place, which also failed on frozen input).
  text = str.dup

  EscapeTable.each {|char, hash|
    @log.debug "Unescaping escaped %p with %p" % [ char, hash[:md5re] ]
    # Block form: the replacement is inserted literally, so the backslash
    # entry is never read as the start of a back-reference.
    text.gsub!( hash[:md5re] ) { char }
  }

  return text
end
|
431
|
+
|
432
|
+
|
433
|
+
### Return a copy of the given +str+ with any backslashed special character
|
434
|
+
### in it replaced with MD5 placeholders.
|
435
|
+
def encode_backslash_escapes( str )
|
436
|
+
# Make a copy with any double-escaped backslashes encoded
|
437
|
+
text = str.gsub( /\\\\/, EscapeTable['\\'][:md5] )
|
438
|
+
|
439
|
+
EscapeTable.each_pair {|char, esc|
|
440
|
+
next if char == '\\'
|
441
|
+
text.gsub!( esc[:re], esc[:md5] )
|
442
|
+
}
|
443
|
+
|
444
|
+
return text
|
445
|
+
end
|
446
|
+
|
447
|
+
|
448
|
+
### Transform any Markdown-style horizontal rules in a copy of the specified
|
449
|
+
### +str+ and return it.
|
450
|
+
def transform_hrules( str, rs )
|
451
|
+
@log.debug " Transforming horizontal rules"
|
452
|
+
str.gsub( /^ ?([\-\*_] ?){3,}$/, "\n<hr#{EmptyElementSuffix}\n" )
|
453
|
+
end
|
454
|
+
|
455
|
+
|
456
|
+
|
457
|
+
# Patterns to match and transform lists
|
458
|
+
ListMarkerOl = %r{\d+\.}
|
459
|
+
ListMarkerUl = %r{[*+-]}
|
460
|
+
ListMarkerAny = Regexp::union( ListMarkerOl, ListMarkerUl )
|
461
|
+
|
462
|
+
ListRegexp = %r{
|
463
|
+
(?:
|
464
|
+
^[ ]{0,#{TabWidth - 1}} # Indent < tab width
|
465
|
+
(#{ListMarkerAny}) # unordered or ordered ($1)
|
466
|
+
[ ]+ # At least one space
|
467
|
+
)
|
468
|
+
(?m:.+?) # item content (include newlines)
|
469
|
+
(?:
|
470
|
+
\z # Either EOF
|
471
|
+
| # or
|
472
|
+
\n{2,} # Blank line...
|
473
|
+
(?=\S) # ...followed by non-space
|
474
|
+
(?![ ]* # ...but not another item
|
475
|
+
(#{ListMarkerAny})
|
476
|
+
[ ]+)
|
477
|
+
)
|
478
|
+
}x
|
479
|
+
|
480
|
+
### Transform Markdown-style lists in a copy of the specified +str+ and
|
481
|
+
### return it.
|
482
|
+
def transform_lists( str, rs )
|
483
|
+
@log.debug " Transforming lists at %p" % (str[0,100] + '...')
|
484
|
+
|
485
|
+
str.gsub( ListRegexp ) {|list|
|
486
|
+
@log.debug " Found list %p" % list
|
487
|
+
bullet = $1
|
488
|
+
list_type = (ListMarkerUl.match(bullet) ? "ul" : "ol")
|
489
|
+
list.gsub!( /\n{2,}/, "\n\n\n" )
|
490
|
+
|
491
|
+
%{<%s>\n%s</%s>\n} % [
|
492
|
+
list_type,
|
493
|
+
transform_list_items( list, rs ),
|
494
|
+
list_type,
|
495
|
+
]
|
496
|
+
}
|
497
|
+
end
|
498
|
+
|
499
|
+
|
500
|
+
# Pattern for transforming list items
|
501
|
+
ListItemRegexp = %r{
|
502
|
+
(\n)? # leading line = $1
|
503
|
+
(^[ ]*) # leading whitespace = $2
|
504
|
+
(#{ListMarkerAny}) [ ]+ # list marker = $3
|
505
|
+
((?m:.+?) # list item text = $4
|
506
|
+
(\n{1,2}))
|
507
|
+
(?= \n* (\z | \2 (#{ListMarkerAny}) [ ]+))
|
508
|
+
}x
|
509
|
+
|
510
|
+
### Transform list items in a copy of the given +str+ and return it.
|
511
|
+
def transform_list_items( str, rs )
|
512
|
+
@log.debug " Transforming list items"
|
513
|
+
|
514
|
+
# Trim trailing blank lines
|
515
|
+
str = str.sub( /\n{2,}\z/, "\n" )
|
516
|
+
|
517
|
+
str.gsub( ListItemRegexp ) {|line|
|
518
|
+
@log.debug " Found item line %p" % line
|
519
|
+
leading_line, item = $1, $4
|
520
|
+
|
521
|
+
if leading_line or /\n{2,}/.match( item )
|
522
|
+
@log.debug " Found leading line or item has a blank"
|
523
|
+
item = apply_block_transforms( outdent(item), rs )
|
524
|
+
else
|
525
|
+
# Recursion for sub-lists
|
526
|
+
@log.debug " Recursing for sublist"
|
527
|
+
item = transform_lists( outdent(item), rs ).chomp
|
528
|
+
item = apply_span_transforms( item, rs )
|
529
|
+
end
|
530
|
+
|
531
|
+
%{<li>%s</li>\n} % item
|
532
|
+
}
|
533
|
+
end
|
534
|
+
|
535
|
+
|
536
|
+
# Pattern for matching codeblocks
|
537
|
+
CodeBlockRegexp = %r{
|
538
|
+
(?:\n\n|\A)
|
539
|
+
( # $1 = the code block
|
540
|
+
(?:
|
541
|
+
(?:[ ]{#{TabWidth}} | \t) # a tab or tab-width of spaces
|
542
|
+
.*\n+
|
543
|
+
)+
|
544
|
+
)
|
545
|
+
(^[ ]{0,#{TabWidth - 1}}\S|\Z) # Lookahead for non-space at
|
546
|
+
# line-start, or end of doc
|
547
|
+
}x
|
548
|
+
|
549
|
+
### Transform Markdown-style codeblocks in a copy of the specified +str+ and
|
550
|
+
### return it.
|
551
|
+
def transform_code_blocks( str, rs )
|
552
|
+
@log.debug " Transforming code blocks"
|
553
|
+
|
554
|
+
str.gsub( CodeBlockRegexp ) {|block|
|
555
|
+
codeblock = $1
|
556
|
+
remainder = $2
|
557
|
+
|
558
|
+
# Generate the codeblock
|
559
|
+
%{\n\n<pre><code>%s\n</code></pre>\n\n%s} %
|
560
|
+
[ encode_code( outdent(codeblock), rs ).rstrip, remainder ]
|
561
|
+
}
|
562
|
+
end
|
563
|
+
|
564
|
+
|
565
|
+
# Pattern for matching Markdown blockquote blocks
|
566
|
+
BlockQuoteRegexp = %r{
|
567
|
+
(?:
|
568
|
+
^[ ]*>[ ]? # '>' at the start of a line
|
569
|
+
.+\n # rest of the first line
|
570
|
+
(?:.+\n)* # subsequent consecutive lines
|
571
|
+
\n* # blanks
|
572
|
+
)+
|
573
|
+
}x
|
574
|
+
PreChunk = %r{ ( ^ \s* <pre> .+? </pre> ) }xm
|
575
|
+
|
576
|
+
### Transform Markdown-style blockquotes in a copy of the specified +str+
|
577
|
+
### and return it.
|
578
|
+
def transform_block_quotes( str, rs )
|
579
|
+
@log.debug " Transforming block quotes"
|
580
|
+
|
581
|
+
str.gsub( BlockQuoteRegexp ) {|quote|
|
582
|
+
@log.debug "Making blockquote from %p" % quote
|
583
|
+
|
584
|
+
quote.gsub!( /^ *> ?/, '' ) # Trim one level of quoting
|
585
|
+
quote.gsub!( /^ +$/, '' ) # Trim whitespace-only lines
|
586
|
+
|
587
|
+
indent = " " * TabWidth
|
588
|
+
quoted = %{<blockquote>\n%s\n</blockquote>\n\n} %
|
589
|
+
apply_block_transforms( quote, rs ).
|
590
|
+
gsub( /^/, indent ).
|
591
|
+
gsub( PreChunk ) {|m| m.gsub(/^#{indent}/o, '') }
|
592
|
+
@log.debug "Blockquoted chunk is: %p" % quoted
|
593
|
+
quoted
|
594
|
+
}
|
595
|
+
end
|
596
|
+
|
597
|
+
|
598
|
+
AutoAnchorURLRegexp = /<((https?|ftp):[^'">\s]+)>/
|
599
|
+
AutoAnchorEmailRegexp = %r{
|
600
|
+
<
|
601
|
+
(
|
602
|
+
[-.\w]+
|
603
|
+
\@
|
604
|
+
[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
|
605
|
+
)
|
606
|
+
>
|
607
|
+
}xi
|
608
|
+
|
609
|
+
### Transform URLs in a copy of the specified +str+ into links and return
|
610
|
+
### it.
|
611
|
+
def transform_auto_links( str, rs )
|
612
|
+
@log.debug " Transforming auto-links"
|
613
|
+
str.gsub( AutoAnchorURLRegexp, %{<a href="\\1">\\1</a>}).
|
614
|
+
gsub( AutoAnchorEmailRegexp ) {|addr|
|
615
|
+
encode_email_address( unescape_special_chars($1) )
|
616
|
+
}
|
617
|
+
end
|
618
|
+
|
619
|
+
|
620
|
+
# Encoder functions to turn characters of an email address into encoded
|
621
|
+
# entities.
|
622
|
+
Encoders = [
|
623
|
+
lambda {|char| "&#%03d;" % char},
|
624
|
+
lambda {|char| "&#x%X;" % char},
|
625
|
+
lambda {|char| char.chr },
|
626
|
+
]
|
627
|
+
|
628
|
+
### Transform a copy of the given email +addr+ into an escaped version safer
|
629
|
+
### for posting publicly.
|
630
|
+
def encode_email_address( addr )
  # Returns an <a> element whose href is "mailto:" + +addr+ and whose text is
  # the address, with the characters written as a random mix of decimal
  # entities, hex entities and literals (see Encoders).
  rval = ''.dup

  ("mailto:" + addr).each_byte {|b|
    # Compare as one-character strings: each_byte yields Integers, and the
    # ?: / ?@ literals are Strings on Ruby 1.9 and later, so the original
    # 'when' clauses never matched there.
    rval << case b.chr

      # The colon stays literal so the "mailto:" prefix can be cut off below.
      when ':'
        ":"

      # The at-sign is always written as an entity.
      when '@'
        Encoders[ rand(2) ][ b ]

      else
        r = rand( 100 )
        if r > 90 then Encoders[2][ b ]
        elsif r < 45 then Encoders[1][ b ]
        else Encoders[0][ b ]
        end
      end
  }

  # The link text is the encoded address with everything up to and including
  # the first colon removed.
  return %{<a href="%s">%s</a>} % [ rval, rval.sub(/.+?:/, '') ]
end
|
651
|
+
|
652
|
+
|
653
|
+
# Regex for matching Setext-style headers
|
654
|
+
SetextHeaderRegexp = %r{
|
655
|
+
(.+) # The title text ($1)
|
656
|
+
\n
|
657
|
+
([\-=])+ # Match a line of = or -. Save only one in $2.
|
658
|
+
[ ]*\n+
|
659
|
+
}x
|
660
|
+
|
661
|
+
# Regexp for matching ATX-style headers
|
662
|
+
AtxHeaderRegexp = %r{
|
663
|
+
^(\#{1,6}) # $1 = string of #'s
|
664
|
+
[ ]*
|
665
|
+
(.+?) # $2 = Header text
|
666
|
+
[ ]*
|
667
|
+
\#* # optional closing #'s (not counted)
|
668
|
+
\n+
|
669
|
+
}x
|
670
|
+
|
671
|
+
### Apply Markdown header transforms to a copy of the given +str+ and render
|
672
|
+
### state +rs+ and return the result.
|
673
|
+
def transform_headers( str, rs )
|
674
|
+
@log.debug " Transforming headers"
|
675
|
+
|
676
|
+
# Setext-style headers:
|
677
|
+
# Header 1
|
678
|
+
# ========
|
679
|
+
#
|
680
|
+
# Header 2
|
681
|
+
# --------
|
682
|
+
#
|
683
|
+
str.
|
684
|
+
gsub( SetextHeaderRegexp ) {|m|
|
685
|
+
@log.debug "Found setext-style header"
|
686
|
+
title, hdrchar = $1, $2
|
687
|
+
title = apply_span_transforms( title, rs )
|
688
|
+
|
689
|
+
case hdrchar
|
690
|
+
when '='
|
691
|
+
%[<h1>#{title}</h1>\n\n]
|
692
|
+
when '-'
|
693
|
+
%[<h2>#{title}</h2>\n\n]
|
694
|
+
else
|
695
|
+
title
|
696
|
+
end
|
697
|
+
}.
|
698
|
+
|
699
|
+
gsub( AtxHeaderRegexp ) {|m|
|
700
|
+
@log.debug "Found ATX-style header"
|
701
|
+
hdrchars, title = $1, $2
|
702
|
+
title = apply_span_transforms( title, rs )
|
703
|
+
|
704
|
+
level = hdrchars.length
|
705
|
+
%{<h%d>%s</h%d>\n\n} % [ level, title, level ]
|
706
|
+
}
|
707
|
+
end
|
708
|
+
|
709
|
+
|
710
|
+
### Wrap all remaining paragraph-looking text in a copy of +str+ inside <p>
|
711
|
+
### tags and return it.
|
712
|
+
def form_paragraphs( str, rs )
|
713
|
+
@log.debug " Forming paragraphs"
|
714
|
+
grafs = str.
|
715
|
+
sub( /\A\n+/, '' ).
|
716
|
+
sub( /\n+\z/, '' ).
|
717
|
+
split( /\n{2,}/ )
|
718
|
+
|
719
|
+
rval = grafs.collect {|graf|
|
720
|
+
|
721
|
+
# Unhashify HTML blocks if this is a placeholder
|
722
|
+
if rs.html_blocks.key?( graf )
|
723
|
+
rs.html_blocks[ graf ]
|
724
|
+
|
725
|
+
# Otherwise, wrap in <p> tags
|
726
|
+
else
|
727
|
+
apply_span_transforms(graf, rs).
|
728
|
+
sub( /^[ ]*/, '<p>' ) + '</p>'
|
729
|
+
end
|
730
|
+
}.join( "\n\n" )
|
731
|
+
|
732
|
+
@log.debug " Formed paragraphs: %p" % rval
|
733
|
+
return rval
|
734
|
+
end
|
735
|
+
|
736
|
+
|
737
|
+
# Pattern to match the linkid part of an anchor tag for reference-style
# links.
RefLinkIdRegex = %r{
    [ ]?              # Optional leading space
    (?:\n[ ]*)?       # Optional newline + spaces
    \[
      (.*?)           # Id = $1
    \]
  }x

# Pattern to match the parenthesized second part of an inline-style link,
# i.e. the URI and optional quoted title.
InlineLinkRegex = %r{
    \(                # Literal paren
      [ ]*            # Zero or more spaces
      <?(.+?)>?       # URI = $1
      [ ]*            # Zero or more spaces
      (?:             #
        ([\"\'])      # Opening quote char = $2
        (.*?)         # Title = $3
        \2            # Matching quote char
      )?              # Title is optional
    \)
  }x

### Apply Markdown anchor transforms to a copy of the specified +str+ with
### the given render state +rs+ and return it.
###
### Reference-style links ([text][id] / [text][]) are looked up in +rs.urls+
### and +rs.titles+ by downcased id; inline links ([text](uri "title")) carry
### their own target. A bracketed span that has no usable second part, or
### whose id is not in the link table, is copied to the output verbatim. If
### an opening bracket is never closed, the rest of the input is returned
### as-is.
def transform_anchors( str, rs )
  @log.debug " Transforming anchors"
  @scanner.string = str.dup
  text = ''

  # Scan the whole string
  until @scanner.eos?

    if @scanner.scan( /\[/ )
      link = ''
      depth = 1
      startpos = @scanner.pos
      @log.debug " Found a bracket-open at %d" % startpos

      # Scan the rest of the tag, allowing unlimited nested []s. If
      # the scanner runs out of text before the opening bracket is
      # closed, append the text and return (wasn't a valid anchor).
      while depth.nonzero?
        linktext = @scanner.scan_until( /\]|\[/ )

        if linktext
          @log.debug " Found a bracket at depth %d: %p" % [ depth, linktext ]
          link += linktext

          # Decrement depth for each closing bracket
          depth += ( linktext[-1, 1] == ']' ? -1 : 1 )
          @log.debug " Depth is now #{depth}"

        # If there's no more brackets, it must not be an anchor, so
        # just abort.
        else
          @log.debug " Missing closing brace, assuming non-link."
          link += @scanner.rest
          @scanner.terminate
          return text + '[' + link
        end
      end
      link.slice!( -1 ) # Trim final ']'
      @log.debug " Found leading link %p" % link

      # The literal source consumed so far. It is rebuilt from the scanned
      # pieces rather than sliced out of the scanner's string, because the
      # scanner position is a byte offset and is not a valid character
      # index once the input contains multibyte characters.
      source = '[' + link + ']'

      # Look for a reference-style second part
      if @scanner.scan( RefLinkIdRegex )
        source += @scanner.matched
        linkid = @scanner[1]
        linkid = link if linkid.empty?
        linkid = linkid.downcase
        @log.debug " Found a linkid: %p" % linkid

        # If there's a matching link in the link table, build an
        # anchor tag for it.
        if rs.urls.key?( linkid )
          @log.debug " Found link key in the link table: %p" % rs.urls[linkid]
          url = escape_md( rs.urls[linkid] )

          text += %{<a href="#{url}"}
          if rs.titles.key?( linkid )
            text += %{ title="%s"} % escape_md( rs.titles[linkid] )
          end
          text += %{>#{link}</a>}

        # If the link referred to doesn't exist, just append the raw
        # source to the result
        else
          @log.debug " Linkid %p not found in link table" % linkid
          @log.debug " Appending original string instead: "
          @log.debug "%p" % source
          text += source
        end

      # ...or for an inline style second part
      elsif @scanner.scan( InlineLinkRegex )
        url = @scanner[1]
        title = @scanner[3]
        @log.debug " Found an inline link to %p" % url

        text += %{<a href="%s"} % escape_md( url )
        if title
          # Double quotes would terminate the title attribute early
          title = title.gsub( /"/, '&quot;' )
          text += %{ title="%s"} % escape_md( title )
        end
        text += %{>#{link}</a>}

      # No linkid part: just append the first part as-is.
      else
        @log.debug "No linkid, so no anchor. Appending literal text."
        text += source
      end # if linkid

    # Plain text
    else
      @log.debug " Scanning to the next link from %p" % @scanner.rest
      text += @scanner.scan( /[^\[]+/ )
    end

  end # until @scanner.eos?

  return text
end
|
859
|
+
|
860
|
+
|
861
|
+
# Pattern to match strong emphasis in Markdown text: a ** or __ delimiter,
# content that starts and ends with a non-space character, and the same
# delimiter again.
BoldRegexp = %r{ (\*\*|__) (\S|\S.*?\S) \1 }x

# Pattern to match normal emphasis in Markdown text: same shape as
# BoldRegexp, but with a single * or _ as the delimiter.
ItalicRegexp = %r{ (\*|_) (\S|\S.*?\S) \1 }x

### Transform italic- and bold-encoded text in a copy of the specified +str+
### and return it. Strong emphasis is substituted first so that its doubled
### delimiters are gone before the single-delimiter pattern runs. The render
### state +rs+ is accepted but not used here.
def transform_italic_and_bold( str, rs )
  @log.debug " Transforming italic and bold"

  str.
    gsub( BoldRegexp, %{<strong>\\2</strong>} ).
    gsub( ItalicRegexp, %{<em>\\2</em>} )
end
|
876
|
+
|
877
|
+
|
878
|
+
### Transform backticked spans in +str+ into <code> spans and return the
### result. A span opens with a run of one or more backticks and closes at
### the next occurrence of that same run; its content is stripped of
### surrounding whitespace and passed through #encode_code. If +str+ has no
### backtick at all it is returned unchanged. Raises a FormatError if an
### opening run is never closed.
def transform_code_spans( str, rs )
  @log.debug " Transforming code spans"

  # Set up the string scanner and just return the string unless there's at
  # least one backtick.
  @scanner.string = str.dup
  unless @scanner.exist?( /`/ )
    @scanner.terminate
    @log.debug "No backticks found for code span in %p" % str
    return str
  end

  @log.debug "Transforming code spans in %p" % str

  # Build the transformed text anew
  text = ''

  # Scan to the end of the string
  until @scanner.eos?

    # Scan up to an opening backtick
    if (( pre = @scanner.scan_until( /.?(?=`)/m ) ))
      text += pre
      @log.debug "Found backtick at %d after '...%s'" % [ @scanner.pos, text[-10, 10] ]

      # Make a pattern to find the end of the span
      opener = @scanner.scan( /`+/ )
      len = opener.length
      closer = Regexp::new( opener )
      @log.debug "Scanning for end of code span with %p" % closer

      # Scan until the end of the closing backtick sequence. Chop the
      # backticks off the resultant string, strip leading and trailing
      # whitespace, and encode any entities contained in it.
      codespan = @scanner.scan_until( closer ) or
        raise FormatError::new( @scanner.rest[0,20],
          "No %p found before end" % opener )

      @log.debug "Found close of code span at %d: %p" % [ @scanner.pos - len, codespan ]
      codespan.slice!( -len, len )
      text += "<code>%s</code>" %
        encode_code( codespan.strip, rs )

    # If there's no more backticks, just append the rest of the string
    # and move the scan pointer to the end
    else
      text += @scanner.rest
      @scanner.terminate
    end
  end

  return text
end
|
932
|
+
|
933
|
+
|
934
|
+
# Next, handle inline images: ![alt text](url "optional title")
# Don't forget: encode * and _
#
# No encoding flag is given: in Ruby an +s+ flag pins the pattern to
# Windows-31J, which makes matching raise on non-ASCII UTF-8 input.
InlineImageRegexp = %r{
    (                 # Whole match = $1
      !\[ (.*?) \]    # alt text = $2
      \([ ]*
        <?(\S+?)>?    # source url = $3
        [ ]*
        (?:           #
          (["'])      # quote char = $4
          (.*?)       # title = $5
          \4          # matching quote
          [ ]*
        )?            # title is optional
      \)
    )
  }x


# Reference-style images: ![alt text][id]
ReferenceImageRegexp = %r{
    (                 # Whole match = $1
      !\[ (.*?) \]    # Alt text = $2
      [ ]?            # Optional space
      (?:\n[ ]*)?     # One optional newline + spaces
      \[ (.*?) \]     # id = $3
    )
  }x

### Turn image markup in +str+ into image tags and return the result.
###
### Reference-style images are handled first: the id (or, for the shortcut
### form ![alt][], the alt text) is downcased and looked up in +rs.urls+ and
### +rs.titles+; markup whose id is not in the table is left untouched.
### Inline images are handled second. Double quotes in alt text and inline
### titles are replaced with &quot; so they cannot end the attribute early.
def transform_images( str, rs )
  @log.debug " Transforming images: %p" % [ str ]

  # Handle reference-style labeled images: ![alt text][id]
  with_refs = str.gsub( ReferenceImageRegexp ) {|match|
    whole, alt, linkid = $1, $2, $3.downcase
    @log.debug "Matched %p" % match
    alt = alt.gsub( /"/, '&quot;' )

    # for shortcut links like ![this][].
    linkid = alt.downcase if linkid.empty?

    if rs.urls.key?( linkid )
      url = escape_md( rs.urls[linkid] )
      @log.debug "Found url '%s' for linkid '%s' " % [ url, linkid ]

      # Build the tag
      result = %{<img src="%s" alt="%s"} % [ url, alt ]
      if rs.titles.key?( linkid )
        result += %{ title="%s"} % escape_md( rs.titles[linkid] )
      end
      result += EmptyElementSuffix

    else
      result = whole
    end

    @log.debug "Replacing %p with %p" % [ match, result ]
    result
  }

  # Inline image style
  with_refs.gsub( InlineImageRegexp ) {|match|
    @log.debug "Found inline image %p" % match
    alt, raw_url, title = $2, $3, $5
    url = escape_md( raw_url )
    alt = alt.gsub( /"/, '&quot;' )

    # Build the tag
    result = %{<img src="%s" alt="%s"} % [ url, alt ]
    unless title.nil?
      title = title.gsub( /"/, '&quot;' )
      result += %{ title="%s"} % escape_md( title )
    end
    result += EmptyElementSuffix

    @log.debug "Replacing %p with %p" % [ match, result ]
    result
  }
end
|
1016
|
+
|
1017
|
+
|
1018
|
+
# Regexp to match special characters in a code block
CodeEscapeRegexp = %r{( \* | _ | \{ | \} | \[ | \] | \\ )}x

### Escape any characters special to HTML and encode any characters special
### to Markdown in a copy of the given +str+ and return it.
###
### Ampersands are encoded first so that the entities produced for < and >
### are not themselves re-encoded. Each Markdown-special character is then
### replaced with its +:md5+ entry from EscapeTable. The render state +rs+
### is accepted but not used here.
def encode_code( str, rs )
  str.gsub( %r{&}, '&amp;' ).
    gsub( %r{<}, '&lt;' ).
    gsub( %r{>}, '&gt;' ).
    gsub( CodeEscapeRegexp ) {|match| EscapeTable[match][:md5]}
end
|
1029
|
+
|
1030
|
+
|
1031
|
+
|
1032
|
+
#################################################################
|
1033
|
+
### U T I L I T Y F U N C T I O N S
|
1034
|
+
#################################################################
|
1035
|
+
|
1036
|
+
### Escape any markdown characters in a copy of the given +str+ and return
### it. Only the emphasis characters * and _ are handled; each is replaced
### with its +:md5+ entry from EscapeTable.
def escape_md( str )
  str.
    gsub( /\*/, EscapeTable['*'][:md5] ).
    gsub( /_/, EscapeTable['_'][:md5] )
end
|
1043
|
+
|
1044
|
+
|
1045
|
+
# Matching constructs for tokenizing X/HTML
HTMLCommentRegexp  = %r{ <! ( -- .*? -- \s* )+ > }mx
XMLProcInstRegexp  = %r{ <\? .*? \?> }mx
MetaTag = Regexp::union( HTMLCommentRegexp, XMLProcInstRegexp )

HTMLTagOpenRegexp  = %r{ < [a-z/!$] [^<>]* }imx
HTMLTagCloseRegexp = %r{ > }x
HTMLTagPart = Regexp::union( HTMLTagOpenRegexp, HTMLTagCloseRegexp )

### Break the HTML source in +str+ into a series of tokens and return
### them. The tokens are just 2-element Array tuples with a type (+:tag+ or
### +:text+) and the actual content. If this function is called with a
### block, the type and text parts of each token will be yielded to it one
### at a time as they are extracted.
###
### Raises a RuntimeError if a tag is opened but the input ends before it is
### closed.
def tokenize_html( str )
  tokens = []
  @scanner.string = str.dup

  until @scanner.eos?
    @log.debug "Scanning from %p" % @scanner.rest

    # Match comments and PIs without nesting
    if (( token = @scanner.scan(MetaTag) ))
      type = :tag

    # Do nested matching for HTML tags
    elsif (( token = @scanner.scan(HTMLTagOpenRegexp) ))
      tagstart = @scanner.pos
      @log.debug " Found the start of a plain tag at %d" % tagstart

      # Start the token with the opening angle
      depth = 1
      type = :tag

      # Scan the rest of the tag, allowing unlimited nested <>s. If
      # the scanner runs out of text before the tag is closed, raise
      # an error.
      while depth.nonzero?

        # Scan either an opener or a closer
        chunk = @scanner.scan( HTMLTagPart ) or
          raise "Malformed tag at character %d: %p" % [ tagstart, token + @scanner.rest ]

        @log.debug " Found another part of the tag at depth %d: %p" % [ depth, chunk ]

        token += chunk

        # If the last character of the token so far is a closing
        # angle bracket, decrement the depth. Otherwise increment
        # it for a nested tag.
        depth += ( token[-1, 1] == '>' ? -1 : 1 )
        @log.debug " Depth is now #{depth}"
      end

    # Match text segments
    else
      @log.debug " Looking for a chunk of text"
      type = :text

      # Scan forward, always matching at least one character to move
      # the pointer beyond any non-tag '<'.
      token = @scanner.scan_until( /[^<]+/m )

      # Nothing but '<' characters remain, so the scan above cannot
      # match and would leave the pointer where it is forever. Take
      # what is left as the final text token instead.
      if token.nil?
        token = @scanner.rest
        @scanner.terminate
      end
    end

    @log.debug " type: %p, token: %p" % [ type, token ]

    # If a block is given, feed it one token at a time. Add the token to
    # the token list to be returned regardless.
    if block_given?
      yield( type, token )
    end
    tokens << [ type, token ]
  end

  return tokens
end
|
1124
|
+
|
1125
|
+
|
1126
|
+
### Return a copy of +str+ with angle brackets and ampersands HTML-encoded.
###
### An ampersand is left alone when it already begins something shaped like
### an entity (&name; / &#123; / &#x7b;). A left angle bracket is left alone
### when the next character could start a tag, a closing tag, a processing
### instruction or a declaration (a letter, '/', '?', '$' or '!').
def encode_html( str )
  str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w+);)/i, "&amp;" ).
    gsub( %r{<(?![a-z/?\$!])}i, "&lt;" )
end
|
1131
|
+
|
1132
|
+
|
1133
|
+
### Remove one level of line-leading tabs or spaces from a copy of +str+ and
### return it. One level is either a single tab or a run of between one and
### TabWidth spaces at the start of a line.
def outdent( str )
  str.gsub( /^(\t|[ ]{1,#{TabWidth}})/, '' )
end
|
1138
|
+
|
1139
|
+
end # class BlueCloth
|
1140
|
+
|
1141
|
+
|
1142
|
+
# Set the top-level 'Markdown' constant.
|
1143
|
+
# Alias BlueCloth as the top-level Markdown constant, but only when nothing
# has defined ::Markdown already, so an existing definition is left alone.
::Markdown = ::BlueCloth unless defined?( ::Markdown )
|
1144
|
+
|