Pimki 1.3.092 → 1.4.092
Sign up to get free protection for your applications and to get access to all the features.
- data/README +145 -131
- data/README-PIMKI +15 -5
- data/app/controllers/wiki.rb +167 -54
- data/app/models/author.rb +3 -3
- data/app/models/chunks/chunk.rb +3 -3
- data/app/models/chunks/engines.rb +18 -21
- data/app/models/chunks/include.rb +29 -29
- data/app/models/chunks/literal.rb +20 -20
- data/app/models/chunks/match.rb +19 -19
- data/app/models/chunks/nowiki.rb +31 -31
- data/app/models/chunks/nowiki_test.rb +14 -14
- data/app/models/chunks/test.rb +18 -18
- data/app/models/chunks/todo.rb +44 -23
- data/app/models/chunks/uri.rb +97 -97
- data/app/models/chunks/uri_test.rb +92 -92
- data/app/models/chunks/wiki.rb +4 -4
- data/app/models/chunks/wiki_symbols.rb +22 -22
- data/app/models/chunks/wiki_test.rb +36 -36
- data/app/models/page.rb +39 -7
- data/app/models/page_lock.rb +23 -23
- data/app/models/page_set.rb +72 -72
- data/app/models/page_test.rb +75 -75
- data/app/models/revision.rb +1 -1
- data/app/models/revision_test.rb +251 -251
- data/app/models/web.rb +19 -6
- data/app/models/web_test.rb +52 -52
- data/app/models/wiki_content.rb +131 -119
- data/app/models/wiki_service.rb +31 -16
- data/app/models/wiki_service_test.rb +15 -15
- data/app/models/wiki_words.rb +1 -1
- data/app/models/wiki_words_test.rb +12 -12
- data/app/views/bottom.rhtml +3 -3
- data/app/views/markdown_help.rhtml +15 -15
- data/app/views/menu.rhtml +20 -20
- data/app/views/navigation.rhtml +26 -26
- data/app/views/rdoc_help.rhtml +15 -15
- data/app/views/static_style_sheet.rhtml +237 -237
- data/app/views/style.rhtml +178 -178
- data/app/views/textile_help.rhtml +27 -27
- data/app/views/top.rhtml +7 -2
- data/app/views/wiki/authors.rhtml +15 -15
- data/app/views/wiki/bliki.rhtml +101 -101
- data/app/views/wiki/bliki_edit.rhtml +3 -0
- data/app/views/wiki/bliki_new.rhtml +3 -0
- data/app/views/wiki/bliki_revision.rhtml +90 -90
- data/app/views/wiki/edit.rhtml +12 -3
- data/app/views/wiki/edit_menu.rhtml +64 -47
- data/app/views/wiki/edit_web.rhtml +65 -18
- data/app/views/wiki/export.rhtml +14 -14
- data/app/views/wiki/feeds.rhtml +10 -10
- data/app/views/wiki/list.rhtml +17 -15
- data/app/views/wiki/locked.rhtml +13 -13
- data/app/views/wiki/login.rhtml +10 -10
- data/app/views/wiki/mind.rhtml +0 -1
- data/app/views/wiki/new.rhtml +8 -3
- data/app/views/wiki/new_system.rhtml +77 -77
- data/app/views/wiki/new_web.rhtml +63 -63
- data/app/views/wiki/page.rhtml +88 -82
- data/app/views/wiki/print.rhtml +15 -15
- data/app/views/wiki/published.rhtml +2 -1
- data/app/views/wiki/recently_revised.rhtml +31 -31
- data/app/views/wiki/revision.rhtml +1 -7
- data/app/views/wiki/rollback.rhtml +31 -0
- data/app/views/wiki/rss_feed.rhtml +21 -21
- data/app/views/wiki/search.rhtml +48 -48
- data/app/views/wiki/tex.rhtml +22 -22
- data/app/views/wiki/tex_web.rhtml +34 -34
- data/app/views/wiki/todo.rhtml +90 -67
- data/app/views/wiki/web_list.rhtml +12 -12
- data/app/views/wiki_words_help.rhtml +1 -1
- data/favicon.png +0 -0
- data/libraries/action_controller_servlet.rb +17 -2
- data/libraries/bluecloth.rb +1127 -1127
- data/libraries/diff/diff.rb +474 -474
- data/libraries/diff/diff_test.rb +79 -79
- data/libraries/erb.rb +490 -490
- data/libraries/madeleine/automatic.rb +418 -357
- data/libraries/madeleine/clock.rb +94 -94
- data/libraries/madeleine/files.rb +19 -0
- data/libraries/madeleine/zmarshal.rb +60 -0
- data/libraries/madeleine_service.rb +14 -15
- data/libraries/rdocsupport.rb +155 -155
- data/libraries/redcloth_for_tex.rb +869 -869
- data/libraries/redcloth_for_tex_test.rb +40 -40
- data/libraries/view_helper.rb +32 -32
- data/libraries/web_controller_server.rb +96 -94
- data/pimki.rb +47 -6
- metadata +18 -4
data/favicon.png
ADDED
Binary file
|
@@ -5,6 +5,15 @@ include WEBrick
|
|
5
5
|
|
6
6
|
require 'view_helper'
|
7
7
|
|
8
|
+
class FavIconHandler < HTTPServlet::AbstractServlet
|
9
|
+
def do_GET(req, res)
|
10
|
+
ico = File.read(File.join(Dir.pwd, 'favicon.png'))
|
11
|
+
res['content-type'] = 'image/png'
|
12
|
+
res['content-length'] = ico.size
|
13
|
+
res.body = ico
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
8
17
|
class ActionControllerServlet < HTTPServlet::AbstractServlet
|
9
18
|
@@template_root = "./views"
|
10
19
|
def self.template_root() @@template_root end
|
@@ -25,7 +34,7 @@ class ActionControllerServlet < HTTPServlet::AbstractServlet
|
|
25
34
|
@assigns = {}
|
26
35
|
@performed_render = @performed_redirect = false
|
27
36
|
|
28
|
-
@logger.info "Performing #{action_name}"
|
37
|
+
@logger.info "Performing #{action_name} (#{request_path.join('/')})"
|
29
38
|
@logger.info " Parameters: #{@params.inspect}"
|
30
39
|
@logger.info " Cookies: #{@req.cookies.collect { |c| "#{c.name} => #{c.value}" }.join(", ") }"
|
31
40
|
|
@@ -159,7 +168,13 @@ class ActionControllerServlet < HTTPServlet::AbstractServlet
|
|
159
168
|
|
160
169
|
def template_result(template_path)
|
161
170
|
@assigns.each { |key, value| instance_variable_set "@#{key}", value }
|
162
|
-
|
171
|
+
begin
|
172
|
+
ERB.new(IO.readlines(template_path).join).result(binding)
|
173
|
+
rescue Exception => detail
|
174
|
+
@logger.error "Processing #{template_path}"
|
175
|
+
@logger.error detail
|
176
|
+
raise
|
177
|
+
end
|
163
178
|
end
|
164
179
|
|
165
180
|
# Converts the class name from something like "OneModule::TwoModule::NeatController"
|
data/libraries/bluecloth.rb
CHANGED
@@ -1,1127 +1,1127 @@
|
|
1
|
-
#!/usr/bin/ruby
|
2
|
-
#
|
3
|
-
# Bluecloth is a Ruby implementation of Markdown, a text-to-HTML conversion
|
4
|
-
# tool.
|
5
|
-
#
|
6
|
-
# == Synopsis
|
7
|
-
#
|
8
|
-
# doc = BlueCloth::new "
|
9
|
-
# ## Test document ##
|
10
|
-
#
|
11
|
-
# Just a simple test.
|
12
|
-
# "
|
13
|
-
#
|
14
|
-
# puts doc.to_html
|
15
|
-
#
|
16
|
-
# == Authors
|
17
|
-
#
|
18
|
-
# * Michael Granger <ged@FaerieMUD.org>
|
19
|
-
#
|
20
|
-
# == Contributors
|
21
|
-
#
|
22
|
-
# * Martin Chase <stillflame@FaerieMUD.org> - Peer review, helpful suggestions
|
23
|
-
# * Florian Gross <flgr@ccan.de> - Filter options, suggestions
|
24
|
-
#
|
25
|
-
# == Copyright
|
26
|
-
#
|
27
|
-
# Original version:
|
28
|
-
# Copyright (c) 2003-2004 John Gruber
|
29
|
-
# <http://daringfireball.net/>
|
30
|
-
# All rights reserved.
|
31
|
-
#
|
32
|
-
# Ruby port:
|
33
|
-
# Copyright (c) 2004 The FaerieMUD Consortium.
|
34
|
-
#
|
35
|
-
# BlueCloth is free software; you can redistribute it and/or modify it under the
|
36
|
-
# terms of the GNU General Public License as published by the Free Software
|
37
|
-
# Foundation; either version 2 of the License, or (at your option) any later
|
38
|
-
# version.
|
39
|
-
#
|
40
|
-
# BlueCloth is distributed in the hope that it will be useful, but WITHOUT ANY
|
41
|
-
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
|
42
|
-
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
43
|
-
#
|
44
|
-
# == To-do
|
45
|
-
#
|
46
|
-
# * Refactor some of the larger uglier methods that have to do their own
|
47
|
-
# brute-force scanning because of lack of Perl features in Ruby's Regexp
|
48
|
-
# class. Alternately, could add a dependency on 'pcre' and use most Perl
|
49
|
-
# regexps.
|
50
|
-
#
|
51
|
-
# * Put the StringScanner in the render state for thread-safety.
|
52
|
-
#
|
53
|
-
# == Version
|
54
|
-
#
|
55
|
-
# $Id: bluecloth.rb,v 1.1.1.1 2004/11/09 02:02:58 assaph Exp $
|
56
|
-
#
|
57
|
-
|
58
|
-
require 'digest/md5'
|
59
|
-
require 'logger'
|
60
|
-
require 'strscan'
|
61
|
-
|
62
|
-
|
63
|
-
### BlueCloth is a Ruby implementation of Markdown, a text-to-HTML conversion
|
64
|
-
### tool.
|
65
|
-
class BlueCloth < String
|
66
|
-
|
67
|
-
### Exception class for formatting errors.
|
68
|
-
class FormatError < RuntimeError
|
69
|
-
|
70
|
-
### Create a new FormatError with the given source +str+ and an optional
|
71
|
-
### message about the +specific+ error.
|
72
|
-
def initialize( str, specific=nil )
|
73
|
-
if specific
|
74
|
-
msg = "Bad markdown format near %p: %s" % [ str, specific ]
|
75
|
-
else
|
76
|
-
msg = "Bad markdown format near %p" % str
|
77
|
-
end
|
78
|
-
|
79
|
-
super( msg )
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
|
84
|
-
# Release Version
|
85
|
-
Version = '0.0.3'
|
86
|
-
|
87
|
-
# SVN Revision
|
88
|
-
SvnRev = %q$Rev: 37 $
|
89
|
-
|
90
|
-
# SVN Id tag
|
91
|
-
SvnId = %q$Id: bluecloth.rb,v 1.1.1.1 2004/11/09 02:02:58 assaph Exp $
|
92
|
-
|
93
|
-
# SVN URL
|
94
|
-
SvnUrl = %q$URL: svn+ssh://cvs.faeriemud.org/var/svn/BlueCloth/trunk/lib/bluecloth.rb $
|
95
|
-
|
96
|
-
|
97
|
-
# Rendering state struct. Keeps track of URLs, titles, and HTML blocks
|
98
|
-
# midway through a render. I prefer this to the globals of the Perl version
|
99
|
-
# because globals make me break out in hives. Or something.
|
100
|
-
RenderState = Struct::new( "RenderState", :urls, :titles, :html_blocks, :log )
|
101
|
-
|
102
|
-
# Tab width for #detab! if none is specified
|
103
|
-
TabWidth = 4
|
104
|
-
|
105
|
-
# The tag-closing string -- set to '>' for HTML
|
106
|
-
EmptyElementSuffix = "/>";
|
107
|
-
|
108
|
-
# Table of MD5 sums for escaped characters
|
109
|
-
EscapeTable = {}
|
110
|
-
'\\`*_{}[]()#.!'.split(//).each {|char|
|
111
|
-
hash = Digest::MD5::hexdigest( char )
|
112
|
-
|
113
|
-
EscapeTable[ char ] = {
|
114
|
-
:md5 => hash,
|
115
|
-
:md5re => Regexp::new( hash ),
|
116
|
-
:re => Regexp::new( '\\\\' + Regexp::escape(char) ),
|
117
|
-
}
|
118
|
-
}
|
119
|
-
|
120
|
-
|
121
|
-
#################################################################
|
122
|
-
### I N S T A N C E M E T H O D S
|
123
|
-
#################################################################
|
124
|
-
|
125
|
-
### Create a new BlueCloth string.
|
126
|
-
def initialize( content="", *restrictions )
|
127
|
-
@log = Logger::new( $deferr )
|
128
|
-
@log.level = $DEBUG ?
|
129
|
-
Logger::DEBUG :
|
130
|
-
($VERBOSE ? Logger::INFO : Logger::WARN)
|
131
|
-
@scanner = nil
|
132
|
-
|
133
|
-
# Add any restrictions, and set the line-folding attribute to reflect
|
134
|
-
# what happens by default.
|
135
|
-
restrictions.flatten.each {|r| __send__("#{r}=", true) }
|
136
|
-
@fold_lines = true
|
137
|
-
|
138
|
-
super( content )
|
139
|
-
|
140
|
-
@log.debug "String is: %p" % self
|
141
|
-
end
|
142
|
-
|
143
|
-
|
144
|
-
######
|
145
|
-
public
|
146
|
-
######
|
147
|
-
|
148
|
-
# Filters for controlling what gets output for untrusted input. (But really,
|
149
|
-
# you're filtering bad stuff out of untrusted input at submission-time via
|
150
|
-
# untainting, aren't you?)
|
151
|
-
attr_accessor :filter_html, :filter_styles
|
152
|
-
|
153
|
-
# RedCloth-compatibility accessor. Line-folding is part of Markdown syntax,
|
154
|
-
# so this isn't used by anything.
|
155
|
-
attr_accessor :fold_lines
|
156
|
-
|
157
|
-
|
158
|
-
### Render Markdown-formatted text in this string object as HTML and return
|
159
|
-
### it. The parameter is for compatibility with RedCloth, and is currently
|
160
|
-
### unused, though that may change in the future.
|
161
|
-
def to_html( lite=false )
|
162
|
-
|
163
|
-
# Create a StringScanner we can reuse for various lexing tasks
|
164
|
-
@scanner = StringScanner::new( '' )
|
165
|
-
|
166
|
-
# Make a structure to carry around stuff that gets placeholdered out of
|
167
|
-
# the source.
|
168
|
-
rs = RenderState::new( {}, {}, {} )
|
169
|
-
|
170
|
-
# Make a copy of the string with normalized line endings, tabs turned to
|
171
|
-
# spaces, and a couple of guaranteed newlines at the end
|
172
|
-
text = self.gsub( /\r\n?/, "\n" ).detab
|
173
|
-
text += "\n\n"
|
174
|
-
@log.debug "Normalized line-endings: %p" % text
|
175
|
-
|
176
|
-
# Filter HTML if we're asked to do so
|
177
|
-
if self.filter_html
|
178
|
-
text.gsub!( "<", "&lt;" )
|
179
|
-
text.gsub!( ">", "&gt;" )
|
180
|
-
@log.debug "Filtered HTML: %p" % text
|
181
|
-
end
|
182
|
-
|
183
|
-
# Simplify blank lines
|
184
|
-
text.gsub!( /^ +$/, '' )
|
185
|
-
@log.debug "Tabs -> spaces/blank lines stripped: %p" % text
|
186
|
-
|
187
|
-
# Replace HTML blocks with placeholders
|
188
|
-
text = hide_html_blocks( text, rs )
|
189
|
-
@log.debug "Hid HTML blocks: %p" % text
|
190
|
-
@log.debug "Render state: %p" % rs
|
191
|
-
|
192
|
-
# Strip link definitions, store in render state
|
193
|
-
text = strip_link_definitions( text, rs )
|
194
|
-
@log.debug "Stripped link definitions: %p" % text
|
195
|
-
@log.debug "Render state: %p" % rs
|
196
|
-
|
197
|
-
# Escape meta-characters
|
198
|
-
text = escape_special_chars( text )
|
199
|
-
@log.debug "Escaped special characters: %p" % text
|
200
|
-
|
201
|
-
# Transform block-level constructs
|
202
|
-
text = apply_block_transforms( text, rs )
|
203
|
-
@log.debug "After block-level transforms: %p" % text
|
204
|
-
|
205
|
-
# Now swap back in all the escaped characters
|
206
|
-
text = unescape_special_chars( text )
|
207
|
-
@log.debug "After unescaping special characters: %p" % text
|
208
|
-
|
209
|
-
return text
|
210
|
-
end
|
211
|
-
|
212
|
-
|
213
|
-
### Convert tabs in +str+ to spaces.
|
214
|
-
def detab( tabwidth=TabWidth )
|
215
|
-
copy = self.dup
|
216
|
-
copy.detab!( tabwidth )
|
217
|
-
return copy
|
218
|
-
end
|
219
|
-
|
220
|
-
|
221
|
-
### Convert tabs to spaces in place and return self if any were converted.
|
222
|
-
def detab!( tabwidth=TabWidth )
|
223
|
-
newstr = self.split( /\n/ ).collect {|line|
|
224
|
-
line.gsub( /(.*?)\t/ ) do
|
225
|
-
$1 + ' ' * (tabwidth - $1.length % tabwidth)
|
226
|
-
end
|
227
|
-
}.join("\n")
|
228
|
-
self.replace( newstr )
|
229
|
-
end
|
230
|
-
|
231
|
-
|
232
|
-
#######
|
233
|
-
#private
|
234
|
-
#######
|
235
|
-
|
236
|
-
### Do block-level transforms on a copy of +str+ using the specified render
|
237
|
-
### state +rs+ and return the results.
|
238
|
-
def apply_block_transforms( str, rs )
|
239
|
-
# Port: This was called '_runBlockGamut' in the original
|
240
|
-
|
241
|
-
@log.debug "Applying block transforms to:\n %p" % str
|
242
|
-
text = transform_headers( str, rs )
|
243
|
-
text = transform_hrules( text, rs )
|
244
|
-
text = transform_lists( text, rs )
|
245
|
-
text = transform_code_blocks( text, rs )
|
246
|
-
text = transform_block_quotes( text, rs )
|
247
|
-
text = transform_auto_links( text, rs )
|
248
|
-
text = hide_html_blocks( text, rs )
|
249
|
-
|
250
|
-
text = form_paragraphs( text, rs )
|
251
|
-
|
252
|
-
@log.debug "Done with block transforms:\n %p" % text
|
253
|
-
return text
|
254
|
-
end
|
255
|
-
|
256
|
-
|
257
|
-
### Apply Markdown span transforms to a copy of the specified +str+ with the
|
258
|
-
### given render state +rs+ and return it.
|
259
|
-
def apply_span_transforms( str, rs )
|
260
|
-
@log.debug "Applying span transforms to:\n %p" % str
|
261
|
-
|
262
|
-
str = transform_code_spans( str, rs )
|
263
|
-
str = encode_html( str )
|
264
|
-
str = transform_images( str, rs )
|
265
|
-
str = transform_anchors( str, rs )
|
266
|
-
str = transform_italic_and_bold( str, rs )
|
267
|
-
|
268
|
-
# Hard breaks
|
269
|
-
str.gsub!( / {2,}\n/, "<br#{EmptyElementSuffix}\n" )
|
270
|
-
|
271
|
-
@log.debug "Done with span transforms:\n %p" % str
|
272
|
-
return str
|
273
|
-
end
|
274
|
-
|
275
|
-
|
276
|
-
# The list of tags which are considered block-level constructs and an
|
277
|
-
# alternation pattern suitable for use in regexps made from the list
|
278
|
-
BlockTags = %w[ p div h[1-6] blockquote pre table dl ol ul script ]
|
279
|
-
BlockTagPattern = BlockTags.join('|')
|
280
|
-
|
281
|
-
# Nested blocks:
|
282
|
-
# <div>
|
283
|
-
# <div>
|
284
|
-
# tags for inner block must be indented.
|
285
|
-
# </div>
|
286
|
-
# </div>
|
287
|
-
StrictBlockRegex = %r{
|
288
|
-
^ # Start of line
|
289
|
-
<(#{BlockTagPattern}) # Start tag: \2
|
290
|
-
\b # word break
|
291
|
-
(.*\n)*? # Any number of lines, minimal match
|
292
|
-
</\1> # Matching end tag
|
293
|
-
[ ]* # trailing spaces
|
294
|
-
(?=\n+|\Z) # End of line or document
|
295
|
-
}ix
|
296
|
-
|
297
|
-
# More-liberal block-matching
|
298
|
-
LooseBlockRegex = %r{
|
299
|
-
^ # Start of line
|
300
|
-
<(#{BlockTagPattern}) # start tag: \2
|
301
|
-
\b # word break
|
302
|
-
(.*\n)*? # Any number of lines, minimal match
|
303
|
-
.*</\1> # Anything + Matching end tag
|
304
|
-
[ ]* # trailing spaces
|
305
|
-
(?=\n+|\Z) # End of line or document
|
306
|
-
}ix
|
307
|
-
|
308
|
-
# Special case for <hr />.
|
309
|
-
HruleBlockRegex = %r{
|
310
|
-
( # $1
|
311
|
-
\A\n? # Start of doc + optional \n
|
312
|
-
| # or
|
313
|
-
.*\n\n # anything + blank line
|
314
|
-
)
|
315
|
-
( # save in $2
|
316
|
-
[ ]* # Any spaces
|
317
|
-
<hr # Tag open
|
318
|
-
\b # Word break
|
319
|
-
([^<>])*? # Attributes
|
320
|
-
/?> # Tag close
|
321
|
-
(?=\n\n|\Z) # followed by a blank line or end of document
|
322
|
-
)
|
323
|
-
}ix
|
324
|
-
|
325
|
-
### Replace all blocks of HTML in +str+ that start in the left margin with
|
326
|
-
### tokens.
|
327
|
-
def hide_html_blocks( str, rs )
|
328
|
-
@log.debug "Hiding HTML blocks in %p" % str
|
329
|
-
|
330
|
-
# Tokenizer proc to pass to gsub
|
331
|
-
tokenize = lambda {|match|
|
332
|
-
key = Digest::MD5::hexdigest( match )
|
333
|
-
rs.html_blocks[ key ] = match
|
334
|
-
@log.debug "Replacing %p with %p" %
|
335
|
-
[ match, key ]
|
336
|
-
"\n\n#{key}\n\n"
|
337
|
-
}
|
338
|
-
|
339
|
-
rval = str.dup
|
340
|
-
|
341
|
-
@log.debug "Finding blocks with the strict regex..."
|
342
|
-
rval.gsub!( StrictBlockRegex, &tokenize )
|
343
|
-
|
344
|
-
@log.debug "Finding blocks with the loose regex..."
|
345
|
-
rval.gsub!( LooseBlockRegex, &tokenize )
|
346
|
-
|
347
|
-
@log.debug "Finding hrules..."
|
348
|
-
rval.gsub!( HruleBlockRegex ) {|match| $1 + tokenize[$2] }
|
349
|
-
|
350
|
-
return rval
|
351
|
-
end
|
352
|
-
|
353
|
-
|
354
|
-
# Link defs are in the form: ^[id]: url "optional title"
|
355
|
-
LinkRegex = %r{
|
356
|
-
^[ ]*\[(.+)\]: # id = $1
|
357
|
-
[ ]*
|
358
|
-
\n? # maybe *one* newline
|
359
|
-
[ ]*
|
360
|
-
(\S+) # url = $2
|
361
|
-
[ ]*
|
362
|
-
\n? # maybe one newline
|
363
|
-
[ ]*
|
364
|
-
(?:
|
365
|
-
# Titles are delimited by "quotes" or (parens).
|
366
|
-
["(]
|
367
|
-
(.+?) # title = $3
|
368
|
-
[")] # Matching ) or "
|
369
|
-
[ ]*
|
370
|
-
)? # title is optional
|
371
|
-
(?:\n+|\Z)
|
372
|
-
}x
|
373
|
-
|
374
|
-
### Strip link definitions from +str+, storing them in the given RenderState
|
375
|
-
### +rs+.
|
376
|
-
def strip_link_definitions( str, rs )
|
377
|
-
str.gsub( LinkRegex ) {|match|
|
378
|
-
id, url, title = $1, $2, $3
|
379
|
-
|
380
|
-
rs.urls[ id.downcase ] = encode_html( url )
|
381
|
-
unless title.nil?
|
382
|
-
rs.titles[ id.downcase ] = title.gsub( /"/, "&quot;" )
|
383
|
-
end
|
384
|
-
""
|
385
|
-
}
|
386
|
-
end
|
387
|
-
|
388
|
-
|
389
|
-
### Escape special characters in the given +str+
|
390
|
-
def escape_special_chars( str )
|
391
|
-
@log.debug " Escaping special characters"
|
392
|
-
text = ''
|
393
|
-
|
394
|
-
tokenize_html( str ) {|token, str|
|
395
|
-
@log.debug " Adding %p token %p" % [ token, str ]
|
396
|
-
case token
|
397
|
-
|
398
|
-
# Within tags, encode * and _
|
399
|
-
when :tag
|
400
|
-
text += str.
|
401
|
-
gsub( /\*/, EscapeTable['*'][:md5] ).
|
402
|
-
gsub( /_/, EscapeTable['_'][:md5] )
|
403
|
-
|
404
|
-
# Encode backslashed stuff in regular text
|
405
|
-
when :text
|
406
|
-
text += encode_backslash_escapes( str )
|
407
|
-
else
|
408
|
-
raise TypeError, "Unknown token type %p" % token
|
409
|
-
end
|
410
|
-
}
|
411
|
-
|
412
|
-
@log.debug " Text with escapes is now: %p" % text
|
413
|
-
return text
|
414
|
-
end
|
415
|
-
|
416
|
-
|
417
|
-
### Swap escaped special characters in a copy of the given +str+ and return
|
418
|
-
### it.
|
419
|
-
def unescape_special_chars( str )
|
420
|
-
EscapeTable.each {|char, hash|
|
421
|
-
@log.debug "Unescaping escaped %p with %p" %
|
422
|
-
[ char, hash[:md5re] ]
|
423
|
-
str.gsub!( hash[:md5re], char )
|
424
|
-
}
|
425
|
-
|
426
|
-
return str
|
427
|
-
end
|
428
|
-
|
429
|
-
|
430
|
-
### Return a copy of the given +str+ with any backslashed special character
|
431
|
-
### in it replaced with MD5 placeholders.
|
432
|
-
def encode_backslash_escapes( str )
|
433
|
-
# Make a copy with any double-escaped backslashes encoded
|
434
|
-
text = str.gsub( /\\\\/, EscapeTable['\\'][:md5] )
|
435
|
-
|
436
|
-
EscapeTable.each_pair {|char, esc|
|
437
|
-
next if char == '\\'
|
438
|
-
text.gsub!( esc[:re], esc[:md5] )
|
439
|
-
}
|
440
|
-
|
441
|
-
return text
|
442
|
-
end
|
443
|
-
|
444
|
-
|
445
|
-
### Transform any Markdown-style horizontal rules in a copy of the specified
|
446
|
-
### +str+ and return it.
|
447
|
-
def transform_hrules( str, rs )
|
448
|
-
@log.debug " Transforming horizontal rules"
|
449
|
-
str.gsub( /^( ?[\-\*] ?){3,}$/, "\n<hr#{EmptyElementSuffix}\n" )
|
450
|
-
end
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
# Pattern to transform lists
|
455
|
-
ListRegexp = %r{
|
456
|
-
(?:
|
457
|
-
^[ ]{0,#{TabWidth - 1}} # Indent < tab width
|
458
|
-
(\*|\d+\.) # unordered or ordered ($1)
|
459
|
-
[ ]+ # At least one space
|
460
|
-
)
|
461
|
-
(?m:.+?) # item content (include newlines)
|
462
|
-
(?:
|
463
|
-
\z # Either EOF
|
464
|
-
| # or
|
465
|
-
\n{2,} # Blank line...
|
466
|
-
(?=\S) # ...followed by non-space
|
467
|
-
(?![ ]* (\*|\d+\.) [ ]+) # ...but not another item
|
468
|
-
)
|
469
|
-
}x
|
470
|
-
|
471
|
-
### Transform Markdown-style lists in a copy of the specified +str+ and
|
472
|
-
### return it.
|
473
|
-
def transform_lists( str, rs )
|
474
|
-
@log.debug " Transforming lists at %p" % (str[0,100] + '...')
|
475
|
-
|
476
|
-
str.gsub( ListRegexp ) {|list|
|
477
|
-
@log.debug " Found list %p" % list
|
478
|
-
list_type = ($1 == '*' ? "ul" : "ol")
|
479
|
-
list.gsub!( /\n{2,}/, "\n\n\n" )
|
480
|
-
|
481
|
-
%{<%s>\n%s</%s>\n} % [
|
482
|
-
list_type,
|
483
|
-
transform_list_items( list, rs ),
|
484
|
-
list_type,
|
485
|
-
]
|
486
|
-
}
|
487
|
-
end
|
488
|
-
|
489
|
-
|
490
|
-
# Pattern for transforming list items
|
491
|
-
ListItemRegexp = %r{
|
492
|
-
(\n)? # leading line = $1
|
493
|
-
(^[ ]*) # leading whitespace = $2
|
494
|
-
(\*|\d+\.) [ ]+ # list marker = $3
|
495
|
-
((?m:.+?) # list item text = $4
|
496
|
-
(\n{1,2}))
|
497
|
-
(?= \n* (\z | \2 (\*|\d+\.) [ ]+))
|
498
|
-
}x
|
499
|
-
|
500
|
-
### Transform list items in a copy of the given +str+ and return it.
|
501
|
-
def transform_list_items( str, rs )
|
502
|
-
@log.debug " Transforming list items"
|
503
|
-
|
504
|
-
# Trim trailing blank lines
|
505
|
-
str = str.sub( /\n{2,}\z/, "\n" )
|
506
|
-
|
507
|
-
str.gsub( ListItemRegexp ) {|line|
|
508
|
-
@log.debug " Found item line %p" % line
|
509
|
-
leading_line, item = $1, $4
|
510
|
-
|
511
|
-
if leading_line or /\n{2,}/.match( item )
|
512
|
-
@log.debug " Found leading line or item has a blank"
|
513
|
-
item = apply_block_transforms( outdent(item), rs )
|
514
|
-
else
|
515
|
-
# Recursion for sub-lists
|
516
|
-
@log.debug " Recursing for sublist"
|
517
|
-
item = transform_lists( outdent(item), rs ).chomp
|
518
|
-
item = apply_span_transforms( item, rs )
|
519
|
-
end
|
520
|
-
|
521
|
-
%{<li>%s</li>\n} % item
|
522
|
-
}
|
523
|
-
end
|
524
|
-
|
525
|
-
|
526
|
-
# Pattern for matching codeblocks
|
527
|
-
CodeBlockRegexp = %r{
|
528
|
-
(.?) # $1 = preceding character
|
529
|
-
:\n+ # colon + NL delimiter
|
530
|
-
( # $2 = the code block
|
531
|
-
(?:
|
532
|
-
(?:[ ]{#{TabWidth}} | \t) # a tab or tab-width of spaces
|
533
|
-
.*\n+
|
534
|
-
)+
|
535
|
-
)
|
536
|
-
((?=^[ ]{0,#{TabWidth}}\S)|\Z) # Lookahead for non-space at
|
537
|
-
# line-start, or end of doc
|
538
|
-
}x
|
539
|
-
|
540
|
-
### Transform Markdown-style codeblocks in a copy of the specified +str+ and
|
541
|
-
### return it.
|
542
|
-
def transform_code_blocks( str, rs )
|
543
|
-
@log.debug " Transforming code blocks"
|
544
|
-
|
545
|
-
str.gsub( CodeBlockRegexp ) {|block|
|
546
|
-
prevchar, codeblock = $1, $2
|
547
|
-
|
548
|
-
@log.debug " prevchar = %p" % prevchar
|
549
|
-
|
550
|
-
# Generated the codeblock
|
551
|
-
%{%s\n\n<pre><code>%s\n</code></pre>\n\n} % [
|
552
|
-
(prevchar.empty? || /\s/ =~ prevchar) ? "" : "#{prevchar}:",
|
553
|
-
encode_code( outdent(codeblock), rs ).rstrip,
|
554
|
-
]
|
555
|
-
}
|
556
|
-
end
|
557
|
-
|
558
|
-
|
559
|
-
# Pattern for matching Markdown blockquote blocks
|
560
|
-
BlockQuoteRegexp = %r{
|
561
|
-
(?:
|
562
|
-
^[ ]*>[ ]? # '>' at the start of a line
|
563
|
-
.+\n # rest of the first line
|
564
|
-
(?:.+\n)* # subsequent consecutive lines
|
565
|
-
\n* # blanks
|
566
|
-
)+
|
567
|
-
}x
|
568
|
-
|
569
|
-
### Transform Markdown-style blockquotes in a copy of the specified +str+
|
570
|
-
### and return it.
|
571
|
-
def transform_block_quotes( str, rs )
|
572
|
-
@log.debug " Transforming block quotes"
|
573
|
-
|
574
|
-
str.gsub( BlockQuoteRegexp ) {|quote|
|
575
|
-
@log.debug "Making blockquote from %p" % quote
|
576
|
-
quote.gsub!( /^[ ]*>[ ]?/, '' )
|
577
|
-
%{<blockquote>\n%s\n</blockquote>\n\n} %
|
578
|
-
apply_block_transforms( quote, rs ).
|
579
|
-
gsub( /^/, " " * TabWidth )
|
580
|
-
}
|
581
|
-
end
|
582
|
-
|
583
|
-
|
584
|
-
AutoAnchorURLRegexp = /<((https?|ftp):[^'">\s]+)>/
|
585
|
-
AutoAnchorEmailRegexp = %r{
|
586
|
-
<
|
587
|
-
(
|
588
|
-
[-.\w]+
|
589
|
-
\@
|
590
|
-
[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
|
591
|
-
)
|
592
|
-
>
|
593
|
-
}x
|
594
|
-
|
595
|
-
### Transform URLs in a copy of the specified +str+ into links and return
|
596
|
-
### it.
|
597
|
-
def transform_auto_links( str, rs )
|
598
|
-
@log.debug " Transforming auto-links"
|
599
|
-
str.gsub( AutoAnchorURLRegexp, %{<a href="\\1">\\1</a>}).
|
600
|
-
gsub( AutoAnchorEmailRegexp ) {|addr|
|
601
|
-
encode_email_address( unescape_special_chars($1) )
|
602
|
-
}
|
603
|
-
end
|
604
|
-
|
605
|
-
|
606
|
-
# Encoder functions to turn characters of an email address into encoded
|
607
|
-
# entities.
|
608
|
-
Encoders = [
|
609
|
-
lambda {|char| "&#%03d;" % char},
|
610
|
-
lambda {|char| "&#x%X;" % char},
|
611
|
-
lambda {|char| char.chr },
|
612
|
-
]
|
613
|
-
|
614
|
-
### Transform a copy of the given email +addr+ into an escaped version safer
|
615
|
-
### for posting publicly.
|
616
|
-
def encode_email_address( addr )
|
617
|
-
|
618
|
-
rval = ''
|
619
|
-
("mailto:" + addr).each_byte {|b|
|
620
|
-
case b
|
621
|
-
when ?:
|
622
|
-
rval += ":"
|
623
|
-
when ?@
|
624
|
-
rval += Encoders[ rand(2) ][ b ]
|
625
|
-
else
|
626
|
-
r = rand(100)
|
627
|
-
rval += (
|
628
|
-
r > 90 ? Encoders[2][ b ] :
|
629
|
-
r < 45 ? Encoders[1][ b ] :
|
630
|
-
Encoders[0][ b ]
|
631
|
-
)
|
632
|
-
end
|
633
|
-
}
|
634
|
-
|
635
|
-
return %{<a href="%s">%s</a>} % [ rval, rval.sub(/.+?:/, '') ]
|
636
|
-
end
|
637
|
-
|
638
|
-
|
639
|
-
# Regex for matching Setext-style headers
|
640
|
-
SetextHeaderRegexp = %r{
|
641
|
-
(.+) # The title text ($1)
|
642
|
-
\n
|
643
|
-
([\-=])+ # Match a line of = or -. Save only one in $2.
|
644
|
-
[ ]*\n+
|
645
|
-
}x
|
646
|
-
|
647
|
-
# Regexp for matching ATX-style headers
|
648
|
-
AtxHeaderRegexp = %r{
|
649
|
-
^(\#{1,6}) # $1 = string of #'s
|
650
|
-
[ ]*
|
651
|
-
(.+?) # $2 = Header text
|
652
|
-
[ ]*
|
653
|
-
\#* # optional closing #'s (not counted)
|
654
|
-
\n+
|
655
|
-
}x
|
656
|
-
|
657
|
-
### Apply Markdown header transforms to a copy of the given +str+ amd render
|
658
|
-
### state +rs+ and return the result.
|
659
|
-
def transform_headers( str, rs )
|
660
|
-
@log.debug " Transforming headers"
|
661
|
-
|
662
|
-
# Setext-style headers:
|
663
|
-
# Header 1
|
664
|
-
# ========
|
665
|
-
#
|
666
|
-
# Header 2
|
667
|
-
# --------
|
668
|
-
#
|
669
|
-
str.
|
670
|
-
gsub( SetextHeaderRegexp ) {|m|
|
671
|
-
@log.debug "Found setext-style header"
|
672
|
-
title, hdrchar = $1, $2
|
673
|
-
title = apply_span_transforms( title, rs )
|
674
|
-
|
675
|
-
case hdrchar
|
676
|
-
when '='
|
677
|
-
%[<h1>#{title}</h1>\n\n]
|
678
|
-
when '-'
|
679
|
-
%[<h2>#{title}</h2>\n\n]
|
680
|
-
else
|
681
|
-
title
|
682
|
-
end
|
683
|
-
}.
|
684
|
-
|
685
|
-
gsub( AtxHeaderRegexp ) {|m|
|
686
|
-
@log.debug "Found ATX-style header"
|
687
|
-
hdrchars, title = $1, $2
|
688
|
-
title = apply_span_transforms( title, rs )
|
689
|
-
|
690
|
-
level = hdrchars.length
|
691
|
-
%{<h%d>%s</h%d>\n\n} % [ level, title, level ]
|
692
|
-
}
|
693
|
-
end
|
694
|
-
|
695
|
-
|
696
|
-
### Wrap all remaining paragraph-looking text in a copy of +str+ inside <p>
|
697
|
-
### tags and return it.
|
698
|
-
def form_paragraphs( str, rs )
|
699
|
-
@log.debug " Forming paragraphs"
|
700
|
-
grafs = str.
|
701
|
-
sub( /\A\n+/, '' ).
|
702
|
-
sub( /\n+\z/, '' ).
|
703
|
-
split( /\n{2,}/ )
|
704
|
-
|
705
|
-
rval = grafs.collect {|graf|
|
706
|
-
|
707
|
-
# Unhashify HTML blocks if this is a placeholder
|
708
|
-
if rs.html_blocks.key?( graf )
|
709
|
-
rs.html_blocks[ graf ]
|
710
|
-
|
711
|
-
# Otherwise, wrap in <p> tags
|
712
|
-
else
|
713
|
-
apply_span_transforms(graf, rs).
|
714
|
-
sub( /^[ ]*/, '<p>' ) + '</p>'
|
715
|
-
end
|
716
|
-
}.join( "\n\n" )
|
717
|
-
|
718
|
-
@log.debug " Formed paragraphs: %p" % rval
|
719
|
-
return rval
|
720
|
-
end
|
721
|
-
|
722
|
-
|
723
|
-
# Pattern to match the linkid part of an anchor tag for reference-style
|
724
|
-
# links.
|
725
|
-
RefLinkIdRegex = %r{
|
726
|
-
[ ]? # Optional leading space
|
727
|
-
(?:\n[ ]*)? # Optional newline + spaces
|
728
|
-
\[
|
729
|
-
(.*?) # Id = $1
|
730
|
-
\]
|
731
|
-
}x
|
732
|
-
|
733
|
-
InlineLinkRegex = %r{
|
734
|
-
\( # Literal paren
|
735
|
-
[ ]* # Zero or more spaces
|
736
|
-
(.*?) # URI = $1
|
737
|
-
[ ]* # Zero or more spaces
|
738
|
-
(?: #
|
739
|
-
([\"\']) # Opening quote char = $2
|
740
|
-
(.*?) # Title = $3
|
741
|
-
\2 # Matching quote char
|
742
|
-
)? # Title is optional
|
743
|
-
\)
|
744
|
-
}x
|
745
|
-
|
746
|
-
### Apply Markdown anchor transforms to a copy of the specified +str+ with
|
747
|
-
### the given render state +rs+ and return it.
|
748
|
-
def transform_anchors( str, rs )
|
749
|
-
@log.debug " Transforming anchors"
|
750
|
-
@scanner.string = str.dup
|
751
|
-
text = ''
|
752
|
-
|
753
|
-
# Scan the whole string
|
754
|
-
until @scanner.empty?
|
755
|
-
|
756
|
-
if @scanner.scan( /\[/ )
|
757
|
-
link = ''; linkid = ''
|
758
|
-
depth = 1
|
759
|
-
startpos = @scanner.pos
|
760
|
-
@log.debug " Found a bracket-open at %d" % startpos
|
761
|
-
|
762
|
-
# Scan the rest of the tag, allowing unlimited nested []s. If
|
763
|
-
# the scanner runs out of text before the opening bracket is
|
764
|
-
# closed, append the text and return (wasn't a valid anchor).
|
765
|
-
while depth.nonzero?
|
766
|
-
linktext = @scanner.scan_until( /\]|\[/ )
|
767
|
-
|
768
|
-
if linktext
|
769
|
-
@log.debug " Found a bracket at depth %d: %p" %
|
770
|
-
[ depth, linktext ]
|
771
|
-
link += linktext
|
772
|
-
|
773
|
-
# Decrement depth for each closing bracket
|
774
|
-
depth += ( linktext[-1, 1] == ']' ? -1 : 1 )
|
775
|
-
@log.debug " Depth is now #{depth}"
|
776
|
-
|
777
|
-
# If there's no more brackets, it must not be an anchor, so
|
778
|
-
# just abort.
|
779
|
-
else
|
780
|
-
@log.debug " Missing closing brace, assuming non-link."
|
781
|
-
link += @scanner.rest
|
782
|
-
@scanner.terminate
|
783
|
-
return text + '[' + link
|
784
|
-
end
|
785
|
-
end
|
786
|
-
link.slice!( -1 ) # Trim final ']'
|
787
|
-
@log.debug " Found leading link %p" % link
|
788
|
-
|
789
|
-
# Look for a reference-style second part
|
790
|
-
if @scanner.scan( RefLinkIdRegex )
|
791
|
-
linkid = @scanner[1]
|
792
|
-
linkid = link.dup if linkid.empty?
|
793
|
-
linkid.downcase!
|
794
|
-
@log.debug " Found a linkid: %p" % linkid
|
795
|
-
|
796
|
-
# If there's a matching link in the link table, build an
|
797
|
-
# anchor tag for it.
|
798
|
-
if rs.urls.key?( linkid )
|
799
|
-
@log.debug " Found link key in the link table: %p" %
|
800
|
-
rs.urls[linkid]
|
801
|
-
url = escape_md( rs.urls[linkid] )
|
802
|
-
|
803
|
-
text += %{<a href="#{url}"}
|
804
|
-
if rs.titles.key?(linkid)
|
805
|
-
text += %{ title="%s"} % escape_md( rs.titles[linkid] )
|
806
|
-
end
|
807
|
-
text += %{>#{link}</a>}
|
808
|
-
|
809
|
-
# If the link referred to doesn't exist, just append the raw
|
810
|
-
# source to the result
|
811
|
-
else
|
812
|
-
@log.debug " Linkid %p not found in link table" % linkid
|
813
|
-
@log.debug " Appending original string instead: %p" %
|
814
|
-
@scanner.string[ startpos-1 .. @scanner.pos ]
|
815
|
-
text += @scanner.string[ startpos-1 .. @scanner.pos ]
|
816
|
-
end
|
817
|
-
|
818
|
-
# ...or for an inline style second part
|
819
|
-
elsif @scanner.scan( InlineLinkRegex )
|
820
|
-
url = @scanner[1]
|
821
|
-
title = @scanner[3]
|
822
|
-
@log.debug " Found an inline link to %p" % url
|
823
|
-
|
824
|
-
text += %{<a href="%s"} % escape_md( url )
|
825
|
-
if title
|
826
|
-
text += %{ title="%s"} % escape_md( title )
|
827
|
-
end
|
828
|
-
text += %{>#{link}</a>}
|
829
|
-
|
830
|
-
# No linkid part: just append the first part as-is.
|
831
|
-
else
|
832
|
-
@log.debug "No linkid, so no anchor. Appending literal text."
|
833
|
-
text += @scanner.string[ startpos-1 .. @scanner.pos-1 ]
|
834
|
-
end # if linkid
|
835
|
-
|
836
|
-
# Plain text
|
837
|
-
else
|
838
|
-
@log.debug " Scanning to the next link from %p" % @scanner.rest
|
839
|
-
text += @scanner.scan( /[^\[]+/ )
|
840
|
-
end
|
841
|
-
|
842
|
-
end # until @scanner.empty?
|
843
|
-
|
844
|
-
return text
|
845
|
-
end
|
846
|
-
|
847
|
-
# Pattern to match strong emphasis in Markdown text. The emphasised text
# ($2) must start and end with a non-space character; a single character
# counts.
BoldRegexp = %r{ (\*\*|__) (\S|\S.*?\S) \1 }x

# Pattern to match normal emphasis in Markdown text. Same rules as
# BoldRegexp, with a single delimiter character.
ItalicRegexp = %r{ (\*|_) (\S|\S.*?\S) \1 }x

### Transform italic- and bold-encoded text in a copy of the specified +str+
### and return it. Bold is handled first so that '**' is not consumed as two
### italic markers. The render state +rs+ is accepted for signature parity
### with the other span transforms and is not used.
def transform_italic_and_bold( str, rs )
  @log.debug " Transforming italic and bold"

  str.
    gsub( BoldRegexp, %{<strong>\\2</strong>} ).
    gsub( ItalicRegexp, %{<em>\\2</em>} )
end
|
862
|
-
|
863
|
-
|
864
|
-
### Transform backticked spans into <code> spans.
|
865
|
-
def transform_code_spans( str, rs )
|
866
|
-
@log.debug " Transforming code spans"
|
867
|
-
|
868
|
-
# Set up the string scanner and just return the string unless there's at
|
869
|
-
# least one backtick.
|
870
|
-
@scanner.string = str.dup
|
871
|
-
unless @scanner.exist?( /`/ )
|
872
|
-
@scanner.terminate
|
873
|
-
@log.debug "No backticks found for code span in %p" % str
|
874
|
-
return str
|
875
|
-
end
|
876
|
-
|
877
|
-
@log.debug "Transforming code spans in %p" % str
|
878
|
-
|
879
|
-
# Build the transformed text anew
|
880
|
-
text = ''
|
881
|
-
|
882
|
-
# Scan to the end of the string
|
883
|
-
until @scanner.empty?
|
884
|
-
|
885
|
-
# Scan up to an opening backtick
|
886
|
-
if pre = @scanner.scan_until( /.?(?=`)/m )
|
887
|
-
text += pre
|
888
|
-
@log.debug "Found backtick at %d after '...%s'" %
|
889
|
-
[ @scanner.pos, text[-10, 10] ]
|
890
|
-
|
891
|
-
# Make a pattern to find the end of the span
|
892
|
-
opener = @scanner.scan( /`+/ )
|
893
|
-
len = opener.length
|
894
|
-
closer = Regexp::new( opener )
|
895
|
-
@log.debug "Scanning for end of code span with %p" % closer
|
896
|
-
|
897
|
-
# Scan until the end of the closing backtick sequence. Chop the
|
898
|
-
# backticks off the resultant string, strip leading and trailing
|
899
|
-
# whitespace, and encode any enitites contained in it.
|
900
|
-
codespan = @scanner.scan_until( closer ) or
|
901
|
-
raise FormatError::new( @scanner.rest[0,20],
|
902
|
-
"No %p found before end" % opener )
|
903
|
-
|
904
|
-
@log.debug "Found close of code span at %d: %p" %
|
905
|
-
[ @scanner.pos - len, codespan ]
|
906
|
-
codespan.slice!( -len, len )
|
907
|
-
text += "<code>%s</code>" %
|
908
|
-
encode_code( codespan.strip, rs )
|
909
|
-
|
910
|
-
# If there's no more backticks, just append the rest of the string
|
911
|
-
# and move the scan pointer to the end
|
912
|
-
else
|
913
|
-
text += @scanner.rest
|
914
|
-
@scanner.terminate
|
915
|
-
end
|
916
|
-
end
|
917
|
-
|
918
|
-
return text
|
919
|
-
end
|
920
|
-
|
921
|
-
|
922
|
-
# Inline images: ![alt text](url "optional title")
InlineImageRegexp = %r{
  (                   # Whole match = $1
    !\[ (.*?) \]      # alt text = $2
    \([ ]* (\S+) [ ]* # source url = $3
    (                 # title = $4
      (["'])          # quote char = $5
      .*?
      \5              # matching quote
      [ ]*
    )?                # title is optional
    \)
  )
}x

# Reference-style images: ![alt text][id]
ReferenceImageRegexp = %r{
  (                   # Whole match = $1
    !\[ (.*?) \]      # Alt text = $2
    [ ]?              # Optional space
    (?:\n[ ]*)?       # One optional newline + spaces
    \[ (.*?) \]       # id = $3
  )
}x

### Turn image markup in a copy of +str+ into <img> tags and return it.
### Reference-style images are resolved against <tt>rs.urls</tt> and
### <tt>rs.titles</tt>; a reference to an unknown id is left untouched.
### '*' and '_' in URLs and titles are escaped via #escape_md so they are not
### later mistaken for emphasis.
def transform_images( str, rs )
  @log.debug " Transforming images"

  # Handle reference-style labeled images: ![alt text][id]
  str.
    gsub( ReferenceImageRegexp ) {|match|
      whole, alt, linkid = $1, $2, $3.downcase
      @log.debug "Matched %p" % match

      # for shortcut links like ![this][].
      linkid = alt.downcase if linkid.empty?

      if rs.urls.key?( linkid )
        url = escape_md( rs.urls[linkid] )
        @log.debug "Found url '%s' for linkid '%s' " % [ url, linkid ]

        # Build the tag
        result = %{<img src="%s" alt="%s"} % [ url, alt ]
        if rs.titles.key?( linkid )
          result += %{ title="%s"} % escape_md( rs.titles[linkid] )
        end
        result += EmptyElementSuffix
      else
        result = whole
      end

      @log.debug "Replacing %p with %p" % [ match, result ]
      result
    }.

    # Inline image style
    gsub( InlineImageRegexp ) {|match|
      alt, rawurl, title = $2, $3, $4
      @log.debug "Found inline image %p" % match
      url = escape_md( rawurl )

      # Build the tag
      result = %{<img src="%s" alt="%s"} % [ url, alt ]
      unless title.nil?
        # $4 is the quoted title plus any trailing spaces: drop the padding
        # and then the surrounding quote characters (either kind).
        result += %{ title="%s"} % escape_md( title.strip[1..-2] )
      end
      result += EmptyElementSuffix

      @log.debug "Replacing %p with %p" % [ match, result ]
      result
    }
end
|
1002
|
-
|
1003
|
-
|
1004
|
-
# Regexp to match characters inside code that are special to Markdown
CodeEscapeRegexp = %r{( \* | _ | \{ | \} | \[ | \] )}x

### Escape any characters special to HTML and encode any characters special
### to Markdown in a copy of the given +str+ and return it. Markdown
### characters are swapped for their MD5 placeholders from EscapeTable so
### later transforms leave them alone. The render state +rs+ is not used.
def encode_code( str, rs )
  # The ampersand must be encoded first, or it would re-encode the
  # ampersands of the entities produced for '<' and '>'.
  str.gsub( %r{&}, '&amp;' ).
    gsub( %r{<}, '&lt;' ).
    gsub( %r{>}, '&gt;' ).
    gsub( CodeEscapeRegexp ) {|match| EscapeTable[match][:md5]}
end
|
1015
|
-
|
1016
|
-
|
1017
|
-
|
1018
|
-
#################################################################
|
1019
|
-
### U T I L I T Y F U N C T I O N S
|
1020
|
-
#################################################################
|
1021
|
-
|
1022
|
-
### Escape any markdown characters in a copy of the given +str+ and return
### it. '*' and '_' are replaced with numeric character references so that
### URLs and titles placed into attributes are not picked up by the
### emphasis transforms.
def escape_md( str )
  str.
    gsub( /\*/, '&#42;' ).
    gsub( /_/, '&#95;' )
end
|
1029
|
-
|
1030
|
-
|
1031
|
-
# Matching constructs for tokenizing X/HTML
|
1032
|
-
HTMLCommentRegexp = %r{ <! ( -- .*? -- \s* )+ > }mx
|
1033
|
-
XMLProcInstRegexp = %r{ <\? .*? \?> }mx
|
1034
|
-
MetaTag = Regexp::union( HTMLCommentRegexp, XMLProcInstRegexp )
|
1035
|
-
|
1036
|
-
HTMLTagOpenRegexp = %r{ < [a-z/!$] [^<>]* }mx
|
1037
|
-
HTMLTagCloseRegexp = %r{ > }x
|
1038
|
-
HTMLTagPart = Regexp::union( HTMLTagOpenRegexp, HTMLTagCloseRegexp )
|
1039
|
-
|
1040
|
-
### Break the HTML source in +str+ into a series of tokens and return
|
1041
|
-
### them. The tokens are just 2-element Array tuples with a type and the
|
1042
|
-
### actual content. If this function is called with a block, the type and
|
1043
|
-
### text parts of each token will be yielded to it one at a time as they are
|
1044
|
-
### extracted.
|
1045
|
-
def tokenize_html( str )
|
1046
|
-
depth = 0
|
1047
|
-
tokens = []
|
1048
|
-
@scanner.string = str.dup
|
1049
|
-
type, token = nil, nil
|
1050
|
-
|
1051
|
-
until @scanner.empty?
|
1052
|
-
@log.debug "Scanning from %p" % @scanner.rest
|
1053
|
-
|
1054
|
-
# Match comments and PIs without nesting
|
1055
|
-
if (( token = @scanner.scan(MetaTag) ))
|
1056
|
-
type = :tag
|
1057
|
-
|
1058
|
-
# Do nested matching for HTML tags
|
1059
|
-
elsif (( token = @scanner.scan(HTMLTagOpenRegexp) ))
|
1060
|
-
tagstart = @scanner.pos
|
1061
|
-
@log.debug " Found the start of a plain tag at %d" % tagstart
|
1062
|
-
|
1063
|
-
# Start the token with the opening angle
|
1064
|
-
depth = 1
|
1065
|
-
type = :tag
|
1066
|
-
|
1067
|
-
# Scan the rest of the tag, allowing unlimited nested <>s. If
|
1068
|
-
# the scanner runs out of text before the tag is closed, raise
|
1069
|
-
# an error.
|
1070
|
-
while depth.nonzero?
|
1071
|
-
|
1072
|
-
# Scan either an opener or a closer
|
1073
|
-
chunk = @scanner.scan( HTMLTagPart ) or
|
1074
|
-
raise "Malformed tag at character %d: %p" %
|
1075
|
-
[ tagstart, token + @scanner.rest ]
|
1076
|
-
|
1077
|
-
@log.debug " Found another part of the tag at depth %d: %p" %
|
1078
|
-
[ depth, chunk ]
|
1079
|
-
|
1080
|
-
token += chunk
|
1081
|
-
|
1082
|
-
# If the last character of the token so far is a closing
|
1083
|
-
# angle bracket, decrement the depth. Otherwise increment
|
1084
|
-
# it for a nested tag.
|
1085
|
-
depth += ( token[-1, 1] == '>' ? -1 : 1 )
|
1086
|
-
@log.debug " Depth is now #{depth}"
|
1087
|
-
end
|
1088
|
-
|
1089
|
-
# Match text segments
|
1090
|
-
else
|
1091
|
-
@log.debug " Looking for a chunk of text"
|
1092
|
-
type = :text
|
1093
|
-
|
1094
|
-
# Scan forward, always matching at least one character to move
|
1095
|
-
# the pointer beyond any non-tag '<'.
|
1096
|
-
token = @scanner.scan_until( /[^<]+/m )
|
1097
|
-
end
|
1098
|
-
|
1099
|
-
@log.debug " type: %p, token: %p" % [ type, token ]
|
1100
|
-
|
1101
|
-
# If a block is given, feed it one token at a time. Add the token to
|
1102
|
-
# the token list to be returned regardless.
|
1103
|
-
if block_given?
|
1104
|
-
yield( type, token )
|
1105
|
-
end
|
1106
|
-
tokens << [ type, token ]
|
1107
|
-
end
|
1108
|
-
|
1109
|
-
return tokens
|
1110
|
-
end
|
1111
|
-
|
1112
|
-
|
1113
|
-
### Return a copy of +str+ with angle brackets and ampersands HTML-encoded.
### Ampersands that already begin an entity or character reference are left
### alone, as are '<' characters that look like the start of a tag, comment
### or processing instruction.
def encode_html( str )
  str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w{1,8});)/i, "&amp;" ).
    gsub( %r{<(?![a-z/?\$!])}i, "&lt;" )
end
|
1118
|
-
|
1119
|
-
|
1120
|
-
### Remove one level of line-leading indentation (a tab, or up to TabWidth
### spaces) from every line of a copy of +str+ and return it.
def outdent( str )
  str.gsub( /^(\t|[ ]{1,#{TabWidth}})/, '')
end
|
1125
|
-
|
1126
|
-
end # class BlueCloth
|
1127
|
-
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
#
|
3
|
+
# Bluecloth is a Ruby implementation of Markdown, a text-to-HTML conversion
|
4
|
+
# tool.
|
5
|
+
#
|
6
|
+
# == Synopsis
|
7
|
+
#
|
8
|
+
# doc = BlueCloth::new "
|
9
|
+
# ## Test document ##
|
10
|
+
#
|
11
|
+
# Just a simple test.
|
12
|
+
# "
|
13
|
+
#
|
14
|
+
# puts doc.to_html
|
15
|
+
#
|
16
|
+
# == Authors
|
17
|
+
#
|
18
|
+
# * Michael Granger <ged@FaerieMUD.org>
|
19
|
+
#
|
20
|
+
# == Contributors
|
21
|
+
#
|
22
|
+
# * Martin Chase <stillflame@FaerieMUD.org> - Peer review, helpful suggestions
|
23
|
+
# * Florian Gross <flgr@ccan.de> - Filter options, suggestions
|
24
|
+
#
|
25
|
+
# == Copyright
|
26
|
+
#
|
27
|
+
# Original version:
|
28
|
+
# Copyright (c) 2003-2004 John Gruber
|
29
|
+
# <http://daringfireball.net/>
|
30
|
+
# All rights reserved.
|
31
|
+
#
|
32
|
+
# Ruby port:
|
33
|
+
# Copyright (c) 2004 The FaerieMUD Consortium.
|
34
|
+
#
|
35
|
+
# BlueCloth is free software; you can redistribute it and/or modify it under the
|
36
|
+
# terms of the GNU General Public License as published by the Free Software
|
37
|
+
# Foundation; either version 2 of the License, or (at your option) any later
|
38
|
+
# version.
|
39
|
+
#
|
40
|
+
# BlueCloth is distributed in the hope that it will be useful, but WITHOUT ANY
|
41
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
|
42
|
+
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
43
|
+
#
|
44
|
+
# == To-do
|
45
|
+
#
|
46
|
+
# * Refactor some of the larger uglier methods that have to do their own
|
47
|
+
# brute-force scanning because of lack of Perl features in Ruby's Regexp
|
48
|
+
# class. Alternately, could add a dependency on 'pcre' and use most Perl
|
49
|
+
# regexps.
|
50
|
+
#
|
51
|
+
# * Put the StringScanner in the render state for thread-safety.
|
52
|
+
#
|
53
|
+
# == Version
|
54
|
+
#
|
55
|
+
# $Id: bluecloth.rb,v 1.1.1.1 2004/11/09 02:02:58 assaph Exp $
|
56
|
+
#
|
57
|
+
|
58
|
+
require 'digest/md5'
|
59
|
+
require 'logger'
|
60
|
+
require 'strscan'
|
61
|
+
|
62
|
+
|
63
|
+
### BlueCloth is a Ruby implementation of Markdown, a text-to-HTML conversion
|
64
|
+
### tool.
|
65
|
+
class BlueCloth < String
|
66
|
+
|
67
|
+
### Exception class for formatting errors.
class FormatError < RuntimeError

  ### Create a new FormatError with the given source +str+ and an optional
  ### message about the +specific+ error. The source fragment is included
  ### in the message in its #inspect form.
  def initialize( str, specific=nil )
    msg = "Bad markdown format near %p" % str
    msg += ": %s" % specific if specific

    super( msg )
  end
end
|
82
|
+
|
83
|
+
|
84
|
+
# Release Version
Version = '0.0.3'

# SVN Revision
SvnRev = %q$Rev: 37 $

# SVN Id tag
SvnId = %q$Id: bluecloth.rb,v 1.1.1.1 2004/11/09 02:02:58 assaph Exp $

# SVN URL
SvnUrl = %q$URL: svn+ssh://cvs.faeriemud.org/var/svn/BlueCloth/trunk/lib/bluecloth.rb $


# Rendering state struct. Keeps track of URLs, titles, and HTML blocks
# midway through a render. I prefer this to the globals of the Perl version
# because globals make me break out in hives. Or something.
RenderState = Struct::new( "RenderState", :urls, :titles, :html_blocks, :log )

# Tab width for #detab! if none is specified
TabWidth = 4

# The tag-closing string -- set to '>' for HTML
EmptyElementSuffix = "/>"

# Table of MD5 sums for escaped characters. Each special character maps to:
#   :md5   - the hex digest used as its placeholder in the text
#   :md5re - a Regexp matching that placeholder
#   :re    - a Regexp matching the backslash-escaped character in source
EscapeTable = {}
'\\`*_{}[]()#.!'.split(//).each {|char|
  hash = Digest::MD5::hexdigest( char )

  EscapeTable[ char ] = {
    :md5 => hash,
    :md5re => Regexp::new( hash ),
    :re => Regexp::new( '\\\\' + Regexp::escape(char) ),
  }
}
|
119
|
+
|
120
|
+
|
121
|
+
#################################################################
|
122
|
+
### I N S T A N C E M E T H O D S
|
123
|
+
#################################################################
|
124
|
+
|
125
|
+
### Create a new BlueCloth string from +content+. Each name in
### +restrictions+ (e.g. +:filter_html+, +:filter_styles+) is switched on by
### calling the writer of the same name with +true+.
def initialize( content="", *restrictions )
  # Log to standard error: debug output with $DEBUG set, informational
  # output with $VERBOSE set, and warnings only otherwise.
  @log = Logger::new( $stderr )
  @log.level = $DEBUG ?
    Logger::DEBUG :
    ($VERBOSE ? Logger::INFO : Logger::WARN)
  @scanner = nil

  # Add any restrictions, and set the line-folding attribute to reflect
  # what happens by default.
  restrictions.flatten.each {|r| __send__("#{r}=", true) }
  @fold_lines = true

  super( content )

  @log.debug "String is: %p" % self
end
|
142
|
+
|
143
|
+
|
144
|
+
######
|
145
|
+
public
|
146
|
+
######
|
147
|
+
|
148
|
+
# Filters for controlling what gets output for untrusted input. (But really,
|
149
|
+
# you're filtering bad stuff out of untrusted input at submission-time via
|
150
|
+
# untainting, aren't you?)
|
151
|
+
attr_accessor :filter_html, :filter_styles
|
152
|
+
|
153
|
+
# RedCloth-compatibility accessor. Line-folding is part of Markdown syntax,
|
154
|
+
# so this isn't used by anything.
|
155
|
+
attr_accessor :fold_lines
|
156
|
+
|
157
|
+
|
158
|
+
### Render Markdown-formatted text in this string object as HTML and return
### it. The parameter is for compatibility with RedCloth, and is currently
### unused, though that may change in the future.
def to_html( lite=false )

  # Create a StringScanner we can reuse for various lexing tasks
  @scanner = StringScanner::new( '' )

  # Make a structure to carry around stuff that gets placeholdered out of
  # the source.
  rs = RenderState::new( {}, {}, {} )

  # Make a copy of the string with normalized line endings, tabs turned to
  # spaces, and a couple of guaranteed newlines at the end. The normalized
  # text is put back into a duplicate of self so that #detab is called on a
  # BlueCloth object whatever class String#gsub returns.
  text = self.dup.replace( self.gsub(/\r\n?/, "\n") ).detab
  text += "\n\n"
  @log.debug "Normalized line-endings: %p" % text

  # Filter HTML if we're asked to do so
  if self.filter_html
    text.gsub!( "<", "&lt;" )
    text.gsub!( ">", "&gt;" )
    @log.debug "Filtered HTML: %p" % text
  end

  # Simplify blank lines
  text.gsub!( /^ +$/, '' )
  @log.debug "Tabs -> spaces/blank lines stripped: %p" % text

  # Replace HTML blocks with placeholders
  text = hide_html_blocks( text, rs )
  @log.debug "Hid HTML blocks: %p" % text
  @log.debug "Render state: %p" % rs

  # Strip link definitions, store in render state
  text = strip_link_definitions( text, rs )
  @log.debug "Stripped link definitions: %p" % text
  @log.debug "Render state: %p" % rs

  # Escape meta-characters
  text = escape_special_chars( text )
  @log.debug "Escaped special characters: %p" % text

  # Transform block-level constructs
  text = apply_block_transforms( text, rs )
  @log.debug "After block-level transforms: %p" % text

  # Now swap back in all the escaped characters
  text = unescape_special_chars( text )
  @log.debug "After unescaping special characters: %p" % text

  return text
end
|
211
|
+
|
212
|
+
|
213
|
+
### Return a copy of this string with tabs converted to spaces, using tab
### stops every +tabwidth+ columns. The receiver is not modified.
def detab( tabwidth=TabWidth )
  copy = self.dup
  copy.detab!( tabwidth )
  return copy
end


### Convert tabs to spaces in place, using tab stops every +tabwidth+
### columns, and return self. Each tab is padded out to the next tab stop
### of its own line. Note that the text is split on newlines and rejoined,
### so trailing newlines are not preserved.
def detab!( tabwidth=TabWidth )
  expanded = self.split( /\n/ ).collect {|line|
    line.gsub( /(.*?)\t/ ) do
      prefix = $1
      prefix + ' ' * (tabwidth - prefix.length % tabwidth)
    end
  }

  self.replace( expanded.join("\n") )
end
|
230
|
+
|
231
|
+
|
232
|
+
#######
|
233
|
+
#private
|
234
|
+
#######
|
235
|
+
|
236
|
+
### Do block-level transforms on a copy of +str+ using the specified render
### state +rs+ and return the results. The transforms run in a fixed order:
### headers, horizontal rules, lists, code blocks, block quotes, auto-links,
### a second pass of HTML-block hiding (to protect the markup just
### generated), and finally paragraph formation.
def apply_block_transforms( str, rs )
  # Port: This was called '_runBlockGamut' in the original

  @log.debug "Applying block transforms to:\n %p" % str
  text = transform_headers( str, rs )
  text = transform_hrules( text, rs )
  text = transform_lists( text, rs )
  text = transform_code_blocks( text, rs )
  text = transform_block_quotes( text, rs )
  text = transform_auto_links( text, rs )
  text = hide_html_blocks( text, rs )

  text = form_paragraphs( text, rs )

  @log.debug "Done with block transforms:\n %p" % text
  return text
end
|
255
|
+
|
256
|
+
|
257
|
+
### Apply Markdown span transforms to a copy of the specified +str+ with the
### given render state +rs+ and return it. Code spans are handled first so
### their contents are protected from the later transforms; then HTML
### encoding, images, anchors, emphasis and finally hard line breaks.
def apply_span_transforms( str, rs )
  @log.debug "Applying span transforms to:\n %p" % str

  str = transform_code_spans( str, rs )
  str = encode_html( str )
  str = transform_images( str, rs )
  str = transform_anchors( str, rs )
  str = transform_italic_and_bold( str, rs )

  # Hard breaks: two or more spaces at the end of a line
  str = str.gsub( / {2,}\n/, "<br#{EmptyElementSuffix}\n" )

  @log.debug "Done with span transforms:\n %p" % str
  return str
end
|
274
|
+
|
275
|
+
|
276
|
+
# The list of tags which are considered block-level constructs and an
# alternation pattern suitable for use in regexps made from the list
BlockTags = %w[ p div h[1-6] blockquote pre table dl ol ul script ]
BlockTagPattern = BlockTags.join('|')

# Nested blocks:
#   <div>
#     <div>
#     tags for inner block must be indented.
#     </div>
#   </div>
StrictBlockRegex = %r{
  ^                       # Start of line
  <(#{BlockTagPattern})   # Start tag: \1
  \b                      # word break
  (.*\n)*?                # Any number of lines, minimal match
  </\1>                   # Matching end tag
  [ ]*                    # trailing spaces
  (?=\n+|\Z)              # End of line or document
}ix

# More-liberal block-matching
LooseBlockRegex = %r{
  ^                       # Start of line
  <(#{BlockTagPattern})   # start tag: \1
  \b                      # word break
  (.*\n)*?                # Any number of lines, minimal match
  .*</\1>                 # Anything + Matching end tag
  [ ]*                    # trailing spaces
  (?=\n+|\Z)              # End of line or document
}ix

# Special case for <hr />.
HruleBlockRegex = %r{
  (                       # $1
    \A\n?                 # Start of doc + optional \n
    |                     # or
    .*\n\n                # anything + blank line
  )
  (                       # save in $2
    [ ]*                  # Any spaces
    <hr                   # Tag open
    \b                    # Word break
    ([^<>])*?             # Attributes
    /?>                   # Tag close
    (?=\n\n|\Z)           # followed by a blank line or end of document
  )
}ix

### Replace all blocks of HTML in a copy of +str+ that start in the left
### margin with tokens and return it. Each block is stored in
### <tt>rs.html_blocks</tt> under the MD5 hex digest of its text, and that
### digest (surrounded by blank lines) takes the block's place.
def hide_html_blocks( str, rs )
  @log.debug "Hiding HTML blocks in %p" % str

  # Tokenizer proc to pass to gsub
  tokenize = lambda {|match|
    key = Digest::MD5::hexdigest( match )
    rs.html_blocks[ key ] = match
    @log.debug "Replacing %p with %p" % [ match, key ]
    "\n\n#{key}\n\n"
  }

  rval = str.dup

  @log.debug "Finding blocks with the strict regex..."
  rval.gsub!( StrictBlockRegex, &tokenize )

  @log.debug "Finding blocks with the loose regex..."
  rval.gsub!( LooseBlockRegex, &tokenize )

  # Only the <hr> itself ($2) is hidden; the text leading up to it ($1) is
  # kept in place.
  @log.debug "Finding hrules..."
  rval.gsub!( HruleBlockRegex ) {|match| $1 + tokenize[$2] }

  return rval
end
|
352
|
+
|
353
|
+
|
354
|
+
# Link defs are in the form: ^[id]: url "optional title"
LinkRegex = %r{
  ^[ ]*\[(.+)\]:    # id = $1
  [ ]*
  \n?               # maybe *one* newline
  [ ]*
  (\S+)             # url = $2
  [ ]*
  \n?               # maybe one newline
  [ ]*
  (?:
    # Titles are delimited by "quotes" or (parens).
    ["(]
    (.+?)           # title = $3
    [")]            # Matching ) or "
    [ ]*
  )?                # title is optional
  (?:\n+|\Z)
}x

### Strip link definitions from a copy of +str+ and return it, storing them
### in the given RenderState +rs+. URLs go into <tt>rs.urls</tt> and titles
### into <tt>rs.titles</tt>, both keyed by the downcased link id. URLs are
### HTML-encoded, and double quotes in titles are turned into entities so
### the title can be dropped into an attribute value.
def strip_link_definitions( str, rs )
  str.gsub( LinkRegex ) {|match|
    id, url, title = $1, $2, $3

    rs.urls[ id.downcase ] = encode_html( url )
    unless title.nil?
      rs.titles[ id.downcase ] = title.gsub( /"/, "&quot;" )
    end
    ""
  }
end
|
387
|
+
|
388
|
+
|
389
|
+
### Escape special characters in a copy of the given +str+ and return it.
### Inside HTML tags, '*' and '_' are replaced with their MD5 placeholders
### so attribute values are not mangled by the emphasis transforms; in
### ordinary text, backslash-escaped characters are replaced instead.
### Raises a TypeError if the tokenizer yields an unknown token type.
def escape_special_chars( str )
  @log.debug " Escaping special characters"
  text = ''

  tokenize_html( str ) {|type, chunk|
    @log.debug " Adding %p token %p" % [ type, chunk ]
    case type

    # Within tags, encode * and _
    when :tag
      text += chunk.
        gsub( /\*/, EscapeTable['*'][:md5] ).
        gsub( /_/, EscapeTable['_'][:md5] )

    # Encode backslashed stuff in regular text
    when :text
      text += encode_backslash_escapes( chunk )
    else
      raise TypeError, "Unknown token type %p" % type
    end
  }

  @log.debug " Text with escapes is now: %p" % text
  return text
end
|
415
|
+
|
416
|
+
|
417
|
+
### Swap the MD5 placeholders of escaped special characters back to the
### characters themselves in a copy of the given +str+ and return it. The
### argument itself is left untouched.
def unescape_special_chars( str )
  text = str.dup

  EscapeTable.each {|char, hash|
    @log.debug "Unescaping escaped %p with %p" % [ char, hash[:md5re] ]

    # Block form, so a backslash in +char+ is never read as the start of a
    # replacement escape sequence.
    text.gsub!( hash[:md5re] ) { char }
  }

  return text
end
|
428
|
+
|
429
|
+
|
430
|
+
### Return a copy of the given +str+ with any backslashed special character
### in it replaced with MD5 placeholders.
def encode_backslash_escapes( str )
  # Make a copy with any double-escaped backslashes encoded. This has to
  # happen first so that the second backslash of a pair is not taken as
  # the escape for whatever character follows it.
  text = str.gsub( /\\\\/, EscapeTable['\\'][:md5] )

  EscapeTable.each_pair {|char, esc|
    next if char == '\\'
    text.gsub!( esc[:re], esc[:md5] )
  }

  return text
end
|
443
|
+
|
444
|
+
|
445
|
+
### Transform any Markdown-style horizontal rules in a copy of the specified
### +str+ and return it. A rule is a line made up of three or more '-' or
### '*' characters, each optionally padded with a single space. The render
### state +rs+ is not used.
def transform_hrules( str, rs )
  @log.debug " Transforming horizontal rules"
  str.gsub( /^( ?[\-\*] ?){3,}$/, "\n<hr#{EmptyElementSuffix}\n" )
end
|
451
|
+
|
452
|
+
|
453
|
+
|
454
|
+
# Pattern to transform lists
ListRegexp = %r{
  (?:
    ^[ ]{0,#{TabWidth - 1}}     # Indent < tab width
    (\*|\d+\.)                  # unordered or ordered ($1)
    [ ]+                        # At least one space
  )
  (?m:.+?)                      # item content (include newlines)
  (?:
      \z                        # Either EOF
    |                           # or
      \n{2,}                    # Blank line...
      (?=\S)                    # ...followed by non-space
      (?![ ]* (\*|\d+\.) [ ]+)  # ...but not another item
  )
}x

### Transform Markdown-style lists in a copy of the specified +str+ and
### return it. The marker of the first item decides the list type: '*'
### makes an unordered list, a number followed by a period an ordered one.
def transform_lists( str, rs )
  @log.debug " Transforming lists at %p" % (str[0,100] + '...')

  str.gsub( ListRegexp ) {|list|
    @log.debug " Found list %p" % list

    # Read $1 before running another substitution, which would reset it
    list_type = ($1 == '*' ? "ul" : "ol")

    # Turn every run of blank lines into exactly two blank lines
    list = list.gsub( /\n{2,}/, "\n\n\n" )

    %{<%s>\n%s</%s>\n} % [
      list_type,
      transform_list_items( list, rs ),
      list_type,
    ]
  }
end
|
488
|
+
|
489
|
+
|
490
|
+
# Pattern for transforming list items
ListItemRegexp = %r{
  (\n)?                 # leading line = $1
  (^[ ]*)               # leading whitespace = $2
  (\*|\d+\.) [ ]+       # list marker = $3
  ((?m:.+?)             # list item text = $4
  (\n{1,2}))
  (?= \n* (\z | \2 (\*|\d+\.) [ ]+))
}x

### Transform list items in a copy of the given +str+ and return it. An
### item that is preceded by a blank line, or that contains one, gets the
### full set of block transforms; any other item is checked for nested
### lists and then given span transforms only.
def transform_list_items( str, rs )
  @log.debug " Transforming list items"

  # Trim trailing blank lines
  str = str.sub( /\n{2,}\z/, "\n" )

  str.gsub( ListItemRegexp ) {|line|
    @log.debug " Found item line %p" % line
    leading_line, item = $1, $4

    if leading_line or /\n{2,}/.match( item )
      @log.debug " Found leading line or item has a blank"
      item = apply_block_transforms( outdent(item), rs )
    else
      # Recursion for sub-lists
      @log.debug " Recursing for sublist"
      item = transform_lists( outdent(item), rs ).chomp
      item = apply_span_transforms( item, rs )
    end

    %{<li>%s</li>\n} % item
  }
end
|
524
|
+
|
525
|
+
|
526
|
+
# Pattern for matching codeblocks
CodeBlockRegexp = %r{
  (.?)                              # $1 = preceding character
  :\n+                              # colon + NL delimiter
  (                                 # $2 = the code block
    (?:
      (?:[ ]{#{TabWidth}} | \t)     # a tab or tab-width of spaces
      .*\n+
    )+
  )
  ((?=^[ ]{0,#{TabWidth}}\S)|\Z)    # Lookahead for non-space at
                                    # line-start, or end of doc
}x

### Transform Markdown-style codeblocks in a copy of the specified +str+ and
### return it. A code block is a run of indented lines introduced by a line
### ending in a colon. The colon is kept, together with the character
### before it, unless that character is whitespace or missing.
def transform_code_blocks( str, rs )
  @log.debug " Transforming code blocks"

  str.gsub( CodeBlockRegexp ) {|block|
    prevchar, codeblock = $1, $2

    @log.debug " prevchar = %p" % prevchar

    lead = (prevchar.empty? || /\s/ =~ prevchar) ? "" : "#{prevchar}:"

    # Generate the codeblock
    %{%s\n\n<pre><code>%s\n</code></pre>\n\n} % [
      lead,
      encode_code( outdent(codeblock), rs ).rstrip,
    ]
  }
end
|
557
|
+
|
558
|
+
|
559
|
+
# Pattern for matching Markdown blockquote blocks
BlockQuoteRegexp = %r{
  (?:
    ^[ ]*>[ ]?      # '>' at the start of a line
    .+\n            # rest of the first line
    (?:.+\n)*       # subsequent consecutive lines
    \n*             # blanks
  )+
}x

### Transform Markdown-style blockquotes in a copy of the specified +str+
### and return it. The quote markers are stripped, the remaining text is
### run through the block transforms (so quotes may nest and contain other
### blocks), and the result is indented by TabWidth spaces inside a
### <blockquote> element.
def transform_block_quotes( str, rs )
  @log.debug " Transforming block quotes"

  str.gsub( BlockQuoteRegexp ) {|quote|
    @log.debug "Making blockquote from %p" % quote

    # Remove one level of quote marker from every line
    unquoted = quote.gsub( /^[ ]*>[ ]?/, '' )

    %{<blockquote>\n%s\n</blockquote>\n\n} %
      apply_block_transforms( unquoted, rs ).
        gsub( /^/, " " * TabWidth )
  }
end
|
582
|
+
|
583
|
+
|
584
|
+
# Pattern for an automatic link: an http, https or ftp URL in angle brackets
AutoAnchorURLRegexp = /<((https?|ftp):[^'">\s]+)>/

# Pattern for an automatic email link: an address in angle brackets. Matched
# case-insensitively so that upper-case host names are recognised too.
AutoAnchorEmailRegexp = %r{
  <
  (
    [-.\w]+
    \@
    [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
  )
  >
}xi

### Transform URLs in a copy of the specified +str+ into links and return
### it. Email addresses are additionally obfuscated by
### #encode_email_address. The render state +rs+ is not used.
def transform_auto_links( str, rs )
  @log.debug " Transforming auto-links"
  str.gsub( AutoAnchorURLRegexp, %{<a href="\\1">\\1</a>}).
    gsub( AutoAnchorEmailRegexp ) {|addr|
      encode_email_address( unescape_special_chars($1) )
    }
end
|
604
|
+
|
605
|
+
|
606
|
+
# Encoder functions to turn characters of an email address into encoded
# entities. Each one is called with the Integer value of a single byte.
Encoders = [
  lambda {|char| "&#%03d;" % char},   # decimal entity
  lambda {|char| "&#x%X;" % char},    # hexadecimal entity
  lambda {|char| char.chr },          # the literal character
]

### Transform a copy of the given email +addr+ into an escaped version safer
### for posting publicly, and return it as an anchor whose href is the
### obfuscated "mailto:" URL and whose text is the obfuscated address.
###
### Every byte is randomly written as a decimal entity, a hex entity or
### (roughly one time in ten) itself. Two bytes are special: ':' is always
### left literal so the "mailto:" prefix can be stripped from the link
### text, and '@' is always written as an entity.
def encode_email_address( addr )
  rval = ''

  ("mailto:" + addr).each_byte do |b|
    # Compare against integer byte values: character literals such as ?:
    # are Strings on Ruby >= 1.9 and would never match an Integer byte.
    encoded =
      case b
      when 0x3A    # ':'
        ":"
      when 0x40    # '@' -- entity-encoded only, never literal
        Encoders[ rand(2) ][ b ]
      else
        r = rand(100)
        if r > 90
          Encoders[2][ b ]
        elsif r < 45
          Encoders[1][ b ]
        else
          Encoders[0][ b ]
        end
      end

    rval << encoded
  end

  # The link text is everything after the (literal) colon of "mailto:".
  return %{<a href="%s">%s</a>} % [ rval, rval.sub(/.+?:/, '') ]
end
|
637
|
+
|
638
|
+
|
639
|
+
# Setext-style header: a title line followed by an underline made of '='
# or '-' characters.
SetextHeaderRegexp = %r{
  (.+)          # The title text ($1)
  \n
  ([\-=])+      # Match a line of = or -. Save only one in $2.
  [ ]*\n+
}x

# ATX-style header: one to six leading '#' characters followed by the title.
AtxHeaderRegexp = %r{
  ^(\#{1,6})    # $1 = string of #'s
  [ ]*
  (.+?)         # $2 = Header text
  [ ]*
  \#*           # optional closing #'s (not counted)
  \n+
}x

### Apply the Markdown header transforms to a copy of the given +str+ and
### return the result. Header titles are run through #apply_span_transforms
### with the render state +rs+.
###
### Setext-style headers are handled first:
###
###   Header 1
###   ========
###
###   Header 2
###   --------
###
### followed by ATX-style headers, whose level is the number of leading
### '#' characters.
def transform_headers( str, rs )
  @log.debug " Transforming headers"

  # Pass 1: underlined (Setext) headers
  after_setext = str.gsub( SetextHeaderRegexp ) do
    @log.debug "Found setext-style header"
    heading, underline = $1, $2
    heading = apply_span_transforms( heading, rs )

    if underline == '='
      %[<h1>#{heading}</h1>\n\n]
    elsif underline == '-'
      %[<h2>#{heading}</h2>\n\n]
    else
      heading
    end
  end

  # Pass 2: hash-prefixed (ATX) headers
  after_setext.gsub( AtxHeaderRegexp ) do
    @log.debug "Found ATX-style header"
    hashes, heading = $1, $2
    heading = apply_span_transforms( heading, rs )

    depth = hashes.length
    %{<h%d>%s</h%d>\n\n} % [ depth, heading, depth ]
  end
end
|
694
|
+
|
695
|
+
|
696
|
+
### Split a copy of +str+ into chunks separated by blank lines, wrap each
### chunk in <p> tags and return the chunks re-joined with blank lines.
### A chunk that is a key in +rs.html_blocks+ is a placeholder for a saved
### HTML block and is replaced by that block instead of being wrapped.
def form_paragraphs( str, rs )
  @log.debug " Forming paragraphs"

  # Ignore leading and trailing blank lines, then split on runs of two or
  # more newlines.
  trimmed = str.sub( /\A\n+/, '' ).sub( /\n+\z/, '' )
  chunks = trimmed.split( /\n{2,}/ )

  rendered = chunks.map do |chunk|
    if rs.html_blocks.key?( chunk )
      # Unhashify HTML blocks if this is a placeholder
      rs.html_blocks[ chunk ]
    else
      # Otherwise, wrap in <p> tags (dropping the first run of leading
      # spaces)
      body = apply_span_transforms( chunk, rs )
      body.sub( /^[ ]*/, '<p>' ) + '</p>'
    end
  end

  rval = rendered.join( "\n\n" )
  @log.debug " Formed paragraphs: %p" % rval
  rval
end
|
721
|
+
|
722
|
+
|
723
|
+
# Pattern to match the linkid part of an anchor tag for reference-style
# links.
RefLinkIdRegex = %r{
  [ ]?            # Optional leading space
  (?:\n[ ]*)?     # Optional newline + spaces
  \[
    (.*?)         # Id = $1
  \]
}x

# Pattern to match the parenthesised part of an inline link:
# (uri "optional title")
InlineLinkRegex = %r{
  \(              # Literal paren
    [ ]*          # Zero or more spaces
    (.*?)         # URI = $1
    [ ]*          # Zero or more spaces
    (?:           #
      ([\"\'])    # Opening quote char = $2
      (.*?)       # Title = $3
      \2          # Matching quote char
    )?            # Title is optional
  \)
}x

### Apply Markdown anchor transforms to a copy of the specified +str+ with
### the given render state +rs+ and return it.
###
### Both reference-style ([text][id], [text][]) and inline
### ([text](url "title")) links are handled. Reference ids are looked up,
### lower-cased, in +rs.urls+ and +rs.titles+. Bracketed text that turns
### out not to be a link, or that refers to an unknown id, is copied to the
### output unchanged.
def transform_anchors( str, rs )
  @log.debug " Transforming anchors"
  @scanner.string = str.dup
  text = ''

  # Scan the whole string
  until @scanner.eos?

    if @scanner.scan( /\[/ )
      link = ''; linkid = ''
      depth = 1
      startpos = @scanner.pos
      @log.debug " Found a bracket-open at %d" % startpos

      # Scan the rest of the tag, allowing unlimited nested []s. If
      # the scanner runs out of text before the opening bracket is
      # closed, append the text and return (wasn't a valid anchor).
      while depth.nonzero?
        linktext = @scanner.scan_until( /\]|\[/ )

        if linktext
          @log.debug " Found a bracket at depth %d: %p" %
            [ depth, linktext ]
          link << linktext

          # Decrement depth for each closing bracket
          depth += ( linktext[-1, 1] == ']' ? -1 : 1 )
          @log.debug " Depth is now #{depth}"

        # If there's no more brackets, it must not be an anchor, so
        # just abort.
        else
          @log.debug " Missing closing brace, assuming non-link."
          link << @scanner.rest
          @scanner.terminate
          return text + '[' + link
        end
      end
      link.slice!( -1 ) # Trim final ']'
      @log.debug " Found leading link %p" % link

      # Look for a reference-style second part
      if @scanner.scan( RefLinkIdRegex )
        # Remember exactly what was consumed so that the source can be
        # reproduced verbatim if the id turns out to be unknown.
        raw_idpart = @scanner.matched
        linkid = @scanner[1]
        linkid = link.dup if linkid.empty?
        linkid.downcase!
        @log.debug " Found a linkid: %p" % linkid

        # If there's a matching link in the link table, build an
        # anchor tag for it.
        if rs.urls.key?( linkid )
          @log.debug " Found link key in the link table: %p" %
            rs.urls[linkid]
          url = escape_md( rs.urls[linkid] )

          text << %{<a href="#{url}"}
          if rs.titles.key?(linkid)
            text << %{ title="%s"} % escape_md( rs.titles[linkid] )
          end
          text << %{>#{link}</a>}

        # If the link referred to doesn't exist, just append the raw
        # source to the result. The source is rebuilt from the scanned
        # pieces rather than sliced out by scanner position: the position
        # is a byte offset pointing one past the match, so an inclusive
        # slice ending there duplicated the following character (and byte
        # offsets are not character indexes for multibyte text).
        else
          original = '[' + link + ']' + raw_idpart
          @log.debug " Linkid %p not found in link table" % linkid
          @log.debug " Appending original string instead: %p" % original
          text << original
        end

      # ...or for an inline style second part
      elsif @scanner.scan( InlineLinkRegex )
        url = @scanner[1]
        title = @scanner[3]
        @log.debug " Found an inline link to %p" % url

        text << %{<a href="%s"} % escape_md( url )
        if title
          text << %{ title="%s"} % escape_md( title )
        end
        text << %{>#{link}</a>}

      # No linkid part: just append the first part as-is.
      else
        @log.debug "No linkid, so no anchor. Appending literal text."
        text << '[' + link + ']'
      end # if linkid

    # Plain text
    else
      @log.debug " Scanning to the next link from %p" % @scanner.rest
      text << @scanner.scan( /[^\[]+/ )
    end

  end # until @scanner.eos?

  return text
end
|
846
|
+
|
847
|
+
# Strong emphasis: text wrapped in a matching pair of ** or __
BoldRegexp = %r{ (\*\*|__) (?=\S) (.+?\S) \1 }x

# Normal emphasis: text wrapped in a matching pair of * or _
ItalicRegexp = %r{ (\*|_) (?=\S) (.+?\S) \1 }x

### Replace bold- and italic-encoded text in a copy of +str+ with <strong>
### and <em> elements and return the result. Bold is handled first so that
### its doubled markers are not consumed as two single emphasis markers.
### The render state +rs+ is not used.
def transform_italic_and_bold( str, rs )
  @log.debug " Transforming italic and bold"

  emboldened = str.gsub( BoldRegexp, %{<strong>\\2</strong>} )
  emboldened.gsub( ItalicRegexp, %{<em>\\2</em>} )
end
|
862
|
+
|
863
|
+
|
864
|
+
### Turn backtick-delimited spans in +str+ into <code> elements and return
### the result. A span is closed by the same run of backticks that opened
### it; its content is stripped of surrounding whitespace and passed
### through #encode_code with the render state +rs+. If +str+ contains no
### backtick at all it is returned as-is. Raises a FormatError when an
### opening run of backticks is never closed.
def transform_code_spans( str, rs )
  @log.debug " Transforming code spans"

  # Load the scanner and bail out early when there is nothing to do
  @scanner.string = str.dup
  unless @scanner.exist?( /`/ )
    @scanner.terminate
    @log.debug "No backticks found for code span in %p" % str
    return str
  end

  @log.debug "Transforming code spans in %p" % str

  # The output is assembled piece by piece
  text = ''

  until @scanner.eos?
    leading = @scanner.scan_until( /.?(?=`)/m )

    # No further backticks: copy the remainder and finish
    if leading.nil?
      text << @scanner.rest
      @scanner.terminate
      next
    end

    text << leading
    @log.debug "Found backtick at %d after '...%s'" %
      [ @scanner.pos, text[-10, 10] ]

    # The closing delimiter is whatever run of backticks opened the span
    opener = @scanner.scan( /`+/ )
    len = opener.length
    closer = Regexp::new( opener )
    @log.debug "Scanning for end of code span with %p" % closer

    # Read through the closing delimiter, or give up if there is none
    codespan = @scanner.scan_until( closer )
    unless codespan
      raise FormatError::new( @scanner.rest[0,20],
        "No %p found before end" % opener )
    end

    @log.debug "Found close of code span at %d: %p" %
      [ @scanner.pos - len, codespan ]

    # Chop the closing backticks off, trim whitespace, encode entities
    codespan.slice!( -len, len )
    text << "<code>%s</code>" % encode_code( codespan.strip, rs )
  end

  return text
end
|
920
|
+
|
921
|
+
|
922
|
+
# Next, handle inline images: ![alt text](url "optional title")
# Don't forget: encode * and _
#
# NOTE: no 's' option here. In Ruby 's' is an encoding flag
# (Shift_JIS/Windows-31J), not Perl's "dot matches newline", and it makes
# matching raise on UTF-8 input that contains non-ASCII characters.
InlineImageRegexp = %r{
  (                       # Whole match = $1
    !\[ (.*?) \]          # alt text = $2
    \([ ]* (\S+) [ ]*     # source url = $3
    (                     # title = $4
      (["'])              # quote char = $5
      .*?
      \5                  # matching quote
      [ ]*
    )?                    # title is optional
    \)
  )
}x #"


# Reference-style images
ReferenceImageRegexp = %r{
  (                       # Whole match = $1
    !\[ (.*?) \]          # Alt text = $2
    [ ]?                  # Optional space
    (?:\n[ ]*)?           # One optional newline + spaces
    \[ (.*?) \]           # id = $3
  )
}x

### Turn image markup in a copy of +str+ into <img> tags and return it.
###
### Reference-style images (![alt text][id], ![alt text][]) are resolved
### through +rs.urls+ and +rs.titles+ using the lower-cased id; markup with
### an unknown id is left untouched. Inline images
### (![alt text](url "optional title")) are converted afterwards.
def transform_images( str, rs )
  @log.debug " Transforming images"

  # Handle reference-style labeled images: ![alt text][id]
  str.
    gsub( ReferenceImageRegexp ) {|match|
      whole, alt, linkid = $1, $2, $3.downcase
      @log.debug "Matched %p" % match

      # for shortcut links like ![this][].
      linkid = alt.downcase if linkid.empty?

      if rs.urls.key?( linkid )
        url = escape_md( rs.urls[linkid] )
        @log.debug "Found url '%s' for linkid '%s' " %
          [ url, linkid ]

        # Build the tag
        result = %{<img src="%s" alt="%s"} % [ url, alt ]
        if rs.titles.key?( linkid )
          result += %{ title="%s"} % escape_md( rs.titles[linkid] )
        end
        result += EmptyElementSuffix

      else
        result = whole
      end

      @log.debug "Replacing %p with %p" %
        [ match, result ]
      result
    }.

    # Inline image style
    gsub( InlineImageRegexp ) {|match|
      @log.debug "Found inline image %p" % match
      whole, alt, title = $1, $2, $4
      url = escape_md( $3 )

      # Build the tag
      result = %{<img src="%s" alt="%s"} % [ url, alt ]
      unless title.nil?
        # The title capture still includes its surrounding quote pair
        # (either kind) and any spaces before the closing paren; drop the
        # spaces first, then the first and last character.
        title = title.sub( /[ ]*\z/, '' )[ 1..-2 ]
        result += %{ title="%s"} % escape_md( title )
      end
      result += EmptyElementSuffix

      @log.debug "Replacing %p with %p" %
        [ match, result ]
      result
    }
end
|
1002
|
+
|
1003
|
+
|
1004
|
+
# Regexp to match special characters in a code block
CodeEscapeRegexp = %r{( \* | _ | \{ | \} | \[ | \] )}x

### Escape any characters special to HTML and encode any characters special
### to Markdown in a copy of the given +str+ and return it.
###
### '&', '<' and '>' become the corresponding HTML entities (the ampersand
### is handled first so that the entities produced afterwards are not
### re-encoded); the Markdown metacharacters matched by CodeEscapeRegexp
### are replaced with their :md5 entry from EscapeTable. The render state
### +rs+ is not used.
def encode_code( str, rs )
  str.gsub( %r{&}, '&amp;' ).
    gsub( %r{<}, '&lt;' ).
    gsub( %r{>}, '&gt;' ).
    gsub( CodeEscapeRegexp ) {|match| EscapeTable[match][:md5]}
end
|
1015
|
+
|
1016
|
+
|
1017
|
+
|
1018
|
+
#################################################################
|
1019
|
+
### U T I L I T Y F U N C T I O N S
|
1020
|
+
#################################################################
|
1021
|
+
|
1022
|
+
### Escape any markdown characters in a copy of the given +str+ and return
### it. Asterisks and underscores are replaced by their numeric HTML
### entities so that later emphasis processing cannot mistake them for
### markup.
def escape_md( str )
  str.
    gsub( /\*/, '&#42;' ).
    gsub( /_/, '&#95;' )
end
|
1029
|
+
|
1030
|
+
|
1031
|
+
# Matching constructs for tokenizing X/HTML
HTMLCommentRegexp = %r{ <! ( -- .*? -- \s* )+ > }mx
XMLProcInstRegexp = %r{ <\? .*? \?> }mx
MetaTag = Regexp::union( HTMLCommentRegexp, XMLProcInstRegexp )

# NOTE(review): the tag opener only accepts lower-case tag names, while
# #encode_html matches tags case-insensitively -- confirm which is intended.
HTMLTagOpenRegexp = %r{ < [a-z/!$] [^<>]* }mx
HTMLTagCloseRegexp = %r{ > }x
HTMLTagPart = Regexp::union( HTMLTagOpenRegexp, HTMLTagCloseRegexp )

### Break the HTML source in +str+ into a series of tokens and return
### them. The tokens are just 2-element Array tuples with a type (:tag or
### :text) and the actual content. If this function is called with a block,
### the type and text parts of each token will be yielded to it one at a
### time as they are extracted.
###
### Raises a RuntimeError if a tag is opened but never closed.
def tokenize_html( str )
  depth = 0
  tokens = []
  @scanner.string = str.dup
  type, token = nil, nil

  until @scanner.eos?
    @log.debug "Scanning from %p" % @scanner.rest

    # Match comments and PIs without nesting
    if (( token = @scanner.scan(MetaTag) ))
      type = :tag

    # Do nested matching for HTML tags
    elsif (( token = @scanner.scan(HTMLTagOpenRegexp) ))
      tagstart = @scanner.pos
      @log.debug " Found the start of a plain tag at %d" % tagstart

      # Start the token with the opening angle
      depth = 1
      type = :tag

      # Scan the rest of the tag, allowing unlimited nested <>s. If
      # the scanner runs out of text before the tag is closed, raise
      # an error.
      while depth.nonzero?

        # Scan either an opener or a closer
        chunk = @scanner.scan( HTMLTagPart ) or
          raise "Malformed tag at character %d: %p" %
            [ tagstart, token + @scanner.rest ]

        @log.debug " Found another part of the tag at depth %d: %p" %
          [ depth, chunk ]

        token += chunk

        # If the last character of the token so far is a closing
        # angle bracket, decrement the depth. Otherwise increment
        # it for a nested tag.
        depth += ( token[-1, 1] == '>' ? -1 : 1 )
        @log.debug " Depth is now #{depth}"
      end

    # Match text segments
    else
      @log.debug " Looking for a chunk of text"
      type = :text

      # Scan forward, always matching at least one character to move
      # the pointer beyond any non-tag '<'.
      token = @scanner.scan_until( /[^<]+/m )

      # When nothing but '<' characters remain the scan above fails
      # without advancing, which used to loop forever. Take what is left
      # as the final text token instead.
      if token.nil?
        token = @scanner.rest
        @scanner.terminate
      end
    end

    @log.debug " type: %p, token: %p" % [ type, token ]

    # If a block is given, feed it one token at a time. Add the token to
    # the token list to be returned regardless.
    if block_given?
      yield( type, token )
    end
    tokens << [ type, token ]
  end

  return tokens
end
|
1111
|
+
|
1112
|
+
|
1113
|
+
### Return a copy of +str+ with angle brackets and ampersands HTML-encoded.
###
### An ampersand is left alone when it already starts something that looks
### like an entity (named, decimal or hexadecimal); a '<' is left alone when
### it is followed by a character that can begin a tag, closing tag,
### comment or processing instruction.
def encode_html( str )
  str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w{1,8});)/i, "&amp;" ).
    gsub( %r{<(?![a-z/?\$!])}i, "&lt;" )
end
|
1118
|
+
|
1119
|
+
|
1120
|
+
### Remove one level of line-leading indentation -- a single tab, or up to
### TabWidth spaces -- from every line of a copy of +str+ and return it.
def outdent( str )
  one_indent = /^(\t|[ ]{1,#{TabWidth}})/
  str.gsub( one_indent, '' )
end
|
1125
|
+
|
1126
|
+
end # class BlueCloth
|
1127
|
+
|