Pimki 1.3.092 → 1.4.092

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. data/README +145 -131
  2. data/README-PIMKI +15 -5
  3. data/app/controllers/wiki.rb +167 -54
  4. data/app/models/author.rb +3 -3
  5. data/app/models/chunks/chunk.rb +3 -3
  6. data/app/models/chunks/engines.rb +18 -21
  7. data/app/models/chunks/include.rb +29 -29
  8. data/app/models/chunks/literal.rb +20 -20
  9. data/app/models/chunks/match.rb +19 -19
  10. data/app/models/chunks/nowiki.rb +31 -31
  11. data/app/models/chunks/nowiki_test.rb +14 -14
  12. data/app/models/chunks/test.rb +18 -18
  13. data/app/models/chunks/todo.rb +44 -23
  14. data/app/models/chunks/uri.rb +97 -97
  15. data/app/models/chunks/uri_test.rb +92 -92
  16. data/app/models/chunks/wiki.rb +4 -4
  17. data/app/models/chunks/wiki_symbols.rb +22 -22
  18. data/app/models/chunks/wiki_test.rb +36 -36
  19. data/app/models/page.rb +39 -7
  20. data/app/models/page_lock.rb +23 -23
  21. data/app/models/page_set.rb +72 -72
  22. data/app/models/page_test.rb +75 -75
  23. data/app/models/revision.rb +1 -1
  24. data/app/models/revision_test.rb +251 -251
  25. data/app/models/web.rb +19 -6
  26. data/app/models/web_test.rb +52 -52
  27. data/app/models/wiki_content.rb +131 -119
  28. data/app/models/wiki_service.rb +31 -16
  29. data/app/models/wiki_service_test.rb +15 -15
  30. data/app/models/wiki_words.rb +1 -1
  31. data/app/models/wiki_words_test.rb +12 -12
  32. data/app/views/bottom.rhtml +3 -3
  33. data/app/views/markdown_help.rhtml +15 -15
  34. data/app/views/menu.rhtml +20 -20
  35. data/app/views/navigation.rhtml +26 -26
  36. data/app/views/rdoc_help.rhtml +15 -15
  37. data/app/views/static_style_sheet.rhtml +237 -237
  38. data/app/views/style.rhtml +178 -178
  39. data/app/views/textile_help.rhtml +27 -27
  40. data/app/views/top.rhtml +7 -2
  41. data/app/views/wiki/authors.rhtml +15 -15
  42. data/app/views/wiki/bliki.rhtml +101 -101
  43. data/app/views/wiki/bliki_edit.rhtml +3 -0
  44. data/app/views/wiki/bliki_new.rhtml +3 -0
  45. data/app/views/wiki/bliki_revision.rhtml +90 -90
  46. data/app/views/wiki/edit.rhtml +12 -3
  47. data/app/views/wiki/edit_menu.rhtml +64 -47
  48. data/app/views/wiki/edit_web.rhtml +65 -18
  49. data/app/views/wiki/export.rhtml +14 -14
  50. data/app/views/wiki/feeds.rhtml +10 -10
  51. data/app/views/wiki/list.rhtml +17 -15
  52. data/app/views/wiki/locked.rhtml +13 -13
  53. data/app/views/wiki/login.rhtml +10 -10
  54. data/app/views/wiki/mind.rhtml +0 -1
  55. data/app/views/wiki/new.rhtml +8 -3
  56. data/app/views/wiki/new_system.rhtml +77 -77
  57. data/app/views/wiki/new_web.rhtml +63 -63
  58. data/app/views/wiki/page.rhtml +88 -82
  59. data/app/views/wiki/print.rhtml +15 -15
  60. data/app/views/wiki/published.rhtml +2 -1
  61. data/app/views/wiki/recently_revised.rhtml +31 -31
  62. data/app/views/wiki/revision.rhtml +1 -7
  63. data/app/views/wiki/rollback.rhtml +31 -0
  64. data/app/views/wiki/rss_feed.rhtml +21 -21
  65. data/app/views/wiki/search.rhtml +48 -48
  66. data/app/views/wiki/tex.rhtml +22 -22
  67. data/app/views/wiki/tex_web.rhtml +34 -34
  68. data/app/views/wiki/todo.rhtml +90 -67
  69. data/app/views/wiki/web_list.rhtml +12 -12
  70. data/app/views/wiki_words_help.rhtml +1 -1
  71. data/favicon.png +0 -0
  72. data/libraries/action_controller_servlet.rb +17 -2
  73. data/libraries/bluecloth.rb +1127 -1127
  74. data/libraries/diff/diff.rb +474 -474
  75. data/libraries/diff/diff_test.rb +79 -79
  76. data/libraries/erb.rb +490 -490
  77. data/libraries/madeleine/automatic.rb +418 -357
  78. data/libraries/madeleine/clock.rb +94 -94
  79. data/libraries/madeleine/files.rb +19 -0
  80. data/libraries/madeleine/zmarshal.rb +60 -0
  81. data/libraries/madeleine_service.rb +14 -15
  82. data/libraries/rdocsupport.rb +155 -155
  83. data/libraries/redcloth_for_tex.rb +869 -869
  84. data/libraries/redcloth_for_tex_test.rb +40 -40
  85. data/libraries/view_helper.rb +32 -32
  86. data/libraries/web_controller_server.rb +96 -94
  87. data/pimki.rb +47 -6
  88. metadata +18 -4
Binary file
@@ -5,6 +5,15 @@ include WEBrick
5
5
 
6
6
  require 'view_helper'
7
7
 
8
+ class FavIconHandler < HTTPServlet::AbstractServlet
9
+ def do_GET(req, res)
10
+ ico = File.read(File.join(Dir.pwd, 'favicon.png'))
11
+ res['content-type'] = 'image/png'
12
+ res['content-length'] = ico.size
13
+ res.body = ico
14
+ end
15
+ end
16
+
8
17
  class ActionControllerServlet < HTTPServlet::AbstractServlet
9
18
  @@template_root = "./views"
10
19
  def self.template_root() @@template_root end
@@ -25,7 +34,7 @@ class ActionControllerServlet < HTTPServlet::AbstractServlet
25
34
  @assigns = {}
26
35
  @performed_render = @performed_redirect = false
27
36
 
28
- @logger.info "Performing #{action_name}"
37
+ @logger.info "Performing #{action_name} (#{request_path.join('/')})"
29
38
  @logger.info " Parameters: #{@params.inspect}"
30
39
  @logger.info " Cookies: #{@req.cookies.collect { |c| "#{c.name} => #{c.value}" }.join(", ") }"
31
40
 
@@ -159,7 +168,13 @@ class ActionControllerServlet < HTTPServlet::AbstractServlet
159
168
 
160
169
  def template_result(template_path)
161
170
  @assigns.each { |key, value| instance_variable_set "@#{key}", value }
162
- ERB.new(IO.readlines(template_path).join).result(binding)
171
+ begin
172
+ ERB.new(IO.readlines(template_path).join).result(binding)
173
+ rescue Exception => detail
174
+ @logger.error "Processing #{template_path}"
175
+ @logger.error detail
176
+ raise
177
+ end
163
178
  end
164
179
 
165
180
  # Converts the class name from something like "OneModule::TwoModule::NeatController"
@@ -1,1127 +1,1127 @@
1
- #!/usr/bin/ruby
2
- #
3
- # Bluecloth is a Ruby implementation of Markdown, a text-to-HTML conversion
4
- # tool.
5
- #
6
- # == Synopsis
7
- #
8
- # doc = BlueCloth::new "
9
- # ## Test document ##
10
- #
11
- # Just a simple test.
12
- # "
13
- #
14
- # puts doc.to_html
15
- #
16
- # == Authors
17
- #
18
- # * Michael Granger <ged@FaerieMUD.org>
19
- #
20
- # == Contributors
21
- #
22
- # * Martin Chase <stillflame@FaerieMUD.org> - Peer review, helpful suggestions
23
- # * Florian Gross <flgr@ccan.de> - Filter options, suggestions
24
- #
25
- # == Copyright
26
- #
27
- # Original version:
28
- # Copyright (c) 2003-2004 John Gruber
29
- # <http://daringfireball.net/>
30
- # All rights reserved.
31
- #
32
- # Ruby port:
33
- # Copyright (c) 2004 The FaerieMUD Consortium.
34
- #
35
- # BlueCloth is free software; you can redistribute it and/or modify it under the
36
- # terms of the GNU General Public License as published by the Free Software
37
- # Foundation; either version 2 of the License, or (at your option) any later
38
- # version.
39
- #
40
- # BlueCloth is distributed in the hope that it will be useful, but WITHOUT ANY
41
- # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
42
- # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
43
- #
44
- # == To-do
45
- #
46
- # * Refactor some of the larger uglier methods that have to do their own
47
- # brute-force scanning because of lack of Perl features in Ruby's Regexp
48
- # class. Alternately, could add a dependency on 'pcre' and use most Perl
49
- # regexps.
50
- #
51
- # * Put the StringScanner in the render state for thread-safety.
52
- #
53
- # == Version
54
- #
55
- # $Id: bluecloth.rb,v 1.1.1.1 2004/11/09 02:02:58 assaph Exp $
56
- #
57
-
58
- require 'digest/md5'
59
- require 'logger'
60
- require 'strscan'
61
-
62
-
63
- ### BlueCloth is a Ruby implementation of Markdown, a text-to-HTML conversion
64
- ### tool.
65
- class BlueCloth < String
66
-
67
- ### Exception class for formatting errors.
68
- class FormatError < RuntimeError
69
-
70
- ### Create a new FormatError with the given source +str+ and an optional
71
- ### message about the +specific+ error.
72
- def initialize( str, specific=nil )
73
- if specific
74
- msg = "Bad markdown format near %p: %s" % [ str, specific ]
75
- else
76
- msg = "Bad markdown format near %p" % str
77
- end
78
-
79
- super( msg )
80
- end
81
- end
82
-
83
-
84
- # Release Version
85
- Version = '0.0.3'
86
-
87
- # SVN Revision
88
- SvnRev = %q$Rev: 37 $
89
-
90
- # SVN Id tag
91
- SvnId = %q$Id: bluecloth.rb,v 1.1.1.1 2004/11/09 02:02:58 assaph Exp $
92
-
93
- # SVN URL
94
- SvnUrl = %q$URL: svn+ssh://cvs.faeriemud.org/var/svn/BlueCloth/trunk/lib/bluecloth.rb $
95
-
96
-
97
- # Rendering state struct. Keeps track of URLs, titles, and HTML blocks
98
- # midway through a render. I prefer this to the globals of the Perl version
99
- # because globals make me break out in hives. Or something.
100
- RenderState = Struct::new( "RenderState", :urls, :titles, :html_blocks, :log )
101
-
102
- # Tab width for #detab! if none is specified
103
- TabWidth = 4
104
-
105
- # The tag-closing string -- set to '>' for HTML
106
- EmptyElementSuffix = "/>";
107
-
108
- # Table of MD5 sums for escaped characters
109
- EscapeTable = {}
110
- '\\`*_{}[]()#.!'.split(//).each {|char|
111
- hash = Digest::MD5::hexdigest( char )
112
-
113
- EscapeTable[ char ] = {
114
- :md5 => hash,
115
- :md5re => Regexp::new( hash ),
116
- :re => Regexp::new( '\\\\' + Regexp::escape(char) ),
117
- }
118
- }
119
-
120
-
121
- #################################################################
122
- ### I N S T A N C E M E T H O D S
123
- #################################################################
124
-
125
- ### Create a new BlueCloth string.
126
- def initialize( content="", *restrictions )
127
- @log = Logger::new( $deferr )
128
- @log.level = $DEBUG ?
129
- Logger::DEBUG :
130
- ($VERBOSE ? Logger::INFO : Logger::WARN)
131
- @scanner = nil
132
-
133
- # Add any restrictions, and set the line-folding attribute to reflect
134
- # what happens by default.
135
- restrictions.flatten.each {|r| __send__("#{r}=", true) }
136
- @fold_lines = true
137
-
138
- super( content )
139
-
140
- @log.debug "String is: %p" % self
141
- end
142
-
143
-
144
- ######
145
- public
146
- ######
147
-
148
- # Filters for controlling what gets output for untrusted input. (But really,
149
- # you're filtering bad stuff out of untrusted input at submission-time via
150
- # untainting, aren't you?)
151
- attr_accessor :filter_html, :filter_styles
152
-
153
- # RedCloth-compatibility accessor. Line-folding is part of Markdown syntax,
154
- # so this isn't used by anything.
155
- attr_accessor :fold_lines
156
-
157
-
158
- ### Render Markdown-formatted text in this string object as HTML and return
159
- ### it. The parameter is for compatibility with RedCloth, and is currently
160
- ### unused, though that may change in the future.
161
- def to_html( lite=false )
162
-
163
- # Create a StringScanner we can reuse for various lexing tasks
164
- @scanner = StringScanner::new( '' )
165
-
166
- # Make a structure to carry around stuff that gets placeholdered out of
167
- # the source.
168
- rs = RenderState::new( {}, {}, {} )
169
-
170
- # Make a copy of the string with normalized line endings, tabs turned to
171
- # spaces, and a couple of guaranteed newlines at the end
172
- text = self.gsub( /\r\n?/, "\n" ).detab
173
- text += "\n\n"
174
- @log.debug "Normalized line-endings: %p" % text
175
-
176
- # Filter HTML if we're asked to do so
177
- if self.filter_html
178
- text.gsub!( "<", "&lt;" )
179
- text.gsub!( ">", "&gt;" )
180
- @log.debug "Filtered HTML: %p" % text
181
- end
182
-
183
- # Simplify blank lines
184
- text.gsub!( /^ +$/, '' )
185
- @log.debug "Tabs -> spaces/blank lines stripped: %p" % text
186
-
187
- # Replace HTML blocks with placeholders
188
- text = hide_html_blocks( text, rs )
189
- @log.debug "Hid HTML blocks: %p" % text
190
- @log.debug "Render state: %p" % rs
191
-
192
- # Strip link definitions, store in render state
193
- text = strip_link_definitions( text, rs )
194
- @log.debug "Stripped link definitions: %p" % text
195
- @log.debug "Render state: %p" % rs
196
-
197
- # Escape meta-characters
198
- text = escape_special_chars( text )
199
- @log.debug "Escaped special characters: %p" % text
200
-
201
- # Transform block-level constructs
202
- text = apply_block_transforms( text, rs )
203
- @log.debug "After block-level transforms: %p" % text
204
-
205
- # Now swap back in all the escaped characters
206
- text = unescape_special_chars( text )
207
- @log.debug "After unescaping special characters: %p" % text
208
-
209
- return text
210
- end
211
-
212
-
213
- ### Convert tabs in +str+ to spaces.
214
- def detab( tabwidth=TabWidth )
215
- copy = self.dup
216
- copy.detab!( tabwidth )
217
- return copy
218
- end
219
-
220
-
221
- ### Convert tabs to spaces in place and return self if any were converted.
222
- def detab!( tabwidth=TabWidth )
223
- newstr = self.split( /\n/ ).collect {|line|
224
- line.gsub( /(.*?)\t/ ) do
225
- $1 + ' ' * (tabwidth - $1.length % tabwidth)
226
- end
227
- }.join("\n")
228
- self.replace( newstr )
229
- end
230
-
231
-
232
- #######
233
- #private
234
- #######
235
-
236
- ### Do block-level transforms on a copy of +str+ using the specified render
237
- ### state +rs+ and return the results.
238
- def apply_block_transforms( str, rs )
239
- # Port: This was called '_runBlockGamut' in the original
240
-
241
- @log.debug "Applying block transforms to:\n %p" % str
242
- text = transform_headers( str, rs )
243
- text = transform_hrules( text, rs )
244
- text = transform_lists( text, rs )
245
- text = transform_code_blocks( text, rs )
246
- text = transform_block_quotes( text, rs )
247
- text = transform_auto_links( text, rs )
248
- text = hide_html_blocks( text, rs )
249
-
250
- text = form_paragraphs( text, rs )
251
-
252
- @log.debug "Done with block transforms:\n %p" % text
253
- return text
254
- end
255
-
256
-
257
- ### Apply Markdown span transforms to a copy of the specified +str+ with the
258
- ### given render state +rs+ and return it.
259
- def apply_span_transforms( str, rs )
260
- @log.debug "Applying span transforms to:\n %p" % str
261
-
262
- str = transform_code_spans( str, rs )
263
- str = encode_html( str )
264
- str = transform_images( str, rs )
265
- str = transform_anchors( str, rs )
266
- str = transform_italic_and_bold( str, rs )
267
-
268
- # Hard breaks
269
- str.gsub!( / {2,}\n/, "<br#{EmptyElementSuffix}\n" )
270
-
271
- @log.debug "Done with span transforms:\n %p" % str
272
- return str
273
- end
274
-
275
-
276
- # The list of tags which are considered block-level constructs and an
277
- # alternation pattern suitable for use in regexps made from the list
278
- BlockTags = %w[ p div h[1-6] blockquote pre table dl ol ul script ]
279
- BlockTagPattern = BlockTags.join('|')
280
-
281
- # Nested blocks:
282
- # <div>
283
- # <div>
284
- # tags for inner block must be indented.
285
- # </div>
286
- # </div>
287
- StrictBlockRegex = %r{
288
- ^ # Start of line
289
- <(#{BlockTagPattern}) # Start tag: \2
290
- \b # word break
291
- (.*\n)*? # Any number of lines, minimal match
292
- </\1> # Matching end tag
293
- [ ]* # trailing spaces
294
- (?=\n+|\Z) # End of line or document
295
- }ix
296
-
297
- # More-liberal block-matching
298
- LooseBlockRegex = %r{
299
- ^ # Start of line
300
- <(#{BlockTagPattern}) # start tag: \2
301
- \b # word break
302
- (.*\n)*? # Any number of lines, minimal match
303
- .*</\1> # Anything + Matching end tag
304
- [ ]* # trailing spaces
305
- (?=\n+|\Z) # End of line or document
306
- }ix
307
-
308
- # Special case for <hr />.
309
- HruleBlockRegex = %r{
310
- ( # $1
311
- \A\n? # Start of doc + optional \n
312
- | # or
313
- .*\n\n # anything + blank line
314
- )
315
- ( # save in $2
316
- [ ]* # Any spaces
317
- <hr # Tag open
318
- \b # Word break
319
- ([^<>])*? # Attributes
320
- /?> # Tag close
321
- (?=\n\n|\Z) # followed by a blank line or end of document
322
- )
323
- }ix
324
-
325
- ### Replace all blocks of HTML in +str+ that start in the left margin with
326
- ### tokens.
327
- def hide_html_blocks( str, rs )
328
- @log.debug "Hiding HTML blocks in %p" % str
329
-
330
- # Tokenizer proc to pass to gsub
331
- tokenize = lambda {|match|
332
- key = Digest::MD5::hexdigest( match )
333
- rs.html_blocks[ key ] = match
334
- @log.debug "Replacing %p with %p" %
335
- [ match, key ]
336
- "\n\n#{key}\n\n"
337
- }
338
-
339
- rval = str.dup
340
-
341
- @log.debug "Finding blocks with the strict regex..."
342
- rval.gsub!( StrictBlockRegex, &tokenize )
343
-
344
- @log.debug "Finding blocks with the loose regex..."
345
- rval.gsub!( LooseBlockRegex, &tokenize )
346
-
347
- @log.debug "Finding hrules..."
348
- rval.gsub!( HruleBlockRegex ) {|match| $1 + tokenize[$2] }
349
-
350
- return rval
351
- end
352
-
353
-
354
- # Link defs are in the form: ^[id]: url "optional title"
355
- LinkRegex = %r{
356
- ^[ ]*\[(.+)\]: # id = $1
357
- [ ]*
358
- \n? # maybe *one* newline
359
- [ ]*
360
- (\S+) # url = $2
361
- [ ]*
362
- \n? # maybe one newline
363
- [ ]*
364
- (?:
365
- # Titles are delimited by "quotes" or (parens).
366
- ["(]
367
- (.+?) # title = $3
368
- [")] # Matching ) or "
369
- [ ]*
370
- )? # title is optional
371
- (?:\n+|\Z)
372
- }x
373
-
374
- ### Strip link definitions from +str+, storing them in the given RenderState
375
- ### +rs+.
376
- def strip_link_definitions( str, rs )
377
- str.gsub( LinkRegex ) {|match|
378
- id, url, title = $1, $2, $3
379
-
380
- rs.urls[ id.downcase ] = encode_html( url )
381
- unless title.nil?
382
- rs.titles[ id.downcase ] = title.gsub( /"/, "&quot;" )
383
- end
384
- ""
385
- }
386
- end
387
-
388
-
389
- ### Escape special characters in the given +str+
390
- def escape_special_chars( str )
391
- @log.debug " Escaping special characters"
392
- text = ''
393
-
394
- tokenize_html( str ) {|token, str|
395
- @log.debug " Adding %p token %p" % [ token, str ]
396
- case token
397
-
398
- # Within tags, encode * and _
399
- when :tag
400
- text += str.
401
- gsub( /\*/, EscapeTable['*'][:md5] ).
402
- gsub( /_/, EscapeTable['_'][:md5] )
403
-
404
- # Encode backslashed stuff in regular text
405
- when :text
406
- text += encode_backslash_escapes( str )
407
- else
408
- raise TypeError, "Unknown token type %p" % token
409
- end
410
- }
411
-
412
- @log.debug " Text with escapes is now: %p" % text
413
- return text
414
- end
415
-
416
-
417
- ### Swap escaped special characters in a copy of the given +str+ and return
418
- ### it.
419
- def unescape_special_chars( str )
420
- EscapeTable.each {|char, hash|
421
- @log.debug "Unescaping escaped %p with %p" %
422
- [ char, hash[:md5re] ]
423
- str.gsub!( hash[:md5re], char )
424
- }
425
-
426
- return str
427
- end
428
-
429
-
430
- ### Return a copy of the given +str+ with any backslashed special character
431
- ### in it replaced with MD5 placeholders.
432
- def encode_backslash_escapes( str )
433
- # Make a copy with any double-escaped backslashes encoded
434
- text = str.gsub( /\\\\/, EscapeTable['\\'][:md5] )
435
-
436
- EscapeTable.each_pair {|char, esc|
437
- next if char == '\\'
438
- text.gsub!( esc[:re], esc[:md5] )
439
- }
440
-
441
- return text
442
- end
443
-
444
-
445
- ### Transform any Markdown-style horizontal rules in a copy of the specified
446
- ### +str+ and return it.
447
- def transform_hrules( str, rs )
448
- @log.debug " Transforming horizontal rules"
449
- str.gsub( /^( ?[\-\*] ?){3,}$/, "\n<hr#{EmptyElementSuffix}\n" )
450
- end
451
-
452
-
453
-
454
- # Pattern to transform lists
455
- ListRegexp = %r{
456
- (?:
457
- ^[ ]{0,#{TabWidth - 1}} # Indent < tab width
458
- (\*|\d+\.) # unordered or ordered ($1)
459
- [ ]+ # At least one space
460
- )
461
- (?m:.+?) # item content (include newlines)
462
- (?:
463
- \z # Either EOF
464
- | # or
465
- \n{2,} # Blank line...
466
- (?=\S) # ...followed by non-space
467
- (?![ ]* (\*|\d+\.) [ ]+) # ...but not another item
468
- )
469
- }x
470
-
471
- ### Transform Markdown-style lists in a copy of the specified +str+ and
472
- ### return it.
473
- def transform_lists( str, rs )
474
- @log.debug " Transforming lists at %p" % (str[0,100] + '...')
475
-
476
- str.gsub( ListRegexp ) {|list|
477
- @log.debug " Found list %p" % list
478
- list_type = ($1 == '*' ? "ul" : "ol")
479
- list.gsub!( /\n{2,}/, "\n\n\n" )
480
-
481
- %{<%s>\n%s</%s>\n} % [
482
- list_type,
483
- transform_list_items( list, rs ),
484
- list_type,
485
- ]
486
- }
487
- end
488
-
489
-
490
- # Pattern for transforming list items
491
- ListItemRegexp = %r{
492
- (\n)? # leading line = $1
493
- (^[ ]*) # leading whitespace = $2
494
- (\*|\d+\.) [ ]+ # list marker = $3
495
- ((?m:.+?) # list item text = $4
496
- (\n{1,2}))
497
- (?= \n* (\z | \2 (\*|\d+\.) [ ]+))
498
- }x
499
-
500
- ### Transform list items in a copy of the given +str+ and return it.
501
- def transform_list_items( str, rs )
502
- @log.debug " Transforming list items"
503
-
504
- # Trim trailing blank lines
505
- str = str.sub( /\n{2,}\z/, "\n" )
506
-
507
- str.gsub( ListItemRegexp ) {|line|
508
- @log.debug " Found item line %p" % line
509
- leading_line, item = $1, $4
510
-
511
- if leading_line or /\n{2,}/.match( item )
512
- @log.debug " Found leading line or item has a blank"
513
- item = apply_block_transforms( outdent(item), rs )
514
- else
515
- # Recursion for sub-lists
516
- @log.debug " Recursing for sublist"
517
- item = transform_lists( outdent(item), rs ).chomp
518
- item = apply_span_transforms( item, rs )
519
- end
520
-
521
- %{<li>%s</li>\n} % item
522
- }
523
- end
524
-
525
-
526
- # Pattern for matching codeblocks
527
- CodeBlockRegexp = %r{
528
- (.?) # $1 = preceding character
529
- :\n+ # colon + NL delimiter
530
- ( # $2 = the code block
531
- (?:
532
- (?:[ ]{#{TabWidth}} | \t) # a tab or tab-width of spaces
533
- .*\n+
534
- )+
535
- )
536
- ((?=^[ ]{0,#{TabWidth}}\S)|\Z) # Lookahead for non-space at
537
- # line-start, or end of doc
538
- }x
539
-
540
- ### Transform Markdown-style codeblocks in a copy of the specified +str+ and
541
- ### return it.
542
- def transform_code_blocks( str, rs )
543
- @log.debug " Transforming code blocks"
544
-
545
- str.gsub( CodeBlockRegexp ) {|block|
546
- prevchar, codeblock = $1, $2
547
-
548
- @log.debug " prevchar = %p" % prevchar
549
-
550
- # Generated the codeblock
551
- %{%s\n\n<pre><code>%s\n</code></pre>\n\n} % [
552
- (prevchar.empty? || /\s/ =~ prevchar) ? "" : "#{prevchar}:",
553
- encode_code( outdent(codeblock), rs ).rstrip,
554
- ]
555
- }
556
- end
557
-
558
-
559
- # Pattern for matching Markdown blockquote blocks
560
- BlockQuoteRegexp = %r{
561
- (?:
562
- ^[ ]*>[ ]? # '>' at the start of a line
563
- .+\n # rest of the first line
564
- (?:.+\n)* # subsequent consecutive lines
565
- \n* # blanks
566
- )+
567
- }x
568
-
569
- ### Transform Markdown-style blockquotes in a copy of the specified +str+
570
- ### and return it.
571
- def transform_block_quotes( str, rs )
572
- @log.debug " Transforming block quotes"
573
-
574
- str.gsub( BlockQuoteRegexp ) {|quote|
575
- @log.debug "Making blockquote from %p" % quote
576
- quote.gsub!( /^[ ]*>[ ]?/, '' )
577
- %{<blockquote>\n%s\n</blockquote>\n\n} %
578
- apply_block_transforms( quote, rs ).
579
- gsub( /^/, " " * TabWidth )
580
- }
581
- end
582
-
583
-
584
- AutoAnchorURLRegexp = /<((https?|ftp):[^'">\s]+)>/
585
- AutoAnchorEmailRegexp = %r{
586
- <
587
- (
588
- [-.\w]+
589
- \@
590
- [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
591
- )
592
- >
593
- }x
594
-
595
- ### Transform URLs in a copy of the specified +str+ into links and return
596
- ### it.
597
- def transform_auto_links( str, rs )
598
- @log.debug " Transforming auto-links"
599
- str.gsub( AutoAnchorURLRegexp, %{<a href="\\1">\\1</a>}).
600
- gsub( AutoAnchorEmailRegexp ) {|addr|
601
- encode_email_address( unescape_special_chars($1) )
602
- }
603
- end
604
-
605
-
606
- # Encoder functions to turn characters of an email address into encoded
607
- # entities.
608
- Encoders = [
609
- lambda {|char| "&#%03d;" % char},
610
- lambda {|char| "&#x%X;" % char},
611
- lambda {|char| char.chr },
612
- ]
613
-
614
- ### Transform a copy of the given email +addr+ into an escaped version safer
615
- ### for posting publicly.
616
- def encode_email_address( addr )
617
-
618
- rval = ''
619
- ("mailto:" + addr).each_byte {|b|
620
- case b
621
- when ?:
622
- rval += ":"
623
- when ?@
624
- rval += Encoders[ rand(2) ][ b ]
625
- else
626
- r = rand(100)
627
- rval += (
628
- r > 90 ? Encoders[2][ b ] :
629
- r < 45 ? Encoders[1][ b ] :
630
- Encoders[0][ b ]
631
- )
632
- end
633
- }
634
-
635
- return %{<a href="%s">%s</a>} % [ rval, rval.sub(/.+?:/, '') ]
636
- end
637
-
638
-
639
- # Regex for matching Setext-style headers
640
- SetextHeaderRegexp = %r{
641
- (.+) # The title text ($1)
642
- \n
643
- ([\-=])+ # Match a line of = or -. Save only one in $2.
644
- [ ]*\n+
645
- }x
646
-
647
- # Regexp for matching ATX-style headers
648
- AtxHeaderRegexp = %r{
649
- ^(\#{1,6}) # $1 = string of #'s
650
- [ ]*
651
- (.+?) # $2 = Header text
652
- [ ]*
653
- \#* # optional closing #'s (not counted)
654
- \n+
655
- }x
656
-
657
- ### Apply Markdown header transforms to a copy of the given +str+ amd render
658
- ### state +rs+ and return the result.
659
- def transform_headers( str, rs )
660
- @log.debug " Transforming headers"
661
-
662
- # Setext-style headers:
663
- # Header 1
664
- # ========
665
- #
666
- # Header 2
667
- # --------
668
- #
669
- str.
670
- gsub( SetextHeaderRegexp ) {|m|
671
- @log.debug "Found setext-style header"
672
- title, hdrchar = $1, $2
673
- title = apply_span_transforms( title, rs )
674
-
675
- case hdrchar
676
- when '='
677
- %[<h1>#{title}</h1>\n\n]
678
- when '-'
679
- %[<h2>#{title}</h2>\n\n]
680
- else
681
- title
682
- end
683
- }.
684
-
685
- gsub( AtxHeaderRegexp ) {|m|
686
- @log.debug "Found ATX-style header"
687
- hdrchars, title = $1, $2
688
- title = apply_span_transforms( title, rs )
689
-
690
- level = hdrchars.length
691
- %{<h%d>%s</h%d>\n\n} % [ level, title, level ]
692
- }
693
- end
694
-
695
-
696
- ### Wrap all remaining paragraph-looking text in a copy of +str+ inside <p>
697
- ### tags and return it.
698
- def form_paragraphs( str, rs )
699
- @log.debug " Forming paragraphs"
700
- grafs = str.
701
- sub( /\A\n+/, '' ).
702
- sub( /\n+\z/, '' ).
703
- split( /\n{2,}/ )
704
-
705
- rval = grafs.collect {|graf|
706
-
707
- # Unhashify HTML blocks if this is a placeholder
708
- if rs.html_blocks.key?( graf )
709
- rs.html_blocks[ graf ]
710
-
711
- # Otherwise, wrap in <p> tags
712
- else
713
- apply_span_transforms(graf, rs).
714
- sub( /^[ ]*/, '<p>' ) + '</p>'
715
- end
716
- }.join( "\n\n" )
717
-
718
- @log.debug " Formed paragraphs: %p" % rval
719
- return rval
720
- end
721
-
722
-
723
- # Pattern to match the linkid part of an anchor tag for reference-style
724
- # links.
725
- RefLinkIdRegex = %r{
726
- [ ]? # Optional leading space
727
- (?:\n[ ]*)? # Optional newline + spaces
728
- \[
729
- (.*?) # Id = $1
730
- \]
731
- }x
732
-
733
- InlineLinkRegex = %r{
734
- \( # Literal paren
735
- [ ]* # Zero or more spaces
736
- (.*?) # URI = $1
737
- [ ]* # Zero or more spaces
738
- (?: #
739
- ([\"\']) # Opening quote char = $2
740
- (.*?) # Title = $3
741
- \2 # Matching quote char
742
- )? # Title is optional
743
- \)
744
- }x
745
-
746
- ### Apply Markdown anchor transforms to a copy of the specified +str+ with
747
- ### the given render state +rs+ and return it.
748
- def transform_anchors( str, rs )
749
- @log.debug " Transforming anchors"
750
- @scanner.string = str.dup
751
- text = ''
752
-
753
- # Scan the whole string
754
- until @scanner.empty?
755
-
756
- if @scanner.scan( /\[/ )
757
- link = ''; linkid = ''
758
- depth = 1
759
- startpos = @scanner.pos
760
- @log.debug " Found a bracket-open at %d" % startpos
761
-
762
- # Scan the rest of the tag, allowing unlimited nested []s. If
763
- # the scanner runs out of text before the opening bracket is
764
- # closed, append the text and return (wasn't a valid anchor).
765
- while depth.nonzero?
766
- linktext = @scanner.scan_until( /\]|\[/ )
767
-
768
- if linktext
769
- @log.debug " Found a bracket at depth %d: %p" %
770
- [ depth, linktext ]
771
- link += linktext
772
-
773
- # Decrement depth for each closing bracket
774
- depth += ( linktext[-1, 1] == ']' ? -1 : 1 )
775
- @log.debug " Depth is now #{depth}"
776
-
777
- # If there's no more brackets, it must not be an anchor, so
778
- # just abort.
779
- else
780
- @log.debug " Missing closing brace, assuming non-link."
781
- link += @scanner.rest
782
- @scanner.terminate
783
- return text + '[' + link
784
- end
785
- end
786
- link.slice!( -1 ) # Trim final ']'
787
- @log.debug " Found leading link %p" % link
788
-
789
- # Look for a reference-style second part
790
- if @scanner.scan( RefLinkIdRegex )
791
- linkid = @scanner[1]
792
- linkid = link.dup if linkid.empty?
793
- linkid.downcase!
794
- @log.debug " Found a linkid: %p" % linkid
795
-
796
- # If there's a matching link in the link table, build an
797
- # anchor tag for it.
798
- if rs.urls.key?( linkid )
799
- @log.debug " Found link key in the link table: %p" %
800
- rs.urls[linkid]
801
- url = escape_md( rs.urls[linkid] )
802
-
803
- text += %{<a href="#{url}"}
804
- if rs.titles.key?(linkid)
805
- text += %{ title="%s"} % escape_md( rs.titles[linkid] )
806
- end
807
- text += %{>#{link}</a>}
808
-
809
- # If the link referred to doesn't exist, just append the raw
810
- # source to the result
811
- else
812
- @log.debug " Linkid %p not found in link table" % linkid
813
- @log.debug " Appending original string instead: %p" %
814
- @scanner.string[ startpos-1 .. @scanner.pos ]
815
- text += @scanner.string[ startpos-1 .. @scanner.pos ]
816
- end
817
-
818
- # ...or for an inline style second part
819
- elsif @scanner.scan( InlineLinkRegex )
820
- url = @scanner[1]
821
- title = @scanner[3]
822
- @log.debug " Found an inline link to %p" % url
823
-
824
- text += %{<a href="%s"} % escape_md( url )
825
- if title
826
- text += %{ title="%s"} % escape_md( title )
827
- end
828
- text += %{>#{link}</a>}
829
-
830
- # No linkid part: just append the first part as-is.
831
- else
832
- @log.debug "No linkid, so no anchor. Appending literal text."
833
- text += @scanner.string[ startpos-1 .. @scanner.pos-1 ]
834
- end # if linkid
835
-
836
- # Plain text
837
- else
838
- @log.debug " Scanning to the next link from %p" % @scanner.rest
839
- text += @scanner.scan( /[^\[]+/ )
840
- end
841
-
842
- end # until @scanner.empty?
843
-
844
- return text
845
- end
846
-
847
- # Pattern to match strong emphasis in Markdown text
848
- BoldRegexp = %r{ (\*\*|__) (?=\S) (.+?\S) \1 }x
849
-
850
- # Pattern to match normal emphasis in Markdown text
851
- ItalicRegexp = %r{ (\*|_) (?=\S) (.+?\S) \1 }x
852
-
853
- ### Transform italic- and bold-encoded text in a copy of the specified +str+
854
- ### and return it.
855
- def transform_italic_and_bold( str, rs )
856
- @log.debug " Transforming italic and bold"
857
-
858
- str.
859
- gsub( BoldRegexp, %{<strong>\\2</strong>} ).
860
- gsub( ItalicRegexp, %{<em>\\2</em>} )
861
- end
862
-
863
-
864
- ### Transform backticked spans into <code> spans.
865
- def transform_code_spans( str, rs )
866
- @log.debug " Transforming code spans"
867
-
868
- # Set up the string scanner and just return the string unless there's at
869
- # least one backtick.
870
- @scanner.string = str.dup
871
- unless @scanner.exist?( /`/ )
872
- @scanner.terminate
873
- @log.debug "No backticks found for code span in %p" % str
874
- return str
875
- end
876
-
877
- @log.debug "Transforming code spans in %p" % str
878
-
879
- # Build the transformed text anew
880
- text = ''
881
-
882
- # Scan to the end of the string
883
- until @scanner.empty?
884
-
885
- # Scan up to an opening backtick
886
- if pre = @scanner.scan_until( /.?(?=`)/m )
887
- text += pre
888
- @log.debug "Found backtick at %d after '...%s'" %
889
- [ @scanner.pos, text[-10, 10] ]
890
-
891
- # Make a pattern to find the end of the span
892
- opener = @scanner.scan( /`+/ )
893
- len = opener.length
894
- closer = Regexp::new( opener )
895
- @log.debug "Scanning for end of code span with %p" % closer
896
-
897
- # Scan until the end of the closing backtick sequence. Chop the
898
- # backticks off the resultant string, strip leading and trailing
899
- # whitespace, and encode any entities contained in it.
900
- codespan = @scanner.scan_until( closer ) or
901
- raise FormatError::new( @scanner.rest[0,20],
902
- "No %p found before end" % opener )
903
-
904
- @log.debug "Found close of code span at %d: %p" %
905
- [ @scanner.pos - len, codespan ]
906
- codespan.slice!( -len, len )
907
- text += "<code>%s</code>" %
908
- encode_code( codespan.strip, rs )
909
-
910
- # If there's no more backticks, just append the rest of the string
911
- # and move the scan pointer to the end
912
- else
913
- text += @scanner.rest
914
- @scanner.terminate
915
- end
916
- end
917
-
918
- return text
919
- end
920
-
921
-
922
- # Next, handle inline images: ![alt text](url "optional title")
923
- # Don't forget: encode * and _
924
- InlineImageRegexp = %r{
925
- ( # Whole match = $1
926
- !\[ (.*?) \] # alt text = $2
927
- \([ ]* (\S+) [ ]* # source url = $3
928
- ( # title = $4
929
- (["']) # quote char = $5
930
- .*?
931
- \5 # matching quote
932
- [ ]*
933
- )? # title is optional
934
- \)
935
- )
936
- }xs #"
937
-
938
-
939
- # Reference-style images
940
- ReferenceImageRegexp = %r{
941
- ( # Whole match = $1
942
- !\[ (.*?) \] # Alt text = $2
943
- [ ]? # Optional space
944
- (?:\n[ ]*)? # One optional newline + spaces
945
- \[ (.*?) \] # id = $3
946
- )
947
- }xs
948
-
949
- ### Turn image markup into image tags.
950
- def transform_images( str, rs )
951
- @log.debug " Transforming images" % str
952
-
953
- # Handle reference-style labeled images: ![alt text][id]
954
- str.
955
- gsub( ReferenceImageRegexp ) {|match|
956
- whole, alt, linkid = $1, $2, $3.downcase
957
- @log.debug "Matched %p" % match
958
- res = nil
959
-
960
- # for shortcut links like ![this][].
961
- linkid = alt.downcase if linkid.empty?
962
-
963
- if rs.urls.key?( linkid )
964
- url = escape_md( rs.urls[linkid] )
965
- @log.debug "Found url '%s' for linkid '%s' " %
966
- [ url, linkid ]
967
-
968
- # Build the tag
969
- result = %{<img src="%s" alt="%s"} % [ url, alt ]
970
- if rs.titles.key?( linkid )
971
- result += %{ title="%s"} % escape_md( rs.titles[linkid] )
972
- end
973
- result += EmptyElementSuffix
974
-
975
- else
976
- result = whole
977
- end
978
-
979
- @log.debug "Replacing %p with %p" %
980
- [ match, result ]
981
- result
982
- }.
983
-
984
- # Inline image style
985
- gsub( InlineImageRegexp ) {|match|
986
- @log.debug "Found inline image %p" % match
987
- whole, alt, title = $1, $2, $4
988
- url = escape_md( $3 )
989
-
990
- # Build the tag
991
- result = %{<img src="%s" alt="%s"} % [ url, alt ]
992
- unless title.nil?
993
- result += %{ title="%s"} % escape_md( title.gsub(/^"|"$/, '') )
994
- end
995
- result += EmptyElementSuffix
996
-
997
- @log.debug "Replacing %p with %p" %
998
- [ match, result ]
999
- result
1000
- }
1001
- end
1002
-
1003
-
1004
- # Regexp to match special characters in a code block
1005
- CodeEscapeRegexp = %r{( \* | _ | \{ | \} | \[ | \] )}x
1006
-
1007
- ### Escape any characters special to HTML and encode any characters special
1008
- ### to Markdown in a copy of the given +str+ and return it.
1009
- def encode_code( str, rs )
1010
- str.gsub( %r{&}, '&amp;' ).
1011
- gsub( %r{<}, '&lt;' ).
1012
- gsub( %r{>}, '&gt;' ).
1013
- gsub( CodeEscapeRegexp ) {|match| EscapeTable[match][:md5]}
1014
- end
1015
-
1016
-
1017
-
1018
- #################################################################
1019
- ### U T I L I T Y F U N C T I O N S
1020
- #################################################################
1021
-
1022
- ### Escape any markdown characters in a copy of the given +str+ and return
1023
- ### it.
1024
- def escape_md( str )
1025
- str.
1026
- gsub( /\*/, '&#42;' ).
1027
- gsub( /_/, '&#95;' )
1028
- end
1029
-
1030
-
1031
- # Matching constructs for tokenizing X/HTML
1032
- HTMLCommentRegexp = %r{ <! ( -- .*? -- \s* )+ > }mx
1033
- XMLProcInstRegexp = %r{ <\? .*? \?> }mx
1034
- MetaTag = Regexp::union( HTMLCommentRegexp, XMLProcInstRegexp )
1035
-
1036
- HTMLTagOpenRegexp = %r{ < [a-z/!$] [^<>]* }mx
1037
- HTMLTagCloseRegexp = %r{ > }x
1038
- HTMLTagPart = Regexp::union( HTMLTagOpenRegexp, HTMLTagCloseRegexp )
1039
-
1040
- ### Break the HTML source in +str+ into a series of tokens and return
1041
- ### them. The tokens are just 2-element Array tuples with a type and the
1042
- ### actual content. If this function is called with a block, the type and
1043
- ### text parts of each token will be yielded to it one at a time as they are
1044
- ### extracted.
1045
- def tokenize_html( str )
1046
- depth = 0
1047
- tokens = []
1048
- @scanner.string = str.dup
1049
- type, token = nil, nil
1050
-
1051
- until @scanner.empty?
1052
- @log.debug "Scanning from %p" % @scanner.rest
1053
-
1054
- # Match comments and PIs without nesting
1055
- if (( token = @scanner.scan(MetaTag) ))
1056
- type = :tag
1057
-
1058
- # Do nested matching for HTML tags
1059
- elsif (( token = @scanner.scan(HTMLTagOpenRegexp) ))
1060
- tagstart = @scanner.pos
1061
- @log.debug " Found the start of a plain tag at %d" % tagstart
1062
-
1063
- # Start the token with the opening angle
1064
- depth = 1
1065
- type = :tag
1066
-
1067
- # Scan the rest of the tag, allowing unlimited nested <>s. If
1068
- # the scanner runs out of text before the tag is closed, raise
1069
- # an error.
1070
- while depth.nonzero?
1071
-
1072
- # Scan either an opener or a closer
1073
- chunk = @scanner.scan( HTMLTagPart ) or
1074
- raise "Malformed tag at character %d: %p" %
1075
- [ tagstart, token + @scanner.rest ]
1076
-
1077
- @log.debug " Found another part of the tag at depth %d: %p" %
1078
- [ depth, chunk ]
1079
-
1080
- token += chunk
1081
-
1082
- # If the last character of the token so far is a closing
1083
- # angle bracket, decrement the depth. Otherwise increment
1084
- # it for a nested tag.
1085
- depth += ( token[-1, 1] == '>' ? -1 : 1 )
1086
- @log.debug " Depth is now #{depth}"
1087
- end
1088
-
1089
- # Match text segments
1090
- else
1091
- @log.debug " Looking for a chunk of text"
1092
- type = :text
1093
-
1094
- # Scan forward, always matching at least one character to move
1095
- # the pointer beyond any non-tag '<'.
1096
- token = @scanner.scan_until( /[^<]+/m )
1097
- end
1098
-
1099
- @log.debug " type: %p, token: %p" % [ type, token ]
1100
-
1101
- # If a block is given, feed it one token at a time. Add the token to
1102
- # the token list to be returned regardless.
1103
- if block_given?
1104
- yield( type, token )
1105
- end
1106
- tokens << [ type, token ]
1107
- end
1108
-
1109
- return tokens
1110
- end
1111
-
1112
-
1113
- ### Return a copy of +str+ with angle brackets and ampersands HTML-encoded.
1114
- def encode_html( str )
1115
- str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w{1,8});)/i, "&amp;" ).
1116
- gsub( %r{<(?![a-z/?\$!])}i, "&lt;" )
1117
- end
1118
-
1119
-
1120
- ### Remove one level of line-leading tabs or spaces from a copy of +str+ and
1121
- ### return it.
1122
- def outdent( str )
1123
- str.gsub( /^(\t|[ ]{1,#{TabWidth}})/, '')
1124
- end
1125
-
1126
- end # class BlueCloth
1127
-
1
+ #!/usr/bin/ruby
2
+ #
3
+ # Bluecloth is a Ruby implementation of Markdown, a text-to-HTML conversion
4
+ # tool.
5
+ #
6
+ # == Synopsis
7
+ #
8
+ # doc = BlueCloth::new "
9
+ # ## Test document ##
10
+ #
11
+ # Just a simple test.
12
+ # "
13
+ #
14
+ # puts doc.to_html
15
+ #
16
+ # == Authors
17
+ #
18
+ # * Michael Granger <ged@FaerieMUD.org>
19
+ #
20
+ # == Contributors
21
+ #
22
+ # * Martin Chase <stillflame@FaerieMUD.org> - Peer review, helpful suggestions
23
+ # * Florian Gross <flgr@ccan.de> - Filter options, suggestions
24
+ #
25
+ # == Copyright
26
+ #
27
+ # Original version:
28
+ # Copyright (c) 2003-2004 John Gruber
29
+ # <http://daringfireball.net/>
30
+ # All rights reserved.
31
+ #
32
+ # Ruby port:
33
+ # Copyright (c) 2004 The FaerieMUD Consortium.
34
+ #
35
+ # BlueCloth is free software; you can redistribute it and/or modify it under the
36
+ # terms of the GNU General Public License as published by the Free Software
37
+ # Foundation; either version 2 of the License, or (at your option) any later
38
+ # version.
39
+ #
40
+ # BlueCloth is distributed in the hope that it will be useful, but WITHOUT ANY
41
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
42
+ # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
43
+ #
44
+ # == To-do
45
+ #
46
+ # * Refactor some of the larger uglier methods that have to do their own
47
+ # brute-force scanning because of lack of Perl features in Ruby's Regexp
48
+ # class. Alternately, could add a dependency on 'pcre' and use most Perl
49
+ # regexps.
50
+ #
51
+ # * Put the StringScanner in the render state for thread-safety.
52
+ #
53
+ # == Version
54
+ #
55
+ # $Id: bluecloth.rb,v 1.1.1.1 2004/11/09 02:02:58 assaph Exp $
56
+ #
57
+
58
+ require 'digest/md5'
59
+ require 'logger'
60
+ require 'strscan'
61
+
62
+
63
+ ### BlueCloth is a Ruby implementation of Markdown, a text-to-HTML conversion
64
+ ### tool.
65
+ class BlueCloth < String
66
+
67
+ ### Exception class for formatting errors.
68
+ class FormatError < RuntimeError
69
+
70
+ ### Create a new FormatError with the given source +str+ and an optional
71
+ ### message about the +specific+ error.
72
+ def initialize( str, specific=nil )
73
+ if specific
74
+ msg = "Bad markdown format near %p: %s" % [ str, specific ]
75
+ else
76
+ msg = "Bad markdown format near %p" % str
77
+ end
78
+
79
+ super( msg )
80
+ end
81
+ end
82
+
83
+
84
+ # Release Version
85
+ Version = '0.0.3'
86
+
87
+ # SVN Revision
88
+ SvnRev = %q$Rev: 37 $
89
+
90
+ # SVN Id tag
91
+ SvnId = %q$Id: bluecloth.rb,v 1.1.1.1 2004/11/09 02:02:58 assaph Exp $
92
+
93
+ # SVN URL
94
+ SvnUrl = %q$URL: svn+ssh://cvs.faeriemud.org/var/svn/BlueCloth/trunk/lib/bluecloth.rb $
95
+
96
+
97
+ # Rendering state struct. Keeps track of URLs, titles, and HTML blocks
98
+ # midway through a render. I prefer this to the globals of the Perl version
99
+ # because globals make me break out in hives. Or something.
100
+ RenderState = Struct::new( "RenderState", :urls, :titles, :html_blocks, :log )
101
+
102
+ # Tab width for #detab! if none is specified
103
+ TabWidth = 4
104
+
105
+ # The tag-closing string -- set to '>' for HTML
106
+ EmptyElementSuffix = "/>";
107
+
108
+ # Table of MD5 sums for escaped characters
109
+ EscapeTable = {}
110
+ '\\`*_{}[]()#.!'.split(//).each {|char|
111
+ hash = Digest::MD5::hexdigest( char )
112
+
113
+ EscapeTable[ char ] = {
114
+ :md5 => hash,
115
+ :md5re => Regexp::new( hash ),
116
+ :re => Regexp::new( '\\\\' + Regexp::escape(char) ),
117
+ }
118
+ }
119
+
120
+
121
+ #################################################################
122
+ ### I N S T A N C E M E T H O D S
123
+ #################################################################
124
+
125
+ ### Create a new BlueCloth string.
126
+ def initialize( content="", *restrictions )
127
+ @log = Logger::new( $deferr )
128
+ @log.level = $DEBUG ?
129
+ Logger::DEBUG :
130
+ ($VERBOSE ? Logger::INFO : Logger::WARN)
131
+ @scanner = nil
132
+
133
+ # Add any restrictions, and set the line-folding attribute to reflect
134
+ # what happens by default.
135
+ restrictions.flatten.each {|r| __send__("#{r}=", true) }
136
+ @fold_lines = true
137
+
138
+ super( content )
139
+
140
+ @log.debug "String is: %p" % self
141
+ end
142
+
143
+
144
+ ######
145
+ public
146
+ ######
147
+
148
+ # Filters for controlling what gets output for untrusted input. (But really,
149
+ # you're filtering bad stuff out of untrusted input at submission-time via
150
+ # untainting, aren't you?)
151
+ attr_accessor :filter_html, :filter_styles
152
+
153
+ # RedCloth-compatibility accessor. Line-folding is part of Markdown syntax,
154
+ # so this isn't used by anything.
155
+ attr_accessor :fold_lines
156
+
157
+
158
+ ### Render Markdown-formatted text in this string object as HTML and return
159
+ ### it. The parameter is for compatibility with RedCloth, and is currently
160
+ ### unused, though that may change in the future.
161
+ def to_html( lite=false )
162
+
163
+ # Create a StringScanner we can reuse for various lexing tasks
164
+ @scanner = StringScanner::new( '' )
165
+
166
+ # Make a structure to carry around stuff that gets placeholdered out of
167
+ # the source.
168
+ rs = RenderState::new( {}, {}, {} )
169
+
170
+ # Make a copy of the string with normalized line endings, tabs turned to
171
+ # spaces, and a couple of guaranteed newlines at the end
172
+ text = self.gsub( /\r\n?/, "\n" ).detab
173
+ text += "\n\n"
174
+ @log.debug "Normalized line-endings: %p" % text
175
+
176
+ # Filter HTML if we're asked to do so
177
+ if self.filter_html
178
+ text.gsub!( "<", "&lt;" )
179
+ text.gsub!( ">", "&gt;" )
180
+ @log.debug "Filtered HTML: %p" % text
181
+ end
182
+
183
+ # Simplify blank lines
184
+ text.gsub!( /^ +$/, '' )
185
+ @log.debug "Tabs -> spaces/blank lines stripped: %p" % text
186
+
187
+ # Replace HTML blocks with placeholders
188
+ text = hide_html_blocks( text, rs )
189
+ @log.debug "Hid HTML blocks: %p" % text
190
+ @log.debug "Render state: %p" % rs
191
+
192
+ # Strip link definitions, store in render state
193
+ text = strip_link_definitions( text, rs )
194
+ @log.debug "Stripped link definitions: %p" % text
195
+ @log.debug "Render state: %p" % rs
196
+
197
+ # Escape meta-characters
198
+ text = escape_special_chars( text )
199
+ @log.debug "Escaped special characters: %p" % text
200
+
201
+ # Transform block-level constructs
202
+ text = apply_block_transforms( text, rs )
203
+ @log.debug "After block-level transforms: %p" % text
204
+
205
+ # Now swap back in all the escaped characters
206
+ text = unescape_special_chars( text )
207
+ @log.debug "After unescaping special characters: %p" % text
208
+
209
+ return text
210
+ end
211
+
212
+
213
+ ### Return a copy of this string with its tabs converted to spaces.
214
+ def detab( tabwidth=TabWidth )
215
+ copy = self.dup
216
+ copy.detab!( tabwidth )
217
+ return copy
218
+ end
219
+
220
+
221
+ ### Convert tabs to spaces in place and return self.
222
+ def detab!( tabwidth=TabWidth )
223
+ newstr = self.split( /\n/ ).collect {|line|
224
+ line.gsub( /(.*?)\t/ ) do
225
+ $1 + ' ' * (tabwidth - $1.length % tabwidth)
226
+ end
227
+ }.join("\n")
228
+ self.replace( newstr )
229
+ end
230
+
231
+
232
+ #######
233
+ #private
234
+ #######
235
+
236
+ ### Do block-level transforms on a copy of +str+ using the specified render
237
+ ### state +rs+ and return the results.
238
+ def apply_block_transforms( str, rs )
239
+ # Port: This was called '_runBlockGamut' in the original
240
+
241
+ @log.debug "Applying block transforms to:\n %p" % str
242
+ text = transform_headers( str, rs )
243
+ text = transform_hrules( text, rs )
244
+ text = transform_lists( text, rs )
245
+ text = transform_code_blocks( text, rs )
246
+ text = transform_block_quotes( text, rs )
247
+ text = transform_auto_links( text, rs )
248
+ text = hide_html_blocks( text, rs )
249
+
250
+ text = form_paragraphs( text, rs )
251
+
252
+ @log.debug "Done with block transforms:\n %p" % text
253
+ return text
254
+ end
255
+
256
+
257
+ ### Apply Markdown span transforms to a copy of the specified +str+ with the
258
+ ### given render state +rs+ and return it.
259
+ def apply_span_transforms( str, rs )
260
+ @log.debug "Applying span transforms to:\n %p" % str
261
+
262
+ str = transform_code_spans( str, rs )
263
+ str = encode_html( str )
264
+ str = transform_images( str, rs )
265
+ str = transform_anchors( str, rs )
266
+ str = transform_italic_and_bold( str, rs )
267
+
268
+ # Hard breaks
269
+ str.gsub!( / {2,}\n/, "<br#{EmptyElementSuffix}\n" )
270
+
271
+ @log.debug "Done with span transforms:\n %p" % str
272
+ return str
273
+ end
274
+
275
+
276
+ # The list of tags which are considered block-level constructs and an
277
+ # alternation pattern suitable for use in regexps made from the list
278
+ BlockTags = %w[ p div h[1-6] blockquote pre table dl ol ul script ]
279
+ BlockTagPattern = BlockTags.join('|')
280
+
281
+ # Nested blocks:
282
+ # <div>
283
+ # <div>
284
+ # tags for inner block must be indented.
285
+ # </div>
286
+ # </div>
287
+ StrictBlockRegex = %r{
288
+ ^ # Start of line
289
+ <(#{BlockTagPattern}) # Start tag: \2
290
+ \b # word break
291
+ (.*\n)*? # Any number of lines, minimal match
292
+ </\1> # Matching end tag
293
+ [ ]* # trailing spaces
294
+ (?=\n+|\Z) # End of line or document
295
+ }ix
296
+
297
+ # More-liberal block-matching
298
+ LooseBlockRegex = %r{
299
+ ^ # Start of line
300
+ <(#{BlockTagPattern}) # start tag: \2
301
+ \b # word break
302
+ (.*\n)*? # Any number of lines, minimal match
303
+ .*</\1> # Anything + Matching end tag
304
+ [ ]* # trailing spaces
305
+ (?=\n+|\Z) # End of line or document
306
+ }ix
307
+
308
+ # Special case for <hr />.
309
+ HruleBlockRegex = %r{
310
+ ( # $1
311
+ \A\n? # Start of doc + optional \n
312
+ | # or
313
+ .*\n\n # anything + blank line
314
+ )
315
+ ( # save in $2
316
+ [ ]* # Any spaces
317
+ <hr # Tag open
318
+ \b # Word break
319
+ ([^<>])*? # Attributes
320
+ /?> # Tag close
321
+ (?=\n\n|\Z) # followed by a blank line or end of document
322
+ )
323
+ }ix
324
+
325
+ ### Replace all blocks of HTML in +str+ that start in the left margin with
326
+ ### tokens.
327
+ def hide_html_blocks( str, rs )
328
+ @log.debug "Hiding HTML blocks in %p" % str
329
+
330
+ # Tokenizer proc to pass to gsub
331
+ tokenize = lambda {|match|
332
+ key = Digest::MD5::hexdigest( match )
333
+ rs.html_blocks[ key ] = match
334
+ @log.debug "Replacing %p with %p" %
335
+ [ match, key ]
336
+ "\n\n#{key}\n\n"
337
+ }
338
+
339
+ rval = str.dup
340
+
341
+ @log.debug "Finding blocks with the strict regex..."
342
+ rval.gsub!( StrictBlockRegex, &tokenize )
343
+
344
+ @log.debug "Finding blocks with the loose regex..."
345
+ rval.gsub!( LooseBlockRegex, &tokenize )
346
+
347
+ @log.debug "Finding hrules..."
348
+ rval.gsub!( HruleBlockRegex ) {|match| $1 + tokenize[$2] }
349
+
350
+ return rval
351
+ end
352
+
353
+
354
+ # Link defs are in the form: ^[id]: url "optional title"
355
+ LinkRegex = %r{
356
+ ^[ ]*\[(.+)\]: # id = $1
357
+ [ ]*
358
+ \n? # maybe *one* newline
359
+ [ ]*
360
+ (\S+) # url = $2
361
+ [ ]*
362
+ \n? # maybe one newline
363
+ [ ]*
364
+ (?:
365
+ # Titles are delimited by "quotes" or (parens).
366
+ ["(]
367
+ (.+?) # title = $3
368
+ [")] # Matching ) or "
369
+ [ ]*
370
+ )? # title is optional
371
+ (?:\n+|\Z)
372
+ }x
373
+
374
+ ### Strip link definitions from +str+, storing them in the given RenderState
375
+ ### +rs+.
376
+ def strip_link_definitions( str, rs )
377
+ str.gsub( LinkRegex ) {|match|
378
+ id, url, title = $1, $2, $3
379
+
380
+ rs.urls[ id.downcase ] = encode_html( url )
381
+ unless title.nil?
382
+ rs.titles[ id.downcase ] = title.gsub( /"/, "&quot;" )
383
+ end
384
+ ""
385
+ }
386
+ end
387
+
388
+
389
+ ### Escape special characters in the given +str+
390
+ def escape_special_chars( str )
391
+ @log.debug " Escaping special characters"
392
+ text = ''
393
+
394
+ tokenize_html( str ) {|token, str|
395
+ @log.debug " Adding %p token %p" % [ token, str ]
396
+ case token
397
+
398
+ # Within tags, encode * and _
399
+ when :tag
400
+ text += str.
401
+ gsub( /\*/, EscapeTable['*'][:md5] ).
402
+ gsub( /_/, EscapeTable['_'][:md5] )
403
+
404
+ # Encode backslashed stuff in regular text
405
+ when :text
406
+ text += encode_backslash_escapes( str )
407
+ else
408
+ raise TypeError, "Unknown token type %p" % token
409
+ end
410
+ }
411
+
412
+ @log.debug " Text with escapes is now: %p" % text
413
+ return text
414
+ end
415
+
416
+
417
+ ### Swap escaped special characters in a copy of the given +str+ and return
418
+ ### it.
419
+ def unescape_special_chars( str )
420
+ EscapeTable.each {|char, hash|
421
+ @log.debug "Unescaping escaped %p with %p" %
422
+ [ char, hash[:md5re] ]
423
+ str.gsub!( hash[:md5re], char )
424
+ }
425
+
426
+ return str
427
+ end
428
+
429
+
430
+ ### Return a copy of the given +str+ with any backslashed special character
431
+ ### in it replaced with MD5 placeholders.
432
+ def encode_backslash_escapes( str )
433
+ # Make a copy with any double-escaped backslashes encoded
434
+ text = str.gsub( /\\\\/, EscapeTable['\\'][:md5] )
435
+
436
+ EscapeTable.each_pair {|char, esc|
437
+ next if char == '\\'
438
+ text.gsub!( esc[:re], esc[:md5] )
439
+ }
440
+
441
+ return text
442
+ end
443
+
444
+
445
+ ### Transform any Markdown-style horizontal rules in a copy of the specified
446
+ ### +str+ and return it.
447
+ def transform_hrules( str, rs )
448
+ @log.debug " Transforming horizontal rules"
449
+ str.gsub( /^( ?[\-\*] ?){3,}$/, "\n<hr#{EmptyElementSuffix}\n" )
450
+ end
451
+
452
+
453
+
454
+ # Pattern to transform lists
455
+ ListRegexp = %r{
456
+ (?:
457
+ ^[ ]{0,#{TabWidth - 1}} # Indent < tab width
458
+ (\*|\d+\.) # unordered or ordered ($1)
459
+ [ ]+ # At least one space
460
+ )
461
+ (?m:.+?) # item content (include newlines)
462
+ (?:
463
+ \z # Either EOF
464
+ | # or
465
+ \n{2,} # Blank line...
466
+ (?=\S) # ...followed by non-space
467
+ (?![ ]* (\*|\d+\.) [ ]+) # ...but not another item
468
+ )
469
+ }x
470
+
471
+ ### Transform Markdown-style lists in a copy of the specified +str+ and
472
+ ### return it.
473
+ def transform_lists( str, rs )
474
+ @log.debug " Transforming lists at %p" % (str[0,100] + '...')
475
+
476
+ str.gsub( ListRegexp ) {|list|
477
+ @log.debug " Found list %p" % list
478
+ list_type = ($1 == '*' ? "ul" : "ol")
479
+ list.gsub!( /\n{2,}/, "\n\n\n" )
480
+
481
+ %{<%s>\n%s</%s>\n} % [
482
+ list_type,
483
+ transform_list_items( list, rs ),
484
+ list_type,
485
+ ]
486
+ }
487
+ end
488
+
489
+
490
+ # Pattern for transforming list items
491
+ ListItemRegexp = %r{
492
+ (\n)? # leading line = $1
493
+ (^[ ]*) # leading whitespace = $2
494
+ (\*|\d+\.) [ ]+ # list marker = $3
495
+ ((?m:.+?) # list item text = $4
496
+ (\n{1,2}))
497
+ (?= \n* (\z | \2 (\*|\d+\.) [ ]+))
498
+ }x
499
+
500
+ ### Transform list items in a copy of the given +str+ and return it.
501
+ def transform_list_items( str, rs )
502
+ @log.debug " Transforming list items"
503
+
504
+ # Trim trailing blank lines
505
+ str = str.sub( /\n{2,}\z/, "\n" )
506
+
507
+ str.gsub( ListItemRegexp ) {|line|
508
+ @log.debug " Found item line %p" % line
509
+ leading_line, item = $1, $4
510
+
511
+ if leading_line or /\n{2,}/.match( item )
512
+ @log.debug " Found leading line or item has a blank"
513
+ item = apply_block_transforms( outdent(item), rs )
514
+ else
515
+ # Recursion for sub-lists
516
+ @log.debug " Recursing for sublist"
517
+ item = transform_lists( outdent(item), rs ).chomp
518
+ item = apply_span_transforms( item, rs )
519
+ end
520
+
521
+ %{<li>%s</li>\n} % item
522
+ }
523
+ end
524
+
525
+
526
+ # Pattern for matching codeblocks
527
+ CodeBlockRegexp = %r{
528
+ (.?) # $1 = preceding character
529
+ :\n+ # colon + NL delimiter
530
+ ( # $2 = the code block
531
+ (?:
532
+ (?:[ ]{#{TabWidth}} | \t) # a tab or tab-width of spaces
533
+ .*\n+
534
+ )+
535
+ )
536
+ ((?=^[ ]{0,#{TabWidth}}\S)|\Z) # Lookahead for non-space at
537
+ # line-start, or end of doc
538
+ }x
539
+
540
+ ### Transform Markdown-style codeblocks in a copy of the specified +str+ and
541
+ ### return it.
542
+ def transform_code_blocks( str, rs )
543
+ @log.debug " Transforming code blocks"
544
+
545
+ str.gsub( CodeBlockRegexp ) {|block|
546
+ prevchar, codeblock = $1, $2
547
+
548
+ @log.debug " prevchar = %p" % prevchar
549
+
550
+ # Generate the codeblock
551
+ %{%s\n\n<pre><code>%s\n</code></pre>\n\n} % [
552
+ (prevchar.empty? || /\s/ =~ prevchar) ? "" : "#{prevchar}:",
553
+ encode_code( outdent(codeblock), rs ).rstrip,
554
+ ]
555
+ }
556
+ end
557
+
558
+
559
+ # Pattern for matching Markdown blockquote blocks
560
+ BlockQuoteRegexp = %r{
561
+ (?:
562
+ ^[ ]*>[ ]? # '>' at the start of a line
563
+ .+\n # rest of the first line
564
+ (?:.+\n)* # subsequent consecutive lines
565
+ \n* # blanks
566
+ )+
567
+ }x
568
+
569
+ ### Transform Markdown-style blockquotes in a copy of the specified +str+
570
+ ### and return it.
571
+ def transform_block_quotes( str, rs )
572
+ @log.debug " Transforming block quotes"
573
+
574
+ str.gsub( BlockQuoteRegexp ) {|quote|
575
+ @log.debug "Making blockquote from %p" % quote
576
+ quote.gsub!( /^[ ]*>[ ]?/, '' )
577
+ %{<blockquote>\n%s\n</blockquote>\n\n} %
578
+ apply_block_transforms( quote, rs ).
579
+ gsub( /^/, " " * TabWidth )
580
+ }
581
+ end
582
+
583
+
584
+ AutoAnchorURLRegexp = /<((https?|ftp):[^'">\s]+)>/
585
+ AutoAnchorEmailRegexp = %r{
586
+ <
587
+ (
588
+ [-.\w]+
589
+ \@
590
+ [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
591
+ )
592
+ >
593
+ }x
594
+
595
+ ### Transform URLs in a copy of the specified +str+ into links and return
596
+ ### it.
597
+ def transform_auto_links( str, rs )
598
+ @log.debug " Transforming auto-links"
599
+ str.gsub( AutoAnchorURLRegexp, %{<a href="\\1">\\1</a>}).
600
+ gsub( AutoAnchorEmailRegexp ) {|addr|
601
+ encode_email_address( unescape_special_chars($1) )
602
+ }
603
+ end
604
+
605
+
606
+ # Encoder functions to turn characters of an email address into encoded
607
+ # entities.
608
+ Encoders = [
609
+ lambda {|char| "&#%03d;" % char},
610
+ lambda {|char| "&#x%X;" % char},
611
+ lambda {|char| char.chr },
612
+ ]
613
+
614
+ ### Transform a copy of the given email +addr+ into an escaped version safer
615
+ ### for posting publicly.
616
+ def encode_email_address( addr )
617
+
618
+ rval = ''
619
+ ("mailto:" + addr).each_byte {|b|
620
+ case b
621
+ when ?:
622
+ rval += ":"
623
+ when ?@
624
+ rval += Encoders[ rand(2) ][ b ]
625
+ else
626
+ r = rand(100)
627
+ rval += (
628
+ r > 90 ? Encoders[2][ b ] :
629
+ r < 45 ? Encoders[1][ b ] :
630
+ Encoders[0][ b ]
631
+ )
632
+ end
633
+ }
634
+
635
+ return %{<a href="%s">%s</a>} % [ rval, rval.sub(/.+?:/, '') ]
636
+ end
637
+
638
+
639
+ # Regex for matching Setext-style headers
640
+ SetextHeaderRegexp = %r{
641
+ (.+) # The title text ($1)
642
+ \n
643
+ ([\-=])+ # Match a line of = or -. Save only one in $2.
644
+ [ ]*\n+
645
+ }x
646
+
647
+ # Regexp for matching ATX-style headers
648
+ AtxHeaderRegexp = %r{
649
+ ^(\#{1,6}) # $1 = string of #'s
650
+ [ ]*
651
+ (.+?) # $2 = Header text
652
+ [ ]*
653
+ \#* # optional closing #'s (not counted)
654
+ \n+
655
+ }x
656
+
657
+ ### Apply Markdown header transforms to a copy of the given +str+ and render
658
+ ### state +rs+ and return the result.
659
+ def transform_headers( str, rs )
660
+ @log.debug " Transforming headers"
661
+
662
+ # Setext-style headers:
663
+ # Header 1
664
+ # ========
665
+ #
666
+ # Header 2
667
+ # --------
668
+ #
669
+ str.
670
+ gsub( SetextHeaderRegexp ) {|m|
671
+ @log.debug "Found setext-style header"
672
+ title, hdrchar = $1, $2
673
+ title = apply_span_transforms( title, rs )
674
+
675
+ case hdrchar
676
+ when '='
677
+ %[<h1>#{title}</h1>\n\n]
678
+ when '-'
679
+ %[<h2>#{title}</h2>\n\n]
680
+ else
681
+ title
682
+ end
683
+ }.
684
+
685
+ gsub( AtxHeaderRegexp ) {|m|
686
+ @log.debug "Found ATX-style header"
687
+ hdrchars, title = $1, $2
688
+ title = apply_span_transforms( title, rs )
689
+
690
+ level = hdrchars.length
691
+ %{<h%d>%s</h%d>\n\n} % [ level, title, level ]
692
+ }
693
+ end
694
+
695
+
696
+ ### Wrap all remaining paragraph-looking text in a copy of +str+ inside <p>
697
+ ### tags and return it.
698
+ def form_paragraphs( str, rs )
699
+ @log.debug " Forming paragraphs"
700
+ grafs = str.
701
+ sub( /\A\n+/, '' ).
702
+ sub( /\n+\z/, '' ).
703
+ split( /\n{2,}/ )
704
+
705
+ rval = grafs.collect {|graf|
706
+
707
+ # Unhashify HTML blocks if this is a placeholder
708
+ if rs.html_blocks.key?( graf )
709
+ rs.html_blocks[ graf ]
710
+
711
+ # Otherwise, wrap in <p> tags
712
+ else
713
+ apply_span_transforms(graf, rs).
714
+ sub( /^[ ]*/, '<p>' ) + '</p>'
715
+ end
716
+ }.join( "\n\n" )
717
+
718
+ @log.debug " Formed paragraphs: %p" % rval
719
+ return rval
720
+ end
721
+
722
+
723
+ # Pattern to match the linkid part of an anchor tag for reference-style
724
+ # links.
725
+ RefLinkIdRegex = %r{
726
+ [ ]? # Optional leading space
727
+ (?:\n[ ]*)? # Optional newline + spaces
728
+ \[
729
+ (.*?) # Id = $1
730
+ \]
731
+ }x
732
+
733
+ InlineLinkRegex = %r{
734
+ \( # Literal paren
735
+ [ ]* # Zero or more spaces
736
+ (.*?) # URI = $1
737
+ [ ]* # Zero or more spaces
738
+ (?: #
739
+ ([\"\']) # Opening quote char = $2
740
+ (.*?) # Title = $3
741
+ \2 # Matching quote char
742
+ )? # Title is optional
743
+ \)
744
+ }x
745
+
746
+ ### Apply Markdown anchor transforms to a copy of the specified +str+ with
747
+ ### the given render state +rs+ and return it.
748
+ def transform_anchors( str, rs )
749
+ @log.debug " Transforming anchors"
750
+ @scanner.string = str.dup
751
+ text = ''
752
+
753
+ # Scan the whole string
754
+ until @scanner.empty?
755
+
756
+ if @scanner.scan( /\[/ )
757
+ link = ''; linkid = ''
758
+ depth = 1
759
+ startpos = @scanner.pos
760
+ @log.debug " Found a bracket-open at %d" % startpos
761
+
762
+ # Scan the rest of the tag, allowing unlimited nested []s. If
763
+ # the scanner runs out of text before the opening bracket is
764
+ # closed, append the text and return (wasn't a valid anchor).
765
+ while depth.nonzero?
766
+ linktext = @scanner.scan_until( /\]|\[/ )
767
+
768
+ if linktext
769
+ @log.debug " Found a bracket at depth %d: %p" %
770
+ [ depth, linktext ]
771
+ link += linktext
772
+
773
+ # Decrement depth for each closing bracket
774
+ depth += ( linktext[-1, 1] == ']' ? -1 : 1 )
775
+ @log.debug " Depth is now #{depth}"
776
+
777
+ # If there's no more brackets, it must not be an anchor, so
778
+ # just abort.
779
+ else
780
+ @log.debug " Missing closing brace, assuming non-link."
781
+ link += @scanner.rest
782
+ @scanner.terminate
783
+ return text + '[' + link
784
+ end
785
+ end
786
+ link.slice!( -1 ) # Trim final ']'
787
+ @log.debug " Found leading link %p" % link
788
+
789
+ # Look for a reference-style second part
790
+ if @scanner.scan( RefLinkIdRegex )
791
+ linkid = @scanner[1]
792
+ linkid = link.dup if linkid.empty?
793
+ linkid.downcase!
794
+ @log.debug " Found a linkid: %p" % linkid
795
+
796
+ # If there's a matching link in the link table, build an
797
+ # anchor tag for it.
798
+ if rs.urls.key?( linkid )
799
+ @log.debug " Found link key in the link table: %p" %
800
+ rs.urls[linkid]
801
+ url = escape_md( rs.urls[linkid] )
802
+
803
+ text += %{<a href="#{url}"}
804
+ if rs.titles.key?(linkid)
805
+ text += %{ title="%s"} % escape_md( rs.titles[linkid] )
806
+ end
807
+ text += %{>#{link}</a>}
808
+
809
+ # If the link referred to doesn't exist, just append the raw
810
+ # source to the result
811
+ else
812
+ @log.debug " Linkid %p not found in link table" % linkid
813
+ @log.debug " Appending original string instead: %p" %
814
+ @scanner.string[ startpos-1 .. @scanner.pos ]
815
+ text += @scanner.string[ startpos-1 .. @scanner.pos ]
816
+ end
817
+
818
+ # ...or for an inline style second part
819
+ elsif @scanner.scan( InlineLinkRegex )
820
+ url = @scanner[1]
821
+ title = @scanner[3]
822
+ @log.debug " Found an inline link to %p" % url
823
+
824
+ text += %{<a href="%s"} % escape_md( url )
825
+ if title
826
+ text += %{ title="%s"} % escape_md( title )
827
+ end
828
+ text += %{>#{link}</a>}
829
+
830
+ # No linkid part: just append the first part as-is.
831
+ else
832
+ @log.debug "No linkid, so no anchor. Appending literal text."
833
+ text += @scanner.string[ startpos-1 .. @scanner.pos-1 ]
834
+ end # if linkid
835
+
836
+ # Plain text
837
+ else
838
+ @log.debug " Scanning to the next link from %p" % @scanner.rest
839
+ text += @scanner.scan( /[^\[]+/ )
840
+ end
841
+
842
+ end # until @scanner.empty?
843
+
844
+ return text
845
+ end
846
+
847
# Pattern to match strong emphasis in Markdown text: a "**" or "__"
# delimiter ($1), the emphasized text ($2), and the same delimiter again.
# The emphasized text must start and end with a non-whitespace character;
# it may be a single character (e.g. "**a**").
BoldRegexp = %r{ (\*\*|__) (?=\S) (.*?\S) \1 }x

# Pattern to match normal emphasis in Markdown text. Same shape as
# BoldRegexp, but with a single "*" or "_" delimiter.
ItalicRegexp = %r{ (\*|_) (?=\S) (.*?\S) \1 }x

### Transform italic- and bold-encoded text in a copy of the specified +str+
### and return it. Strong emphasis is converted first so that the doubled
### delimiters are not mistaken for two single-delimiter spans. The +rs+
### argument is accepted for symmetry with the other span transforms and is
### not used here.
def transform_italic_and_bold( str, rs )
  @log.debug " Transforming italic and bold"

  str.
    gsub( BoldRegexp, %{<strong>\\2</strong>} ).
    gsub( ItalicRegexp, %{<em>\\2</em>} )
end
862
+
863
+
864
### Transform backticked spans in +str+ into <code> spans and return the
### result. A span is opened by a run of one or more backticks and closed by
### the next occurrence of the same run; its contents are stripped of
### leading and trailing whitespace and passed through #encode_code along
### with +rs+. If +str+ contains no backtick it is returned unchanged.
### Raises a FormatError if an opening run of backticks is never closed.
def transform_code_spans( str, rs )
  @log.debug " Transforming code spans"

  # Set up the string scanner and just return the string unless there's at
  # least one backtick.
  @scanner.string = str.dup
  unless @scanner.exist?( /`/ )
    @scanner.terminate
    @log.debug "No backticks found for code span in %p" % str
    return str
  end

  @log.debug "Transforming code spans in %p" % str

  # Build the transformed text anew
  text = ''.dup

  # Scan to the end of the string
  until @scanner.eos?

    # Consume everything up to (but not including) the next backtick. The
    # match is anchored at the scan pointer and never eats a backtick, so
    # an opening run that starts right at the pointer (e.g. a string that
    # begins with "``") is left intact for the opener scan below.
    if (( pre = @scanner.scan( /[^`]*(?=`)/ ) ))
      text << pre
      @log.debug "Found backtick at %d after '...%s'" %
        [ @scanner.pos, text[-10, 10] ]

      # Make a pattern to find the end of the span
      opener = @scanner.scan( /`+/ )
      len = opener.length
      closer = Regexp::new( opener )
      @log.debug "Scanning for end of code span with %p" % closer

      # Scan until the end of the closing backtick sequence. Chop the
      # backticks off the resultant string, strip leading and trailing
      # whitespace, and encode any entities contained in it.
      codespan = @scanner.scan_until( closer ) or
        raise FormatError::new( @scanner.rest[0,20],
          "No %p found before end" % opener )

      @log.debug "Found close of code span at %d: %p" %
        [ @scanner.pos - len, codespan ]
      codespan.slice!( -len, len )
      text << ( "<code>%s</code>" % encode_code( codespan.strip, rs ) )

    # If there's no more backticks, just append the rest of the string
    # and move the scan pointer to the end
    else
      text << @scanner.rest
      @scanner.terminate
    end
  end

  text
end
920
+
921
+
922
# Inline images: ![alt text](url "optional title")
# Don't forget: encode * and _
# Only the "x" (extended) flag is used: in Ruby a trailing "s" selects the
# Windows-31J encoding rather than Perl's "dot matches newline", and makes
# the pattern raise on non-ASCII UTF-8 input.
InlineImageRegexp = %r{
  (                     # Whole match = $1
    !\[ (.*?) \]        # alt text = $2
    \([ ]* (\S+) [ ]*   # source url = $3
    (                   # title = $4
      (["'])            # quote char = $5
      .*?
      \5                # matching quote
      [ ]*
    )?                  # title is optional
    \)
  )
}x


# Reference-style images: ![alt text][id]
ReferenceImageRegexp = %r{
  (                     # Whole match = $1
    !\[ (.*?) \]        # Alt text = $2
    [ ]?                # Optional space
    (?:\n[ ]*)?         # One optional newline + spaces
    \[ (.*?) \]         # id = $3
  )
}x

### Turn image markup in a copy of +str+ into <img> tags and return it.
### Reference-style images (![alt][id]) are looked up in +rs+, which must
### respond to #urls and #titles with Hash-like objects keyed by downcased
### link id; a reference whose id is unknown is left as-is. Inline images
### (![alt](url "title")) carry their own url and optional title. Double
### quotes in the alt and title text are written as &quot; so they cannot
### terminate the attribute.
def transform_images( str, rs )
  @log.debug " Transforming images"

  # Build one <img> tag. Markdown-special characters in the url and title
  # are hidden with #escape_md, as are quotes in the alt/title attributes.
  build_tag = lambda do |url, alt, title|
    tag = %{<img src="%s" alt="%s"} %
      [ escape_md( url ), alt.gsub( /"/, '&quot;' ) ]
    if title
      tag += %{ title="%s"} % escape_md( title ).gsub( /"/, '&quot;' )
    end
    tag + EmptyElementSuffix
  end

  # Handle reference-style labeled images: ![alt text][id]
  str.
    gsub( ReferenceImageRegexp ) {|match|
      # Copy the captures first: later regexp operations replace them.
      whole, alt, linkid = $1, $2, $3.downcase
      @log.debug "Matched %p" % match

      # for shortcut links like ![this][].
      linkid = alt.downcase if linkid.empty?

      if rs.urls.key?( linkid )
        @log.debug "Found url '%s' for linkid '%s' " %
          [ rs.urls[linkid], linkid ]
        title = rs.titles.key?( linkid ) ? rs.titles[linkid] : nil
        result = build_tag.call( rs.urls[linkid], alt, title )
      else
        result = whole
      end

      @log.debug "Replacing %p with %p" %
        [ match, result ]
      result
    }.

    # Inline image style
    gsub( InlineImageRegexp ) {|match|
      alt, url, title = $2, $3, $4
      @log.debug "Found inline image %p" % match

      # The title capture is the quoted title plus any spaces before the
      # closing parenthesis; drop the spaces and the surrounding quotes
      # (either kind).
      title = title.rstrip[ 1..-2 ] if title

      result = build_tag.call( url, alt, title )

      @log.debug "Replacing %p with %p" %
        [ match, result ]
      result
    }
end
1002
+
1003
+
1004
# Matches each character inside a code span that would otherwise be read
# as Markdown markup.
CodeEscapeRegexp = %r{( \* | _ | \{ | \} | \[ | \] )}x

### Return a copy of +str+ made safe for use inside a <code> element: the
### HTML-special characters &, < and > become entities, and every character
### matched by CodeEscapeRegexp is replaced by its :md5 entry in EscapeTable.
### The +rs+ argument is not used.
def encode_code( str, rs )
  html_entities = [
    [ %r{&}, '&amp;' ],
    [ %r{<}, '&lt;' ],
    [ %r{>}, '&gt;' ],
  ]

  # Ampersands go first so the entities added for < and > are not re-encoded.
  html_safe = html_entities.inject( str ) {|text, (pattern, entity)|
    text.gsub( pattern, entity )
  }

  html_safe.gsub( CodeEscapeRegexp ) {|special| EscapeTable[special][:md5] }
end
1015
+
1016
+
1017
+
1018
+ #################################################################
1019
+ ### U T I L I T Y F U N C T I O N S
1020
+ #################################################################
1021
+
1022
### Return a copy of +str+ in which the Markdown emphasis characters are
### replaced by numeric HTML entities: "*" becomes "&#42;" and "_" becomes
### "&#95;".
def escape_md( str )
  without_stars = str.gsub( /\*/, '&#42;' )
  without_stars.gsub( /_/, '&#95;' )
end
1029
+
1030
+
1031
# Matching constructs for tokenizing X/HTML
HTMLCommentRegexp  = %r{ <! ( -- .*? -- \s* )+ > }mx
XMLProcInstRegexp  = %r{ <\? .*? \?> }mx
MetaTag = Regexp::union( HTMLCommentRegexp, XMLProcInstRegexp )

# The start of a tag up to (but not including) the next angle bracket.
# Case-insensitive, so that upper-case tags such as <DIV> are recognized.
HTMLTagOpenRegexp  = %r{ < [a-z/!$] [^<>]* }imx
HTMLTagCloseRegexp = %r{ > }x
HTMLTagPart = Regexp::union( HTMLTagOpenRegexp, HTMLTagCloseRegexp )

### Break the HTML source in +str+ into a series of tokens and return
### them. The tokens are just 2-element Array tuples with a type (:tag or
### :text) and the actual content. If this function is called with a block,
### the type and text parts of each token will be yielded to it one at a
### time as they are extracted. Raises a RuntimeError if a tag is opened but
### never closed.
def tokenize_html( str )
  tokens = []
  @scanner.string = str.dup

  until @scanner.eos?
    @log.debug "Scanning from %p" % @scanner.rest

    # Match comments and PIs without nesting
    if (( token = @scanner.scan(MetaTag) ))
      type = :tag

    # Do nested matching for HTML tags
    elsif (( token = @scanner.scan(HTMLTagOpenRegexp) ))
      tagstart = @scanner.pos
      @log.debug " Found the start of a plain tag at %d" % tagstart

      # Start the token with the opening angle
      depth = 1
      type = :tag

      # Scan the rest of the tag, allowing unlimited nested <>s. If
      # the scanner runs out of text before the tag is closed, raise
      # an error.
      while depth.nonzero?

        # Scan either an opener or a closer
        chunk = @scanner.scan( HTMLTagPart ) or
          raise "Malformed tag at character %d: %p" %
            [ tagstart, token + @scanner.rest ]

        @log.debug " Found another part of the tag at depth %d: %p" %
          [ depth, chunk ]

        token += chunk

        # A closing angle bracket ends one level of nesting; anything
        # else is the start of a nested tag.
        depth += ( chunk == '>' ? -1 : 1 )

        # Still inside an outer tag: take any plain text that follows
        # a nested tag, so the next scan starts at an angle bracket.
        token += @scanner.scan( /[^<>]*/ ).to_s if depth.nonzero?
        @log.debug " Depth is now #{depth}"
      end

    # Match text segments
    else
      @log.debug " Looking for a chunk of text"
      type = :text

      # Always take at least one character -- which moves the pointer
      # beyond a non-tag '<' -- and then everything up to the next '<'.
      # The scanner is not at its end here, so this cannot fail.
      token = @scanner.scan( /.[^<]*/m )
    end

    @log.debug " type: %p, token: %p" % [ type, token ]

    # If a block is given, feed it one token at a time. Add the token to
    # the token list to be returned regardless.
    yield( type, token ) if block_given?
    tokens << [ type, token ]
  end

  tokens
end
1111
+
1112
+
1113
### Return a copy of +str+ with bare ampersands and angle brackets
### HTML-encoded. An ampersand is left alone when it already starts an
### entity (named, decimal or hexadecimal); a "<" is left alone when the
### character after it is a letter or one of "/", "?", "$", "!".
def encode_html( str )
  amp_encoded = str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w{1,8});)/i, "&amp;" )
  amp_encoded.gsub( %r{<(?![a-z/?\$!])}i, "&lt;" )
end
1118
+
1119
+
1120
### Remove one level of line-leading indentation -- a single tab, or from
### one to TabWidth spaces -- from every line of a copy of +str+ and return
### it.
def outdent( str )
  one_indent_level = /^(\t|[ ]{1,#{TabWidth}})/
  str.gsub( one_indent_level, '' )
end
1125
+
1126
+ end # class BlueCloth
1127
+