instiki 0.10.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. data/CHANGELOG +174 -165
  2. data/README +68 -68
  3. data/app/controllers/admin_controller.rb +94 -94
  4. data/app/controllers/application.rb +135 -131
  5. data/app/controllers/file_controller.rb +129 -129
  6. data/app/controllers/wiki_controller.rb +354 -354
  7. data/app/helpers/application_helper.rb +68 -68
  8. data/app/models/author.rb +3 -3
  9. data/app/models/chunks/category.rb +33 -33
  10. data/app/models/chunks/chunk.rb +86 -86
  11. data/app/models/chunks/engines.rb +61 -54
  12. data/app/models/chunks/include.rb +41 -41
  13. data/app/models/chunks/literal.rb +31 -31
  14. data/app/models/chunks/nowiki.rb +28 -28
  15. data/app/models/chunks/test.rb +18 -18
  16. data/app/models/chunks/uri.rb +182 -182
  17. data/app/models/chunks/wiki.rb +141 -141
  18. data/app/models/file_yard.rb +58 -58
  19. data/app/models/page.rb +112 -112
  20. data/app/models/page_lock.rb +22 -22
  21. data/app/models/page_set.rb +89 -89
  22. data/app/models/revision.rb +123 -123
  23. data/app/models/web.rb +182 -176
  24. data/app/models/wiki_content.rb +207 -207
  25. data/app/models/wiki_service.rb +233 -233
  26. data/app/models/wiki_words.rb +23 -23
  27. data/app/views/admin/create_system.rhtml +83 -83
  28. data/app/views/admin/create_web.rhtml +69 -69
  29. data/app/views/admin/edit_web.rhtml +137 -136
  30. data/app/views/file/file.rhtml +18 -18
  31. data/app/views/file/import.rhtml +22 -22
  32. data/app/views/layouts/default.rhtml +86 -85
  33. data/app/views/markdown_help.rhtml +12 -12
  34. data/app/views/mixed_help.rhtml +6 -6
  35. data/app/views/navigation.rhtml +30 -30
  36. data/app/views/rdoc_help.rhtml +12 -12
  37. data/app/views/textile_help.rhtml +24 -24
  38. data/app/views/wiki/authors.rhtml +11 -11
  39. data/app/views/wiki/edit.rhtml +39 -39
  40. data/app/views/wiki/export.rhtml +12 -12
  41. data/app/views/wiki/feeds.rhtml +14 -14
  42. data/app/views/wiki/list.rhtml +64 -64
  43. data/app/views/wiki/locked.rhtml +23 -23
  44. data/app/views/wiki/login.rhtml +14 -14
  45. data/app/views/wiki/new.rhtml +31 -31
  46. data/app/views/wiki/page.rhtml +115 -115
  47. data/app/views/wiki/print.rhtml +14 -14
  48. data/app/views/wiki/published.rhtml +9 -9
  49. data/app/views/wiki/recently_revised.rhtml +26 -26
  50. data/app/views/wiki/revision.rhtml +103 -103
  51. data/app/views/wiki/rollback.rhtml +36 -36
  52. data/app/views/wiki/rss_feed.rhtml +22 -22
  53. data/app/views/wiki/search.rhtml +38 -38
  54. data/app/views/wiki/tex.rhtml +22 -22
  55. data/app/views/wiki/tex_web.rhtml +34 -34
  56. data/app/views/wiki/web_list.rhtml +18 -18
  57. data/app/views/wiki_words_help.rhtml +9 -9
  58. data/config/environment.rb +82 -82
  59. data/config/environments/development.rb +5 -5
  60. data/config/environments/production.rb +4 -4
  61. data/config/environments/test.rb +17 -17
  62. data/config/routes.rb +18 -18
  63. data/lib/active_record_stub.rb +31 -31
  64. data/lib/bluecloth_tweaked.rb +1127 -0
  65. data/lib/diff.rb +444 -444
  66. data/lib/instiki_errors.rb +14 -14
  67. data/lib/rdocsupport.rb +151 -151
  68. data/lib/redcloth_for_tex.rb +736 -736
  69. data/natives/osx/desktop_launcher/AppDelegate.h +18 -18
  70. data/natives/osx/desktop_launcher/AppDelegate.mm +109 -109
  71. data/natives/osx/desktop_launcher/Credits.html +15 -15
  72. data/natives/osx/desktop_launcher/English.lproj/MainMenu.nib/classes.nib +12 -12
  73. data/natives/osx/desktop_launcher/English.lproj/MainMenu.nib/info.nib +24 -24
  74. data/natives/osx/desktop_launcher/Info.plist +12 -12
  75. data/natives/osx/desktop_launcher/Instiki.xcode/project.pbxproj +592 -592
  76. data/natives/osx/desktop_launcher/Instiki_Prefix.pch +7 -7
  77. data/natives/osx/desktop_launcher/MakeDMG.sh +9 -9
  78. data/natives/osx/desktop_launcher/main.mm +14 -14
  79. data/natives/osx/desktop_launcher/version.plist +16 -16
  80. data/public/404.html +5 -5
  81. data/public/500.html +5 -5
  82. data/public/dispatch.rb +9 -9
  83. data/public/javascripts/edit_web.js +52 -52
  84. data/public/javascripts/prototype.js +336 -336
  85. data/public/stylesheets/instiki.css +222 -222
  86. data/script/breakpointer +4 -4
  87. data/script/server +93 -93
  88. metadata +4 -3
@@ -0,0 +1,1127 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # Bluecloth is a Ruby implementation of Markdown, a text-to-HTML conversion
4
+ # tool.
5
+ #
6
+ # == Synopsis
7
+ #
8
+ # doc = BlueCloth::new "
9
+ # ## Test document ##
10
+ #
11
+ # Just a simple test.
12
+ # "
13
+ #
14
+ # puts doc.to_html
15
+ #
16
+ # == Authors
17
+ #
18
+ # * Michael Granger <ged@FaerieMUD.org>
19
+ #
20
+ # == Contributors
21
+ #
22
+ # * Martin Chase <stillflame@FaerieMUD.org> - Peer review, helpful suggestions
23
+ # * Florian Gross <flgr@ccan.de> - Filter options, suggestions
24
+ #
25
+ # == Copyright
26
+ #
27
+ # Original version:
28
+ # Copyright (c) 2003-2004 John Gruber
29
+ # <http://daringfireball.net/>
30
+ # All rights reserved.
31
+ #
32
+ # Ruby port:
33
+ # Copyright (c) 2004 The FaerieMUD Consortium.
34
+ #
35
+ # BlueCloth is free software; you can redistribute it and/or modify it under the
36
+ # terms of the GNU General Public License as published by the Free Software
37
+ # Foundation; either version 2 of the License, or (at your option) any later
38
+ # version.
39
+ #
40
+ # BlueCloth is distributed in the hope that it will be useful, but WITHOUT ANY
41
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
42
+ # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
43
+ #
44
+ # == To-do
45
+ #
46
+ # * Refactor some of the larger uglier methods that have to do their own
47
+ # brute-force scanning because of lack of Perl features in Ruby's Regexp
48
+ # class. Alternately, could add a dependency on 'pcre' and use most Perl
49
+ # regexps.
50
+ #
51
+ # * Put the StringScanner in the render state for thread-safety.
52
+ #
53
+ # == Version
54
+ #
55
+ # $Id: bluecloth.rb,v 1.3 2004/05/02 15:56:33 webster132 Exp $
56
+ #
57
+
58
+ require 'digest/md5'
59
+ require 'logger'
60
+ require 'strscan'
61
+
62
+
63
+ ### BlueCloth is a Ruby implementation of Markdown, a text-to-HTML conversion
64
+ ### tool.
65
+ class BlueCloth < String
66
+
67
### Exception raised when a piece of Markdown source is malformed.
class FormatError < RuntimeError

  ### Build the exception message from the offending source fragment +str+
  ### and, when one is given, a +specific+ description of the problem.
  def initialize( str, specific=nil )
    # Without a specific description only the fragment is reported
    return super( "Bad markdown format near %p" % str ) unless specific

    super( "Bad markdown format near %p: %s" % [ str, specific ] )
  end
end
82
+
83
+
84
# Release Version
Version = '0.0.3'

# SVN Revision
SvnRev = %q$Rev: 37 $

# SVN Id tag
SvnId = %q$Id: bluecloth.rb,v 1.3 2004/05/02 15:56:33 webster132 Exp $

# SVN URL
SvnUrl = %q$URL: svn+ssh://cvs.faeriemud.org/var/svn/BlueCloth/trunk/lib/bluecloth.rb $


# Rendering state struct. Carries the URLs, titles and HTML blocks collected
# midway through a render (plus a :log member), instead of the globals used
# by the Perl version.
RenderState = Struct::new( "RenderState", :urls, :titles, :html_blocks, :log )

# Tab width for #detab! if none is specified
TabWidth = 4

# The tag-closing string -- set to '>' for HTML
EmptyElementSuffix = "/>"

# Table of MD5 sums for escaped characters. Each special character maps to
# its digest (:md5), a pattern matching that digest (:md5re) and a pattern
# matching the backslash-escaped character (:re).
EscapeTable = '\\`*_{}[]()#.!'.split( // ).inject( {} ) {|table, char|
  digest = Digest::MD5::hexdigest( char )

  table[ char ] = {
    :md5   => digest,
    :md5re => Regexp::new( digest ),
    :re    => Regexp::new( '\\\\' + Regexp::escape(char) ),
  }
  table
}
119
+
120
+
121
+ #################################################################
122
+ ### I N S T A N C E M E T H O D S
123
+ #################################################################
124
+
125
### Create a new BlueCloth string holding +content+. Every entry of
### +restrictions+ (nested arrays are flattened) names an accessor that is
### switched on for this object by calling "<name>=" with true, e.g.
### :filter_html.
def initialize( content="", *restrictions )
  # Log to the standard error stream. The $deferr global this used to
  # reference is not defined by newer Rubies, which left the logger with a
  # nil device that discards every message.
  @log = Logger::new( $stderr )
  @log.level =
    if $DEBUG then Logger::DEBUG
    elsif $VERBOSE then Logger::INFO
    else Logger::WARN
    end
  @scanner = nil

  # Add any restrictions, and set the line-folding attribute to reflect
  # what happens by default.
  restrictions.flatten.each {|r| __send__("#{r}=", true) }
  @fold_lines = true

  super( content )

  @log.debug "String is: %p" % self
end
142
+
143
+
144
+ ######
145
+ public
146
+ ######
147
+
148
+ # Output filters for untrusted input. When filter_html is set, #to_html
149
+ # escapes every angle bracket before rendering. filter_styles is not read
150
+ # anywhere in the visible part of this file -- TODO confirm where it is used.
151
+ attr_accessor :filter_html, :filter_styles
152
+
153
+ # RedCloth-compatibility accessor. Line-folding is part of Markdown syntax,
154
+ # so this isn't used by anything; #initialize always sets it to true.
155
+ attr_accessor :fold_lines
156
+
157
+
158
### Render the Markdown-formatted text in this string object as HTML and
### return the result as a new string; the receiver itself is not modified.
### The +lite+ parameter is for compatibility with RedCloth, and is currently
### unused, though that may change in the future.
def to_html( lite=false )

  # Create a StringScanner we can reuse for various lexing tasks
  @scanner = StringScanner::new( '' )

  # Make a structure to carry around stuff that gets placeholdered out of
  # the source. (The struct's :log member is left unset.)
  rs = RenderState::new( {}, {}, {} )

  # Make a copy of the string with normalized line endings and tabs turned
  # to spaces. The copy is made with #dup and edited in place so that it is
  # still a BlueCloth when #detab! is called: since Ruby 3.0, String#gsub
  # called on an instance of a String subclass returns a plain String,
  # which does not respond to #detab.
  text = self.dup
  text.gsub!( /\r\n?/, "\n" )
  text.detab!

  # ...plus a couple of guaranteed newlines at the end
  text += "\n\n"
  @log.debug "Normalized line-endings: %p" % text

  # Filter HTML if we're asked to do so
  if self.filter_html
    text.gsub!( "<", "&lt;" )
    text.gsub!( ">", "&gt;" )
    @log.debug "Filtered HTML: %p" % text
  end

  # Simplify blank lines
  text.gsub!( /^ +$/, '' )
  @log.debug "Tabs -> spaces/blank lines stripped: %p" % text

  # Replace HTML blocks with placeholders
  text = hide_html_blocks( text, rs )
  @log.debug "Hid HTML blocks: %p" % text
  @log.debug "Render state: %p" % rs

  # Strip link definitions, store in render state
  text = strip_link_definitions( text, rs )
  @log.debug "Stripped link definitions: %p" % text
  @log.debug "Render state: %p" % rs

  # Escape meta-characters
  text = escape_special_chars( text )
  @log.debug "Escaped special characters: %p" % text

  # Transform block-level constructs
  text = apply_block_transforms( text, rs )
  @log.debug "After block-level transforms: %p" % text

  # Now swap back in all the escaped characters
  text = unescape_special_chars( text )
  @log.debug "After unescaping special characters: %p" % text

  return text
end
211
+
212
+
213
### Return a copy of the receiver in which tabs have been converted to
### spaces by #detab!, using tab stops every +tabwidth+ columns. The
### receiver itself is left alone.
def detab( tabwidth=TabWidth )
  expanded = self.dup
  expanded.detab!( tabwidth )
  expanded
end
219
+
220
+
221
### Convert tabs to spaces in place, using tab stops every +tabwidth+
### columns, and return self (whether or not anything was converted).
def detab!( tabwidth=TabWidth )
  # A limit of -1 keeps trailing empty fields, so newlines at the end of
  # the text survive the split/join round trip instead of being dropped.
  expanded = self.split( /\n/, -1 ).collect {|line|
    # $1 is the text since the previous tab stop; pad it out to the next one
    line.gsub( /(.*?)\t/ ) do
      $1 + ' ' * (tabwidth - $1.length % tabwidth)
    end
  }.join( "\n" )

  self.replace( expanded )
end
230
+
231
+
232
+ #######
233
+ #private
234
+ #######
235
+
236
### Run the block-level transforms over a copy of +str+, sharing the render
### state +rs+ between them, and return the transformed text.
def apply_block_transforms( str, rs )
  # Port: This was called '_runBlockGamut' in the original

  @log.debug "Applying block transforms to:\n %p" % str

  # Each pass is fed the output of the one before it, in this fixed order
  passes = [
    :transform_headers,
    :transform_hrules,
    :transform_lists,
    :transform_code_blocks,
    :transform_block_quotes,
    :transform_auto_links,
    :hide_html_blocks,
  ]
  text = passes.inject( str ) {|current, pass| __send__( pass, current, rs ) }

  # Whatever is left over becomes paragraphs
  text = form_paragraphs( text, rs )

  @log.debug "Done with block transforms:\n %p" % text
  text
end
255
+
256
+
257
### Run the span-level (inline) Markdown transforms over +str+ with the
### render state +rs+ and return the resulting text.
def apply_span_transforms( str, rs )
  @log.debug "Applying span transforms to:\n %p" % str

  # Code spans are handled first, then the HTML encoding pass
  spanned = transform_code_spans( str, rs )
  spanned = encode_html( spanned )

  # The remaining passes all share the same (text, render state) signature
  [ :transform_images, :transform_anchors, :transform_italic_and_bold ].each {|pass|
    spanned = __send__( pass, spanned, rs )
  }

  # Hard breaks
  spanned.gsub!( / {2,}\n/, "<br#{EmptyElementSuffix}\n" )

  @log.debug "Done with span transforms:\n %p" % spanned
  spanned
end
274
+
275
+
276
+ # The list of tags which are considered block-level constructs and an
277
+ # alternation pattern suitable for use in regexps made from the list
278
+ BlockTags = %w[ p div h[1-6] blockquote pre table dl ol ul script ]
279
+ BlockTagPattern = BlockTags.join('|')
280
+
281
+ # Strict block matching: the end tag must start a line. For nested blocks:
282
+ # <div>
283
+ # <div>
284
+ # tags for inner block must be indented.
285
+ # </div>
286
+ # </div>
287
+ StrictBlockRegex = %r{
288
+ ^ # Start of line
289
+ <(#{BlockTagPattern}) # Start tag, name saved in \1
290
+ \b # word break
291
+ (.*\n)*? # Any number of lines, minimal match
292
+ </\1> # Matching end tag at the start of a line
293
+ [ ]* # trailing spaces
294
+ (?=\n+|\Z) # End of line or document
295
+ }ix
296
+
297
+ # More-liberal block-matching: the end tag may follow other text on its line
298
+ LooseBlockRegex = %r{
299
+ ^ # Start of line
300
+ <(#{BlockTagPattern}) # Start tag, name saved in \1
301
+ \b # word break
302
+ (.*\n)*? # Any number of lines, minimal match
303
+ .*</\1> # Anything + Matching end tag
304
+ [ ]* # trailing spaces
305
+ (?=\n+|\Z) # End of line or document
306
+ }ix
307
+
308
+ # Special case for <hr />, which has no end tag to match.
309
+ HruleBlockRegex = %r{
310
+ ( # $1
311
+ \A\n? # Start of doc + optional \n
312
+ | # or
313
+ .*\n\n # anything + blank line
314
+ )
315
+ ( # save in $2
316
+ [ ]* # Any spaces
317
+ <hr # Tag open
318
+ \b # Word break
319
+ ([^<>])*? # Attributes
320
+ /?> # Tag close
321
+ (?=\n\n|\Z) # followed by a blank line or end of document
322
+ )
323
+ }ix
324
+
325
### Swap every block of HTML in a copy of +str+ that starts in the left
### margin for a placeholder token, remembering the original block in
### +rs+.html_blocks under that token, and return the copy.
def hide_html_blocks( str, rs )
  @log.debug "Hiding HTML blocks in %p" % str

  # Stores a block under its MD5 digest and returns the digest padded with
  # blank lines, which is what replaces the block in the text
  stash = lambda {|block|
    digest = Digest::MD5::hexdigest( block )
    rs.html_blocks[ digest ] = block
    @log.debug "Replacing %p with %p" %
      [ block, digest ]
    "\n\n#{digest}\n\n"
  }

  hidden = str.dup

  # The strict pattern gets first go, then the loose one
  [
    [ "strict", StrictBlockRegex ],
    [ "loose", LooseBlockRegex ],
  ].each {|label, pattern|
    @log.debug "Finding blocks with the #{label} regex..."
    hidden.gsub!( pattern, &stash )
  }

  # For rules only the tag itself ($2) is hidden; the text matched in
  # front of it ($1) is put back unchanged
  @log.debug "Finding hrules..."
  hidden.gsub!( HruleBlockRegex ) {
    lead, rule = $1, $2
    lead + stash[ rule ]
  }

  hidden
end
352
+
353
+
354
+ # Link defs are in the form: ^[id]: url "optional title"
355
+ LinkRegex = %r{
356
+ ^[ ]*\[(.+)\]: # id = $1
357
+ [ ]*
358
+ \n? # maybe *one* newline
359
+ [ ]*
360
+ (\S+) # url = $2
361
+ [ ]*
362
+ \n? # maybe one newline
363
+ [ ]*
364
+ (?:
365
+ # Titles are delimited by "quotes" or (parens).
366
+ ["(]
367
+ (.+?) # title = $3
368
+ [")] # Matching ) or "
369
+ [ ]*
370
+ )? # title is optional
371
+ (?:\n+|\Z)
372
+ }x
373
+
374
### Return a copy of +str+ with its link definitions removed. Each
### definition's URL (and title, when it has one) is recorded in the render
### state +rs+ under the lowercased link id.
def strip_link_definitions( str, rs )
  str.gsub( LinkRegex ) {
    id, url, title = $~.captures
    key = id.downcase

    rs.urls[ key ] = encode_html( url )
    rs.titles[ key ] = title.gsub( /"/, "&quot;" ) if title

    # The definition itself leaves nothing behind in the text
    ""
  }
end
387
+
388
+
389
### Return a copy of +str+ with its special characters swapped for their
### MD5 placeholders: '*' and '_' inside HTML tags, and backslash-escaped
### characters in ordinary text.
def escape_special_chars( str )
  @log.debug " Escaping special characters"
  escaped = ''

  tokenize_html( str ) {|kind, chunk|
    @log.debug " Adding %p token %p" % [ kind, chunk ]

    piece =
      case kind
      when :tag
        # Within tags, encode * and _
        chunk.
          gsub( /\*/, EscapeTable['*'][:md5] ).
          gsub( /_/, EscapeTable['_'][:md5] )
      when :text
        # Encode backslashed stuff in regular text
        encode_backslash_escapes( chunk )
      else
        raise TypeError, "Unknown token type %p" % kind
      end

    escaped += piece
  }

  @log.debug " Text with escapes is now: %p" % escaped
  escaped
end
415
+
416
+
417
### Put the original special characters back wherever their MD5
### placeholders occur in +str+. The substitution is done in place on
### +str+, which is also the return value.
def unescape_special_chars( str )
  EscapeTable.each_pair {|char, entry|
    placeholder = entry[:md5re]
    @log.debug "Unescaping escaped %p with %p" %
      [ char, placeholder ]
    str.gsub!( placeholder, char )
  }

  str
end
428
+
429
+
430
### Return a copy of +str+ in which every backslash-escaped special
### character has been replaced by that character's MD5 placeholder.
def encode_backslash_escapes( str )
  # Doubled backslashes are encoded first, while making the copy...
  encoded = str.gsub( /\\\\/, EscapeTable['\\'][:md5] )

  # ...then every other escaped character
  EscapeTable.each_pair {|char, esc|
    encoded.gsub!( esc[:re], esc[:md5] ) unless char == '\\'
  }

  encoded
end
443
+
444
+
445
### Return a copy of +str+ in which every Markdown-style horizontal rule
### (a line of three or more '-' or '*' characters, optionally spaced) has
### been replaced by an <hr> tag.
def transform_hrules( str, rs )
  @log.debug " Transforming horizontal rules"

  rule_line = /^( ?[\-\*] ?){3,}$/
  rule_tag = "\n<hr#{EmptyElementSuffix}\n"
  str.gsub( rule_line, rule_tag )
end
451
+
452
+
453
+
454
+ # Pattern matching a whole list, from its first item marker to its end
455
+ ListRegexp = %r{
456
+ (?:
457
+ ^[ ]{0,#{TabWidth - 1}} # Indent < tab width
458
+ (\*|\d+\.) # unordered or ordered marker ($1)
459
+ [ ]+ # At least one space
460
+ )
461
+ (?m:.+?) # list content (include newlines), minimal match
462
+ (?:
463
+ \z # Either EOF
464
+ | # or
465
+ \n{2,} # Blank line...
466
+ (?=\S) # ...followed by non-space
467
+ (?![ ]* (\*|\d+\.) [ ]+) # ...but not another item
468
+ )
469
+ }x
470
+
471
### Return a copy of +str+ in which every Markdown-style list has been
### turned into a <ul> or <ol> element, depending on the marker of the
### list's first item.
def transform_lists( str, rs )
  @log.debug " Transforming lists at %p" % (str[0,100] + '...')

  str.gsub( ListRegexp ) {|list|
    @log.debug " Found list %p" % list

    # $1 holds the first item's marker; read it before another regexp runs
    tag = ( $1 == '*' ) ? "ul" : "ol"

    # Runs of two or more newlines become exactly three
    spaced = list.gsub( /\n{2,}/, "\n\n\n" )
    items = transform_list_items( spaced, rs )

    "<#{tag}>\n#{items}</#{tag}>\n"
  }
end
488
+
489
+
490
+ # Pattern matching a single item inside a list
491
+ ListItemRegexp = %r{
492
+ (\n)? # leading line = $1
493
+ (^[ ]*) # leading whitespace = $2
494
+ (\*|\d+\.) [ ]+ # list marker = $3
495
+ ((?m:.+?) # list item text = $4
496
+ (\n{1,2}))
497
+ (?= \n* (\z | \2 (\*|\d+\.) [ ]+)) # up to end of input or the next marker at the same indent
498
+ }x
499
+
500
### Return a copy of +str+ (the source of one list) in which each item has
### been rendered and wrapped in <li> tags.
def transform_list_items( str, rs )
  @log.debug " Transforming list items"

  # Trim trailing blank lines
  trimmed = str.sub( /\n{2,}\z/, "\n" )

  trimmed.gsub( ListItemRegexp ) {|line|
    @log.debug " Found item line %p" % line
    leading_line = $1
    item = $4

    if leading_line || item =~ /\n{2,}/
      # An item set off by a blank line gets the block-level treatment
      @log.debug " Found leading line or item has a blank"
      item = apply_block_transforms( outdent(item), rs )
    else
      # Recursion for sub-lists
      @log.debug " Recursing for sublist"
      nested = transform_lists( outdent(item), rs ).chomp
      item = apply_span_transforms( nested, rs )
    end

    "<li>#{item}</li>\n"
  }
end
524
+
525
+
526
+ # Pattern for matching codeblocks: a colon, newlines, then indented lines
527
+ CodeBlockRegexp = %r{
528
+ (.?) # $1 = preceding character
529
+ :\n+ # colon + NL delimiter
530
+ ( # $2 = the code block
531
+ (?:
532
+ (?:[ ]{#{TabWidth}} | \t) # a tab or tab-width of spaces
533
+ .*\n+
534
+ )+
535
+ )
536
+ ((?=^[ ]{0,#{TabWidth}}\S)|\Z) # Lookahead for non-space at
537
+ # line-start, or end of doc
538
+ }x
539
+
540
### Return a copy of +str+ in which every Markdown-style codeblock has been
### wrapped in <pre><code> tags.
def transform_code_blocks( str, rs )
  @log.debug " Transforming code blocks"

  str.gsub( CodeBlockRegexp ) {
    prevchar = $1
    codeblock = $2

    @log.debug " prevchar = %p" % prevchar

    # The introducing colon is kept only when it follows a non-space
    # character; otherwise it is dropped together with that character
    lead_in = ( prevchar.empty? || prevchar =~ /\s/ ) ? "" : "#{prevchar}:"
    code = encode_code( outdent(codeblock), rs ).rstrip

    # Generate the codeblock
    "#{lead_in}\n\n<pre><code>#{code}\n</code></pre>\n\n"
  }
end
557
+
558
+
559
+ # Pattern for matching a run of Markdown blockquote blocks
560
+ BlockQuoteRegexp = %r{
561
+ (?:
562
+ ^[ ]*>[ ]? # '>' at the start of a line
563
+ .+\n # rest of the first line
564
+ (?:.+\n)* # subsequent consecutive (non-blank) lines
565
+ \n* # blanks
566
+ )+
567
+ }x
568
+
569
### Return a copy of +str+ in which every Markdown-style blockquote has
### been rendered (its contents run through the block transforms) and
### wrapped in <blockquote> tags.
def transform_block_quotes( str, rs )
  @log.debug " Transforming block quotes"

  str.gsub( BlockQuoteRegexp ) {|quote|
    @log.debug "Making blockquote from %p" % quote

    # Drop the '>' markers, render what is left, then indent the result
    unquoted = quote.gsub( /^[ ]*>[ ]?/, '' )
    rendered = apply_block_transforms( unquoted, rs )
    indented = rendered.gsub( /^/, " " * TabWidth )

    "<blockquote>\n#{indented}\n</blockquote>\n\n"
  }
end
582
+
583
+
584
+ AutoAnchorURLRegexp = /<((https?|ftp):[^'">\s]+)>/
585
+ AutoAnchorEmailRegexp = %r{
586
+ < # opening angle bracket
587
+ ( # address = $1
588
+ [-.\w]+ # part before the at-sign
589
+ \@ # literal at-sign
590
+ [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ # dotted host name, lower case only
591
+ )
592
+ > # closing angle bracket
593
+ }x
594
+
595
### Return a copy of +str+ in which <url> and <email address> auto-links
### have been turned into anchors.
def transform_auto_links( str, rs )
  @log.debug " Transforming auto-links"

  # URLs first...
  with_urls = str.gsub( AutoAnchorURLRegexp, %{<a href="\\1">\\1</a>} )

  # ...then email addresses, which are unescaped and then obfuscated
  with_urls.gsub( AutoAnchorEmailRegexp ) {
    address = unescape_special_chars( $1 )
    encode_email_address( address )
  }
end
604
+
605
+
606
# Encoder functions used to disguise the characters of an email address.
# Each takes a character code and returns, in order: a zero-padded decimal
# entity, a hexadecimal entity, or the character itself.
decimal_entity = lambda {|char| "&#%03d;" % char }
hex_entity     = lambda {|char| "&#x%X;" % char }
plain_char     = lambda {|char| char.chr }
Encoders = [ decimal_entity, hex_entity, plain_char ]
613
+
614
### Return an anchor tag for the email address +addr+ in which both the
### "mailto:" target and the link text are randomly encoded as entities, to
### make the address safer for posting publicly. The ':' of "mailto:" is
### always left literal and '@' is always encoded.
def encode_email_address( addr )
  # Byte values to compare against. (Character literals such as ?: are
  # Strings on Ruby 1.9 and later, so they never equal the Integers that
  # #each_byte yields.)
  colon = ':'.unpack( 'C' ).first
  at_sign = '@'.unpack( 'C' ).first

  rval = ''
  ("mailto:" + addr).each_byte {|b|
    rval +=
      if b == colon
        ":"
      elsif b == at_sign
        # Decimal or hex entity, never the literal character
        Encoders[ rand(2) ][ b ]
      else
        # Roughly 10% literal, 45% hex, 45% decimal
        r = rand(100)
        if r > 90 then Encoders[2][ b ]
        elsif r < 45 then Encoders[1][ b ]
        else Encoders[0][ b ]
        end
      end
  }

  # The link text is the same encoding with everything up to the first ':'
  # (the "mailto" part) removed
  return %{<a href="%s">%s</a>} % [ rval, rval.sub(/.+?:/, '') ]
end
637
+
638
+
639
+ # Regex for matching Setext-style (underlined) headers
640
+ SetextHeaderRegexp = %r{
641
+ (.+) # The title text ($1)
642
+ \n
643
+ ([\-=])+ # Match a line of = or -. $2 keeps only the last character matched.
644
+ [ ]*\n+
645
+ }x
646
+
647
+ # Regexp for matching ATX-style (hash-prefixed) headers
648
+ AtxHeaderRegexp = %r{
649
+ ^(\#{1,6}) # $1 = string of #'s
650
+ [ ]*
651
+ (.+?) # $2 = Header text
652
+ [ ]*
653
+ \#* # optional closing #'s (not counted)
654
+ \n+
655
+ }x
656
+
657
### Apply Markdown header transforms to a copy of the given +str+ and render
### state +rs+ and return the result.
def transform_headers( str, rs )
  @log.debug " Transforming headers"

  # Setext-style headers:
  #   Header 1
  #   ========
  #
  #   Header 2
  #   --------
  #
  setext_done = str.gsub( SetextHeaderRegexp ) {
    @log.debug "Found setext-style header"
    title, hdrchar = $1, $2
    title = apply_span_transforms( title, rs )

    # '=' underlines a level-one header, '-' a level-two header
    tag = { '=' => 'h1', '-' => 'h2' }[ hdrchar ]
    tag ? "<#{tag}>#{title}</#{tag}>\n\n" : title
  }

  # ATX-style headers: the number of leading #'s gives the level
  setext_done.gsub( AtxHeaderRegexp ) {
    @log.debug "Found ATX-style header"
    hdrchars, title = $1, $2
    title = apply_span_transforms( title, rs )

    level = hdrchars.length
    "<h#{level}>#{title}</h#{level}>\n\n"
  }
end
694
+
695
+
696
### Split a copy of +str+ into chunks separated by blank lines and return
### them rejoined, with hidden HTML blocks restored from the render state
### +rs+ and every other chunk rendered and wrapped in <p> tags.
def form_paragraphs( str, rs )
  @log.debug " Forming paragraphs"

  # Leading and trailing newlines would only produce empty chunks
  trimmed = str.sub( /\A\n+/, '' ).sub( /\n+\z/, '' )

  rendered = trimmed.split( /\n{2,}/ ).collect {|chunk|
    if rs.html_blocks.key?( chunk )
      # Unhashify HTML blocks if this is a placeholder
      rs.html_blocks[ chunk ]
    else
      # Otherwise, wrap in <p> tags
      body = apply_span_transforms( chunk, rs )
      body.sub( /^[ ]*/, '<p>' ) + '</p>'
    end
  }
  rval = rendered.join( "\n\n" )

  @log.debug " Formed paragraphs: %p" % rval
  rval
end
721
+
722
+
723
+ # Pattern to match the linkid part of an anchor tag for reference-style
724
+ # links, i.e. the second pair of brackets in [text][id].
725
+ RefLinkIdRegex = %r{
726
+ [ ]? # Optional leading space
727
+ (?:\n[ ]*)? # Optional newline + spaces
728
+ \[
729
+ (.*?) # Id = $1
730
+ \]
731
+ }x
732
+
733
+ InlineLinkRegex = %r{
734
+ \( # Literal paren
735
+ [ ]* # Zero or more spaces
736
+ (.*?) # URI = $1
737
+ [ ]* # Zero or more spaces
738
+ (?: # Optional title group
739
+ ([\"\']) # Opening quote char = $2
740
+ (.*?) # Title = $3
741
+ \2 # Matching quote char
742
+ )? # Title is optional
743
+ \)
744
+ }x
745
+
746
### Apply Markdown anchor transforms to a copy of the specified +str+ with
### the given render state +rs+ and return it. Both reference-style links
### ([text][id], resolved through +rs+.urls and +rs+.titles) and inline
### links ([text](url "title")) are handled; bracketed text that turns out
### not to be a link is copied through unchanged.
def transform_anchors( str, rs )
  @log.debug " Transforming anchors"
  @scanner.string = str.dup
  text = ''

  # Scan the whole string
  until @scanner.eos?

    if @scanner.scan( /\[/ )
      link = ''; linkid = ''
      depth = 1
      startpos = @scanner.pos
      @log.debug " Found a bracket-open at %d" % startpos

      # Scan the rest of the tag, allowing unlimited nested []s. If
      # the scanner runs out of text before the opening bracket is
      # closed, append the text and return (wasn't a valid anchor).
      while depth.nonzero?
        linktext = @scanner.scan_until( /\]|\[/ )

        if linktext
          @log.debug " Found a bracket at depth %d: %p" %
            [ depth, linktext ]
          link += linktext

          # Decrement depth for each closing bracket
          depth += ( linktext[-1, 1] == ']' ? -1 : 1 )
          @log.debug " Depth is now #{depth}"

        # If there's no more brackets, it must not be an anchor, so
        # just abort.
        else
          @log.debug " Missing closing brace, assuming non-link."
          link += @scanner.rest
          @scanner.terminate
          return text + '[' + link
        end
      end
      link.slice!( -1 ) # Trim final ']'
      @log.debug " Found leading link %p" % link

      # Look for a reference-style second part
      if @scanner.scan( RefLinkIdRegex )
        linkid = @scanner[1]
        linkid = link.dup if linkid.empty?
        linkid.downcase!
        @log.debug " Found a linkid: %p" % linkid

        # If there's a matching link in the link table, build an
        # anchor tag for it.
        if rs.urls.key?( linkid )
          @log.debug " Found link key in the link table: %p" %
            rs.urls[linkid]
          url = escape_md( rs.urls[linkid] )

          text += %{<a href="#{url}"}
          if rs.titles.key?(linkid)
            text += %{ title="%s"} % escape_md( rs.titles[linkid] )
          end
          text += %{>#{link}</a>}

        # If the link referred to doesn't exist, just append the raw
        # source to the result. The range excludes the character at the
        # scan pointer: it has not been consumed yet, so including it
        # would emit it twice.
        else
          raw = @scanner.string[ startpos-1 ... @scanner.pos ]
          @log.debug " Linkid %p not found in link table" % linkid
          @log.debug " Appending original string instead: %p" % raw
          text += raw
        end

      # ...or for an inline style second part
      elsif @scanner.scan( InlineLinkRegex )
        url = @scanner[1]
        title = @scanner[3]
        @log.debug " Found an inline link to %p" % url

        text += %{<a href="%s"} % escape_md( url )
        if title
          text += %{ title="%s"} % escape_md( title )
        end
        text += %{>#{link}</a>}

      # No linkid part: just append the first part as-is.
      else
        @log.debug "No linkid, so no anchor. Appending literal text."
        text += @scanner.string[ startpos-1 .. @scanner.pos-1 ]
      end # if linkid

    # Plain text
    else
      @log.debug " Scanning to the next link from %p" % @scanner.rest
      text += @scanner.scan( /[^\[]+/ )
    end

  end # until @scanner.eos?

  return text
end
846
+
847
+ # Pattern to match strong emphasis in Markdown text: ** or __ on both sides ($2 = the text)
848
+ BoldRegexp = %r{ (\*\*|__) (?=\S) (.+?\S) \1 }x
849
+
850
+ # Pattern to match normal emphasis in Markdown text: * or _ on both sides ($2 = the text)
851
+ ItalicRegexp = %r{ (\*|_) (?=\S) (.+?\S) \1 }x
852
+
853
### Return a copy of +str+ in which bold-encoded text is wrapped in
### <strong> tags and italic-encoded text in <em> tags.
def transform_italic_and_bold( str, rs )
  @log.debug " Transforming italic and bold"

  # Bold is substituted before italic
  bolded = str.gsub( BoldRegexp, %{<strong>\\2</strong>} )
  bolded.gsub( ItalicRegexp, %{<em>\\2</em>} )
end
862
+
863
+
864
### Transform backticked spans in +str+ into <code> spans and return the
### result. A span is opened by a run of one or more backticks and closed by
### the next occurrence of the same run. Raises a FormatError if an opening
### run is never closed. If +str+ contains no backtick at all it is returned
### as-is.
def transform_code_spans( str, rs )
  @log.debug " Transforming code spans"

  # Set up the string scanner and just return the string unless there's at
  # least one backtick.
  @scanner.string = str.dup
  unless @scanner.exist?( /`/ )
    @scanner.terminate
    @log.debug "No backticks found for code span in %p" % str
    return str
  end

  @log.debug "Transforming code spans in %p" % str

  # Build the transformed text anew
  text = ''

  # Scan to the end of the string
  until @scanner.eos?

    # Copy everything up to the next backtick. The pattern can match
    # nothing at all, so a backtick run sitting right at the scan pointer
    # is left whole for the opener scan below instead of losing its first
    # backtick.
    text += @scanner.scan( /[^`]*/ )

    # No more backticks: the rest of the string has just been appended
    break if @scanner.eos?

    @log.debug "Found backtick at %d after '...%s'" %
      [ @scanner.pos, text[-10, 10] ]

    # Make a pattern to find the end of the span
    opener = @scanner.scan( /`+/ )
    len = opener.length
    closer = Regexp::new( opener )
    @log.debug "Scanning for end of code span with %p" % closer

    # Scan until the end of the closing backtick sequence. Chop the
    # backticks off the resultant string, strip leading and trailing
    # whitespace, and encode any entities contained in it.
    codespan = @scanner.scan_until( closer ) or
      raise FormatError::new( @scanner.rest[0,20],
        "No %p found before end" % opener )

    @log.debug "Found close of code span at %d: %p" %
      [ @scanner.pos - len, codespan ]
    codespan.slice!( -len, len )
    text += "<code>%s</code>" %
      encode_code( codespan.strip, rs )
  end

  return text
end
920
+
921
+
922
+ # Inline images: ![alt text](url "optional title")
923
+ # Don't forget: encode * and _ (transform_images runs the url and title through escape_md)
924
+ InlineImageRegexp = %r{
925
+ ( # Whole match = $1
926
+ !\[ (.*?) \] # alt text = $2
927
+ \([ ]* (\S+) [ ]* # source url = $3
928
+ ( # title = $4, including its quotes and any trailing spaces
929
+ (["']) # quote char = $5
930
+ .*?
931
+ \5 # matching quote
932
+ [ ]*
933
+ )? # title is optional
934
+ \)
935
+ )
936
+ }xs #"
937
+
938
+
939
+ # Reference-style images: ![alt text][id]
940
+ ReferenceImageRegexp = %r{
941
+ ( # Whole match = $1
942
+ !\[ (.*?) \] # Alt text = $2
943
+ [ ]? # Optional space
944
+ (?:\n[ ]*)? # One optional newline + spaces
945
+ \[ (.*?) \] # id = $3
946
+ )
947
+ }xs
948
+
949
### Turn image markup in +str+ into <img> tags and return the result.
###
### Reference-style images (![alt text][id]) are resolved first, looking the
### lowercased id up in rs.urls and rs.titles; an empty id falls back to the
### lowercased alt text, and an unknown id leaves the markup untouched.
### Inline images (![alt text](url "optional title")) are handled second.
### URLs and titles are passed through #escape_md.
def transform_images( str, rs )
  @log.debug " Transforming images"

  # Handle reference-style labeled images: ![alt text][id]
  str.
    gsub( ReferenceImageRegexp ) {|match|
      whole, alt, linkid = $1, $2, $3.downcase
      @log.debug "Matched %p" % match

      # for shortcut links like ![this][].
      linkid = alt.downcase if linkid.empty?

      if rs.urls.key?( linkid )
        url = escape_md( rs.urls[linkid] )
        @log.debug "Found url '%s' for linkid '%s' " %
          [ url, linkid ]

        # Build the tag
        # NOTE(review): alt is interpolated into the attribute as-is.
        result = %{<img src="%s" alt="%s"} % [ url, alt ]
        if rs.titles.key?( linkid )
          result += %{ title="%s"} % escape_md( rs.titles[linkid] )
        end
        result += EmptyElementSuffix

      else
        # Unknown reference: leave the original markup in place
        result = whole
      end

      @log.debug "Replacing %p with %p" %
        [ match, result ]
      result
    }.

    # Inline image style
    gsub( InlineImageRegexp ) {|match|
      # Grab the captures before anything else can disturb the match data
      alt, src, title = $2, $3, $4
      @log.debug "Found inline image %p" % match
      url = escape_md( src )

      # Build the tag
      result = %{<img src="%s" alt="%s"} % [ url, alt ]
      unless title.nil?
        # The title capture is the quoted text plus any spaces that follow
        # it. Trim those spaces, then drop the surrounding quote pair,
        # whichever quote character (" or ') was used.
        title = title.sub( /[ ]+\z/, '' )[ 1..-2 ]
        result += %{ title="%s"} % escape_md( title )
      end
      result += EmptyElementSuffix

      @log.debug "Replacing %p with %p" %
        [ match, result ]
      result
    }
end
1002
+
1003
+
1004
# Characters inside a code block that Markdown would otherwise treat as
# markup: asterisk, underscore, braces and square brackets.
CodeEscapeRegexp = %r{( \* | _ | \{ | \} | \[ | \] )}x

### Return a copy of +str+ made safe for use inside a <code> element: the
### HTML-significant characters &, < and > become entities, and every
### character matched by CodeEscapeRegexp is swapped for its :md5 entry in
### EscapeTable. The +rs+ argument is accepted but not used here.
def encode_code( str, rs )
  # Ampersands go first so the entities added afterwards are not re-encoded
  encoded = str.gsub( %r{&}, '&amp;' )
  encoded = encoded.gsub( %r{<}, '&lt;' )
  encoded = encoded.gsub( %r{>}, '&gt;' )

  encoded.gsub( CodeEscapeRegexp ) {|special| EscapeTable[special][:md5] }
end
1015
+
1016
+
1017
+
1018
+ #################################################################
1019
+ ### U T I L I T Y F U N C T I O N S
1020
+ #################################################################
1021
+
1022
### Return a copy of +str+ in which every asterisk and underscore has been
### replaced by its numeric character reference, so later Markdown passes
### do not see them as emphasis markers.
def escape_md( str )
  stars_escaped = str.gsub( /\*/, '&#42;' )
  stars_escaped.gsub( /_/, '&#95;' )
end
1029
+
1030
+
1031
+ # Matching constructs for tokenizing X/HTML
1032
+ HTMLCommentRegexp = %r{ <! ( -- .*? -- \s* )+ > }mx
1033
+ XMLProcInstRegexp = %r{ <\? .*? \?> }mx
1034
+ MetaTag = Regexp::union( HTMLCommentRegexp, XMLProcInstRegexp )
1035
+
1036
+ HTMLTagOpenRegexp = %r{ < [a-z/!$] [^<>]* }mx
1037
+ HTMLTagCloseRegexp = %r{ > }x
1038
+ HTMLTagPart = Regexp::union( HTMLTagOpenRegexp, HTMLTagCloseRegexp )
1039
+
1040
+ ### Break the HTML source in +str+ into a series of tokens and return
1041
+ ### them. The tokens are just 2-element Array tuples with a type and the
1042
+ ### actual content. If this function is called with a block, the type and
1043
+ ### text parts of each token will be yielded to it one at a time as they are
1044
+ ### extracted.
1045
+ def tokenize_html( str )
1046
+ depth = 0
1047
+ tokens = []
1048
+ @scanner.string = str.dup
1049
+ type, token = nil, nil
1050
+
1051
+ until @scanner.empty?
1052
+ @log.debug "Scanning from %p" % @scanner.rest
1053
+
1054
+ # Match comments and PIs without nesting
1055
+ if (( token = @scanner.scan(MetaTag) ))
1056
+ type = :tag
1057
+
1058
+ # Do nested matching for HTML tags
1059
+ elsif (( token = @scanner.scan(HTMLTagOpenRegexp) ))
1060
+ tagstart = @scanner.pos
1061
+ @log.debug " Found the start of a plain tag at %d" % tagstart
1062
+
1063
+ # Start the token with the opening angle
1064
+ depth = 1
1065
+ type = :tag
1066
+
1067
+ # Scan the rest of the tag, allowing unlimited nested <>s. If
1068
+ # the scanner runs out of text before the tag is closed, raise
1069
+ # an error.
1070
+ while depth.nonzero?
1071
+
1072
+ # Scan either an opener or a closer
1073
+ chunk = @scanner.scan( HTMLTagPart ) or
1074
+ raise "Malformed tag at character %d: %p" %
1075
+ [ tagstart, token + @scanner.rest ]
1076
+
1077
+ @log.debug " Found another part of the tag at depth %d: %p" %
1078
+ [ depth, chunk ]
1079
+
1080
+ token += chunk
1081
+
1082
+ # If the last character of the token so far is a closing
1083
+ # angle bracket, decrement the depth. Otherwise increment
1084
+ # it for a nested tag.
1085
+ depth += ( token[-1, 1] == '>' ? -1 : 1 )
1086
+ @log.debug " Depth is now #{depth}"
1087
+ end
1088
+
1089
+ # Match text segments
1090
+ else
1091
+ @log.debug " Looking for a chunk of text"
1092
+ type = :text
1093
+
1094
+ # Scan forward, always matching at least one character to move
1095
+ # the pointer beyond any non-tag '<'.
1096
+ token = @scanner.scan_until( /[^<]+/m )
1097
+ end
1098
+
1099
+ @log.debug " type: %p, token: %p" % [ type, token ]
1100
+
1101
+ # If a block is given, feed it one token at a time. Add the token to
1102
+ # the token list to be returned regardless.
1103
+ if block_given?
1104
+ yield( type, token )
1105
+ end
1106
+ tokens << [ type, token ]
1107
+ end
1108
+
1109
+ return tokens
1110
+ end
1111
+
1112
+
1113
### Return a copy of +str+ in which bare ampersands (those that do not
### already begin an entity or character reference) become "&amp;" and
### opening angle brackets that do not look like the start of a tag become
### "&lt;". Closing angle brackets are left alone.
def encode_html( str )
  amps_encoded = str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w{1,8});)/i, "&amp;" )
  amps_encoded.gsub( %r{<(?![a-z/?\$!])}i, "&lt;" )
end
1118
+
1119
+
1120
### Return a copy of +str+ with one level of indentation removed from the
### start of every line: either a single tab or between one and TabWidth
### spaces.
def outdent( str )
  leading_indent = /^(\t|[ ]{1,#{TabWidth}})/
  str.gsub( leading_indent, '' )
end
1125
+
1126
+ end # class BlueCloth
1127
+