Pimki 1.0.092

Sign up to get free protection for your applications and to get access to all the features.
Files changed (85) hide show
  1. data/README +158 -0
  2. data/README-PIMKI +87 -0
  3. data/app/controllers/wiki.rb +563 -0
  4. data/app/models/author.rb +4 -0
  5. data/app/models/chunks/category.rb +31 -0
  6. data/app/models/chunks/category_test.rb +21 -0
  7. data/app/models/chunks/chunk.rb +20 -0
  8. data/app/models/chunks/engines.rb +34 -0
  9. data/app/models/chunks/include.rb +29 -0
  10. data/app/models/chunks/literal.rb +19 -0
  11. data/app/models/chunks/match.rb +19 -0
  12. data/app/models/chunks/nowiki.rb +31 -0
  13. data/app/models/chunks/nowiki_test.rb +14 -0
  14. data/app/models/chunks/test.rb +18 -0
  15. data/app/models/chunks/todo.rb +22 -0
  16. data/app/models/chunks/uri.rb +97 -0
  17. data/app/models/chunks/uri_test.rb +92 -0
  18. data/app/models/chunks/wiki.rb +82 -0
  19. data/app/models/chunks/wiki_test.rb +36 -0
  20. data/app/models/page.rb +91 -0
  21. data/app/models/page_lock.rb +24 -0
  22. data/app/models/page_set.rb +73 -0
  23. data/app/models/page_test.rb +76 -0
  24. data/app/models/revision.rb +91 -0
  25. data/app/models/revision_test.rb +252 -0
  26. data/app/models/web.rb +277 -0
  27. data/app/models/web_test.rb +53 -0
  28. data/app/models/wiki_content.rb +113 -0
  29. data/app/models/wiki_service.rb +137 -0
  30. data/app/models/wiki_service_test.rb +15 -0
  31. data/app/models/wiki_words.rb +26 -0
  32. data/app/models/wiki_words_test.rb +12 -0
  33. data/app/views/bottom.rhtml +4 -0
  34. data/app/views/markdown_help.rhtml +16 -0
  35. data/app/views/menu.rhtml +20 -0
  36. data/app/views/navigation.rhtml +26 -0
  37. data/app/views/rdoc_help.rhtml +16 -0
  38. data/app/views/static_style_sheet.rhtml +231 -0
  39. data/app/views/style.rhtml +179 -0
  40. data/app/views/textile_help.rhtml +28 -0
  41. data/app/views/top.rhtml +52 -0
  42. data/app/views/wiki/authors.rhtml +15 -0
  43. data/app/views/wiki/bliki.rhtml +101 -0
  44. data/app/views/wiki/bliki_edit.rhtml +33 -0
  45. data/app/views/wiki/bliki_new.rhtml +61 -0
  46. data/app/views/wiki/bliki_revision.rhtml +51 -0
  47. data/app/views/wiki/edit.rhtml +34 -0
  48. data/app/views/wiki/edit_menu.rhtml +27 -0
  49. data/app/views/wiki/edit_web.rhtml +139 -0
  50. data/app/views/wiki/export.rhtml +14 -0
  51. data/app/views/wiki/feeds.rhtml +10 -0
  52. data/app/views/wiki/list.rhtml +164 -0
  53. data/app/views/wiki/locked.rhtml +14 -0
  54. data/app/views/wiki/login.rhtml +11 -0
  55. data/app/views/wiki/mind.rhtml +39 -0
  56. data/app/views/wiki/new.rhtml +27 -0
  57. data/app/views/wiki/new_system.rhtml +78 -0
  58. data/app/views/wiki/new_web.rhtml +64 -0
  59. data/app/views/wiki/page.rhtml +84 -0
  60. data/app/views/wiki/print.rhtml +16 -0
  61. data/app/views/wiki/published.rhtml +10 -0
  62. data/app/views/wiki/recently_revised.rhtml +31 -0
  63. data/app/views/wiki/revision.rhtml +87 -0
  64. data/app/views/wiki/rss_feed.rhtml +22 -0
  65. data/app/views/wiki/search.rhtml +26 -0
  66. data/app/views/wiki/tex.rhtml +23 -0
  67. data/app/views/wiki/tex_web.rhtml +35 -0
  68. data/app/views/wiki/todo.rhtml +39 -0
  69. data/app/views/wiki/web_list.rhtml +13 -0
  70. data/app/views/wiki_words_help.rhtml +8 -0
  71. data/libraries/action_controller_servlet.rb +177 -0
  72. data/libraries/bluecloth.rb +1127 -0
  73. data/libraries/diff/diff.rb +475 -0
  74. data/libraries/diff/diff_test.rb +80 -0
  75. data/libraries/erb.rb +490 -0
  76. data/libraries/madeleine/automatic.rb +357 -0
  77. data/libraries/madeleine/clock.rb +94 -0
  78. data/libraries/madeleine_service.rb +69 -0
  79. data/libraries/rdocsupport.rb +156 -0
  80. data/libraries/redcloth_for_tex.rb +869 -0
  81. data/libraries/redcloth_for_tex_test.rb +41 -0
  82. data/libraries/view_helper.rb +33 -0
  83. data/libraries/web_controller_server.rb +95 -0
  84. data/pimki.rb +97 -0
  85. metadata +169 -0
@@ -0,0 +1,1127 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # Bluecloth is a Ruby implementation of Markdown, a text-to-HTML conversion
4
+ # tool.
5
+ #
6
+ # == Synopsis
7
+ #
8
+ # doc = BlueCloth::new "
9
+ # ## Test document ##
10
+ #
11
+ # Just a simple test.
12
+ # "
13
+ #
14
+ # puts doc.to_html
15
+ #
16
+ # == Authors
17
+ #
18
+ # * Michael Granger <ged@FaerieMUD.org>
19
+ #
20
+ # == Contributors
21
+ #
22
+ # * Martin Chase <stillflame@FaerieMUD.org> - Peer review, helpful suggestions
23
+ # * Florian Gross <flgr@ccan.de> - Filter options, suggestions
24
+ #
25
+ # == Copyright
26
+ #
27
+ # Original version:
28
+ # Copyright (c) 2003-2004 John Gruber
29
+ # <http://daringfireball.net/>
30
+ # All rights reserved.
31
+ #
32
+ # Ruby port:
33
+ # Copyright (c) 2004 The FaerieMUD Consortium.
34
+ #
35
+ # BlueCloth is free software; you can redistribute it and/or modify it under the
36
+ # terms of the GNU General Public License as published by the Free Software
37
+ # Foundation; either version 2 of the License, or (at your option) any later
38
+ # version.
39
+ #
40
+ # BlueCloth is distributed in the hope that it will be useful, but WITHOUT ANY
41
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
42
+ # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
43
+ #
44
+ # == To-do
45
+ #
46
+ # * Refactor some of the larger uglier methods that have to do their own
47
+ # brute-force scanning because of lack of Perl features in Ruby's Regexp
48
+ # class. Alternately, could add a dependency on 'pcre' and use most Perl
49
+ # regexps.
50
+ #
51
+ # * Put the StringScanner in the render state for thread-safety.
52
+ #
53
+ # == Version
54
+ #
55
+ # $Id: bluecloth.rb,v 1.1.1.1 2004/11/09 02:02:58 assaph Exp $
56
+ #
57
+
58
+ require 'digest/md5'
59
+ require 'logger'
60
+ require 'strscan'
61
+
62
+
63
+ ### BlueCloth is a Ruby implementation of Markdown, a text-to-HTML conversion
64
+ ### tool.
65
+ class BlueCloth < String
66
+
67
### Exception class for formatting errors.
class FormatError < RuntimeError

  ### Create a new FormatError with the given source +str+ and an optional
  ### message about the +specific+ error. The message always starts with
  ### "Bad markdown format near <str inspected>"; when +specific+ is given,
  ### ": <specific>" is appended.
  def initialize( str, specific=nil )
    # Wrap the arguments in an Array so that an Array-valued +str+ is
    # inspected as a whole instead of being spread over the format
    # directives (which misformats it, or raises when it is empty).
    msg = "Bad markdown format near %p" % [ str ]
    msg += ": %s" % [ specific ] if specific

    super( msg )
  end
end
82
+
83
+
84
+ # Release Version
85
+ Version = '0.0.3'
86
+
87
+ # SVN Revision
88
+ SvnRev = %q$Rev: 37 $
89
+
90
+ # SVN Id tag
91
+ SvnId = %q$Id: bluecloth.rb,v 1.1.1.1 2004/11/09 02:02:58 assaph Exp $
92
+
93
+ # SVN URL
94
+ SvnUrl = %q$URL: svn+ssh://cvs.faeriemud.org/var/svn/BlueCloth/trunk/lib/bluecloth.rb $
95
+
96
+
97
+ # Rendering state struct. Keeps track of URLs, titles, and HTML blocks
98
+ # midway through a render. I prefer this to the globals of the Perl version
99
+ # because globals make me break out in hives. Or something.
100
+ RenderState = Struct::new( "RenderState", :urls, :titles, :html_blocks, :log )
101
+
102
+ # Tab width for #detab! if none is specified
103
+ TabWidth = 4
104
+
105
+ # The tag-closing string -- set to '>' for HTML
106
+ EmptyElementSuffix = "/>";
107
+
108
+ # Table of MD5 sums for escaped characters
109
+ EscapeTable = {}
110
+ '\\`*_{}[]()#.!'.split(//).each {|char|
111
+ hash = Digest::MD5::hexdigest( char )
112
+
113
+ EscapeTable[ char ] = {
114
+ :md5 => hash,
115
+ :md5re => Regexp::new( hash ),
116
+ :re => Regexp::new( '\\\\' + Regexp::escape(char) ),
117
+ }
118
+ }
119
+
120
+
121
+ #################################################################
122
+ ### I N S T A N C E M E T H O D S
123
+ #################################################################
124
+
125
### Create a new BlueCloth string holding +content+. Every entry of
### +restrictions+ (nested arrays are flattened) names a boolean attribute
### of this object, e.g. :filter_html; each one is switched on by calling
### its writer with +true+.
def initialize( content="", *restrictions )
  # Log to standard error. The original used $deferr, a long-removed alias
  # of $stderr; on current Rubies that global is nil, which makes Logger
  # discard every message.
  @log = Logger::new( $stderr )
  @log.level = $DEBUG ?
    Logger::DEBUG :
    ($VERBOSE ? Logger::INFO : Logger::WARN)
  @scanner = nil

  # Add any restrictions, and set the line-folding attribute to reflect
  # what happens by default.
  restrictions.flatten.each {|r| __send__("#{r}=", true) }
  @fold_lines = true

  super( content )

  @log.debug "String is: %p" % self
end
142
+
143
+
144
+ ######
145
+ public
146
+ ######
147
+
148
+ # Filters for controlling what gets output for untrusted input. (But really,
149
+ # you're filtering bad stuff out of untrusted input at submission-time via
150
+ # untainting, aren't you?)
151
+ attr_accessor :filter_html, :filter_styles
152
+
153
+ # RedCloth-compatibility accessor. Line-folding is part of Markdown syntax,
154
+ # so this isn't used by anything.
155
+ attr_accessor :fold_lines
156
+
157
+
158
### Render the Markdown-formatted text in this string object as HTML and
### return it as a new string; the receiver is not modified. The +lite+
### parameter is accepted for compatibility with RedCloth and is not used.
def to_html( lite=false )

  # Create a StringScanner the span-level helpers can reuse for lexing
  @scanner = StringScanner::new( '' )

  # Make a structure to carry around stuff that gets placeholdered out of
  # the source (link URLs, link titles, hidden HTML blocks).
  rs = RenderState::new( {}, {}, {} )

  # Make a copy of the string with normalized line endings, tabs turned to
  # spaces, and a couple of guaranteed newlines at the end. The copy is
  # made with #dup and edited in place: on Ruby 3.0+ String#gsub returns a
  # plain String even for subclasses, so chaining #detab onto the result of
  # gsub would fail with NoMethodError.
  text = self.dup
  text.gsub!( /\r\n?/, "\n" )
  text.detab!
  text += "\n\n"
  @log.debug "Normalized line-endings: %p" % text

  # Filter HTML if we're asked to do so
  if self.filter_html
    text.gsub!( "<", "&lt;" )
    text.gsub!( ">", "&gt;" )
    @log.debug "Filtered HTML: %p" % text
  end

  # Simplify blank lines
  text.gsub!( /^ +$/, '' )
  @log.debug "Tabs -> spaces/blank lines stripped: %p" % text

  # Replace HTML blocks with placeholders
  text = hide_html_blocks( text, rs )
  @log.debug "Hid HTML blocks: %p" % text
  @log.debug "Render state: %p" % rs

  # Strip link definitions, store in render state
  text = strip_link_definitions( text, rs )
  @log.debug "Stripped link definitions: %p" % text
  @log.debug "Render state: %p" % rs

  # Escape meta-characters
  text = escape_special_chars( text )
  @log.debug "Escaped special characters: %p" % text

  # Transform block-level constructs
  text = apply_block_transforms( text, rs )
  @log.debug "After block-level transforms: %p" % text

  # Now swap back in all the escaped characters
  text = unescape_special_chars( text )
  @log.debug "After unescaping special characters: %p" % text

  return text
end
211
+
212
+
213
### Return a copy of this string in which tabs have been expanded to spaces,
### using tab stops every +tabwidth+ columns. The receiver is left alone.
def detab( tabwidth=TabWidth )
  # #detab! returns its receiver, which here is the fresh duplicate
  self.dup.detab!( tabwidth )
end
219
+
220
+
221
### Expand tabs to spaces in place, using tab stops every +tabwidth+
### columns, and return self (whether or not anything was converted).
### Lines are obtained with String#split, which drops trailing empty
### fields, so trailing newlines are not kept.
def detab!( tabwidth=TabWidth )
  expanded_lines = self.split( /\n/ ).map do |line|
    line.gsub( /(.*?)\t/ ) do
      # Pad the text before the tab up to the next tab stop. Every earlier
      # segment already ends on a tab stop, so its own length is enough.
      segment = $1
      segment + ' ' * (tabwidth - segment.length % tabwidth)
    end
  end

  self.replace( expanded_lines.join("\n") )
end
230
+
231
+
232
+ #######
233
+ #private
234
+ #######
235
+
236
### Run every block-level transform over +str+ in a fixed order, using the
### render state +rs+, then wrap what is left into paragraphs. Returns the
### transformed text.
def apply_block_transforms( str, rs )
  # Port: This was called '_runBlockGamut' in the original

  @log.debug "Applying block transforms to:\n %p" % str

  # Each stage takes (text, rs) and returns the text for the next stage
  stages = [
    :transform_headers,
    :transform_hrules,
    :transform_lists,
    :transform_code_blocks,
    :transform_block_quotes,
    :transform_auto_links,
    :hide_html_blocks,
  ]
  text = stages.inject( str ) {|current, stage| __send__( stage, current, rs ) }

  text = form_paragraphs( text, rs )

  @log.debug "Done with block transforms:\n %p" % text
  return text
end
255
+
256
+
257
### Run the span-level transforms (code spans, HTML encoding, images,
### anchors, emphasis, hard breaks) over +str+ with the render state +rs+
### and return the result.
def apply_span_transforms( str, rs )
  @log.debug "Applying span transforms to:\n %p" % str

  text = transform_code_spans( str, rs )
  text = encode_html( text )
  text = transform_images( text, rs )
  text = transform_anchors( text, rs )
  text = transform_italic_and_bold( text, rs )

  # Hard breaks: two or more spaces before a newline
  text = text.gsub( / {2,}\n/, "<br#{EmptyElementSuffix}\n" )

  @log.debug "Done with span transforms:\n %p" % text
  return text
end
274
+
275
+
276
+ # The list of tags which are considered block-level constructs and an
277
+ # alternation pattern suitable for use in regexps made from the list
278
+ BlockTags = %w[ p div h[1-6] blockquote pre table dl ol ul script ]
279
+ BlockTagPattern = BlockTags.join('|')
280
+
281
+ # Nested blocks:
282
+ # <div>
283
+ # <div>
284
+ # tags for inner block must be indented.
285
+ # </div>
286
+ # </div>
287
+ StrictBlockRegex = %r{
288
+ ^ # Start of line
289
+ <(#{BlockTagPattern}) # Start tag: \1
290
+ \b # word break
291
+ (.*\n)*? # Any number of lines, minimal match
292
+ </\1> # Matching end tag
293
+ [ ]* # trailing spaces
294
+ (?=\n+|\Z) # End of line or document
295
+ }ix
296
+
297
+ # More-liberal block-matching
298
+ LooseBlockRegex = %r{
299
+ ^ # Start of line
300
+ <(#{BlockTagPattern}) # start tag: \1
301
+ \b # word break
302
+ (.*\n)*? # Any number of lines, minimal match
303
+ .*</\1> # Anything + Matching end tag
304
+ [ ]* # trailing spaces
305
+ (?=\n+|\Z) # End of line or document
306
+ }ix
307
+
308
+ # Special case for <hr />.
309
+ HruleBlockRegex = %r{
310
+ ( # $1
311
+ \A\n? # Start of doc + optional \n
312
+ | # or
313
+ .*\n\n # anything + blank line
314
+ )
315
+ ( # save in $2
316
+ [ ]* # Any spaces
317
+ <hr # Tag open
318
+ \b # Word break
319
+ ([^<>])*? # Attributes
320
+ /?> # Tag close
321
+ (?=\n\n|\Z) # followed by a blank line or end of document
322
+ )
323
+ }ix
324
+
325
### Return a copy of +str+ in which every block of HTML that starts in the
### left margin has been replaced by a token. Each token is the MD5 hex
### digest of the block, surrounded by blank lines; the original block is
### stored under that digest in +rs.html_blocks+.
def hide_html_blocks( str, rs )
  @log.debug "Hiding HTML blocks in %p" % str

  # Stores a block in the render state and returns its placeholder
  stash = lambda {|block|
    digest = Digest::MD5::hexdigest( block )
    rs.html_blocks[ digest ] = block
    @log.debug "Replacing %p with %p" %
      [ block, digest ]
    "\n\n#{digest}\n\n"
  }

  @log.debug "Finding blocks with the strict regex..."
  hidden = str.gsub( StrictBlockRegex, &stash )

  @log.debug "Finding blocks with the loose regex..."
  hidden = hidden.gsub( LooseBlockRegex, &stash )

  # For rules only the <hr> tag itself ($2) is hidden; the text leading up
  # to it ($1) is kept.
  @log.debug "Finding hrules..."
  hidden = hidden.gsub( HruleBlockRegex ) { $1 + stash[$2] }

  return hidden
end
352
+
353
+
354
+ # Link defs are in the form: ^[id]: url "optional title"
355
+ LinkRegex = %r{
356
+ ^[ ]*\[(.+)\]: # id = $1
357
+ [ ]*
358
+ \n? # maybe *one* newline
359
+ [ ]*
360
+ (\S+) # url = $2
361
+ [ ]*
362
+ \n? # maybe one newline
363
+ [ ]*
364
+ (?:
365
+ # Titles are delimited by "quotes" or (parens).
366
+ ["(]
367
+ (.+?) # title = $3
368
+ [")] # Matching ) or "
369
+ [ ]*
370
+ )? # title is optional
371
+ (?:\n+|\Z)
372
+ }x
373
+
374
### Return a copy of +str+ with all link definitions removed. Each
### definition's URL (HTML-encoded) and optional title are recorded in the
### RenderState +rs+ under the lowercased link id.
def strip_link_definitions( str, rs )
  str.gsub( LinkRegex ) do
    # Grab the captures before anything else can run another match
    key, url, title = $1.downcase, $2, $3

    rs.urls[ key ] = encode_html( url )
    rs.titles[ key ] = title.gsub( /"/, "&quot;" ) if title

    # The definition itself disappears from the text
    ""
  end
end
387
+
388
+
389
### Return a copy of +str+ with Markdown-significant characters replaced by
### their MD5 placeholders: '*' and '_' inside HTML tags, and backslash
### escapes inside plain text. Raises TypeError if the tokenizer yields a
### token type other than :tag or :text.
def escape_special_chars( str )
  @log.debug " Escaping special characters"
  escaped = ''

  tokenize_html( str ) do |kind, chunk|
    @log.debug " Adding %p token %p" % [ kind, chunk ]

    escaped +=
      case kind
      when :tag
        # Within tags, encode * and _
        chunk.
          gsub( /\*/, EscapeTable['*'][:md5] ).
          gsub( /_/, EscapeTable['_'][:md5] )
      when :text
        # Encode backslashed stuff in regular text
        encode_backslash_escapes( chunk )
      else
        raise TypeError, "Unknown token type %p" % kind
      end
  end

  @log.debug " Text with escapes is now: %p" % escaped
  return escaped
end
415
+
416
+
417
### Replace every MD5 placeholder from the EscapeTable found in +str+ with
### the character it stands for. The substitution is done in place on
### +str+, which is also the return value.
def unescape_special_chars( str )
  EscapeTable.each_pair do |char, entry|
    placeholder = entry[:md5re]
    @log.debug "Unescaping escaped %p with %p" %
      [ char, placeholder ]
    str.gsub!( placeholder, char )
  end

  str
end
428
+
429
+
430
### Return a copy of +str+ in which each backslash-escaped special character
### has been replaced with that character's MD5 placeholder.
def encode_backslash_escapes( str )
  # Doubled backslashes go first, so the remaining escapes cannot be
  # confused with the second half of an escaped backslash.
  encoded = str.gsub( /\\\\/, EscapeTable['\\'][:md5] )

  EscapeTable.each do |char, entry|
    encoded.gsub!( entry[:re], entry[:md5] ) unless char == '\\'
  end

  encoded
end
443
+
444
+
445
### Return a copy of +str+ in which each Markdown horizontal rule (a line of
### three or more '-' or '*', optionally separated by single spaces) has
### been replaced with an <hr> tag. +rs+ is not used.
def transform_hrules( str, rs )
  @log.debug " Transforming horizontal rules"
  rule_tag = "\n<hr#{EmptyElementSuffix}\n"
  str.gsub( /^( ?[\-\*] ?){3,}$/, rule_tag )
end
451
+
452
+
453
+
454
+ # Pattern to transform lists
455
+ ListRegexp = %r{
456
+ (?:
457
+ ^[ ]{0,#{TabWidth - 1}} # Indent < tab width
458
+ (\*|\d+\.) # unordered or ordered ($1)
459
+ [ ]+ # At least one space
460
+ )
461
+ (?m:.+?) # item content (include newlines)
462
+ (?:
463
+ \z # Either EOF
464
+ | # or
465
+ \n{2,} # Blank line...
466
+ (?=\S) # ...followed by non-space
467
+ (?![ ]* (\*|\d+\.) [ ]+) # ...but not another item
468
+ )
469
+ }x
470
+
471
### Return a copy of +str+ in which each Markdown list has been replaced by
### a <ul> (marker '*') or <ol> (any other marker) element whose items are
### produced by #transform_list_items with the render state +rs+.
def transform_lists( str, rs )
  @log.debug " Transforming lists at %p" % (str[0,100] + '...')

  str.gsub( ListRegexp ) do |list|
    marker = $1
    @log.debug " Found list %p" % list

    tag = (marker == '*' ? "ul" : "ol")

    # Widen every run of blank lines inside the list to exactly two
    spaced = list.gsub( /\n{2,}/, "\n\n\n" )
    items = transform_list_items( spaced, rs )

    "<#{tag}>\n#{items}</#{tag}>\n"
  end
end
488
+
489
+
490
+ # Pattern for transforming list items
491
+ ListItemRegexp = %r{
492
+ (\n)? # leading line = $1
493
+ (^[ ]*) # leading whitespace = $2
494
+ (\*|\d+\.) [ ]+ # list marker = $3
495
+ ((?m:.+?) # list item text = $4
496
+ (\n{1,2}))
497
+ (?= \n* (\z | \2 (\*|\d+\.) [ ]+))
498
+ }x
499
+
500
### Return a copy of +str+ in which each list item has been wrapped in
### <li> tags. Items preceded by a blank line, or containing one, get the
### full block transforms; all others get sub-list and span transforms.
def transform_list_items( str, rs )
  @log.debug " Transforming list items"

  # Trim trailing blank lines
  trimmed = str.sub( /\n{2,}\z/, "\n" )

  trimmed.gsub( ListItemRegexp ) do |line|
    @log.debug " Found item line %p" % line
    leading_line, item = $1, $4

    if leading_line || /\n{2,}/.match( item )
      @log.debug " Found leading line or item has a blank"
      body = apply_block_transforms( outdent(item), rs )
    else
      # Recursion for sub-lists
      @log.debug " Recursing for sublist"
      nested = transform_lists( outdent(item), rs ).chomp
      body = apply_span_transforms( nested, rs )
    end

    "<li>#{body}</li>\n"
  end
end
524
+
525
+
526
+ # Pattern for matching codeblocks
527
+ CodeBlockRegexp = %r{
528
+ (.?) # $1 = preceding character
529
+ :\n+ # colon + NL delimiter
530
+ ( # $2 = the code block
531
+ (?:
532
+ (?:[ ]{#{TabWidth}} | \t) # a tab or tab-width of spaces
533
+ .*\n+
534
+ )+
535
+ )
536
+ ((?=^[ ]{0,#{TabWidth}}\S)|\Z) # Lookahead for non-space at
537
+ # line-start, or end of doc
538
+ }x
539
+
540
### Return a copy of +str+ in which each Markdown code block (indented
### lines following a colon and newline) has been outdented, encoded and
### wrapped in <pre><code> tags.
def transform_code_blocks( str, rs )
  @log.debug " Transforming code blocks"

  str.gsub( CodeBlockRegexp ) do
    prevchar, codeblock = $1, $2

    @log.debug " prevchar = %p" % prevchar

    # Keep the "x:" lead-in only when a non-space character precedes the
    # colon.
    lead_in = (prevchar.empty? || /\s/ =~ prevchar) ? "" : "#{prevchar}:"
    code = encode_code( outdent(codeblock), rs ).rstrip

    "#{lead_in}\n\n<pre><code>#{code}\n</code></pre>\n\n"
  end
end
557
+
558
+
559
+ # Pattern for matching Markdown blockquote blocks
560
+ BlockQuoteRegexp = %r{
561
+ (?:
562
+ ^[ ]*>[ ]? # '>' at the start of a line
563
+ .+\n # rest of the first line
564
+ (?:.+\n)* # subsequent consecutive lines
565
+ \n* # blanks
566
+ )+
567
+ }x
568
+
569
### Return a copy of +str+ in which each Markdown blockquote has had its
### '>' markers removed, been run through the block transforms, indented by
### TabWidth spaces and wrapped in <blockquote> tags.
def transform_block_quotes( str, rs )
  @log.debug " Transforming block quotes"

  str.gsub( BlockQuoteRegexp ) do |quote|
    @log.debug "Making blockquote from %p" % quote

    unquoted = quote.gsub( /^[ ]*>[ ]?/, '' )
    indented = apply_block_transforms( unquoted, rs ).
      gsub( /^/, " " * TabWidth )

    "<blockquote>\n#{indented}\n</blockquote>\n\n"
  end
end
582
+
583
+
584
+ AutoAnchorURLRegexp = /<((https?|ftp):[^'">\s]+)>/
585
+ AutoAnchorEmailRegexp = %r{
586
+ <
587
+ (
588
+ [-.\w]+
589
+ \@
590
+ [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
591
+ )
592
+ >
593
+ }x
594
+
595
### Return a copy of +str+ in which <http://...>-style URLs have become
### anchors and <user@host>-style addresses have become encoded mailto
### links. +rs+ is not used.
def transform_auto_links( str, rs )
  @log.debug " Transforming auto-links"

  with_urls = str.gsub( AutoAnchorURLRegexp, %{<a href="\\1">\\1</a>} )

  with_urls.gsub( AutoAnchorEmailRegexp ) do
    address = unescape_special_chars( $1 )
    encode_email_address( address )
  end
end
604
+
605
+
606
+ # Encoder functions to turn characters of an email address into encoded
607
+ # entities.
608
+ Encoders = [
609
+ lambda {|char| "&#%03d;" % char},
610
+ lambda {|char| "&#x%X;" % char},
611
+ lambda {|char| char.chr },
612
+ ]
613
+
614
### Build an obfuscated mailto: anchor for the email address +addr+ and
### return it. Every byte of "mailto:" + +addr+ is randomly written as a
### decimal entity, a hex entity or (occasionally) the raw character, with
### two exceptions: the ':' is always kept literal and the '@' is always
### written as an entity. The link text is the same encoded string with
### everything up to the ':' removed.
def encode_email_address( addr )

  encoded = ''
  ("mailto:" + addr).each_byte do |b|
    # each_byte yields Integers, so compare against byte values. The
    # character literals ?: and ?@ are Strings on Ruby 1.9+ and would never
    # match.
    encoded +=
      case b
      when 0x3A  # ':'
        ":"
      when 0x40  # '@'
        Encoders[ rand(2) ][ b ]
      else
        r = rand(100)
        if r > 90
          Encoders[2][ b ]
        elsif r < 45
          Encoders[1][ b ]
        else
          Encoders[0][ b ]
        end
      end
  end

  return %{<a href="%s">%s</a>} % [ encoded, encoded.sub(/.+?:/, '') ]
end
637
+
638
+
639
+ # Regex for matching Setext-style headers
640
+ SetextHeaderRegexp = %r{
641
+ (.+) # The title text ($1)
642
+ \n
643
+ ([\-=])+ # Match a line of = or -. Save only one in $2.
644
+ [ ]*\n+
645
+ }x
646
+
647
+ # Regexp for matching ATX-style headers
648
+ AtxHeaderRegexp = %r{
649
+ ^(\#{1,6}) # $1 = string of #'s
650
+ [ ]*
651
+ (.+?) # $2 = Header text
652
+ [ ]*
653
+ \#* # optional closing #'s (not counted)
654
+ \n+
655
+ }x
656
+
657
### Return a copy of +str+ in which Setext-style and ATX-style Markdown
### headers have been replaced with <h1>..<h6> elements. Header text is run
### through the span transforms with the render state +rs+.
def transform_headers( str, rs )
  @log.debug " Transforming headers"

  # Setext-style headers:
  #   Header 1
  #   ========
  #
  #   Header 2
  #   --------
  #
  with_setext = str.gsub( SetextHeaderRegexp ) do
    @log.debug "Found setext-style header"
    title, hdrchar = $1, $2
    title = apply_span_transforms( title, rs )

    # '=' underlines make a level-1 header, '-' underlines a level-2 one;
    # anything else leaves just the transformed title.
    level = { '=' => 1, '-' => 2 }[ hdrchar ]
    level ? "<h#{level}>#{title}</h#{level}>\n\n" : title
  end

  # ATX-style headers: the number of leading #'s is the header level
  with_setext.gsub( AtxHeaderRegexp ) do
    @log.debug "Found ATX-style header"
    hdrchars, title = $1, $2
    title = apply_span_transforms( title, rs )

    level = hdrchars.length
    "<h#{level}>#{title}</h#{level}>\n\n"
  end
end
694
+
695
+
696
### Split +str+ into chunks at blank lines and return them rejoined with
### blank lines. A chunk that is the placeholder of a hidden HTML block is
### swapped for the stored block from +rs.html_blocks+; any other chunk is
### span-transformed and wrapped in <p> tags.
def form_paragraphs( str, rs )
  @log.debug " Forming paragraphs"

  # Drop leading and trailing newlines before splitting
  trimmed = str.sub( /\A\n+/, '' ).sub( /\n+\z/, '' )

  blocks = trimmed.split( /\n{2,}/ ).map do |graf|
    if rs.html_blocks.key?( graf )
      # Unhashify HTML blocks if this is a placeholder
      rs.html_blocks[ graf ]
    else
      # Otherwise, wrap in <p> tags
      apply_span_transforms( graf, rs ).sub( /^[ ]*/, '<p>' ) + '</p>'
    end
  end
  rval = blocks.join( "\n\n" )

  @log.debug " Formed paragraphs: %p" % rval
  return rval
end
721
+
722
+
723
+ # Pattern to match the linkid part of an anchor tag for reference-style
724
+ # links.
725
+ RefLinkIdRegex = %r{
726
+ [ ]? # Optional leading space
727
+ (?:\n[ ]*)? # Optional newline + spaces
728
+ \[
729
+ (.*?) # Id = $1
730
+ \]
731
+ }x
732
+
733
+ InlineLinkRegex = %r{
734
+ \( # Literal paren
735
+ [ ]* # Zero or more spaces
736
+ (.*?) # URI = $1
737
+ [ ]* # Zero or more spaces
738
+ (?: #
739
+ ([\"\']) # Opening quote char = $2
740
+ (.*?) # Title = $3
741
+ \2 # Matching quote char
742
+ )? # Title is optional
743
+ \)
744
+ }x
745
+
746
### Apply Markdown anchor transforms to a copy of the specified +str+ with
### the given render state +rs+ and return it. Handles reference-style
### links ([text][id], [text][]) looked up in +rs.urls+ / +rs.titles+, and
### inline links ([text](url "title")). Bracketed text that turns out not
### to be a link is copied to the output unchanged.
def transform_anchors( str, rs )
  @log.debug " Transforming anchors"
  @scanner.string = str.dup
  text = ''

  # Scan the whole string (#eos? replaces the obsolete #empty?)
  until @scanner.eos?

    if @scanner.scan( /\[/ )
      link = ''; linkid = ''
      depth = 1
      startpos = @scanner.pos
      @log.debug " Found a bracket-open at %d" % startpos

      # Scan the rest of the tag, allowing unlimited nested []s. If
      # the scanner runs out of text before the opening bracket is
      # closed, append the text and return (wasn't a valid anchor).
      while depth.nonzero?
        linktext = @scanner.scan_until( /\]|\[/ )

        if linktext
          @log.debug " Found a bracket at depth %d: %p" %
            [ depth, linktext ]
          link += linktext

          # Decrement depth for each closing bracket
          depth += ( linktext[-1, 1] == ']' ? -1 : 1 )
          @log.debug " Depth is now #{depth}"

        # If there's no more brackets, it must not be an anchor, so
        # just abort.
        else
          @log.debug " Missing closing brace, assuming non-link."
          link += @scanner.rest
          @scanner.terminate
          return text + '[' + link
        end
      end
      link.slice!( -1 ) # Trim final ']'
      @log.debug " Found leading link %p" % link

      # Look for a reference-style second part
      if @scanner.scan( RefLinkIdRegex )
        linkid = @scanner[1]
        linkid = link.dup if linkid.empty?
        linkid.downcase!
        @log.debug " Found a linkid: %p" % linkid

        # If there's a matching link in the link table, build an
        # anchor tag for it.
        if rs.urls.key?( linkid )
          @log.debug " Found link key in the link table: %p" %
            rs.urls[linkid]
          url = escape_md( rs.urls[linkid] )

          text += %{<a href="#{url}"}
          if rs.titles.key?(linkid)
            text += %{ title="%s"} % escape_md( rs.titles[linkid] )
          end
          text += %{>#{link}</a>}

        # If the link referred to doesn't exist, just append the raw
        # source to the result. The scanner position is one past the
        # last consumed character, so the slice must stop at pos-1;
        # including pos would copy the next character twice.
        else
          raw = @scanner.string[ startpos-1 .. @scanner.pos-1 ]
          @log.debug " Linkid %p not found in link table" % linkid
          @log.debug " Appending original string instead: %p" % raw
          text += raw
        end

      # ...or for an inline style second part
      elsif @scanner.scan( InlineLinkRegex )
        url = @scanner[1]
        title = @scanner[3]
        @log.debug " Found an inline link to %p" % url

        text += %{<a href="%s"} % escape_md( url )
        if title
          text += %{ title="%s"} % escape_md( title )
        end
        text += %{>#{link}</a>}

      # No linkid part: just append the first part as-is.
      else
        @log.debug "No linkid, so no anchor. Appending literal text."
        text += @scanner.string[ startpos-1 .. @scanner.pos-1 ]
      end # if linkid

    # Plain text
    else
      @log.debug " Scanning to the next link from %p" % @scanner.rest
      text += @scanner.scan( /[^\[]+/ )
    end

  end # until @scanner.eos?

  return text
end
846
+
847
+ # Pattern to match strong emphasis in Markdown text
848
+ BoldRegexp = %r{ (\*\*|__) (?=\S) (.+?\S) \1 }x
849
+
850
+ # Pattern to match normal emphasis in Markdown text
851
+ ItalicRegexp = %r{ (\*|_) (?=\S) (.+?\S) \1 }x
852
+
853
### Return a copy of +str+ in which **strong** / __strong__ spans have
### become <strong> elements and *emphasis* / _emphasis_ spans have become
### <em> elements. +rs+ is not used.
def transform_italic_and_bold( str, rs )
  @log.debug " Transforming italic and bold"

  # Strong first, so its doubled markers are not read as nested emphasis
  with_strong = str.gsub( BoldRegexp, %{<strong>\\2</strong>} )
  with_strong.gsub( ItalicRegexp, %{<em>\\2</em>} )
end
862
+
863
+
864
### Transform backticked spans in +str+ into <code> spans and return the
### result. A span is opened by a run of one or more backticks and closed
### by the next occurrence of the same run; its content is stripped of
### surrounding whitespace and passed through #encode_code. If +str+ holds
### no backtick at all, +str+ itself is returned. Raises FormatError when
### an opening run has no matching close.
def transform_code_spans( str, rs )
  @log.debug " Transforming code spans"

  # Set up the string scanner and just return the string unless there's at
  # least one backtick.
  @scanner.string = str.dup
  unless @scanner.exist?( /`/ )
    @scanner.terminate
    @log.debug "No backticks found for code span in %p" % str
    return str
  end

  @log.debug "Transforming code spans in %p" % str

  # Build the transformed text anew
  text = ''

  # Scan to the end of the string (#eos? replaces the obsolete #empty?)
  until @scanner.eos?

    # Scan up to, but not into, the next opening backtick. The pattern
    # must not consume a backtick itself: the former /.?(?=`)/m swallowed
    # the first backtick of a multi-backtick opener that sat right at the
    # scan position, so "``a ` b``" was treated as a one-backtick span.
    if pre = @scanner.scan( /[^`]*(?=`)/ )
      text += pre
      @log.debug "Found backtick at %d after '...%s'" %
        [ @scanner.pos, text[-10, 10] ]

      # Make a pattern to find the end of the span
      opener = @scanner.scan( /`+/ )
      len = opener.length
      closer = Regexp::new( opener )
      @log.debug "Scanning for end of code span with %p" % closer

      # Scan until the end of the closing backtick sequence. Chop the
      # backticks off the resultant string, strip leading and trailing
      # whitespace, and encode any entities contained in it.
      codespan = @scanner.scan_until( closer ) or
        raise FormatError::new( @scanner.rest[0,20],
          "No %p found before end" % opener )

      @log.debug "Found close of code span at %d: %p" %
        [ @scanner.pos - len, codespan ]
      codespan.slice!( -len, len )
      text += "<code>%s</code>" %
        encode_code( codespan.strip, rs )

    # If there's no more backticks, just append the rest of the string
    # and move the scan pointer to the end
    else
      text += @scanner.rest
      @scanner.terminate
    end
  end

  return text
end
920
+
921
+
922
+ # Next, handle inline images: ![alt text](url "optional title")
923
+ # Don't forget: encode * and _
924
+ InlineImageRegexp = %r{
925
+ ( # Whole match = $1
926
+ !\[ (.*?) \] # alt text = $2
927
+ \([ ]* (\S+) [ ]* # source url = $3
928
+ ( # title = $4
929
+ (["']) # quote char = $5
930
+ .*?
931
+ \5 # matching quote
932
+ [ ]*
933
+ )? # title is optional
934
+ \)
935
+ )
936
+ }xs #"
937
+
938
+
939
+ # Reference-style images
940
+ ReferenceImageRegexp = %r{
941
+ ( # Whole match = $1
942
+ !\[ (.*?) \] # Alt text = $2
943
+ [ ]? # Optional space
944
+ (?:\n[ ]*)? # One optional newline + spaces
945
+ \[ (.*?) \] # id = $3
946
+ )
947
+ }xs
948
+
949
### Turn image markup in +str+ into <img> tags and return the result.
### Reference-style images (![alt][id], ![alt][]) are resolved through
### +rs.urls+ / +rs.titles+ and left untouched when the id is unknown;
### inline images (![alt](url "title")) are converted directly.
def transform_images( str, rs )
  # No format argument here: the message has no directive, and a surplus
  # argument makes String#% raise ArgumentError when $DEBUG is set.
  @log.debug " Transforming images"

  # Handle reference-style labeled images: ![alt text][id]
  str.
    gsub( ReferenceImageRegexp ) {|match|
      whole, alt, linkid = $1, $2, $3.downcase
      @log.debug "Matched %p" % match

      # for shortcut links like ![this][].
      linkid = alt.downcase if linkid.empty?

      if rs.urls.key?( linkid )
        url = escape_md( rs.urls[linkid] )
        @log.debug "Found url '%s' for linkid '%s' " %
          [ url, linkid ]

        # Build the tag
        result = %{<img src="%s" alt="%s"} % [ url, alt ]
        if rs.titles.key?( linkid )
          result += %{ title="%s"} % escape_md( rs.titles[linkid] )
        end
        result += EmptyElementSuffix

      else
        # Unknown id: keep the original markup
        result = whole
      end

      @log.debug "Replacing %p with %p" %
        [ match, result ]
      result
    }.

    # Inline image style
    gsub( InlineImageRegexp ) {|match|
      @log.debug "Found inline image %p" % match
      whole, alt, title = $1, $2, $4
      url = escape_md( $3 )

      # Build the tag
      result = %{<img src="%s" alt="%s"} % [ url, alt ]
      unless title.nil?
        # The captured title still carries its double quotes
        result += %{ title="%s"} % escape_md( title.gsub(/^"|"$/, '') )
      end
      result += EmptyElementSuffix

      @log.debug "Replacing %p with %p" %
        [ match, result ]
      result
    }
end
1002
+
1003
+
1004
# Pattern matching each character inside a code block that Markdown would
# otherwise treat as markup: * _ { } [ ]
CodeEscapeRegexp = %r{( \* | _ | \{ | \} | \[ | \] )}x

### Return a copy of +str+ made safe for use inside a code element: the HTML
### metacharacters &, < and > are replaced with entities, and every
### character matched by CodeEscapeRegexp is swapped for its :md5 entry in
### EscapeTable. The +rs+ (render state) argument is accepted but not used.
def encode_code( str, rs )
  html_safe = str.gsub( %r{&}, '&amp;' )
  html_safe = html_safe.gsub( %r{<}, '&lt;' )
  html_safe = html_safe.gsub( %r{>}, '&gt;' )

  html_safe.gsub( CodeEscapeRegexp ) {|special| EscapeTable[special][:md5] }
end
1015
+
1016
+
1017
+
1018
+ #################################################################
1019
+ ### U T I L I T Y F U N C T I O N S
1020
+ #################################################################
1021
+
1022
### Return a copy of +str+ in which the Markdown emphasis characters are
### replaced with numeric character references: '*' becomes '&#42;' and '_'
### becomes '&#95;'.
def escape_md( str )
  str.gsub( /[*_]/ ) {|ch| ch == '*' ? '&#42;' : '&#95;' }
end
1029
+
1030
+
1031
# Matching constructs for tokenizing X/HTML

# An SGML comment declaration: <!-- ... --> (one or more comment bodies)
HTMLCommentRegexp = %r{ <! ( -- .*? -- \s* )+ > }mx
# An XML processing instruction: <? ... ?>
XMLProcInstRegexp = %r{ <\? .*? \?> }mx
# Either of the above; these are matched whole, without nesting
MetaTag = Regexp::union( HTMLCommentRegexp, XMLProcInstRegexp )

# The start of a tag up to (not including) the next angle bracket. Matched
# case-insensitively so that upper-case tags such as <DIV> are recognised as
# tags too, in line with the case-insensitive check used when encoding
# stray '<' characters.
HTMLTagOpenRegexp = %r{ < [a-z/!$] [^<>]* }imx
# The closing angle bracket of a tag
HTMLTagCloseRegexp = %r{ > }x
# One piece of a (possibly nested) tag: another opener or a closer
HTMLTagPart = Regexp::union( HTMLTagOpenRegexp, HTMLTagCloseRegexp )
1039
+
1040
### Break the HTML source in +str+ into a series of tokens and return
### them. The tokens are just 2-element Array tuples with a type (:tag or
### :text) and the actual content. If this function is called with a block,
### the type and text parts of each token will be yielded to it one at a
### time as they are extracted.
###
### Raises a RuntimeError if a tag is opened but the input ends before it is
### closed.
def tokenize_html( str )
  tokens = []
  @scanner.string = str.dup
  type, token = nil, nil

  until @scanner.eos?
    @log.debug "Scanning from %p" % @scanner.rest

    # Match comments and PIs without nesting
    if (( token = @scanner.scan(MetaTag) ))
      type = :tag

    # Do nested matching for HTML tags
    elsif (( token = @scanner.scan(HTMLTagOpenRegexp) ))
      tagstart = @scanner.pos
      @log.debug " Found the start of a plain tag at %d" % tagstart

      # Start the token with the opening angle
      depth = 1
      type = :tag

      # Scan the rest of the tag, allowing unlimited nested <>s. If
      # the scanner runs out of text before the tag is closed, raise
      # an error.
      while depth.nonzero?

        # Scan either an opener or a closer
        chunk = @scanner.scan( HTMLTagPart ) or
          raise "Malformed tag at character %d: %p" %
            [ tagstart, token + @scanner.rest ]

        @log.debug " Found another part of the tag at depth %d: %p" %
          [ depth, chunk ]

        token += chunk

        # If the last character of the token so far is a closing
        # angle bracket, decrement the depth. Otherwise increment
        # it for a nested tag.
        depth += ( token[-1, 1] == '>' ? -1 : 1 )
        @log.debug " Depth is now #{depth}"
      end

    # Match text segments
    else
      @log.debug " Looking for a chunk of text"
      type = :text

      # Scan forward, always matching at least one character to move
      # the pointer beyond any non-tag '<'. When nothing but '<'
      # characters remain, scan_until finds no match and leaves the
      # pointer where it is, which would make this loop spin forever;
      # in that case take the leftover '<' run as the final text token.
      token = @scanner.scan_until( /[^<]+/m ) || @scanner.scan( /<+/ )
    end

    @log.debug " type: %p, token: %p" % [ type, token ]

    # If a block is given, feed it one token at a time. Add the token to
    # the token list to be returned regardless.
    yield( type, token ) if block_given?
    tokens << [ type, token ]
  end

  return tokens
end
1111
+
1112
+
1113
### Return a copy of +str+ with stray ampersands and opening angle brackets
### HTML-encoded. An '&' is left alone when it already begins something
### that looks like an entity (e.g. &amp; or &#169;), and a '<' is left
### alone when the character after it could start a tag, closing tag,
### processing instruction or declaration.
def encode_html( str )
  bare_ampersand = /&(?!#?[x]?(?:[0-9a-f]+|\w{1,8});)/i
  bare_less_than = %r{<(?![a-z/?\$!])}i

  ampersands_encoded = str.gsub( bare_ampersand, "&amp;" )
  ampersands_encoded.gsub( bare_less_than, "&lt;" )
end
1118
+
1119
+
1120
### Remove one level of line-leading indentation from a copy of +str+ and
### return it. One level is either a single tab or between one and TabWidth
### spaces at the start of a line.
def outdent( str )
  one_indent_level = /^(\t|[ ]{1,#{TabWidth}})/
  str.gsub( one_indent_level, '' )
end
1125
+
1126
+ end # class BlueCloth
1127
+