AoBane 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/AoBane.rb ADDED
@@ -0,0 +1,2191 @@
1
+ #
2
+ # AoBane - Extended Markdown Converter
3
+ #
4
+ # Author of Original BlueFeather: Dice <tetradice@gmail.com>
5
+ # Remaker: set.minami <set.minami@gmail.com>
6
+ # Website: https://github.com/setminami/AoBane/blob/master/README.md
7
+ # License: GPL version 2 or later
8
+ #
9
+ # If you want to know more about AoBane, see the website.
10
+ #
11
+ #
12
+ #
13
+ #-- Copyrights & License -------------------------------------------------------
14
+ #
15
+ # Original Markdown:
16
+ # Copyright (c) 2003-2004 John Gruber
17
+ # <http://daringfireball.net/>
18
+ # All rights reserved.
19
+ #
20
+ # Original BlueCloth:
21
+ # Copyright (c) 2004 The FaerieMUD Consortium.
22
+ #
23
+ # AoBane:
24
+ # Copyright (c) 2013 Set.Minami
25
+ #
26
+ # AoBane is free software; you can redistribute it and/or modify it under
27
+ # the terms of the GNU General Public License as published by the Free Software
28
+ # Foundation; either version 2 of the License, or (at your option) any later
29
+ # version.
30
+ #
31
+ # AoBane is distributed in the hope that it will be useful, but WITHOUT ANY
32
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
33
+ # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
34
+
35
+
36
+ require 'digest/md5'
37
+ require 'logger'
38
+ require 'strscan'
39
+ require 'stringio'
40
+ require 'uri'
41
+
42
+
43
+ module AoBane
44
+ VERSION = '0.01'
45
+ VERSION_NUMBER = 0.01
46
+ RELEASE_DATE = '2013-03-30'
47
+ VERSION_LABEL = "#{VERSION} (#{RELEASE_DATE})"
48
+
49
+ UTF8_BOM = "\xef\xbb\xbf"
50
+ UTF8_BOM_PATTERN = /^#{UTF8_BOM}/
51
+
52
+
53
+ # Fancy methods
54
+ class << self
55
+ def parse_text(src)
56
+ Parser.new.parse_text(src)
57
+ end
58
+
59
+ alias parse parse_text
60
+
61
+ def parse_document(src, default_enc = EncodingType::UTF8)
62
+ Parser.new.parse_document(src, default_enc)
63
+ end
64
+
65
+
66
+ def parse_text_file(path)
67
+ Parser.new.parse_text_file(path)
68
+ end
69
+
70
+ alias parse_file parse_text_file
71
+
72
+ def parse_document_file(path, default_enc = EncodingType::UTF8)
73
+ Parser.new.parse_document_file(path, default_enc)
74
+ end
75
+ end
76
+
77
+ ### Exception class on AoBane running.
78
+ class Error < ::RuntimeError
79
+ end
80
+
81
+ class EncodingError < Error
82
+ end
83
+
84
+ ### Exception class for formatting errors.
85
+ class FormatError < Error
86
+
87
+ ### Create a new FormatError with the given source +str+ and an optional
88
+ ### message about the +specific+ error.
89
+ def initialize( str, specific=nil )
90
+ if specific
91
+ msg = "Bad markdown format near %p: %s" % [ str, specific ]
92
+ else
93
+ msg = "Bad markdown format near %p" % str
94
+ end
95
+
96
+ super( msg )
97
+ end
98
+ end
99
+
100
+ module HeaderIDType
101
+ MD5 = 'md5'
102
+ ESCAPE = 'escape'
103
+ end
104
+
105
+ module EncodingType
106
+ EUC = 'euc-jp'
107
+ EUCJP = EUC_JP = EUC
108
+
109
+ SJIS = 'shift_jis'
110
+ SHIFT_JIS = SJIS
111
+
112
+ UTF8 = 'utf-8'
113
+ UTF_8 = UTF8
114
+
115
+ ASCII = 'ascii'
116
+ US_ASCII = ASCII
117
+
118
+ def self.regulate(str_value)
119
+ case str_value.downcase
120
+ when 'shift-jis', 'shift_jis'
121
+ SJIS
122
+ when 'euc-jp'
123
+ EUC
124
+ when 'utf-8'
125
+ UTF8
126
+ when 'ascii'
127
+ ASCII
128
+ else
129
+ raise EncodingError, "not adapted encoding type - #{str_value} (shift[-_]jis, euc-jp, utf-8, or ascii)"
130
+ end
131
+ end
132
+
133
+ def self.convert_to_kcode(str_value)
134
+ type = self.regulate(str_value)
135
+ case type
136
+ when EUC, SJIS, UTF8
137
+ type
138
+ when ASCII
139
+ 'none'
140
+ end
141
+ end
142
+
143
+
144
+ def self.convert_to_charset(str_value)
145
+ type = self.regulate(str_value)
146
+ case type
147
+ when EUC
148
+ 'euc-jp'
149
+ when SJIS
150
+ 'shift_jis'
151
+ when UTF8
152
+ 'utf-8'
153
+ when ASCII
154
+ nil
155
+ end
156
+ end
157
+
158
+ end
159
+
160
+ module Util
161
+ HTML_ESC = {
162
+ '&' => '&amp;',
163
+ '"' => '&quot;',
164
+ '<' => '&lt;',
165
+ '>' => '&gt;'
166
+ }
167
+
168
+ module_function
169
+
170
+ # from http://jp.rubyist.net/magazine/?0010-CodeReview#l28
171
+ # (Author: Minero Aoki)
172
+ def escape_html(str)
173
+ #table = HTML_ESC # optimize
174
+ #str.gsub(/[&"<>]/) {|s| table[s] }
175
+ return str
176
+ end
177
+
178
+ def generate_blank_string_io(encoding_base)
179
+ io = StringIO.new
180
+
181
+ if io.respond_to?(:set_encoding) then
182
+ io.set_encoding(encoding_base.encoding)
183
+ end
184
+
185
+ return io
186
+ end
187
+
188
+ def change_kcode(kcode = nil)
189
+ if defined?(Encoding) then
190
+ # ruby 1.9 later
191
+ yield
192
+ else
193
+ # ruby 1.8 earlier
194
+ original_kcode = $KCODE
195
+
196
+ begin
197
+ $KCODE = kcode if kcode
198
+ yield
199
+
200
+ ensure
201
+ # recover
202
+ $KCODE = original_kcode
203
+ end
204
+ end # if defined?
205
+ end # def
206
+
207
+
208
+ def utf8_bom?(str)
209
+ if str.respond_to?(:getbyte) and str.respond_to?(:bytesize) then
210
+ if str.bytesize >= 3 and
211
+ str.getbyte(0) == UTF8_BOM.getbyte(0) and
212
+ str.getbyte(1) == UTF8_BOM.getbyte(1) and
213
+ str.getbyte(2) == UTF8_BOM.getbyte(2) then
214
+ return true
215
+ else
216
+ return false
217
+ end
218
+
219
+ else
220
+ return(str =~ UTF8_BOM_PATTERN ? true : false)
221
+ end
222
+ end
223
+ end
224
+
225
+ class Document
226
+ HEADER_PATTERN = /^([a-zA-Z0-9-]+?)\s*\:\s*(.+?)\s*(?:\n|\Z)/
227
+ BLANK_LINE_PATTERN = /^\n/
228
+ HEADER_SEQUEL_PATTERN = /^\s+(.+)$/
229
+
230
+ attr_accessor :headers, :body
231
+ alias text body
232
+ alias text= body=
233
+
234
+ class << self
235
+ def parse_io(input, default_enc = EncodingType::UTF8)
236
+ headers = {}
237
+ body = nil
238
+ first_pos = input.pos
239
+ default_enc = EncodingType.regulate(default_enc)
240
+
241
+ Util.change_kcode(EncodingType.convert_to_kcode(default_enc)){
242
+ # default encoding
243
+ if defined?(Encoding) then
244
+ input.set_encoding(Encoding.find(default_enc))
245
+ end
246
+
247
+
248
+
249
+ # get headers
250
+ pos_before_gets = nil
251
+ first_line = true
252
+
253
+ loop do
254
+ pos_before_gets = input.pos
255
+ line = input.gets
256
+
257
+ # cut UTF-8 BOM
258
+ if first_line and Util.utf8_bom?(line) then
259
+ line.slice!(UTF8_BOM_PATTERN)
260
+ end
261
+ first_line = false
262
+
263
+ if line and line.chomp =~ HEADER_PATTERN then
264
+ key = $1.downcase; value = $2
265
+
266
+ if key == 'encoding' and not headers.include?('encoding') then
267
+ kc = EncodingType.convert_to_kcode(value.downcase)
268
+ if input.respond_to?(:set_encoding) then
269
+ input.set_encoding(EncodingType.regulate(value))
270
+
271
+ # rewind (reason => [ruby-list:45988])
272
+ input.pos = first_pos
273
+ first_line = true
274
+ else
275
+ $KCODE = kc
276
+ end
277
+ end
278
+
279
+ headers[key] = value
280
+ else
281
+ # EOF or Metadata end
282
+ break
283
+ end
284
+ end
285
+
286
+ # back
287
+ input.pos = pos_before_gets
288
+
289
+
290
+
291
+ # skip blank lines
292
+ loop do
293
+ pos_before_gets = input.pos
294
+
295
+ line = input.gets
296
+ if line.nil? or not line =~ BLANK_LINE_PATTERN then
297
+ break
298
+ end
299
+ end
300
+
301
+ # back
302
+ input.pos = pos_before_gets
303
+
304
+
305
+
306
+ # get body
307
+ body = input.read
308
+
309
+ }
310
+
311
+
312
+ return self.new(headers, body)
313
+ end
314
+
315
+ def parse(str, default_enc = EncodingType::UTF8)
316
+ parse_io(StringIO.new(str), default_enc)
317
+ end
318
+
319
+ end
320
+
321
+
322
+ def initialize(headers = {}, body = '')
323
+ @headers = {}
324
+ headers.each do |k, v|
325
+ self[k] = v
326
+ end
327
+ @body = body
328
+ end
329
+
330
+ def [](key)
331
+ @headers[key.to_s.downcase]
332
+ end
333
+
334
+ def []=(key, value)
335
+ @headers[key.to_s.downcase] = value.to_s
336
+ end
337
+
338
+ def title
339
+ @headers['title']
340
+ end
341
+
342
+ def css
343
+ @headers['css']
344
+ end
345
+
346
+ def numbering
347
+ case @headers['numbering']
348
+ when 'yes', '1', 'true', 'on'
349
+ true
350
+ else
351
+ false
352
+ end
353
+ end
354
+
355
+ alias numbering? numbering
356
+
357
+ def numbering_start_level
358
+ level = (@headers['numbering-start-level'] || 2).to_i
359
+ if level >= 1 and level <= 6 then
360
+ return level
361
+ else
362
+ return 2
363
+ end
364
+ end
365
+
366
+ def encoding_type
367
+ @headers['encoding'] || EncodingType::UTF8
368
+ end
369
+
370
+ def header_id_type
371
+ (@headers['header-id-type'] || HeaderIDType::MD5).downcase
372
+ end
373
+
374
+ def kcode
375
+ self.encoding_type && EncodingType.convert_to_kcode(self.encoding_type)
376
+ end
377
+
378
+ def to_html
379
+ Parser.new.document_to_html(self)
380
+ end
381
+ end
382
+
383
+
384
+ class Parser
385
+ # Rendering state class Keeps track of URLs, titles, and HTML blocks
386
+ # midway through a render. I prefer this to the globals of the Perl version
387
+ # because globals make me break out in hives. Or something.
388
+ class RenderState
389
+ # Headers struct.
390
+ Header = Struct.new(:id, :level, :content, :content_html)
391
+
392
+ # from Original BlueCloth
393
+ attr_accessor :urls, :titles, :html_blocks, :log
394
+
395
+ # AoBane Extension
396
+ attr_accessor :footnotes, :found_footnote_ids, :warnings
397
+ attr_accessor :headers, :block_transform_depth
398
+ attr_accessor :header_id_type # option switch
399
+ attr_accessor :numbering, :numbering_start_level # option switch
400
+ alias numbering? numbering
401
+
402
+ def initialize
403
+ @urls, @titles, @html_blocks = {}, {}, {}
404
+ @log = nil
405
+ @footnotes, @found_footnote_ids, @warnings = {}, [], []
406
+ @headers = []
407
+ @block_transform_depth = 0
408
+ @header_id_type = HeaderIDType::MD5
409
+ @numbering = false
410
+ @numbering_start_level = 2
411
+ end
412
+
413
+ end
414
+
415
+ # Tab width for #detab! if none is specified
416
+ TabWidth = 4
417
+
418
+ # The tag-closing string -- set to '>' for HTML
419
+ EmptyElementSuffix = " />";
420
+
421
+ # Table of MD5 sums for escaped characters
422
+ EscapeTable = {}
423
+ '\\`*_{}[]()#.!|:~'.split(//).each {|char|
424
+ hash = Digest::MD5::hexdigest( char )
425
+
426
+ EscapeTable[ char ] = {
427
+ :md5 => hash,
428
+ :md5re => Regexp::new( hash ),
429
+ :re => Regexp::new( '\\\\' + Regexp::escape(char) ),
430
+ :unescape => char,
431
+ }
432
+
433
+ escaped = "\\#{char}"
434
+ hash = Digest::MD5::hexdigest(escaped)
435
+ EscapeTable[escaped] = {
436
+ :md5 => hash,
437
+ :md5re => Regexp::new( hash ),
438
+ :re => Regexp::new( '\\\\' + Regexp::escape(char) ),
439
+ :unescape => char,
440
+ }
441
+ }
442
+
443
+
444
+ #################################################################
445
+ ### I N S T A N C E M E T H O D S
446
+ #################################################################
447
+
448
+ ### Create a new AoBane parser.
449
+ def initialize(*restrictions)
450
+ @log = Logger::new( $deferr )
451
+ @log.level = $DEBUG ?
452
+ Logger::DEBUG :
453
+ ($VERBOSE ? Logger::INFO : Logger::WARN)
454
+ @scanner = nil
455
+
456
+ # Add any restrictions, and set the line-folding attribute to reflect
457
+ # what happens by default.
458
+ @filter_html = nil
459
+ @filter_styles = nil
460
+ restrictions.flatten.each {|r| __send__("#{r}=", true) }
461
+ @fold_lines = true
462
+
463
+ @use_header_id = true
464
+ @display_warnings = true
465
+
466
+ @log.debug "String is: %p" % self
467
+ end
468
+
469
+
470
+ ######
471
+ public
472
+ ######
473
+
474
+ # Filters for controlling what gets output for untrusted input. (But really,
475
+ # you're filtering bad stuff out of untrusted input at submission-time via
476
+ # untainting, aren't you?)
477
+ attr_accessor :filter_html, :filter_styles
478
+
479
+ # RedCloth-compatibility accessor. Line-folding is part of Markdown syntax,
480
+ # so this isn't used by anything.
481
+ attr_accessor :fold_lines
482
+
483
+ # AoBane Extension: display warnings on the top of output html (default: true)
484
+ attr_accessor :display_warnings
485
+
486
+ # AoBane Extension: add id to each header, for toc and anchors. (default: true)
487
+ attr_accessor :use_header_id
488
+
489
+ ### Render Markdown-formatted text in this string object as HTML and return
490
+ ### it. The parameter is for compatibility with RedCloth, and is currently
491
+ ### unused, though that may change in the future.
492
+ def parse_text(source, rs = nil)
493
+ rs ||= RenderState.new
494
+
495
+ # check
496
+ case rs.header_id_type
497
+ when HeaderIDType::MD5, HeaderIDType::ESCAPE
498
+ else
499
+ rs.warnings << "illegal header id type - #{rs.header_id_type}"
500
+ end
501
+
502
+ # Create a StringScanner we can reuse for various lexing tasks
503
+ @scanner = StringScanner::new( '' )
504
+
505
+ # Make a copy of the string with normalized line endings, tabs turned to
506
+ # spaces, and a couple of guaranteed newlines at the end
507
+
508
+ text = detab(source.gsub( /\r\n?/, "\n" ))
509
+ text += "\n\n"
510
+ @log.debug "Normalized line-endings: %p" % text
511
+
512
+ #Insert by set.minami 2013-03-30
513
+ text.gsub!(/\*\[(.*?)\]\((.*?)(\|.*?)*(#.*?)*\)/){
514
+ |match|
515
+ '<font color="' +
516
+ if $2.nil? then '' else $2 end +'" ' +
517
+ 'face="' +
518
+ if $3.nil? then '' else $3.delete('|') end + '" ' +
519
+ 'size="' +
520
+ if $4.nil? then '' else $4.delete('#') end + '">' +
521
+ $1 + '</font>'
522
+ }
523
+ #Insert by set.minami
524
+
525
+ # Filter HTML if we're asked to do so
526
+ if self.filter_html
527
+ #text.gsub!( "<", "&lt;" )
528
+ #text.gsub!( ">", "&gt;" )
529
+ @log.debug "Filtered HTML: %p" % text
530
+ end
531
+
532
+ # Simplify blank lines
533
+ text.gsub!( /^ +$/, '' )
534
+ @log.debug "Tabs -> spaces/blank lines stripped: %p" % text
535
+
536
+
537
+ # Replace HTML blocks with placeholders
538
+ text = hide_html_blocks( text, rs )
539
+ @log.debug "Hid HTML blocks: %p" % text
540
+ @log.debug "Render state: %p" % rs
541
+
542
+
543
+ # Strip footnote definitions, store in render state
544
+ text = strip_footnote_definitions( text, rs )
545
+ @log.debug "Stripped footnote definitions: %p" % text
546
+ @log.debug "Render state: %p" % rs
547
+
548
+
549
+ # Strip link definitions, store in render state
550
+ text = strip_link_definitions( text, rs )
551
+ @log.debug "Stripped link definitions: %p" % text
552
+ @log.debug "Render state: %p" % rs
553
+
554
+ # Escape meta-characters
555
+ text = escape_special_chars( text )
556
+ @log.debug "Escaped special characters: %p" % text
557
+
558
+ # Transform block-level constructs
559
+ text = apply_block_transforms( text, rs )
560
+ @log.debug "After block-level transforms: %p" % text
561
+
562
+ # Now swap back in all the escaped characters
563
+ text = unescape_special_chars( text )
564
+ @log.debug "After unescaping special characters: %p" % text
565
+
566
+ # Extend footnotes
567
+ unless rs.footnotes.empty? then
568
+ text << %Q|<div class="footnotes"><hr#{EmptyElementSuffix}\n<ol>\n|
569
+ rs.found_footnote_ids.each do |id|
570
+ content = rs.footnotes[id]
571
+ html = apply_block_transforms(content.sub(/\n+\Z/, '') + %Q| <a href="#footnote-ref:#{id}" rev="footnote">&#8617;</a>|, rs)
572
+ text << %Q|<li id="footnote:#{id}">\n#{html}\n</li>|
573
+ end
574
+ text << %Q|</ol>\n</div>\n|
575
+ end
576
+
577
+ # Display warnings
578
+ if @display_warnings then
579
+ unless rs.warnings.empty? then
580
+ html = %Q|<pre><strong>[WARNINGS]\n|
581
+ html << rs.warnings.map{|x| Util.escape_html(x)}.join("\n")
582
+ html << %Q|</strong></pre>|
583
+
584
+ text = html + text
585
+ end
586
+ end
587
+
588
+ #Insert by set.minami 2013-03-30
589
+ output = []
590
+ text.lines {|line|
591
+ if /<pre><code>/ =~ line
592
+ output << line
593
+ next
594
+ until /<\/code><\/pre>/ =~ line
595
+ output << line
596
+ next
597
+ end
598
+ else
599
+ line.gsub!(/\-\-|<=>|<\->|\->|<\-|=>|<=|\|\^|\|\|\/|\|\/|\^|>>|<<|\+_|!=|~~|~=|>_|<_|\|FA|\|EX|\|=|\(+\)|\(x\)|\\&|\(c\)|\(R\)|\(SS\)|\(TM\)/,
600
+ "\-\-" => "&mdash;",
601
+ "<=" => "&hArr;",
602
+ "<\->" => "&harr;",
603
+ "\->" =>"&rarr;",
604
+ "<\-" =>"&larr;",
605
+ "=>" => "&rArr;",
606
+ "<=" => "&lArr;",
607
+ "\|\|\^" => "&uArr;",
608
+ "\|\|\/" => "&dArr;",
609
+ "\|\/" => "&darr;",
610
+ "\|\^" => "&uarr;",
611
+ ">>" => "&raquo;",
612
+ "<<" => "&laquo;",
613
+ "+_" => "&plusmn;",
614
+ "!=" => "&ne;",
615
+ "~~" => "&asymp;",
616
+ "~=" => "&cong;",
617
+ "<_" => "&le;",
618
+ ">_" => "&ge",
619
+ "\|FA" => "&forall;",
620
+ "\|EX" => "&exist;",
621
+ "\|=" => "&equiv;",
622
+ "\(+\)" => "&oplus",
623
+ "\(x\)" => "&otimes;",
624
+ "\\&" =>"&amp;",
625
+ "\(c\)" => "&copy;",
626
+ "\(R\)" =>"&reg;",
627
+ "\(SS\)" => "&sect;",
628
+ "\(TM\)" => "&trade;" #29
629
+ )
630
+ output << line
631
+ end
632
+ }
633
+ return output
634
+ #Insert by set.minami
635
+ #return text
636
+
637
+ end
638
+
639
+ alias parse parse_text
640
+
641
+ # return values are extended. (mainly for testing)
642
+ def parse_text_with_render_state(str, rs = nil)
643
+ rs ||= RenderState.new
644
+ html = parse_text(str, rs)
645
+
646
+ return [html, rs]
647
+ end
648
+
649
+ def parse_text_file(path)
650
+ parse_text(File.read(path))
651
+ end
652
+
653
+ alias parse_file parse_text_file
654
+
655
+
656
+ def parse_document(source, default_enc = EncodingType::UTF8)
657
+ doc = Document.parse(source, default_enc)
658
+
659
+ return document_to_html(doc)
660
+ end
661
+
662
+ def parse_document_file(path, default_enc = EncodingType::UTF8)
663
+ doc = nil
664
+ open(path){|f|
665
+ doc = Document.parse_io(f, default_enc)
666
+ }
667
+
668
+ return document_to_html(doc)
669
+ end
670
+
671
+
672
+ def document_to_html(doc)
673
+ rs = RenderState.new
674
+ if doc.numbering? then
675
+ rs.numbering = true
676
+ end
677
+ rs.numbering_start_level = doc.numbering_start_level
678
+ rs.header_id_type = doc.header_id_type
679
+
680
+ body_html = nil
681
+
682
+ if doc.encoding_type then
683
+ Util.change_kcode(doc.kcode){
684
+ body_html = parse_text(doc.body, rs)
685
+ }
686
+ else
687
+ body_html = parse_text(doc.body, rs)
688
+ end
689
+
690
+ out = Util.generate_blank_string_io(doc.body)
691
+
692
+ # XHTML decleration
693
+ out.puts %Q|<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">|
694
+
695
+ # html start
696
+ out.puts %Q|<html>|
697
+
698
+ # head
699
+ out.puts %Q|<head>|
700
+
701
+ if doc.encoding_type and (charset = EncodingType.convert_to_charset(doc.encoding_type)) then
702
+ out.puts %Q|<meta http-equiv="Content-Type" content="text/html; charset=#{charset}" />|
703
+ end
704
+
705
+ h1 = rs.headers.find{|x| x.level == 1}
706
+ h1_content = (h1 ? h1.content : nil)
707
+ title = Util.escape_html(doc.title || h1_content || 'no title (Generated by AoBane)')
708
+ out.puts %Q|<title>#{title}</title>|
709
+
710
+ %w(description keywords).each do |name|
711
+ if doc[name] then
712
+ content = Util.escape_html(doc[name])
713
+ out.puts %Q|<meta name="#{name}" content="#{content}" />|
714
+ end
715
+ end
716
+
717
+
718
+ if doc['css'] then
719
+ href = Util.escape_html(doc.css)
720
+ out.puts %Q|<link rel="stylesheet" type="text/css" href="#{href}" />|
721
+
722
+ end
723
+
724
+ if doc['rdf-feed'] then
725
+ href = Util.escape_html(doc['rdf-feed'])
726
+ out.puts %Q|<link rel="alternate" type="application/rdf+xml" href="#{href}" />|
727
+ end
728
+
729
+
730
+
731
+ if doc['rss-feed'] then
732
+ href = Util.escape_html(doc['rss-feed'])
733
+ out.puts %Q|<link rel="alternate" type="application/rss+xml" href="#{href}" />|
734
+ end
735
+
736
+ if doc['atom-feed'] then
737
+ href = Util.escape_html(doc['atom-feed'])
738
+ out.puts %Q|<link rel="alternate" type="application/atom+xml" href="#{href}" />|
739
+ end
740
+
741
+ out.puts %Q|</head>|
742
+
743
+ # body
744
+ out.puts %Q|<body>|
745
+ out.puts
746
+ out.puts body_html
747
+ out.puts
748
+ out.puts %Q|</body>|
749
+
750
+ # html end
751
+ out.puts %Q|</html>|
752
+
753
+
754
+ return out.string
755
+ end
756
+
757
+ alias doc2html document_to_html
758
+
759
+
760
+
761
+
762
+ #######
763
+ #private
764
+ #######
765
+
766
+ ### Convert tabs in +str+ to spaces.
767
+ ### (this method is reformed to function-like method from original BlueCloth)
768
+ def detab( str, tabwidth=TabWidth )
769
+ re = str.split( /\n/ ).collect {|line|
770
+ line.gsub( /(.*?)\t/ ) do
771
+ $1 + ' ' * (tabwidth - $1.length % tabwidth)
772
+ end
773
+ }.join("\n")
774
+
775
+ re
776
+ end
777
+
778
+
779
+
780
+
781
+ ### Do block-level transforms on a copy of +str+ using the specified render
782
+ ### state +rs+ and return the results.
783
+ def apply_block_transforms( str, rs )
784
+ rs.block_transform_depth += 1
785
+
786
+ # Port: This was called '_runBlockGamut' in the original
787
+
788
+ @log.debug "Applying block transforms to:\n %p" % str
789
+ text = str
790
+ text = pretransform_fenced_code_blocks( text, rs )
791
+ text = pretransform_block_separators(text, rs)
792
+
793
+ text = transform_headers( text, rs )
794
+ text = transform_toc(text, rs)
795
+
796
+ text = transform_hrules( text, rs )
797
+ text = transform_lists( text, rs )
798
+ text = transform_definition_lists( text, rs ) # AoBane Extension
799
+ text = transform_code_blocks( text, rs )
800
+ text = transform_block_quotes( text, rs )
801
+ text = transform_tables(text, rs)
802
+ text = hide_html_blocks( text, rs )
803
+
804
+ text = form_paragraphs( text, rs )
805
+
806
+ rs.block_transform_depth -= 1
807
+ @log.debug "Done with block transforms:\n %p" % text
808
+ return text
809
+ end
810
+
811
+
812
+ ### Apply Markdown span transforms to a copy of the specified +str+ with the
813
+ ### given render state +rs+ and return it.
814
+ def apply_span_transforms( str, rs )
815
+ @log.debug "Applying span transforms to:\n %p" % str
816
+
817
+ str = transform_code_spans( str, rs )
818
+ str = transform_auto_links( str, rs )
819
+ str = encode_html( str )
820
+ str = transform_images( str, rs )
821
+ str = transform_anchors( str, rs )
822
+ str = transform_italic_and_bold( str, rs )
823
+
824
+ # Hard breaks
825
+ str.gsub!( / {2,}\n/, "<br#{EmptyElementSuffix}\n" )
826
+
827
+ @log.debug "Done with span transforms:\n %p" % str
828
+ return str
829
+ end
830
+
831
+
832
+ # The list of tags which are considered block-level constructs and an
833
+ # alternation pattern suitable for use in regexps made from the list
834
+ StrictBlockTags = %w[ p div h[1-6] blockquote pre table dl ol ul script noscript
835
+ form fieldset iframe math ins del ]
836
+ StrictTagPattern = StrictBlockTags.join('|')
837
+
838
+ LooseBlockTags = StrictBlockTags - %w[ins del]
839
+ LooseTagPattern = LooseBlockTags.join('|')
840
+
841
+ # Nested blocks:
842
+ # <div>
843
+ # <div>
844
+ # tags for inner block must be indented.
845
+ # </div>
846
+ # </div>
847
+ StrictBlockRegexp = %r{
848
+ ^ # Start of line
849
+ <(#{StrictTagPattern}) # Start tag: \2
850
+ \b # word break
851
+ (.*\n)*? # Any number of lines, minimal match
852
+ </\1> # Matching end tag
853
+ [ ]* # trailing spaces
854
+ $ # End of line or document
855
+ }ix
856
+
857
+ # More-liberal block-matching
858
+ LooseBlockRegexp = %r{
859
+ ^ # Start of line
860
+ <(#{LooseTagPattern}) # start tag: \2
861
+ \b # word break
862
+ (.*\n)*? # Any number of lines, minimal match
863
+ .*</\1> # Anything + Matching end tag
864
+ [ ]* # trailing spaces
865
+ $ # End of line or document
866
+ }ix
867
+
868
+ # Special case for <hr />.
869
+ HruleBlockRegexp = %r{
870
+ ( # $1
871
+ \A\n? # Start of doc + optional \n
872
+ | # or
873
+ .*\n\n # anything + blank line
874
+ )
875
+ ( # save in $2
876
+ # AoBane fix: Not allow any space on line top
877
+ <hr # Tag open
878
+ \b # Word break
879
+ ([^<>])*? # Attributes
880
+ /?> # Tag close
881
+ $ # followed by a blank line or end of document
882
+ )
883
+ }ix
884
+
885
+ ### Replace all blocks of HTML in +str+ that start in the left margin with
886
+ ### tokens.
887
+ def hide_html_blocks( str, rs )
888
+ @log.debug "Hiding HTML blocks in %p" % str
889
+
890
+ # Tokenizer proc to pass to gsub
891
+ tokenize = lambda {|match|
892
+ key = Digest::MD5::hexdigest( match )
893
+ rs.html_blocks[ key ] = match
894
+ @log.debug "Replacing %p with %p" % [ match, key ]
895
+ "\n\n#{key}\n\n"
896
+ }
897
+
898
+ rval = str.dup
899
+
900
+ @log.debug "Finding blocks with the strict regex..."
901
+ rval.gsub!( StrictBlockRegexp, &tokenize )
902
+
903
+ @log.debug "Finding blocks with the loose regex..."
904
+ rval.gsub!( LooseBlockRegexp, &tokenize )
905
+
906
+ @log.debug "Finding hrules..."
907
+ rval.gsub!( HruleBlockRegexp ) {|match| $1 + tokenize[$2] }
908
+
909
+ return rval
910
+ end
911
+
912
+
913
+ # Link defs are in the form: ^[id]: url "optional title"
914
+ LinkRegexp = %r{
915
+ ^[ ]{0,#{TabWidth - 1}} # AoBane fix: indent < tab width
916
+ \[(.+)\]: # id = $1
917
+ [ ]*
918
+ \n? # maybe *one* newline
919
+ [ ]*
920
+ <?(\S+?)>? # url = $2
921
+ [ ]*
922
+ \n? # maybe one newline
923
+ [ ]*
924
+ (?:
925
+ # Titles are delimited by "quotes" or (parens).
926
+ ["(]
927
+ (.+?) # title = $3
928
+ [")] # Matching ) or "
929
+ [ ]*
930
+ )? # title is optional
931
+ (?:\n+|\Z)
932
+ }x
933
+
934
+ ### Strip link definitions from +str+, storing them in the given RenderState
935
+ ### +rs+.
936
+ def strip_link_definitions( str, rs )
937
+ str.gsub( LinkRegexp ) {|match|
938
+ id, url, title = $1, $2, $3
939
+
940
+ rs.urls[ id.downcase ] = encode_html( url )
941
+ unless title.nil?
942
+ rs.titles[ id.downcase ] = title.gsub( /"/, "&quot;" )
943
+ end
944
+
945
+ ""
946
+ }
947
+ end
948
+
949
+ # Footnotes defs are in the form: [^id]: footnote contents.
950
+ FootnoteDefinitionRegexp = %r{
951
+ ^[ ]{0,#{TabWidth - 1}}
952
+ \[\^(.+?)\]\: # id = $1
953
+ [ ]*
954
+ (.*) # first line content = $2
955
+ (?:\n|\Z)
956
+
957
+ ( # second or more lines content = $3
958
+ (?:
959
+ [ ]{#{TabWidth},} # indented
960
+ .*
961
+ (?:\n|\Z)
962
+ |
963
+ \n # blank line
964
+ )*
965
+ )?
966
+
967
+ }x
968
+
969
+ FootnoteIdRegexp = /^[a-zA-Z0-9\:\._-]+$/
970
+
971
+ def strip_footnote_definitions(str, rs)
972
+ str.gsub( FootnoteDefinitionRegexp ) {|match|
973
+ id = $1; content1 = $2; content2 = $3
974
+
975
+ unless id =~ FootnoteIdRegexp then
976
+ rs.warnings << "illegal footnote id - #{id} (legal chars: a-zA-Z0-9_-.:)"
977
+ end
978
+
979
+ if content2 then
980
+ @log.debug " Stripping multi-line definition %p, %p" % [$2, $3]
981
+ content = content1 + "\n" + outdent(content2.chomp)
982
+ @log.debug " Stripped multi-line definition %p, %p" % [id, content]
983
+ rs.footnotes[id] = content
984
+ else
985
+ content = content1 || ''
986
+ @log.debug " Stripped single-line definition %p, %p" % [id, content]
987
+ rs.footnotes[id] = content
988
+ end
989
+
990
+
991
+
992
+ ""
993
+ }
994
+ end
995
+
996
+
997
+ ### Escape special characters in the given +str+
998
+ def escape_special_chars( str )
999
+ @log.debug " Escaping special characters"
1000
+ text = ''
1001
+
1002
+ # The original Markdown source has something called '$tags_to_skip'
1003
+ # declared here, but it's never used, so I don't define it.
1004
+
1005
+ tokenize_html( str ) {|token, str|
1006
+ @log.debug " Adding %p token %p" % [ token, str ]
1007
+ case token
1008
+
1009
+ # Within tags, encode * and _
1010
+ when :tag
1011
+ text += str.
1012
+ gsub( /\*/, EscapeTable['*'][:md5] ).
1013
+ gsub( /_/, EscapeTable['_'][:md5] )
1014
+
1015
+ # Encode backslashed stuff in regular text
1016
+ when :text
1017
+ text += encode_backslash_escapes( str )
1018
+ else
1019
+ raise TypeError, "Unknown token type %p" % token
1020
+ end
1021
+ }
1022
+
1023
+ @log.debug " Text with escapes is now: %p" % text
1024
+ return text
1025
+ end
1026
+
1027
+
1028
+ ### Swap escaped special characters in a copy of the given +str+ and return
1029
+ ### it.
1030
+ def unescape_special_chars( str )
1031
+ EscapeTable.each {|char, hash|
1032
+ @log.debug "Unescaping escaped %p with %p" % [ char, hash[:md5re] ]
1033
+ str.gsub!( hash[:md5re], hash[:unescape] )
1034
+ }
1035
+
1036
+ return str
1037
+ end
1038
+
1039
+
1040
+ ### Return a copy of the given +str+ with any backslashed special character
1041
+ ### in it replaced with MD5 placeholders.
1042
+ def encode_backslash_escapes( str )
1043
+ # Make a copy with any double-escaped backslashes encoded
1044
+ text = str.gsub( /\\\\/, EscapeTable['\\\\'][:md5] )
1045
+
1046
+ EscapeTable.each_pair {|char, esc|
1047
+ next if char == '\\\\'
1048
+ next unless char =~ /\\./
1049
+ text.gsub!( esc[:re], esc[:md5] )
1050
+ }
1051
+
1052
+ return text
1053
+ end
1054
+
1055
+
1056
+ def pretransform_block_separators(str, rs)
1057
+ str.gsub(/^[ ]{0,#{TabWidth - 1}}[~][ ]*\n/){
1058
+ "\n~\n\n"
1059
+ }
1060
+ end
1061
+
1062
+
1063
+ TOCRegexp = %r{
1064
+ ^\{ # bracket on line-head
1065
+ [ ]* # optional inner space
1066
+ toc
1067
+
1068
+ (?:
1069
+ (?:
1070
+ [:] # colon
1071
+ | # or
1072
+ [ ]+ # 1 or more space
1073
+ )
1074
+ (.+?) # $1 = parameter
1075
+ )?
1076
+
1077
+ [ ]* # optional inner space
1078
+ \} # closer
1079
+ [ ]*$ # optional space on line-foot
1080
+ }ix
1081
+
1082
+ TOCStartLevelRegexp = %r{
1083
+ ^
1084
+ (?: # optional start
1085
+ h
1086
+ ([1-6]) # $1 = start level
1087
+ )?
1088
+
1089
+ (?: # range symbol
1090
+ [.]{2,}|[-] # .. or -
1091
+ )
1092
+
1093
+ (?: # optional end
1094
+ h? # optional 'h'
1095
+ ([1-6]) # $2 = end level
1096
+ )?$
1097
+ }ix
1098
+
1099
+ ### Transform any Markdown-style horizontal rules in a copy of the specified
1100
+ ### +str+ and return it.
1101
+ def transform_toc( str, rs )
1102
+ @log.debug " Transforming tables of contents"
1103
+ str.gsub(TOCRegexp){
1104
+ start_level = 2 # default
1105
+ end_level = 6
1106
+
1107
+ param = $1
1108
+ if param then
1109
+ if param =~ TOCStartLevelRegexp then
1110
+ if !($1) and !($2) then
1111
+ rs.warnings << "illegal TOC parameter - #{param} (valid example: 'h2..h4')"
1112
+ else
1113
+ start_level = ($1 ? $1.to_i : 2)
1114
+ end_level = ($2 ? $2.to_i : 6)
1115
+ end
1116
+ else
1117
+ rs.warnings << "illegal TOC parameter - #{param} (valid example: 'h2..h4')"
1118
+ end
1119
+ end
1120
+
1121
+ if rs.headers.first and rs.headers.first.level >= (start_level + 1) then
1122
+ rs.warnings << "illegal structure of headers - h#{start_level} should be set before h#{rs.headers.first.level}"
1123
+ end
1124
+
1125
+
1126
+ ul_text = "\n\n"
1127
+ rs.headers.each do |header|
1128
+ if header.level >= start_level and header.level <= end_level then
1129
+ ul_text << ' ' * TabWidth * (header.level - start_level)
1130
+ ul_text << '* '
1131
+ ul_text << %Q|<a href="##{header.id}" rel="toc">#{header.content_html}</a>|
1132
+ ul_text << "\n"
1133
+ end
1134
+ end
1135
+ ul_text << "\n"
1136
+
1137
+ ul_text # output
1138
+
1139
+ }
1140
+ end
1141
+
1142
+ TableRegexp = %r{
1143
+ (?:
1144
+ ^([ ]{0,#{TabWidth - 1}}) # not indented
1145
+ (?:[|][ ]*) # NOT optional border
1146
+
1147
+ \S.*? # 1st cell content
1148
+
1149
+ (?: # 2nd cell or later
1150
+ [|] # cell splitter
1151
+ .+? # content
1152
+ )+ # 1 or more..
1153
+
1154
+ [|]? # optional border
1155
+ (?:\n|\Z) # line end
1156
+ )+
1157
+ }x
1158
+
1159
+ # Transform tables.
1160
+ def transform_tables(str, rs)
1161
+ str.gsub(TableRegexp){
1162
+ transform_table_rows($~[0], rs)
1163
+ }
1164
+ end
1165
+
1166
# Matches a single cell of the separator row that follows a table head,
# e.g. "---", ":---", "---:" or ":-:".
TableSeparatorCellRegexp = %r{
  ^
  [ ]*
  ([:])? # $1 = left-align symbol
  [ ]*
  [-]+ # border
  [ ]*
  ([:])? # $2 = right-align symbol
  [ ]*
  $
}x

# Convert the source text of one table (+str+) into <table> markup.
#
# The text is split into rows on newlines and into cells on '|'. When there
# are at least three rows and every cell of the second row is a separator
# cell, the first row becomes the <thead> and the separator row decides the
# text-align style applied to each column. Cell contents go through
# #apply_span_transforms. Returns the HTML string.
def transform_table_rows(str, rs)
  # split cells to 2-d array
  rows = str.split("\n").map { |line| line.split('|') }

  rows.each do |cells|
    # cut left space
    cells.first.lstrip!

    # drop the empty cell produced by an optional leading border
    cells.shift if cells.first.empty?
  end

  column_attrs = []

  html = ''
  html << "<table>\n"

  # is there a head row?
  has_head = rows.size >= 3 && rows[1].all? { |cell| cell =~ TableSeparatorCellRegexp }

  if has_head
    head_cells      = rows.shift
    separator_cells = rows.shift

    column_attrs = separator_cells.map do |cell|
      marks = cell.match(TableSeparatorCellRegexp)
      left  = marks[1]
      right = marks[2]

      if left && right
        ' style="text-align: center"'
      elsif right
        ' style="text-align: right"'
      elsif left
        ' style="text-align: left"'
      else
        ''
      end
    end

    html << "\t<thead><tr>\n"
    head_cells.each_with_index do |cell, col|
      html << "\t\t<th#{column_attrs[col]}>#{apply_span_transforms(cell.strip, rs)}</th>\n"
    end
    html << "\t</tr></thead>\n"
  end

  # data rows
  html << "\t<tbody>\n"
  rows.each do |cells|
    html << "\t\t<tr>\n"
    cells.each_with_index do |cell, col|
      html << "\t\t\t<td#{column_attrs[col]}>#{apply_span_transforms(cell.strip, rs)}</td>\n"
    end
    html << "\t\t</tr>\n"
  end
  html << "\t</tbody>\n"

  html << "</table>\n"

  html
end
1239
+
1240
+
1241
### Return a copy of +str+ in which every line made of three or more
### '-', '*' or '_' characters (each optionally surrounded by one space) is
### replaced by an <hr> element.
def transform_hrules( str, rs )
  @log.debug " Transforming horizontal rules"
  rule_line = /^( ?[\-\*_] ?){3,}$/
  hr_markup = "\n<hr#{EmptyElementSuffix}\n"
  str.gsub( rule_line, hr_markup )
end
1247
+
1248
+
1249
+
1250
# Patterns to match list markers: "1." style for ordered lists, one of
# '*', '+', '-' for unordered lists.
ListMarkerOl = %r{\d+\.}
ListMarkerUl = %r{[*+-]}
ListMarkerAny = Regexp::union( ListMarkerOl, ListMarkerUl )

# Matches a whole list: a marker indented by less than a tab width, then
# everything up to the end of the string or to a blank line that is
# followed by something other than another list item.
ListRegexp = %r{
  (?:
    ^[ ]{0,#{TabWidth - 1}} # Indent < tab width
    (#{ListMarkerAny}) # unordered or ordered ($1)
    [ ]+ # At least one space
  )
  (?m:.+?) # item content (include newlines)
  (?:
    \z # Either EOF
    | # or
    \n{2,} # Blank line...
    (?=\S) # ...followed by non-space
    (?![ ]* # ...but not another item
      (#{ListMarkerAny})
      [ ]+)
  )
}x

### Return a copy of +str+ with every Markdown-style list wrapped in <ul>
### or <ol> (chosen from the first marker of the list); the items are
### rendered by #transform_list_items.
def transform_lists( str, rs )
  @log.debug " Transforming lists at %p" % (str[0,100] + '...')

  str.gsub( ListRegexp ) do |list|
    @log.debug " Found list %p" % list
    first_marker = Regexp.last_match(1)
    tag = ListMarkerUl.match(first_marker) ? "ul" : "ol"
    items_html = transform_list_items( list, rs )

    "<#{tag}>\n#{items_html}</#{tag}>\n"
  end
end
1290
+
1291
# Pattern for a single list item. The trailing lookahead captures the
# newlines that separate this item from the next one (or from the end of
# the string) as $5.
ListItemRegexp = %r{
  (\n)? # leading line = $1
  (^[ ]*) # leading whitespace = $2
  (#{ListMarkerAny}) [ ]+ # list marker = $3
  ((?m:.+?) # list item text = $4
  \n)
  (?= (\n*) (\z | \2 (#{ListMarkerAny}) [ ]+))
}x

### Return a copy of the list source +str+ with each item wrapped in <li>.
### An item is rendered with the block transforms when it is preceded by a
### blank line, contains a blank line, or is followed by blank lines;
### otherwise it is checked for nested lists and rendered with the span
### transforms.
def transform_list_items( str, rs )
  @log.debug " Transforming list items"

  # Trim trailing blank lines
  trimmed = str.sub( /\n{2,}\z/, "\n" )

  trimmed.gsub( ListItemRegexp ) do |line|
    @log.debug " Found item line %p" % line
    found = Regexp.last_match
    leading_line     = found[1]
    item             = found[4]
    separating_lines = found[5]

    needs_block_markup =
      leading_line || /\n{2,}/.match(item) || !separating_lines.empty?

    rendered =
      if needs_block_markup
        @log.debug " Found leading line or item has a blank"
        apply_block_transforms( outdent(item), rs )
      else
        # Recursion for sub-lists
        @log.debug " Recursing for sublist"
        nested = transform_lists( outdent(item), rs ).chomp
        apply_span_transforms( nested, rs )
      end

    "<li>#{rendered}</li>\n"
  end
end
1325
+
1326
# Matches a definition list: one or more term lines followed by one or more
# definitions, each introduced by ':' at a line head indented by less than
# a tab width.
DefinitionListRegexp = %r{
  (?:
    (?:^.+\n)+ # dt
    \n*
    (?:
      ^[ ]{0,#{TabWidth - 1}} # Indent < tab width
      \: # dd marker (line head)
      [ ]* # space
      ((?m:.+?)) # dd content
      (?:
        \s*\z # end of string
        | # or
        \n{2,} # blank line
        (?=[ ]{0,#{TabWidth - 1}}\S) # ...followed by
      )
    )+
  )+
}x

# Return a copy of +str+ in which every definition list has been replaced
# by the output of #transform_definition_list_items.
def transform_definition_lists(str, rs)
  @log.debug " Transforming definition lists at %p" % (str[0,100] + '...')

  str.gsub( DefinitionListRegexp ) do |list|
    captures = Regexp.last_match.captures
    @log.debug " Found definition list %p (captures=%p)" % [list, captures]
    transform_definition_list_items(list, rs)
  end
end
1352
+
1353
# First line of a definition: ':' at the line head, up to (tab width - 1)
# spaces, then the text ($1).
DDLineRegexp = /^\:[ ]{0,#{TabWidth - 1}}(.*)/


# Render the source text of one definition list (+str+) as a <dl> element.
#
# The text is consumed line by line in groups: term lines (anything not
# starting with ':'), optional blank lines, then one or more definitions.
# When blank lines separate the terms from the definitions, definitions are
# rendered with the block transforms; otherwise with the span transforms.
# Returns the generated markup as a String.
def transform_definition_list_items(str, rs)
  buf = Util.generate_blank_string_io(str)
  buf.puts %Q|<dl>|

  lines = str.split("\n")
  until lines.empty?

    # collect the term (dt) lines
    terms = []
    terms << lines.shift while lines.first =~ /^(?!\:).+$/

    # blank lines between terms and definitions switch to block rendering
    dd_as_block = false
    while !lines.empty? && lines.first.empty?
      lines.shift
      dd_as_block = true
    end

    definitions = []
    while lines.first =~ DDLineRegexp
      first_line_text = Regexp.last_match(1)
      dd_buf = []

      # dd first line
      opening_line = lines.shift
      dd_buf << first_line_text << "\n" unless opening_line.empty?

      # dd second and more lines (sequential with 1st-line): read until the
      # input ends, a blank line appears, or a new dd starts
      while !lines.empty? &&
            lines.first !~ /^[ ]{0,#{TabWidth - 1}}$/ &&
            lines.first !~ DDLineRegexp
        dd_buf << outdent(lines.shift) << "\n"
      end

      # dd second and more lines (separated with 1st-line)
      until lines.empty?
        if lines.first.empty?
          # blank line (skip)
          lines.shift
          dd_buf << "\n"
        elsif lines.first =~ /^[ ]{#{TabWidth},}/
          # indented body
          dd_buf << outdent(lines.shift) << "\n"
        else
          # not indented body
          break
        end
      end

      definitions << dd_buf.join

      # skip blank lines
      unless lines.empty?
        lines.shift while lines.first.empty?
      end
    end

    # html output
    terms.each do |dt|
      buf.puts %Q| <dt>#{apply_span_transforms(dt, rs)}</dt>|
    end

    definitions.each do |dd|
      if dd_as_block
        buf.puts %Q| <dd>#{apply_block_transforms(dd, rs)}</dd>|
      else
        dd.gsub!(/\n+\z/, '') # chomp linefeeds
        buf.puts %Q| <dd>#{apply_span_transforms(dd.chomp, rs)}</dd>|
      end
    end
  end

  buf.puts %Q|</dl>|

  return(buf.string)
end
1442
+
1443
+ # old
1444
+
1445
+
1446
# Pattern for matching codeblocks: a run of lines indented by a tab or a
# tab width of spaces ($1). The text that ends the run ($2) is consumed by
# the match, so the transform has to write it back.
CodeBlockRegexp = %r{
  (?:\n\n|\A|\A\n)
  ( # $1 = the code block
    (?:
      (?:[ ]{#{TabWidth}} | \t) # a tab or tab-width of spaces
      .*\n+
    )+
  )
  (^[ ]{0,#{TabWidth - 1}}\S|\Z) # Lookahead for non-space at
  # line-start, or end of doc
}x


### Return a copy of +str+ with every indented code block replaced by a
### <pre><code> element. The block is outdented and passed through
### #encode_code, then every EscapeTable hash found in it is turned back
### into its original character.
def transform_code_blocks( str, rs )
  @log.debug " Transforming code blocks"

  str.gsub( CodeBlockRegexp ) do |_block|
    found = Regexp.last_match
    codeblock = found[1]
    remainder = found[2]

    tmpl = %{\n\n<pre><code>%s\n</code></pre>\n\n%s}

    # patch for ruby 1.9.1 bug
    tmpl.force_encoding(str.encoding) if tmpl.respond_to?(:force_encoding)

    code = encode_code( outdent(codeblock), rs ).rstrip

    # recover all backslash escaped to original form
    EscapeTable.each do |char, hash|
      code.gsub!( hash[:md5re] ) { char }
    end

    # Generate the codeblock
    tmpl % [ code, remainder ]
  end
end
1487
+
1488
+
1489
# A fenced code block: a line of three or more '~', the body ($2), and a
# closing line repeating exactly the opening fence ($1).
FencedCodeBlockRegexp = /^(\~{3,})\n((?m:.+?)\n)\1\n/

# Rewrite each fenced code block in +str+ as an indented block framed by
# "~" separator paragraphs, so the regular code-block transform picks it up.
def pretransform_fenced_code_blocks( str, rs )
  @log.debug " Transforming fenced code blocks => standard code blocks"

  separator = "\n~\n\n"
  str.gsub( FencedCodeBlockRegexp ) do
    fenced_body = Regexp.last_match(2)
    separator + indent(fenced_body) + separator
  end
end
1498
+
1499
+
1500
+
1501
# Pattern for matching Markdown blockquote blocks: a line starting with
# '>', the consecutive non-blank lines after it, and any blank lines that
# follow - repeated.
BlockQuoteRegexp = %r{
  (?:
    ^[ ]*>[ ]? # '>' at the start of a line
    .+\n # rest of the first line
    (?:.+\n)* # subsequent consecutive lines
    \n* # blanks
  )+
}x
PreChunk = %r{ ( ^ \s* <pre> .+? </pre> ) }xm

### Return a copy of +str+ with Markdown blockquotes turned into
### <blockquote> elements. The quoted text loses one level of '>' markers,
### is rendered with the block transforms and indented by one tab width;
### <pre> chunks inside it get that indentation removed again.
def transform_block_quotes( str, rs )
  @log.debug " Transforming block quotes"

  str.gsub( BlockQuoteRegexp ) do |quote|
    @log.debug "Making blockquote from %p" % quote

    quote.gsub!( /^ *> ?/, '' ) # Trim one level of quoting
    quote.gsub!( /^ +$/, '' ) # Trim whitespace-only lines

    indent = " " * TabWidth
    inner = apply_block_transforms( quote, rs )
    inner = inner.gsub( /^/, indent )
    inner = inner.gsub( PreChunk ) {|m| m.gsub(/^#{indent}/o, '') }

    quoted = "<blockquote>\n#{inner}\n</blockquote>\n\n"
    @log.debug "Blockquoted chunk is: %p" % quoted
    quoted
  end
end
1532
+
1533
+
1534
+ # AoBane change:
1535
+ # allow loosely urls and addresses (BlueCloth is very strict)
1536
+ #
1537
+ # loose examples:
1538
+ # <skype:tetra-dice> (other protocol)
1539
+ # <ema+il@example.com> (ex: gmail alias)
1540
+ #
1541
+ # not adapted addresses:
1542
+ # <"Abc@def"@example.com> (refer to quoted-string of RFC 5321)
1543
+
1544
+
1545
AutoAnchorURLRegexp = /<(#{URI.regexp})>/ # $1 = url

AutoAnchorEmailRegexp = /<([^'">\s]+?\@[^'">\s]+[.][a-zA-Z]+)>/ # $1 = address

### Return a copy of +str+ in which <url> becomes an anchor with the
### HTML-escaped URL as both target and text, and <address> becomes the
### obfuscated mailto anchor built by #encode_email_address.
def transform_auto_links( str, rs )
  @log.debug " Transforming auto-links"

  with_url_anchors = str.gsub(AutoAnchorURLRegexp) do
    url = Regexp.last_match(1)
    %|<a href="#{Util.escape_html(url)}">#{Util.escape_html(url)}</a>|
  end

  with_url_anchors.gsub( AutoAnchorEmailRegexp ) do |_addr|
    address = Regexp.last_match(1)
    encode_email_address( unescape_special_chars(address) )
  end
end
1559
+
1560
+
1561
# Encoder functions to turn characters of an email address into encoded
# entities. Each one takes a byte value (Integer): decimal entity, hex
# entity, or the character itself.
Encoders = [
  lambda {|char| "&#%03d;" % char},
  lambda {|char| "&#x%X;" % char},
  lambda {|char| char.chr },
]

### Transform a copy of the given email +addr+ into an escaped version safer
### for posting publicly.
###
### Returns an anchor whose href is "mailto:" + +addr+ and whose text is
### +addr+, with the characters randomly written as decimal entities, hex
### entities or literally. Two characters are special-cased: ':' is always
### written literally (the label is derived by cutting everything up to the
### first ':'), and '@' is always written as an entity.
def encode_email_address( addr )
  rval = ''
  ("mailto:" + addr).each_byte do |b|
    # each_byte yields Integers while ?: and ?@ are one-character Strings on
    # Ruby 1.9 and later, so compare the character form of the byte.
    piece =
      case b.chr
      when ':'
        ":"
      when '@'
        Encoders[ rand(2) ][ b ]
      else
        r = rand(100)
        if r > 90
          Encoders[2][ b ]
        elsif r < 45
          Encoders[1][ b ]
        else
          Encoders[0][ b ]
        end
      end
    rval << piece
  end

  return %{<a href="%s">%s</a>} % [ rval, rval.sub(/.+?:/, '') ]
end
1592
+
1593
+
1594
# Regexp for matching Setext-style headers
SetextHeaderRegexp = %r{
  (.+?) # The title text ($1)

  (?: # Markdown Extra: Header Id Attribute (optional)
    [ ]* # space after closing #'s
    \{\#
    (\S+?) # $2 = Id
    \}
    [ \t]* # allowed lazy spaces
  )?
  \n
  ([\-=])+ # Match a line of = or -. Save only one in $3.
  [ ]*\n+
}x

# Regexp for matching ATX-style headers
AtxHeaderRegexp = %r{
  ^(\#+) # $1 = string of #'s
  [ ]*
  (.+?) # $2 = Header text
  [ ]*
  \#* # optional closing #'s (not counted)

  (?: # Markdown Extra: Header Id Attribute (optional)
    [ ]* # space after closing #'s
    \{\#
    (\S+?) # $3 = Id
    \}
    [ \t]* # allowed lazy spaces
  )?

  \n+
}x

# Either header style. In the union the Setext captures are groups 1-3 and
# the ATX captures are groups 4-6.
HeaderRegexp = Regexp.union(SetextHeaderRegexp, AtxHeaderRegexp)

IdRegexp = /^[a-zA-Z][a-zA-Z0-9\:\._-]*$/

### Apply Markdown header transforms to a copy of the given +str+ and render
### state +rs+ and return the result.
###
### Setext-style headers ("=" underline => h1, "-" underline => h2) and
### ATX-style headers ("#" prefixes) become <hN> elements. When numbering
### is enabled in +rs+, headers from the numbering start level up to h6 get
### a section-number prefix such as "1.2. ". Headers without an explicit
### {#id} get a generated id. Warnings are appended to rs.warnings, and
### headers found at block-transform depth 1 are recorded in rs.headers.
def transform_headers( str, rs )
  @log.debug " Transforming headers"

  # One counter per numbering depth. Depth is (level - numbering start
  # level) with level <= 6, so it can reach 5 when numbering starts at h1;
  # six slots are needed to cover that case.
  section_numbers = Array.new(6)

  str.gsub( HeaderRegexp ) do |_header|
    found = Regexp.last_match

    if found[1]
      @log.debug "Found setext-style header"
      title, id, hdrchar = found[1], found[2], found[3]

      case hdrchar
      when '='
        level = 1
      when '-'
        level = 2
      end
    else
      @log.debug "Found ATX-style header"
      hdrchars, title, id = found[4], found[5], found[6]
      level = hdrchars.length

      if level >= 7
        rs.warnings << "illegal header level - h#{level} ('#' symbols are too many)"
      end
    end

    prefix = ''
    if rs.numbering? && level >= rs.numbering_start_level && level <= 6
      depth = level - rs.numbering_start_level

      section_numbers.each_index do |i|
        if i == depth && section_numbers[depth]
          # increment the deepest number when this header is at the same
          # depth as an already counted one
          section_numbers[i] += 1
        elsif i <= depth
          # set default number if nil
          section_numbers[i] ||= 1
        else
          # clear the counters of deeper levels
          section_numbers[i] = nil
        end
      end

      no = (0..depth).map { |i| "#{section_numbers[i]}." }.join
      prefix = "#{no} "
    end

    title_html = apply_span_transforms( title, rs )

    unless id
      case rs.header_id_type
      when HeaderIDType::ESCAPE
        id = escape_to_header_id(title_html)
        if rs.headers.find { |h| h.id == id }
          rs.warnings << "header id collision - #{id}"
          id = "bfheader-#{Digest::MD5.hexdigest(title)}"
        end
      else
        id = "bfheader-#{Digest::MD5.hexdigest(title)}"
      end
    end

    title = "#{prefix}#{title}"
    title_html = "#{prefix}#{title_html}"

    unless id =~ IdRegexp
      rs.warnings << "illegal header id - #{id} (legal chars: [a-zA-Z0-9_-.] | 1st: [a-zA-Z])"
    end

    if rs.block_transform_depth == 1
      rs.headers << RenderState::Header.new(id, level, title, title_html)
    end

    if @use_header_id
      %{<h%d id="%s">%s</h%d>\n\n} % [ level, id, title_html, level ]
    else
      %{<h%d>%s</h%d>\n\n} % [ level, title_html, level ]
    end
  end
end
1731
+
1732
+
1733
### Return a copy of +str+ split into paragraphs on blank lines. A
### paragraph that is a key of rs.html_blocks is replaced by the stored
### HTML block, a lone "~" (block separator) produces no output, and
### anything else is rendered with the span transforms and wrapped in <p>
### tags. The pieces are joined with a blank line.
def form_paragraphs( str, rs )
  @log.debug " Forming paragraphs"

  trimmed = str.sub( /\A\n+/, '' ).sub( /\n+\z/, '' )

  rendered = trimmed.split( /\n{2,}/ ).map do |graf|
    if rs.html_blocks.key?( graf )
      # Unhashify HTML blocks if this is a placeholder
      rs.html_blocks[ graf ]
    elsif graf == '~'
      # no output if this is a block separator
      ''
    else
      # Otherwise, wrap in <p> tags
      apply_span_transforms(graf, rs).sub( /^[ ]*/, '<p>' ) + '</p>'
    end
  end

  rval = rendered.join( "\n\n" )

  @log.debug " Formed paragraphs: %p" % rval
  return rval
end
1762
+
1763
+
1764
# Pattern to match the linkid part of an anchor tag for reference-style
# links.
RefLinkIdRegexp = %r{
  [ ]? # Optional leading space
  (?:\n[ ]*)? # Optional newline + spaces
  \[
  (.*?) # Id = $1
  \]
}x

# Pattern to match the "(url "title")" part of an inline link.
InlineLinkRegexp = %r{
  \( # Literal paren
  [ ]* # Zero or more spaces
  <?(.+?)>? # URI = $1
  [ ]* # Zero or more spaces
  (?: #
    ([\"\']) # Opening quote char = $2
    (.*?) # Title = $3
    \2 # Matching quote char
  )? # Title is optional
  \)
}x

### Return a copy of +str+ with Markdown anchors turned into HTML.
###
### The text is scanned for "[...]" groups (nested brackets allowed). A
### group starting with '^' becomes a footnote reference; a group followed
### by "[id]" is looked up in rs.urls / rs.titles; a group followed by
### "(url "title")" becomes an inline link. Anything else is copied through
### unchanged. Problems are reported through rs.warnings.
def transform_anchors( str, rs )
  @log.debug " Transforming anchors"
  @scanner.string = str.dup
  text = ''

  # Scan the whole string
  until @scanner.eos?

    # Plain text up to the next opening bracket
    unless @scanner.scan( /\[/ )
      @log.debug " Scanning to the next link from %p" % @scanner.rest
      text += @scanner.scan( /[^\[]+/ )
      next
    end

    link = ''
    depth = 1
    startpos = @scanner.pos
    @log.debug " Found a bracket-open at %d" % startpos

    # Scan the rest of the tag, allowing unlimited nested []s. If the
    # scanner runs out of text before the opening bracket is closed,
    # append the text and return (wasn't a valid anchor).
    until depth.zero?
      linktext = @scanner.scan_until( /\]|\[/ )

      if linktext.nil?
        @log.debug " Missing closing brace, assuming non-link."
        link += @scanner.rest
        @scanner.terminate
        return text + '[' + link
      end

      @log.debug " Found a bracket at depth %d: %p" % [ depth, linktext ]
      link += linktext

      # Decrement depth for each closing bracket
      depth += ( linktext[-1, 1] == ']' ? -1 : 1 )
      @log.debug " Depth is now #{depth}"
    end
    link.slice!( -1 ) # Trim final ']'
    @log.debug " Found leading link %p" % link

    if link =~ /^\^(.+)/
      # Markdown Extra: Footnote
      id = Regexp.last_match(1)
      if rs.footnotes[id]
        rs.found_footnote_ids << id
        label = "[#{rs.found_footnote_ids.size}]"
      else
        rs.warnings << "undefined footnote id - #{id}"
        label = '[?]'
      end

      text += %Q|<sup id="footnote-ref:#{id}"><a href="#footnote:#{id}" rel="footnote">#{label}</a></sup>|

    elsif @scanner.scan( RefLinkIdRegexp )
      # Reference-style second part; an empty id means "use the link text"
      linkid = @scanner[1]
      linkid = link.dup if linkid.empty?
      linkid.downcase!
      @log.debug " Found a linkid: %p" % linkid

      if rs.urls.key?( linkid )
        # Build an anchor tag from the link table entry
        @log.debug " Found link key in the link table: %p" % rs.urls[linkid]
        url = escape_md( rs.urls[linkid] )

        text += %{<a href="#{url}"}
        if rs.titles.key?(linkid)
          text += %{ title="%s"} % escape_md( rs.titles[linkid] )
        end
        text += %{>#{link}</a>}
      else
        # Unknown id: append the raw source to the result
        raw_source = @scanner.string[ startpos-1 .. @scanner.pos-1 ]
        @log.debug " Linkid %p not found in link table" % linkid
        @log.debug " Appending original string instead: "
        @log.debug "%p" % raw_source

        rs.warnings << "link-id not found - #{linkid}"
        text += raw_source
      end

    elsif @scanner.scan( InlineLinkRegexp )
      # Inline-style second part
      url = @scanner[1]
      title = @scanner[3]
      @log.debug " Found an inline link to %p" % url

      url = "##{link}" if url == '#' # target anchor briefing (since AoBane 0.40)

      text += %{<a href="%s"} % escape_md( url )
      if title
        title.gsub!( /"/, "&quot;" )
        text += %{ title="%s"} % escape_md( title )
      end
      text += %{>#{link}</a>}

    else
      # No linkid part: just append the first part as-is.
      @log.debug "No linkid, so no anchor. Appending literal text."
      text += @scanner.string[ startpos-1 .. @scanner.pos-1 ]
    end

  end # until @scanner.eos?

  return text
end
1905
+
1906
+
1907
# Pattern to match strong emphasis in Markdown text
BoldRegexp = %r{ (\*\*|__) (\S|\S.*?\S) \1 }x

# Pattern to match normal emphasis in Markdown text
ItalicRegexp = %r{ (\*|_) (\S|\S.*?\S) \1 }x

### Return a copy of +str+ with **bold** / __bold__ wrapped in <strong> and
### *italic* / _italic_ wrapped in <em>. Bold is handled first.
def transform_italic_and_bold( str, rs )
  @log.debug " Transforming italic and bold"

  with_strong = str.gsub( BoldRegexp, %{<strong>\\2</strong>} )
  with_strong.gsub( ItalicRegexp, %{<em>\\2</em>} )
end
1922
+
1923
+
1924
### Return +str+ with backtick-delimited spans turned into <code> spans.
###
### A span opens with a run of backticks and closes at the next occurrence
### of the same run; its content is stripped and passed through
### #encode_code. +str+ itself is returned when it contains no backtick.
### Raises FormatError when an opening run has no matching closer.
def transform_code_spans( str, rs )
  @log.debug " Transforming code spans"

  # Set up the string scanner and just return the string unless there's at
  # least one backtick.
  @scanner.string = str.dup
  unless @scanner.exist?( /`/ )
    @scanner.terminate
    @log.debug "No backticks found for code span in %p" % str
    return str
  end

  @log.debug "Transforming code spans in %p" % str

  # Build the transformed text anew
  text = ''

  # Scan to the end of the string
  until @scanner.eos?
    # Scan up to an opening backtick
    pre = @scanner.scan_until( /.??(?=`)/m )

    # No more backticks: append the rest of the string and move the scan
    # pointer to the end
    if pre.nil?
      text += @scanner.rest
      @scanner.terminate
      next
    end

    text += pre
    @log.debug "Found backtick at %d after '...%s'" % [ @scanner.pos, text[-10, 10] ]

    # Make a pattern to find the end of the span
    opener = @scanner.scan( /`+/ )
    len = opener.length
    closer = Regexp::new( opener )
    @log.debug "Scanning for end of code span with %p" % closer

    # Scan until the end of the closing backtick sequence
    codespan = @scanner.scan_until( closer )
    if codespan.nil?
      raise FormatError::new( @scanner.rest[0,20],
        "No %p found before end" % opener )
    end

    @log.debug "Found close of code span at %d: %p" % [ @scanner.pos - len, codespan ]

    # Chop the closing backticks off, strip surrounding whitespace and
    # encode the content
    codespan.slice!( -len, len )
    text += "<code>%s</code>" % encode_code( codespan.strip, rs )
  end

  return text
end
1978
+
1979
+
1980
# Next, handle inline images: ![alt text](url "optional title")
# Don't forget: encode * and _
InlineImageRegexp = %r{
  ( # Whole match = $1
    !\[ (.*?) \] # alt text = $2
    \([ ]*
    <?(\S+?)>? # source url = $3
    [ ]*
    (?: #
      (["']) # quote char = $4
      (.*?) # title = $5
      \4 # matching quote
      [ ]*
    )? # title is optional
    \)
  )
}x #"


# Reference-style images
ReferenceImageRegexp = %r{
  ( # Whole match = $1
    !\[ (.*?) \] # Alt text = $2
    [ ]? # Optional space
    (?:\n[ ]*)? # One optional newline + spaces
    \[ (.*?) \] # id = $3
  )
}x

### Return a copy of +str+ with image markup turned into <img> tags.
###
### Reference-style images (![alt][id]) are resolved through rs.urls and
### rs.titles and left untouched when the id is unknown; an empty id falls
### back to the alt text. Inline images (![alt](url "title")) are handled
### afterwards. Double quotes in alt text and titles become &quot;.
def transform_images( str, rs )
  @log.debug " Transforming images %p" % str

  # Handle reference-style labeled images: ![alt text][id]
  with_reference_images = str.gsub( ReferenceImageRegexp ) do |match|
    found = Regexp.last_match
    whole, alt, linkid = found[1], found[2], found[3].downcase
    @log.debug "Matched %p" % match
    alt.gsub!( /"/, '&quot;' )

    # for shortcut links like ![this][].
    linkid = alt.downcase if linkid.empty?

    if rs.urls.key?( linkid )
      url = escape_md( rs.urls[linkid] )
      @log.debug "Found url '%s' for linkid '%s' " % [ url, linkid ]

      # Build the tag
      result = %{<img src="%s" alt="%s"} % [ url, alt ]
      if rs.titles.key?( linkid )
        result += %{ title="%s"} % escape_md( rs.titles[linkid] )
      end
      result += EmptyElementSuffix
    else
      result = whole
    end

    @log.debug "Replacing %p with %p" % [ match, result ]
    result
  end

  # Inline image style
  with_reference_images.gsub( InlineImageRegexp ) do |match|
    @log.debug "Found inline image %p" % match
    found = Regexp.last_match
    alt, title = found[2], found[5]
    url = escape_md( found[3] )
    alt.gsub!( /"/, '&quot;' )

    # Build the tag
    result = %{<img src="%s" alt="%s"} % [ url, alt ]
    unless title.nil?
      title.gsub!( /"/, '&quot;' )
      result += %{ title="%s"} % escape_md( title )
    end
    result += EmptyElementSuffix

    @log.debug "Replacing %p with %p" % [ match, result ]
    result
  end
end
2062
+
2063
+
2064
# Regexp to match special characters in a code block
CodeEscapeRegexp = %r{( \* | _ | \{ | \} | \[ | \] | \\ )}x

### Escape any characters special to HTML and encode any characters special
### to Markdown in a copy of the given +str+ and return it.
###
### '&', '<' and '>' become entities; the Markdown metacharacters matched
### by CodeEscapeRegexp are replaced by their EscapeTable hash so later
### span transforms leave code content alone.
###
### The body had been commented out, which made the method return nil:
### #transform_code_blocks then failed calling #rstrip on nil and code
### spans were rendered empty. The implementation is restored here.
def encode_code( str, rs )
  str.gsub( %r{&}, '&amp;' ).
    gsub( %r{<}, '&lt;' ).
    gsub( %r{>}, '&gt;' ).
    gsub( CodeEscapeRegexp ) {|match| EscapeTable[match][:md5]}
end
2075
+
2076
# Build a header id from the header's HTML: tags are removed, whitespace
# becomes "_", Markdown characters are escaped via #escape_md, "/" becomes
# ".2F", the rest is URI-escaped and every "%" is replaced by ".".
#
# URI.escape no longer exists on Ruby 3.0 and later; the default parser's
# #escape is the implementation URI.escape delegated to.
def escape_to_header_id(str)
  plain = str.gsub(/<\/?[^>]*>/, "").gsub(/\s/, "_")
  slash_safe = escape_md(plain).gsub("/", ".2F")
  URI::DEFAULT_PARSER.escape(slash_safe).gsub("%", ".")
end
2079
+
2080
+ #################################################################
2081
+ ### U T I L I T Y F U N C T I O N S
2082
+ #################################################################
2083
+
2084
### Return a copy of +str+ in which every '*' and '_' is replaced by the
### :md5 value registered for that character in EscapeTable.
def escape_md( str )
  str.gsub( /\*|_/ ) do |symbol|
    EscapeTable[symbol][:md5]
  end
end
2090
+
2091
+
2092
# Matching constructs for tokenizing X/HTML
HTMLCommentRegexp = %r{ <! ( -- .*? -- \s* )+ > }mx
XMLProcInstRegexp = %r{ <\? .*? \?> }mx
MetaTag = Regexp::union( HTMLCommentRegexp, XMLProcInstRegexp )

HTMLTagOpenRegexp = %r{ < [a-z/!$] [^<>]* }imx
HTMLTagCloseRegexp = %r{ > }x
HTMLTagPart = Regexp::union( HTMLTagOpenRegexp, HTMLTagCloseRegexp )

### Break the HTML source in +str+ into a series of tokens and return
### them. The tokens are just 2-element Array tuples with a type (:tag or
### :text) and the actual content. If this function is called with a block,
### the type and text parts of each token will be yielded to it one at a
### time as they are extracted.
def tokenize_html( str )
  tokens = []
  @scanner.string = str.dup

  until @scanner.eos?
    @log.debug "Scanning from %p" % @scanner.rest

    # Match comments and PIs without nesting
    if (( token = @scanner.scan(MetaTag) ))
      type = :tag

    # Do nested matching for HTML tags
    elsif (( token = @scanner.scan(HTMLTagOpenRegexp) ))
      tagstart = @scanner.pos
      @log.debug " Found the start of a plain tag at %d" % tagstart

      # Start the token with the opening angle
      depth = 1
      type = :tag

      # Scan the rest of the tag, allowing unlimited nested <>s. If the
      # scanner runs out of tag parts before the tag is closed, the token
      # is emitted as it stands.
      while depth.nonzero?

        # Scan either an opener or a closer
        chunk = @scanner.scan( HTMLTagPart ) or
          break # AoBane Fix (refer to spec/code-block.rb)

        @log.debug " Found another part of the tag at depth %d: %p" % [ depth, chunk ]

        token += chunk

        # If the last character of the token so far is a closing angle
        # bracket, decrement the depth. Otherwise increment it for a
        # nested tag.
        depth += ( token[-1, 1] == '>' ? -1 : 1 )
        @log.debug " Depth is now #{depth}"
      end

    # Match text segments
    else
      @log.debug " Looking for a chunk of text"
      type = :text

      # Scan forward, always matching at least one character to move the
      # pointer beyond any non-tag '<'.
      token = @scanner.scan_until( /[^<]+/m )

      # When only '<' characters are left nothing matches and the scan
      # pointer does not move, which would loop forever. Emit the
      # remainder as text and finish.
      if token.nil?
        token = @scanner.rest
        @scanner.terminate
      end
    end

    @log.debug " type: %p, token: %p" % [ type, token ]

    # If a block is given, feed it one token at a time. Add the token to
    # the token list to be returned regardless.
    yield( type, token ) if block_given?
    tokens << [ type, token ]
  end

  return tokens
end
2170
+
2171
+
2172
### Return +str+ unchanged. The encoding of bare ampersands and of angle
### brackets that do not start a tag is switched off; the former
### implementation is kept below for reference.
def encode_html( str )
  #str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w+);)/i, "&amp;" ).
  #gsub( %r{<(?![a-z/?\$!])}i, "&lt;" )
  str
end
2178
+
2179
+
2180
### Remove one level of line-leading indentation (a tab, or up to a tab
### width of spaces) from a copy of +str+ and return it.
def outdent( str )
  one_level = /^(\t|[ ]{1,#{TabWidth}})/
  str.gsub( one_level, '' )
end
2185
+
2186
# Return a copy of +str+ with every line prefixed by a tab width of spaces.
def indent(str)
  padding = ' ' * TabWidth
  str.gsub( /^/, padding )
end
2189
+
2190
+ end
2191
+ end