AoBane 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/AoBane.rb ADDED
@@ -0,0 +1,2191 @@
1
+ #
2
+ # AoBane - Extended Markdown Converter
3
+ #
4
+ # Author of Original BlueFeather: Dice <tetradice@gmail.com>
5
+ # Remaker: set.minami <set.minami@gmail.com>
6
+ # Website: https://github.com/setminami/AoBane/blob/master/README.md
7
+ # License: GPL version 2 or later
8
+ #
9
+ # To learn more about AoBane, see the website.
10
+ #
11
+ #
12
+ #
13
+ #-- Copyrights & License -------------------------------------------------------
14
+ #
15
+ # Original Markdown:
16
+ # Copyright (c) 2003-2004 John Gruber
17
+ # <http://daringfireball.net/>
18
+ # All rights reserved.
19
+ #
20
+ # Original BlueCloth:
21
+ # Copyright (c) 2004 The FaerieMUD Consortium.
22
+ #
23
+ # AoBane:
24
+ # Copyright (c) 2013 Set.Minami
25
+ #
26
+ # AoBane is free software; you can redistribute it and/or modify it under
27
+ # the terms of the GNU General Public License as published by the Free Software
28
+ # Foundation; either version 2 of the License, or (at your option) any later
29
+ # version.
30
+ #
31
+ # AoBane is distributed in the hope that it will be useful, but WITHOUT ANY
32
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
33
+ # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
34
+
35
+
36
+ require 'digest/md5'
37
+ require 'logger'
38
+ require 'strscan'
39
+ require 'stringio'
40
+ require 'uri'
41
+
42
+
43
+ module AoBane
44
+ VERSION = '0.01'
45
+ VERSION_NUMBER = 0.01
46
+ RELEASE_DATE = '2013-03-30'
47
+ VERSION_LABEL = "#{VERSION} (#{RELEASE_DATE})"
48
+
49
+ UTF8_BOM = "\xef\xbb\xbf"
50
+ UTF8_BOM_PATTERN = /^#{UTF8_BOM}/
51
+
52
+
53
+ # Fancy methods
54
# Module-level shortcuts. Each one builds a fresh Parser and forwards its
# arguments to the Parser method of the same name.
class << self
  # Render the Markdown text +src+ (see Parser#parse_text).
  def parse_text(src)
    parser = Parser.new
    parser.parse_text(src)
  end

  alias parse parse_text

  # Render +src+ as a complete document (see Parser#parse_document).
  def parse_document(src, default_enc = EncodingType::UTF8)
    parser = Parser.new
    parser.parse_document(src, default_enc)
  end

  # Render the Markdown text stored in the file at +path+.
  def parse_text_file(path)
    parser = Parser.new
    parser.parse_text_file(path)
  end

  alias parse_file parse_text_file

  # Render the file at +path+ as a complete document.
  def parse_document_file(path, default_enc = EncodingType::UTF8)
    parser = Parser.new
    parser.parse_document_file(path, default_enc)
  end
end
76
+
77
+ ### Exception class on AoBane running.
78
# Base class for the exceptions AoBane defines itself.
class Error < ::RuntimeError; end

# Raised by EncodingType.regulate when an encoding name is not supported.
class EncodingError < Error; end
83
+
84
+ ### Exception class for formatting errors.
85
class FormatError < Error

  ### Build the error message from the offending source +str+ and, when
  ### given, a description of the +specific+ problem.
  def initialize( str, specific=nil )
    msg =
      if specific
        "Bad markdown format near %p: %s" % [ str, specific ]
      else
        "Bad markdown format near %p" % str
      end

    super( msg )
  end
end
99
+
100
# Names of the strategies used to generate header ids. The values are frozen
# so that a caller holding one of them cannot alter it for everybody else.
module HeaderIDType
  MD5 = 'md5'.freeze
  ESCAPE = 'escape'.freeze
end
104
+
105
# Canonical names of the encodings AoBane supports, plus helpers that map a
# user-supplied encoding name onto them.
module EncodingType
  EUC = 'euc-jp'
  EUCJP = EUC_JP = EUC

  SJIS = 'shift_jis'
  SHIFT_JIS = SJIS

  UTF8 = 'utf-8'
  UTF_8 = UTF8

  ASCII = 'ascii'
  US_ASCII = ASCII

  # Accepted spellings, keyed by the lower-cased name with every '-' and '_'
  # removed, so that 'UTF8', 'utf_8' and 'utf-8' all resolve to the same entry.
  ALIASES = {
    'shiftjis' => SJIS,
    'sjis'     => SJIS,
    'eucjp'    => EUC,
    'euc'      => EUC,
    'utf8'     => UTF8,
    'ascii'    => ASCII,
    'usascii'  => ASCII
  }.freeze

  # Return the canonical constant (SJIS, EUC, UTF8 or ASCII) for +str_value+.
  # Matching ignores case, surrounding whitespace and '-'/'_' separators.
  # Raises EncodingError for anything else, including nil.
  def self.regulate(str_value)
    key = str_value.to_s.strip.downcase.delete('-_')

    ALIASES.fetch(key) do
      raise EncodingError, "not adapted encoding type - #{str_value} (shift[-_]jis, euc-jp, utf-8, or ascii)"
    end
  end

  # Return the value to assign to $KCODE for +str_value+: the canonical
  # encoding name, or 'none' for ASCII.
  def self.convert_to_kcode(str_value)
    type = regulate(str_value)
    type == ASCII ? 'none' : type
  end

  # Return the charset name to advertise in HTML for +str_value+, or nil for
  # ASCII (no charset declaration is emitted in that case).
  def self.convert_to_charset(str_value)
    case regulate(str_value)
    when EUC
      'euc-jp'
    when SJIS
      'shift_jis'
    when UTF8
      'utf-8'
    when ASCII
      nil
    end
  end

end
159
+
160
# Stateless helpers shared by Document and Parser.
module Util
  # Characters that must be replaced by entities in HTML text and attributes.
  HTML_ESC = {
    '&' => '&amp;',
    '"' => '&quot;',
    '<' => '&lt;',
    '>' => '&gt;'
  }

  module_function

  # from http://jp.rubyist.net/magazine/?0010-CodeReview#l28
  # (Author: Minero Aoki)
  #
  # Return a copy of +str+ with &, ", < and > replaced by their entities.
  # The replacement had been commented out, which let header values reach the
  # generated <title>, <meta> and <link> tags unescaped.
  def escape_html(str)
    table = HTML_ESC # optimize
    str.to_s.gsub(/[&"<>]/) {|s| table[s] }
  end

  # Return an empty StringIO; where StringIO supports encodings it is given
  # the encoding of +encoding_base+.
  def generate_blank_string_io(encoding_base)
    io = StringIO.new

    if io.respond_to?(:set_encoding) then
      io.set_encoding(encoding_base.encoding)
    end

    return io
  end

  # Run the block with $KCODE temporarily set to +kcode+ and restore the old
  # value afterwards. When the Encoding class exists the block is simply
  # yielded to and $KCODE is left alone.
  def change_kcode(kcode = nil)
    if defined?(Encoding) then
      # ruby 1.9 later
      yield
    else
      # ruby 1.8 earlier
      original_kcode = $KCODE

      begin
        $KCODE = kcode if kcode
        yield

      ensure
        # recover
        $KCODE = original_kcode
      end
    end # if defined?
  end # def


  # True when +str+ starts with the three bytes of UTF8_BOM.
  def utf8_bom?(str)
    if str.respond_to?(:getbyte) and str.respond_to?(:bytesize) then
      return false if str.bytesize < 3

      return (0..2).all? {|i| str.getbyte(i) == UTF8_BOM.getbyte(i) }
    else
      return(str =~ UTF8_BOM_PATTERN ? true : false)
    end
  end
end
224
+
225
# A source document: the "Key: value" header lines found at the top of the
# input plus the body text that follows them. Header keys are stored
# lower-cased and header values as strings.
class Document
  HEADER_PATTERN = /^([a-zA-Z0-9-]+?)\s*\:\s*(.+?)\s*(?:\n|\Z)/
  BLANK_LINE_PATTERN = /^\n/
  HEADER_SEQUEL_PATTERN = /^\s+(.+)$/

  attr_accessor :headers, :body
  alias text body
  alias text= body=

  class << self
    # Read headers and body from the IO-like object +input+, starting at its
    # current position, and return a new Document.
    #
    # +default_enc+ is applied to +input+ first; an "Encoding:" header switches
    # the encoding and restarts reading from the initial position.
    # EncodingType.regulate raises EncodingError for unsupported names.
    def parse_io(input, default_enc = EncodingType::UTF8)
      headers = {}
      body = nil
      first_pos = input.pos
      default_enc = EncodingType.regulate(default_enc)

      Util.change_kcode(EncodingType.convert_to_kcode(default_enc)){
        # default encoding
        if defined?(Encoding) then
          input.set_encoding(Encoding.find(default_enc))
        end

        # get headers
        pos_before_gets = nil
        first_line = true

        loop do
          pos_before_gets = input.pos
          line = input.gets

          # cut UTF-8 BOM
          if first_line and Util.utf8_bom?(line) then
            line.slice!(UTF8_BOM_PATTERN)
          end
          first_line = false

          if line and line.chomp =~ HEADER_PATTERN then
            key = $1.downcase; value = $2

            if key == 'encoding' and not headers.include?('encoding') then
              kc = EncodingType.convert_to_kcode(value.downcase)
              if input.respond_to?(:set_encoding) then
                input.set_encoding(EncodingType.regulate(value))

                # rewind (reason => [ruby-list:45988])
                input.pos = first_pos
                first_line = true
              else
                $KCODE = kc
              end
            end

            headers[key] = value
          else
            # EOF or Metadata end
            break
          end
        end

        # back: the line that ended the header section belongs to the body
        input.pos = pos_before_gets

        # skip blank lines
        loop do
          pos_before_gets = input.pos

          line = input.gets
          if line.nil? or not line =~ BLANK_LINE_PATTERN then
            break
          end
        end

        # back: un-read the first non-blank line
        input.pos = pos_before_gets

        # get body
        body = input.read
      }

      return self.new(headers, body)
    end

    # Parse the string +str+ (see parse_io).
    #
    # A copy of +str+ is wrapped in the StringIO: parse_io calls set_encoding
    # on its input, and doing that on a StringIO built directly over +str+
    # can change the encoding tag of the caller's own string.
    def parse(str, default_enc = EncodingType::UTF8)
      parse_io(StringIO.new(str.dup), default_enc)
    end

  end


  # Create a document from a +headers+ hash and a +body+ string. Keys are
  # normalized through #[]= (lower-cased key, string value).
  def initialize(headers = {}, body = '')
    @headers = {}
    headers.each do |k, v|
      self[k] = v
    end
    @body = body
  end

  # Header value for +key+ (case-insensitive), or nil.
  def [](key)
    @headers[key.to_s.downcase]
  end

  # Store +value+ (converted with to_s) under the lower-cased +key+.
  def []=(key, value)
    @headers[key.to_s.downcase] = value.to_s
  end

  # Value of the "title" header, or nil.
  def title
    @headers['title']
  end

  # Value of the "css" header, or nil.
  def css
    @headers['css']
  end

  # True when the "numbering" header is 'yes', '1', 'true' or 'on'.
  def numbering
    case @headers['numbering']
    when 'yes', '1', 'true', 'on'
      true
    else
      false
    end
  end

  alias numbering? numbering

  # Value of the "numbering-start-level" header as an integer; 2 when the
  # header is missing or outside 1..6.
  def numbering_start_level
    level = (@headers['numbering-start-level'] || 2).to_i
    if level >= 1 and level <= 6 then
      return level
    else
      return 2
    end
  end

  # Value of the "encoding" header as written, or EncodingType::UTF8.
  def encoding_type
    @headers['encoding'] || EncodingType::UTF8
  end

  # Lower-cased value of the "header-id-type" header, or HeaderIDType::MD5.
  def header_id_type
    (@headers['header-id-type'] || HeaderIDType::MD5).downcase
  end

  # The $KCODE value matching #encoding_type.
  def kcode
    self.encoding_type && EncodingType.convert_to_kcode(self.encoding_type)
  end

  # Render this document as a complete HTML page.
  def to_html
    Parser.new.document_to_html(self)
  end
end
382
+
383
+
384
+ class Parser
385
+ # Rendering state class Keeps track of URLs, titles, and HTML blocks
386
+ # midway through a render. I prefer this to the globals of the Perl version
387
+ # because globals make me break out in hives. Or something.
388
class RenderState
  # One header collected during rendering.
  Header = Struct.new(:id, :level, :content, :content_html)

  # State inherited from the original BlueCloth.
  attr_accessor :urls, :titles, :html_blocks, :log

  # State added by the AoBane extensions.
  attr_accessor :footnotes, :found_footnote_ids, :warnings
  attr_accessor :headers, :block_transform_depth

  # Option switches.
  attr_accessor :header_id_type
  attr_accessor :numbering, :numbering_start_level
  alias numbering? numbering

  # Start with empty tables, no logger, MD5 header ids and numbering
  # switched off (start level 2).
  def initialize
    @urls = {}
    @titles = {}
    @html_blocks = {}
    @log = nil

    @footnotes = {}
    @found_footnote_ids = []
    @warnings = []
    @headers = []
    @block_transform_depth = 0

    @header_id_type = HeaderIDType::MD5
    @numbering = false
    @numbering_start_level = 2
  end

end
414
+
415
+ # Tab width for #detab! if none is specified
416
+ TabWidth = 4
417
+
418
+ # The tag-closing string -- set to '>' for HTML
419
+ EmptyElementSuffix = " />";
420
+
421
+ # Table of MD5 sums for escaped characters
422
# For every escapable character two entries are stored, in this order: one
# keyed by the bare character and one keyed by its backslashed form. Each
# entry holds the MD5 placeholder of its key, a pattern that finds that
# placeholder, a pattern that finds the backslashed character in source
# text, and the character to restore.
EscapeTable = {}
'\\`*_{}[]()#.!|:~'.each_char do |char|
  [ char, "\\#{char}" ].each do |key|
    digest = Digest::MD5::hexdigest( key )

    EscapeTable[ key ] = {
      :md5 => digest,
      :md5re => Regexp::new( digest ),
      :re => Regexp::new( '\\\\' + Regexp::escape(char) ),
      :unescape => char,
    }
  end
end
442
+
443
+
444
+ #################################################################
445
+ ### I N S T A N C E M E T H O D S
446
+ #################################################################
447
+
448
+ ### Create a new AoBane parser.
449
### Create a new AoBane parser. Each name in +restrictions+ (for example
### :filter_html) is switched on by calling the writer of that name with
### true.
def initialize(*restrictions)
  # Log to $stderr. The previous target, $deferr, is not defined on Ruby 1.9
  # and later, so it evaluated to nil and every log message was discarded.
  @log = Logger::new( $stderr )
  @log.level = $DEBUG ?
    Logger::DEBUG :
    ($VERBOSE ? Logger::INFO : Logger::WARN)
  @scanner = nil

  # Add any restrictions, and set the line-folding attribute to reflect
  # what happens by default.
  @filter_html = nil
  @filter_styles = nil
  restrictions.flatten.each {|r| __send__("#{r}=", true) }
  @fold_lines = true

  @use_header_id = true
  @display_warnings = true

  @log.debug "String is: %p" % self
end
468
+
469
+
470
+ ######
471
+ public
472
+ ######
473
+
474
+ # Filters for controlling what gets output for untrusted input. (But really,
475
+ # you're filtering bad stuff out of untrusted input at submission-time via
476
+ # untainting, aren't you?)
477
+ attr_accessor :filter_html, :filter_styles
478
+
479
+ # RedCloth-compatibility accessor. Line-folding is part of Markdown syntax,
480
+ # so this isn't used by anything.
481
+ attr_accessor :fold_lines
482
+
483
+ # AoBane Extension: display warnings on the top of output html (default: true)
484
+ attr_accessor :display_warnings
485
+
486
+ # AoBane Extension: add id to each header, for toc and anchors. (default: true)
487
+ attr_accessor :use_header_id
488
+
489
+ ### Render Markdown-formatted text in this string object as HTML and return
490
+ ### it. The parameter is for compatibility with RedCloth, and is currently
491
+ ### unused, though that may change in the future.
492
# Plain-text shorthands and the HTML entities that replace them in the
# rendered output (29 entries).
SymbolEntityTable = {
  '--'   => '&mdash;',
  '<=>'  => '&hArr;',
  '<->'  => '&harr;',
  '->'   => '&rarr;',
  '<-'   => '&larr;',
  '=>'   => '&rArr;',
  '<='   => '&lArr;',
  '||^'  => '&uArr;',
  '||/'  => '&dArr;',
  '|/'   => '&darr;',
  '|^'   => '&uarr;',
  '>>'   => '&raquo;',
  '<<'   => '&laquo;',
  '+_'   => '&plusmn;',
  '!='   => '&ne;',
  '~~'   => '&asymp;',
  '~='   => '&cong;',
  '<_'   => '&le;',
  '>_'   => '&ge;',
  '|FA'  => '&forall;',
  '|EX'  => '&exist;',
  '|='   => '&equiv;',
  '(+)'  => '&oplus;',
  '(x)'  => '&otimes;',
  '\\&'  => '&amp;',
  '(c)'  => '&copy;',
  '(R)'  => '&reg;',
  '(SS)' => '&sect;',
  '(TM)' => '&trade;'
}.freeze

# Matches any key of SymbolEntityTable. The pattern is generated from the
# table so the two cannot drift apart, and longer keys come first so that
# e.g. '<=>' is not consumed as '<='.
SymbolEntityRegexp = Regexp.union( SymbolEntityTable.keys.sort_by {|key| -key.length } )

### Render Markdown-formatted text in +source+ as HTML. +rs+ is the
### RenderState to fill in; a fresh one is created when it is omitted.
###
### Returns an Array of output lines (each ending in "\n"), not a single
### String.
def parse_text(source, rs = nil)
  rs ||= RenderState.new

  # check
  case rs.header_id_type
  when HeaderIDType::MD5, HeaderIDType::ESCAPE
  else
    rs.warnings << "illegal header id type - #{rs.header_id_type}"
  end

  # Create a StringScanner we can reuse for various lexing tasks
  @scanner = StringScanner::new( '' )

  # Make a copy of the string with normalized line endings, tabs turned to
  # spaces, and a couple of guaranteed newlines at the end
  text = detab(source.gsub( /\r\n?/, "\n" ))
  text += "\n\n"
  @log.debug "Normalized line-endings: %p" % text

  # AoBane extension (set.minami 2013-03-30):
  #   *[text](color|face#size)  ->  <font color=".." face=".." size="..">text</font>
  text.gsub!(/\*\[(.*?)\]\((.*?)(\|.*?)*(#.*?)*\)/) {
    color = $2 || ''
    face = $3 ? $3.delete('|') : ''
    size = $4 ? $4.delete('#') : ''
    %Q|<font color="#{color}" face="#{face}" size="#{size}">#{$1}</font>|
  }

  # Filter HTML if we're asked to do so
  # NOTE(review): the escaping itself is disabled in this version, so
  # filter_html currently only produces the log line below.
  if self.filter_html
    #text.gsub!( "<", "&lt;" )
    #text.gsub!( ">", "&gt;" )
    @log.debug "Filtered HTML: %p" % text
  end

  # Simplify blank lines
  text.gsub!( /^ +$/, '' )
  @log.debug "Tabs -> spaces/blank lines stripped: %p" % text

  # Replace HTML blocks with placeholders
  text = hide_html_blocks( text, rs )
  @log.debug "Hid HTML blocks: %p" % text
  @log.debug "Render state: %p" % rs

  # Strip footnote definitions, store in render state
  text = strip_footnote_definitions( text, rs )
  @log.debug "Stripped footnote definitions: %p" % text
  @log.debug "Render state: %p" % rs

  # Strip link definitions, store in render state
  text = strip_link_definitions( text, rs )
  @log.debug "Stripped link definitions: %p" % text
  @log.debug "Render state: %p" % rs

  # Escape meta-characters
  text = escape_special_chars( text )
  @log.debug "Escaped special characters: %p" % text

  # Transform block-level constructs
  text = apply_block_transforms( text, rs )
  @log.debug "After block-level transforms: %p" % text

  # Now swap back in all the escaped characters
  text = unescape_special_chars( text )
  @log.debug "After unescaping special characters: %p" % text

  # Extend footnotes
  unless rs.footnotes.empty? then
    text << %Q|<div class="footnotes"><hr#{EmptyElementSuffix}\n<ol>\n|
    rs.found_footnote_ids.each do |id|
      content = rs.footnotes[id]
      html = apply_block_transforms(content.sub(/\n+\Z/, '') + %Q| <a href="#footnote-ref:#{id}" rev="footnote">&#8617;</a>|, rs)
      text << %Q|<li id="footnote:#{id}">\n#{html}\n</li>|
    end
    text << %Q|</ol>\n</div>\n|
  end

  # Display warnings
  if @display_warnings then
    unless rs.warnings.empty? then
      html = %Q|<pre><strong>[WARNINGS]\n|
      html << rs.warnings.map{|x| Util.escape_html(x)}.join("\n")
      html << %Q|</strong></pre>|

      text = html + text
    end
  end

  return replace_symbol_entities( text )
end

### Helper for parse_text: split +html+ into lines and replace the shorthands
### listed in SymbolEntityTable with their entities. Lines from one containing
### "<pre><code>" through the next one containing "</code></pre>" are passed
### through untouched. Returns the Array of lines.
def replace_symbol_entities( html )
  inside_code = false

  html.each_line.map do |line|
    if inside_code
      inside_code = false if line =~ /<\/code><\/pre>/
      line
    elsif line =~ /<pre><code>/
      # stay in "code" mode unless the block is closed on this same line
      inside_code = (line !~ /<\/code><\/pre>/)
      line
    else
      line.gsub( SymbolEntityRegexp ) {|symbol| SymbolEntityTable[symbol] }
    end
  end
end
638
+
639
+ alias parse parse_text
640
+
641
+ # return values are extended. (mainly for testing)
642
# Like parse_text, but returns the pair [html, render_state] so the state
# that was filled in during rendering can be inspected too.
def parse_text_with_render_state(str, rs = nil)
  state = rs || RenderState.new
  rendered = parse_text(str, state)

  [rendered, state]
end
648
+
649
# Read the whole file at +path+ and render its contents with parse_text.
def parse_text_file(path)
  source = File.read(path)
  parse_text(source)
end
652
+
653
+ alias parse_file parse_text_file
654
+
655
+
656
# Parse +source+ into a Document (headers + body) and render it as a
# complete HTML page.
def parse_document(source, default_enc = EncodingType::UTF8)
  document_to_html(Document.parse(source, default_enc))
end
661
+
662
# Parse the file at +path+ into a Document and render it as a complete HTML
# page.
#
# The file is opened with File.open rather than Kernel#open: the latter
# treats a path beginning with "|" as a command to run.
def parse_document_file(path, default_enc = EncodingType::UTF8)
  doc = File.open(path) {|f| Document.parse_io(f, default_enc) }

  return document_to_html(doc)
end
670
+
671
+
672
# Render +doc+ as a complete XHTML page: doctype, a <head> built from the
# document headers (charset, title, description/keywords, stylesheet and
# feed links) and a <body> holding the rendered Markdown. Returns a String.
def document_to_html(doc)
  rs = RenderState.new
  rs.numbering = true if doc.numbering?
  rs.numbering_start_level = doc.numbering_start_level
  rs.header_id_type = doc.header_id_type

  # Render the body first: the headers collected in rs may supply the title.
  body_html = nil
  if doc.encoding_type then
    Util.change_kcode(doc.kcode){
      body_html = parse_text(doc.body, rs)
    }
  else
    body_html = parse_text(doc.body, rs)
  end

  out = Util.generate_blank_string_io(doc.body)

  # XHTML declaration
  out.puts %Q|<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">|

  out.puts %Q|<html>|
  out.puts %Q|<head>|

  # charset declaration (skipped when no charset applies)
  if doc.encoding_type and (charset = EncodingType.convert_to_charset(doc.encoding_type)) then
    out.puts %Q|<meta http-equiv="Content-Type" content="text/html; charset=#{charset}" />|
  end

  # title: explicit header, else the first level-1 header, else a fallback
  first_h1 = rs.headers.find{|x| x.level == 1}
  h1_content = first_h1 ? first_h1.content : nil
  title = Util.escape_html(doc.title || h1_content || 'no title (Generated by AoBane)')
  out.puts %Q|<title>#{title}</title>|

  %w(description keywords).each do |name|
    next unless doc[name]

    content = Util.escape_html(doc[name])
    out.puts %Q|<meta name="#{name}" content="#{content}" />|
  end

  if doc['css'] then
    href = Util.escape_html(doc.css)
    out.puts %Q|<link rel="stylesheet" type="text/css" href="#{href}" />|
  end

  # feed links, in this fixed order
  [
    ['rdf-feed',  'application/rdf+xml'],
    ['rss-feed',  'application/rss+xml'],
    ['atom-feed', 'application/atom+xml'],
  ].each do |header_name, mime_type|
    next unless doc[header_name]

    href = Util.escape_html(doc[header_name])
    out.puts %Q|<link rel="alternate" type="#{mime_type}" href="#{href}" />|
  end

  out.puts %Q|</head>|

  out.puts %Q|<body>|
  out.puts
  out.puts body_html
  out.puts
  out.puts %Q|</body>|

  out.puts %Q|</html>|

  return out.string
end
756
+
757
+ alias doc2html document_to_html
758
+
759
+
760
+
761
+
762
+ #######
763
+ #private
764
+ #######
765
+
766
+ ### Convert tabs in +str+ to spaces.
767
+ ### (this method is reformed to function-like method from original BlueCloth)
768
### Return a copy of +str+ in which each tab is replaced by enough spaces to
### pad the text preceding it (since the previous tab on that line) to a
### multiple of +tabwidth+. Lines are split on "\n" and rejoined, so
### trailing empty lines are dropped.
def detab( str, tabwidth=TabWidth )
  expanded = str.split( /\n/ ).map do |line|
    line.gsub( /(.*?)\t/ ) do
      lead = $1
      lead + ' ' * (tabwidth - lead.length % tabwidth)
    end
  end

  expanded.join("\n")
end
777
+
778
+
779
+
780
+
781
+ ### Do block-level transforms on a copy of +str+ using the specified render
782
+ ### state +rs+ and return the results.
783
### Run +str+ through every block-level transform, in a fixed order, using
### the render state +rs+, and return the result. rs.block_transform_depth is
### incremented for the duration of the call.
def apply_block_transforms( str, rs )
  rs.block_transform_depth += 1

  # Port: This was called '_runBlockGamut' in the original

  @log.debug "Applying block transforms to:\n %p" % str

  # Each step takes (text, rs) and returns the transformed text.
  steps = [
    :pretransform_fenced_code_blocks,
    :pretransform_block_separators,
    :transform_headers,
    :transform_toc,
    :transform_hrules,
    :transform_lists,
    :transform_definition_lists,    # AoBane Extension
    :transform_code_blocks,
    :transform_block_quotes,
    :transform_tables,
    :hide_html_blocks,
    :form_paragraphs,
  ]
  text = steps.inject( str ) {|current, step| __send__( step, current, rs ) }

  rs.block_transform_depth -= 1
  @log.debug "Done with block transforms:\n %p" % text
  return text
end
810
+
811
+
812
+ ### Apply Markdown span transforms to a copy of the specified +str+ with the
813
+ ### given render state +rs+ and return it.
814
### Run +str+ through the span-level transforms (code spans, auto links,
### HTML encoding, images, anchors, emphasis) with the render state +rs+,
### convert hard line breaks, and return the result.
def apply_span_transforms( str, rs )
  @log.debug "Applying span transforms to:\n %p" % str

  steps = [
    :transform_code_spans,
    :transform_auto_links,
    :encode_html,
    :transform_images,
    :transform_anchors,
    :transform_italic_and_bold,
  ]
  str = steps.inject( str ) do |current, step|
    # encode_html is the only step that does not take the render state
    step == :encode_html ? encode_html( current ) : __send__( step, current, rs )
  end

  # Hard breaks: two or more spaces before a newline
  str.gsub!( / {2,}\n/, "<br#{EmptyElementSuffix}\n" )

  @log.debug "Done with span transforms:\n %p" % str
  return str
end
830
+
831
+
832
+ # The list of tags which are considered block-level constructs and an
833
+ # alternation pattern suitable for use in regexps made from the list
834
+ StrictBlockTags = %w[ p div h[1-6] blockquote pre table dl ol ul script noscript
835
+ form fieldset iframe math ins del ]
836
+ StrictTagPattern = StrictBlockTags.join('|')
837
+
838
+ LooseBlockTags = StrictBlockTags - %w[ins del]
839
+ LooseTagPattern = LooseBlockTags.join('|')
840
+
841
+ # Nested blocks:
842
+ # <div>
843
+ # <div>
844
+ # tags for inner block must be indented.
845
+ # </div>
846
+ # </div>
847
+ StrictBlockRegexp = %r{
848
+ ^ # Start of line
849
+ <(#{StrictTagPattern}) # Start tag: \2
850
+ \b # word break
851
+ (.*\n)*? # Any number of lines, minimal match
852
+ </\1> # Matching end tag
853
+ [ ]* # trailing spaces
854
+ $ # End of line or document
855
+ }ix
856
+
857
+ # More-liberal block-matching
858
+ LooseBlockRegexp = %r{
859
+ ^ # Start of line
860
+ <(#{LooseTagPattern}) # start tag: \2
861
+ \b # word break
862
+ (.*\n)*? # Any number of lines, minimal match
863
+ .*</\1> # Anything + Matching end tag
864
+ [ ]* # trailing spaces
865
+ $ # End of line or document
866
+ }ix
867
+
868
+ # Special case for <hr />.
869
+ HruleBlockRegexp = %r{
870
+ ( # $1
871
+ \A\n? # Start of doc + optional \n
872
+ | # or
873
+ .*\n\n # anything + blank line
874
+ )
875
+ ( # save in $2
876
+ # AoBane fix: Not allow any space on line top
877
+ <hr # Tag open
878
+ \b # Word break
879
+ ([^<>])*? # Attributes
880
+ /?> # Tag close
881
+ $ # followed by a blank line or end of document
882
+ )
883
+ }ix
884
+
885
+ ### Replace all blocks of HTML in +str+ that start in the left margin with
886
+ ### tokens.
887
### Return a copy of +str+ in which every HTML block matched by
### StrictBlockRegexp, LooseBlockRegexp or HruleBlockRegexp is replaced by
### the MD5 digest of the block, set off by blank lines. The original
### blocks are stored in rs.html_blocks under their digest.
def hide_html_blocks( str, rs )
  @log.debug "Hiding HTML blocks in %p" % str

  # Stores one block and returns the placeholder text that replaces it
  stash = lambda {|block|
    digest = Digest::MD5::hexdigest( block )
    rs.html_blocks[ digest ] = block
    @log.debug "Replacing %p with %p" % [ block, digest ]
    "\n\n#{digest}\n\n"
  }

  hidden = str.dup

  [
    [ "Finding blocks with the strict regex...", StrictBlockRegexp ],
    [ "Finding blocks with the loose regex...", LooseBlockRegexp ],
  ].each do |note, pattern|
    @log.debug note
    hidden.gsub!( pattern, &stash )
  end

  # For <hr>, $1 is the text leading up to the tag and is kept as is
  @log.debug "Finding hrules..."
  hidden.gsub!( HruleBlockRegexp ) { $1 + stash[$2] }

  return hidden
end
911
+
912
+
913
+ # Link defs are in the form: ^[id]: url "optional title"
914
+ LinkRegexp = %r{
915
+ ^[ ]{0,#{TabWidth - 1}} # AoBane fix: indent < tab width
916
+ \[(.+)\]: # id = $1
917
+ [ ]*
918
+ \n? # maybe *one* newline
919
+ [ ]*
920
+ <?(\S+?)>? # url = $2
921
+ [ ]*
922
+ \n? # maybe one newline
923
+ [ ]*
924
+ (?:
925
+ # Titles are delimited by "quotes" or (parens).
926
+ ["(]
927
+ (.+?) # title = $3
928
+ [")] # Matching ) or "
929
+ [ ]*
930
+ )? # title is optional
931
+ (?:\n+|\Z)
932
+ }x
933
+
934
+ ### Strip link definitions from +str+, storing them in the given RenderState
935
+ ### +rs+.
936
### Return a copy of +str+ with all link definitions removed. Each
### definition's URL (passed through encode_html) and optional title are
### stored in rs.urls / rs.titles under the lower-cased link id.
def strip_link_definitions( str, rs )
  str.gsub( LinkRegexp ) do
    found = Regexp.last_match
    key = found[1].downcase

    rs.urls[ key ] = encode_html( found[2] )
    rs.titles[ key ] = found[3].gsub( /"/, "&quot;" ) if found[3]

    ""
  end
end
948
+
949
+ # Footnotes defs are in the form: [^id]: footnote contents.
950
+ FootnoteDefinitionRegexp = %r{
951
+ ^[ ]{0,#{TabWidth - 1}}
952
+ \[\^(.+?)\]\: # id = $1
953
+ [ ]*
954
+ (.*) # first line content = $2
955
+ (?:\n|\Z)
956
+
957
+ ( # second or more lines content = $3
958
+ (?:
959
+ [ ]{#{TabWidth},} # indented
960
+ .*
961
+ (?:\n|\Z)
962
+ |
963
+ \n # blank line
964
+ )*
965
+ )?
966
+
967
+ }x
968
+
969
+ FootnoteIdRegexp = /^[a-zA-Z0-9\:\._-]+$/
970
+
971
### Return a copy of +str+ with all footnote definitions removed. The
### content of each definition is stored in rs.footnotes under its id; an
### id that does not match FootnoteIdRegexp additionally adds a warning to
### rs.warnings.
def strip_footnote_definitions(str, rs)
  str.gsub( FootnoteDefinitionRegexp ) do
    id, first_line, continuation = $1, $2, $3

    rs.warnings << "illegal footnote id - #{id} (legal chars: a-zA-Z0-9_-.:)" unless id =~ FootnoteIdRegexp

    if continuation then
      @log.debug " Stripping multi-line definition %p, %p" % [$2, $3]
      content = first_line + "\n" + outdent(continuation.chomp)
      @log.debug " Stripped multi-line definition %p, %p" % [id, content]
    else
      content = first_line || ''
      @log.debug " Stripped single-line definition %p, %p" % [id, content]
    end
    rs.footnotes[id] = content

    ""
  end
end
995
+
996
+
997
+ ### Escape special characters in the given +str+
998
### Return a copy of +str+ with its special characters hidden behind MD5
### placeholders: inside HTML tags, '*' and '_' are replaced; in ordinary
### text, backslash escapes are encoded. Raises TypeError when
### tokenize_html yields an unknown token type.
def escape_special_chars( str )
  @log.debug " Escaping special characters"
  escaped = ''

  # The original Markdown source has something called '$tags_to_skip'
  # declared here, but it's never used, so I don't define it.

  tokenize_html( str ) do |kind, chunk|
    @log.debug " Adding %p token %p" % [ kind, chunk ]

    escaped +=
      case kind
      when :tag
        # Within tags, encode * and _
        chunk.
          gsub( /\*/, EscapeTable['*'][:md5] ).
          gsub( /_/, EscapeTable['_'][:md5] )
      when :text
        # Encode backslashed stuff in regular text
        encode_backslash_escapes( chunk )
      else
        raise TypeError, "Unknown token type %p" % kind
      end
  end

  @log.debug " Text with escapes is now: %p" % escaped
  return escaped
end
1026
+
1027
+
1028
+ ### Swap escaped special characters in a copy of the given +str+ and return
1029
+ ### it.
1030
### Replace every MD5 placeholder listed in EscapeTable with the character
### it stands for. +str+ is modified in place and returned.
def unescape_special_chars( str )
  EscapeTable.each_pair do |char, entry|
    placeholder = entry[:md5re]
    @log.debug "Unescaping escaped %p with %p" % [ char, placeholder ]
    str.gsub!( placeholder, entry[:unescape] )
  end

  str
end
1038
+
1039
+
1040
+ ### Return a copy of the given +str+ with any backslashed special character
1041
+ ### in it replaced with MD5 placeholders.
1042
### Return a copy of +str+ in which each backslash-escaped special
### character is replaced by the MD5 placeholder stored in EscapeTable.
### Doubled backslashes are encoded first, so they are not mistaken for
### the start of another escape.
def encode_backslash_escapes( str )
  double_backslash = '\\\\'
  text = str.gsub( /\\\\/, EscapeTable[double_backslash][:md5] )

  EscapeTable.each_pair do |key, entry|
    # only the backslashed keys remain to be encoded
    next if key == double_backslash || key !~ /\\./

    text.gsub!( entry[:re], entry[:md5] )
  end

  text
end
1054
+
1055
+
1056
# Normalize block-separator lines: a line holding only '~', indented by
# fewer than TabWidth spaces and optionally followed by spaces, becomes a
# bare '~' surrounded by blank lines. +rs+ is not used.
def pretransform_block_separators(str, rs)
  separator_line = /^[ ]{0,#{TabWidth - 1}}[~][ ]*\n/
  str.gsub( separator_line, "\n~\n\n" )
end
1061
+
1062
+
1063
+ TOCRegexp = %r{
1064
+ ^\{ # bracket on line-head
1065
+ [ ]* # optional inner space
1066
+ toc
1067
+
1068
+ (?:
1069
+ (?:
1070
+ [:] # colon
1071
+ | # or
1072
+ [ ]+ # 1 or more space
1073
+ )
1074
+ (.+?) # $1 = parameter
1075
+ )?
1076
+
1077
+ [ ]* # optional inner space
1078
+ \} # closer
1079
+ [ ]*$ # optional space on line-foot
1080
+ }ix
1081
+
1082
+ TOCStartLevelRegexp = %r{
1083
+ ^
1084
+ (?: # optional start
1085
+ h
1086
+ ([1-6]) # $1 = start level
1087
+ )?
1088
+
1089
+ (?: # range symbol
1090
+ [.]{2,}|[-] # .. or -
1091
+ )
1092
+
1093
+ (?: # optional end
1094
+ h? # optional 'h'
1095
+ ([1-6]) # $2 = end level
1096
+ )?$
1097
+ }ix
1098
+
1099
+ ### Replace every table-of-contents marker ({toc}) in a copy of the specified
1100
+ ### +str+ with a list of links to the collected headers and return it.
1101
def transform_toc( str, rs )
  @log.debug " Transforming tables of contents"
  str.gsub(TOCRegexp) do
    param = $1

    # Levels listed by default; a valid parameter such as 'h2..h4' overrides
    start_level, end_level = 2, 6

    if param then
      range = TOCStartLevelRegexp.match( param )

      if range and (range[1] or range[2]) then
        start_level = (range[1] ? range[1].to_i : 2)
        end_level = (range[2] ? range[2].to_i : 6)
      else
        rs.warnings << "illegal TOC parameter - #{param} (valid example: 'h2..h4')"
      end
    end

    first_header = rs.headers.first
    if first_header and first_header.level >= (start_level + 1) then
      rs.warnings << "illegal structure of headers - h#{start_level} should be set before h#{rs.headers.first.level}"
    end

    # One Markdown list item per header in range, indented by its depth
    # below start_level.
    listed = rs.headers.select {|header|
      header.level >= start_level and header.level <= end_level
    }
    items = listed.map {|header|
      indent = ' ' * TabWidth * (header.level - start_level)
      indent + '* ' + %Q|<a href="##{header.id}" rel="toc">#{header.content_html}</a>| + "\n"
    }

    "\n\n" + items.join + "\n"
  end
end
1141
+
1142
+ TableRegexp = %r{
1143
+ (?:
1144
+ ^([ ]{0,#{TabWidth - 1}}) # not indented
1145
+ (?:[|][ ]*) # NOT optional border
1146
+
1147
+ \S.*? # 1st cell content
1148
+
1149
+ (?: # 2nd cell or later
1150
+ [|] # cell splitter
1151
+ .+? # content
1152
+ )+ # 1 or more..
1153
+
1154
+ [|]? # optional border
1155
+ (?:\n|\Z) # line end
1156
+ )+
1157
+ }x
1158
+
1159
+ # Transform tables.
1160
# Replace every run of table rows matched by TableRegexp in +str+ with the
# HTML produced by transform_table_rows.
def transform_tables(str, rs)
  str.gsub(TableRegexp) {|table_source| transform_table_rows(table_source, rs) }
end
1165
+
1166
+ TableSeparatorCellRegexp = %r{
1167
+ ^
1168
+ [ ]*
1169
+ ([:])? # $1 = left-align symbol
1170
+ [ ]*
1171
+ [-]+ # border
1172
+ [ ]*
1173
+ ([:])? # $2 = right-align symbol
1174
+ [ ]*
1175
+ $
1176
+ }x
1177
+
1178
# Convert the table source +str+ (one row per line, cells separated by '|')
# into an HTML <table>.
#
# When there are at least three rows and every cell of the second row
# matches TableSeparatorCellRegexp, the first row becomes the <thead> and
# the second supplies per-column alignment (':' on the left, the right, or
# both sides of the dashes). Cell text goes through apply_span_transforms.
def transform_table_rows(str, rs)
  # split cells to 2-d array, dropping the empty cell created by a leading
  # border
  rows = str.split("\n").map do |line|
    cells = line.split('|')
    cells.first.lstrip!
    cells.shift if cells.first.empty?
    cells
  end

  column_attrs = []
  html = ''
  html << "<table>\n"

  has_head = (rows.size >= 3 and rows[1].all?{|cell| cell =~ TableSeparatorCellRegexp})
  if has_head then
    head_row, separator_row = rows.shift(2)

    column_attrs = separator_row.map do |cell|
      marks = TableSeparatorCellRegexp.match( cell )
      left, right = marks[1], marks[2]

      if left and right then
        ' style="text-align: center"'
      elsif right then
        ' style="text-align: right"'
      elsif left then
        ' style="text-align: left"'
      else
        ''
      end
    end

    html << "\t<thead><tr>\n"
    head_row.each_with_index do |cell, i|
      html << "\t\t<th#{column_attrs[i]}>#{apply_span_transforms(cell.strip, rs)}</th>\n"
    end
    html << "\t</tr></thead>\n"
  end

  # data rows
  html << "\t<tbody>\n"
  rows.each do |row|
    html << "\t\t<tr>\n"
    row.each_with_index do |cell, i|
      html << "\t\t\t<td#{column_attrs[i]}>#{apply_span_transforms(cell.strip, rs)}</td>\n"
    end
    html << "\t\t</tr>\n"
  end
  html << "\t</tbody>\n"

  html << "</table>\n"

  html
end
1239
+
1240
+
1241
### Return a copy of +str+ in which each line made of three or more '-',
### '*' or '_' characters (each optionally surrounded by a single space) is
### replaced by an <hr> element on its own line.
def transform_hrules( str, rs )
  @log.debug " Transforming horizontal rules"
  rule_tag = "\n<hr#{EmptyElementSuffix}\n"
  str.gsub( /^( ?[\-\*_] ?){3,}$/, rule_tag )
end
1247
+
1248
+
1249
+
1250
# List markers: "1." style for ordered lists, one of * + - for unordered.
ListMarkerOl = %r{\d+\.}
ListMarkerUl = %r{[*+-]}
ListMarkerAny = Regexp::union( ListMarkerOl, ListMarkerUl )

# Matches a whole list, from its first marker up to the end of the text or
# to a blank line followed by something that is not another list item.
ListRegexp = %r{
  (?:
    ^[ ]{0,#{TabWidth - 1}} # Indent < tab width
    (#{ListMarkerAny}) # unordered or ordered ($1)
    [ ]+ # At least one space
  )
  (?m:.+?) # item content (include newlines)
  (?:
    \z # Either EOF
    | # or
    \n{2,} # Blank line...
    (?=\S) # ...followed by non-space
    (?![ ]* # ...but not another item
      (#{ListMarkerAny})
      [ ]+)
  )
}x

### Return a copy of +str+ with every Markdown list wrapped in <ul> or <ol>
### (chosen from the first marker of the list); the items themselves are
### rendered by transform_list_items.
def transform_lists( str, rs )
  @log.debug " Transforming lists at %p" % (str[0,100] + '...')

  str.gsub( ListRegexp ) do |list|
    @log.debug " Found list %p" % list
    first_marker = $1
    tag = ListMarkerUl.match(first_marker) ? "ul" : "ol"
    items_html = transform_list_items( list, rs )

    "<#{tag}>\n#{items_html}</#{tag}>\n"
  end
end
1290
+
1291
# Matches one list item; the lookahead requires the item to be followed by
# the end of the text or by another marker at the same indentation.
ListItemRegexp = %r{
  (\n)? # leading line = $1
  (^[ ]*) # leading whitespace = $2
  (#{ListMarkerAny}) [ ]+ # list marker = $3
  ((?m:.+?) # list item text = $4
  \n)
  (?= (\n*) (\z | \2 (#{ListMarkerAny}) [ ]+))
}x

### Return a copy of +str+ (the source of one list) with each item turned
### into an <li> element.
###
### An item preceded by a blank line, containing a blank line, or followed
### by blank lines is rendered with the block transforms; any other item is
### scanned for nested lists and then given the span transforms.
def transform_list_items( str, rs )
  @log.debug " Transforming list items"

  # Collapse trailing blank lines into a single newline
  trimmed = str.sub( /\n{2,}\z/, "\n" )

  trimmed.gsub( ListItemRegexp ) do |line|
    @log.debug " Found item line %p" % line
    leading_line = $1
    item = $4
    separating_lines = $5

    loose = leading_line || /\n{2,}/.match(item) || !separating_lines.empty?

    if loose
      @log.debug " Found leading line or item has a blank"
      item = apply_block_transforms( outdent(item), rs )
    else
      # Recursion for sub-lists
      @log.debug " Recursing for sublist"
      nested = transform_lists( outdent(item), rs ).chomp
      item = apply_span_transforms( nested, rs )
    end

    "<li>#{item}</li>\n"
  end
end
1325
+
1326
# Matches a definition list: one or more groups of term lines followed by
# one or more ':'-prefixed definitions.
DefinitionListRegexp = %r{
  (?:
    (?:^.+\n)+ # dt
    \n*
    (?:
      ^[ ]{0,#{TabWidth - 1}} # Indent < tab width
      \: # dd marker (line head)
      [ ]* # space
      ((?m:.+?)) # dd content
      (?:
        \s*\z # end of string
        | # or
        \n{2,} # blank line
        (?=[ ]{0,#{TabWidth - 1}}\S) # ...followed by
      )
    )+
  )+
}x

# Return a copy of +str+ in which each definition list has been replaced by
# the output of transform_definition_list_items.
def transform_definition_lists(str, rs)
  @log.debug " Transforming definition lists at %p" % (str[0,100] + '...')

  str.gsub( DefinitionListRegexp ) do |list|
    @log.debug " Found definition list %p (captures=%p)" % [list, $~.captures]
    transform_definition_list_items(list, rs)
  end
end
1352
+
1353
# First line of a definition: ':' at the line head, up to TabWidth - 1
# spaces, then the text ($1).
DDLineRegexp = /^\:[ ]{0,#{TabWidth - 1}}(.*)/

# Render the source of one definition list (+str+) as a <dl> element and
# return the HTML.
#
# The lines are consumed group by group: term lines (<dt>), optional blank
# lines, then one or more definitions (<dd>). If blank lines separate the
# terms from the definitions, the definitions of that group are rendered
# with the block transforms; otherwise with the span transforms.
def transform_definition_list_items(str, rs)
  # NOTE(review): presumably an empty StringIO derived from str; the helper
  # is defined elsewhere - confirm.
  buf = Util.generate_blank_string_io(str)
  buf.puts %Q|<dl>|

  lines = str.split("\n")
  until lines.empty?

    # terms: every leading non-empty line that does not start with ':'
    dts = []
    dts << lines.shift while lines.first =~ /^(?!\:).+$/

    # blank lines between terms and definitions switch to block rendering
    dd_as_block = false
    while !lines.empty? && lines.first.empty?
      lines.shift
      dd_as_block = true
    end

    dds = []
    while lines.first =~ DDLineRegexp
      dd_buf = []

      # first line of the definition; its text was captured by the test above
      first_text = $1
      unless lines.shift.empty?
        dd_buf << first_text << "\n"
      end

      # continuation lines directly attached to the first line; stop at the
      # end of input, at a blank line, or at the next definition
      until lines.empty? ||
            lines.first =~ /^[ ]{0,#{TabWidth - 1}}$/ ||
            lines.first =~ DDLineRegexp
        dd_buf << outdent(lines.shift) << "\n"
      end

      # further lines separated from the first: blank lines and bodies
      # indented by at least a tab width still belong to this definition
      until lines.empty?
        if lines.first.empty?
          lines.shift
          dd_buf << "\n"
        elsif lines.first =~ /^[ ]{#{TabWidth},}/
          dd_buf << outdent(lines.shift) << "\n"
        else
          break
        end
      end

      dds << dd_buf.join

      # skip blank lines before the next definition or term
      unless lines.empty?
        lines.shift while lines.first.empty?
      end
    end

    # html output for this group
    dts.each do |dt|
      buf.puts %Q| <dt>#{apply_span_transforms(dt, rs)}</dt>|
    end

    dds.each do |dd|
      if dd_as_block
        buf.puts %Q| <dd>#{apply_block_transforms(dd, rs)}</dd>|
      else
        dd.gsub!(/\n+\z/, '') # chomp linefeeds
        buf.puts %Q| <dd>#{apply_span_transforms(dd.chomp, rs)}</dd>|
      end
    end
  end

  buf.puts %Q|</dl>|

  buf.string
end
1442
+
1443
+ # old
1444
+
1445
+
1446
# Matches an indented code block. $1 is the block; $2 is the text consumed
# right after it (the start of the next non-indented line, or nothing at
# the end of the document), which the transform puts back into the output.
CodeBlockRegexp = %r{
  (?:\n\n|\A|\A\n)
  ( # $1 = the code block
    (?:
      (?:[ ]{#{TabWidth}} | \t) # a tab or tab-width of spaces
      .*\n+
    )+
  )
  (^[ ]{0,#{TabWidth - 1}}\S|\Z) # Lookahead for non-space at
                                 # line-start, or end of doc
}x


### Return a copy of +str+ with each indented code block outdented, passed
### through encode_code and wrapped in <pre><code>.
def transform_code_blocks( str, rs )
  @log.debug " Transforming code blocks"

  str.gsub( CodeBlockRegexp ) do |_block|
    codeblock, remainder = $1, $2

    tmpl = %{\n\n<pre><code>%s\n</code></pre>\n\n%s}

    # patch for ruby 1.9.1 bug
    tmpl.force_encoding(str.encoding) if tmpl.respond_to?(:force_encoding)

    code = encode_code( outdent(codeblock), rs ).rstrip

    # recover all backslash escaped to original form
    EscapeTable.each do |char, hash|
      code.gsub!( hash[:md5re] ) { char }
    end

    # Generate the codeblock
    tmpl % [ code, remainder ]
  end
end
1487
+
1488
+
1489
# Matches a block fenced by identical lines of three or more '~';
# $2 is the fenced content including its final newline.
FencedCodeBlockRegexp = /^(\~{3,})\n((?m:.+?)\n)\1\n/

# Return a copy of +str+ in which each fenced code block is rewritten as an
# indented block surrounded by "~" separator paragraphs.
def pretransform_fenced_code_blocks( str, rs )
  @log.debug " Transforming fenced code blocks => standard code blocks"

  separator = "\n~\n\n"
  str.gsub( FencedCodeBlockRegexp ) do |_block|
    separator + indent($2) + separator
  end
end
1498
+
1499
+
1500
+
1501
# Matches a blockquote: one or more chunks that start with a '>' line,
# continue over the following non-blank lines and end with optional blanks.
BlockQuoteRegexp = %r{
  (?:
    ^[ ]*>[ ]? # '>' at the start of a line
    .+\n # rest of the first line
    (?:.+\n)* # subsequent consecutive lines
    \n* # blanks
  )+
}x
PreChunk = %r{ ( ^ \s* <pre> .+? </pre> ) }xm

### Return a copy of +str+ with Markdown blockquotes converted into
### <blockquote> elements. The quoted text is rendered with the block
### transforms and indented by one tab width, except inside <pre> chunks,
### where the added indentation is taken out again.
def transform_block_quotes( str, rs )
  @log.debug " Transforming block quotes"

  str.gsub( BlockQuoteRegexp ) do |quote|
    @log.debug "Making blockquote from %p" % quote

    quote.gsub!( /^ *> ?/, '' ) # Trim one level of quoting
    quote.gsub!( /^ +$/, '' ) # Trim whitespace-only lines

    pad = " " * TabWidth
    inner = apply_block_transforms( quote, rs )
    inner = inner.gsub( /^/, pad )
    inner = inner.gsub( PreChunk ) { |pre| pre.gsub(/^#{pad}/o, '') }

    quoted = "<blockquote>\n#{inner}\n</blockquote>\n\n"
    @log.debug "Blockquoted chunk is: %p" % quoted
    quoted
  end
end
1532
+
1533
+
1534
# AoBane change:
# URLs and addresses are matched loosely (BlueCloth is very strict).
#
# accepted loose examples:
#   <skype:tetra-dice>   (other protocol)
#   <ema+il@example.com> (ex: gmail alias)
#
# addresses that are not supported:
#   <"Abc@def"@example.com> (refer to quoted-string of RFC 5321)

AutoAnchorURLRegexp = /<(#{URI.regexp})>/ # $1 = url

AutoAnchorEmailRegexp = /<([^'">\s]+?\@[^'">\s]+[.][a-zA-Z]+)>/ # $1 = address

### Return a copy of +str+ in which <url> becomes a link to that URL and
### <address> becomes an obfuscated mailto: link.
def transform_auto_links( str, rs )
  @log.debug " Transforming auto-links"

  with_url_links = str.gsub(AutoAnchorURLRegexp) do
    escaped_url = Util.escape_html($1)
    %|<a href="#{escaped_url}">#{escaped_url}</a>|
  end

  with_url_links.gsub( AutoAnchorEmailRegexp ) do |_addr|
    encode_email_address( unescape_special_chars($1) )
  end
end
1559
+
1560
+
1561
+ # Encoder functions to turn characters of an email address into encoded
1562
+ # entities.
1563
+ Encoders = [
1564
+ lambda {|char| "&#%03d;" % char},
1565
+ lambda {|char| "&#x%X;" % char},
1566
+ lambda {|char| char.chr },
1567
+ ]
1568
+
1569
+ ### Transform a copy of the given email +addr+ into an escaped version safer
1570
+ ### for posting publicly.
1571
+ def encode_email_address( addr )
1572
+
1573
+ rval = ''
1574
+ ("mailto:" + addr).each_byte {|b|
1575
+ case b
1576
+ when ?:
1577
+ rval += ":"
1578
+ when ?@
1579
+ rval += Encoders[ rand(2) ][ b ]
1580
+ else
1581
+ r = rand(100)
1582
+ rval += (
1583
+ r > 90 ? Encoders[2][ b ] :
1584
+ r < 45 ? Encoders[1][ b ] :
1585
+ Encoders[0][ b ]
1586
+ )
1587
+ end
1588
+ }
1589
+
1590
+ return %{<a href="%s">%s</a>} % [ rval, rval.sub(/.+?:/, '') ]
1591
+ end
1592
+
1593
+
1594
# Regexp for matching Setext-style headers
SetextHeaderRegexp = %r{
  (.+?) # The title text ($1)

  (?: # Markdown Extra: Header Id Attribute (optional)
    [ ]* # space after closing #'s
    \{\#
    (\S+?) # $2 = Id
    \}
    [ \t]* # allowed lazy spaces
  )?
  \n
  ([\-=])+ # Match a line of = or -. Save only one in $3.
  [ ]*\n+
}x

# Regexp for matching ATX-style headers
AtxHeaderRegexp = %r{
  ^(\#+) # $1 = string of #'s
  [ ]*
  (.+?) # $2 = Header text
  [ ]*
  \#* # optional closing #'s (not counted)

  (?: # Markdown Extra: Header Id Attribute (optional)
    [ ]* # space after closing #'s
    \{\#
    (\S+?) # $3 = Id
    \}
    [ \t]* # allowed lazy spaces
  )?

  \n+
}x

# Either header style. In the union the Setext captures are $1..$3 and the
# ATX captures are $4..$6.
HeaderRegexp = Regexp.union(SetextHeaderRegexp, AtxHeaderRegexp)

# Shape of a header id that does not trigger a warning.
IdRegexp = /^[a-zA-Z][a-zA-Z0-9\:\._-]*$/

### Apply Markdown header transforms to a copy of the given +str+ and render
### state +rs+ and return the result.
###
### For every Setext ("====" / "----" underline) or ATX ("# ...") header:
### * the level is 1 or 2 for Setext headers, the number of '#' for ATX
###   (a warning is recorded for 7 or more);
### * if rs.numbering? is true and the level lies between
###   rs.numbering_start_level and 6, a section number prefix such as "1.2. "
###   is put in front of the title;
### * a header without an explicit {#id} gets a generated id (escaped title
###   or "bfheader-<md5 of title>", depending on rs.header_id_type);
### * headers found at block transform depth 1 are appended to rs.headers;
### * the header is emitted as <hN>, with an id attribute when
###   @use_header_id is set.
def transform_headers( str, rs )
  @log.debug " Transforming headers"

  # One counter per numbering depth. Depth is level - numbering_start_level,
  # so with a start level of 1 an h6 sits at depth 5: six slots are needed
  # (with only five, the h6 prefix lost its last number).
  section_numbers = Array.new(6)

  str.gsub( HeaderRegexp ) do |_m|
    if $1
      @log.debug "Found setext-style header"
      title, id, hdrchar = $1, $2, $3

      case hdrchar
      when '='
        level = 1
      when '-'
        level = 2
      end
    else
      @log.debug "Found ATX-style header"
      hdrchars, title, id = $4, $5, $6
      level = hdrchars.length

      if level >= 7
        rs.warnings << "illegal header level - h#{level} ('#' symbols are too many)"
      end
    end

    prefix = ''
    if rs.numbering? && level >= rs.numbering_start_level && level <= 6
      depth = level - rs.numbering_start_level

      section_numbers.each_index do |i|
        if i == depth && section_numbers[depth]
          # same depth as an earlier header: count up
          section_numbers[i] += 1
        elsif i <= depth
          # first header at this depth (or a skipped parent): start at 1
          section_numbers[i] ||= 1
        else
          # deeper counters restart under the new section
          section_numbers[i] = nil
        end
      end

      no = ''
      (0..depth).each do |i|
        no << "#{section_numbers[i]}."
      end

      prefix = "#{no} "
    end

    title_html = apply_span_transforms( title, rs )

    unless id
      case rs.header_id_type
      when HeaderIDType::ESCAPE
        id = escape_to_header_id(title_html)
        if rs.headers.find { |h| h.id == id }
          rs.warnings << "header id collision - #{id}"
          id = "bfheader-#{Digest::MD5.hexdigest(title)}"
        end
      else
        id = "bfheader-#{Digest::MD5.hexdigest(title)}"
      end
    end

    # the number prefix is added after the id was derived from the bare title
    title = "#{prefix}#{title}"
    title_html = "#{prefix}#{title_html}"

    unless id =~ IdRegexp
      rs.warnings << "illegal header id - #{id} (legal chars: [a-zA-Z0-9_-.] | 1st: [a-zA-Z])"
    end

    if rs.block_transform_depth == 1
      rs.headers << RenderState::Header.new(id, level, title, title_html)
    end

    if @use_header_id
      %{<h%d id="%s">%s</h%d>\n\n} % [ level, id, title_html, level ]
    else
      %{<h%d>%s</h%d>\n\n} % [ level, title_html, level ]
    end
  end
end
1731
+
1732
+
1733
### Split a copy of +str+ into chunks at blank lines and return the chunks
### joined by blank lines again, where each chunk is
### * replaced by its stored HTML if it is a key of rs.html_blocks,
### * dropped (empty string) if it is the "~" block separator,
### * otherwise span-transformed and wrapped in <p>...</p>.
def form_paragraphs( str, rs )
  @log.debug " Forming paragraphs"

  trimmed = str.sub( /\A\n+/, '' ).sub( /\n+\z/, '' )

  rendered = trimmed.split( /\n{2,}/ ).map do |graf|
    if rs.html_blocks.key?( graf )
      # placeholder of a hashed HTML block
      rs.html_blocks[ graf ]
    elsif graf == '~'
      # block separator: no output
      ''
    else
      apply_span_transforms(graf, rs).sub( /^[ ]*/, '<p>' ) + '</p>'
    end
  end

  rval = rendered.join( "\n\n" )

  @log.debug " Formed paragraphs: %p" % rval
  rval
end
1762
+
1763
+
1764
# Matches the "[id]" part that follows the link text of a reference-style
# link.
RefLinkIdRegexp = %r{
  [ ]? # Optional leading space
  (?:\n[ ]*)? # Optional newline + spaces
  \[
    (.*?) # Id = $1
  \]
}x

# Matches the "(url "title")" part that follows the link text of an inline
# link.
InlineLinkRegexp = %r{
  \( # Literal paren
    [ ]* # Zero or more spaces
    <?(.+?)>? # URI = $1
    [ ]* # Zero or more spaces
    (?: #
      ([\"\']) # Opening quote char = $2
      (.*?) # Title = $3
      \2 # Matching quote char
    )? # Title is optional
  \)
}x

### Apply Markdown anchor transforms to a copy of the specified +str+ with
### the given render state +rs+ and return it.
###
### The text is scanned for "[...]" with arbitrarily nested brackets. What
### follows the brackets decides the result: a footnote reference ("[^id]"),
### a reference-style link ("[text][id]"), an inline link ("[text](url)")
### or, failing all of those, the literal source text.
def transform_anchors( str, rs )
  @log.debug " Transforming anchors"
  @scanner.string = str.dup
  text = ''

  # Scan the whole string
  until @scanner.empty?

    # Plain text: copy everything up to the next opening bracket
    unless @scanner.scan( /\[/ )
      @log.debug " Scanning to the next link from %p" % @scanner.rest
      text += @scanner.scan( /[^\[]+/ )
      next
    end

    link = ''; linkid = ''
    depth = 1
    startpos = @scanner.pos
    @log.debug " Found a bracket-open at %d" % startpos

    # Read up to the bracket that closes the opening one, allowing
    # unlimited nesting.
    while depth.nonzero?
      linktext = @scanner.scan_until( /\]|\[/ )

      # Ran out of text before the bracket was closed: not an anchor, so
      # hand back what was collected plus the untouched remainder.
      if linktext.nil?
        @log.debug " Missing closing brace, assuming non-link."
        link += @scanner.rest
        @scanner.terminate
        return text + '[' + link
      end

      @log.debug " Found a bracket at depth %d: %p" % [ depth, linktext ]
      link += linktext

      # A closing bracket goes up one level, an opening one goes down
      depth += ( linktext[-1, 1] == ']' ? -1 : 1 )
      @log.debug " Depth is now #{depth}"
    end
    link.slice!( -1 ) # Trim final ']'
    @log.debug " Found leading link %p" % link

    if link =~ /^\^(.+)/
      # Markdown Extra: Footnote
      id = $1
      if rs.footnotes[id]
        rs.found_footnote_ids << id
        label = "[#{rs.found_footnote_ids.size}]"
      else
        rs.warnings << "undefined footnote id - #{id}"
        label = '[?]'
      end

      text += %Q|<sup id="footnote-ref:#{id}"><a href="#footnote:#{id}" rel="footnote">#{label}</a></sup>|

    elsif @scanner.scan( RefLinkIdRegexp )
      # Reference-style second part; an empty id means "use the link text"
      linkid = @scanner[1]
      linkid = link.dup if linkid.empty?
      linkid.downcase!
      @log.debug " Found a linkid: %p" % linkid

      if rs.urls.key?( linkid )
        # Known id: build the anchor tag from the link table
        @log.debug " Found link key in the link table: %p" % rs.urls[linkid]
        url = escape_md( rs.urls[linkid] )

        text += %{<a href="#{url}"}
        if rs.titles.key?(linkid)
          text += %{ title="%s"} % escape_md( rs.titles[linkid] )
        end
        text += %{>#{link}</a>}
      else
        # Unknown id: warn and keep the raw source
        @log.debug " Linkid %p not found in link table" % linkid
        @log.debug " Appending original string instead: "
        @log.debug "%p" % @scanner.string[ startpos-1 .. @scanner.pos-1 ]

        rs.warnings << "link-id not found - #{linkid}"
        text += @scanner.string[ startpos-1 .. @scanner.pos-1 ]
      end

    elsif @scanner.scan( InlineLinkRegexp )
      # Inline-style second part
      url = @scanner[1]
      title = @scanner[3]
      @log.debug " Found an inline link to %p" % url

      url = "##{link}" if url == '#' # target anchor briefing (since AoBane 0.40)

      text += %{<a href="%s"} % escape_md( url )
      if title
        title.gsub!( /"/, "&quot;" )
        text += %{ title="%s"} % escape_md( title )
      end
      text += %{>#{link}</a>}

    else
      # No second part: just append the bracketed text as-is
      @log.debug "No linkid, so no anchor. Appending literal text."
      text += @scanner.string[ startpos-1 .. @scanner.pos-1 ]
    end

  end # until @scanner.empty?

  return text
end
1905
+
1906
+
1907
# Strong emphasis: text enclosed in a pair of ** or __
BoldRegexp = %r{ (\*\*|__) (\S|\S.*?\S) \1 }x

# Normal emphasis: text enclosed in a pair of * or _
ItalicRegexp = %r{ (\*|_) (\S|\S.*?\S) \1 }x

### Return a copy of +str+ with strong emphasis converted to <strong> and
### then normal emphasis converted to <em>.
def transform_italic_and_bold( str, rs )
  @log.debug " Transforming italic and bold"

  with_strong = str.gsub( BoldRegexp, %{<strong>\\2</strong>} )
  with_strong.gsub( ItalicRegexp, %{<em>\\2</em>} )
end
1922
+
1923
+
1924
### Return +str+ with backtick-delimited spans turned into <code> elements.
### The opening run of backticks sets the delimiter; the content is stripped
### and passed through encode_code. When there is no backtick at all, +str+
### itself is returned. Raises FormatError if a span is never closed.
def transform_code_spans( str, rs )
  @log.debug " Transforming code spans"

  # Nothing to do unless there is at least one backtick
  @scanner.string = str.dup
  unless @scanner.exist?( /`/ )
    @scanner.terminate
    @log.debug "No backticks found for code span in %p" % str
    return str
  end

  @log.debug "Transforming code spans in %p" % str

  # Build the transformed text anew
  text = ''

  until @scanner.empty?
    pre = @scanner.scan_until( /.??(?=`)/m )

    # No more backticks: append the rest and finish
    unless pre
      text += @scanner.rest
      @scanner.terminate
      next
    end

    text += pre
    @log.debug "Found backtick at %d after '...%s'" % [ @scanner.pos, text[-10, 10] ]

    # The closing delimiter is the same run of backticks as the opener
    opener = @scanner.scan( /`+/ )
    len = opener.length
    closer = Regexp::new( opener )
    @log.debug "Scanning for end of code span with %p" % closer

    codespan = @scanner.scan_until( closer )
    if codespan.nil?
      raise FormatError::new( @scanner.rest[0,20],
        "No %p found before end" % opener )
    end

    @log.debug "Found close of code span at %d: %p" % [ @scanner.pos - len, codespan ]

    # Chop the closing backticks off, trim, encode
    codespan.slice!( -len, len )
    text += "<code>%s</code>" % encode_code( codespan.strip, rs )
  end

  return text
end
1978
+
1979
+
1980
# Inline images: ![alt text](url "optional title")
InlineImageRegexp = %r{
  ( # Whole match = $1
    !\[ (.*?) \] # alt text = $2
    \([ ]*
    <?(\S+?)>? # source url = $3
    [ ]*
    (?: #
      (["']) # quote char = $4
      (.*?) # title = $5
      \4 # matching quote
      [ ]*
    )? # title is optional
    \)
  )
}x #"


# Reference-style images: ![alt text][id]
ReferenceImageRegexp = %r{
  ( # Whole match = $1
    !\[ (.*?) \] # Alt text = $2
    [ ]? # Optional space
    (?:\n[ ]*)? # One optional newline + spaces
    \[ (.*?) \] # id = $3
  )
}x

### Return a copy of +str+ with image markup turned into <img> tags.
### Reference-style images are handled first (left untouched when the id is
### not in rs.urls), inline images second.
def transform_images( str, rs )
  @log.debug " Transforming images %p" % str

  with_reference_images = str.gsub( ReferenceImageRegexp ) do |match|
    whole, alt, linkid = $1, $2, $3.downcase
    @log.debug "Matched %p" % match
    alt.gsub!( /"/, '&quot;' )

    # for shortcut links like ![this][].
    linkid = alt.downcase if linkid.empty?

    if rs.urls.key?( linkid )
      url = escape_md( rs.urls[linkid] )
      @log.debug "Found url '%s' for linkid '%s' " % [ url, linkid ]

      # Build the tag
      result = %{<img src="%s" alt="%s"} % [ url, alt ]
      if rs.titles.key?( linkid )
        result += %{ title="%s"} % escape_md( rs.titles[linkid] )
      end
      result += EmptyElementSuffix
    else
      result = whole
    end

    @log.debug "Replacing %p with %p" % [ match, result ]
    result
  end

  with_reference_images.gsub( InlineImageRegexp ) do |match|
    @log.debug "Found inline image %p" % match
    whole, alt, raw_url, title = $1, $2, $3, $5
    url = escape_md( raw_url )
    alt.gsub!( /"/, '&quot;' )

    # Build the tag
    result = %{<img src="%s" alt="%s"} % [ url, alt ]
    if title
      title.gsub!( /"/, '&quot;' )
      result += %{ title="%s"} % escape_md( title )
    end
    result += EmptyElementSuffix

    @log.debug "Replacing %p with %p" % [ match, result ]
    result
  end
end
2062
+
2063
+
2064
# Regexp to match special characters in a code block
CodeEscapeRegexp = %r{( \* | _ | \{ | \} | \[ | \] | \\ )}x

### Return a copy of the code text +str+.
###
### NOTE(review): the escaping this method used to perform is disabled in
### this file (kept below for reference), exactly as in encode_html. With
### the whole body commented out the method returned nil, so callers such
### as `encode_code(...).rstrip` raised NoMethodError and code spans were
### rendered empty. It now passes the text through unchanged; whether the
### escaping should be switched back on is left for the maintainer.
def encode_code( str, rs )
  # Disabled escaping, for reference:
  #str.gsub( %r{&}, '&amp;' ).
  #gsub( %r{<}, '&lt;' ).
  #gsub( %r{>}, '&gt;' ).
  #gsub( CodeEscapeRegexp ) {|match| EscapeTable[match][:md5]}
  str.dup
end
2075
+
2076
# Derive a header id from the header HTML +str+: tags are removed,
# whitespace becomes "_", Markdown specials are hidden by escape_md, "/"
# becomes ".2F", and the result is URI-escaped with every "%" replaced by ".".
def escape_to_header_id(str)
  plain = str.gsub(/<\/?[^>]*>/, "").gsub(/\s/, "_")
  safe = escape_md(plain).gsub("/", ".2F")

  # URI.escape no longer exists on Ruby 3.0 and later; the parser object
  # offers the same escaping. Fall back to URI.escape where there is no
  # DEFAULT_PARSER.
  escaper = defined?(URI::DEFAULT_PARSER) ? URI::DEFAULT_PARSER : URI
  escaper.escape(safe).gsub("%", ".")
end
2079
+
2080
+ #################################################################
2081
+ ### U T I L I T Y F U N C T I O N S
2082
+ #################################################################
2083
+
2084
### Return a copy of +str+ in which every '*' and '_' has been replaced by
### the :md5 entry that EscapeTable holds for that character.
def escape_md( str )
  str.gsub( /\*|_/ ) do |symbol|
    EscapeTable[symbol][:md5]
  end
end
2090
+
2091
+
2092
# Matching constructs for tokenizing X/HTML
HTMLCommentRegexp = %r{ <! ( -- .*? -- \s* )+ > }mx
XMLProcInstRegexp = %r{ <\? .*? \?> }mx
MetaTag = Regexp::union( HTMLCommentRegexp, XMLProcInstRegexp )

HTMLTagOpenRegexp = %r{ < [a-z/!$] [^<>]* }imx
HTMLTagCloseRegexp = %r{ > }x
HTMLTagPart = Regexp::union( HTMLTagOpenRegexp, HTMLTagCloseRegexp )

### Break the HTML source in +str+ into a series of tokens and return
### them. The tokens are 2-element Array tuples of a type (:tag or :text)
### and the matched text. If this function is called with a block, the type
### and text of each token are also yielded to it as they are extracted.
###
### Comments and processing instructions are single :tag tokens. Other tags
### are matched with nested angle brackets; a tag that is never closed is
### returned as far as it could be read.
def tokenize_html( str )
  depth = 0
  tokens = []
  @scanner.string = str.dup
  type, token = nil, nil

  until @scanner.eos?
    @log.debug "Scanning from %p" % @scanner.rest

    # Match comments and PIs without nesting
    if (( token = @scanner.scan(MetaTag) ))
      type = :tag

    # Do nested matching for HTML tags
    elsif (( token = @scanner.scan(HTMLTagOpenRegexp) ))
      tagstart = @scanner.pos
      @log.debug " Found the start of a plain tag at %d" % tagstart

      # Start the token with the opening angle
      depth = 1
      type = :tag

      # Scan the rest of the tag, allowing unlimited nested <>s. If the
      # scanner runs out of tag parts before the tag is closed, stop and
      # keep what was read.
      while depth.nonzero?

        # Scan either an opener or a closer
        chunk = @scanner.scan( HTMLTagPart ) or
          break # AoBane Fix (refer to spec/code-block.rb)

        @log.debug " Found another part of the tag at depth %d: %p" % [ depth, chunk ]

        token += chunk

        # A token ending in '>' closes one level; anything else opened a
        # nested tag.
        depth += ( token[-1, 1] == '>' ? -1 : 1 )
        @log.debug " Depth is now #{depth}"
      end

    # Match text segments
    else
      @log.debug " Looking for a chunk of text"
      type = :text

      # Scan forward, always matching at least one character to move
      # the pointer beyond any non-tag '<'.
      token = @scanner.scan_until( /[^<]+/m )

      # When only '<' characters remain, nothing matches and the scanner
      # does not move; take the remainder as text so the loop terminates.
      if token.nil?
        token = @scanner.rest
        @scanner.terminate
      end
    end

    @log.debug " type: %p, token: %p" % [ type, token ]

    # If a block is given, feed it one token at a time. Add the token to
    # the token list to be returned regardless.
    yield( type, token ) if block_given?
    tokens << [ type, token ]
  end

  return tokens
end
2170
+
2171
+
2172
### Return +str+ unchanged (the very same object).
###
### The encoding of ampersands and angle brackets that this method is named
### after is disabled in this file; the former implementation is kept here
### for reference:
###   str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w+);)/i, "&amp;" ).
###     gsub( %r{<(?![a-z/?\$!])}i, "&lt;" )
def encode_html( str )
  str
end
2178
+
2179
+
2180
### Return a copy of +str+ with one level of indentation removed from every
### line: either one leading tab or between one and TabWidth leading spaces.
def outdent( str )
  one_level = /^(\t|[ ]{1,#{TabWidth}})/
  str.gsub( one_level, '' )
end
2185
+
2186
# Return a copy of +str+ with TabWidth spaces inserted at the start of
# every line.
def indent(str)
  pad = ' ' * TabWidth
  str.gsub( /^/, pad )
end
2189
+
2190
+ end
2191
+ end