AoBane 0.0.3 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/AoBane.rb CHANGED
@@ -1,2196 +1,2253 @@
1
- #
2
- # AoBane - Extended Markdown Converter
3
- #
4
- # Author of Original BlueFeather: Dice <tetradice@gmail.com>
5
- # Remaker: set.minami <set.minami@gmail.com>
6
- # Website: https://github.com/setminami/AoBane/blob/master/README.md
7
- # License: GPL version 2 or later
8
- #
9
- # If you want to know better about AoBane, See the Website.
10
- #
11
- #
12
- #
13
- #-- Copyrights & License -------------------------------------------------------
14
- #
15
- # Original Markdown:
16
- # Copyright (c) 2003-2004 John Gruber
17
- # <http://daringfireball.net/>
18
- # All rights reserved.
19
- #
20
- # Orignal BlueCloth:
21
- # Copyright (c) 2004 The FaerieMUD Consortium.
22
- #
23
- # AoBane:
24
- # Copyright (c) 2013 Set.Minami
25
- #
26
- # AoBane is free software; you can redistribute it and/or modify it under
27
- # the terms of the GNU General Public License as published by the Free Software
28
- # Foundation; either version 2 of the License, or (at your option) any later
29
- # version.
30
- #
31
- # AoBane is distributed in the hope that it will be useful, but WITHOUT ANY
32
- # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
33
- # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
34
-
35
-
36
- require 'digest/md5'
37
- require 'logger'
38
- require 'strscan'
39
- require 'stringio'
40
- require 'uri'
41
- require 'math_ml/string'
42
-
43
- module AoBane
44
- VERSION = '0.01'
45
- VERSION_NUMBER = 0.01
46
- RELEASE_DATE = '2013-03-30'
47
- VERSION_LABEL = "#{VERSION} (#{RELEASE_DATE})"
48
-
49
- UTF8_BOM = "\xef\xbb\xbf"
50
- UTF8_BOM_PATTERN = /^#{UTF8_BOM}/
51
-
52
-
53
- # Fancy methods
54
- class << self
55
- def parse_text(src)
56
- Parser.new.parse_text(src)
57
- end
58
-
59
- alias parse parse_text
60
-
61
- def parse_document(src, default_enc = EncodingType::UTF8)
62
- Parser.new.parse_document(src, default_enc)
63
- end
64
-
65
-
66
- def parse_text_file(path)
67
- Parser.new.parse_text_file(path)
68
- end
69
-
70
- alias parse_file parse_text_file
71
-
72
- def parse_document_file(path, default_enc = EncodingType::UTF8)
73
- Parser.new.parse_document_file(path, default_enc)
74
- end
75
- end
76
-
77
- ### Exception class on AoBane running.
78
- class Error < ::RuntimeError
79
- end
80
-
81
- class EncodingError < Error
82
- end
83
-
84
- ### Exception class for formatting errors.
85
- class FormatError < Error
86
-
87
- ### Create a new FormatError with the given source +str+ and an optional
88
- ### message about the +specific+ error.
89
- def initialize( str, specific=nil )
90
- if specific
91
- msg = "Bad markdown format near %p: %s" % [ str, specific ]
92
- else
93
- msg = "Bad markdown format near %p" % str
94
- end
95
-
96
- super( msg )
97
- end
98
- end
99
-
100
- module HeaderIDType
101
- MD5 = 'md5'
102
- ESCAPE = 'escape'
103
- end
104
-
105
- module EncodingType
106
- EUC = 'euc-jp'
107
- EUCJP = EUC_JP = EUC
108
-
109
- SJIS = 'shift_jis'
110
- SHIFT_JIS = SJIS
111
-
112
- UTF8 = 'utf-8'
113
- UTF_8 = UTF8
114
-
115
- ASCII = 'ascii'
116
- US_ASCII = ASCII
117
-
118
- def self.regulate(str_value)
119
- case str_value.downcase
120
- when 'shift-jis', 'shift_jis'
121
- SJIS
122
- when 'euc-jp'
123
- EUC
124
- when 'utf-8'
125
- UTF8
126
- when 'ascii'
127
- ASCII
128
- else
129
- raise EncodingError, "not adapted encoding type - #{str_value} (shift[-_]jis, euc-jp, utf-8, or ascii)"
130
- end
131
- end
132
-
133
- def self.convert_to_kcode(str_value)
134
- type = self.regulate(str_value)
135
- case type
136
- when EUC, SJIS, UTF8
137
- type
138
- when ASCII
139
- 'none'
140
- end
141
- end
142
-
143
-
144
- def self.convert_to_charset(str_value)
145
- type = self.regulate(str_value)
146
- case type
147
- when EUC
148
- 'euc-jp'
149
- when SJIS
150
- 'shift_jis'
151
- when UTF8
152
- 'utf-8'
153
- when ASCII
154
- nil
155
- end
156
- end
157
-
158
- end
159
-
160
- module Util
161
- HTML_ESC = {
162
- '&' => '&amp;',
163
- '"' => '&quot;',
164
- '<' => '&lt;',
165
- '>' => '&gt;'
166
- }
167
-
168
- module_function
169
-
170
- # from http://jp.rubyist.net/magazine/?0010-CodeReview#l28
171
- # (Author: Minero Aoki)
172
- def escape_html(str)
173
- #table = HTML_ESC # optimize
174
- #str.gsub(/[&"<>]/) {|s| table[s] }
175
- return str
176
- end
177
-
178
- def generate_blank_string_io(encoding_base)
179
- io = StringIO.new
180
-
181
- if io.respond_to?(:set_encoding) then
182
- io.set_encoding(encoding_base.encoding)
183
- end
184
-
185
- return io
186
- end
187
-
188
- def change_kcode(kcode = nil)
189
- if defined?(Encoding) then
190
- # ruby 1.9 later
191
- yield
192
- else
193
- # ruby 1.8 earlier
194
- original_kcode = $KCODE
195
-
196
- begin
197
- $KCODE = kcode if kcode
198
- yield
199
-
200
- ensure
201
- # recover
202
- $KCODE = original_kcode
203
- end
204
- end # if defined?
205
- end # def
206
-
207
-
208
- def utf8_bom?(str)
209
- if str.respond_to?(:getbyte) and str.respond_to?(:bytesize) then
210
- if str.bytesize >= 3 and
211
- str.getbyte(0) == UTF8_BOM.getbyte(0) and
212
- str.getbyte(1) == UTF8_BOM.getbyte(1) and
213
- str.getbyte(2) == UTF8_BOM.getbyte(2) then
214
- return true
215
- else
216
- return false
217
- end
218
-
219
- else
220
- return(str =~ UTF8_BOM_PATTERN ? true : false)
221
- end
222
- end
223
- end
224
-
225
- class Document
226
- HEADER_PATTERN = /^([a-zA-Z0-9-]+?)\s*\:\s*(.+?)\s*(?:\n|\Z)/
227
- BLANK_LINE_PATTERN = /^\n/
228
- HEADER_SEQUEL_PATTERN = /^\s+(.+)$/
229
-
230
- attr_accessor :headers, :body
231
- alias text body
232
- alias text= body=
233
-
234
- class << self
235
- def parse_io(input, default_enc = EncodingType::UTF8)
236
- headers = {}
237
- body = nil
238
- first_pos = input.pos
239
- default_enc = EncodingType.regulate(default_enc)
240
-
241
- Util.change_kcode(EncodingType.convert_to_kcode(default_enc)){
242
- # default encoding
243
- if defined?(Encoding) then
244
- input.set_encoding(Encoding.find(default_enc))
245
- end
246
-
247
-
248
-
249
- # get headers
250
- pos_before_gets = nil
251
- first_line = true
252
-
253
- loop do
254
- pos_before_gets = input.pos
255
- line = input.gets
256
-
257
- # cut UTF-8 BOM
258
- if first_line and Util.utf8_bom?(line) then
259
- line.slice!(UTF8_BOM_PATTERN)
260
- end
261
- first_line = false
262
-
263
- if line and line.chomp =~ HEADER_PATTERN then
264
- key = $1.downcase; value = $2
265
-
266
- if key == 'encoding' and not headers.include?('encoding') then
267
- kc = EncodingType.convert_to_kcode(value.downcase)
268
- if input.respond_to?(:set_encoding) then
269
- input.set_encoding(EncodingType.regulate(value))
270
-
271
- # rewind (reason => [ruby-list:45988])
272
- input.pos = first_pos
273
- first_line = true
274
- else
275
- $KCODE = kc
276
- end
277
- end
278
-
279
- headers[key] = value
280
- else
281
- # EOF or Metadata end
282
- break
283
- end
284
- end
285
-
286
- # back
287
- input.pos = pos_before_gets
288
-
289
-
290
-
291
- # skip blank lines
292
- loop do
293
- pos_before_gets = input.pos
294
-
295
- line = input.gets
296
- if line.nil? or not line =~ BLANK_LINE_PATTERN then
297
- break
298
- end
299
- end
300
-
301
- # back
302
- input.pos = pos_before_gets
303
-
304
-
305
-
306
- # get body
307
- body = input.read
308
-
309
- }
310
-
311
-
312
- return self.new(headers, body)
313
- end
314
-
315
- def parse(str, default_enc = EncodingType::UTF8)
316
- parse_io(StringIO.new(str), default_enc)
317
- end
318
-
319
- end
320
-
321
-
322
- def initialize(headers = {}, body = '')
323
- @headers = {}
324
- headers.each do |k, v|
325
- self[k] = v
326
- end
327
- @body = body
328
- end
329
-
330
- def [](key)
331
- @headers[key.to_s.downcase]
332
- end
333
-
334
- def []=(key, value)
335
- @headers[key.to_s.downcase] = value.to_s
336
- end
337
-
338
- def title
339
- @headers['title']
340
- end
341
-
342
- def css
343
- @headers['css']
344
- end
345
-
346
- def numbering
347
- case @headers['numbering']
348
- when 'yes', '1', 'true', 'on'
349
- true
350
- else
351
- false
352
- end
353
- end
354
-
355
- alias numbering? numbering
356
-
357
- def numbering_start_level
358
- level = (@headers['numbering-start-level'] || 2).to_i
359
- if level >= 1 and level <= 6 then
360
- return level
361
- else
362
- return 2
363
- end
364
- end
365
-
366
- def encoding_type
367
- @headers['encoding'] || EncodingType::UTF8
368
- end
369
-
370
- def header_id_type
371
- (@headers['header-id-type'] || HeaderIDType::MD5).downcase
372
- end
373
-
374
- def kcode
375
- self.encoding_type && EncodingType.convert_to_kcode(self.encoding_type)
376
- end
377
-
378
- def to_html
379
- Parser.new.document_to_html(self)
380
- end
381
- end
382
-
383
-
384
- class Parser
385
- # Rendering state class Keeps track of URLs, titles, and HTML blocks
386
- # midway through a render. I prefer this to the globals of the Perl version
387
- # because globals make me break out in hives. Or something.
388
- class RenderState
389
- # Headers struct.
390
- Header = Struct.new(:id, :level, :content, :content_html)
391
-
392
- # from Original BlueCloth
393
- attr_accessor :urls, :titles, :html_blocks, :log
394
-
395
- # AoBane Extension
396
- attr_accessor :footnotes, :found_footnote_ids, :warnings
397
- attr_accessor :headers, :block_transform_depth
398
- attr_accessor :header_id_type # option switch
399
- attr_accessor :numbering, :numbering_start_level # option switch
400
- alias numbering? numbering
401
-
402
- def initialize
403
- @urls, @titles, @html_blocks = {}, {}, {}
404
- @log = nil
405
- @footnotes, @found_footnote_ids, @warnings = {}, [], []
406
- @headers = []
407
- @block_transform_depth = 0
408
- @header_id_type = HeaderIDType::MD5
409
- @numbering = false
410
- @numbering_start_level = 2
411
- end
412
-
413
- end
414
-
415
- # Tab width for #detab! if none is specified
416
- TabWidth = 4
417
-
418
- # The tag-closing string -- set to '>' for HTML
419
- EmptyElementSuffix = " />";
420
-
421
- # Table of MD5 sums for escaped characters
422
- EscapeTable = {}
423
- '\\`*_{}[]()#.!|:~'.split(//).each {|char|
424
- hash = Digest::MD5::hexdigest( char )
425
-
426
- EscapeTable[ char ] = {
427
- :md5 => hash,
428
- :md5re => Regexp::new( hash ),
429
- :re => Regexp::new( '\\\\' + Regexp::escape(char) ),
430
- :unescape => char,
431
- }
432
-
433
- escaped = "\\#{char}"
434
- hash = Digest::MD5::hexdigest(escaped)
435
- EscapeTable[escaped] = {
436
- :md5 => hash,
437
- :md5re => Regexp::new( hash ),
438
- :re => Regexp::new( '\\\\' + Regexp::escape(char) ),
439
- :unescape => char,
440
- }
441
- }
442
-
443
-
444
- #################################################################
445
- ### I N S T A N C E M E T H O D S
446
- #################################################################
447
-
448
- ### Create a new AoBane parser.
449
- def initialize(*restrictions)
450
- @log = Logger::new( $deferr )
451
- @log.level = $DEBUG ?
452
- Logger::DEBUG :
453
- ($VERBOSE ? Logger::INFO : Logger::WARN)
454
- @scanner = nil
455
-
456
- # Add any restrictions, and set the line-folding attribute to reflect
457
- # what happens by default.
458
- @filter_html = nil
459
- @filter_styles = nil
460
- restrictions.flatten.each {|r| __send__("#{r}=", true) }
461
- @fold_lines = true
462
-
463
- @use_header_id = true
464
- @display_warnings = true
465
-
466
- @log.debug "String is: %p" % self
467
- end
468
-
469
-
470
- ######
471
- public
472
- ######
473
-
474
- # Filters for controlling what gets output for untrusted input. (But really,
475
- # you're filtering bad stuff out of untrusted input at submission-time via
476
- # untainting, aren't you?)
477
- attr_accessor :filter_html, :filter_styles
478
-
479
- # RedCloth-compatibility accessor. Line-folding is part of Markdown syntax,
480
- # so this isn't used by anything.
481
- attr_accessor :fold_lines
482
-
483
- # AoBane Extension: display warnings on the top of output html (default: true)
484
- attr_accessor :display_warnings
485
-
486
- # AoBane Extension: add id to each header, for toc and anchors. (default: true)
487
- attr_accessor :use_header_id
488
-
489
- ### Render Markdown-formatted text in this string object as HTML and return
490
- ### it. The parameter is for compatibility with RedCloth, and is currently
491
- ### unused, though that may change in the future.
492
- def parse_text(source, rs = nil)
493
- rs ||= RenderState.new
494
-
495
- # check
496
- case rs.header_id_type
497
- when HeaderIDType::MD5, HeaderIDType::ESCAPE
498
- else
499
- rs.warnings << "illegal header id type - #{rs.header_id_type}"
500
- end
501
-
502
- # Create a StringScanner we can reuse for various lexing tasks
503
- @scanner = StringScanner::new( '' )
504
-
505
- # Make a copy of the string with normalized line endings, tabs turned to
506
- # spaces, and a couple of guaranteed newlines at the end
507
-
508
- text = detab(source.gsub( /\r\n?/, "\n" ))
509
- text += "\n\n"
510
- @log.debug "Normalized line-endings: %p" % text
511
-
512
- #Insert by set.minami 2013-03-30
513
- text.gsub!(/\*\[(.*?)\]\((.*?)(\|.*?)*(#.*?)*\)/){
514
- |match|
515
- '<font color="' +
516
- if $2.nil? then '' else $2 end +'" ' +
517
- 'face="' +
518
- if $3.nil? then '' else $3.delete('|') end + '" ' +
519
- 'size="' +
520
- if $4.nil? then '' else $4.delete('#') end + '">' +
521
- $1 + '</font>'
522
- }
523
-
524
- #Insert by set.minami 2013-04-01
525
- text.gsub!(/\\TeX{(.*?)\\TeX}/){ |match|
526
- if $1.nil? then '' else $1.to_mathml end
527
- }
528
- #Insert by set.minami
529
-
530
- # Filter HTML if we're asked to do so
531
- if self.filter_html
532
- #text.gsub!( "<", "&lt;" )
533
- #text.gsub!( ">", "&gt;" )
534
- @log.debug "Filtered HTML: %p" % text
535
- end
536
-
537
- # Simplify blank lines
538
- text.gsub!( /^ +$/, '' )
539
- @log.debug "Tabs -> spaces/blank lines stripped: %p" % text
540
-
541
-
542
- # Replace HTML blocks with placeholders
543
- text = hide_html_blocks( text, rs )
544
- @log.debug "Hid HTML blocks: %p" % text
545
- @log.debug "Render state: %p" % rs
546
-
547
-
548
- # Strip footnote definitions, store in render state
549
- text = strip_footnote_definitions( text, rs )
550
- @log.debug "Stripped footnote definitions: %p" % text
551
- @log.debug "Render state: %p" % rs
552
-
553
-
554
- # Strip link definitions, store in render state
555
- text = strip_link_definitions( text, rs )
556
- @log.debug "Stripped link definitions: %p" % text
557
- @log.debug "Render state: %p" % rs
558
-
559
- # Escape meta-characters
560
- text = escape_special_chars( text )
561
- @log.debug "Escaped special characters: %p" % text
562
-
563
- # Transform block-level constructs
564
- text = apply_block_transforms( text, rs )
565
- @log.debug "After block-level transforms: %p" % text
566
-
567
- # Now swap back in all the escaped characters
568
- text = unescape_special_chars( text )
569
- @log.debug "After unescaping special characters: %p" % text
570
-
571
- # Extend footnotes
572
- unless rs.footnotes.empty? then
573
- text << %Q|<div class="footnotes"><hr#{EmptyElementSuffix}\n<ol>\n|
574
- rs.found_footnote_ids.each do |id|
575
- content = rs.footnotes[id]
576
- html = apply_block_transforms(content.sub(/\n+\Z/, '') + %Q| <a href="#footnote-ref:#{id}" rev="footnote">&#8617;</a>|, rs)
577
- text << %Q|<li id="footnote:#{id}">\n#{html}\n</li>|
578
- end
579
- text << %Q|</ol>\n</div>\n|
580
- end
581
-
582
- # Display warnings
583
- if @display_warnings then
584
- unless rs.warnings.empty? then
585
- html = %Q|<pre><strong>[WARNINGS]\n|
586
- html << rs.warnings.map{|x| Util.escape_html(x)}.join("\n")
587
- html << %Q|</strong></pre>|
588
-
589
- text = html + text
590
- end
591
- end
592
-
593
- #Insert by set.minami 2013-03-30
594
- output = []
595
- text.lines {|line|
596
- if /<pre><code>/ =~ line
597
- output << line
598
- next
599
- until /<\/code><\/pre>/ =~ line
600
- output << line
601
- next
602
- end
603
- else
604
- line.gsub!(/\-\-|<=>|<\->|\->|<\-|=>|<=|\|\^|\|\|\/|\|\/|\^|>>|<<|\+_|!=|~~|~=|>_|<_|\|FA|\|EX|\|=|\(+\)|\(x\)|\\&|\(c\)|\(R\)|\(SS\)|\(TM\)/,
605
- "\-\-" => "&mdash;",
606
- "<=" => "&hArr;",
607
- "<\->" => "&harr;",
608
- "\->" =>"&rarr;",
609
- "<\-" =>"&larr;",
610
- "=>" => "&rArr;",
611
- "<=" => "&lArr;",
612
- "\|\|\^" => "&uArr;",
613
- "\|\|\/" => "&dArr;",
614
- "\|\/" => "&darr;",
615
- "\|\^" => "&uarr;",
616
- ">>" => "&raquo;",
617
- "<<" => "&laquo;",
618
- "+_" => "&plusmn;",
619
- "!=" => "&ne;",
620
- "~~" => "&asymp;",
621
- "~=" => "&cong;",
622
- "<_" => "&le;",
623
- ">_" => "&ge",
624
- "\|FA" => "&forall;",
625
- "\|EX" => "&exist;",
626
- "\|=" => "&equiv;",
627
- "\(+\)" => "&oplus",
628
- "\(x\)" => "&otimes;",
629
- "\\&" =>"&amp;",
630
- "\(c\)" => "&copy;",
631
- "\(R\)" =>"&reg;",
632
- "\(SS\)" => "&sect;",
633
- "\(TM\)" => "&trade;" #29
634
- )
635
- output << line
636
- end
637
- }
638
- return output
639
- #Insert by set.minami
640
- #return text
641
-
642
- end
643
-
644
- alias parse parse_text
645
-
646
- # return values are extended. (mainly for testing)
647
- def parse_text_with_render_state(str, rs = nil)
648
- rs ||= RenderState.new
649
- html = parse_text(str, rs)
650
-
651
- return [html, rs]
652
- end
653
-
654
- def parse_text_file(path)
655
- parse_text(File.read(path))
656
- end
657
-
658
- alias parse_file parse_text_file
659
-
660
-
661
- def parse_document(source, default_enc = EncodingType::UTF8)
662
- doc = Document.parse(source, default_enc)
663
-
664
- return document_to_html(doc)
665
- end
666
-
667
- def parse_document_file(path, default_enc = EncodingType::UTF8)
668
- doc = nil
669
- open(path){|f|
670
- doc = Document.parse_io(f, default_enc)
671
- }
672
-
673
- return document_to_html(doc)
674
- end
675
-
676
-
677
- def document_to_html(doc)
678
- rs = RenderState.new
679
- if doc.numbering? then
680
- rs.numbering = true
681
- end
682
- rs.numbering_start_level = doc.numbering_start_level
683
- rs.header_id_type = doc.header_id_type
684
-
685
- body_html = nil
686
-
687
- if doc.encoding_type then
688
- Util.change_kcode(doc.kcode){
689
- body_html = parse_text(doc.body, rs)
690
- }
691
- else
692
- body_html = parse_text(doc.body, rs)
693
- end
694
-
695
- out = Util.generate_blank_string_io(doc.body)
696
-
697
- # XHTML decleration
698
- out.puts %Q|<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">|
699
-
700
- # html start
701
- out.puts %Q|<html>|
702
-
703
- # head
704
- out.puts %Q|<head>|
705
-
706
- if doc.encoding_type and (charset = EncodingType.convert_to_charset(doc.encoding_type)) then
707
- out.puts %Q|<meta http-equiv="Content-Type" content="text/html; charset=#{charset}" />|
708
- end
709
-
710
- h1 = rs.headers.find{|x| x.level == 1}
711
- h1_content = (h1 ? h1.content : nil)
712
- title = Util.escape_html(doc.title || h1_content || 'no title (Generated by AoBane)')
713
- out.puts %Q|<title>#{title}</title>|
714
-
715
- %w(description keywords).each do |name|
716
- if doc[name] then
717
- content = Util.escape_html(doc[name])
718
- out.puts %Q|<meta name="#{name}" content="#{content}" />|
719
- end
720
- end
721
-
722
-
723
- if doc['css'] then
724
- href = Util.escape_html(doc.css)
725
- out.puts %Q|<link rel="stylesheet" type="text/css" href="#{href}" />|
726
-
727
- end
728
-
729
- if doc['rdf-feed'] then
730
- href = Util.escape_html(doc['rdf-feed'])
731
- out.puts %Q|<link rel="alternate" type="application/rdf+xml" href="#{href}" />|
732
- end
733
-
734
-
735
-
736
- if doc['rss-feed'] then
737
- href = Util.escape_html(doc['rss-feed'])
738
- out.puts %Q|<link rel="alternate" type="application/rss+xml" href="#{href}" />|
739
- end
740
-
741
- if doc['atom-feed'] then
742
- href = Util.escape_html(doc['atom-feed'])
743
- out.puts %Q|<link rel="alternate" type="application/atom+xml" href="#{href}" />|
744
- end
745
-
746
- out.puts %Q|</head>|
747
-
748
- # body
749
- out.puts %Q|<body>|
750
- out.puts
751
- out.puts body_html
752
- out.puts
753
- out.puts %Q|</body>|
754
-
755
- # html end
756
- out.puts %Q|</html>|
757
-
758
-
759
- return out.string
760
- end
761
-
762
- alias doc2html document_to_html
763
-
764
-
765
-
766
-
767
- #######
768
- #private
769
- #######
770
-
771
- ### Convert tabs in +str+ to spaces.
772
- ### (this method is reformed to function-like method from original BlueCloth)
773
- def detab( str, tabwidth=TabWidth )
774
- re = str.split( /\n/ ).collect {|line|
775
- line.gsub( /(.*?)\t/ ) do
776
- $1 + ' ' * (tabwidth - $1.length % tabwidth)
777
- end
778
- }.join("\n")
779
-
780
- re
781
- end
782
-
783
-
784
-
785
-
786
- ### Do block-level transforms on a copy of +str+ using the specified render
787
- ### state +rs+ and return the results.
788
- def apply_block_transforms( str, rs )
789
- rs.block_transform_depth += 1
790
-
791
- # Port: This was called '_runBlockGamut' in the original
792
-
793
- @log.debug "Applying block transforms to:\n %p" % str
794
- text = str
795
- text = pretransform_fenced_code_blocks( text, rs )
796
- text = pretransform_block_separators(text, rs)
797
-
798
- text = transform_headers( text, rs )
799
- text = transform_toc(text, rs)
800
-
801
- text = transform_hrules( text, rs )
802
- text = transform_lists( text, rs )
803
- text = transform_definition_lists( text, rs ) # AoBane Extension
804
- text = transform_code_blocks( text, rs )
805
- text = transform_block_quotes( text, rs )
806
- text = transform_tables(text, rs)
807
- text = hide_html_blocks( text, rs )
808
-
809
- text = form_paragraphs( text, rs )
810
-
811
- rs.block_transform_depth -= 1
812
- @log.debug "Done with block transforms:\n %p" % text
813
- return text
814
- end
815
-
816
-
817
- ### Apply Markdown span transforms to a copy of the specified +str+ with the
818
- ### given render state +rs+ and return it.
819
- def apply_span_transforms( str, rs )
820
- @log.debug "Applying span transforms to:\n %p" % str
821
-
822
- str = transform_code_spans( str, rs )
823
- str = transform_auto_links( str, rs )
824
- str = encode_html( str )
825
- str = transform_images( str, rs )
826
- str = transform_anchors( str, rs )
827
- str = transform_italic_and_bold( str, rs )
828
-
829
- # Hard breaks
830
- str.gsub!( / {2,}\n/, "<br#{EmptyElementSuffix}\n" )
831
-
832
- @log.debug "Done with span transforms:\n %p" % str
833
- return str
834
- end
835
-
836
-
837
- # The list of tags which are considered block-level constructs and an
838
- # alternation pattern suitable for use in regexps made from the list
839
- StrictBlockTags = %w[ p div h[1-6] blockquote pre table dl ol ul script noscript
840
- form fieldset iframe math ins del ]
841
- StrictTagPattern = StrictBlockTags.join('|')
842
-
843
- LooseBlockTags = StrictBlockTags - %w[ins del]
844
- LooseTagPattern = LooseBlockTags.join('|')
845
-
846
- # Nested blocks:
847
- # <div>
848
- # <div>
849
- # tags for inner block must be indented.
850
- # </div>
851
- # </div>
852
- StrictBlockRegexp = %r{
853
- ^ # Start of line
854
- <(#{StrictTagPattern}) # Start tag: \2
855
- \b # word break
856
- (.*\n)*? # Any number of lines, minimal match
857
- </\1> # Matching end tag
858
- [ ]* # trailing spaces
859
- $ # End of line or document
860
- }ix
861
-
862
- # More-liberal block-matching
863
- LooseBlockRegexp = %r{
864
- ^ # Start of line
865
- <(#{LooseTagPattern}) # start tag: \2
866
- \b # word break
867
- (.*\n)*? # Any number of lines, minimal match
868
- .*</\1> # Anything + Matching end tag
869
- [ ]* # trailing spaces
870
- $ # End of line or document
871
- }ix
872
-
873
- # Special case for <hr />.
874
- HruleBlockRegexp = %r{
875
- ( # $1
876
- \A\n? # Start of doc + optional \n
877
- | # or
878
- .*\n\n # anything + blank line
879
- )
880
- ( # save in $2
881
- # AoBane fix: Not allow any space on line top
882
- <hr # Tag open
883
- \b # Word break
884
- ([^<>])*? # Attributes
885
- /?> # Tag close
886
- $ # followed by a blank line or end of document
887
- )
888
- }ix
889
-
890
- ### Replace all blocks of HTML in +str+ that start in the left margin with
891
- ### tokens.
892
- def hide_html_blocks( str, rs )
893
- @log.debug "Hiding HTML blocks in %p" % str
894
-
895
- # Tokenizer proc to pass to gsub
896
- tokenize = lambda {|match|
897
- key = Digest::MD5::hexdigest( match )
898
- rs.html_blocks[ key ] = match
899
- @log.debug "Replacing %p with %p" % [ match, key ]
900
- "\n\n#{key}\n\n"
901
- }
902
-
903
- rval = str.dup
904
-
905
- @log.debug "Finding blocks with the strict regex..."
906
- rval.gsub!( StrictBlockRegexp, &tokenize )
907
-
908
- @log.debug "Finding blocks with the loose regex..."
909
- rval.gsub!( LooseBlockRegexp, &tokenize )
910
-
911
- @log.debug "Finding hrules..."
912
- rval.gsub!( HruleBlockRegexp ) {|match| $1 + tokenize[$2] }
913
-
914
- return rval
915
- end
916
-
917
-
918
- # Link defs are in the form: ^[id]: url "optional title"
919
- LinkRegexp = %r{
920
- ^[ ]{0,#{TabWidth - 1}} # AoBane fix: indent < tab width
921
- \[(.+)\]: # id = $1
922
- [ ]*
923
- \n? # maybe *one* newline
924
- [ ]*
925
- <?(\S+?)>? # url = $2
926
- [ ]*
927
- \n? # maybe one newline
928
- [ ]*
929
- (?:
930
- # Titles are delimited by "quotes" or (parens).
931
- ["(]
932
- (.+?) # title = $3
933
- [")] # Matching ) or "
934
- [ ]*
935
- )? # title is optional
936
- (?:\n+|\Z)
937
- }x
938
-
939
- ### Strip link definitions from +str+, storing them in the given RenderState
940
- ### +rs+.
941
- def strip_link_definitions( str, rs )
942
- str.gsub( LinkRegexp ) {|match|
943
- id, url, title = $1, $2, $3
944
-
945
- rs.urls[ id.downcase ] = encode_html( url )
946
- unless title.nil?
947
- rs.titles[ id.downcase ] = title.gsub( /"/, "&quot;" )
948
- end
949
-
950
- ""
951
- }
952
- end
953
-
954
- # Footnotes defs are in the form: [^id]: footnote contents.
955
- FootnoteDefinitionRegexp = %r{
956
- ^[ ]{0,#{TabWidth - 1}}
957
- \[\^(.+?)\]\: # id = $1
958
- [ ]*
959
- (.*) # first line content = $2
960
- (?:\n|\Z)
961
-
962
- ( # second or more lines content = $3
963
- (?:
964
- [ ]{#{TabWidth},} # indented
965
- .*
966
- (?:\n|\Z)
967
- |
968
- \n # blank line
969
- )*
970
- )?
971
-
972
- }x
973
-
974
- FootnoteIdRegexp = /^[a-zA-Z0-9\:\._-]+$/
975
-
976
- def strip_footnote_definitions(str, rs)
977
- str.gsub( FootnoteDefinitionRegexp ) {|match|
978
- id = $1; content1 = $2; content2 = $3
979
-
980
- unless id =~ FootnoteIdRegexp then
981
- rs.warnings << "illegal footnote id - #{id} (legal chars: a-zA-Z0-9_-.:)"
982
- end
983
-
984
- if content2 then
985
- @log.debug " Stripping multi-line definition %p, %p" % [$2, $3]
986
- content = content1 + "\n" + outdent(content2.chomp)
987
- @log.debug " Stripped multi-line definition %p, %p" % [id, content]
988
- rs.footnotes[id] = content
989
- else
990
- content = content1 || ''
991
- @log.debug " Stripped single-line definition %p, %p" % [id, content]
992
- rs.footnotes[id] = content
993
- end
994
-
995
-
996
-
997
- ""
998
- }
999
- end
1000
-
1001
-
1002
- ### Escape special characters in the given +str+
1003
- def escape_special_chars( str )
1004
- @log.debug " Escaping special characters"
1005
- text = ''
1006
-
1007
- # The original Markdown source has something called '$tags_to_skip'
1008
- # declared here, but it's never used, so I don't define it.
1009
-
1010
- tokenize_html( str ) {|token, str|
1011
- @log.debug " Adding %p token %p" % [ token, str ]
1012
- case token
1013
-
1014
- # Within tags, encode * and _
1015
- when :tag
1016
- text += str.
1017
- gsub( /\*/, EscapeTable['*'][:md5] ).
1018
- gsub( /_/, EscapeTable['_'][:md5] )
1019
-
1020
- # Encode backslashed stuff in regular text
1021
- when :text
1022
- text += encode_backslash_escapes( str )
1023
- else
1024
- raise TypeError, "Unknown token type %p" % token
1025
- end
1026
- }
1027
-
1028
- @log.debug " Text with escapes is now: %p" % text
1029
- return text
1030
- end
1031
-
1032
-
1033
- ### Swap escaped special characters in a copy of the given +str+ and return
1034
- ### it.
1035
- def unescape_special_chars( str )
1036
- EscapeTable.each {|char, hash|
1037
- @log.debug "Unescaping escaped %p with %p" % [ char, hash[:md5re] ]
1038
- str.gsub!( hash[:md5re], hash[:unescape] )
1039
- }
1040
-
1041
- return str
1042
- end
1043
-
1044
-
1045
- ### Return a copy of the given +str+ with any backslashed special character
1046
- ### in it replaced with MD5 placeholders.
1047
- def encode_backslash_escapes( str )
1048
- # Make a copy with any double-escaped backslashes encoded
1049
- text = str.gsub( /\\\\/, EscapeTable['\\\\'][:md5] )
1050
-
1051
- EscapeTable.each_pair {|char, esc|
1052
- next if char == '\\\\'
1053
- next unless char =~ /\\./
1054
- text.gsub!( esc[:re], esc[:md5] )
1055
- }
1056
-
1057
- return text
1058
- end
1059
-
1060
-
1061
- def pretransform_block_separators(str, rs)
1062
- str.gsub(/^[ ]{0,#{TabWidth - 1}}[~][ ]*\n/){
1063
- "\n~\n\n"
1064
- }
1065
- end
1066
-
1067
-
1068
- TOCRegexp = %r{
1069
- ^\{ # bracket on line-head
1070
- [ ]* # optional inner space
1071
- toc
1072
-
1073
- (?:
1074
- (?:
1075
- [:] # colon
1076
- | # or
1077
- [ ]+ # 1 or more space
1078
- )
1079
- (.+?) # $1 = parameter
1080
- )?
1081
-
1082
- [ ]* # optional inner space
1083
- \} # closer
1084
- [ ]*$ # optional space on line-foot
1085
- }ix
1086
-
1087
- TOCStartLevelRegexp = %r{
1088
- ^
1089
- (?: # optional start
1090
- h
1091
- ([1-6]) # $1 = start level
1092
- )?
1093
-
1094
- (?: # range symbol
1095
- [.]{2,}|[-] # .. or -
1096
- )
1097
-
1098
- (?: # optional end
1099
- h? # optional 'h'
1100
- ([1-6]) # $2 = end level
1101
- )?$
1102
- }ix
1103
-
1104
- ### Transform any Markdown-style horizontal rules in a copy of the specified
1105
- ### +str+ and return it.
1106
- def transform_toc( str, rs )
1107
- @log.debug " Transforming tables of contents"
1108
- str.gsub(TOCRegexp){
1109
- start_level = 2 # default
1110
- end_level = 6
1111
-
1112
- param = $1
1113
- if param then
1114
- if param =~ TOCStartLevelRegexp then
1115
- if !($1) and !($2) then
1116
- rs.warnings << "illegal TOC parameter - #{param} (valid example: 'h2..h4')"
1117
- else
1118
- start_level = ($1 ? $1.to_i : 2)
1119
- end_level = ($2 ? $2.to_i : 6)
1120
- end
1121
- else
1122
- rs.warnings << "illegal TOC parameter - #{param} (valid example: 'h2..h4')"
1123
- end
1124
- end
1125
-
1126
- if rs.headers.first and rs.headers.first.level >= (start_level + 1) then
1127
- rs.warnings << "illegal structure of headers - h#{start_level} should be set before h#{rs.headers.first.level}"
1128
- end
1129
-
1130
-
1131
- ul_text = "\n\n"
1132
- rs.headers.each do |header|
1133
- if header.level >= start_level and header.level <= end_level then
1134
- ul_text << ' ' * TabWidth * (header.level - start_level)
1135
- ul_text << '* '
1136
- ul_text << %Q|<a href="##{header.id}" rel="toc">#{header.content_html}</a>|
1137
- ul_text << "\n"
1138
- end
1139
- end
1140
- ul_text << "\n"
1141
-
1142
- ul_text # output
1143
-
1144
- }
1145
- end
1146
-
1147
- TableRegexp = %r{
1148
- (?:
1149
- ^([ ]{0,#{TabWidth - 1}}) # not indented
1150
- (?:[|][ ]*) # NOT optional border
1151
-
1152
- \S.*? # 1st cell content
1153
-
1154
- (?: # 2nd cell or later
1155
- [|] # cell splitter
1156
- .+? # content
1157
- )+ # 1 or more..
1158
-
1159
- [|]? # optional border
1160
- (?:\n|\Z) # line end
1161
- )+
1162
- }x
1163
-
1164
- # Transform tables.
1165
- def transform_tables(str, rs)
1166
- str.gsub(TableRegexp){
1167
- transform_table_rows($~[0], rs)
1168
- }
1169
- end
1170
-
1171
- TableSeparatorCellRegexp = %r{
1172
- ^
1173
- [ ]*
1174
- ([:])? # $1 = left-align symbol
1175
- [ ]*
1176
- [-]+ # border
1177
- [ ]*
1178
- ([:])? # $2 = right-align symbol
1179
- [ ]*
1180
- $
1181
- }x
1182
-
1183
- def transform_table_rows(str, rs)
1184
-
1185
- # split cells to 2-d array
1186
- data = str.split("\n").map{|x| x.split('|')}
1187
-
1188
-
1189
- data.each do |row|
1190
- # cut left space
1191
- row.first.lstrip!
1192
-
1193
- # drop the empty first cell left by an optional left-side border
1194
- row.shift if row.first.empty?
1195
- end
1196
-
1197
- column_attrs = []
1198
-
1199
- re = ''
1200
- re << "<table>\n"
1201
-
1202
- # does a header row exist?
1203
- if data.size >= 3 and data[1].all?{|x| x =~ TableSeparatorCellRegexp} then
1204
- head_row = data.shift
1205
- separator_row = data.shift
1206
-
1207
- separator_row.each do |cell|
1208
- cell.match TableSeparatorCellRegexp
1209
- left = $1; right = $2
1210
-
1211
- if left and right then
1212
- column_attrs << ' style="text-align: center"'
1213
- elsif right then
1214
- column_attrs << ' style="text-align: right"'
1215
- elsif left then
1216
- column_attrs << ' style="text-align: left"'
1217
- else
1218
- column_attrs << ''
1219
- end
1220
- end
1221
-
1222
- re << "\t<thead><tr>\n"
1223
- head_row.each_with_index do |cell, i|
1224
- re << "\t\t<th#{column_attrs[i]}>#{apply_span_transforms(cell.strip, rs)}</th>\n"
1225
- end
1226
- re << "\t</tr></thead>\n"
1227
- end
1228
-
1229
- # data row
1230
- re << "\t<tbody>\n"
1231
- data.each do |row|
1232
- re << "\t\t<tr>\n"
1233
- row.each_with_index do |cell, i|
1234
- re << "\t\t\t<td#{column_attrs[i]}>#{apply_span_transforms(cell.strip, rs)}</td>\n"
1235
- end
1236
- re << "\t\t</tr>\n"
1237
- end
1238
- re << "\t</tbody>\n"
1239
-
1240
- re << "</table>\n"
1241
-
1242
- re
1243
- end
1244
-
1245
-
1246
- ### Transform any Markdown-style horizontal rules in a copy of the specified
1247
- ### +str+ and return it.
1248
- def transform_hrules( str, rs )
1249
- @log.debug " Transforming horizontal rules"
1250
- str.gsub( /^( ?[\-\*_] ?){3,}$/, "\n<hr#{EmptyElementSuffix}\n" )
1251
- end
1252
-
1253
-
1254
-
1255
- # Patterns to match and transform lists
1256
- ListMarkerOl = %r{\d+\.}
1257
- ListMarkerUl = %r{[*+-]}
1258
- ListMarkerAny = Regexp::union( ListMarkerOl, ListMarkerUl )
1259
-
1260
- ListRegexp = %r{
1261
- (?:
1262
- ^[ ]{0,#{TabWidth - 1}} # Indent < tab width
1263
- (#{ListMarkerAny}) # unordered or ordered ($1)
1264
- [ ]+ # At least one space
1265
- )
1266
- (?m:.+?) # item content (include newlines)
1267
- (?:
1268
- \z # Either EOF
1269
- | # or
1270
- \n{2,} # Blank line...
1271
- (?=\S) # ...followed by non-space
1272
- (?![ ]* # ...but not another item
1273
- (#{ListMarkerAny})
1274
- [ ]+)
1275
- )
1276
- }x
1277
-
1278
- ### Transform Markdown-style lists in a copy of the specified +str+ and
1279
- ### return it.
1280
- def transform_lists( str, rs )
1281
- @log.debug " Transforming lists at %p" % (str[0,100] + '...')
1282
-
1283
- str.gsub( ListRegexp ) {|list|
1284
- @log.debug " Found list %p" % list
1285
- bullet = $1
1286
- list_type = (ListMarkerUl.match(bullet) ? "ul" : "ol")
1287
-
1288
- %{<%s>\n%s</%s>\n} % [
1289
- list_type,
1290
- transform_list_items( list, rs ),
1291
- list_type,
1292
- ]
1293
- }
1294
- end
1295
-
1296
- # Pattern for transforming list items
1297
- ListItemRegexp = %r{
1298
- (\n)? # leading line = $1
1299
- (^[ ]*) # leading whitespace = $2
1300
- (#{ListMarkerAny}) [ ]+ # list marker = $3
1301
- ((?m:.+?) # list item text = $4
1302
- \n)
1303
- (?= (\n*) (\z | \2 (#{ListMarkerAny}) [ ]+))
1304
- }x
1305
-
1306
- ### Transform list items in a copy of the given +str+ and return it.
1307
- def transform_list_items( str, rs )
1308
- @log.debug " Transforming list items"
1309
-
1310
- # Trim trailing blank lines
1311
- str = str.sub( /\n{2,}\z/, "\n" )
1312
- str.gsub( ListItemRegexp ) {|line|
1313
- @log.debug " Found item line %p" % line
1314
- leading_line, item = $1, $4
1315
- separating_lines = $5
1316
-
1317
- if leading_line or /\n{2,}/.match(item) or not separating_lines.empty? then
1318
- @log.debug " Found leading line or item has a blank"
1319
- item = apply_block_transforms( outdent(item), rs )
1320
- else
1321
- # Recursion for sub-lists
1322
- @log.debug " Recursing for sublist"
1323
- item = transform_lists( outdent(item), rs ).chomp
1324
- item = apply_span_transforms( item, rs )
1325
- end
1326
-
1327
- %{<li>%s</li>\n} % item
1328
- }
1329
- end
1330
-
1331
- DefinitionListRegexp = %r{
1332
- (?:
1333
- (?:^.+\n)+ # dt
1334
- \n*
1335
- (?:
1336
- ^[ ]{0,#{TabWidth - 1}} # Indent < tab width
1337
- \: # dd marker (line head)
1338
- [ ]* # space
1339
- ((?m:.+?)) # dd content
1340
- (?:
1341
- \s*\z # end of string
1342
- | # or
1343
- \n{2,} # blank line
1344
- (?=[ ]{0,#{TabWidth - 1}}\S) # ...followed by
1345
- )
1346
- )+
1347
- )+
1348
- }x
1349
-
1350
- def transform_definition_lists(str, rs)
1351
- @log.debug " Transforming definition lists at %p" % (str[0,100] + '...')
1352
- str.gsub( DefinitionListRegexp ) {|list|
1353
- @log.debug " Found definition list %p (captures=%p)" % [list, $~.captures]
1354
- transform_definition_list_items(list, rs)
1355
- }
1356
- end
1357
-
1358
- DDLineRegexp = /^\:[ ]{0,#{TabWidth - 1}}(.*)/
1359
-
1360
-
1361
- def transform_definition_list_items(str, rs)
1362
- buf = Util.generate_blank_string_io(str)
1363
- buf.puts %Q|<dl>|
1364
-
1365
- lines = str.split("\n")
1366
- until lines.empty? do
1367
-
1368
- dts = []
1369
-
1370
- # get dt items
1371
- while lines.first =~ /^(?!\:).+$/ do
1372
- dts << lines.shift
1373
- end
1374
-
1375
-
1376
- dd_as_block = false
1377
-
1378
- # skip blank lines
1379
- while not lines.empty? and lines.first.empty? do
1380
- lines.shift
1381
- dd_as_block = true
1382
- end
1383
-
1384
-
1385
- dds = []
1386
- while lines.first =~ DDLineRegexp do
1387
- dd_buf = []
1388
-
1389
- # dd first line
1390
- unless (line = lines.shift).empty? then
1391
- dd_buf << $1 << "\n"
1392
- end
1393
-
1394
- # dd second and more lines (sequential with 1st-line)
1395
- until lines.empty? or # stop if read all
1396
- lines.first =~ /^[ ]{0,#{TabWidth - 1}}$/ or # stop if blank line
1397
- lines.first =~ DDLineRegexp do # stop if new dd found
1398
- dd_buf << outdent(lines.shift) << "\n"
1399
- end
1400
-
1401
- # dd second and more lines (separated with 1st-line)
1402
- until lines.empty? do # stop if all was read
1403
- if lines.first.empty? then
1404
- # blank line (skip)
1405
- lines.shift
1406
- dd_buf << "\n"
1407
- elsif lines.first =~ /^[ ]{#{TabWidth},}/ then
1408
- # indented body
1409
- dd_buf << outdent(lines.shift) << "\n"
1410
- else
1411
- # not indented body
1412
- break
1413
- end
1414
-
1415
- end
1416
-
1417
-
1418
- dds << dd_buf.join
1419
-
1420
- # skip blank lines
1421
- unless lines.empty? then
1422
- while lines.first.empty? do
1423
- lines.shift
1424
- end
1425
- end
1426
- end
1427
-
1428
- # html output
1429
- dts.each do |dt|
1430
- buf.puts %Q| <dt>#{apply_span_transforms(dt, rs)}</dt>|
1431
- end
1432
-
1433
- dds.each do |dd|
1434
- if dd_as_block then
1435
- buf.puts %Q| <dd>#{apply_block_transforms(dd, rs)}</dd>|
1436
- else
1437
- dd.gsub!(/\n+\z/, '') # chomp linefeeds
1438
- buf.puts %Q| <dd>#{apply_span_transforms(dd.chomp, rs)}</dd>|
1439
- end
1440
- end
1441
- end
1442
-
1443
- buf.puts %Q|</dl>|
1444
-
1445
- return(buf.string)
1446
- end
1447
-
1448
- # old
1449
-
1450
-
1451
- # Pattern for matching codeblocks
1452
- CodeBlockRegexp = %r{
1453
- (?:\n\n|\A|\A\n)
1454
- ( # $1 = the code block
1455
- (?:
1456
- (?:[ ]{#{TabWidth}} | \t) # a tab or tab-width of spaces
1457
- .*\n+
1458
- )+
1459
- )
1460
- (^[ ]{0,#{TabWidth - 1}}\S|\Z) # Lookahead for non-space at
1461
- # line-start, or end of doc
1462
- }x
1463
-
1464
-
1465
- ### Transform Markdown-style codeblocks in a copy of the specified +str+ and
1466
- ### return it.
1467
- def transform_code_blocks( str, rs )
1468
- @log.debug " Transforming code blocks"
1469
-
1470
- str.gsub( CodeBlockRegexp ) {|block|
1471
- codeblock = $1
1472
- remainder = $2
1473
-
1474
-
1475
- tmpl = %{\n\n<pre><code>%s\n</code></pre>\n\n%s}
1476
-
1477
- # patch for ruby 1.9.1 bug
1478
- if tmpl.respond_to?(:force_encoding) then
1479
- tmpl.force_encoding(str.encoding)
1480
- end
1481
- args = [ encode_code( outdent(codeblock), rs ).rstrip, remainder ]
1482
-
1483
- # recover all backslash escaped to original form
1484
- EscapeTable.each {|char, hash|
1485
- args[0].gsub!( hash[:md5re]){char}
1486
- }
1487
-
1488
- # Generate the codeblock
1489
- tmpl % args
1490
- }
1491
- end
1492
-
1493
-
1494
- FencedCodeBlockRegexp = /^(\~{3,})\n((?m:.+?)\n)\1\n/
1495
-
1496
- def pretransform_fenced_code_blocks( str, rs )
1497
- @log.debug " Transforming fenced code blocks => standard code blocks"
1498
-
1499
- str.gsub( FencedCodeBlockRegexp ) {|block|
1500
- "\n~\n\n" + indent($2) + "\n~\n\n"
1501
- }
1502
- end
1503
-
1504
-
1505
-
1506
- # Pattern for matching Markdown blockquote blocks
1507
- BlockQuoteRegexp = %r{
1508
- (?:
1509
- ^[ ]*>[ ]? # '>' at the start of a line
1510
- .+\n # rest of the first line
1511
- (?:.+\n)* # subsequent consecutive lines
1512
- \n* # blanks
1513
- )+
1514
- }x
1515
- PreChunk = %r{ ( ^ \s* <pre> .+? </pre> ) }xm
1516
-
1517
- ### Transform Markdown-style blockquotes in a copy of the specified +str+
1518
- ### and return it.
1519
- def transform_block_quotes( str, rs )
1520
- @log.debug " Transforming block quotes"
1521
-
1522
- str.gsub( BlockQuoteRegexp ) {|quote|
1523
- @log.debug "Making blockquote from %p" % quote
1524
-
1525
- quote.gsub!( /^ *> ?/, '' ) # Trim one level of quoting
1526
- quote.gsub!( /^ +$/, '' ) # Trim whitespace-only lines
1527
-
1528
- indent = " " * TabWidth
1529
- quoted = %{<blockquote>\n%s\n</blockquote>\n\n} %
1530
- apply_block_transforms( quote, rs ).
1531
- gsub( /^/, indent ).
1532
- gsub( PreChunk ) {|m| m.gsub(/^#{indent}/o, '') }
1533
- @log.debug "Blockquoted chunk is: %p" % quoted
1534
- quoted
1535
- }
1536
- end
1537
-
1538
-
1539
- # AoBane change:
1540
- # allow loosely formatted URLs and addresses (BlueCloth is very strict)
1541
- #
1542
- # loose examples:
1543
- # <skype:tetra-dice> (other protocol)
1544
- # <ema+il@example.com> (ex: gmail alias)
1545
- #
1546
- # not adapted addresses:
1547
- # <"Abc@def"@example.com> (refer to quoted-string of RFC 5321)
1548
-
1549
-
1550
- AutoAnchorURLRegexp = /<(#{URI.regexp})>/ # $1 = url
1551
-
1552
- AutoAnchorEmailRegexp = /<([^'">\s]+?\@[^'">\s]+[.][a-zA-Z]+)>/ # $1 = address
1553
-
1554
- ### Transform URLs in a copy of the specified +str+ into links and return
1555
- ### it.
1556
- def transform_auto_links( str, rs )
1557
- @log.debug " Transforming auto-links"
1558
- str.gsub(AutoAnchorURLRegexp){
1559
- %|<a href="#{Util.escape_html($1)}">#{Util.escape_html($1)}</a>|
1560
- }.gsub( AutoAnchorEmailRegexp ) {|addr|
1561
- encode_email_address( unescape_special_chars($1) )
1562
- }
1563
- end
1564
-
1565
-
1566
- # Encoder functions to turn characters of an email address into encoded
1567
- # entities.
1568
- Encoders = [
1569
- lambda {|char| "&#%03d;" % char},
1570
- lambda {|char| "&#x%X;" % char},
1571
- lambda {|char| char.chr },
1572
- ]
1573
-
1574
- ### Transform a copy of the given email +addr+ into an escaped version safer
1575
- ### for posting publicly.
1576
- def encode_email_address( addr )
1577
-
1578
- rval = ''
1579
- ("mailto:" + addr).each_byte {|b|
1580
- case b
1581
- when ?:
1582
- rval += ":"
1583
- when ?@
1584
- rval += Encoders[ rand(2) ][ b ]
1585
- else
1586
- r = rand(100)
1587
- rval += (
1588
- r > 90 ? Encoders[2][ b ] :
1589
- r < 45 ? Encoders[1][ b ] :
1590
- Encoders[0][ b ]
1591
- )
1592
- end
1593
- }
1594
-
1595
- return %{<a href="%s">%s</a>} % [ rval, rval.sub(/.+?:/, '') ]
1596
- end
1597
-
1598
-
1599
- # Regexp for matching Setext-style headers
1600
- SetextHeaderRegexp = %r{
1601
- (.+?) # The title text ($1)
1602
-
1603
- (?: # Markdown Extra: Header Id Attribute (optional)
1604
- [ ]* # space after closing #'s
1605
- \{\#
1606
- (\S+?) # $2 = Id
1607
- \}
1608
- [ \t]* # allowed lazy spaces
1609
- )?
1610
- \n
1611
- ([\-=])+ # Match a line of = or -. Save only one in $3.
1612
- [ ]*\n+
1613
- }x
1614
-
1615
- # Regexp for matching ATX-style headers
1616
- AtxHeaderRegexp = %r{
1617
- ^(\#+) # $1 = string of #'s
1618
- [ ]*
1619
- (.+?) # $2 = Header text
1620
- [ ]*
1621
- \#* # optional closing #'s (not counted)
1622
-
1623
- (?: # Markdown Extra: Header Id Attribute (optional)
1624
- [ ]* # space after closing #'s
1625
- \{\#
1626
- (\S+?) # $3 = Id
1627
- \}
1628
- [ \t]* # allowed lazy spaces
1629
- )?
1630
-
1631
- \n+
1632
- }x
1633
-
1634
- HeaderRegexp = Regexp.union(SetextHeaderRegexp, AtxHeaderRegexp)
1635
-
1636
- IdRegexp = /^[a-zA-Z][a-zA-Z0-9\:\._-]*$/
1637
-
1638
- ### Apply Markdown header transforms to a copy of the given +str+ and render
1639
- ### state +rs+ and return the result.
1640
- def transform_headers( str, rs )
1641
- @log.debug " Transforming headers"
1642
-
1643
- # Setext-style headers:
1644
- # Header 1
1645
- # ========
1646
- #
1647
- # Header 2
1648
- # --------
1649
- #
1650
-
1651
- section_numbers = [nil, nil, nil, nil, nil]
1652
-
1653
- str.
1654
- gsub( HeaderRegexp ) {|m|
1655
- if $1 then
1656
- @log.debug "Found setext-style header"
1657
- title, id, hdrchar = $1, $2, $3
1658
-
1659
- case hdrchar
1660
- when '='
1661
- level = 1
1662
- when '-'
1663
- level = 2
1664
- end
1665
- else
1666
- @log.debug "Found ATX-style header"
1667
- hdrchars, title, id = $4, $5, $6
1668
- level = hdrchars.length
1669
-
1670
- if level >= 7 then
1671
- rs.warnings << "illegal header level - h#{level} ('#' symbols are too many)"
1672
- end
1673
- end
1674
-
1675
- prefix = ''
1676
- if rs.numbering? then
1677
- if level >= rs.numbering_start_level and level <= 6 then
1678
- depth = level - rs.numbering_start_level
1679
-
1680
- section_numbers.each_index do |i|
1681
- if i == depth and section_numbers[depth] then
1682
- # increment a deepest number if current header's level equals last header's
1683
- section_numbers[i] += 1
1684
- elsif i <= depth then
1685
- # set default number if nil
1686
- section_numbers[i] ||= 1
1687
- else
1688
- # clear numbers of deeper levels
1689
- section_numbers[i] = nil
1690
- end
1691
- end
1692
-
1693
- no = ''
1694
- (0..depth).each do |i|
1695
- no << "#{section_numbers[i]}."
1696
- end
1697
-
1698
- prefix = "#{no} "
1699
- end
1700
- end
1701
-
1702
- title_html = apply_span_transforms( title, rs )
1703
-
1704
- unless id then
1705
- case rs.header_id_type
1706
- when HeaderIDType::ESCAPE
1707
- id = escape_to_header_id(title_html)
1708
- if rs.headers.find{|h| h.id == id} then
1709
- rs.warnings << "header id collision - #{id}"
1710
- id = "bfheader-#{Digest::MD5.hexdigest(title)}"
1711
- end
1712
- else
1713
- id = "bfheader-#{Digest::MD5.hexdigest(title)}"
1714
- end
1715
- end
1716
-
1717
- title = "#{prefix}#{title}"
1718
- title_html = "#{prefix}#{title_html}"
1719
-
1720
-
1721
- unless id =~ IdRegexp then
1722
- rs.warnings << "illegal header id - #{id} (legal chars: [a-zA-Z0-9_-.] | 1st: [a-zA-Z])"
1723
- end
1724
-
1725
- if rs.block_transform_depth == 1 then
1726
- rs.headers << RenderState::Header.new(id, level, title, title_html)
1727
- end
1728
-
1729
- if @use_header_id then
1730
- %{<h%d id="%s">%s</h%d>\n\n} % [ level, id, title_html, level ]
1731
- else
1732
- %{<h%d>%s</h%d>\n\n} % [ level, title_html, level ]
1733
- end
1734
- }
1735
- end
1736
-
1737
-
1738
- ### Wrap all remaining paragraph-looking text in a copy of +str+ inside <p>
1739
- ### tags and return it.
1740
- def form_paragraphs( str, rs )
1741
- @log.debug " Forming paragraphs"
1742
- grafs = str.
1743
- sub( /\A\n+/, '' ).
1744
- sub( /\n+\z/, '' ).
1745
- split( /\n{2,}/ )
1746
-
1747
- rval = grafs.collect {|graf|
1748
-
1749
- # Unhashify HTML blocks if this is a placeholder
1750
- if rs.html_blocks.key?( graf )
1751
- rs.html_blocks[ graf ]
1752
-
1753
- # no output if this is block separator
1754
- elsif graf == '~' then
1755
- ''
1756
-
1757
- # Otherwise, wrap in <p> tags
1758
- else
1759
- apply_span_transforms(graf, rs).
1760
- sub( /^[ ]*/, '<p>' ) + '</p>'
1761
- end
1762
- }.join( "\n\n" )
1763
-
1764
- @log.debug " Formed paragraphs: %p" % rval
1765
- return rval
1766
- end
1767
-
1768
-
1769
- # Pattern to match the linkid part of an anchor tag for reference-style
1770
- # links.
1771
- RefLinkIdRegexp = %r{
1772
- [ ]? # Optional leading space
1773
- (?:\n[ ]*)? # Optional newline + spaces
1774
- \[
1775
- (.*?) # Id = $1
1776
- \]
1777
- }x
1778
-
1779
- InlineLinkRegexp = %r{
1780
- \( # Literal paren
1781
- [ ]* # Zero or more spaces
1782
- <?(.+?)>? # URI = $1
1783
- [ ]* # Zero or more spaces
1784
- (?: #
1785
- ([\"\']) # Opening quote char = $2
1786
- (.*?) # Title = $3
1787
- \2 # Matching quote char
1788
- )? # Title is optional
1789
- \)
1790
- }x
1791
-
1792
- ### Apply Markdown anchor transforms to a copy of the specified +str+ with
1793
- ### the given render state +rs+ and return it.
1794
- def transform_anchors( str, rs )
1795
- @log.debug " Transforming anchors"
1796
- @scanner.string = str.dup
1797
- text = ''
1798
-
1799
- # Scan the whole string
1800
- until @scanner.empty?
1801
-
1802
- if @scanner.scan( /\[/ )
1803
- link = ''; linkid = ''
1804
- depth = 1
1805
- startpos = @scanner.pos
1806
- @log.debug " Found a bracket-open at %d" % startpos
1807
-
1808
- # Scan the rest of the tag, allowing unlimited nested []s. If
1809
- # the scanner runs out of text before the opening bracket is
1810
- # closed, append the text and return (wasn't a valid anchor).
1811
- while depth.nonzero?
1812
- linktext = @scanner.scan_until( /\]|\[/ )
1813
-
1814
- if linktext
1815
- @log.debug " Found a bracket at depth %d: %p" % [ depth, linktext ]
1816
- link += linktext
1817
-
1818
- # Decrement depth for each closing bracket
1819
- depth += ( linktext[-1, 1] == ']' ? -1 : 1 )
1820
- @log.debug " Depth is now #{depth}"
1821
-
1822
- # If there's no more brackets, it must not be an anchor, so
1823
- # just abort.
1824
- else
1825
- @log.debug " Missing closing brace, assuming non-link."
1826
- link += @scanner.rest
1827
- @scanner.terminate
1828
- return text + '[' + link
1829
- end
1830
- end
1831
- link.slice!( -1 ) # Trim final ']'
1832
- @log.debug " Found leading link %p" % link
1833
-
1834
-
1835
-
1836
- # Markdown Extra: Footnote
1837
- if link =~ /^\^(.+)/ then
1838
- id = $1
1839
- if rs.footnotes[id] then
1840
- rs.found_footnote_ids << id
1841
- label = "[#{rs.found_footnote_ids.size}]"
1842
- else
1843
- rs.warnings << "undefined footnote id - #{id}"
1844
- label = '[?]'
1845
- end
1846
-
1847
- text += %Q|<sup id="footnote-ref:#{id}"><a href="#footnote:#{id}" rel="footnote">#{label}</a></sup>|
1848
-
1849
- # Look for a reference-style second part
1850
- elsif @scanner.scan( RefLinkIdRegexp )
1851
- linkid = @scanner[1]
1852
- linkid = link.dup if linkid.empty?
1853
- linkid.downcase!
1854
- @log.debug " Found a linkid: %p" % linkid
1855
-
1856
- # If there's a matching link in the link table, build an
1857
- # anchor tag for it.
1858
- if rs.urls.key?( linkid )
1859
- @log.debug " Found link key in the link table: %p" % rs.urls[linkid]
1860
- url = escape_md( rs.urls[linkid] )
1861
-
1862
- text += %{<a href="#{url}"}
1863
- if rs.titles.key?(linkid)
1864
- text += %{ title="%s"} % escape_md( rs.titles[linkid] )
1865
- end
1866
- text += %{>#{link}</a>}
1867
-
1868
- # If the link referred to doesn't exist, just append the raw
1869
- # source to the result
1870
- else
1871
- @log.debug " Linkid %p not found in link table" % linkid
1872
- @log.debug " Appending original string instead: "
1873
- @log.debug "%p" % @scanner.string[ startpos-1 .. @scanner.pos-1 ]
1874
-
1875
- rs.warnings << "link-id not found - #{linkid}"
1876
- text += @scanner.string[ startpos-1 .. @scanner.pos-1 ]
1877
- end
1878
-
1879
- # ...or for an inline style second part
1880
- elsif @scanner.scan( InlineLinkRegexp )
1881
- url = @scanner[1]
1882
- title = @scanner[3]
1883
- @log.debug " Found an inline link to %p" % url
1884
-
1885
- url = "##{link}" if url == '#' # target anchor briefing (since AoBane 0.40)
1886
-
1887
- text += %{<a href="%s"} % escape_md( url )
1888
- if title
1889
- title.gsub!( /"/, "&quot;" )
1890
- text += %{ title="%s"} % escape_md( title )
1891
- end
1892
- text += %{>#{link}</a>}
1893
-
1894
- # No linkid part: just append the first part as-is.
1895
- else
1896
- @log.debug "No linkid, so no anchor. Appending literal text."
1897
- text += @scanner.string[ startpos-1 .. @scanner.pos-1 ]
1898
- end # if linkid
1899
-
1900
- # Plain text
1901
- else
1902
- @log.debug " Scanning to the next link from %p" % @scanner.rest
1903
- text += @scanner.scan( /[^\[]+/ )
1904
- end
1905
-
1906
- end # until @scanner.empty?
1907
-
1908
- return text
1909
- end
1910
-
1911
-
1912
- # Pattern to match strong emphasis in Markdown text
1913
- BoldRegexp = %r{ (\*\*|__) (\S|\S.*?\S) \1 }x
1914
-
1915
- # Pattern to match normal emphasis in Markdown text
1916
- ItalicRegexp = %r{ (\*|_) (\S|\S.*?\S) \1 }x
1917
-
1918
- ### Transform italic- and bold-encoded text in a copy of the specified +str+
1919
- ### and return it.
1920
- def transform_italic_and_bold( str, rs )
1921
- @log.debug " Transforming italic and bold"
1922
-
1923
- str.
1924
- gsub( BoldRegexp, %{<strong>\\2</strong>} ).
1925
- gsub( ItalicRegexp, %{<em>\\2</em>} )
1926
- end
1927
-
1928
-
1929
- ### Transform backticked spans into <code> spans.
1930
- def transform_code_spans( str, rs )
1931
- @log.debug " Transforming code spans"
1932
-
1933
- # Set up the string scanner and just return the string unless there's at
1934
- # least one backtick.
1935
- @scanner.string = str.dup
1936
- unless @scanner.exist?( /`/ )
1937
- @scanner.terminate
1938
- @log.debug "No backticks found for code span in %p" % str
1939
- return str
1940
- end
1941
-
1942
- @log.debug "Transforming code spans in %p" % str
1943
-
1944
- # Build the transformed text anew
1945
- text = ''
1946
-
1947
- # Scan to the end of the string
1948
- until @scanner.empty?
1949
-
1950
- # Scan up to an opening backtick
1951
- if pre = @scanner.scan_until( /.??(?=`)/m )
1952
- text += pre
1953
- @log.debug "Found backtick at %d after '...%s'" % [ @scanner.pos, text[-10, 10] ]
1954
-
1955
- # Make a pattern to find the end of the span
1956
- opener = @scanner.scan( /`+/ )
1957
- len = opener.length
1958
- closer = Regexp::new( opener )
1959
- @log.debug "Scanning for end of code span with %p" % closer
1960
-
1961
- # Scan until the end of the closing backtick sequence. Chop the
1962
- # backticks off the resultant string, strip leading and trailing
1963
- # whitespace, and encode any entities contained in it.
1964
- codespan = @scanner.scan_until( closer ) or
1965
- raise FormatError::new( @scanner.rest[0,20],
1966
- "No %p found before end" % opener )
1967
-
1968
- @log.debug "Found close of code span at %d: %p" % [ @scanner.pos - len, codespan ]
1969
- codespan.slice!( -len, len )
1970
- text += "<code>%s</code>" %
1971
- encode_code( codespan.strip, rs )
1972
-
1973
- # If there's no more backticks, just append the rest of the string
1974
- # and move the scan pointer to the end
1975
- else
1976
- text += @scanner.rest
1977
- @scanner.terminate
1978
- end
1979
- end
1980
-
1981
- return text
1982
- end
1983
-
1984
-
1985
- # Next, handle inline images: ![alt text](url "optional title")
1986
- # Don't forget: encode * and _
1987
- InlineImageRegexp = %r{
1988
- ( # Whole match = $1
1989
- !\[ (.*?) \] # alt text = $2
1990
- \([ ]*
1991
- <?(\S+?)>? # source url = $3
1992
- [ ]*
1993
- (?: #
1994
- (["']) # quote char = $4
1995
- (.*?) # title = $5
1996
- \4 # matching quote
1997
- [ ]*
1998
- )? # title is optional
1999
- \)
2000
- )
2001
- }x #"
2002
-
2003
-
2004
- # Reference-style images
2005
- ReferenceImageRegexp = %r{
2006
- ( # Whole match = $1
2007
- !\[ (.*?) \] # Alt text = $2
2008
- [ ]? # Optional space
2009
- (?:\n[ ]*)? # One optional newline + spaces
2010
- \[ (.*?) \] # id = $3
2011
- )
2012
- }x
2013
-
2014
- ### Turn image markup into image tags.
2015
- def transform_images( str, rs )
2016
- @log.debug " Transforming images %p" % str
2017
-
2018
- # Handle reference-style labeled images: ![alt text][id]
2019
- str.
2020
- gsub( ReferenceImageRegexp ) {|match|
2021
- whole, alt, linkid = $1, $2, $3.downcase
2022
- @log.debug "Matched %p" % match
2023
- res = nil
2024
- alt.gsub!( /"/, '&quot;' )
2025
-
2026
- # for shortcut links like ![this][].
2027
- linkid = alt.downcase if linkid.empty?
2028
-
2029
- if rs.urls.key?( linkid )
2030
- url = escape_md( rs.urls[linkid] )
2031
- @log.debug "Found url '%s' for linkid '%s' " % [ url, linkid ]
2032
-
2033
- # Build the tag
2034
- result = %{<img src="%s" alt="%s"} % [ url, alt ]
2035
- if rs.titles.key?( linkid )
2036
- result += %{ title="%s"} % escape_md( rs.titles[linkid] )
2037
- end
2038
- result += EmptyElementSuffix
2039
-
2040
- else
2041
- result = whole
2042
- end
2043
-
2044
- @log.debug "Replacing %p with %p" % [ match, result ]
2045
- result
2046
- }.
2047
-
2048
- # Inline image style
2049
- gsub( InlineImageRegexp ) {|match|
2050
- @log.debug "Found inline image %p" % match
2051
- whole, alt, title = $1, $2, $5
2052
- url = escape_md( $3 )
2053
- alt.gsub!( /"/, '&quot;' )
2054
-
2055
- # Build the tag
2056
- result = %{<img src="%s" alt="%s"} % [ url, alt ]
2057
- unless title.nil?
2058
- title.gsub!( /"/, '&quot;' )
2059
- result += %{ title="%s"} % escape_md( title )
2060
- end
2061
- result += EmptyElementSuffix
2062
-
2063
- @log.debug "Replacing %p with %p" % [ match, result ]
2064
- result
2065
- }
2066
- end
2067
-
2068
-
2069
- # Regexp to match special characters in a code block
2070
- CodeEscapeRegexp = %r{( \* | _ | \{ | \} | \[ | \] | \\ )}x
2071
-
2072
- ### Escape any characters special to HTML and encode any characters special
2073
- ### to Markdown in a copy of the given +str+ and return it.
2074
- def encode_code( str, rs )
2075
- #str.gsub( %r{&}, '&amp;' ).
2076
- #gsub( %r{<}, '&lt;' ).
2077
- #gsub( %r{>}, '&gt;' ).
2078
- #gsub( CodeEscapeRegexp ) {|match| EscapeTable[match][:md5]}
2079
- end
2080
-
2081
- def escape_to_header_id(str)
2082
- URI.escape(escape_md(str.gsub(/<\/?[^>]*>/, "").gsub(/\s/, "_")).gsub("/", ".2F")).gsub("%", ".")
2083
- end
2084
-
2085
- #################################################################
2086
- ### U T I L I T Y F U N C T I O N S
2087
- #################################################################
2088
-
2089
- ### Escape any markdown characters in a copy of the given +str+ and return
2090
- ### it.
2091
- def escape_md( str )
2092
- str.
2093
- gsub( /\*|_/ ){|symbol| EscapeTable[symbol][:md5]}
2094
- end
2095
-
2096
-
2097
- # Matching constructs for tokenizing X/HTML
2098
- HTMLCommentRegexp = %r{ <! ( -- .*? -- \s* )+ > }mx
2099
- XMLProcInstRegexp = %r{ <\? .*? \?> }mx
2100
- MetaTag = Regexp::union( HTMLCommentRegexp, XMLProcInstRegexp )
2101
-
2102
- HTMLTagOpenRegexp = %r{ < [a-z/!$] [^<>]* }imx
2103
- HTMLTagCloseRegexp = %r{ > }x
2104
- HTMLTagPart = Regexp::union( HTMLTagOpenRegexp, HTMLTagCloseRegexp )
2105
-
2106
- ### Break the HTML source in +str+ into a series of tokens and return
2107
- ### them. The tokens are just 2-element Array tuples with a type and the
2108
- ### actual content. If this function is called with a block, the type and
2109
- ### text parts of each token will be yielded to it one at a time as they are
2110
- ### extracted.
2111
- def tokenize_html( str )
2112
- depth = 0
2113
- tokens = []
2114
- @scanner.string = str.dup
2115
- type, token = nil, nil
2116
-
2117
- until @scanner.empty?
2118
- @log.debug "Scanning from %p" % @scanner.rest
2119
-
2120
- # Match comments and PIs without nesting
2121
- if (( token = @scanner.scan(MetaTag) ))
2122
- type = :tag
2123
-
2124
- # Do nested matching for HTML tags
2125
- elsif (( token = @scanner.scan(HTMLTagOpenRegexp) ))
2126
- tagstart = @scanner.pos
2127
- @log.debug " Found the start of a plain tag at %d" % tagstart
2128
-
2129
- # Start the token with the opening angle
2130
- depth = 1
2131
- type = :tag
2132
-
2133
- # Scan the rest of the tag, allowing unlimited nested <>s. If
2134
- # the scanner runs out of text before the tag is closed, raise
2135
- # an error.
2136
- while depth.nonzero?
2137
-
2138
- # Scan either an opener or a closer
2139
- chunk = @scanner.scan( HTMLTagPart ) or
2140
- break # AoBane Fix (refer to spec/code-block.rb)
2141
-
2142
- @log.debug " Found another part of the tag at depth %d: %p" % [ depth, chunk ]
2143
-
2144
- token += chunk
2145
-
2146
- # If the last character of the token so far is a closing
2147
- # angle bracket, decrement the depth. Otherwise increment
2148
- # it for a nested tag.
2149
- depth += ( token[-1, 1] == '>' ? -1 : 1 )
2150
- @log.debug " Depth is now #{depth}"
2151
- end
2152
-
2153
- # Match text segments
2154
- else
2155
- @log.debug " Looking for a chunk of text"
2156
- type = :text
2157
-
2158
- # Scan forward, always matching at least one character to move
2159
- # the pointer beyond any non-tag '<'.
2160
- token = @scanner.scan_until( /[^<]+/m )
2161
- end
2162
-
2163
- @log.debug " type: %p, token: %p" % [ type, token ]
2164
-
2165
- # If a block is given, feed it one token at a time. Add the token to
2166
- # the token list to be returned regardless.
2167
- if block_given?
2168
- yield( type, token )
2169
- end
2170
- tokens << [ type, token ]
2171
- end
2172
-
2173
- return tokens
2174
- end
2175
-
2176
-
2177
- ### Return a copy of +str+ with angle brackets and ampersands HTML-encoded.
2178
- def encode_html( str )
2179
- #str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w+);)/i, "&amp;" ).
2180
- #gsub( %r{<(?![a-z/?\$!])}i, "&lt;" )
2181
- return str
2182
- end
2183
-
2184
-
2185
- ### Remove one level of line-leading tabs or spaces from a copy of +str+ and
2186
- ### return it.
2187
- def outdent( str )
2188
- str.gsub( /^(\t|[ ]{1,#{TabWidth}})/, '')
2189
- end
2190
-
2191
- def indent(str)
2192
- str.gsub( /^/, ' ' * TabWidth)
2193
- end
2194
-
2195
- end
2196
- end
1
+ #
2
+ # AoBane - Extended Markdown Converter
3
+ #
4
+ # Author of Original BlueFeather: Dice <tetradice@gmail.com>
5
+ # Remaker: set.minami <set.minami@gmail.com>
6
+ # Website: https://github.com/setminami/AoBane/
7
+ # License: MIT
8
+ #
9
+ # If you want to know more about AoBane, see the website.
10
+ #
11
+ #
12
+ #
13
+ #-- Copyrights & License -------------------------------------------------------
14
+ #
15
+ # Original Markdown:
16
+ # Copyright (c) 2003-2004 John Gruber
17
+ # <http://daringfireball.net/>
18
+ # All rights reserved.
19
+ #
20
+ # Original BlueCloth:
21
+ # Copyright (c) 2004 The FaerieMUD Consortium.
22
+ #
23
+ # Original BlueFeather:
24
+ # Copyright (c) 2013 Dice
25
+ #
26
+ # AoBane:
27
+ # Copyright (c) 2013 Set.Minami
28
+ #
29
+ # Permission is hereby granted, free of charge, to any person obtaining a copy of this
30
+ # software and associated documentation files (AoBane), to deal in the Software
31
+ # without restriction, including without limitation the rights to use, copy, modify,
32
+ # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
33
+ # permit persons to whom the Software is furnished to do so, subject to the following
34
+ # conditions:
35
+ # The above copyright notice and this permission notice shall be included in all copies or
36
+ # substantial portions of the Software.
37
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
38
+ # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
39
+ # PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
40
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
41
+ # OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
42
+ # OTHER DEALINGS IN THE SOFTWARE.
43
+
44
+
45
+ require 'digest/md5'
46
+ require 'logger'
47
+ require 'strscan'
48
+ require 'stringio'
49
+ require 'uri'
50
+ require 'AoBane/utilities'
51
+ require 'math_ml/string'
52
+
53
+ module AoBane
54
+ VERSION = '0.1.0'
55
+ VERSION_NUMBER = 0.0100
56
+ RELEASE_DATE = '2013-04-08'
57
+ VERSION_LABEL = "#{VERSION} (#{RELEASE_DATE})"
58
+
59
+ UTF8_BOM = "\xef\xbb\xbf"
60
+ UTF8_BOM_PATTERN = /^#{UTF8_BOM}/
61
+
62
+
63
+ # Convenience module-level methods; each one delegates to a fresh Parser
64
+ class << self
65
+ def parse_text(src)
66
+ Parser.new.parse_text(src)
67
+ end
68
+
69
+ alias parse parse_text
70
+
71
+ def parse_document(src, default_enc = EncodingType::UTF8)
72
+ Parser.new.parse_document(src, default_enc)
73
+ end
74
+
75
+
76
+ def parse_text_file(path)
77
+ Parser.new.parse_text_file(path)
78
+ end
79
+
80
+ alias parse_file parse_text_file
81
+
82
+ def parse_document_file(path, default_enc = EncodingType::UTF8)
83
+ Parser.new.parse_document_file(path, default_enc)
84
+ end
85
+ end
86
+
87
+ ### Base exception class for errors raised while AoBane is running.
88
+ class Error < ::RuntimeError
89
+ end
90
+
91
+ class EncodingError < Error
92
+ end
93
+
94
+ ### Exception class for formatting errors.
95
+ class FormatError < Error
96
+
97
+ ### Create a new FormatError with the given source +str+ and an optional
98
+ ### message about the +specific+ error.
99
+ def initialize( str, specific=nil )
100
+ if specific
101
+ msg = "Bad markdown format near %p: %s" % [ str, specific ]
102
+ else
103
+ msg = "Bad markdown format near %p" % str
104
+ end
105
+
106
+ super( msg )
107
+ end
108
+ end
109
+
110
+ module HeaderIDType
111
+ MD5 = 'md5'
112
+ ESCAPE = 'escape'
113
+ end
114
+
115
+ module EncodingType
116
+ EUC = 'euc-jp'
117
+ EUCJP = EUC_JP = EUC
118
+
119
+ SJIS = 'shift_jis'
120
+ SHIFT_JIS = SJIS
121
+
122
+ UTF8 = 'utf-8'
123
+ UTF_8 = UTF8
124
+
125
+ ASCII = 'ascii'
126
+ US_ASCII = ASCII
127
+
128
+ def self.regulate(str_value)
129
+ case str_value.downcase
130
+ when 'shift-jis', 'shift_jis'
131
+ SJIS
132
+ when 'euc-jp'
133
+ EUC
134
+ when 'utf-8'
135
+ UTF8
136
+ when 'ascii'
137
+ ASCII
138
+ else
139
+ raise EncodingError, "not adapted encoding type - #{str_value} (shift[-_]jis, euc-jp, utf-8, or ascii)"
140
+ end
141
+ end
142
+
143
+ def self.convert_to_kcode(str_value)
144
+ type = self.regulate(str_value)
145
+ case type
146
+ when EUC, SJIS, UTF8
147
+ type
148
+ when ASCII
149
+ 'none'
150
+ end
151
+ end
152
+
153
+
154
+ def self.convert_to_charset(str_value)
155
+ type = self.regulate(str_value)
156
+ case type
157
+ when EUC
158
+ 'euc-jp'
159
+ when SJIS
160
+ 'shift_jis'
161
+ when UTF8
162
+ 'utf-8'
163
+ when ASCII
164
+ nil
165
+ end
166
+ end
167
+
168
+ end
169
+
170
+ module Util
171
+ HTML_ESC = {
172
+ '&' => '&amp;',
173
+ '"' => '&quot;',
174
+ '<' => '&lt;',
175
+ '>' => '&gt;'
176
+ }
177
+
178
+ module_function
179
+
180
+ # from http://jp.rubyist.net/magazine/?0010-CodeReview#l28
181
+ # (Author: Minero Aoki)
182
+ def escape_html(str)
183
+ #table = HTML_ESC # optimize
184
+ #str.gsub(/[&"<>]/) {|s| table[s] }
185
+ return str
186
+ end
187
+
188
+ def generate_blank_string_io(encoding_base)
189
+ io = StringIO.new
190
+
191
+ if io.respond_to?(:set_encoding) then
192
+ io.set_encoding(encoding_base.encoding)
193
+ end
194
+
195
+ return io
196
+ end
197
+
198
+ def change_kcode(kcode = nil)
199
+ if defined?(Encoding) then
200
+ # Ruby 1.9 or later
201
+ yield
202
+ else
203
+ # Ruby 1.8 or earlier
204
+ original_kcode = $KCODE
205
+
206
+ begin
207
+ $KCODE = kcode if kcode
208
+ yield
209
+
210
+ ensure
211
+ # recover
212
+ $KCODE = original_kcode
213
+ end
214
+ end # if defined?
215
+ end # def
216
+
217
+
218
+ def utf8_bom?(str)
219
+ if str.respond_to?(:getbyte) and str.respond_to?(:bytesize) then
220
+ if str.bytesize >= 3 and
221
+ str.getbyte(0) == UTF8_BOM.getbyte(0) and
222
+ str.getbyte(1) == UTF8_BOM.getbyte(1) and
223
+ str.getbyte(2) == UTF8_BOM.getbyte(2) then
224
+ return true
225
+ else
226
+ return false
227
+ end
228
+
229
+ else
230
+ return(str =~ UTF8_BOM_PATTERN ? true : false)
231
+ end
232
+ end
233
+ end
234
+
235
+ class Document
236
+ HEADER_PATTERN = /^([a-zA-Z0-9-]+?)\s*\:\s*(.+?)\s*(?:\n|\Z)/
237
+ BLANK_LINE_PATTERN = /^\n/
238
+ HEADER_SEQUEL_PATTERN = /^\s+(.+)$/
239
+
240
+ attr_accessor :headers, :body
241
+ alias text body
242
+ alias text= body=
243
+
244
+ class << self
245
+ def parse_io(input, default_enc = EncodingType::UTF8)
246
+ headers = {}
247
+ body = nil
248
+ first_pos = input.pos
249
+ default_enc = EncodingType.regulate(default_enc)
250
+
251
+ Util.change_kcode(EncodingType.convert_to_kcode(default_enc)){
252
+ # default encoding
253
+ if defined?(Encoding) then
254
+ input.set_encoding(Encoding.find(default_enc))
255
+ end
256
+
257
+
258
+
259
+ # get headers
260
+ pos_before_gets = nil
261
+ first_line = true
262
+
263
+ loop do
264
+ pos_before_gets = input.pos
265
+ line = input.gets
266
+
267
+ # cut UTF-8 BOM
268
+ if first_line and Util.utf8_bom?(line) then
269
+ line.slice!(UTF8_BOM_PATTERN)
270
+ end
271
+ first_line = false
272
+
273
+ if line and line.chomp =~ HEADER_PATTERN then
274
+ key = $1.downcase; value = $2
275
+
276
+ if key == 'encoding' and not headers.include?('encoding') then
277
+ kc = EncodingType.convert_to_kcode(value.downcase)
278
+ if input.respond_to?(:set_encoding) then
279
+ input.set_encoding(EncodingType.regulate(value))
280
+
281
+ # rewind (reason => [ruby-list:45988])
282
+ input.pos = first_pos
283
+ first_line = true
284
+ else
285
+ $KCODE = kc
286
+ end
287
+ end
288
+
289
+ headers[key] = value
290
+ else
291
+ # EOF or Metadata end
292
+ break
293
+ end
294
+ end
295
+
296
+ # back
297
+ input.pos = pos_before_gets
298
+
299
+
300
+
301
+ # skip blank lines
302
+ loop do
303
+ pos_before_gets = input.pos
304
+
305
+ line = input.gets
306
+ if line.nil? or not line =~ BLANK_LINE_PATTERN then
307
+ break
308
+ end
309
+ end
310
+
311
+ # back
312
+ input.pos = pos_before_gets
313
+
314
+
315
+
316
+ # get body
317
+ body = input.read
318
+
319
+ }
320
+
321
+
322
+ return self.new(headers, body)
323
+ end
324
+
325
+ def parse(str, default_enc = EncodingType::UTF8)
326
+ parse_io(StringIO.new(str), default_enc)
327
+ end
328
+
329
+ end
330
+
331
+
332
+ def initialize(headers = {}, body = '')
333
+ @headers = {}
334
+ headers.each do |k, v|
335
+ self[k] = v
336
+ end
337
+ @body = body
338
+ end
339
+
340
+ def [](key)
341
+ @headers[key.to_s.downcase]
342
+ end
343
+
344
+ def []=(key, value)
345
+ @headers[key.to_s.downcase] = value.to_s
346
+ end
347
+
348
+ def title
349
+ @headers['title']
350
+ end
351
+
352
+ def css
353
+ @headers['css']
354
+ end
355
+
356
+ def numbering
357
+ case @headers['numbering']
358
+ when 'yes', '1', 'true', 'on'
359
+ true
360
+ else
361
+ false
362
+ end
363
+ end
364
+
365
+ alias numbering? numbering
366
+
367
+ def numbering_start_level
368
+ level = (@headers['numbering-start-level'] || 2).to_i
369
+ if level >= 1 and level <= 6 then
370
+ return level
371
+ else
372
+ return 2
373
+ end
374
+ end
375
+
376
+ def encoding_type
377
+ @headers['encoding'] || EncodingType::UTF8
378
+ end
379
+
380
+ def header_id_type
381
+ (@headers['header-id-type'] || HeaderIDType::MD5).downcase
382
+ end
383
+
384
+ def kcode
385
+ self.encoding_type && EncodingType.convert_to_kcode(self.encoding_type)
386
+ end
387
+
388
+ def to_html
389
+ Parser.new.document_to_html(self)
390
+ end
391
+ end
392
+
393
+
394
+ class Parser
395
+ # Rendering state class. Keeps track of URLs, titles, and HTML blocks
396
+ # midway through a render. I prefer this to the globals of the Perl version
397
+ # because globals make me break out in hives. Or something.
398
+ class RenderState
399
+ # Headers struct.
400
+ Header = Struct.new(:id, :level, :content, :content_html)
401
+
402
+ # from Original BlueCloth
403
+ attr_accessor :urls, :titles, :html_blocks, :log
404
+
405
+ # AoBane Extension
406
+ attr_accessor :footnotes, :found_footnote_ids, :warnings
407
+ attr_accessor :headers, :block_transform_depth
408
+ attr_accessor :header_id_type # option switch
409
+ attr_accessor :numbering, :numbering_start_level # option switch
410
+ alias numbering? numbering
411
+
412
+ def initialize
413
+ @urls, @titles, @html_blocks = {}, {}, {}
414
+ @log = nil
415
+ @footnotes, @found_footnote_ids, @warnings = {}, [], []
416
+ @headers = []
417
+ @block_transform_depth = 0
418
+ @header_id_type = HeaderIDType::MD5
419
+ @numbering = false
420
+ @numbering_start_level = 2
421
+ end
422
+
423
+ end
424
+
425
+ # Tab width for #detab if none is specified
426
+ TabWidth = 4
427
+
428
+ # The tag-closing string -- set to '>' for HTML
429
+ EmptyElementSuffix = " />";
430
+
431
+ # Table of MD5 sums for escaped characters
432
+ EscapeTable = {}
433
+ '\\`*_{}[]()#.!|:~'.split(//).each {|char|
434
+ hash = Digest::MD5::hexdigest( char )
435
+
436
+ EscapeTable[ char ] = {
437
+ :md5 => hash,
438
+ :md5re => Regexp::new( hash ),
439
+ :re => Regexp::new( '\\\\' + Regexp::escape(char) ),
440
+ :unescape => char,
441
+ }
442
+
443
+ escaped = "\\#{char}"
444
+ hash = Digest::MD5::hexdigest(escaped)
445
+ EscapeTable[escaped] = {
446
+ :md5 => hash,
447
+ :md5re => Regexp::new( hash ),
448
+ :re => Regexp::new( '\\\\' + Regexp::escape(char) ),
449
+ :unescape => char,
450
+ }
451
+ }
452
+
453
+
454
+ #################################################################
455
+ ### I N S T A N C E M E T H O D S
456
+ #################################################################
457
+
458
+ ### Create a new AoBane parser.
459
+ def initialize(*restrictions)
460
+ @log = Logger::new( $deferr )
461
+ @log.level = $DEBUG ?
462
+ Logger::DEBUG :
463
+ ($VERBOSE ? Logger::INFO : Logger::WARN)
464
+ @scanner = nil
465
+
466
+ # Add any restrictions, and set the line-folding attribute to reflect
467
+ # what happens by default.
468
+ @filter_html = nil
469
+ @filter_styles = nil
470
+ restrictions.flatten.each {|r| __send__("#{r}=", true) }
471
+ @fold_lines = true
472
+
473
+ @use_header_id = true
474
+ @display_warnings = true
475
+
476
+ @log.debug "String is: %p" % self
477
+ end
478
+
479
+
480
+ ######
481
+ public
482
+ ######
483
+
484
+ # Filters for controlling what gets output for untrusted input. (But really,
485
+ # you're filtering bad stuff out of untrusted input at submission-time via
486
+ # untainting, aren't you?)
487
+ attr_accessor :filter_html, :filter_styles
488
+
489
+ # RedCloth-compatibility accessor. Line-folding is part of Markdown syntax,
490
+ # so this isn't used by anything.
491
+ attr_accessor :fold_lines
492
+
493
+ # AoBane Extension: display warnings on the top of output html (default: true)
494
+ attr_accessor :display_warnings
495
+
496
+ # AoBane Extension: add id to each header, for toc and anchors. (default: true)
497
+ attr_accessor :use_header_id
498
+
499
+
500
+
501
+
502
+ ### Render the Markdown-formatted +source+ as HTML and return it. The optional
503
+ ### +rs+ is the RenderState to use; a new one is created when it is nil.
504
+ ### NOTE: the result is returned as an Array of output lines, not a single String.
505
+ def parse_text(source, rs = nil)
506
+ rs ||= RenderState.new
507
+
508
+ # check
509
+ case rs.header_id_type
510
+ when HeaderIDType::MD5, HeaderIDType::ESCAPE
511
+ else
512
+ rs.warnings << "illegal header id type - #{rs.header_id_type}"
513
+ end
514
+
515
+ # Create a StringScanner we can reuse for various lexing tasks
516
+ @scanner = StringScanner::new( '' )
517
+
518
+ # Make a copy of the string with normalized line endings, tabs turned to
519
+ # spaces, and a couple of guaranteed newlines at the end
520
+
521
+ text = detab(source.gsub( /\r\n?/, "\n" ))
522
+ text += "\n\n"
523
+ @log.debug "Normalized line-endings: %p" % text
524
+
525
+ #Insert by set.minami 2013-03-30
526
+ text.gsub!(/\*\[(.*?)\]\((.*?)(\|.*?)*(#.*?)*\)/){|match|
527
+ '<font color="' +
528
+ if $2.nil? then '' else $2 end +'" ' +
529
+ 'face="' +
530
+ if $3.nil? then '' else $3.delete('|') end + '" ' +
531
+ 'size="' +
532
+ if $4.nil? then '' else $4.delete('#') end + '">' +
533
+ $1 + '</font>'
534
+ }
535
+
536
+ #Insert by set.minami 2013-04-03
537
+ nrange = []
538
+ departure = 1
539
+ preproc = Marshal.load(Marshal.dump(text))
540
+ text.clear
541
+ html_text_number = 0
542
+ preproc.lines { |line|
543
+ html_text_number += 1
544
+ begin
545
+ line.gsub!(/^\{nrange:(.*?)(;\d+){0,1}\}/){ |match|
546
+ depNum = $2.delete(';').to_i
547
+ departure =
548
+ if depNum > 0 then depNum else 1 end
549
+ if /[hH]([1-6])\-[hH]([1-6])/ =~ $1
550
+ nrange.push($1)
551
+ nrange.push($2)
552
+ if nrange.size > 2 then
553
+ nrange.pop
554
+ nrange.pop
555
+ raise "Syntax Error!"
556
+ end
557
+ match = ""
558
+ end
559
+ next
560
+ }
561
+ #Insert by set.minami 2013-04-01
562
+ line.gsub!(/\\TeX{(.*?)\\TeX}/){ |match|
563
+ if $1.nil? then '' else $1.to_mathml end
564
+ }
565
+ #calculate numbering
566
+ range = nrange[1].to_i - nrange[0].to_i
567
+ if range < 0 then
568
+ p "AoBane Syntax Error:Header range is WRONG!" +
569
+ "@ l.#{html_text_number}";exit(-1)
570
+ raise FatalError,"AoBane Syntax Error:Header range is WRONG!"
571
+ end
572
+ line.gsub!(/^(%{1,#{range}})(.*?)\n$/){ |match|
573
+ line = Utilities.
574
+ calcSectionNo(nrange.min,range,$1.size,departure,$2)
575
+ }
576
+ text << line
577
+ @log.debug nrange.minmax
578
+ rescue => e
579
+ @log.warn "AoBane Syntax WARNING l.#{html_text_number}:#{line.chomp} haven't adopted"
580
+ @log.warn e
581
+ end
582
+ }
583
+
584
+ #Insert by set.minami
585
+
586
+ # Filter HTML if we're asked to do so
587
+ if self.filter_html
588
+ #text.gsub!( "<", "&lt;" )
589
+ #text.gsub!( ">", "&gt;" )
590
+ @log.debug "Filtered HTML: %p" % text
591
+ end
592
+
593
+ # Simplify blank lines
594
+ text.gsub!( /^ +$/, '' )
595
+ @log.debug "Tabs -> spaces/blank lines stripped: %p" % text
596
+
597
+
598
+ # Replace HTML blocks with placeholders
599
+ text = hide_html_blocks( text, rs )
600
+ @log.debug "Hid HTML blocks: %p" % text
601
+ @log.debug "Render state: %p" % rs
602
+
603
+
604
+ # Strip footnote definitions, store in render state
605
+ text = strip_footnote_definitions( text, rs )
606
+ @log.debug "Stripped footnote definitions: %p" % text
607
+ @log.debug "Render state: %p" % rs
608
+
609
+
610
+ # Strip link definitions, store in render state
611
+ text = strip_link_definitions( text, rs )
612
+ @log.debug "Stripped link definitions: %p" % text
613
+ @log.debug "Render state: %p" % rs
614
+
615
+ # Escape meta-characters
616
+ text = escape_special_chars( text )
617
+ @log.debug "Escaped special characters: %p" % text
618
+
619
+ # Transform block-level constructs
620
+ text = apply_block_transforms( text, rs )
621
+ @log.debug "After block-level transforms: %p" % text
622
+
623
+ # Now swap back in all the escaped characters
624
+ text = unescape_special_chars( text )
625
+ @log.debug "After unescaping special characters: %p" % text
626
+
627
+ # Extend footnotes
628
+ unless rs.footnotes.empty? then
629
+ text << %Q|<div class="footnotes"><hr#{EmptyElementSuffix}\n<ol>\n|
630
+ rs.found_footnote_ids.each do |id|
631
+ content = rs.footnotes[id]
632
+ html = apply_block_transforms(content.sub(/\n+\Z/, '') + %Q| <a href="#footnote-ref:#{id}" rev="footnote">&#8617;</a>|, rs)
633
+ text << %Q|<li id="footnote:#{id}">\n#{html}\n</li>|
634
+ end
635
+ text << %Q|</ol>\n</div>\n|
636
+ end
637
+
638
+ # Display warnings
639
+ if @display_warnings then
640
+ unless rs.warnings.empty? then
641
+ html = %Q|<pre><strong>[WARNINGS]\n|
642
+ html << rs.warnings.map{|x| Util.escape_html(x)}.join("\n")
643
+ html << %Q|</strong></pre>|
644
+
645
+ text = html + text
646
+ end
647
+ end
648
+
649
+ #Insert by set.minami 2013-03-30
650
+ output = []
651
+ text.lines {|line|
652
+ if /<pre><code>/ =~ line
653
+ output << line
654
+ next
655
+ until /<\/code><\/pre>/ =~ line
656
+ output << line
657
+ next
658
+ end
659
+ else
660
+ line.gsub!(/\-\-|<=>|<\->|\->|<\-|=>|<=|\|\^|\|\|\/|\|\/|\^|
661
+ \>\>|\<\<|\+_|!=|~~|~=|>_|<_|\|FA|\|EX|\|=|\(+\)|\(x\)|
662
+ \\&|\(c\)|\(R\)|\(SS\)|\(TM\)/,
663
+ "\-\-" => "&mdash;",
664
+ "<=" => "&hArr;",
665
+ "<\->" => "&harr;",
666
+ "\->" =>"&rarr;",
667
+ "<\-" =>"&larr;",
668
+ "=>" => "&rArr;",
669
+ "<=" => "&lArr;",
670
+ "\|\|\^" => "&uArr;",
671
+ "\|\|\/" => "&dArr;",
672
+ "\|\/" => "&darr;",
673
+ "\|\^" => "&uarr;",
674
+ ">>" => "&raquo;",
675
+ "\<\<" => "&laquo;",
676
+ "+_" => "&plusmn;",
677
+ "!=" => "&ne;",
678
+ "~~" => "&asymp;",
679
+ "~=" => "&cong;",
680
+ "<_" => "&le;",
681
+ ">_" => "&ge",
682
+ "\|FA" => "&forall;",
683
+ "\|EX" => "&exist;",
684
+ "\|=" => "&equiv;",
685
+ "\(+\)" => "&oplus",
686
+ "\(x\)" => "&otimes;",
687
+ "\\&" =>"&amp;",
688
+ "\(c\)" => "&copy;",
689
+ "\(R\)" =>"&reg;",
690
+ "\(SS\)" => "&sect;",
691
+ "\(TM\)" => "&trade;")
692
+ output << line
693
+ end
694
+ }
695
+ return output
696
+ #Insert by set.minami
697
+ #return text
698
+
699
+ end
700
+
701
+ alias parse parse_text
702
+
703
+ # Like #parse_text, but returns [html, render_state] (mainly for testing).
704
+ def parse_text_with_render_state(str, rs = nil)
705
+ rs ||= RenderState.new
706
+ html = parse_text(str, rs)
707
+
708
+ return [html, rs]
709
+ end
710
+
711
+ def parse_text_file(path)
712
+ parse_text(File.read(path))
713
+ end
714
+
715
+ alias parse_file parse_text_file
716
+
717
+
718
+ def parse_document(source, default_enc = EncodingType::UTF8)
719
+ doc = Document.parse(source, default_enc)
720
+
721
+ return document_to_html(doc)
722
+ end
723
+
724
+ def parse_document_file(path, default_enc = EncodingType::UTF8)
725
+ doc = nil
726
+ open(path){|f|
727
+ doc = Document.parse_io(f, default_enc)
728
+ }
729
+
730
+ return document_to_html(doc)
731
+ end
732
+
733
+
734
+ def document_to_html(doc)
735
+ rs = RenderState.new
736
+ if doc.numbering? then
737
+ rs.numbering = true
738
+ end
739
+ rs.numbering_start_level = doc.numbering_start_level
740
+ rs.header_id_type = doc.header_id_type
741
+
742
+ body_html = nil
743
+
744
+ if doc.encoding_type then
745
+ Util.change_kcode(doc.kcode){
746
+ body_html = parse_text(doc.body, rs)
747
+ }
748
+ else
749
+ body_html = parse_text(doc.body, rs)
750
+ end
751
+
752
+ out = Util.generate_blank_string_io(doc.body)
753
+
754
+ # XHTML declaration
755
+ out.puts %Q|<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">|
756
+
757
+ # html start
758
+ out.puts %Q|<html>|
759
+
760
+ # head
761
+ out.puts %Q|<head>|
762
+
763
+ if doc.encoding_type and (charset = EncodingType.convert_to_charset(doc.encoding_type)) then
764
+ out.puts %Q|<meta http-equiv="Content-Type" content="text/html; charset=#{charset}" />|
765
+ end
766
+
767
+ h1 = rs.headers.find{|x| x.level == 1}
768
+ h1_content = (h1 ? h1.content : nil)
769
+ title = Util.escape_html(doc.title || h1_content || 'no title (Generated by AoBane)')
770
+ out.puts %Q|<title>#{title}</title>|
771
+
772
+ %w(description keywords).each do |name|
773
+ if doc[name] then
774
+ content = Util.escape_html(doc[name])
775
+ out.puts %Q|<meta name="#{name}" content="#{content}" />|
776
+ end
777
+ end
778
+
779
+
780
+ if doc['css'] then
781
+ href = Util.escape_html(doc.css)
782
+ out.puts %Q|<link rel="stylesheet" type="text/css" href="#{href}" />|
783
+
784
+ end
785
+
786
+ if doc['rdf-feed'] then
787
+ href = Util.escape_html(doc['rdf-feed'])
788
+ out.puts %Q|<link rel="alternate" type="application/rdf+xml" href="#{href}" />|
789
+ end
790
+
791
+
792
+
793
+ if doc['rss-feed'] then
794
+ href = Util.escape_html(doc['rss-feed'])
795
+ out.puts %Q|<link rel="alternate" type="application/rss+xml" href="#{href}" />|
796
+ end
797
+
798
+ if doc['atom-feed'] then
799
+ href = Util.escape_html(doc['atom-feed'])
800
+ out.puts %Q|<link rel="alternate" type="application/atom+xml" href="#{href}" />|
801
+ end
802
+
803
+ out.puts %Q|</head>|
804
+
805
+ # body
806
+ out.puts %Q|<body>|
807
+ out.puts
808
+ out.puts body_html
809
+ out.puts
810
+ out.puts %Q|</body>|
811
+
812
+ # html end
813
+ out.puts %Q|</html>|
814
+
815
+
816
+ return out.string
817
+ end
818
+
819
+ alias doc2html document_to_html
820
+
821
+
822
+
823
+
824
+ #######
825
+ #private
826
+ #######
827
+
828
+ ### Convert tabs in +str+ to spaces.
829
+ ### (Reworked from the original BlueCloth into a function-style method that returns a new string.)
830
+ def detab( str, tabwidth=TabWidth )
831
+ re = str.split( /\n/ ).collect {|line|
832
+ line.gsub( /(.*?)\t/ ) do
833
+ $1 + ' ' * (tabwidth - $1.length % tabwidth)
834
+ end
835
+ }.join("\n")
836
+
837
+ re
838
+ end
839
+
840
+
841
+
842
+
843
+ ### Do block-level transforms on a copy of +str+ using the specified render
844
+ ### state +rs+ and return the results.
845
+ def apply_block_transforms( str, rs )
846
+ rs.block_transform_depth += 1
847
+
848
+ # Port: This was called '_runBlockGamut' in the original
849
+
850
+ @log.debug "Applying block transforms to:\n %p" % str
851
+ text = str
852
+ text = pretransform_fenced_code_blocks( text, rs )
853
+ text = pretransform_block_separators(text, rs)
854
+
855
+ text = transform_headers( text, rs )
856
+ text = transform_toc(text, rs)
857
+
858
+ text = transform_hrules( text, rs )
859
+ text = transform_lists( text, rs )
860
+ text = transform_definition_lists( text, rs ) # AoBane Extension
861
+ text = transform_code_blocks( text, rs )
862
+ text = transform_block_quotes( text, rs )
863
+ text = transform_tables(text, rs)
864
+ text = hide_html_blocks( text, rs )
865
+
866
+ text = form_paragraphs( text, rs )
867
+
868
+ rs.block_transform_depth -= 1
869
+ @log.debug "Done with block transforms:\n %p" % text
870
+ return text
871
+ end
872
+
873
+
874
+ ### Apply Markdown span transforms to a copy of the specified +str+ with the
875
+ ### given render state +rs+ and return it.
876
+ def apply_span_transforms( str, rs )
877
+ @log.debug "Applying span transforms to:\n %p" % str
878
+
879
+ str = transform_code_spans( str, rs )
880
+ str = transform_auto_links( str, rs )
881
+ str = encode_html( str )
882
+ str = transform_images( str, rs )
883
+ str = transform_anchors( str, rs )
884
+ str = transform_italic_and_bold( str, rs )
885
+
886
+ # Hard breaks
887
+ str.gsub!( / {2,}\n/, "<br#{EmptyElementSuffix}\n" )
888
+
889
+ @log.debug "Done with span transforms:\n %p" % str
890
+ return str
891
+ end
892
+
893
+
894
+ # The list of tags which are considered block-level constructs and an
895
+ # alternation pattern suitable for use in regexps made from the list
896
+ StrictBlockTags = %w[ p div h[1-6] blockquote pre table dl ol ul script noscript
897
+ form fieldset iframe math ins del ]
898
+ StrictTagPattern = StrictBlockTags.join('|')
899
+
900
+ LooseBlockTags = StrictBlockTags - %w[ins del]
901
+ LooseTagPattern = LooseBlockTags.join('|')
902
+
903
+ # Nested blocks:
904
+ # <div>
905
+ # <div>
906
+ # tags for inner block must be indented.
907
+ # </div>
908
+ # </div>
909
+ StrictBlockRegexp = %r{
910
+ ^ # Start of line
911
+ <(#{StrictTagPattern}) # Start tag: \2
912
+ \b # word break
913
+ (.*\n)*? # Any number of lines, minimal match
914
+ </\1> # Matching end tag
915
+ [ ]* # trailing spaces
916
+ $ # End of line or document
917
+ }ix
918
+
919
+ # More-liberal block-matching
920
+ LooseBlockRegexp = %r{
921
+ ^ # Start of line
922
+ <(#{LooseTagPattern}) # start tag: \2
923
+ \b # word break
924
+ (.*\n)*? # Any number of lines, minimal match
925
+ .*</\1> # Anything + Matching end tag
926
+ [ ]* # trailing spaces
927
+ $ # End of line or document
928
+ }ix
929
+
930
+ # Special case for <hr />.
931
+ HruleBlockRegexp = %r{
932
+ ( # $1
933
+ \A\n? # Start of doc + optional \n
934
+ | # or
935
+ .*\n\n # anything + blank line
936
+ )
937
+ ( # save in $2
938
+ # AoBane fix: Not allow any space on line top
939
+ <hr # Tag open
940
+ \b # Word break
941
+ ([^<>])*? # Attributes
942
+ /?> # Tag close
943
+ $ # followed by a blank line or end of document
944
+ )
945
+ }ix
946
+
947
+ ### Replace all blocks of HTML in +str+ that start in the left margin with
948
+ ### tokens.
949
+ def hide_html_blocks( str, rs )
950
+ @log.debug "Hiding HTML blocks in %p" % str
951
+
952
+ # Tokenizer proc to pass to gsub
953
+ tokenize = lambda {|match|
954
+ key = Digest::MD5::hexdigest( match )
955
+ rs.html_blocks[ key ] = match
956
+ @log.debug "Replacing %p with %p" % [ match, key ]
957
+ "\n\n#{key}\n\n"
958
+ }
959
+
960
+ rval = str.dup
961
+
962
+ @log.debug "Finding blocks with the strict regex..."
963
+ rval.gsub!( StrictBlockRegexp, &tokenize )
964
+
965
+ @log.debug "Finding blocks with the loose regex..."
966
+ rval.gsub!( LooseBlockRegexp, &tokenize )
967
+
968
+ @log.debug "Finding hrules..."
969
+ rval.gsub!( HruleBlockRegexp ) {|match| $1 + tokenize[$2] }
970
+
971
+ return rval
972
+ end
973
+
974
+
975
+ # Link defs are in the form: ^[id]: url "optional title"
976
+ LinkRegexp = %r{
977
+ ^[ ]{0,#{TabWidth - 1}} # AoBane fix: indent < tab width
978
+ \[(.+)\]: # id = $1
979
+ [ ]*
980
+ \n? # maybe *one* newline
981
+ [ ]*
982
+ <?(\S+?)>? # url = $2
983
+ [ ]*
984
+ \n? # maybe one newline
985
+ [ ]*
986
+ (?:
987
+ # Titles are delimited by "quotes" or (parens).
988
+ ["(]
989
+ (.+?) # title = $3
990
+ [")] # Matching ) or "
991
+ [ ]*
992
+ )? # title is optional
993
+ (?:\n+|\Z)
994
+ }x
995
+
996
+ ### Strip link definitions from +str+, storing them in the given RenderState
997
+ ### +rs+.
998
+ def strip_link_definitions( str, rs )
999
+ str.gsub( LinkRegexp ) {|match|
1000
+ id, url, title = $1, $2, $3
1001
+
1002
+ rs.urls[ id.downcase ] = encode_html( url )
1003
+ unless title.nil?
1004
+ rs.titles[ id.downcase ] = title.gsub( /"/, "&quot;" )
1005
+ end
1006
+
1007
+ ""
1008
+ }
1009
+ end
1010
+
1011
+ # Footnote definitions are in the form: [^id]: footnote contents.
1012
+ FootnoteDefinitionRegexp = %r{
1013
+ ^[ ]{0,#{TabWidth - 1}}
1014
+ \[\^(.+?)\]\: # id = $1
1015
+ [ ]*
1016
+ (.*) # first line content = $2
1017
+ (?:\n|\Z)
1018
+
1019
+ ( # second or more lines content = $3
1020
+ (?:
1021
+ [ ]{#{TabWidth},} # indented
1022
+ .*
1023
+ (?:\n|\Z)
1024
+ |
1025
+ \n # blank line
1026
+ )*
1027
+ )?
1028
+
1029
+ }x
1030
+
1031
+ FootnoteIdRegexp = /^[a-zA-Z0-9\:\._-]+$/
1032
+
1033
+ def strip_footnote_definitions(str, rs)
1034
+ str.gsub( FootnoteDefinitionRegexp ) {|match|
1035
+ id = $1; content1 = $2; content2 = $3
1036
+
1037
+ unless id =~ FootnoteIdRegexp then
1038
+ rs.warnings << "illegal footnote id - #{id} (legal chars: a-zA-Z0-9_-.:)"
1039
+ end
1040
+
1041
+ if content2 then
1042
+ @log.debug " Stripping multi-line definition %p, %p" % [$2, $3]
1043
+ content = content1 + "\n" + outdent(content2.chomp)
1044
+ @log.debug " Stripped multi-line definition %p, %p" % [id, content]
1045
+ rs.footnotes[id] = content
1046
+ else
1047
+ content = content1 || ''
1048
+ @log.debug " Stripped single-line definition %p, %p" % [id, content]
1049
+ rs.footnotes[id] = content
1050
+ end
1051
+
1052
+
1053
+
1054
+ ""
1055
+ }
1056
+ end
1057
+
1058
+
1059
+ ### Escape special characters in the given +str+
1060
+ def escape_special_chars( str )
1061
+ @log.debug " Escaping special characters"
1062
+ text = ''
1063
+
1064
+ # The original Markdown source has something called '$tags_to_skip'
1065
+ # declared here, but it's never used, so I don't define it.
1066
+
1067
+ tokenize_html( str ) {|token, str|
1068
+ @log.debug " Adding %p token %p" % [ token, str ]
1069
+ case token
1070
+
1071
+ # Within tags, encode * and _
1072
+ when :tag
1073
+ text += str.
1074
+ gsub( /\*/, EscapeTable['*'][:md5] ).
1075
+ gsub( /_/, EscapeTable['_'][:md5] )
1076
+
1077
+ # Encode backslashed stuff in regular text
1078
+ when :text
1079
+ text += encode_backslash_escapes( str )
1080
+ else
1081
+ raise TypeError, "Unknown token type %p" % token
1082
+ end
1083
+ }
1084
+
1085
+ @log.debug " Text with escapes is now: %p" % text
1086
+ return text
1087
+ end
1088
+
1089
+
1090
+ ### Swap escaped special characters in a copy of the given +str+ and return
1091
+ ### it.
1092
+ def unescape_special_chars( str )
1093
+ EscapeTable.each {|char, hash|
1094
+ @log.debug "Unescaping escaped %p with %p" % [ char, hash[:md5re] ]
1095
+ str.gsub!( hash[:md5re], hash[:unescape] )
1096
+ }
1097
+
1098
+ return str
1099
+ end
1100
+
1101
+
1102
+ ### Return a copy of the given +str+ with any backslashed special character
1103
+ ### in it replaced with MD5 placeholders.
1104
+ def encode_backslash_escapes( str )
1105
+ # Make a copy with any double-escaped backslashes encoded
1106
+ text = str.gsub( /\\\\/, EscapeTable['\\\\'][:md5] )
1107
+
1108
+ EscapeTable.each_pair {|char, esc|
1109
+ next if char == '\\\\'
1110
+ next unless char =~ /\\./
1111
+ text.gsub!( esc[:re], esc[:md5] )
1112
+ }
1113
+
1114
+ return text
1115
+ end
1116
+
1117
+
1118
+ def pretransform_block_separators(str, rs)
1119
+ str.gsub(/^[ ]{0,#{TabWidth - 1}}[~][ ]*\n/){
1120
+ "\n~\n\n"
1121
+ }
1122
+ end
1123
+
1124
+
1125
+ TOCRegexp = %r{
1126
+ ^\{ # bracket on line-head
1127
+ [ ]* # optional inner space
1128
+ toc
1129
+
1130
+ (?:
1131
+ (?:
1132
+ [:] # colon
1133
+ | # or
1134
+ [ ]+ # 1 or more space
1135
+ )
1136
+ (.+?) # $1 = parameter
1137
+ )?
1138
+
1139
+ [ ]* # optional inner space
1140
+ \} # closer
1141
+ [ ]*$ # optional space on line-foot
1142
+ }ix
1143
+
1144
+ TOCStartLevelRegexp = %r{
1145
+ ^
1146
+ (?: # optional start
1147
+ h
1148
+ ([1-6]) # $1 = start level
1149
+ )?
1150
+
1151
+ (?: # range symbol
1152
+ [.]{2,}|[-] # .. or -
1153
+ )
1154
+
1155
+ (?: # optional end
1156
+ h? # optional 'h'
1157
+ ([1-6]) # $2 = end level
1158
+ )?$
1159
+ }ix
1160
+
1161
+ ### Replace each table-of-contents marker (e.g. "{toc}") in a copy of the specified
1162
+ ### +str+ with a Markdown list of links to the headers recorded in +rs+, and return it.
1163
+ def transform_toc( str, rs )
1164
+ @log.debug " Transforming tables of contents"
1165
+ str.gsub(TOCRegexp){
1166
+ start_level = 2 # default
1167
+ end_level = 6
1168
+
1169
+ param = $1
1170
+ if param then
1171
+ if param =~ TOCStartLevelRegexp then
1172
+ if !($1) and !($2) then
1173
+ rs.warnings << "illegal TOC parameter - #{param} (valid example: 'h2..h4')"
1174
+ else
1175
+ start_level = ($1 ? $1.to_i : 2)
1176
+ end_level = ($2 ? $2.to_i : 6)
1177
+ end
1178
+ else
1179
+ rs.warnings << "illegal TOC parameter - #{param} (valid example: 'h2..h4')"
1180
+ end
1181
+ end
1182
+
1183
+ if rs.headers.first and rs.headers.first.level >= (start_level + 1) then
1184
+ rs.warnings << "illegal structure of headers - h#{start_level} should be set before h#{rs.headers.first.level}"
1185
+ end
1186
+
1187
+
1188
+ ul_text = "\n\n"
1189
+ rs.headers.each do |header|
1190
+ if header.level >= start_level and header.level <= end_level then
1191
+ ul_text << ' ' * TabWidth * (header.level - start_level)
1192
+ ul_text << '* '
1193
+ ul_text << %Q|<a href="##{header.id}" rel="toc">#{header.content_html}</a>|
1194
+ ul_text << "\n"
1195
+ end
1196
+ end
1197
+ ul_text << "\n"
1198
+
1199
+ ul_text # output
1200
+
1201
+ }
1202
+ end
1203
+
1204
+ TableRegexp = %r{
1205
+ (?:
1206
+ ^([ ]{0,#{TabWidth - 1}}) # not indented
1207
+ (?:[|][ ]*) # NOT optional border
1208
+
1209
+ \S.*? # 1st cell content
1210
+
1211
+ (?: # 2nd cell or later
1212
+ [|] # cell splitter
1213
+ .+? # content
1214
+ )+ # 1 or more..
1215
+
1216
+ [|]? # optional border
1217
+ (?:\n|\Z) # line end
1218
+ )+
1219
+ }x
1220
+
1221
+ # Transform tables.
1222
+ def transform_tables(str, rs)
1223
+ str.gsub(TableRegexp){
1224
+ transform_table_rows($~[0], rs)
1225
+ }
1226
+ end
1227
+
1228
# Matches one cell of the header separator row, e.g. "---", ":--", "--:" or
# ":-:". The colons select the column alignment.
TableSeparatorCellRegexp = %r{
  ^
  [ ]*
  ([:])?   # $1 = left-align symbol
  [ ]*
  [-]+     # border
  [ ]*
  ([:])?   # $2 = right-align symbol
  [ ]*
  $
}x

# Render the table rows in +str+ (one row per line, cells separated by '|')
# as an HTML <table> and return the markup.
#
# When there are at least three rows and every cell of the second row is a
# separator cell, the first row becomes the <thead> and the separator row
# supplies a text-align style for each column.
def transform_table_rows(str, rs)
  # One array of cell strings per source line.
  rows = str.split("\n").map { |line| line.split('|') }

  rows.each do |cells|
    cells.first.lstrip!
    # A leading '|' border produces an empty first cell: drop it.
    cells.shift if cells.first.empty?
  end

  column_attrs = []
  html = ''
  html << "<table>\n"

  has_head = rows.size >= 3 && rows[1].all? { |cell| cell =~ TableSeparatorCellRegexp }
  if has_head
    head_cells = rows.shift
    separator_cells = rows.shift

    separator_cells.each do |cell|
      marks = TableSeparatorCellRegexp.match(cell)
      left, right = marks[1], marks[2]

      column_attrs <<
        if left && right
          ' style="text-align: center"'
        elsif right
          ' style="text-align: right"'
        elsif left
          ' style="text-align: left"'
        else
          ''
        end
    end

    html << "\t<thead><tr>\n"
    head_cells.each_with_index do |cell, i|
      html << "\t\t<th#{column_attrs[i]}>#{apply_span_transforms(cell.strip, rs)}</th>\n"
    end
    html << "\t</tr></thead>\n"
  end

  # Remaining rows form the table body.
  html << "\t<tbody>\n"
  rows.each do |cells|
    html << "\t\t<tr>\n"
    cells.each_with_index do |cell, i|
      html << "\t\t\t<td#{column_attrs[i]}>#{apply_span_transforms(cell.strip, rs)}</td>\n"
    end
    html << "\t\t</tr>\n"
  end
  html << "\t</tbody>\n"

  html << "</table>\n"
  html
end
1301
+
1302
+
1303
+ ### Transform any Markdown-style horizontal rules in a copy of the specified
1304
+ ### +str+ and return it.
1305
# Return a copy of +str+ in which each Markdown horizontal rule (a line of
# three or more '-', '*' or '_', optionally space-separated) is replaced by
# an <hr> element. +rs+ is not used.
def transform_hrules( str, rs )
  @log.debug " Transforming horizontal rules"
  rule_pattern = /^( ?[\-\*_] ?){3,}$/
  rule_markup  = "\n<hr#{EmptyElementSuffix}\n"
  str.gsub( rule_pattern, rule_markup )
end
1309
+
1310
+
1311
+
1312
+ # Patterns to match and transform lists
1313
# List markers: "1." style for ordered lists, one of * + - for unordered.
ListMarkerOl  = /\d+\./
ListMarkerUl  = /[*+-]/
ListMarkerAny = Regexp.union( ListMarkerOl, ListMarkerUl )

# Matches a whole list: from its first marker up to the end of input or up
# to a blank line that is followed by non-indented text that is not itself
# another list item. $1 is the first marker of the list.
ListRegexp = %r{
  (?:
    ^[ ]{0,#{TabWidth - 1}}   # Indent < tab width
    (#{ListMarkerAny})        # unordered or ordered ($1)
    [ ]+                      # At least one space
  )
  (?m:.+?)                    # item content (include newlines)
  (?:
    \z                        # Either EOF
    |                         # or
    \n{2,}                    # Blank line...
    (?=\S)                    # ...followed by non-space
    (?![ ]*                   # ...but not another item
      (#{ListMarkerAny})
      [ ]+)
  )
}x
1334
+
1335
+ ### Transform Markdown-style lists in a copy of the specified +str+ and
1336
+ ### return it.
1337
# Return a copy of +str+ with every Markdown list converted to <ul>/<ol>
# markup. The list type is decided by the first marker of each list; the
# items themselves are rendered by transform_list_items.
def transform_lists( str, rs )
  @log.debug " Transforming lists at %p" % (str[0,100] + '...')

  str.gsub( ListRegexp ) do |list|
    @log.debug " Found list %p" % list
    first_marker = $1
    tag = ListMarkerUl.match(first_marker) ? "ul" : "ol"
    items = transform_list_items( list, rs )

    "<#{tag}>\n#{items}</#{tag}>\n"
  end
end
1352
+
1353
+ # Pattern for transforming list items
1354
# Matches a single list item. The trailing lookahead requires the item to be
# followed by the end of input or by another marker at the same indent ($2);
# $5 captures any blank lines separating this item from the next one.
ListItemRegexp = %r{
  (\n)?                        # leading line = $1
  (^[ ]*)                      # leading whitespace = $2
  (#{ListMarkerAny}) [ ]+      # list marker = $3
  ((?m:.+?)                    # list item text = $4
  \n)
  (?= (\n*) (\z | \2 (#{ListMarkerAny}) [ ]+))
}x
1362
+
1363
+ ### Transform list items in a copy of the given +str+ and return it.
1364
# Return the <li> markup for the list source +str+.
#
# An item is rendered with block transforms when it is preceded by a blank
# line, contains a blank line, or is separated from the next item by blank
# lines. Otherwise it is rendered inline, after recursing for nested lists.
def transform_list_items( str, rs )
  @log.debug " Transforming list items"

  # Collapse trailing blank lines to a single newline.
  trimmed = str.sub( /\n{2,}\z/, "\n" )

  trimmed.gsub( ListItemRegexp ) do |line|
    @log.debug " Found item line %p" % line
    found = $~
    leading_line     = found[1]
    item             = found[4]
    separating_lines = found[5]

    needs_blocks = leading_line || /\n{2,}/.match(item) || !separating_lines.empty?

    body =
      if needs_blocks
        @log.debug " Found leading line or item has a blank"
        apply_block_transforms( outdent(item), rs )
      else
        # Recursion for sub-lists
        @log.debug " Recursing for sublist"
        nested = transform_lists( outdent(item), rs ).chomp
        apply_span_transforms( nested, rs )
      end

    "<li>#{body}</li>\n"
  end
end
1387
+
1388
# Matches a definition list: one or more groups, each made of term lines
# followed by one or more ':'-prefixed definitions. Each definition runs to
# the end of input or to a blank line followed by non-indented text.
DefinitionListRegexp = %r{
  (?:
    (?:^.+\n)+                        # dt
    \n*
    (?:
      ^[ ]{0,#{TabWidth - 1}}         # Indent < tab width
      \:                              # dd marker (line head)
      [ ]*                            # space
      ((?m:.+?))                      # dd content
      (?:
        \s*\z                         # end of string
        |                             # or
        \n{2,}                        # blank line
        (?=[ ]{0,#{TabWidth - 1}}\S)  # ...followed by
      )
    )+
  )+
}x
1406
+
1407
# Return a copy of +str+ with every definition list replaced by the markup
# produced by transform_definition_list_items.
def transform_definition_lists(str, rs)
  preview = str[0,100] + '...'
  @log.debug " Transforming definition lists at %p" % preview

  str.gsub( DefinitionListRegexp ) do |list|
    @log.debug " Found definition list %p (captures=%p)" % [list, $~.captures]
    transform_definition_list_items(list, rs)
  end
end
1414
+
1415
# First line of a definition: ':' at line head, up to TabWidth - 1 spaces,
# then the definition text ($1).
DDLineRegexp = %r{^\:[ ]{0,#{TabWidth - 1}}(.*)}
1416
+
1417
+
1418
# Render the definition-list source +str+ as a <dl> element and return it.
#
# The source is consumed line by line in groups of term lines (<dt>)
# followed by ':'-prefixed definitions (<dd>). If blank lines separate the
# terms from their first definition, the definitions of that group are
# rendered with block transforms; otherwise with span transforms.
def transform_definition_list_items(str, rs)
  buf = Util.generate_blank_string_io(str)
  buf.puts %Q|<dl>|

  lines = str.split("\n")
  until lines.empty?

    # Terms: consecutive non-empty lines that do not start with ':'.
    dts = []
    dts << lines.shift while lines.first =~ /^(?!\:).+$/

    # Blank lines between the terms and the definitions switch the
    # definitions of this group to block rendering.
    dd_as_block = false
    until lines.empty? || !lines.first.empty?
      lines.shift
      dd_as_block = true
    end

    dds = []
    while (head = lines.first && DDLineRegexp.match(lines.first))
      lines.shift

      # dd first line (text after the ':' marker)
      dd_buf = [head[1], "\n"]

      # dd second and more lines (sequential with 1st-line): stop at the
      # end of input, at a blank line, or at the next dd marker.
      until lines.empty? ||
            lines.first =~ /^[ ]{0,#{TabWidth - 1}}$/ ||
            lines.first =~ DDLineRegexp
        dd_buf << outdent(lines.shift) << "\n"
      end

      # dd second and more lines (separated with 1st-line): blank lines and
      # indented lines still belong to this dd.
      loop do
        break if lines.empty?

        if lines.first.empty?
          lines.shift
          dd_buf << "\n"
        elsif lines.first =~ /^[ ]{#{TabWidth},}/
          dd_buf << outdent(lines.shift) << "\n"
        else
          break   # not indented body
        end
      end

      dds << dd_buf.join

      # skip blank lines
      unless lines.empty?
        lines.shift while lines.first.empty?
      end
    end

    # html output
    dts.each do |dt|
      buf.puts %Q| <dt>#{apply_span_transforms(dt, rs)}</dt>|
    end

    dds.each do |dd|
      if dd_as_block
        buf.puts %Q| <dd>#{apply_block_transforms(dd, rs)}</dd>|
      else
        dd.gsub!(/\n+\z/, '') # chomp linefeeds
        buf.puts %Q| <dd>#{apply_span_transforms(dd.chomp, rs)}</dd>|
      end
    end
  end

  buf.puts %Q|</dl>|

  buf.string
end
1504
+
1505
+ # old
1506
+
1507
+
1508
+ # Pattern for matching codeblocks
1509
# Matches an indented code block: it starts after a blank line (or at the
# very start of the text), every line is indented by a tab or a tab-width of
# spaces, and it ends before the next non-indented line ($2) or at the end.
CodeBlockRegexp = %r{
  (?:\n\n|\A|\A\n)
  (                                  # $1 = the code block
    (?:
      (?:[ ]{#{TabWidth}} | \t)      # a tab or tab-width of spaces
      .*\n+
    )+
  )
  (^[ ]{0,#{TabWidth - 1}}\S|\Z)     # Lookahead for non-space at
                                     # line-start, or end of doc
}x
1520
+
1521
+
1522
+ ### Transform Markdown-style codeblocks in a copy of the specified +str+ and
1523
+ ### return it.
1524
# Return a copy of +str+ with indented code blocks wrapped in
# <pre><code>...</code></pre>.
#
# The block text is outdented, passed through encode_code and right-stripped;
# afterwards every placeholder matched by an EscapeTable :md5re pattern is
# turned back into its original character.
def transform_code_blocks( str, rs )
  @log.debug " Transforming code blocks"

  str.gsub( CodeBlockRegexp ) do |block|
    codeblock, remainder = $1, $2

    tmpl = %{\n\n<pre><code>%s\n</code></pre>\n\n%s}

    # patch for ruby 1.9.1 bug
    tmpl.force_encoding(str.encoding) if tmpl.respond_to?(:force_encoding)

    encoded = encode_code( outdent(codeblock), rs ).rstrip

    # recover all backslash escaped to original form
    EscapeTable.each do |char, hash|
      encoded.gsub!( hash[:md5re] ) { char }
    end

    # Generate the codeblock
    tmpl % [ encoded, remainder ]
  end
end
1549
+
1550
+
1551
# A fenced code block: an opening line of three or more '~', the body ($2)
# and a closing line repeating exactly the same fence ($1).
FencedCodeBlockRegexp = %r{^(\~{3,})\n((?m:.+?)\n)\1\n}

# Rewrite fenced code blocks in +str+ as indented code blocks. Each block is
# surrounded by "~" paragraphs on its own lines. +rs+ is not used.
def pretransform_fenced_code_blocks( str, rs )
  @log.debug " Transforming fenced code blocks => standard code blocks"

  separator = "\n~\n\n"
  str.gsub( FencedCodeBlockRegexp ) do |block|
    separator + indent($2) + separator
  end
end
1560
+
1561
+
1562
+
1563
+ # Pattern for matching Markdown blockquote blocks
1564
# Matches a blockquote: one or more chunks that each start with a '>' line,
# continue over the following non-blank lines and end with optional blank
# lines.
BlockQuoteRegexp = %r{
  (?:
    ^[ ]*>[ ]?      # '>' at the start of a line
    .+\n            # rest of the first line
    (?:.+\n)*       # subsequent consecutive lines
    \n*             # blanks
  )+
}x

# A <pre>...</pre> chunk, including the whitespace in front of it.
PreChunk = %r{ ( ^ \s* <pre> .+? </pre> ) }xm

# Return a copy of +str+ with Markdown blockquotes converted to
# <blockquote> elements.
#
# One level of '>' is removed, the content is block-transformed and then
# indented by one tab width. <pre> chunks have that indent removed again.
def transform_block_quotes( str, rs )
  @log.debug " Transforming block quotes"

  str.gsub( BlockQuoteRegexp ) do |quote|
    @log.debug "Making blockquote from %p" % quote

    quote.gsub!( /^ *> ?/, '' )   # Trim one level of quoting
    quote.gsub!( /^ +$/, '' )     # Trim whitespace-only lines

    pad = " " * TabWidth
    inner = apply_block_transforms( quote, rs ).gsub( /^/, pad )
    inner = inner.gsub( PreChunk ) { |pre| pre.gsub( /^#{pad}/o, '' ) }

    quoted = %{<blockquote>\n%s\n</blockquote>\n\n} % inner
    @log.debug "Blockquoted chunk is: %p" % quoted
    quoted
  end
end
1594
+
1595
+
1596
+ # AoBane change:
1597
+ # allow loosely formed URLs and addresses (BlueCloth is very strict)
1598
+ #
1599
+ # loose examples:
1600
+ # <skype:tetra-dice> (other protocol)
1601
+ # <ema+il@example.com> (ex: gmail alias)
1602
+ #
1603
+ # not adapted addresses:
1604
+ # <"Abc@def"@example.com> (refer to quoted-string of RFC 5321)
1605
+
1606
+
1607
# <url> auto-link; $1 = url
AutoAnchorURLRegexp = /<(#{URI.regexp})>/

# <address> auto-link; $1 = address
AutoAnchorEmailRegexp = /<([^'">\s]+?\@[^'">\s]+[.][a-zA-Z]+)>/

# Return a copy of +str+ in which <url> and <email address> shorthands are
# turned into links. URLs become plain anchors with HTML-escaped href and
# text; addresses are rendered by encode_email_address. +rs+ is not used.
def transform_auto_links( str, rs )
  @log.debug " Transforming auto-links"

  with_url_links = str.gsub( AutoAnchorURLRegexp ) do
    url = $1
    %|<a href="#{Util.escape_html(url)}">#{Util.escape_html(url)}</a>|
  end

  with_url_links.gsub( AutoAnchorEmailRegexp ) do |addr|
    encode_email_address( unescape_special_chars($1) )
  end
end
1621
+
1622
+
1623
+ # Encoder functions to turn characters of an email address into encoded
1624
+ # entities.
1625
# Encoder functions to turn the character code of one byte of an email
# address into a decimal entity, a hexadecimal entity, or the character
# itself.
Encoders = [
  lambda {|char| "&#%03d;" % char},
  lambda {|char| "&#x%X;" % char},
  lambda {|char| char.chr },
]

# Return an <a href="mailto:..."> link for +addr+ in which the characters
# are randomly written as decimal entities, hexadecimal entities or (for
# about one character in ten) left as they are.
#
# The ':' after "mailto" is always kept literal so that the scheme can be
# stripped from the link text, and '@' is always written as an entity.
#
# Bytes are compared against integer codes: on Ruby 1.9 and later
# String#each_byte yields Integers while the literals ?: and ?@ are
# one-character Strings, so the former `when ?:` / `when ?@` branches could
# never match.
def encode_email_address( addr )
  colon   = 0x3A
  at_sign = 0x40

  rval = ''
  ("mailto:" + addr).each_byte do |b|
    rval <<
      case b
      when colon
        ":"
      when at_sign
        Encoders[ rand(2) ][ b ]
      else
        r = rand(100)
        if r > 90
          Encoders[2][ b ]
        elsif r < 45
          Encoders[1][ b ]
        else
          Encoders[0][ b ]
        end
      end
  end

  # The link text is the encoded address without the "mailto:" prefix.
  %{<a href="%s">%s</a>} % [ rval, rval.sub(/.+?:/, '') ]
end
1654
+
1655
+
1656
+ # Regexp for matching Setext-style headers
1657
# Regexp for matching Setext-style headers
SetextHeaderRegexp = %r{
  (.+?)           # The title text ($1)

  (?:             # Markdown Extra: Header Id Attribute (optional)
    [ ]*          # space after closing #'s
    \{\#
    (\S+?)        # $2 = Id
    \}
    [ \t]*        # allowed lazy spaces
  )?
  \n
  ([\-=])+        # Match a line of = or -. Save only one in $3.
  [ ]*\n+
}x

# Regexp for matching ATX-style headers
AtxHeaderRegexp = %r{
  ^(\#+)          # $1 = string of #'s
  [ ]*
  (.+?)           # $2 = Header text
  [ ]*
  \#*             # optional closing #'s (not counted)

  (?:             # Markdown Extra: Header Id Attribute (optional)
    [ ]*          # space after closing #'s
    \{\#
    (\S+?)        # $3 = Id
    \}
    [ \t]*        # allowed lazy spaces
  )?

  \n+
}x

# Either header style. In the union the Setext captures are $1..$3 and the
# ATX captures are $4..$6.
HeaderRegexp = Regexp.union(SetextHeaderRegexp, AtxHeaderRegexp)

# Header ids accepted without a warning.
IdRegexp = /^[a-zA-Z][a-zA-Z0-9\:\._-]*$/

# Apply Markdown header transforms to a copy of the given +str+ and render
# state +rs+ and return the result.
#
# For every header this
# * derives the level (Setext '=' is 1 and '-' is 2; ATX is the number of
#   '#'), warning when it exceeds 6;
# * prepends a section number when rs.numbering? is true and the level lies
#   between rs.numbering_start_level and 6;
# * takes the id from the {#id} attribute, or generates one according to
#   rs.header_id_type, warning on collisions and on ids not matching
#   IdRegexp;
# * records the header in rs.headers when rs.block_transform_depth is 1;
# * emits <hN> markup, with an id attribute only if @use_header_id is set.
def transform_headers( str, rs )
  @log.debug " Transforming headers"

  # Setext-style headers:
  #   Header 1
  #   ========
  #
  #   Header 2
  #   --------
  #

  # One counter per numbering depth. Six slots are needed: with a numbering
  # start level of 1 an h6 sits at depth 5. With only five slots that depth
  # was never initialised and rendered as an empty number ("1.1.1.1.1..").
  section_numbers = Array.new(6)

  str.gsub( HeaderRegexp ) do |m|
    if $1
      @log.debug "Found setext-style header"
      title, id, hdrchar = $1, $2, $3

      case hdrchar
      when '='
        level = 1
      when '-'
        level = 2
      end
    else
      @log.debug "Found ATX-style header"
      hdrchars, title, id = $4, $5, $6
      level = hdrchars.length

      if level >= 7
        rs.warnings << "illegal header level - h#{level} ('#' symbols are too many)"
      end
    end

    prefix = ''
    if rs.numbering? && level >= rs.numbering_start_level && level <= 6
      depth = level - rs.numbering_start_level

      section_numbers.each_index do |i|
        if i == depth && section_numbers[depth]
          # same depth as an earlier header: count up
          section_numbers[i] += 1
        elsif i <= depth
          # first header seen at this depth (or an ancestor never numbered)
          section_numbers[i] ||= 1
        else
          # deeper counters restart under the new section
          section_numbers[i] = nil
        end
      end

      no = (0..depth).map { |i| "#{section_numbers[i]}." }.join
      prefix = "#{no} "
    end

    title_html = apply_span_transforms( title, rs )

    unless id
      case rs.header_id_type
      when HeaderIDType::ESCAPE
        id = escape_to_header_id(title_html)
        if rs.headers.any? { |h| h.id == id }
          rs.warnings << "header id collision - #{id}"
          id = "bfheader-#{Digest::MD5.hexdigest(title)}"
        end
      else
        id = "bfheader-#{Digest::MD5.hexdigest(title)}"
      end
    end

    title = "#{prefix}#{title}"
    title_html = "#{prefix}#{title_html}"

    unless id =~ IdRegexp
      rs.warnings << "illegal header id - #{id} (legal chars: [a-zA-Z0-9_-.] | 1st: [a-zA-Z])"
    end

    # Only top-level headers are recorded in the render state.
    if rs.block_transform_depth == 1
      rs.headers << RenderState::Header.new(id, level, title, title_html)
    end

    if @use_header_id
      %{<h%d id="%s">%s</h%d>\n\n} % [ level, id, title_html, level ]
    else
      %{<h%d>%s</h%d>\n\n} % [ level, title_html, level ]
    end
  end
end
1793
+
1794
+
1795
+ ### Wrap all remaining paragraph-looking text in a copy of +str+ inside <p>
1796
+ ### tags and return it.
1797
# Split +str+ into paragraphs at blank lines and return them joined by a
# blank line, where each paragraph is
# * the stored HTML block, if it is a placeholder key of rs.html_blocks,
# * an empty string, if it is the "~" block separator,
# * otherwise its span-transformed text wrapped in <p>...</p>.
def form_paragraphs( str, rs )
  @log.debug " Forming paragraphs"

  trimmed = str.sub( /\A\n+/, '' ).sub( /\n+\z/, '' )

  rendered = trimmed.split( /\n{2,}/ ).map do |graf|
    if rs.html_blocks.key?( graf )
      # Unhashify HTML blocks if this is a placeholder
      rs.html_blocks[ graf ]
    elsif graf == '~'
      # no output if this is a block separator
      ''
    else
      apply_span_transforms(graf, rs).sub( /^[ ]*/, '<p>' ) + '</p>'
    end
  end

  rval = rendered.join( "\n\n" )
  @log.debug " Formed paragraphs: %p" % rval
  rval
end
1824
+
1825
+
1826
+ # Pattern to match the linkid part of an anchor tag for reference-style
1827
+ # links.
1828
# Pattern to match the linkid part of an anchor tag for reference-style
# links.
RefLinkIdRegexp = %r{
  [ ]?              # Optional leading space
  (?:\n[ ]*)?       # Optional newline + spaces
  \[
    (.*?)           # Id = $1
  \]
}x

# Pattern to match the "(url "title")" part of an inline link.
InlineLinkRegexp = %r{
  \(                # Literal paren
    [ ]*            # Zero or more spaces
    <?(.+?)>?       # URI = $1
    [ ]*            # Zero or more spaces
    (?:             #
      ([\"\'])      # Opening quote char = $2
      (.*?)         # Title = $3
      \2            # Matching quote char
    )?              # Title is optional
  \)
}x

# Apply Markdown anchor transforms to a copy of +str+ with the render state
# +rs+ and return it.
#
# The text is scanned for "[...]" (nested brackets allowed). What follows
# the bracket decides the output:
# * "[^id]"        - footnote reference (Markdown Extra)
# * "[text][id]"   - reference link, resolved through rs.urls / rs.titles
# * "[text](url)"  - inline link; a url of "#" targets "#text"
# * anything else  - the source is copied unchanged.
# If a bracket is never closed, the rest of the input is returned as-is.
def transform_anchors( str, rs )
  @log.debug " Transforming anchors"
  @scanner.string = str.dup
  text = ''

  # Scan the whole string
  until @scanner.eos?

    # Plain text: copy everything up to the next opening bracket.
    unless @scanner.scan( /\[/ )
      @log.debug " Scanning to the next link from %p" % @scanner.rest
      text += @scanner.scan( /[^\[]+/ )
      next
    end

    link = ''
    depth = 1
    startpos = @scanner.pos
    @log.debug " Found a bracket-open at %d" % startpos

    # Collect the bracket content, following nested []s, until the opening
    # bracket is balanced.
    until depth.zero?
      linktext = @scanner.scan_until( /\]|\[/ )

      # No more brackets: it was not an anchor, so give up on the rest.
      if linktext.nil?
        @log.debug " Missing closing brace, assuming non-link."
        link += @scanner.rest
        @scanner.terminate
        return text + '[' + link
      end

      @log.debug " Found a bracket at depth %d: %p" % [ depth, linktext ]
      link += linktext

      # Decrement depth for each closing bracket
      depth += ( linktext[-1, 1] == ']' ? -1 : 1 )
      @log.debug " Depth is now #{depth}"
    end
    link.slice!( -1 ) # Trim final ']'
    @log.debug " Found leading link %p" % link

    # Markdown Extra: Footnote
    if link =~ /^\^(.+)/
      id = $1
      if rs.footnotes[id]
        rs.found_footnote_ids << id
        label = "[#{rs.found_footnote_ids.size}]"
      else
        rs.warnings << "undefined footnote id - #{id}"
        label = '[?]'
      end

      text += %Q|<sup id="footnote-ref:#{id}"><a href="#footnote:#{id}" rel="footnote">#{label}</a></sup>|

    # Look for a reference-style second part
    elsif @scanner.scan( RefLinkIdRegexp )
      given_id = @scanner[1]
      # An empty "[]" means: use the link text itself as the id.
      linkid = (given_id.empty? ? link.dup : given_id).downcase
      @log.debug " Found a linkid: %p" % linkid

      if rs.urls.key?( linkid )
        # Known id: build an anchor tag for it.
        @log.debug " Found link key in the link table: %p" % rs.urls[linkid]
        url = escape_md( rs.urls[linkid] )

        text += %{<a href="#{url}"}
        if rs.titles.key?(linkid)
          text += %{ title="%s"} % escape_md( rs.titles[linkid] )
        end
        text += %{>#{link}</a>}
      else
        # Unknown id: append the raw source to the result.
        raw = @scanner.string[ startpos-1 .. @scanner.pos-1 ]
        @log.debug " Linkid %p not found in link table" % linkid
        @log.debug " Appending original string instead: "
        @log.debug "%p" % raw

        rs.warnings << "link-id not found - #{linkid}"
        text += raw
      end

    # ...or for an inline style second part
    elsif @scanner.scan( InlineLinkRegexp )
      url = @scanner[1]
      title = @scanner[3]
      @log.debug " Found an inline link to %p" % url

      url = "##{link}" if url == '#' # target anchor briefing (since AoBane 0.40)

      text += %{<a href="%s"} % escape_md( url )
      if title
        title.gsub!( /"/, "&quot;" )
        text += %{ title="%s"} % escape_md( title )
      end
      text += %{>#{link}</a>}

    # No linkid part: just append the first part as-is.
    else
      @log.debug "No linkid, so no anchor. Appending literal text."
      text += @scanner.string[ startpos-1 .. @scanner.pos-1 ]
    end
  end

  text
end
1967
+
1968
+
1969
+ # Pattern to match strong emphasis in Markdown text
1970
# Pattern to match strong emphasis in Markdown text
BoldRegexp = %r{ (\*\*|__) (\S|\S.*?\S) \1 }x

# Pattern to match normal emphasis in Markdown text
ItalicRegexp = %r{ (\*|_) (\S|\S.*?\S) \1 }x

# Return a copy of +str+ with **strong** / __strong__ converted to <strong>
# and *emphasis* / _emphasis_ converted to <em>. Strong emphasis is handled
# first so that its doubled markers are not read as two single ones.
# +rs+ is not used.
def transform_italic_and_bold( str, rs )
  @log.debug " Transforming italic and bold"

  with_strong = str.gsub( BoldRegexp ) { "<strong>#{$2}</strong>" }
  with_strong.gsub( ItalicRegexp ) { "<em>#{$2}</em>" }
end
1984
+
1985
+
1986
+ ### Transform backticked spans into <code> spans.
1987
# Return +str+ with backtick-delimited spans converted to <code> elements.
#
# A span is opened by a run of one or more backticks and closed by the next
# occurrence of the same run. Its content is stripped and passed through
# encode_code. Raises FormatError when an opening run is never closed.
# When +str+ contains no backtick at all, +str+ itself is returned.
def transform_code_spans( str, rs )
  @log.debug " Transforming code spans"

  @scanner.string = str.dup
  unless @scanner.exist?( /`/ )
    @scanner.terminate
    @log.debug "No backticks found for code span in %p" % str
    return str
  end

  @log.debug "Transforming code spans in %p" % str

  # Build the transformed text anew
  text = ''

  until @scanner.eos?
    # Scan up to an opening backtick
    pre = @scanner.scan_until( /.??(?=`)/m )

    # No more backticks: append the rest and finish.
    unless pre
      text += @scanner.rest
      @scanner.terminate
      next
    end

    text += pre
    @log.debug "Found backtick at %d after '...%s'" % [ @scanner.pos, text[-10, 10] ]

    # The closing delimiter is the same run of backticks as the opener.
    opener = @scanner.scan( /`+/ )
    len = opener.length
    closer = Regexp::new( opener )
    @log.debug "Scanning for end of code span with %p" % closer

    codespan = @scanner.scan_until( closer ) or
      raise FormatError::new( @scanner.rest[0,20],
        "No %p found before end" % opener )

    @log.debug "Found close of code span at %d: %p" % [ @scanner.pos - len, codespan ]

    # Chop the closing backticks off, strip surrounding whitespace and
    # encode what is left.
    codespan.slice!( -len, len )
    text += "<code>%s</code>" % encode_code( codespan.strip, rs )
  end

  text
end
2040
+
2041
+
2042
+ # Next, handle inline images: ![alt text](url "optional title")
2043
+ # Don't forget: encode * and _
2044
# Inline images: ![alt text](url "optional title")
InlineImageRegexp = %r{
  (                     # Whole match = $1
    !\[ (.*?) \]        # alt text = $2
    \([ ]*
    <?(\S+?)>?          # source url = $3
    [ ]*
    (?:                 #
      (["'])            # quote char = $4
      (.*?)             # title = $5
      \4                # matching quote
      [ ]*
    )?                  # title is optional
    \)
  )
}x

# Reference-style images: ![alt text][id]
ReferenceImageRegexp = %r{
  (                     # Whole match = $1
    !\[ (.*?) \]        # Alt text = $2
    [ ]?                # Optional space
    (?:\n[ ]*)?         # One optional newline + spaces
    \[ (.*?) \]         # id = $3
  )
}x

# Return a copy of +str+ with image markup turned into <img> tags.
#
# Reference-style images are resolved first through rs.urls / rs.titles
# (an empty id falls back to the alt text); unknown ids leave the source
# untouched. Inline images are converted afterwards. Double quotes in alt
# texts and inline titles are written as &quot;.
def transform_images( str, rs )
  @log.debug " Transforming images %p" % str

  # Handle reference-style labeled images: ![alt text][id]
  with_references = str.gsub( ReferenceImageRegexp ) do |match|
    whole, alt, linkid = $1, $2, $3.downcase
    @log.debug "Matched %p" % match
    alt.gsub!( /"/, '&quot;' )

    # for shortcut links like ![this][].
    linkid = alt.downcase if linkid.empty?

    result =
      if rs.urls.key?( linkid )
        url = escape_md( rs.urls[linkid] )
        @log.debug "Found url '%s' for linkid '%s' " % [ url, linkid ]

        tag = %{<img src="%s" alt="%s"} % [ url, alt ]
        if rs.titles.key?( linkid )
          tag += %{ title="%s"} % escape_md( rs.titles[linkid] )
        end
        tag + EmptyElementSuffix
      else
        whole
      end

    @log.debug "Replacing %p with %p" % [ match, result ]
    result
  end

  # Inline image style
  with_references.gsub( InlineImageRegexp ) do |match|
    @log.debug "Found inline image %p" % match
    whole, alt, title = $1, $2, $5
    url = escape_md( $3 )
    alt.gsub!( /"/, '&quot;' )

    tag = %{<img src="%s" alt="%s"} % [ url, alt ]
    if title
      title.gsub!( /"/, '&quot;' )
      tag += %{ title="%s"} % escape_md( title )
    end
    tag += EmptyElementSuffix

    @log.debug "Replacing %p with %p" % [ match, tag ]
    tag
  end
end
2124
+
2125
+
2126
+ # Regexp to match special characters in a code block
2127
# Regexp to match special characters in a code block
CodeEscapeRegexp = %r{( \* | _ | \{ | \} | \[ | \] | \\ )}x

# Return a copy of the code text +str+. +rs+ is not used.
#
# The HTML escaping (& < >) and Markdown-character encoding this method once
# performed were commented out, which left the method without a body: it
# returned nil. As a result transform_code_blocks failed on `nil.rstrip`
# and code spans were rendered empty. The text is now handed back unchanged,
# which is what encode_html does as well.
#
# NOTE(review): if escaping is meant to be active again, the former
# implementation was
#   str.gsub( %r{&}, '&amp;' ).gsub( %r{<}, '&lt;' ).gsub( %r{>}, '&gt;' ).
#     gsub( CodeEscapeRegexp ) {|match| EscapeTable[match][:md5]}
def encode_code( str, rs )
  str.dup
end
2137
+
2138
# Build a header id from the header's HTML +str+: tags are removed,
# whitespace becomes "_", Markdown characters are protected with escape_md,
# "/" becomes ".2F", and the result is URI-escaped with every "%" of the
# escapes replaced by ".".
#
# URI.escape was removed in Ruby 3.0, so the RFC 2396 escaper is looked up
# where it lives on the running Ruby. URI.escape is only used as a last
# resort on versions that have neither parser constant.
def escape_to_header_id(str)
  escaper =
    if defined?(URI::RFC2396_PARSER)
      URI::RFC2396_PARSER
    elsif defined?(URI::DEFAULT_PARSER)
      URI::DEFAULT_PARSER
    else
      URI
    end

  without_tags = str.gsub(/<\/?[^>]*>/, "").gsub(/\s/, "_")
  protected_text = escape_md(without_tags).gsub("/", ".2F")
  escaper.escape(protected_text).gsub("%", ".")
end
2141
+
2142
+ #################################################################
2143
+ ### U T I L I T Y F U N C T I O N S
2144
+ #################################################################
2145
+
2146
+ ### Escape any markdown characters in a copy of the given +str+ and return
2147
+ ### it.
2148
# Return a copy of +str+ in which every '*' and '_' is replaced by the :md5
# placeholder stored for that character in EscapeTable.
def escape_md( str )
  str.gsub( /\*|_/ ) do |symbol|
    EscapeTable[symbol][:md5]
  end
end
2152
+
2153
+
2154
+ # Matching constructs for tokenizing X/HTML
2155
# Matching constructs for tokenizing X/HTML
HTMLCommentRegexp = %r{ <! ( -- .*? -- \s* )+ > }mx
XMLProcInstRegexp = %r{ <\? .*? \?> }mx
MetaTag = Regexp::union( HTMLCommentRegexp, XMLProcInstRegexp )

HTMLTagOpenRegexp = %r{ < [a-z/!$] [^<>]* }imx
HTMLTagCloseRegexp = %r{ > }x
HTMLTagPart = Regexp::union( HTMLTagOpenRegexp, HTMLTagCloseRegexp )

# Break the HTML source in +str+ into a series of tokens and return them.
#
# Each token is a two-element Array [type, text] where type is :tag
# (comments, processing instructions and tags, including nested <>s) or
# :text. When a block is given, type and text of every token are also
# yielded to it as they are extracted.
#
# A tag that is never closed ends where the input stops matching tag parts.
def tokenize_html( str )
  depth = 0
  tokens = []
  @scanner.string = str.dup
  type, token = nil, nil

  until @scanner.eos?
    @log.debug "Scanning from %p" % @scanner.rest

    # Match comments and PIs without nesting
    if (( token = @scanner.scan(MetaTag) ))
      type = :tag

    # Do nested matching for HTML tags
    elsif (( token = @scanner.scan(HTMLTagOpenRegexp) ))
      tagstart = @scanner.pos
      @log.debug " Found the start of a plain tag at %d" % tagstart

      # Start the token with the opening angle
      depth = 1
      type = :tag

      # Scan the rest of the tag, allowing unlimited nested <>s.
      while depth.nonzero?

        # Scan either an opener or a closer
        chunk = @scanner.scan( HTMLTagPart ) or
          break # AoBane Fix (refer to spec/code-block.rb)

        @log.debug " Found another part of the tag at depth %d: %p" % [ depth, chunk ]

        token += chunk

        # A closing angle bracket ends one nesting level, anything else
        # opens a nested tag.
        depth += ( token[-1, 1] == '>' ? -1 : 1 )
        @log.debug " Depth is now #{depth}"
      end

    # Match text segments
    else
      @log.debug " Looking for a chunk of text"
      type = :text

      # Scan forward, always matching at least one character to move
      # the pointer beyond any non-tag '<'.
      token = @scanner.scan_until( /[^<]+/m )

      # Nothing but '<' characters are left: scan_until found no match and
      # did not move, which used to repeat this branch forever. Take the
      # remainder as the final text token.
      if token.nil?
        token = @scanner.rest
        @scanner.terminate
      end
    end

    @log.debug " type: %p, token: %p" % [ type, token ]

    # If a block is given, feed it one token at a time. Add the token to
    # the token list to be returned regardless.
    yield( type, token ) if block_given?
    tokens << [ type, token ]
  end

  tokens
end
2232
+
2233
+
2234
+ ### Return +str+ unchanged (the HTML-encoding of angle brackets and ampersands is currently disabled).
2235
# Hand +str+ back as it is: the same object, not a copy.
#
# The encoding of bare ampersands and of '<' characters that do not start a
# tag is switched off; the former implementation is kept for reference:
#   str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w+);)/i, "&amp;" ).
#     gsub( %r{<(?![a-z/?\$!])}i, "&lt;" )
def encode_html( str )
  str
end
2240
+
2241
+
2242
+ ### Remove one level of line-leading tabs or spaces from a copy of +str+ and
2243
+ ### return it.
2244
# Return a copy of +str+ in which every line has lost one leading tab or up
# to TabWidth leading spaces.
def outdent( str )
  one_indent_level = /^(\t|[ ]{1,#{TabWidth}})/
  str.gsub( one_indent_level, '' )
end
2247
+
2248
# Return a copy of +str+ in which every line is prefixed with TabWidth
# spaces.
def indent(str)
  one_indent_level = ' ' * TabWidth
  str.gsub( /^/, one_indent_level )
end
2251
+
2252
+ end
2253
+ end