AoBane 0.0.3 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/AoBane.rb CHANGED
@@ -1,2196 +1,2253 @@
1
- #
2
- # AoBane - Extended Markdown Converter
3
- #
4
- # Author of Original BlueFeather: Dice <tetradice@gmail.com>
5
- # Remaker: set.minami <set.minami@gmail.com>
6
- # Website: https://github.com/setminami/AoBane/blob/master/README.md
7
- # License: GPL version 2 or later
8
- #
9
- # If you want to know more about AoBane, see the website.
10
- #
11
- #
12
- #
13
- #-- Copyrights & License -------------------------------------------------------
14
- #
15
- # Original Markdown:
16
- # Copyright (c) 2003-2004 John Gruber
17
- # <http://daringfireball.net/>
18
- # All rights reserved.
19
- #
20
- # Original BlueCloth:
21
- # Copyright (c) 2004 The FaerieMUD Consortium.
22
- #
23
- # AoBane:
24
- # Copyright (c) 2013 Set.Minami
25
- #
26
- # AoBane is free software; you can redistribute it and/or modify it under
27
- # the terms of the GNU General Public License as published by the Free Software
28
- # Foundation; either version 2 of the License, or (at your option) any later
29
- # version.
30
- #
31
- # AoBane is distributed in the hope that it will be useful, but WITHOUT ANY
32
- # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
33
- # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
34
-
35
-
36
- require 'digest/md5'
37
- require 'logger'
38
- require 'strscan'
39
- require 'stringio'
40
- require 'uri'
41
- require 'math_ml/string'
42
-
43
- module AoBane
44
# Library version information.
VERSION = '0.01'.freeze
VERSION_NUMBER = 0.01
RELEASE_DATE = '2013-03-30'.freeze
VERSION_LABEL = "#{VERSION} (#{RELEASE_DATE})".freeze

# The UTF-8 byte order mark, and a pattern that matches it only at the very
# beginning of a string. \A is used instead of ^ because ^ also matches
# after every embedded newline, which is never where a BOM belongs.
UTF8_BOM = "\xef\xbb\xbf".freeze
UTF8_BOM_PATTERN = /\A#{UTF8_BOM}/
51
-
52
-
53
# Module-level shortcuts. Each one builds a throwaway Parser and forwards
# its arguments to the Parser method of the same name.
class << self
  # Forward +src+ to Parser#parse_text.
  def parse_text(src)
    parser = Parser.new
    parser.parse_text(src)
  end
  alias parse parse_text

  # Forward +src+ and +default_enc+ to Parser#parse_document.
  def parse_document(src, default_enc = EncodingType::UTF8)
    parser = Parser.new
    parser.parse_document(src, default_enc)
  end

  # Forward +path+ to Parser#parse_text_file.
  def parse_text_file(path)
    parser = Parser.new
    parser.parse_text_file(path)
  end
  alias parse_file parse_text_file

  # Forward +path+ and +default_enc+ to Parser#parse_document_file.
  def parse_document_file(path, default_enc = EncodingType::UTF8)
    parser = Parser.new
    parser.parse_document_file(path, default_enc)
  end
end
76
-
77
### Base class of the exceptions defined by AoBane.
class Error < ::RuntimeError
end

### Raised by EncodingType.regulate for an encoding name it does not accept.
class EncodingError < Error
end

### Exception class for formatting errors.
class FormatError < Error

  ### Build the error from the offending source +str+ and, optionally, a
  ### +specific+ description of what is wrong with it.
  def initialize( str, specific=nil )
    message =
      if specific
        "Bad markdown format near %p: %s" % [ str, specific ]
      else
        "Bad markdown format near %p" % str
      end

    super( message )
  end
end
99
-
100
# Accepted values for the header id type option (see Document#header_id_type
# and the check at the top of Parser#parse_text).
module HeaderIDType
  MD5    = 'md5'
  ESCAPE = 'escape'
end
104
-
105
# Canonical names of the supported text encodings, plus helpers that map a
# user-supplied name onto them.
module EncodingType
  EUC = 'euc-jp'
  EUCJP = EUC_JP = EUC

  SJIS = 'shift_jis'
  SHIFT_JIS = SJIS

  UTF8 = 'utf-8'
  UTF_8 = UTF8

  ASCII = 'ascii'
  US_ASCII = ASCII

  # Map +str_value+ (case-insensitive) onto one of the canonical constants.
  # Raises EncodingError for any other name.
  def self.regulate(str_value)
    name = str_value.downcase
    return SJIS  if name == 'shift-jis' || name == 'shift_jis'
    return EUC   if name == 'euc-jp'
    return UTF8  if name == 'utf-8'
    return ASCII if name == 'ascii'

    raise EncodingError, "not adapted encoding type - #{str_value} (shift[-_]jis, euc-jp, utf-8, or ascii)"
  end

  # Value to assign to $KCODE for +str_value+: the canonical name itself,
  # or 'none' for ASCII.
  def self.convert_to_kcode(str_value)
    type = regulate(str_value)
    type == ASCII ? 'none' : type
  end

  # Charset name to advertise in HTML for +str_value+, or nil for ASCII.
  def self.convert_to_charset(str_value)
    charsets = { EUC => 'euc-jp', SJIS => 'shift_jis', UTF8 => 'utf-8' }
    charsets[regulate(str_value)]
  end

end
159
-
160
# Small stateless helpers shared by Document and Parser.
module Util
  # Characters replaced by escape_html, with the entity used for each.
  HTML_ESC = {
    '&' => '&amp;',
    '"' => '&quot;',
    '<' => '&lt;',
    '>' => '&gt;'
  }

  module_function

  # Return a copy of +str+ with &, ", < and > replaced by HTML entities.
  #
  # The replacement had been commented out, so this method returned its
  # input untouched and callers that build <title>, <meta>, href values and
  # the warnings block emitted unescaped text.
  #
  # from http://jp.rubyist.net/magazine/?0010-CodeReview#l28
  # (Author: Minero Aoki)
  def escape_html(str)
    table = HTML_ESC # optimize: resolve the constant once
    str.gsub(/[&"<>]/) {|s| table[s] }
  end

  # Return an empty StringIO. When StringIO supports #set_encoding, its
  # encoding is taken from +encoding_base+ (an object responding to
  # #encoding, such as a String).
  def generate_blank_string_io(encoding_base)
    io = StringIO.new
    io.set_encoding(encoding_base.encoding) if io.respond_to?(:set_encoding)
    io
  end

  # Run the given block. On interpreters without Encoding (ruby 1.8 and
  # earlier) $KCODE is set to +kcode+ for the duration of the block and
  # restored afterwards; otherwise +kcode+ is ignored.
  def change_kcode(kcode = nil)
    if defined?(Encoding) then
      # ruby 1.9 later
      yield
    else
      # ruby 1.8 earlier
      original_kcode = $KCODE

      begin
        $KCODE = kcode if kcode
        yield
      ensure
        # recover
        $KCODE = original_kcode
      end
    end
  end

  # True when +str+ starts with the UTF-8 byte order mark.
  def utf8_bom?(str)
    if str.respond_to?(:getbyte) and str.respond_to?(:bytesize) then
      # compare raw bytes so the check does not depend on the string's encoding
      str.bytesize >= 3 && (0..2).all? {|i| str.getbyte(i) == UTF8_BOM.getbyte(i) }
    else
      str =~ UTF8_BOM_PATTERN ? true : false
    end
  end
end
224
-
225
# A source document: "Key: value" header lines followed by a body.
# Header names are stored lower-cased; values are stored as strings.
class Document
  HEADER_PATTERN = /^([a-zA-Z0-9-]+?)\s*\:\s*(.+?)\s*(?:\n|\Z)/
  BLANK_LINE_PATTERN = /^\n/
  HEADER_SEQUEL_PATTERN = /^\s+(.+)$/

  attr_accessor :headers, :body
  alias text body
  alias text= body=

  class << self
    # Read headers and body from the IO +input+, starting at its current
    # position, and return a new Document. +default_enc+ is the encoding
    # assumed until an "Encoding:" header says otherwise.
    def parse_io(input, default_enc = EncodingType::UTF8)
      headers = {}
      body = nil
      start_pos = input.pos
      default_enc = EncodingType.regulate(default_enc)

      Util.change_kcode(EncodingType.convert_to_kcode(default_enc)) do
        # default encoding
        input.set_encoding(Encoding.find(default_enc)) if defined?(Encoding)

        # --- headers ---
        resume_pos = nil
        at_first_line = true

        loop do
          resume_pos = input.pos
          line = input.gets

          # cut UTF-8 BOM
          line.slice!(UTF8_BOM_PATTERN) if at_first_line and Util.utf8_bom?(line)
          at_first_line = false

          # EOF or end of the header section
          break unless line and line.chomp =~ HEADER_PATTERN

          key = $1.downcase; value = $2

          if key == 'encoding' and not headers.include?('encoding') then
            kc = EncodingType.convert_to_kcode(value.downcase)
            if input.respond_to?(:set_encoding) then
              input.set_encoding(EncodingType.regulate(value))

              # rewind (reason => [ruby-list:45988])
              input.pos = start_pos
              at_first_line = true
            else
              $KCODE = kc
            end
          end

          headers[key] = value
        end

        # step back to the start of the line that ended the header section
        input.pos = resume_pos

        # --- blank lines between headers and body ---
        loop do
          resume_pos = input.pos
          line = input.gets
          break if line.nil? or not line =~ BLANK_LINE_PATTERN
        end

        # step back to the first non-blank line
        input.pos = resume_pos

        # --- body ---
        body = input.read
      end

      new(headers, body)
    end

    # Same as parse_io, reading from the String +str+.
    def parse(str, default_enc = EncodingType::UTF8)
      parse_io(StringIO.new(str), default_enc)
    end
  end

  # Build a document from a +headers+ hash and a +body+ string. Header
  # names are normalized through #[]=.
  def initialize(headers = {}, body = '')
    @headers = {}
    headers.each {|name, value| self[name] = value }
    @body = body
  end

  # Header value for +key+ (looked up case-insensitively).
  def [](key)
    @headers[key.to_s.downcase]
  end

  # Store +value+ (as a string) under the lower-cased +key+.
  def []=(key, value)
    @headers[key.to_s.downcase] = value.to_s
  end

  def title
    @headers['title']
  end

  def css
    @headers['css']
  end

  # True when the 'numbering' header is one of yes / 1 / true / on.
  def numbering
    %w(yes 1 true on).include?(@headers['numbering'])
  end

  alias numbering? numbering

  # The 'numbering-start-level' header as an integer in 1..6; 2 when the
  # header is missing or out of range.
  def numbering_start_level
    level = (@headers['numbering-start-level'] || 2).to_i
    (level >= 1 and level <= 6) ? level : 2
  end

  # The 'encoding' header, defaulting to UTF-8.
  def encoding_type
    @headers['encoding'] || EncodingType::UTF8
  end

  # The lower-cased 'header-id-type' header, defaulting to MD5.
  def header_id_type
    (@headers['header-id-type'] || HeaderIDType::MD5).downcase
  end

  def kcode
    self.encoding_type && EncodingType.convert_to_kcode(self.encoding_type)
  end

  # Render this document with a fresh Parser.
  def to_html
    Parser.new.document_to_html(self)
  end
end
382
-
383
-
384
- class Parser
385
# State carried through one render: link URLs and titles, hidden HTML
# blocks, footnotes, collected headers, warnings and option switches.
class RenderState
  # One collected header.
  Header = Struct.new(:id, :level, :content, :content_html)

  # from Original BlueCloth
  attr_accessor :urls, :titles, :html_blocks, :log

  # AoBane Extension
  attr_accessor :footnotes, :found_footnote_ids, :warnings
  attr_accessor :headers, :block_transform_depth

  # option switches
  attr_accessor :header_id_type
  attr_accessor :numbering, :numbering_start_level
  alias numbering? numbering

  # Start with empty collections and the default options
  # (MD5 header ids, no numbering, numbering from level 2).
  def initialize
    @urls = {}
    @titles = {}
    @html_blocks = {}
    @log = nil

    @footnotes = {}
    @found_footnote_ids = []
    @warnings = []
    @headers = []
    @block_transform_depth = 0

    @header_id_type = HeaderIDType::MD5
    @numbering = false
    @numbering_start_level = 2
  end

end
414
-
415
# Tab width for #detab! if none is specified
TabWidth = 4

# The tag-closing string -- set to '>' for HTML
EmptyElementSuffix = " />";

# Placeholder table for escaped characters. For every special character
# there are two entries, keyed by the character itself and by its
# backslash-escaped form; each entry holds the MD5 of its key, a regexp
# matching that MD5, a regexp matching backslash + character, and the
# character to restore.
EscapeTable = {}
'\\`*_{}[]()#.!|:~'.each_char do |char|
  [ char, "\\#{char}" ].each do |key|
    digest = Digest::MD5::hexdigest( key )

    EscapeTable[ key ] = {
      :md5 => digest,
      :md5re => Regexp::new( digest ),
      :re => Regexp::new( '\\\\' + Regexp::escape(char) ),
      :unescape => char,
    }
  end
end
442
-
443
-
444
- #################################################################
445
- ### I N S T A N C E M E T H O D S
446
- #################################################################
447
-
448
### Create a new AoBane parser.
###
### Each entry of +restrictions+ names an attribute whose writer is called
### with +true+ (for example :filter_html or :filter_styles).
def initialize(*restrictions)
  # Log to $stderr. The original used $deferr, which no longer exists as of
  # Ruby 1.9: there it reads as an unset global (nil) and Logger silently
  # discards everything.
  @log = Logger::new( $stderr )
  @log.level =
    if $DEBUG then Logger::DEBUG
    elsif $VERBOSE then Logger::INFO
    else Logger::WARN
    end
  @scanner = nil

  # Add any restrictions, and set the line-folding attribute to reflect
  # what happens by default.
  @filter_html = nil
  @filter_styles = nil
  restrictions.flatten.each {|r| __send__("#{r}=", true) }
  @fold_lines = true

  @use_header_id = true
  @display_warnings = true

  @log.debug "String is: %p" % self
end
468
-
469
-
470
- ######
471
- public
472
- ######
473
-
474
# Filters for controlling what gets output for untrusted input. (But really,
# you're filtering bad stuff out of untrusted input at submission-time via
# untainting, aren't you?)
attr_accessor :filter_html
attr_accessor :filter_styles

# RedCloth-compatibility accessor. Line-folding is part of Markdown syntax,
# so this isn't used by anything.
attr_accessor :fold_lines

# AoBane Extension: display warnings on the top of output html (default: true)
attr_accessor :display_warnings

# AoBane Extension: add id to each header, for toc and anchors. (default: true)
attr_accessor :use_header_id
488
-
489
# Plain-text symbol shorthands and the HTML entity each one becomes.
# A shorthand that contains another one is listed first ('<=>' before '<='
# and '=>', '||^' before '|^'), because the regexp built from the keys
# tries its alternatives in this order.
SymbolEntities = {
  '--'   => '&mdash;',
  '<=>'  => '&hArr;',
  '<->'  => '&harr;',
  '->'   => '&rarr;',
  '<-'   => '&larr;',
  '=>'   => '&rArr;',
  '<='   => '&lArr;',
  '||^'  => '&uArr;',
  '||/'  => '&dArr;',
  '|/'   => '&darr;',
  '|^'   => '&uarr;',
  '>>'   => '&raquo;',
  '<<'   => '&laquo;',
  '+_'   => '&plusmn;',
  '!='   => '&ne;',
  '~~'   => '&asymp;',
  '~='   => '&cong;',
  '<_'   => '&le;',
  '>_'   => '&ge;',
  '|FA'  => '&forall;',
  '|EX'  => '&exist;',
  '|='   => '&equiv;',
  '(+)'  => '&oplus;',
  '(x)'  => '&otimes;',
  '\\&'  => '&amp;',
  '(c)'  => '&copy;',
  '(R)'  => '&reg;',
  '(SS)' => '&sect;',
  '(TM)' => '&trade;',
}

# Built from the table keys so the pattern and the table cannot drift apart
# (a match without a table entry would be replaced by an empty string).
SymbolEntityRegexp = Regexp.union( SymbolEntities.keys )

### Render the Markdown-formatted +source+ as HTML.
###
### +rs+ is the RenderState to use; a new one is created when it is nil.
### Returns the HTML as an Array of lines (each keeping its line ending),
### not as a single String.
def parse_text(source, rs = nil)
  rs ||= RenderState.new

  # check
  case rs.header_id_type
  when HeaderIDType::MD5, HeaderIDType::ESCAPE
  else
    rs.warnings << "illegal header id type - #{rs.header_id_type}"
  end

  # Create a StringScanner we can reuse for various lexing tasks
  @scanner = StringScanner::new( '' )

  # Make a copy of the string with normalized line endings, tabs turned to
  # spaces, and a couple of guaranteed newlines at the end
  text = detab(source.gsub( /\r\n?/, "\n" ))
  text += "\n\n"
  @log.debug "Normalized line-endings: %p" % text

  # AoBane extension: *[text](color|face#size) becomes a <font> element
  text.gsub!(/\*\[(.*?)\]\((.*?)(\|.*?)*(#.*?)*\)/) {
    color = $2.to_s
    face  = $3.to_s.delete('|')
    size  = $4.to_s.delete('#')
    %Q|<font color="#{color}" face="#{face}" size="#{size}">#{$1}</font>|
  }

  # AoBane extension: the text between \TeX{ and \TeX} is replaced by the
  # result of calling to_mathml on it (presumably provided by math_ml/string)
  text.gsub!(/\\TeX{(.*?)\\TeX}/) { $1.nil? ? '' : $1.to_mathml }

  # Filter HTML if we're asked to do so
  if self.filter_html
    # NOTE(review): the escaping itself is commented out, so this option
    # currently only writes a log line.
    #text.gsub!( "<", "&lt;" )
    #text.gsub!( ">", "&gt;" )
    @log.debug "Filtered HTML: %p" % text
  end

  # Simplify blank lines
  text.gsub!( /^ +$/, '' )
  @log.debug "Tabs -> spaces/blank lines stripped: %p" % text

  # Replace HTML blocks with placeholders
  text = hide_html_blocks( text, rs )
  @log.debug "Hid HTML blocks: %p" % text
  @log.debug "Render state: %p" % rs

  # Strip footnote definitions, store in render state
  text = strip_footnote_definitions( text, rs )
  @log.debug "Stripped footnote definitions: %p" % text
  @log.debug "Render state: %p" % rs

  # Strip link definitions, store in render state
  text = strip_link_definitions( text, rs )
  @log.debug "Stripped link definitions: %p" % text
  @log.debug "Render state: %p" % rs

  # Escape meta-characters
  text = escape_special_chars( text )
  @log.debug "Escaped special characters: %p" % text

  # Transform block-level constructs
  text = apply_block_transforms( text, rs )
  @log.debug "After block-level transforms: %p" % text

  # Now swap back in all the escaped characters
  text = unescape_special_chars( text )
  @log.debug "After unescaping special characters: %p" % text

  # Extend footnotes
  unless rs.footnotes.empty? then
    text << %Q|<div class="footnotes"><hr#{EmptyElementSuffix}\n<ol>\n|
    rs.found_footnote_ids.each do |id|
      content = rs.footnotes[id]
      html = apply_block_transforms(content.sub(/\n+\Z/, '') + %Q| <a href="#footnote-ref:#{id}" rev="footnote">&#8617;</a>|, rs)
      text << %Q|<li id="footnote:#{id}">\n#{html}\n</li>|
    end
    text << %Q|</ol>\n</div>\n|
  end

  # Display warnings
  if @display_warnings then
    unless rs.warnings.empty? then
      html = %Q|<pre><strong>[WARNINGS]\n|
      html << rs.warnings.map{|x| Util.escape_html(x)}.join("\n")
      html << %Q|</strong></pre>|

      text = html + text
    end
  end

  # Replace symbol shorthands line by line. Every line from one containing
  # <pre><code> through one containing </code></pre> is passed through
  # untouched, so code listings keep their literal characters.
  output = []
  in_code_block = false
  text.each_line do |line|
    in_code_block = true if line.include?('<pre><code>')

    if in_code_block
      output << line
    else
      output << line.gsub( SymbolEntityRegexp ) {|symbol| SymbolEntities[symbol] }
    end

    in_code_block = false if line.include?('</code></pre>')
  end

  return output
end

alias parse parse_text
645
-
646
# Like #parse_text, but returns a two-element array: the parse_text result
# and the render state that was used. (mainly for testing)
def parse_text_with_render_state(str, rs = nil)
  state = rs || RenderState.new
  [parse_text(str, state), state]
end
653
-
654
# Read the file at +path+ and pass its contents to #parse_text.
def parse_text_file(path)
  source = File.read(path)
  parse_text(source)
end

alias parse_file parse_text_file
659
-
660
-
661
# Parse +source+ with Document.parse (using +default_enc+) and render the
# resulting document with #document_to_html.
def parse_document(source, default_enc = EncodingType::UTF8)
  document_to_html(Document.parse(source, default_enc))
end
666
-
667
# Read the document stored in the file at +path+ with Document.parse_io
# (using +default_enc+) and render it with #document_to_html.
def parse_document_file(path, default_enc = EncodingType::UTF8)
  # File.open instead of Kernel#open: the latter starts a subprocess when
  # the path begins with "|", which must never happen for a file name
  # supplied by the caller.
  doc = File.open(path) {|f| Document.parse_io(f, default_enc) }

  document_to_html(doc)
end
675
-
676
-
677
### Render +doc+ (a Document) as a complete XHTML page and return the page
### as a String. Numbering and header-id options are taken from the
### document's headers.
def document_to_html(doc)
  rs = RenderState.new
  rs.numbering = true if doc.numbering?
  rs.numbering_start_level = doc.numbering_start_level
  rs.header_id_type = doc.header_id_type

  body_html = nil
  if doc.encoding_type then
    Util.change_kcode(doc.kcode) { body_html = parse_text(doc.body, rs) }
  else
    body_html = parse_text(doc.body, rs)
  end

  out = Util.generate_blank_string_io(doc.body)

  # XHTML declaration, then the start of the page and of its head
  out.puts %Q|<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">|
  out.puts %Q|<html>|
  out.puts %Q|<head>|

  charset = doc.encoding_type && EncodingType.convert_to_charset(doc.encoding_type)
  if charset then
    out.puts %Q|<meta http-equiv="Content-Type" content="text/html; charset=#{charset}" />|
  end

  # title: the Title header, else the first level-1 header, else a placeholder
  first_h1 = rs.headers.find {|header| header.level == 1 }
  title = Util.escape_html(doc.title || (first_h1 ? first_h1.content : nil) || 'no title (Generated by AoBane)')
  out.puts %Q|<title>#{title}</title>|

  %w(description keywords).each do |name|
    next unless doc[name]
    out.puts %Q|<meta name="#{name}" content="#{Util.escape_html(doc[name])}" />|
  end

  if doc['css'] then
    out.puts %Q|<link rel="stylesheet" type="text/css" href="#{Util.escape_html(doc.css)}" />|
  end

  # feed links, in the order rdf, rss, atom
  [
    ['rdf-feed',  'application/rdf+xml'],
    ['rss-feed',  'application/rss+xml'],
    ['atom-feed', 'application/atom+xml'],
  ].each do |header_name, mime_type|
    next unless doc[header_name]
    out.puts %Q|<link rel="alternate" type="#{mime_type}" href="#{Util.escape_html(doc[header_name])}" />|
  end

  out.puts %Q|</head>|

  # body
  out.puts %Q|<body>|
  out.puts
  out.puts body_html
  out.puts
  out.puts %Q|</body>|

  # html end
  out.puts %Q|</html>|

  out.string
end

alias doc2html document_to_html
763
-
764
-
765
-
766
-
767
- #######
768
- #private
769
- #######
770
-
771
### Return a copy of +str+ in which every tab is replaced by the number of
### spaces needed to reach the next multiple of +tabwidth+ columns.
### Trailing newlines are not preserved (the string is split on "\n" and
### joined again).
### (this method is reformed to function-like method from original BlueCloth)
def detab( str, tabwidth=TabWidth )
  expanded = str.split( /\n/ ).map do |line|
    line.gsub( /(.*?)\t/ ) do
      lead = $1
      lead + ' ' * (tabwidth - lead.length % tabwidth)
    end
  end

  expanded.join("\n")
end
782
-
783
-
784
-
785
-
786
### Run +str+ through every block-level transform in turn, using the render
### state +rs+, and return the result. rs.block_transform_depth is raised
### by one for the duration of the call.
def apply_block_transforms( str, rs )
  rs.block_transform_depth += 1

  # Port: This was called '_runBlockGamut' in the original

  @log.debug "Applying block transforms to:\n %p" % str

  # Each step takes (text, rs) and returns the transformed text; the order
  # is significant.
  steps = [
    :pretransform_fenced_code_blocks,
    :pretransform_block_separators,
    :transform_headers,
    :transform_toc,
    :transform_hrules,
    :transform_lists,
    :transform_definition_lists,   # AoBane Extension
    :transform_code_blocks,
    :transform_block_quotes,
    :transform_tables,
    :hide_html_blocks,
    :form_paragraphs,
  ]
  text = steps.inject( str ) {|current, step| __send__( step, current, rs ) }

  rs.block_transform_depth -= 1
  @log.debug "Done with block transforms:\n %p" % text
  return text
end
815
-
816
-
817
### Run +str+ through the span-level transforms (code spans, auto links,
### HTML encoding, images, anchors, emphasis) with the render state +rs+,
### turn two or more spaces before a newline into a hard break, and return
### the result.
def apply_span_transforms( str, rs )
  @log.debug "Applying span transforms to:\n %p" % str

  result = transform_code_spans( str, rs )
  result = transform_auto_links( result, rs )
  result = encode_html( result )
  result = transform_images( result, rs )
  result = transform_anchors( result, rs )
  result = transform_italic_and_bold( result, rs )

  # Hard breaks
  hard_break = "<br#{EmptyElementSuffix}\n"
  result.gsub!( / {2,}\n/, hard_break )

  @log.debug "Done with span transforms:\n %p" % result
  return result
end
835
-
836
-
837
# Tags treated as block-level HTML, and the same list joined into an
# alternation for use inside the regexps below.
StrictBlockTags = %w[
  p div h[1-6] blockquote pre table dl ol ul script noscript
  form fieldset iframe math ins del
]
StrictTagPattern = StrictBlockTags.join('|')

# The same list without ins and del.
LooseBlockTags = StrictBlockTags - %w[ins del]
LooseTagPattern = LooseBlockTags.join('|')

# Nested blocks:
#   <div>
#     <div>
#     tags for inner block must be indented.
#     </div>
#   </div>
StrictBlockRegexp = %r{
  ^                         # Start of line
  <(#{StrictTagPattern})    # Start tag: \1
  \b                        # word break
  (.*\n)*?                  # Any number of lines, minimal match
  </\1>                     # Matching end tag
  [ ]*                      # trailing spaces
  $                         # End of line or document
}ix

# More-liberal block-matching
LooseBlockRegexp = %r{
  ^                         # Start of line
  <(#{LooseTagPattern})     # start tag: \1
  \b                        # word break
  (.*\n)*?                  # Any number of lines, minimal match
  .*</\1>                   # Anything + Matching end tag
  [ ]*                      # trailing spaces
  $                         # End of line or document
}ix

# Special case for <hr />.
HruleBlockRegexp = %r{
  (                         # $1
    \A\n?                   # Start of doc + optional \n
    |                       # or
    .*\n\n                  # anything + blank line
  )
  (                         # save in $2
    # AoBane fix: Not allow any space on line top
    <hr                     # Tag open
    \b                      # Word break
    ([^<>])*?               # Attributes
    /?>                     # Tag close
    $                       # followed by a blank line or end of document
  )
}ix
889
-
890
### Return a copy of +str+ in which every HTML block starting in the left
### margin is replaced by a token (the MD5 of the block, surrounded by blank
### lines). The original blocks are stored in rs.html_blocks under their
### tokens.
def hide_html_blocks( str, rs )
  @log.debug "Hiding HTML blocks in %p" % str

  # Remembers one block and returns the placeholder that replaces it
  stash = lambda {|html|
    token = Digest::MD5::hexdigest( html )
    rs.html_blocks[ token ] = html
    @log.debug "Replacing %p with %p" % [ html, token ]
    "\n\n#{token}\n\n"
  }

  hidden = str.dup

  @log.debug "Finding blocks with the strict regex..."
  hidden.gsub!( StrictBlockRegexp, &stash )

  @log.debug "Finding blocks with the loose regex..."
  hidden.gsub!( LooseBlockRegexp, &stash )

  @log.debug "Finding hrules..."
  hidden.gsub!( HruleBlockRegexp ) { $1 + stash[$2] }

  return hidden
end
916
-
917
-
918
# Link defs are in the form: ^[id]: url "optional title"
LinkRegexp = %r{
  ^[ ]{0,#{TabWidth - 1}}   # AoBane fix: indent < tab width
  \[(.+)\]:                 # id = $1
  [ ]*
  \n?                       # maybe *one* newline
  [ ]*
  <?(\S+?)>?                # url = $2
  [ ]*
  \n?                       # maybe one newline
  [ ]*
  (?:
    # Titles are delimited by "quotes" or (parens).
    ["(]
    (.+?)                   # title = $3
    [")]                    # Matching ) or "
    [ ]*
  )?                        # title is optional
  (?:\n+|\Z)
}x

### Return a copy of +str+ without its link definitions. Each definition
### is recorded in the RenderState +rs+: the HTML-encoded URL in rs.urls
### and, when present, the title in rs.titles, both keyed by the
### lower-cased id.
def strip_link_definitions( str, rs )
  str.gsub( LinkRegexp ) do
    # read all captures before calling anything that may run another match
    key = $1.downcase
    url, title = $2, $3

    rs.urls[ key ] = encode_html( url )
    rs.titles[ key ] = title.gsub( /"/, "&quot;" ) unless title.nil?

    ""
  end
end
953
-
954
# Footnotes defs are in the form: [^id]: footnote contents.
FootnoteDefinitionRegexp = %r{
  ^[ ]{0,#{TabWidth - 1}}
  \[\^(.+?)\]\:             # id = $1
  [ ]*
  (.*)                      # first line content = $2
  (?:\n|\Z)

  (                         # second or more lines content = $3
    (?:
      [ ]{#{TabWidth},}     # indented
      .*
      (?:\n|\Z)
    |
      \n                    # blank line
    )*
  )?

}x

FootnoteIdRegexp = /^[a-zA-Z0-9\:\._-]+$/

### Return a copy of +str+ without its footnote definitions. The content of
### each definition is stored in rs.footnotes under its id; an id containing
### characters outside a-zA-Z0-9_-.: adds a warning to rs.warnings (the
### footnote is stored regardless).
def strip_footnote_definitions(str, rs)
  str.gsub( FootnoteDefinitionRegexp ) {|match|
    # Copy the captures right away: the id check below runs another match,
    # after which $1..$3 no longer refer to the definition.
    id = $1; content1 = $2; content2 = $3

    unless id =~ FootnoteIdRegexp then
      rs.warnings << "illegal footnote id - #{id} (legal chars: a-zA-Z0-9_-.:)"
    end

    if content2 then
      # log the saved captures (the original logged $2/$3 here, which were
      # already reset and therefore always nil)
      @log.debug " Stripping multi-line definition %p, %p" % [content1, content2]
      content = content1 + "\n" + outdent(content2.chomp)
      @log.debug " Stripped multi-line definition %p, %p" % [id, content]
      rs.footnotes[id] = content
    else
      content = content1 || ''
      @log.debug " Stripped single-line definition %p, %p" % [id, content]
      rs.footnotes[id] = content
    end

    ""
  }
end
1000
-
1001
-
1002
### Return a copy of +str+ with its special characters replaced by
### placeholders: inside HTML tags, * and _ become their MD5 placeholders;
### in regular text, backslash escapes are encoded.
def escape_special_chars( str )
  @log.debug " Escaping special characters"
  escaped = ''

  # The original Markdown source has something called '$tags_to_skip'
  # declared here, but it's never used, so I don't define it.

  tokenize_html( str ) do |kind, chunk|
    @log.debug " Adding %p token %p" % [ kind, chunk ]

    escaped +=
      case kind
      when :tag
        # Within tags, encode * and _
        chunk.
          gsub( /\*/, EscapeTable['*'][:md5] ).
          gsub( /_/, EscapeTable['_'][:md5] )
      when :text
        # Encode backslashed stuff in regular text
        encode_backslash_escapes( chunk )
      else
        raise TypeError, "Unknown token type %p" % kind
      end
  end

  @log.debug " Text with escapes is now: %p" % escaped
  return escaped
end
1031
-
1032
-
1033
### Replace every MD5 placeholder from EscapeTable in +str+ with the
### character it stands for. +str+ is modified in place and returned.
def unescape_special_chars( str )
  EscapeTable.each_pair do |escaped, entry|
    @log.debug "Unescaping escaped %p with %p" % [ escaped, entry[:md5re] ]
    str.gsub!( entry[:md5re], entry[:unescape] )
  end

  str
end
1043
-
1044
-
1045
### Return a copy of +str+ in which every backslash-escaped special
### character is replaced by its MD5 placeholder from EscapeTable.
def encode_backslash_escapes( str )
  double_backslash = '\\\\'

  # Double backslashes go first, so that neither of the pair can be taken
  # for the start of another escape
  encoded = str.gsub( /\\\\/, EscapeTable[double_backslash][:md5] )

  EscapeTable.each_pair do |key, entry|
    # only the backslash-prefixed keys, and not the one handled above
    if key != double_backslash and key =~ /\\./
      encoded.gsub!( entry[:re], entry[:md5] )
    end
  end

  encoded
end
1059
-
1060
-
1061
# Return a copy of +str+ in which every line consisting of a single "~"
# (indented by less than TabWidth spaces, optionally followed by spaces) is
# replaced by a "~" line set off by blank lines. +rs+ is not used.
def pretransform_block_separators(str, rs)
  separator_line = /^[ ]{0,#{TabWidth - 1}}[~][ ]*\n/
  str.gsub(separator_line, "\n~\n\n")
end
1066
-
1067
-
1068
# A table-of-contents marker on a line of its own: {toc}, {toc:param} or
# {toc param}.
TOCRegexp = %r{
  ^\{                 # bracket on line-head
  [ ]*                # optional inner space
  toc

  (?:
    (?:
      [:]             # colon
      |               # or
      [ ]+            # 1 or more space
    )
    (.+?)             # $1 = parameter
  )?

  [ ]*                # optional inner space
  \}                  # closer
  [ ]*$               # optional space on line-foot
}ix

# The marker parameter: a header level range such as h2..h4, h2- or ..4
TOCStartLevelRegexp = %r{
  ^
  (?:                 # optional start
    h
    ([1-6])           # $1 = start level
  )?

  (?:                 # range symbol
    [.]{2,}|[-]       # .. or -
  )

  (?:                 # optional end
    h?                # optional 'h'
    ([1-6])           # $2 = end level
  )?$
}ix

### Return a copy of +str+ in which every table-of-contents marker is
### replaced by a nested Markdown bullet list linking to the headers
### collected in rs.headers. Levels default to h2..h6; an unusable
### parameter adds a warning to rs.warnings and keeps the defaults.
def transform_toc( str, rs )
  @log.debug " Transforming tables of contents"
  str.gsub(TOCRegexp) do
    param = $1
    start_level, end_level = 2, 6   # defaults

    if param then
      range = TOCStartLevelRegexp.match(param)
      if range and (range[1] or range[2]) then
        start_level = range[1].to_i if range[1]
        end_level = range[2].to_i if range[2]
      else
        rs.warnings << "illegal TOC parameter - #{param} (valid example: 'h2..h4')"
      end
    end

    first_header = rs.headers.first
    if first_header and first_header.level >= (start_level + 1) then
      rs.warnings << "illegal structure of headers - h#{start_level} should be set before h#{first_header.level}"
    end

    # one bullet per header in range, indented by its depth below start_level
    items = rs.headers.select {|header|
      header.level >= start_level and header.level <= end_level
    }.map {|header|
      indent = ' ' * TabWidth * (header.level - start_level)
      %Q|#{indent}* <a href="##{header.id}" rel="toc">#{header.content_html}</a>\n|
    }

    "\n\n" + items.join + "\n"
  end
end
1146
-
1147
# One or more consecutive table rows. A row starts (after less than
# TabWidth spaces) with a "|" border and holds at least two cells.
TableRegexp = %r{
  (?:
    ^([ ]{0,#{TabWidth - 1}})   # not indented
    (?:[|][ ]*)                 # NOT optional border

    \S.*?                       # 1st cell content

    (?:                         # 2nd cell or later
      [|]                       # cell splitter
      .+?                       # content
    )+                          # 1 or more..

    [|]?                        # optional border
    (?:\n|\Z)                   # line end
  )+
}x

# Return a copy of +str+ in which every run of table rows is replaced by the
# result of #transform_table_rows.
def transform_tables(str, rs)
  str.gsub(TableRegexp) {|table_source| transform_table_rows(table_source, rs) }
end
1170
-
1171
# A cell of the separator row: dashes with an optional colon on either side.
TableSeparatorCellRegexp = %r{
  ^
  [ ]*
  ([:])?    # $1 = left-align symbol
  [ ]*
  [-]+      # border
  [ ]*
  ([:])?    # $2 = right-align symbol
  [ ]*
  $
}x

# Convert the table source +str+ (one row per line, cells separated by "|")
# into an HTML table and return it. When there are at least three rows and
# every cell of the second one is a separator cell, the first row becomes
# the table head and the colons of the separator row set the alignment of
# each column.
def transform_table_rows(str, rs)

  # split cells to 2-d array
  rows = str.split("\n").map {|line| line.split('|') }

  rows.each do |row|
    # cut left space
    row.first.lstrip!

    # cut when optional side-borders is included
    row.shift if row.first.empty?
  end

  column_attrs = []

  html = ''
  html << "<table>\n"

  # head is exist?
  if rows.size >= 3 and rows[1].all? {|cell| cell =~ TableSeparatorCellRegexp } then
    head_row = rows.shift
    separator_row = rows.shift

    separator_row.each do |cell|
      alignment = TableSeparatorCellRegexp.match(cell)
      left, right = alignment[1], alignment[2]

      column_attrs <<
        if left and right then ' style="text-align: center"'
        elsif right then ' style="text-align: right"'
        elsif left then ' style="text-align: left"'
        else ''
        end
    end

    html << "\t<thead><tr>\n"
    head_row.each_with_index do |cell, i|
      html << "\t\t<th#{column_attrs[i]}>#{apply_span_transforms(cell.strip, rs)}</th>\n"
    end
    html << "\t</tr></thead>\n"
  end

  # data row
  html << "\t<tbody>\n"
  rows.each do |row|
    html << "\t\t<tr>\n"
    row.each_with_index do |cell, i|
      html << "\t\t\t<td#{column_attrs[i]}>#{apply_span_transforms(cell.strip, rs)}</td>\n"
    end
    html << "\t\t</tr>\n"
  end
  html << "\t</tbody>\n"

  html << "</table>\n"

  html
end
1244
-
1245
-
1246
### Return a copy of +str+ in which every Markdown-style horizontal rule
### (a line made of three or more '-', '*' or '_', each optionally padded by
### a single space) is replaced with an <hr> element.
def transform_hrules( str, rs )
  @log.debug " Transforming horizontal rules"
  hrule_html = "\n<hr#{EmptyElementSuffix}\n"
  str.gsub( /^( ?[\-\*_] ?){3,}$/, hrule_html )
end
1252
-
1253
-
1254
-
1255
# Patterns to match and transform lists
ListMarkerOl = %r{\d+\.}
ListMarkerUl = %r{[*+-]}
ListMarkerAny = Regexp::union( ListMarkerOl, ListMarkerUl )

# Matches a whole list: a marker line indented by less than a tab width,
# followed by content up to EOF or up to a blank line that is followed by
# something other than another list item.
ListRegexp = %r{
  (?:
    ^[ ]{0,#{TabWidth - 1}}   # Indent < tab width
    (#{ListMarkerAny})        # unordered or ordered ($1)
    [ ]+                      # At least one space
  )
  (?m:.+?)                    # item content (include newlines)
  (?:
    \z                        # Either EOF
    |                         # or
    \n{2,}                    # Blank line...
    (?=\S)                    # ...followed by non-space
    (?![ ]*                   # ...but not another item
      (#{ListMarkerAny})
      [ ]+)
  )
}x

### Return a copy of +str+ in which every Markdown-style list is wrapped in
### <ul> or <ol> (chosen from the first marker) with its items transformed by
### #transform_list_items.
def transform_lists( str, rs )
  @log.debug " Transforming lists at %p" % (str[0,100] + '...')

  str.gsub( ListRegexp ) do |list|
    first_marker = $1
    @log.debug " Found list %p" % list
    tag = ListMarkerUl.match(first_marker) ? "ul" : "ol"

    %{<%s>\n%s</%s>\n} % [ tag, transform_list_items( list, rs ), tag ]
  end
end
1295
-
1296
# Pattern for transforming list items
ListItemRegexp = %r{
  (\n)?                      # leading line = $1
  (^[ ]*)                    # leading whitespace = $2
  (#{ListMarkerAny}) [ ]+    # list marker = $3
  ((?m:.+?)                  # list item text = $4
  \n)
  (?= (\n*) (\z | \2 (#{ListMarkerAny}) [ ]+))
}x

### Return a copy of +str+ in which each list item is wrapped in <li>.
### An item gets block-level transforms when it is preceded by a blank line,
### contains a blank line, or is separated from the next item by blank
### lines; otherwise it is scanned for sub-lists and given span transforms.
def transform_list_items( str, rs )
  @log.debug " Transforming list items"

  # Trim trailing blank lines
  trimmed = str.sub( /\n{2,}\z/, "\n" )

  trimmed.gsub( ListItemRegexp ) do |line|
    # Read the captures before any other match can replace them.
    leading_line = $1
    item = $4
    separating_lines = $5
    @log.debug " Found item line %p" % line

    loose = leading_line || /\n{2,}/.match(item) || !separating_lines.empty?

    if loose
      @log.debug " Found leading line or item has a blank"
      item = apply_block_transforms( outdent(item), rs )
    else
      # Recursion for sub-lists
      @log.debug " Recursing for sublist"
      item = transform_lists( outdent(item), rs ).chomp
      item = apply_span_transforms( item, rs )
    end

    %{<li>%s</li>\n} % item
  end
end
1330
-
1331
# Matches a definition list: one or more term lines followed by one or more
# ':'-marked definitions, repeated.
DefinitionListRegexp = %r{
  (?:
    (?:^.+\n)+                          # dt
    \n*
    (?:
      ^[ ]{0,#{TabWidth - 1}}           # Indent < tab width
      \:                                # dd marker (line head)
      [ ]*                              # space
      ((?m:.+?))                        # dd content
      (?:
        \s*\z                           # end of string
        |                               # or
        \n{2,}                          # blank line
        (?=[ ]{0,#{TabWidth - 1}}\S)    # ...followed by
      )
    )+
  )+
}x

# Return a copy of +str+ in which every definition list is replaced by the
# output of #transform_definition_list_items.
def transform_definition_lists(str, rs)
  @log.debug " Transforming definition lists at %p" % (str[0,100] + '...')
  str.gsub( DefinitionListRegexp ) do |list|
    @log.debug " Found definition list %p (captures=%p)" % [list, $~.captures]
    transform_definition_list_items(list, rs)
  end
end
1357
-
1358
# Matches the first line of a definition (dd): ':' at the line head followed
# by up to TabWidth - 1 spaces; $1 is the remaining text of the line.
DDLineRegexp = /^\:[ ]{0,#{TabWidth - 1}}(.*)/

# Render the definition list source +str+ as a <dl> element and return it.
#
# The source is consumed line by line in groups: term lines (dt), optional
# blank lines, then one or more definitions (dd). If blank lines separate
# the terms from their definitions, the definitions of that group receive
# block transforms; otherwise they receive span transforms.
def transform_definition_list_items(str, rs)
  buf = Util.generate_blank_string_io(str)
  buf.puts %Q|<dl>|

  # Loop-invariant patterns
  short_blank_line = /^[ ]{0,#{TabWidth - 1}}$/
  indented_line = /^[ ]{#{TabWidth},}/

  lines = str.split("\n")
  until lines.empty?
    # get dt items (non-empty lines that do not start with ':')
    dts = []
    dts << lines.shift while lines.first =~ /^(?!\:).+$/

    # skip blank lines; their presence switches the dds to block mode
    dd_as_block = false
    while !lines.empty? && lines.first.empty?
      lines.shift
      dd_as_block = true
    end

    dds = []
    while lines.first =~ DDLineRegexp
      first_line_text = $1 # keep the capture before later matches replace it
      dd_buf = []

      # dd first line
      dd_buf << first_line_text << "\n" unless lines.shift.empty?

      # dd second and more lines (sequential with 1st-line):
      # stop when all was read, at a blank line, or at a new dd
      until lines.empty? ||
            lines.first =~ short_blank_line ||
            lines.first =~ DDLineRegexp
        dd_buf << outdent(lines.shift) << "\n"
      end

      # dd second and more lines (separated with 1st-line)
      until lines.empty?
        if lines.first.empty?
          # blank line (skip)
          lines.shift
          dd_buf << "\n"
        elsif lines.first =~ indented_line
          # indented body
          dd_buf << outdent(lines.shift) << "\n"
        else
          # not indented body
          break
        end
      end

      dds << dd_buf.join

      # skip blank lines; the emptiness check is repeated on every pass so
      # that running out of lines cannot call #empty? on nil
      lines.shift while !lines.empty? && lines.first.empty?
    end

    # html output
    dts.each do |dt|
      buf.puts %Q| <dt>#{apply_span_transforms(dt, rs)}</dt>|
    end

    dds.each do |dd|
      if dd_as_block
        buf.puts %Q| <dd>#{apply_block_transforms(dd, rs)}</dd>|
      else
        dd.gsub!(/\n+\z/, '') # chomp linefeeds
        buf.puts %Q| <dd>#{apply_span_transforms(dd.chomp, rs)}</dd>|
      end
    end
  end

  buf.puts %Q|</dl>|

  buf.string
end
1447
-
1448
- # old
1449
-
1450
-
1451
# Pattern for matching codeblocks
CodeBlockRegexp = %r{
  (?:\n\n|\A|\A\n)
  (                                  # $1 = the code block
    (?:
      (?:[ ]{#{TabWidth}} | \t)      # a tab or tab-width of spaces
      .*\n+
    )+
  )
  (^[ ]{0,#{TabWidth - 1}}\S|\Z)     # Lookahead for non-space at
                                     # line-start, or end of doc
}x

### Return a copy of +str+ in which every indented Markdown code block is
### replaced by a <pre><code> element. The block is outdented, passed
### through #encode_code, right-stripped, and any hashed backslash escapes
### listed in EscapeTable are restored to their original characters.
def transform_code_blocks( str, rs )
  @log.debug " Transforming code blocks"

  str.gsub( CodeBlockRegexp ) do |block|
    codeblock = $1
    remainder = $2

    template = %{\n\n<pre><code>%s\n</code></pre>\n\n%s}

    # patch for ruby 1.9.1 bug
    template.force_encoding(str.encoding) if template.respond_to?(:force_encoding)

    code = encode_code( outdent(codeblock), rs ).rstrip

    # recover all backslash escaped to original form
    EscapeTable.each do |char, hash|
      code.gsub!( hash[:md5re] ) { char }
    end

    # Generate the codeblock
    template % [ code, remainder ]
  end
end
1492
-
1493
-
1494
# Matches a fenced code block: a line of three or more '~', the body ($2),
# and a closing line with the same fence ($1).
FencedCodeBlockRegexp = /^(\~{3,})\n((?m:.+?)\n)\1\n/

# Return a copy of +str+ in which each fenced code block is rewritten as an
# indented (standard) code block surrounded by '~' separator paragraphs.
def pretransform_fenced_code_blocks( str, rs )
  @log.debug " Transforming fenced code blocks => standard code blocks"

  str.gsub( FencedCodeBlockRegexp ) do |block|
    body = $2
    "\n~\n\n" + indent(body) + "\n~\n\n"
  end
end
1503
-
1504
-
1505
-
1506
# Pattern for matching Markdown blockquote blocks
BlockQuoteRegexp = %r{
  (?:
    ^[ ]*>[ ]?     # '>' at the start of a line
    .+\n           # rest of the first line
    (?:.+\n)*      # subsequent consecutive lines
    \n*            # blanks
  )+
}x
PreChunk = %r{ ( ^ \s* <pre> .+? </pre> ) }xm

### Return a copy of +str+ in which Markdown-style blockquotes are replaced
### by <blockquote> elements. One level of '>' is stripped, the content is
### block-transformed and indented by one tab width; <pre> chunks inside are
### un-indented again.
def transform_block_quotes( str, rs )
  @log.debug " Transforming block quotes"

  str.gsub( BlockQuoteRegexp ) do |quote|
    @log.debug "Making blockquote from %p" % quote

    quote.gsub!( /^ *> ?/, '' ) # Trim one level of quoting
    quote.gsub!( /^ +$/, '' )   # Trim whitespace-only lines

    pad = " " * TabWidth
    inner = apply_block_transforms( quote, rs ).gsub( /^/, pad )
    inner = inner.gsub( PreChunk ) { |pre| pre.gsub(/^#{pad}/o, '') }

    quoted = %{<blockquote>\n%s\n</blockquote>\n\n} % inner
    @log.debug "Blockquoted chunk is: %p" % quoted
    quoted
  end
end
1537
-
1538
-
1539
# AoBane change:
#   URLs and addresses are matched loosely (BlueCloth is very strict).
#
# Loose examples that are accepted:
#   <skype:tetra-dice>       (other protocol)
#   <ema+il@example.com>     (ex: gmail alias)
#
# Addresses that are not handled:
#   <"Abc@def"@example.com>  (refer to quoted-string of RFC 5321)

AutoAnchorURLRegexp = /<(#{URI.regexp})>/ # $1 = url

AutoAnchorEmailRegexp = /<([^'">\s]+?\@[^'">\s]+[.][a-zA-Z]+)>/ # $1 = address

### Return a copy of +str+ in which <url> becomes an anchor whose href and
### text are the HTML-escaped URL, and <address> becomes the output of
### #encode_email_address.
def transform_auto_links( str, rs )
  @log.debug " Transforming auto-links"

  with_urls = str.gsub( AutoAnchorURLRegexp ) {
    url = Util.escape_html($1)
    %|<a href="#{url}">#{url}</a>|
  }

  with_urls.gsub( AutoAnchorEmailRegexp ) { |addr|
    encode_email_address( unescape_special_chars($1) )
  }
end
1564
-
1565
-
1566
# Encoder functions to turn a byte of an email address into a decimal
# entity, a hexadecimal entity, or the raw character.
Encoders = [
  lambda {|char| "&#%03d;" % char},
  lambda {|char| "&#x%X;" % char},
  lambda {|char| char.chr },
]

### Return an anchor for the email address +addr+ whose "mailto:" href and
### link text are randomly obfuscated with character entities.
###
### The ':' of "mailto:" is always emitted literally and '@' is always
### entity-encoded. The bytes are compared against integer codes: String#each_byte
### yields Integers, while the character literals ?: and ?@ are Strings on
### Ruby 1.9 and later, so comparing against them never matched.
def encode_email_address( addr )
  colon = 0x3A # ':'
  at_sign = 0x40 # '@'

  rval = ''
  ("mailto:" + addr).each_byte do |b|
    rval <<
      if b == colon
        ":"
      elsif b == at_sign
        # never leave '@' readable: decimal or hexadecimal entity only
        Encoders[ rand(2) ][ b ]
      else
        r = rand(100)
        if r > 90
          Encoders[2][ b ]
        elsif r < 45
          Encoders[1][ b ]
        else
          Encoders[0][ b ]
        end
      end
  end

  # The link text is the href with everything up to the literal ':' removed.
  %{<a href="%s">%s</a>} % [ rval, rval.sub(/.+?:/, '') ]
end
1597
-
1598
-
1599
# Regexp for matching Setext-style headers
SetextHeaderRegexp = %r{
  (.+?)           # The title text ($1)

  (?:             # Markdown Extra: Header Id Attribute (optional)
    [ ]*          # space after closing #'s
    \{\#
    (\S+?)        # $2 = Id
    \}
    [ \t]*        # allowed lazy spaces
  )?
  \n
  ([\-=])+        # Match a line of = or -. Save only one in $3.
  [ ]*\n+
}x

# Regexp for matching ATX-style headers
AtxHeaderRegexp = %r{
  ^(\#+)          # $1 = string of #'s
  [ ]*
  (.+?)           # $2 = Header text
  [ ]*
  \#*             # optional closing #'s (not counted)

  (?:             # Markdown Extra: Header Id Attribute (optional)
    [ ]*          # space after closing #'s
    \{\#
    (\S+?)        # $3 = Id
    \}
    [ \t]*        # allowed lazy spaces
  )?

  \n+
}x

# In the union the Setext groups are $1..$3 and the ATX groups are $4..$6.
HeaderRegexp = Regexp.union(SetextHeaderRegexp, AtxHeaderRegexp)

IdRegexp = /^[a-zA-Z][a-zA-Z0-9\:\._-]*$/

### Return a copy of +str+ in which Setext-style ("Header" underlined with
### '=' or '-') and ATX-style ("## Header") headers are replaced by <hN>
### elements.
###
### Side effects on the render state +rs+: warnings are appended for header
### levels of 7 or more, header id collisions and illegal ids; when
### rs.block_transform_depth is 1 the header is recorded in rs.headers.
### When rs.numbering? is true, headers from rs.numbering_start_level up to
### level 6 are prefixed with a section number such as "1.2. ".
def transform_headers( str, rs )
  @log.debug " Transforming headers"

  # One counter per numbered depth. Depth is level - numbering_start_level
  # and level goes up to 6, so depth can reach 5 when numbering starts at
  # level 1: six slots are needed.
  section_numbers = Array.new(6)

  str.gsub( HeaderRegexp ) do |m|
    if $1
      @log.debug "Found setext-style header"
      title, id, hdrchar = $1, $2, $3

      level =
        case hdrchar
        when '=' then 1
        when '-' then 2
        end
    else
      @log.debug "Found ATX-style header"
      hdrchars, title, id = $4, $5, $6
      level = hdrchars.length

      if level >= 7
        rs.warnings << "illegal header level - h#{level} ('#' symbols are too many)"
      end
    end

    prefix = ''
    if rs.numbering? && level >= rs.numbering_start_level && level <= 6
      depth = level - rs.numbering_start_level

      section_numbers.each_index do |i|
        if i == depth && section_numbers[depth]
          # increment the deepest number if this header's level equals the last header's
          section_numbers[i] += 1
        elsif i <= depth
          # set default number if nil
          section_numbers[i] ||= 1
        else
          # clear numbers of deeper, now closed, sections
          section_numbers[i] = nil
        end
      end

      no = ''
      (0..depth).each do |i|
        no << "#{section_numbers[i]}."
      end

      prefix = "#{no} "
    end

    title_html = apply_span_transforms( title, rs )

    unless id
      md5_id = "bfheader-#{Digest::MD5.hexdigest(title)}"

      if rs.header_id_type == HeaderIDType::ESCAPE
        id = escape_to_header_id(title_html)
        if rs.headers.find { |h| h.id == id }
          rs.warnings << "header id collision - #{id}"
          id = md5_id
        end
      else
        id = md5_id
      end
    end

    title = "#{prefix}#{title}"
    title_html = "#{prefix}#{title_html}"

    unless id =~ IdRegexp
      rs.warnings << "illegal header id - #{id} (legal chars: [a-zA-Z0-9_-.] | 1st: [a-zA-Z])"
    end

    if rs.block_transform_depth == 1
      rs.headers << RenderState::Header.new(id, level, title, title_html)
    end

    if @use_header_id
      %{<h%d id="%s">%s</h%d>\n\n} % [ level, id, title_html, level ]
    else
      %{<h%d>%s</h%d>\n\n} % [ level, title_html, level ]
    end
  end
end
1736
-
1737
-
1738
### Split +str+ on blank lines and return the chunks joined by blank lines,
### where each chunk is: the stored HTML block if the chunk is a key of
### rs.html_blocks, an empty string if the chunk is the '~' block separator,
### or otherwise the span-transformed chunk wrapped in <p> tags.
def form_paragraphs( str, rs )
  @log.debug " Forming paragraphs"

  trimmed = str.sub( /\A\n+/, '' ).sub( /\n+\z/, '' )
  grafs = trimmed.split( /\n{2,}/ )

  pieces = grafs.map do |graf|
    if rs.html_blocks.key?( graf )
      # Unhashify HTML blocks if this is a placeholder
      rs.html_blocks[ graf ]
    elsif graf == '~'
      # no output if this is a block separator
      ''
    else
      # Otherwise, wrap in <p> tags
      apply_span_transforms(graf, rs).sub( /^[ ]*/, '<p>' ) + '</p>'
    end
  end
  rval = pieces.join( "\n\n" )

  @log.debug " Formed paragraphs: %p" % rval
  rval
end
1767
-
1768
-
1769
- # Pattern to match the linkid part of an anchor tag for reference-style
1770
- # links.
1771
- RefLinkIdRegexp = %r{
1772
- [ ]? # Optional leading space
1773
- (?:\n[ ]*)? # Optional newline + spaces
1774
- \[
1775
- (.*?) # Id = $1
1776
- \]
1777
- }x
1778
-
1779
- InlineLinkRegexp = %r{
1780
- \( # Literal paren
1781
- [ ]* # Zero or more spaces
1782
- <?(.+?)>? # URI = $1
1783
- [ ]* # Zero or more spaces
1784
- (?: #
1785
- ([\"\']) # Opening quote char = $2
1786
- (.*?) # Title = $3
1787
- \2 # Matching quote char
1788
- )? # Title is optional
1789
- \)
1790
- }x
1791
-
1792
- ### Apply Markdown anchor transforms to a copy of the specified +str+ with
1793
- ### the given render state +rs+ and return it.
1794
- def transform_anchors( str, rs )
1795
- @log.debug " Transforming anchors"
1796
- @scanner.string = str.dup
1797
- text = ''
1798
-
1799
- # Scan the whole string
1800
- until @scanner.empty?
1801
-
1802
- if @scanner.scan( /\[/ )
1803
- link = ''; linkid = ''
1804
- depth = 1
1805
- startpos = @scanner.pos
1806
- @log.debug " Found a bracket-open at %d" % startpos
1807
-
1808
- # Scan the rest of the tag, allowing unlimited nested []s. If
1809
- # the scanner runs out of text before the opening bracket is
1810
- # closed, append the text and return (wasn't a valid anchor).
1811
- while depth.nonzero?
1812
- linktext = @scanner.scan_until( /\]|\[/ )
1813
-
1814
- if linktext
1815
- @log.debug " Found a bracket at depth %d: %p" % [ depth, linktext ]
1816
- link += linktext
1817
-
1818
- # Decrement depth for each closing bracket
1819
- depth += ( linktext[-1, 1] == ']' ? -1 : 1 )
1820
- @log.debug " Depth is now #{depth}"
1821
-
1822
- # If there's no more brackets, it must not be an anchor, so
1823
- # just abort.
1824
- else
1825
- @log.debug " Missing closing brace, assuming non-link."
1826
- link += @scanner.rest
1827
- @scanner.terminate
1828
- return text + '[' + link
1829
- end
1830
- end
1831
- link.slice!( -1 ) # Trim final ']'
1832
- @log.debug " Found leading link %p" % link
1833
-
1834
-
1835
-
1836
- # Markdown Extra: Footnote
1837
- if link =~ /^\^(.+)/ then
1838
- id = $1
1839
- if rs.footnotes[id] then
1840
- rs.found_footnote_ids << id
1841
- label = "[#{rs.found_footnote_ids.size}]"
1842
- else
1843
- rs.warnings << "undefined footnote id - #{id}"
1844
- label = '[?]'
1845
- end
1846
-
1847
- text += %Q|<sup id="footnote-ref:#{id}"><a href="#footnote:#{id}" rel="footnote">#{label}</a></sup>|
1848
-
1849
- # Look for a reference-style second part
1850
- elsif @scanner.scan( RefLinkIdRegexp )
1851
- linkid = @scanner[1]
1852
- linkid = link.dup if linkid.empty?
1853
- linkid.downcase!
1854
- @log.debug " Found a linkid: %p" % linkid
1855
-
1856
- # If there's a matching link in the link table, build an
1857
- # anchor tag for it.
1858
- if rs.urls.key?( linkid )
1859
- @log.debug " Found link key in the link table: %p" % rs.urls[linkid]
1860
- url = escape_md( rs.urls[linkid] )
1861
-
1862
- text += %{<a href="#{url}"}
1863
- if rs.titles.key?(linkid)
1864
- text += %{ title="%s"} % escape_md( rs.titles[linkid] )
1865
- end
1866
- text += %{>#{link}</a>}
1867
-
1868
- # If the link referred to doesn't exist, just append the raw
1869
- # source to the result
1870
- else
1871
- @log.debug " Linkid %p not found in link table" % linkid
1872
- @log.debug " Appending original string instead: "
1873
- @log.debug "%p" % @scanner.string[ startpos-1 .. @scanner.pos-1 ]
1874
-
1875
- rs.warnings << "link-id not found - #{linkid}"
1876
- text += @scanner.string[ startpos-1 .. @scanner.pos-1 ]
1877
- end
1878
-
1879
- # ...or for an inline style second part
1880
- elsif @scanner.scan( InlineLinkRegexp )
1881
- url = @scanner[1]
1882
- title = @scanner[3]
1883
- @log.debug " Found an inline link to %p" % url
1884
-
1885
- url = "##{link}" if url == '#' # target anchor briefing (since AoBane 0.40)
1886
-
1887
- text += %{<a href="%s"} % escape_md( url )
1888
- if title
1889
- title.gsub!( /"/, "&quot;" )
1890
- text += %{ title="%s"} % escape_md( title )
1891
- end
1892
- text += %{>#{link}</a>}
1893
-
1894
- # No linkid part: just append the first part as-is.
1895
- else
1896
- @log.debug "No linkid, so no anchor. Appending literal text."
1897
- text += @scanner.string[ startpos-1 .. @scanner.pos-1 ]
1898
- end # if linkid
1899
-
1900
- # Plain text
1901
- else
1902
- @log.debug " Scanning to the next link from %p" % @scanner.rest
1903
- text += @scanner.scan( /[^\[]+/ )
1904
- end
1905
-
1906
- end # until @scanner.empty?
1907
-
1908
- return text
1909
- end
1910
-
1911
-
1912
# Pattern to match strong emphasis in Markdown text
BoldRegexp = %r{ (\*\*|__) (\S|\S.*?\S) \1 }x

# Pattern to match normal emphasis in Markdown text
ItalicRegexp = %r{ (\*|_) (\S|\S.*?\S) \1 }x

### Return a copy of +str+ with bold markers turned into <strong> elements
### first, then italic markers turned into <em> elements.
def transform_italic_and_bold( str, rs )
  @log.debug " Transforming italic and bold"

  with_bold = str.gsub( BoldRegexp, %{<strong>\\2</strong>} )
  with_bold.gsub( ItalicRegexp, %{<em>\\2</em>} )
end
1927
-
1928
-
1929
- ### Transform backticked spans into <code> spans.
1930
- def transform_code_spans( str, rs )
1931
- @log.debug " Transforming code spans"
1932
-
1933
- # Set up the string scanner and just return the string unless there's at
1934
- # least one backtick.
1935
- @scanner.string = str.dup
1936
- unless @scanner.exist?( /`/ )
1937
- @scanner.terminate
1938
- @log.debug "No backticks found for code span in %p" % str
1939
- return str
1940
- end
1941
-
1942
- @log.debug "Transforming code spans in %p" % str
1943
-
1944
- # Build the transformed text anew
1945
- text = ''
1946
-
1947
- # Scan to the end of the string
1948
- until @scanner.empty?
1949
-
1950
- # Scan up to an opening backtick
1951
- if pre = @scanner.scan_until( /.??(?=`)/m )
1952
- text += pre
1953
- @log.debug "Found backtick at %d after '...%s'" % [ @scanner.pos, text[-10, 10] ]
1954
-
1955
- # Make a pattern to find the end of the span
1956
- opener = @scanner.scan( /`+/ )
1957
- len = opener.length
1958
- closer = Regexp::new( opener )
1959
- @log.debug "Scanning for end of code span with %p" % closer
1960
-
1961
- # Scan until the end of the closing backtick sequence. Chop the
1962
- # backticks off the resultant string, strip leading and trailing
1963
- # whitespace, and encode any enitites contained in it.
1964
- codespan = @scanner.scan_until( closer ) or
1965
- raise FormatError::new( @scanner.rest[0,20],
1966
- "No %p found before end" % opener )
1967
-
1968
- @log.debug "Found close of code span at %d: %p" % [ @scanner.pos - len, codespan ]
1969
- codespan.slice!( -len, len )
1970
- text += "<code>%s</code>" %
1971
- encode_code( codespan.strip, rs )
1972
-
1973
- # If there's no more backticks, just append the rest of the string
1974
- # and move the scan pointer to the end
1975
- else
1976
- text += @scanner.rest
1977
- @scanner.terminate
1978
- end
1979
- end
1980
-
1981
- return text
1982
- end
1983
-
1984
-
1985
# Inline images: ![alt text](url "optional title")
# Don't forget: encode * and _
InlineImageRegexp = %r{
  (                     # Whole match = $1
    !\[ (.*?) \]        # alt text = $2
    \([ ]*
    <?(\S+?)>?          # source url = $3
    [ ]*
    (?:                 #
      (["'])            # quote char = $4
      (.*?)             # title = $5
      \4                # matching quote
      [ ]*
    )?                  # title is optional
    \)
  )
}x

# Reference-style images
ReferenceImageRegexp = %r{
  (                     # Whole match = $1
    !\[ (.*?) \]        # Alt text = $2
    [ ]?                # Optional space
    (?:\n[ ]*)?         # One optional newline + spaces
    \[ (.*?) \]         # id = $3
  )
}x

### Return a copy of +str+ with image markup turned into <img> tags.
### Reference-style images (![alt][id]) are resolved through rs.urls and
### rs.titles and left untouched when the id is unknown; inline images
### (![alt](url "title")) are handled afterwards.
def transform_images( str, rs )
  @log.debug " Transforming images %p" % str

  # Handle reference-style labeled images: ![alt text][id]
  referenced = str.gsub( ReferenceImageRegexp ) do |match|
    whole, alt, linkid = $1, $2, $3.downcase
    @log.debug "Matched %p" % match
    alt.gsub!( /"/, '&quot;' )

    # for shortcut links like ![this][].
    linkid = alt.downcase if linkid.empty?

    if rs.urls.key?( linkid )
      url = escape_md( rs.urls[linkid] )
      @log.debug "Found url '%s' for linkid '%s' " % [ url, linkid ]

      # Build the tag
      result = %{<img src="%s" alt="%s"} % [ url, alt ]
      if rs.titles.key?( linkid )
        result += %{ title="%s"} % escape_md( rs.titles[linkid] )
      end
      result += EmptyElementSuffix
    else
      result = whole
    end

    @log.debug "Replacing %p with %p" % [ match, result ]
    result
  end

  # Inline image style
  referenced.gsub( InlineImageRegexp ) do |match|
    # Read every capture before another match can replace them.
    alt, raw_url, title = $2, $3, $5
    @log.debug "Found inline image %p" % match
    url = escape_md( raw_url )
    alt.gsub!( /"/, '&quot;' )

    # Build the tag
    result = %{<img src="%s" alt="%s"} % [ url, alt ]
    unless title.nil?
      title.gsub!( /"/, '&quot;' )
      result += %{ title="%s"} % escape_md( title )
    end
    result += EmptyElementSuffix

    @log.debug "Replacing %p with %p" % [ match, result ]
    result
  end
end
2067
-
2068
-
2069
# Regexp to match special characters in a code block
CodeEscapeRegexp = %r{( \* | _ | \{ | \} | \[ | \] | \\ )}x

### Return a copy of the code text +str+.
###
### The HTML/Markdown escaping that used to live here is commented out in
### the source, which left the method with an empty body returning nil;
### callers then invoke String methods on the result. The method now returns
### a copy of +str+, the same pass-through behaviour as #encode_html.
###
### NOTE(review): with escaping disabled, '&', '<', '>' and Markdown special
### characters inside code reach the output unescaped - confirm that this is
### intended. The disabled implementation was:
###   str.gsub( %r{&}, '&amp;' ).
###     gsub( %r{<}, '&lt;' ).
###     gsub( %r{>}, '&gt;' ).
###     gsub( CodeEscapeRegexp ) {|match| EscapeTable[match][:md5]}
def encode_code( str, rs )
  str.dup
end
2080
-
2081
# Build a header id from the header HTML +str+: tags are removed, whitespace
# characters become '_', Markdown characters are escaped with #escape_md,
# '/' becomes '.2F', the result is URI-escaped and every '%' is replaced
# by '.'.
#
# URI::DEFAULT_PARSER.escape is used because the URI.escape shortcut was
# removed in Ruby 3.0.
def escape_to_header_id(str)
  plain = str.gsub(/<\/?[^>]*>/, "").gsub(/\s/, "_")
  escaped = URI::DEFAULT_PARSER.escape(escape_md(plain).gsub("/", ".2F"))
  escaped.gsub("%", ".")
end
2084
-
2085
- #################################################################
2086
- ### U T I L I T Y F U N C T I O N S
2087
- #################################################################
2088
-
2089
### Return a copy of +str+ in which every '*' and '_' is replaced by the
### :md5 entry registered for that character in EscapeTable.
def escape_md( str )
  str.gsub( /\*|_/ ) do |symbol|
    EscapeTable[symbol][:md5]
  end
end
2095
-
2096
-
2097
- # Matching constructs for tokenizing X/HTML
2098
- HTMLCommentRegexp = %r{ <! ( -- .*? -- \s* )+ > }mx
2099
- XMLProcInstRegexp = %r{ <\? .*? \?> }mx
2100
- MetaTag = Regexp::union( HTMLCommentRegexp, XMLProcInstRegexp )
2101
-
2102
- HTMLTagOpenRegexp = %r{ < [a-z/!$] [^<>]* }imx
2103
- HTMLTagCloseRegexp = %r{ > }x
2104
- HTMLTagPart = Regexp::union( HTMLTagOpenRegexp, HTMLTagCloseRegexp )
2105
-
2106
- ### Break the HTML source in +str+ into a series of tokens and return
2107
- ### them. The tokens are just 2-element Array tuples with a type and the
2108
- ### actual content. If this function is called with a block, the type and
2109
- ### text parts of each token will be yielded to it one at a time as they are
2110
- ### extracted.
2111
- def tokenize_html( str )
2112
- depth = 0
2113
- tokens = []
2114
- @scanner.string = str.dup
2115
- type, token = nil, nil
2116
-
2117
- until @scanner.empty?
2118
- @log.debug "Scanning from %p" % @scanner.rest
2119
-
2120
- # Match comments and PIs without nesting
2121
- if (( token = @scanner.scan(MetaTag) ))
2122
- type = :tag
2123
-
2124
- # Do nested matching for HTML tags
2125
- elsif (( token = @scanner.scan(HTMLTagOpenRegexp) ))
2126
- tagstart = @scanner.pos
2127
- @log.debug " Found the start of a plain tag at %d" % tagstart
2128
-
2129
- # Start the token with the opening angle
2130
- depth = 1
2131
- type = :tag
2132
-
2133
- # Scan the rest of the tag, allowing unlimited nested <>s. If
2134
- # the scanner runs out of text before the tag is closed, raise
2135
- # an error.
2136
- while depth.nonzero?
2137
-
2138
- # Scan either an opener or a closer
2139
- chunk = @scanner.scan( HTMLTagPart ) or
2140
- break # AoBane Fix (refer to spec/code-block.rb)
2141
-
2142
- @log.debug " Found another part of the tag at depth %d: %p" % [ depth, chunk ]
2143
-
2144
- token += chunk
2145
-
2146
- # If the last character of the token so far is a closing
2147
- # angle bracket, decrement the depth. Otherwise increment
2148
- # it for a nested tag.
2149
- depth += ( token[-1, 1] == '>' ? -1 : 1 )
2150
- @log.debug " Depth is now #{depth}"
2151
- end
2152
-
2153
- # Match text segments
2154
- else
2155
- @log.debug " Looking for a chunk of text"
2156
- type = :text
2157
-
2158
- # Scan forward, always matching at least one character to move
2159
- # the pointer beyond any non-tag '<'.
2160
- token = @scanner.scan_until( /[^<]+/m )
2161
- end
2162
-
2163
- @log.debug " type: %p, token: %p" % [ type, token ]
2164
-
2165
- # If a block is given, feed it one token at a time. Add the token to
2166
- # the token list to be returned regardless.
2167
- if block_given?
2168
- yield( type, token )
2169
- end
2170
- tokens << [ type, token ]
2171
- end
2172
-
2173
- return tokens
2174
- end
2175
-
2176
-
2177
### Return +str+ unchanged. The encoding of ampersands and angle brackets
### that used to be performed here is disabled:
###   str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w+);)/i, "&amp;" ).
###     gsub( %r{<(?![a-z/?\$!])}i, "&lt;" )
def encode_html( str )
  str
end
2183
-
2184
-
2185
### Return a copy of +str+ with one level of line-leading indentation (one
### tab, or between one and TabWidth spaces) removed from every line.
def outdent( str )
  one_level = /^(\t|[ ]{1,#{TabWidth}})/
  str.gsub( one_level, '' )
end
2190
-
2191
# Return a copy of +str+ with TabWidth spaces inserted at the start of
# every line.
def indent(str)
  pad = ' ' * TabWidth
  str.gsub( /^/, pad )
end
2194
-
2195
- end
2196
- end
1
+ #
2
+ # AoBane - Extended Markdown Converter
3
+ #
4
+ # Author of Original BlueFeather: Dice <tetradice@gmail.com>
5
+ # Remaker: set.minami <set.minami@gmail.com>
6
+ # Website: https://github.com/setminami/AoBane/
7
+ # License: MIT
8
+ #
9
+ # If you want to know better about AoBane, See the Website.
10
+ #
11
+ #
12
+ #
13
+ #-- Copyrights & License -------------------------------------------------------
14
+ #
15
+ # Original Markdown:
16
+ # Copyright (c) 2003-2004 John Gruber
17
+ # <http://daringfireball.net/>
18
+ # All rights reserved.
19
+ #
20
+ # Original BlueCloth:
21
+ # Copyright (c) 2004 The FaerieMUD Consortium.
22
+ #
23
+ # Original BlueFeather:
24
+ # Copyright (c) 2013 Dice
25
+ #
26
+ # AoBane:
27
+ # Copyright (c) 2013 Set.Minami
28
+ #
29
+ # Permission is hereby granted, free of charge, to any person obtaining a copy of this
30
+ # software and associated documentation files (AoBane), to deal in the Software
31
+ # without restriction, including without limitation the rights to use, copy, modify,
32
+ # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
33
+ # permit persons to whom the Software is furnished to do so, subject to the following
34
+ # conditions:
35
+ # The above copyright notice and this permission notice shall be included in all copies or
36
+ # substantial portions of the Software.
37
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
38
+ # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
39
+ # PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
40
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
41
+ # OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
42
+ # OTHER DEALINGS IN THE SOFTWARE.
43
+
44
+
45
+ require 'digest/md5'
46
+ require 'logger'
47
+ require 'strscan'
48
+ require 'stringio'
49
+ require 'uri'
50
+ require 'AoBane/utilities'
51
+ require 'math_ml/string'
52
+
53
+ module AoBane
54
# Release identification.
VERSION = '0.1.0'
# NOTE(review): 0.0100 is the same Float as the previous 0.01, while VERSION
# moved to '0.1.0' - confirm which numeric value is intended.
VERSION_NUMBER = 0.0100
RELEASE_DATE = '2013-04-08'
VERSION_LABEL = "#{VERSION} (#{RELEASE_DATE})"

# UTF-8 byte order mark and a pattern matching it at the start of a line.
UTF8_BOM = "\xef\xbb\xbf"
UTF8_BOM_PATTERN = /^#{UTF8_BOM}/
61
+
62
+
63
+ # Fancy methods
64
# Module-level shortcuts: each one builds a throwaway Parser and forwards
# the call to the parser method of the same name.
class << self
  # Convert a Markdown fragment held in +src+.
  def parse_text(src)
    parser = Parser.new
    parser.parse_text(src)
  end
  alias_method :parse, :parse_text

  # Convert a whole document (headers + body) held in +src+.
  # +default_enc+ is passed through to the parser unchanged.
  def parse_document(src, default_enc = EncodingType::UTF8)
    parser = Parser.new
    parser.parse_document(src, default_enc)
  end

  # Convert the Markdown fragment stored in the file at +path+.
  def parse_text_file(path)
    parser = Parser.new
    parser.parse_text_file(path)
  end
  alias_method :parse_file, :parse_text_file

  # Convert the whole document stored in the file at +path+.
  def parse_document_file(path, default_enc = EncodingType::UTF8)
    parser = Parser.new
    parser.parse_document_file(path, default_enc)
  end
end
86
+
87
+ ### Exception class on AoBane running.
88
# Base class for every exception raised by AoBane itself.
class Error < ::RuntimeError
end

# Raised when an encoding name is not one AoBane handles.
class EncodingError < Error
end

# Raised for malformed Markdown input.
class FormatError < Error

  # Build the error message from the offending source +str+ and, when
  # given, a +specific+ description of what is wrong with it.
  def initialize( str, specific=nil )
    message =
      if specific
        "Bad markdown format near %p: %s" % [ str, specific ]
      else
        "Bad markdown format near %p" % str
      end

    super( message )
  end
end
109
+
110
# Names of the supported schemes for generating header ids.
module HeaderIDType
  MD5    = 'md5'
  ESCAPE = 'escape'
end
114
+
115
# Canonical encoding names plus helpers that normalise user-supplied names.
module EncodingType
  EUC = 'euc-jp'
  EUCJP = EUC_JP = EUC

  SJIS = 'shift_jis'
  SHIFT_JIS = SJIS

  UTF8 = 'utf-8'
  UTF_8 = UTF8

  ASCII = 'ascii'
  US_ASCII = ASCII

  # Map +str_value+ (case-insensitive) onto one of the canonical constants.
  # Raises EncodingError for any name that is not recognised.
  def self.regulate(str_value)
    name = str_value.downcase

    return SJIS  if name == 'shift-jis' || name == 'shift_jis'
    return EUC   if name == 'euc-jp'
    return UTF8  if name == 'utf-8'
    return ASCII if name == 'ascii'

    raise EncodingError, "not adapted encoding type - #{str_value} (shift[-_]jis, euc-jp, utf-8, or ascii)"
  end

  # Value to use for $KCODE: the canonical name itself, or 'none' for ASCII.
  def self.convert_to_kcode(str_value)
    canonical = self.regulate(str_value)
    if canonical == ASCII
      'none'
    elsif canonical == EUC || canonical == SJIS || canonical == UTF8
      canonical
    end
  end

  # Charset name for an HTML meta tag, or nil for ASCII.
  def self.convert_to_charset(str_value)
    canonical = self.regulate(str_value)
    if canonical == EUC
      'euc-jp'
    elsif canonical == SJIS
      'shift_jis'
    elsif canonical == UTF8
      'utf-8'
    elsif canonical == ASCII
      nil
    end
  end

end
169
+
170
# Stateless helpers shared by Document and Parser.
module Util
  # Characters that must be replaced before text is placed inside HTML.
  HTML_ESC = {
    '&' => '&amp;',
    '"' => '&quot;',
    '<' => '&lt;',
    '>' => '&gt;'
  }

  module_function

  # Return a copy of +str+ with &, ", < and > replaced by HTML entities.
  #
  # The substitution had been commented out, which left this method
  # returning its argument untouched; every caller that interpolates the
  # result into markup (title, meta content, href, warnings) therefore
  # emitted raw text.
  #
  # from http://jp.rubyist.net/magazine/?0010-CodeReview#l28
  # (Author: Minero Aoki)
  def escape_html(str)
    table = HTML_ESC # optimize
    str.gsub(/[&"<>]/) {|s| table[s] }
  end

  # Create an empty StringIO whose encoding matches that of the string
  # +encoding_base+ (when the running Ruby supports encodings).
  def generate_blank_string_io(encoding_base)
    io = StringIO.new

    if io.respond_to?(:set_encoding) then
      io.set_encoding(encoding_base.encoding)
    end

    return io
  end

  # Run the given block with $KCODE temporarily set to +kcode+.
  # When the Encoding constant is defined the block is simply yielded to
  # and $KCODE is left alone.
  def change_kcode(kcode = nil)
    if defined?(Encoding) then
      # ruby 1.9 later
      yield
    else
      # ruby 1.8 earlier
      original_kcode = $KCODE

      begin
        $KCODE = kcode if kcode
        yield

      ensure
        # recover
        $KCODE = original_kcode
      end
    end # if defined?
  end # def

  # True when +str+ starts with the three UTF-8 BOM bytes.
  def utf8_bom?(str)
    if str.respond_to?(:getbyte) and str.respond_to?(:bytesize) then
      # Compare raw bytes so the check does not depend on str's encoding.
      if str.bytesize >= 3 and
          str.getbyte(0) == UTF8_BOM.getbyte(0) and
          str.getbyte(1) == UTF8_BOM.getbyte(1) and
          str.getbyte(2) == UTF8_BOM.getbyte(2) then
        return true
      else
        return false
      end

    else
      return(str =~ UTF8_BOM_PATTERN ? true : false)
    end
  end
end
234
+
235
# A source document: a hash of header fields (keys are stored downcased)
# followed by a Markdown body.
class Document
  HEADER_PATTERN = /^([a-zA-Z0-9-]+?)\s*\:\s*(.+?)\s*(?:\n|\Z)/
  BLANK_LINE_PATTERN = /^\n/
  HEADER_SEQUEL_PATTERN = /^\s+(.+)$/

  attr_accessor :headers, :body
  alias text body
  alias text= body=

  # Read headers and body from the IO-like +input+ and build a Document.
  # +default_enc+ is used until an "encoding" header says otherwise.
  def self.parse_io(input, default_enc = EncodingType::UTF8)
    collected = {}
    content = nil
    start_pos = input.pos
    default_enc = EncodingType.regulate(default_enc)

    Util.change_kcode(EncodingType.convert_to_kcode(default_enc)) do
      # default encoding
      input.set_encoding(Encoding.find(default_enc)) if defined?(Encoding)

      # --- headers ---
      resume_pos = nil
      at_first_line = true

      loop do
        resume_pos = input.pos
        line = input.gets

        # cut UTF-8 BOM
        line.slice!(UTF8_BOM_PATTERN) if at_first_line and Util.utf8_bom?(line)
        at_first_line = false

        # EOF or Metadata end
        break unless line and line.chomp =~ HEADER_PATTERN

        key = $1.downcase
        value = $2

        if key == 'encoding' and not collected.include?('encoding') then
          kc = EncodingType.convert_to_kcode(value.downcase)
          if input.respond_to?(:set_encoding) then
            input.set_encoding(EncodingType.regulate(value))

            # rewind (reason => [ruby-list:45988])
            input.pos = start_pos
            at_first_line = true
          else
            $KCODE = kc
          end
        end

        collected[key] = value
      end

      # step back to the first non-header line
      input.pos = resume_pos

      # --- blank lines between headers and body ---
      loop do
        resume_pos = input.pos
        line = input.gets
        break if line.nil? or not line =~ BLANK_LINE_PATTERN
      end

      # step back to the first body line
      input.pos = resume_pos

      # --- body ---
      content = input.read
    end

    return new(collected, content)
  end

  # Same as parse_io, reading from the string +str+.
  def self.parse(str, default_enc = EncodingType::UTF8)
    parse_io(StringIO.new(str), default_enc)
  end

  # +headers+ is copied through #[]= so keys are downcased and values
  # converted to strings.
  def initialize(headers = {}, body = '')
    @headers = {}
    headers.each {|name, value| self[name] = value }
    @body = body
  end

  # Header lookup; +key+ is matched case-insensitively.
  def [](key)
    @headers[key.to_s.downcase]
  end

  # Header assignment; the key is downcased and the value stringified.
  def []=(key, value)
    @headers[key.to_s.downcase] = value.to_s
  end

  def title
    @headers['title']
  end

  def css
    @headers['css']
  end

  # True when the "numbering" header is one of yes / 1 / true / on.
  def numbering
    %w[yes 1 true on].include?(@headers['numbering'])
  end

  alias numbering? numbering

  # Header level at which numbering starts; 2 unless the header holds a
  # value from 1 to 6.
  def numbering_start_level
    level = (@headers['numbering-start-level'] || 2).to_i
    return 2 unless level >= 1 and level <= 6
    level
  end

  def encoding_type
    @headers['encoding'] || EncodingType::UTF8
  end

  def header_id_type
    (@headers['header-id-type'] || HeaderIDType::MD5).downcase
  end

  def kcode
    self.encoding_type && EncodingType.convert_to_kcode(self.encoding_type)
  end

  # Render this document as a complete HTML page.
  def to_html
    Parser.new.document_to_html(self)
  end
end
392
+
393
+
394
+ class Parser
395
+ # Rendering state class Keeps track of URLs, titles, and HTML blocks
396
+ # midway through a render. I prefer this to the globals of the Perl version
397
+ # because globals make me break out in hives. Or something.
398
# Mutable bag of everything collected while one text is being rendered.
class RenderState
  # Headers struct.
  Header = Struct.new(:id, :level, :content, :content_html)

  # from Original BlueCloth
  attr_accessor :urls, :titles, :html_blocks, :log

  # AoBane Extension
  attr_accessor :footnotes, :found_footnote_ids, :warnings
  attr_accessor :headers, :block_transform_depth
  attr_accessor :header_id_type # option switch
  attr_accessor :numbering, :numbering_start_level # option switch
  alias numbering? numbering

  # Start with empty collections and the default option values.
  def initialize
    @urls = {}
    @titles = {}
    @html_blocks = {}
    @log = nil

    @footnotes = {}
    @found_footnote_ids = []
    @warnings = []
    @headers = []
    @block_transform_depth = 0

    @header_id_type = HeaderIDType::MD5
    @numbering = false
    @numbering_start_level = 2
  end

end
424
+
425
+ # Tab width for #detab! if none is specified
426
TabWidth = 4

# The tag-closing string -- set to '>' for HTML
EmptyElementSuffix = " />"

# Table of MD5 sums for escaped characters.
# Each special character gets two entries: one keyed by the character
# itself and one keyed by its backslashed form. Both carry the MD5 of
# their own key, a regexp for that MD5, a regexp matching the backslashed
# character, and the character to restore when unescaping.
EscapeTable = {}
'\\`*_{}[]()#.!|:~'.each_char do |char|
  backslashed_re = Regexp::new( '\\\\' + Regexp::escape(char) )

  [ char, "\\#{char}" ].each do |token|
    digest = Digest::MD5::hexdigest( token )

    EscapeTable[ token ] = {
      :md5 => digest,
      :md5re => Regexp::new( digest ),
      :re => backslashed_re,
      :unescape => char,
    }
  end
end
452
+
453
+
454
+ #################################################################
455
+ ### I N S T A N C E M E T H O D S
456
+ #################################################################
457
+
458
+ ### Create a new AoBane parser.
459
# Set up logging and option defaults.
#
# +restrictions+ is a list (possibly nested) of option names; for each
# name the matching "name=" writer is called with true.
def initialize(*restrictions)
  # $deferr no longer exists on Ruby 1.9 and later: it evaluates to nil
  # and Logger then discards every message. Log to $stderr explicitly.
  @log = Logger::new( $stderr )
  @log.level = $DEBUG ?
    Logger::DEBUG :
    ($VERBOSE ? Logger::INFO : Logger::WARN)
  @scanner = nil

  # Add any restrictions, and set the line-folding attribute to reflect
  # what happens by default.
  @filter_html = nil
  @filter_styles = nil
  restrictions.flatten.each {|r| __send__("#{r}=", true) }
  @fold_lines = true

  @use_header_id = true
  @display_warnings = true

  @log.debug "String is: %p" % self
end
478
+
479
+
480
+ ######
481
+ public
482
+ ######
483
+
484
+ # Filters for controlling what gets output for untrusted input. (But really,
485
+ # you're filtering bad stuff out of untrusted input at submission-time via
486
+ # untainting, aren't you?)
487
attr_accessor :filter_html
attr_accessor :filter_styles

# RedCloth-compatibility accessor. Line-folding is part of Markdown syntax,
# so nothing reads this value.
attr_accessor :fold_lines

# AoBane Extension: when true (the default), collected warnings are
# prepended to the generated html.
attr_accessor :display_warnings

# AoBane Extension: when true (the default), every header receives an id
# for use by the toc and by anchors.
attr_accessor :use_header_id
498
+
499
+
500
+
501
+
502
+ ### Render Markdown-formatted text in this string object as HTML and return
503
+ ### it. The parameter is for compatibility with RedCloth, and is currently
504
+ ### unused, though that may change in the future.
505
# Literal character sequences that parse_text rewrites as HTML entities
# once rendering is finished.
SymbolEntityTable = {
  '--'   => '&mdash;',
  '<=>'  => '&hArr;',
  '<->'  => '&harr;',
  '->'   => '&rarr;',
  '<-'   => '&larr;',
  '=>'   => '&rArr;',
  '<='   => '&lArr;',
  '||^'  => '&uArr;',
  '||/'  => '&dArr;',
  '|/'   => '&darr;',
  '|^'   => '&uarr;',
  '>>'   => '&raquo;',
  '<<'   => '&laquo;',
  '+_'   => '&plusmn;',
  '!='   => '&ne;',
  '~~'   => '&asymp;',
  '~='   => '&cong;',
  '<_'   => '&le;',
  '>_'   => '&ge;',
  '|FA'  => '&forall;',
  '|EX'  => '&exist;',
  '|='   => '&equiv;',
  '(+)'  => '&oplus;',
  '(x)'  => '&otimes;',
  '\\&'  => '&amp;',
  '(c)'  => '&copy;',
  '(R)'  => '&reg;',
  '(SS)' => '&sect;',
  '(TM)' => '&trade;'
}

# Built from the table keys, longest first, so that the pattern and the
# table can never disagree and "<=>" is tried before "<=" and "=>".
SymbolEntityRegexp = Regexp.union(SymbolEntityTable.keys.sort_by {|key| -key.length })

# Render the Markdown-formatted +source+ as HTML.
#
# +rs+ is the RenderState to fill in; a fresh one is created when omitted.
#
# Returns an Array with one String per output line (not a single String).
#
# Fixes relative to the previous implementation:
# * "{nrange:hN-hM}" without a ";start" suffix raised on a nil capture and
#   was therefore never applied; the start number now defaults to 1.
# * The symbol pattern was a multi-line literal without the x flag, so the
#   alternatives at the start of its continuation lines never matched.
# * "<=>", a lone "^" and "()" matched the pattern but had no table entry
#   and were deleted from the output; "(+)" could not match at all.
# * "&ge" and "&oplus" lacked the closing semicolon.
# * Only the opening line of a <pre><code> section was protected from
#   symbol replacement; now the whole section is.
def parse_text(source, rs = nil)
  rs ||= RenderState.new

  # check
  unless [HeaderIDType::MD5, HeaderIDType::ESCAPE].include?(rs.header_id_type)
    rs.warnings << "illegal header id type - #{rs.header_id_type}"
  end

  # Create a StringScanner we can reuse for various lexing tasks
  @scanner = StringScanner::new( '' )

  # Make a copy of the string with normalized line endings, tabs turned to
  # spaces, and a couple of guaranteed newlines at the end
  text = detab(source.gsub( /\r\n?/, "\n" ))
  text += "\n\n"
  @log.debug "Normalized line-endings: %p" % text

  # AoBane Extension: *[text](color|face#size) becomes a <font> element
  text.gsub!(/\*\[(.*?)\]\((.*?)(\|.*?)*(#.*?)*\)/) {
    color = $2 || ''
    face = $3 ? $3.delete('|') : ''
    size = $4 ? $4.delete('#') : ''
    '<font color="' + color + '" ' +
      'face="' + face + '" ' +
      'size="' + size + '">' +
      $1 + '</font>'
  }

  # AoBane Extension: per-line preprocessing ({nrange:...}, \TeX{...\TeX},
  # and "%" section numbering). The text is rebuilt line by line; a line
  # that raises is logged and left out.
  nrange = []
  departure = 1
  preproc = text.dup
  text.clear
  html_text_number = 0
  preproc.each_line { |line|
    html_text_number += 1
    begin
      line.gsub!(/^\{nrange:(.*?)(;\d+){0,1}\}/) {
        levels, start = $1, $2
        dep_num = start ? start.delete(';').to_i : 0
        departure = dep_num > 0 ? dep_num : 1
        if /[hH]([1-6])\-[hH]([1-6])/ =~ levels
          nrange.push($1)
          nrange.push($2)
          # only one range may be declared per text
          if nrange.size > 2 then
            nrange.pop
            nrange.pop
            raise "Syntax Error!"
          end
        end
        '' # the directive itself never appears in the output
      }

      line.gsub!(/\\TeX{(.*?)\\TeX}/) {
        $1.nil? ? '' : $1.to_mathml
      }

      # calculate numbering
      range = nrange[1].to_i - nrange[0].to_i
      if range < 0 then
        # Fatal by design: report and terminate the process.
        p "AoBane Syntax Error:Header range is WRONG!" +
          "@ l.#{html_text_number}"
        exit(-1)
      end
      # NOTE(review): when no {nrange} was declared, range is 0 and the
      # quantifier below becomes {1,0}; behaviour then depends on how the
      # regexp engine and Utilities.calcSectionNo treat that - confirm.
      line.gsub!(/^(%{1,#{range}})(.*?)\n$/) {
        line = Utilities.
          calcSectionNo(nrange.min, range, $1.size, departure, $2)
      }
      text << line
      @log.debug nrange.minmax
    rescue => e
      @log.warn "AoBane Syntax WARNING l.#{html_text_number}:#{line.chomp} haven't adopted"
      @log.warn e
    end
  }

  # Filter HTML if we're asked to do so
  if self.filter_html
    #text.gsub!( "<", "&lt;" )
    #text.gsub!( ">", "&gt;" )
    @log.debug "Filtered HTML: %p" % text
  end

  # Simplify blank lines
  text.gsub!( /^ +$/, '' )
  @log.debug "Tabs -> spaces/blank lines stripped: %p" % text

  # Replace HTML blocks with placeholders
  text = hide_html_blocks( text, rs )
  @log.debug "Hid HTML blocks: %p" % text
  @log.debug "Render state: %p" % rs

  # Strip footnote definitions, store in render state
  text = strip_footnote_definitions( text, rs )
  @log.debug "Stripped footnote definitions: %p" % text
  @log.debug "Render state: %p" % rs

  # Strip link definitions, store in render state
  text = strip_link_definitions( text, rs )
  @log.debug "Stripped link definitions: %p" % text
  @log.debug "Render state: %p" % rs

  # Escape meta-characters
  text = escape_special_chars( text )
  @log.debug "Escaped special characters: %p" % text

  # Transform block-level constructs
  text = apply_block_transforms( text, rs )
  @log.debug "After block-level transforms: %p" % text

  # Now swap back in all the escaped characters
  text = unescape_special_chars( text )
  @log.debug "After unescaping special characters: %p" % text

  # Extend footnotes
  unless rs.footnotes.empty? then
    text << %Q|<div class="footnotes"><hr#{EmptyElementSuffix}\n<ol>\n|
    rs.found_footnote_ids.each do |id|
      content = rs.footnotes[id]
      html = apply_block_transforms(content.sub(/\n+\Z/, '') + %Q| <a href="#footnote-ref:#{id}" rev="footnote">&#8617;</a>|, rs)
      text << %Q|<li id="footnote:#{id}">\n#{html}\n</li>|
    end
    text << %Q|</ol>\n</div>\n|
  end

  # Display warnings
  if @display_warnings then
    unless rs.warnings.empty? then
      html = %Q|<pre><strong>[WARNINGS]\n|
      html << rs.warnings.map{|x| Util.escape_html(x)}.join("\n")
      html << %Q|</strong></pre>|

      text = html + text
    end
  end

  # Replace symbol sequences with entities, leaving every line of a
  # <pre><code> ... </code></pre> section exactly as rendered.
  output = []
  inside_code = false
  text.each_line {|line|
    if inside_code
      inside_code = false if line =~ /<\/code><\/pre>/
    elsif line =~ /<pre><code>/
      inside_code = true unless line =~ /<\/code><\/pre>/
    else
      line.gsub!(SymbolEntityRegexp) {|symbol| SymbolEntityTable[symbol] }
    end
    output << line
  }
  return output
end
700
+
701
+ alias parse parse_text
702
+
703
+ # return values are extended. (mainly for testing)
704
# Like parse_text, but returns a two-element array: the rendered result and
# the RenderState that was used (a new one unless +rs+ is given).
# (mainly for testing)
def parse_text_with_render_state(str, rs = nil)
  state = rs || RenderState.new
  [parse_text(str, state), state]
end
710
+
711
# Read the whole file at +path+ and render its contents with parse_text.
def parse_text_file(path)
  source = File.read(path)
  parse_text(source)
end
714
+
715
+ alias parse_file parse_text_file
716
+
717
+
718
# Parse +source+ (headers + body) into a Document and render it as a
# complete HTML page. +default_enc+ is handed to Document.parse.
def parse_document(source, default_enc = EncodingType::UTF8)
  document_to_html(Document.parse(source, default_enc))
end
723
+
724
# Parse the file at +path+ (headers + body) into a Document and render it
# as a complete HTML page. +default_enc+ is handed to Document.parse_io.
#
# The file is opened with File.open rather than Kernel#open: the latter
# treats a path beginning with "|" as a command to run.
def parse_document_file(path, default_enc = EncodingType::UTF8)
  doc = File.open(path) {|f| Document.parse_io(f, default_enc) }

  return document_to_html(doc)
end
732
+
733
+
734
# Render +doc+ as a complete XHTML page and return it as a String.
# The head section is built from the document headers (encoding, title,
# description, keywords, css and the three feed links); the body is the
# result of parse_text on the document body.
def document_to_html(doc)
  rs = RenderState.new
  rs.numbering = true if doc.numbering?
  rs.numbering_start_level = doc.numbering_start_level
  rs.header_id_type = doc.header_id_type

  body_html = nil

  if doc.encoding_type then
    Util.change_kcode(doc.kcode){
      body_html = parse_text(doc.body, rs)
    }
  else
    body_html = parse_text(doc.body, rs)
  end

  out = Util.generate_blank_string_io(doc.body)

  # XHTML declaration
  out.puts %Q|<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">|

  # html start
  out.puts %Q|<html>|

  # head
  out.puts %Q|<head>|

  if doc.encoding_type and (charset = EncodingType.convert_to_charset(doc.encoding_type)) then
    out.puts %Q|<meta http-equiv="Content-Type" content="text/html; charset=#{charset}" />|
  end

  # title: explicit header first, then the first level-1 header, then a
  # fixed fallback
  first_h1 = rs.headers.find{|x| x.level == 1}
  h1_content = (first_h1 ? first_h1.content : nil)
  title = Util.escape_html(doc.title || h1_content || 'no title (Generated by AoBane)')
  out.puts %Q|<title>#{title}</title>|

  %w(description keywords).each do |name|
    next unless doc[name]
    content = Util.escape_html(doc[name])
    out.puts %Q|<meta name="#{name}" content="#{content}" />|
  end

  if doc['css'] then
    href = Util.escape_html(doc.css)
    out.puts %Q|<link rel="stylesheet" type="text/css" href="#{href}" />|
  end

  # feed links, in the order rdf, rss, atom
  [
    ['rdf-feed',  'application/rdf+xml'],
    ['rss-feed',  'application/rss+xml'],
    ['atom-feed', 'application/atom+xml'],
  ].each do |header_name, mime_type|
    next unless doc[header_name]
    href = Util.escape_html(doc[header_name])
    out.puts %Q|<link rel="alternate" type="#{mime_type}" href="#{href}" />|
  end

  out.puts %Q|</head>|

  # body
  out.puts %Q|<body>|
  out.puts
  out.puts body_html
  out.puts
  out.puts %Q|</body>|

  # html end
  out.puts %Q|</html>|

  return out.string
end
818
+
819
+ alias doc2html document_to_html
820
+
821
+
822
+
823
+
824
+ #######
825
+ #private
826
+ #######
827
+
828
+ ### Convert tabs in +str+ to spaces.
829
+ ### (this method is reformed to function-like method from original BlueCloth)
830
# Return a copy of +str+ in which every tab is replaced by spaces.
# For each tab, the pad width is +tabwidth+ minus (length of the text
# between the previous tab and this one, modulo +tabwidth+).
# Lines are split on "\n" and re-joined, so trailing newlines are dropped.
def detab( str, tabwidth=TabWidth )
  expanded_lines = str.split( /\n/ ).map {|line|
    line.gsub( /(.*?)\t/ ) { $1 + ' ' * (tabwidth - $1.length % tabwidth) }
  }

  expanded_lines.join("\n")
end
839
+
840
+
841
+
842
+
843
+ ### Do block-level transforms on a copy of +str+ using the specified render
844
+ ### state +rs+ and return the results.
845
# Run every block-level transform over +str+, in a fixed order, threading
# the text from one step to the next. rs.block_transform_depth is raised
# by one for the duration of the call.
def apply_block_transforms( str, rs )
  rs.block_transform_depth += 1

  # Port: This was called '_runBlockGamut' in the original

  @log.debug "Applying block transforms to:\n %p" % str

  steps = [
    :pretransform_fenced_code_blocks,
    :pretransform_block_separators,
    :transform_headers,
    :transform_toc,
    :transform_hrules,
    :transform_lists,
    :transform_definition_lists, # AoBane Extension
    :transform_code_blocks,
    :transform_block_quotes,
    :transform_tables,
    :hide_html_blocks,
    :form_paragraphs,
  ]
  text = steps.inject( str ) {|current, step| __send__( step, current, rs ) }

  rs.block_transform_depth -= 1
  @log.debug "Done with block transforms:\n %p" % text
  return text
end
872
+
873
+
874
+ ### Apply Markdown span transforms to a copy of the specified +str+ with the
875
+ ### given render state +rs+ and return it.
876
# Run the span-level transforms over +str+ in a fixed order and return
# the result. The hard-break substitution at the end is applied in place
# to the string returned by the last transform.
def apply_span_transforms( str, rs )
  @log.debug "Applying span transforms to:\n %p" % str

  result = transform_code_spans( str, rs )
  result = transform_auto_links( result, rs )
  result = encode_html( result )
  result = transform_images( result, rs )
  result = transform_anchors( result, rs )
  result = transform_italic_and_bold( result, rs )

  # Hard breaks: two or more spaces before a newline
  result.gsub!( / {2,}\n/, "<br#{EmptyElementSuffix}\n" )

  @log.debug "Done with span transforms:\n %p" % result
  return result
end
892
+
893
+
894
+ # The list of tags which are considered block-level constructs and an
895
+ # alternation pattern suitable for use in regexps made from the list
896
StrictBlockTags = %w[ p div h[1-6] blockquote pre table dl ol ul script noscript
  form fieldset iframe math ins del ]
StrictTagPattern = StrictBlockTags.join('|')

LooseBlockTags = StrictBlockTags - %w[ins del]
LooseTagPattern = LooseBlockTags.join('|')

# Nested blocks:
#   <div>
#     <div>
#     tags for inner block must be indented.
#     </div>
#   </div>
StrictBlockRegexp = %r{
  ^                          # start of line
  <(#{StrictTagPattern})     # opening tag name, captured as \1
  \b                         # word break
  (.*\n)*?                   # as few whole lines as possible
  </\1>                      # closing tag with the same name
  [ ]*                       # trailing spaces
  $                          # end of line or document
}ix

# More-liberal block-matching
LooseBlockRegexp = %r{
  ^                          # start of line
  <(#{LooseTagPattern})      # opening tag name, captured as \1
  \b                         # word break
  (.*\n)*?                   # as few whole lines as possible
  .*</\1>                    # anything, then the closing tag
  [ ]*                       # trailing spaces
  $                          # end of line or document
}ix

# Special case for <hr />.
HruleBlockRegexp = %r{
  (                          # $1: what precedes the tag
    \A\n?                    #   start of doc + optional newline
    |                        #   or
    .*\n\n                   #   anything + blank line
  )
  (                          # $2: the tag itself
                             # AoBane fix: Not allow any space on line top
    <hr                      #   tag open
    \b                       #   word break
    ([^<>])*?                #   attributes
    /?>                      #   tag close
    $                        #   followed by end of line or document
  )
}ix

# Return a copy of +str+ in which every HTML block starting in the left
# margin is replaced by the MD5 of its text (surrounded by blank lines).
# The original text is stored in rs.html_blocks under that MD5.
def hide_html_blocks( str, rs )
  @log.debug "Hiding HTML blocks in %p" % str

  # Stores the block and returns its placeholder
  tokenize = lambda {|block|
    digest = Digest::MD5::hexdigest( block )
    rs.html_blocks[ digest ] = block
    @log.debug "Replacing %p with %p" % [ block, digest ]
    "\n\n#{digest}\n\n"
  }

  hidden = str.dup

  @log.debug "Finding blocks with the strict regex..."
  hidden.gsub!( StrictBlockRegexp, &tokenize )

  @log.debug "Finding blocks with the loose regex..."
  hidden.gsub!( LooseBlockRegexp, &tokenize )

  @log.debug "Finding hrules..."
  hidden.gsub!( HruleBlockRegexp ) {
    leading, tag = $1, $2
    leading + tokenize.call( tag )
  }

  return hidden
end
973
+
974
+
975
+ # Link defs are in the form: ^[id]: url "optional title"
976
LinkRegexp = %r{
  ^[ ]{0,#{TabWidth - 1}}    # AoBane fix: indent < tab width
  \[(.+)\]:                  # $1 = id
  [ ]*
  \n?                        # maybe *one* newline
  [ ]*
  <?(\S+?)>?                 # $2 = url
  [ ]*
  \n?                        # maybe one newline
  [ ]*
  (?:
    # Titles are delimited by "quotes" or (parens).
    ["(]
    (.+?)                    # $3 = title
    [")]                     # closing quote or paren
    [ ]*
  )?                         # title is optional
  (?:\n+|\Z)
}x

# Remove every link definition from a copy of +str+ and return the copy.
# For each definition the (downcased) id is recorded in rs.urls with the
# encoded url, and in rs.titles when a title is present.
def strip_link_definitions( str, rs )
  str.gsub( LinkRegexp ) do
    id, url, title = $1, $2, $3
    key = id.downcase

    rs.urls[ key ] = encode_html( url )
    rs.titles[ key ] = title.gsub( /"/, "&quot;" ) unless title.nil?

    ""
  end
end
1010
+
1011
+ # Footnotes defs are in the form: [^id]: footnote contents.
1012
FootnoteDefinitionRegexp = %r{
  ^[ ]{0,#{TabWidth - 1}}
  \[\^(.+?)\]\:              # $1 = id
  [ ]*
  (.*)                       # $2 = content of the first line
  (?:\n|\Z)

  (                          # $3 = content of the following lines
    (?:
      [ ]{#{TabWidth},}      # indented
      .*
      (?:\n|\Z)
    |
      \n                     # blank line
    )*
  )?

}x

FootnoteIdRegexp = /^[a-zA-Z0-9\:\._-]+$/

# Remove every footnote definition from a copy of +str+ and return the
# copy. Each definition is stored in rs.footnotes under its id; an id
# containing characters outside FootnoteIdRegexp adds a warning.
def strip_footnote_definitions(str, rs)
  str.gsub( FootnoteDefinitionRegexp ) {
    id, first_line, following = $1, $2, $3

    unless id =~ FootnoteIdRegexp then
      rs.warnings << "illegal footnote id - #{id} (legal chars: a-zA-Z0-9_-.:)"
    end

    content =
      if following then
        @log.debug " Stripping multi-line definition %p, %p" % [$2, $3]
        joined = first_line + "\n" + outdent(following.chomp)
        @log.debug " Stripped multi-line definition %p, %p" % [id, joined]
        joined
      else
        single = first_line || ''
        @log.debug " Stripped single-line definition %p, %p" % [id, single]
        single
      end
    rs.footnotes[id] = content

    ""
  }
end
1057
+
1058
+
1059
+ ### Escape special characters in the given +str+
1060
# Return +str+ rebuilt from its HTML tokens: inside tags, * and _ are
# replaced by their MD5 placeholders; in plain text, backslash escapes are
# encoded. Any other token type raises TypeError.
def escape_special_chars( str )
  @log.debug " Escaping special characters"
  text = ''

  # The original Markdown source has something called '$tags_to_skip'
  # declared here, but it's never used, so I don't define it.

  tokenize_html( str ) {|token, chunk|
    @log.debug " Adding %p token %p" % [ token, chunk ]

    text +=
      case token
      when :tag
        # Within tags, encode * and _
        chunk.
          gsub( /\*/, EscapeTable['*'][:md5] ).
          gsub( /_/, EscapeTable['_'][:md5] )
      when :text
        # Encode backslashed stuff in regular text
        encode_backslash_escapes( chunk )
      else
        raise TypeError, "Unknown token type %p" % token
      end
  }

  @log.debug " Text with escapes is now: %p" % text
  return text
end
1088
+
1089
+
1090
+ ### Swap escaped special characters in a copy of the given +str+ and return
1091
+ ### it.
1092
# Replace every MD5 placeholder from EscapeTable found in +str+ with the
# character it stands for. +str+ is modified in place and also returned.
def unescape_special_chars( str )
  EscapeTable.each_pair do |token, entry|
    @log.debug "Unescaping escaped %p with %p" % [ token, entry[:md5re] ]
    str.gsub!( entry[:md5re], entry[:unescape] )
  end

  str
end
1100
+
1101
+
1102
+ ### Return a copy of the given +str+ with any backslashed special character
1103
+ ### in it replaced with MD5 placeholders.
1104
# Return a copy of +str+ in which every backslashed special character is
# replaced by its MD5 placeholder. Doubled backslashes are handled first
# so that they are not mistaken for the start of another escape.
def encode_backslash_escapes( str )
  double_backslash = '\\\\'
  encoded = str.gsub( /\\\\/, EscapeTable[double_backslash][:md5] )

  EscapeTable.each_pair do |token, esc|
    # only the backslashed keys are relevant, and "\\" is already done
    next if token == double_backslash
    next unless token =~ /\\./
    encoded.gsub!( esc[:re], esc[:md5] )
  end

  encoded
end
1116
+
1117
+
1118
# Return a copy of +str+ in which every line consisting of a single "~"
# (indented by less than TabWidth, optionally followed by spaces) is
# replaced by "~" on a line of its own between blank lines.
def pretransform_block_separators(str, rs)
  separator_line = /^[ ]{0,#{TabWidth - 1}}[~][ ]*\n/
  str.gsub(separator_line, "\n~\n\n")
end
1123
+
1124
+
1125
TOCRegexp = %r{
  ^\{                # bracket on line-head
  [ ]*               # optional inner space
  toc

  (?:
    (?:
      [:]            # colon
      |              # or
      [ ]+           # 1 or more space
    )
    (.+?)            # $1 = parameter
  )?

  [ ]*               # optional inner space
  \}                 # closer
  [ ]*$              # optional space on line-foot
}ix

TOCStartLevelRegexp = %r{
  ^
  (?:                # optional start
    h
    ([1-6])          # $1 = start level
  )?

  (?:                # range symbol
    [.]{2,}|[-]      # .. or -
  )

  (?:                # optional end
    h?               # optional 'h'
    ([1-6])          # $2 = end level
  )?$
}ix

# Replace every table-of-contents marker in a copy of +str+ with a nested
# Markdown list that links to the headers collected in rs.headers, and
# return the copy. The marker may carry a level range; without one, levels
# 2 to 6 are listed. Unusable parameters add a warning to rs.warnings.
def transform_toc( str, rs )
  @log.debug " Transforming tables of contents"
  str.gsub(TOCRegexp) do
    start_level = 2 # default
    end_level = 6

    param = $1
    if param then
      if param =~ TOCStartLevelRegexp then
        from, to = $1, $2
        if !from and !to then
          rs.warnings << "illegal TOC parameter - #{param} (valid example: 'h2..h4')"
        else
          start_level = (from ? from.to_i : 2)
          end_level = (to ? to.to_i : 6)
        end
      else
        rs.warnings << "illegal TOC parameter - #{param} (valid example: 'h2..h4')"
      end
    end

    first_header = rs.headers.first
    if first_header and first_header.level >= (start_level + 1) then
      rs.warnings << "illegal structure of headers - h#{start_level} should be set before h#{first_header.level}"
    end

    # one list item per header inside the requested range, indented by
    # its distance from the start level
    ul_text = "\n\n"
    rs.headers.each do |header|
      next unless header.level >= start_level and header.level <= end_level
      ul_text << ' ' * TabWidth * (header.level - start_level)
      ul_text << '* '
      ul_text << %Q|<a href="##{header.id}" rel="toc">#{header.content_html}</a>|
      ul_text << "\n"
    end
    ul_text << "\n"

    ul_text # output
  end
end
1203
+
1204
TableRegexp = %r{
  (?:
    ^([ ]{0,#{TabWidth - 1}})  # not indented
    (?:[|][ ]*)                # NOT optional border

    \S.*?                      # 1st cell content

    (?:                        # 2nd cell or later
      [|]                      # cell splitter
      .+?                      # content
    )+                         # 1 or more..

    [|]?                       # optional border
    (?:\n|\Z)                  # line end
  )+
}x

# Return a copy of +str+ in which every run of table rows is replaced by
# the markup produced by transform_table_rows.
def transform_tables(str, rs)
  str.gsub(TableRegexp) { transform_table_rows($&, rs) }
end
1227
+
1228
TableSeparatorCellRegexp = %r{
  ^
  [ ]*
  ([:])?             # $1 = left-align symbol
  [ ]*
  [-]+               # border
  [ ]*
  ([:])?             # $2 = right-align symbol
  [ ]*
  $
}x

# Turn the table rows in +str+ (one row per line, cells separated by "|")
# into a <table> element and return it as a String.
# When there are at least three rows and the second consists solely of
# separator cells, the first row becomes the <thead> and the separator row
# supplies each column's text-align style.
def transform_table_rows(str, rs)

  # split cells to 2-d array
  rows = str.split("\n").map{|line| line.split('|')}

  rows.each do |cells|
    # cut left space
    cells.first.lstrip!

    # cut when optional side-borders is included
    cells.shift if cells.first.empty?
  end

  column_attrs = []

  html = ''
  html << "<table>\n"

  # head is exist?
  if rows.size >= 3 and rows[1].all?{|cell| cell =~ TableSeparatorCellRegexp} then
    head_row = rows.shift
    separator_row = rows.shift

    separator_row.each do |cell|
      cell.match TableSeparatorCellRegexp
      left, right = $1, $2

      column_attrs <<
        if left and right then
          ' style="text-align: center"'
        elsif right then
          ' style="text-align: right"'
        elsif left then
          ' style="text-align: left"'
        else
          ''
        end
    end

    html << "\t<thead><tr>\n"
    head_row.each_with_index do |cell, i|
      html << "\t\t<th#{column_attrs[i]}>#{apply_span_transforms(cell.strip, rs)}</th>\n"
    end
    html << "\t</tr></thead>\n"
  end

  # data row
  html << "\t<tbody>\n"
  rows.each do |cells|
    html << "\t\t<tr>\n"
    cells.each_with_index do |cell, i|
      html << "\t\t\t<td#{column_attrs[i]}>#{apply_span_transforms(cell.strip, rs)}</td>\n"
    end
    html << "\t\t</tr>\n"
  end
  html << "\t</tbody>\n"

  html << "</table>\n"

  html
end
1301
+
1302
+
1303
### Return a copy of +str+ in which every line made up of three or more
### '-', '*' or '_' characters (each optionally surrounded by one space) is
### replaced by an <hr> element. +rs+ is not used.
def transform_hrules( str, rs )
  @log.debug " Transforming horizontal rules"
  rule_line = /^( ?[\-\*_] ?){3,}$/
  hr_markup = "\n<hr#{EmptyElementSuffix}\n"
  str.gsub( rule_line, hr_markup )
end
1309
+
1310
+
1311
+
1312
# List markers: "1." style for ordered lists, one of * + - for unordered ones.
ListMarkerOl = %r{\d+\.}
ListMarkerUl = %r{[*+-]}
ListMarkerAny = Regexp.union( ListMarkerOl, ListMarkerUl )

# Matches a whole list: a marker indented by less than TabWidth, then the
# list body up to end of input or up to a blank line that is followed by
# something other than another list item. $1 is the first marker.
ListRegexp = %r{
  (?:
    ^[ ]{0,#{TabWidth - 1}} # Indent < tab width
    (#{ListMarkerAny}) # unordered or ordered ($1)
    [ ]+ # At least one space
  )
  (?m:.+?) # item content (include newlines)
  (?:
    \z # Either EOF
    | # or
    \n{2,} # Blank line...
    (?=\S) # ...followed by non-space
    (?![ ]* # ...but not another item
    (#{ListMarkerAny})
    [ ]+)
  )
}x
1334
+
1335
### Return a copy of +str+ in which each list matched by ListRegexp is wrapped
### in <ul> or <ol>, chosen by the list's first marker. The list body is
### rendered by transform_list_items; +rs+ is passed through to it.
def transform_lists( str, rs )
  @log.debug " Transforming lists at %p" % (str[0,100] + '...')

  str.gsub( ListRegexp ) do |list|
    first_marker = Regexp.last_match(1)
    @log.debug " Found list %p" % list

    tag = ListMarkerUl.match(first_marker) ? "ul" : "ol"
    items = transform_list_items( list, rs )

    "<#{tag}>\n#{items}</#{tag}>\n"
  end
end
1352
+
1353
# Matches one item inside a list. The item text ($4) runs up to the next
# marker that has the same leading whitespace ($2), or to end of input; any
# blank lines separating it from that point are captured as $5.
ListItemRegexp = %r{
  (\n)? # leading line = $1
  (^[ ]*) # leading whitespace = $2
  (#{ListMarkerAny}) [ ]+ # list marker = $3
  ((?m:.+?) # list item text = $4
  \n)
  (?= (\n*) (\z | \2 (#{ListMarkerAny}) [ ]+))
}x
1362
+
1363
### Return a copy of the list source +str+ with each item wrapped in <li>.
###
### An item is rendered with block transforms when a blank line precedes it,
### when it contains a blank line, or when blank lines separate it from what
### follows. Otherwise nested lists are expanded and span transforms applied.
def transform_list_items( str, rs )
  @log.debug " Transforming list items"

  # Trim trailing blank lines
  trimmed = str.sub( /\n{2,}\z/, "\n" )

  trimmed.gsub( ListItemRegexp ) do |line|
    found = Regexp.last_match
    @log.debug " Found item line %p" % line
    body = found[4]

    rendered =
      if found[1] || /\n{2,}/.match(body) || !found[5].empty?
        @log.debug " Found leading line or item has a blank"
        apply_block_transforms( outdent(body), rs )
      else
        # Recursion for sub-lists
        @log.debug " Recursing for sublist"
        nested = transform_lists( outdent(body), rs ).chomp
        apply_span_transforms( nested, rs )
      end

    "<li>#{rendered}</li>\n"
  end
end
1387
+
1388
# Matches a definition list: one or more term lines, optional blank lines,
# then one or more definitions each introduced by ':' at an indent smaller
# than TabWidth. The group may repeat.
DefinitionListRegexp = %r{
  (?:
    (?:^.+\n)+ # dt
    \n*
    (?:
      ^[ ]{0,#{TabWidth - 1}} # Indent < tab width
      \: # dd marker (line head)
      [ ]* # space
      ((?m:.+?)) # dd content
      (?:
        \s*\z # end of string
        | # or
        \n{2,} # blank line
        (?=[ ]{0,#{TabWidth - 1}}\S) # ...followed by
      )
    )+
  )+
}x
1406
+
1407
### Return a copy of +str+ in which each definition list matched by
### DefinitionListRegexp is replaced by the output of
### transform_definition_list_items. +rs+ is passed through.
def transform_definition_lists(str, rs)
  @log.debug " Transforming definition lists at %p" % (str[0,100] + '...')

  str.gsub( DefinitionListRegexp ) do |list|
    captures = Regexp.last_match.captures
    @log.debug " Found definition list %p (captures=%p)" % [list, captures]
    transform_definition_list_items(list, rs)
  end
end
1414
+
1415
# First line of a definition: a ':' in column 0, up to TabWidth - 1 spaces,
# then the definition text ($1).
DDLineRegexp = /^\:[ ]{0,#{TabWidth - 1}}(.*)/
1416
+
1417
+
1418
### Render the definition-list source +str+ as a <dl> element and return the
### markup.
###
### The text is consumed line by line in rounds. Each round collects term
### lines (anything not starting with ':'), skips blank lines, then collects
### definitions (lines starting with ':' plus their continuation lines).
### If blank lines separated the terms from the definitions, the definitions
### of that round go through apply_block_transforms, otherwise through
### apply_span_transforms.
def transform_definition_list_items(str, rs)
  out = Util.generate_blank_string_io(str)
  out.puts "<dl>"

  blankish_line = /^[ ]{0,#{TabWidth - 1}}$/
  indented_line = /^[ ]{#{TabWidth},}/

  pending = str.split("\n")
  until pending.empty?

    # terms
    terms = []
    terms << pending.shift while pending.first =~ /^(?!\:).+$/

    # blank lines between terms and definitions switch on block rendering
    block_mode = false
    while !pending.empty? && pending.first.empty?
      pending.shift
      block_mode = true
    end

    definitions = []
    while pending.first =~ DDLineRegexp
      first_text = Regexp.last_match(1)
      parts = []

      # first line of the definition
      parts << first_text << "\n" unless pending.shift.empty?

      # lines directly following the first one
      until pending.empty? ||
            pending.first =~ blankish_line ||   # stop at a blank line
            pending.first =~ DDLineRegexp       # stop at the next definition
        parts << outdent(pending.shift) << "\n"
      end

      # later paragraphs: blank lines and indented lines belong to this one
      until pending.empty?
        if pending.first.empty?
          pending.shift
          parts << "\n"
        elsif pending.first =~ indented_line
          parts << outdent(pending.shift) << "\n"
        else
          break
        end
      end

      definitions << parts.join

      # skip blank lines
      unless pending.empty?
        pending.shift while pending.first.empty?
      end
    end

    # html output
    terms.each do |term|
      out.puts " <dt>#{apply_span_transforms(term, rs)}</dt>"
    end

    definitions.each do |definition|
      if block_mode
        out.puts " <dd>#{apply_block_transforms(definition, rs)}</dd>"
      else
        inline = definition.gsub(/\n+\z/, '') # chomp linefeeds
        out.puts " <dd>#{apply_span_transforms(inline.chomp, rs)}</dd>"
      end
    end
  end

  out.puts "</dl>"
  out.string
end
1504
+
1505
+ # old
1506
+
1507
+
1508
# Pattern for matching codeblocks: after a blank line or at the start of the
# text, a run of lines that each begin with a tab or TabWidth spaces ($1).
# The text that ends the run is captured as $2 (transform_code_blocks puts
# it back after the generated markup).
CodeBlockRegexp = %r{
  (?:\n\n|\A|\A\n)
  ( # $1 = the code block
    (?:
      (?:[ ]{#{TabWidth}} | \t) # a tab or tab-width of spaces
      .*\n+
    )+
  )
  (^[ ]{0,#{TabWidth - 1}}\S|\Z) # Lookahead for non-space at
  # line-start, or end of doc
}x
1520
+
1521
+
1522
### Return a copy of +str+ in which every indented code block matched by
### CodeBlockRegexp is replaced by a <pre><code> element.
###
### The block is outdented one level and passed through encode_code, and its
### trailing whitespace is removed. Any EscapeTable placeholders in the
### result are then turned back into their original characters.
def transform_code_blocks( str, rs )
  @log.debug " Transforming code blocks"

  str.gsub( CodeBlockRegexp ) do
    raw_code  = Regexp.last_match(1)
    following = Regexp.last_match(2)

    template = %{\n\n<pre><code>%s\n</code></pre>\n\n%s}

    # patch for ruby 1.9.1 bug
    template.force_encoding(str.encoding) if template.respond_to?(:force_encoding)

    code = encode_code( outdent(raw_code), rs ).rstrip

    # recover all backslash escaped to original form
    EscapeTable.each do |char, entry|
      code.gsub!( entry[:md5re] ) { char }
    end

    # Generate the codeblock
    template % [ code, following ]
  end
end
1549
+
1550
+
1551
# A fenced code block: a line of three or more '~', the body ($2), and a
# closing line with exactly the same fence ($1).
FencedCodeBlockRegexp = /^(\~{3,})\n((?m:.+?)\n)\1\n/

### Return a copy of +str+ in which each fenced code block is rewritten as an
### indented block, with a lone '~' paragraph placed before and after it.
### +rs+ is not used.
def pretransform_fenced_code_blocks( str, rs )
  @log.debug " Transforming fenced code blocks => standard code blocks"

  str.gsub( FencedCodeBlockRegexp ) do
    fenced_body = Regexp.last_match(2)
    "\n~\n\n" + indent(fenced_body) + "\n~\n\n"
  end
end
1560
+
1561
+
1562
+
1563
# Pattern for matching Markdown blockquote blocks: a line starting with '>',
# the non-blank lines that follow it, and trailing blank lines; repeated.
BlockQuoteRegexp = %r{
  (?:
    ^[ ]*>[ ]? # '>' at the start of a line
    .+\n # rest of the first line
    (?:.+\n)* # subsequent consecutive lines
    \n* # blanks
  )+
}x

# A <pre>...</pre> chunk together with the whitespace in front of it.
PreChunk = %r{ ( ^ \s* <pre> .+? </pre> ) }xm
1573
+
1574
### Return a copy of +str+ in which every block matched by BlockQuoteRegexp
### becomes a <blockquote> element.
###
### One level of '>' markers is removed, the remainder is rendered with
### apply_block_transforms and indented by TabWidth spaces; that indent is
### taken out again inside <pre> chunks.
def transform_block_quotes( str, rs )
  @log.debug " Transforming block quotes"

  pad = " " * TabWidth
  leading_pad = /^#{pad}/

  str.gsub( BlockQuoteRegexp ) do |quote|
    @log.debug "Making blockquote from %p" % quote

    inner = quote.
      gsub( /^ *> ?/, '' ). # Trim one level of quoting
      gsub( /^ +$/, '' )    # Trim whitespace-only lines

    rendered = apply_block_transforms( inner, rs ).gsub( /^/, pad )
    rendered = rendered.gsub( PreChunk ) { |pre| pre.gsub( leading_pad, '' ) }

    quoted = "<blockquote>\n#{rendered}\n</blockquote>\n\n"
    @log.debug "Blockquoted chunk is: %p" % quoted
    quoted
  end
end
1594
+
1595
+
1596
# AoBane change:
# URLs and addresses are matched loosely (BlueCloth is very strict).
#
# loose examples:
# <skype:tetra-dice> (other protocol)
# <ema+il@example.com> (ex: gmail alias)
#
# addresses that are not supported:
# <"Abc@def"@example.com> (refer to quoted-string of RFC 5321)


AutoAnchorURLRegexp = /<(#{URI.regexp})>/ # $1 = url

# The pattern has a single capture group, so the address is $1.
AutoAnchorEmailRegexp = /<([^'">\s]+?\@[^'">\s]+[.][a-zA-Z]+)>/ # $1 = address
1610
+
1611
### Return a copy of +str+ in which <url> becomes an anchor whose href and
### text are the HTML-escaped URL, and <address> becomes the obfuscated
### mailto link built by encode_email_address. +rs+ is not used.
def transform_auto_links( str, rs )
  @log.debug " Transforming auto-links"

  with_url_links = str.gsub( AutoAnchorURLRegexp ) do
    url = Regexp.last_match(1)
    %|<a href="#{Util.escape_html(url)}">#{Util.escape_html(url)}</a>|
  end

  with_url_links.gsub( AutoAnchorEmailRegexp ) do
    encode_email_address( unescape_special_chars(Regexp.last_match(1)) )
  end
end
1621
+
1622
+
1623
# Three ways to write one byte of an email address: as a decimal entity,
# as a hexadecimal entity, or as the plain character.
Encoders = [
  lambda { |byte| format( "&#%03d;", byte ) },
  lambda { |byte| format( "&#x%X;", byte ) },
  lambda { |byte| byte.chr },
]
1630
+
1631
### Build an obfuscated mailto link for the email address +addr+ and return
### it as an <a> element.
###
### Every byte of "mailto:" + addr is written through a randomly chosen
### entry of Encoders, with two exceptions: the colon stays literal and '@'
### is always written as an entity. The link text is the encoded string with
### everything up to the first colon removed.
def encode_email_address( addr )
  # each_byte yields Integers. The previous `when ?:` / `when ?@` compared
  # them with one-character Strings on Ruby >= 1.9, so neither branch ran.
  colon_byte, at_byte = ':@'.unpack('C2')

  encoded = ''
  ("mailto:" + addr).each_byte do |byte|
    encoded +=
      if byte == colon_byte
        ":"
      elsif byte == at_byte
        Encoders[ rand(2) ][ byte ]
      else
        roll = rand(100)
        if roll > 90 then Encoders[2][ byte ]
        elsif roll < 45 then Encoders[1][ byte ]
        else Encoders[0][ byte ]
        end
      end
  end

  %{<a href="%s">%s</a>} % [ encoded, encoded.sub(/.+?:/, '') ]
end
1654
+
1655
+
1656
# Regexp for matching Setext-style headers: a title line, optionally ending
# in a "{#id}" attribute, underlined by a line of '=' or '-'.
SetextHeaderRegexp = %r{
  (.+?) # The title text ($1)

  (?: # Markdown Extra: Header Id Attribute (optional)
    [ ]* # space after closing #'s
    \{\#
    (\S+?) # $2 = Id
    \}
    [ \t]* # allowed lazy spaces
  )?
  \n
  ([\-=])+ # Match a line of = or -. Save only one in $3.
  [ ]*\n+
}x

# Regexp for matching ATX-style headers: leading '#' characters, the title,
# optional closing '#' characters and an optional "{#id}" attribute.
AtxHeaderRegexp = %r{
  ^(\#+) # $1 = string of #'s
  [ ]*
  (.+?) # $2 = Header text
  [ ]*
  \#* # optional closing #'s (not counted)

  (?: # Markdown Extra: Header Id Attribute (optional)
    [ ]* # space after closing #'s
    \{\#
    (\S+?) # $3 = Id
    \}
    [ \t]* # allowed lazy spaces
  )?

  \n+
}x

# Either header style. In the union the Setext groups keep numbers 1-3 and
# the ATX groups become 4-6.
HeaderRegexp = Regexp.union(SetextHeaderRegexp, AtxHeaderRegexp)

# Shape of a legal header id.
IdRegexp = /^[a-zA-Z][a-zA-Z0-9\:\._-]*$/
1694
+
1695
### Apply Markdown header transforms to a copy of the given +str+ and render
### state +rs+ and return the result.
###
### Both Setext headers (underlined with '=' for h1 or '-' for h2) and ATX
### headers ('#' prefixes) are handled. For every header:
### * a section-number prefix is added when rs.numbering? is true and the
###   level lies between rs.numbering_start_level and 6;
### * an id is taken from the "{#id}" attribute, or else generated according
###   to rs.header_id_type;
### * warnings are appended to rs.warnings for levels above 6, id collisions
###   and ids not matching IdRegexp;
### * the header is recorded in rs.headers while rs.block_transform_depth
###   is 1.
### The id attribute is written only when @use_header_id is set.
def transform_headers( str, rs )
  @log.debug " Transforming headers"

  # Setext-style headers:
  # Header 1
  # ========
  #
  # Header 2
  # --------
  #

  # One counter per numbered depth. Six slots cover the deepest case
  # (numbering starts at h1 and the header is h6, i.e. depth 5); with only
  # five slots that case read a nil counter and printed an empty number.
  section_numbers = Array.new(6)

  str.gsub( HeaderRegexp ) do
    found = Regexp.last_match

    if found[1]
      @log.debug "Found setext-style header"
      title, id, hdrchar = found[1], found[2], found[3]

      level =
        case hdrchar
        when '=' then 1
        when '-' then 2
        end
    else
      @log.debug "Found ATX-style header"
      hdrchars, title, id = found[4], found[5], found[6]
      level = hdrchars.length

      if level >= 7
        rs.warnings << "illegal header level - h#{level} ('#' symbols are too many)"
      end
    end

    prefix = ''
    if rs.numbering? && level >= rs.numbering_start_level && level <= 6
      depth = level - rs.numbering_start_level

      section_numbers.each_index do |i|
        if i == depth && section_numbers[depth]
          # same depth as an earlier header: count up
          section_numbers[i] += 1
        elsif i <= depth
          # first header at this depth (or a parent never seen): start at 1
          section_numbers[i] ||= 1
        else
          # deeper counters restart under the new header
          section_numbers[i] = nil
        end
      end

      number = (0..depth).map { |i| "#{section_numbers[i]}." }.join
      prefix = "#{number} "
    end

    title_html = apply_span_transforms( title, rs )

    unless id
      case rs.header_id_type
      when HeaderIDType::ESCAPE
        id = escape_to_header_id(title_html)
        if rs.headers.find { |h| h.id == id }
          rs.warnings << "header id collision - #{id}"
          id = "bfheader-#{Digest::MD5.hexdigest(title)}"
        end
      else
        id = "bfheader-#{Digest::MD5.hexdigest(title)}"
      end
    end

    # the generated id is based on the title without its number prefix
    title = "#{prefix}#{title}"
    title_html = "#{prefix}#{title_html}"

    unless id =~ IdRegexp
      rs.warnings << "illegal header id - #{id} (legal chars: [a-zA-Z0-9_-.] | 1st: [a-zA-Z])"
    end

    if rs.block_transform_depth == 1
      rs.headers << RenderState::Header.new(id, level, title, title_html)
    end

    if @use_header_id
      %{<h%d id="%s">%s</h%d>\n\n} % [ level, id, title_html, level ]
    else
      %{<h%d>%s</h%d>\n\n} % [ level, title_html, level ]
    end
  end
end
1793
+
1794
+
1795
### Split +str+ into chunks at blank lines and return them joined by blank
### lines again, after handling each chunk as follows:
### * a chunk that is a key of rs.html_blocks is replaced by the stored block;
### * a chunk consisting of just '~' (block separator) becomes empty;
### * anything else gets span transforms and is wrapped in <p>...</p>.
def form_paragraphs( str, rs )
  @log.debug " Forming paragraphs"

  chunks = str.sub( /\A\n+/, '' ).sub( /\n+\z/, '' ).split( /\n{2,}/ )

  rendered = chunks.map do |chunk|
    if rs.html_blocks.key?( chunk )
      # Unhashify HTML blocks if this is a placeholder
      rs.html_blocks[ chunk ]
    elsif chunk == '~'
      # no output if this is block separator
      ''
    else
      # Otherwise, wrap in <p> tags
      apply_span_transforms(chunk, rs).sub( /^[ ]*/, '<p>' ) + '</p>'
    end
  end

  rval = rendered.join( "\n\n" )
  @log.debug " Formed paragraphs: %p" % rval
  rval
end
1824
+
1825
+
1826
# Pattern to match the linkid part of an anchor tag for reference-style
# links, i.e. the "[id]" that follows the link text.
RefLinkIdRegexp = %r{
  [ ]? # Optional leading space
  (?:\n[ ]*)? # Optional newline + spaces
  \[
  (.*?) # Id = $1
  \]
}x

# Pattern to match the parenthesised part of an inline link:
# (url "optional title")
InlineLinkRegexp = %r{
  \( # Literal paren
  [ ]* # Zero or more spaces
  <?(.+?)>? # URI = $1
  [ ]* # Zero or more spaces
  (?: #
    ([\"\']) # Opening quote char = $2
    (.*?) # Title = $3
    \2 # Matching quote char
  )? # Title is optional
  \)
}x
1848
+
1849
+ ### Apply Markdown anchor transforms to a copy of the specified +str+ with
1850
+ ### the given render state +rs+ and return it.
1851
+ def transform_anchors( str, rs )
1852
+ @log.debug " Transforming anchors"
1853
+ @scanner.string = str.dup
1854
+ text = ''
1855
+
1856
+ # Scan the whole string
1857
+ until @scanner.empty?
1858
+
1859
+ if @scanner.scan( /\[/ )
1860
+ link = ''; linkid = ''
1861
+ depth = 1
1862
+ startpos = @scanner.pos
1863
+ @log.debug " Found a bracket-open at %d" % startpos
1864
+
1865
+ # Scan the rest of the tag, allowing unlimited nested []s. If
1866
+ # the scanner runs out of text before the opening bracket is
1867
+ # closed, append the text and return (wasn't a valid anchor).
1868
+ while depth.nonzero?
1869
+ linktext = @scanner.scan_until( /\]|\[/ )
1870
+
1871
+ if linktext
1872
+ @log.debug " Found a bracket at depth %d: %p" % [ depth, linktext ]
1873
+ link += linktext
1874
+
1875
+ # Decrement depth for each closing bracket
1876
+ depth += ( linktext[-1, 1] == ']' ? -1 : 1 )
1877
+ @log.debug " Depth is now #{depth}"
1878
+
1879
+ # If there's no more brackets, it must not be an anchor, so
1880
+ # just abort.
1881
+ else
1882
+ @log.debug " Missing closing brace, assuming non-link."
1883
+ link += @scanner.rest
1884
+ @scanner.terminate
1885
+ return text + '[' + link
1886
+ end
1887
+ end
1888
+ link.slice!( -1 ) # Trim final ']'
1889
+ @log.debug " Found leading link %p" % link
1890
+
1891
+
1892
+
1893
+ # Markdown Extra: Footnote
1894
+ if link =~ /^\^(.+)/ then
1895
+ id = $1
1896
+ if rs.footnotes[id] then
1897
+ rs.found_footnote_ids << id
1898
+ label = "[#{rs.found_footnote_ids.size}]"
1899
+ else
1900
+ rs.warnings << "undefined footnote id - #{id}"
1901
+ label = '[?]'
1902
+ end
1903
+
1904
+ text += %Q|<sup id="footnote-ref:#{id}"><a href="#footnote:#{id}" rel="footnote">#{label}</a></sup>|
1905
+
1906
+ # Look for a reference-style second part
1907
+ elsif @scanner.scan( RefLinkIdRegexp )
1908
+ linkid = @scanner[1]
1909
+ linkid = link.dup if linkid.empty?
1910
+ linkid.downcase!
1911
+ @log.debug " Found a linkid: %p" % linkid
1912
+
1913
+ # If there's a matching link in the link table, build an
1914
+ # anchor tag for it.
1915
+ if rs.urls.key?( linkid )
1916
+ @log.debug " Found link key in the link table: %p" % rs.urls[linkid]
1917
+ url = escape_md( rs.urls[linkid] )
1918
+
1919
+ text += %{<a href="#{url}"}
1920
+ if rs.titles.key?(linkid)
1921
+ text += %{ title="%s"} % escape_md( rs.titles[linkid] )
1922
+ end
1923
+ text += %{>#{link}</a>}
1924
+
1925
+ # If the link referred to doesn't exist, just append the raw
1926
+ # source to the result
1927
+ else
1928
+ @log.debug " Linkid %p not found in link table" % linkid
1929
+ @log.debug " Appending original string instead: "
1930
+ @log.debug "%p" % @scanner.string[ startpos-1 .. @scanner.pos-1 ]
1931
+
1932
+ rs.warnings << "link-id not found - #{linkid}"
1933
+ text += @scanner.string[ startpos-1 .. @scanner.pos-1 ]
1934
+ end
1935
+
1936
+ # ...or for an inline style second part
1937
+ elsif @scanner.scan( InlineLinkRegexp )
1938
+ url = @scanner[1]
1939
+ title = @scanner[3]
1940
+ @log.debug " Found an inline link to %p" % url
1941
+
1942
+ url = "##{link}" if url == '#' # target anchor briefing (since AoBane 0.40)
1943
+
1944
+ text += %{<a href="%s"} % escape_md( url )
1945
+ if title
1946
+ title.gsub!( /"/, "&quot;" )
1947
+ text += %{ title="%s"} % escape_md( title )
1948
+ end
1949
+ text += %{>#{link}</a>}
1950
+
1951
+ # No linkid part: just append the first part as-is.
1952
+ else
1953
+ @log.debug "No linkid, so no anchor. Appending literal text."
1954
+ text += @scanner.string[ startpos-1 .. @scanner.pos-1 ]
1955
+ end # if linkid
1956
+
1957
+ # Plain text
1958
+ else
1959
+ @log.debug " Scanning to the next link from %p" % @scanner.rest
1960
+ text += @scanner.scan( /[^\[]+/ )
1961
+ end
1962
+
1963
+ end # until @scanner.empty?
1964
+
1965
+ return text
1966
+ end
1967
+
1968
+
1969
# Pattern to match strong emphasis in Markdown text: ** or __ on both sides
# of text that neither starts nor ends with whitespace.
BoldRegexp = %r{ (\*\*|__) (\S|\S.*?\S) \1 }x

# Pattern to match normal emphasis in Markdown text: the same shape with a
# single * or _.
ItalicRegexp = %r{ (\*|_) (\S|\S.*?\S) \1 }x
1974
+
1975
### Return a copy of +str+ with strong emphasis turned into <strong> and
### normal emphasis into <em>. Bold is handled first. +rs+ is not used.
def transform_italic_and_bold( str, rs )
  @log.debug " Transforming italic and bold"

  with_strong = str.gsub( BoldRegexp ) { "<strong>#{Regexp.last_match(2)}</strong>" }
  with_strong.gsub( ItalicRegexp ) { "<em>#{Regexp.last_match(2)}</em>" }
end
1984
+
1985
+
1986
+ ### Transform backticked spans into <code> spans.
1987
+ def transform_code_spans( str, rs )
1988
+ @log.debug " Transforming code spans"
1989
+
1990
+ # Set up the string scanner and just return the string unless there's at
1991
+ # least one backtick.
1992
+ @scanner.string = str.dup
1993
+ unless @scanner.exist?( /`/ )
1994
+ @scanner.terminate
1995
+ @log.debug "No backticks found for code span in %p" % str
1996
+ return str
1997
+ end
1998
+
1999
+ @log.debug "Transforming code spans in %p" % str
2000
+
2001
+ # Build the transformed text anew
2002
+ text = ''
2003
+
2004
+ # Scan to the end of the string
2005
+ until @scanner.empty?
2006
+
2007
+ # Scan up to an opening backtick
2008
+ if pre = @scanner.scan_until( /.??(?=`)/m )
2009
+ text += pre
2010
+ @log.debug "Found backtick at %d after '...%s'" % [ @scanner.pos, text[-10, 10] ]
2011
+
2012
+ # Make a pattern to find the end of the span
2013
+ opener = @scanner.scan( /`+/ )
2014
+ len = opener.length
2015
+ closer = Regexp::new( opener )
2016
+ @log.debug "Scanning for end of code span with %p" % closer
2017
+
2018
+ # Scan until the end of the closing backtick sequence. Chop the
2019
+ # backticks off the resultant string, strip leading and trailing
2020
+ # whitespace, and encode any enitites contained in it.
2021
+ codespan = @scanner.scan_until( closer ) or
2022
+ raise FormatError::new( @scanner.rest[0,20],
2023
+ "No %p found before end" % opener )
2024
+
2025
+ @log.debug "Found close of code span at %d: %p" % [ @scanner.pos - len, codespan ]
2026
+ codespan.slice!( -len, len )
2027
+ text += "<code>%s</code>" %
2028
+ encode_code( codespan.strip, rs )
2029
+
2030
+ # If there's no more backticks, just append the rest of the string
2031
+ # and move the scan pointer to the end
2032
+ else
2033
+ text += @scanner.rest
2034
+ @scanner.terminate
2035
+ end
2036
+ end
2037
+
2038
+ return text
2039
+ end
2040
+
2041
+
2042
# Inline images: ![alt text](url "optional title")
# Don't forget: encode * and _
InlineImageRegexp = %r{
  ( # Whole match = $1
    !\[ (.*?) \] # alt text = $2
    \([ ]*
    <?(\S+?)>? # source url = $3
    [ ]*
    (?: #
      (["']) # quote char = $4
      (.*?) # title = $5
      \4 # matching quote
      [ ]*
    )? # title is optional
    \)
  )
}x #"


# Reference-style images: ![alt text][id]
ReferenceImageRegexp = %r{
  ( # Whole match = $1
    !\[ (.*?) \] # Alt text = $2
    [ ]? # Optional space
    (?:\n[ ]*)? # One optional newline + spaces
    \[ (.*?) \] # id = $3
  )
}x
2070
+
2071
### Turn image markup into image tags.
###
### Reference-style images (![alt][id]) are resolved first, through rs.urls
### and rs.titles; an unknown id leaves the markup untouched. Inline images
### (![alt](url "title")) are converted afterwards. Double quotes in alt
### text and inline titles are written as &quot;.
def transform_images( str, rs )
  @log.debug " Transforming images %p" % str

  # Handle reference-style labeled images: ![alt text][id]
  with_references = str.gsub( ReferenceImageRegexp ) do |match|
    found = Regexp.last_match
    whole, alt, linkid = found[1], found[2], found[3].downcase
    @log.debug "Matched %p" % match
    alt.gsub!( /"/, '&quot;' )

    # for shortcut links like ![this][].
    linkid = alt.downcase if linkid.empty?

    if rs.urls.key?( linkid )
      url = escape_md( rs.urls[linkid] )
      @log.debug "Found url '%s' for linkid '%s' " % [ url, linkid ]

      # Build the tag
      tag = %{<img src="%s" alt="%s"} % [ url, alt ]
      if rs.titles.key?( linkid )
        tag += %{ title="%s"} % escape_md( rs.titles[linkid] )
      end
      tag += EmptyElementSuffix
    else
      tag = whole
    end

    @log.debug "Replacing %p with %p" % [ match, tag ]
    tag
  end

  # Inline image style
  with_references.gsub( InlineImageRegexp ) do |match|
    found = Regexp.last_match
    @log.debug "Found inline image %p" % match
    alt, title = found[2], found[5]
    url = escape_md( found[3] )
    alt.gsub!( /"/, '&quot;' )

    # Build the tag
    tag = %{<img src="%s" alt="%s"} % [ url, alt ]
    if title
      title.gsub!( /"/, '&quot;' )
      tag += %{ title="%s"} % escape_md( title )
    end
    tag += EmptyElementSuffix

    @log.debug "Replacing %p with %p" % [ match, tag ]
    tag
  end
end
2124
+
2125
+
2126
# Regexp to match, one at a time, the characters that are special to
# Markdown inside code: * _ { } [ ] and the backslash.
CodeEscapeRegexp = %r{( \* | _ | \{ | \} | \[ | \] | \\ )}x
2128
+
2129
### Escape any characters special to HTML and encode any characters special
### to Markdown in a copy of the given +str+ and return it.
###
### '&', '<' and '>' become entities; every character matched by
### CodeEscapeRegexp is replaced by its :md5 placeholder from EscapeTable.
### +rs+ is not used.
###
### The body had been commented out entirely, which made the method return
### nil: transform_code_blocks then failed on nil.rstrip and code spans were
### rendered empty. The implementation is restored so that the method does
### what this comment has always promised.
def encode_code( str, rs )
  str.gsub( %r{&}, '&amp;' ).
    gsub( %r{<}, '&lt;' ).
    gsub( %r{>}, '&gt;' ).
    gsub( CodeEscapeRegexp ) { |match| EscapeTable[match][:md5] }
end
2137
+
2138
### Derive a header id from the header's HTML +str+.
###
### Tags are removed, whitespace becomes '_', Markdown specials are escaped
### with escape_md, '/' becomes ".2F", the result is URI-escaped and every
### '%' of the escapes is finally replaced by '.'.
def escape_to_header_id(str)
  plain = str.gsub(/<\/?[^>]*>/, "").gsub(/\s/, "_")
  without_slashes = escape_md(plain).gsub("/", ".2F")

  # URI.escape was removed in Ruby 3.0. The default parser offers the same
  # escaping; fall back to URI.escape where no default parser exists.
  uri_escaped =
    if defined?(URI::DEFAULT_PARSER)
      URI::DEFAULT_PARSER.escape(without_slashes)
    else
      URI.escape(without_slashes)
    end

  uri_escaped.gsub("%", ".")
end
2141
+
2142
+ #################################################################
2143
+ ### U T I L I T Y F U N C T I O N S
2144
+ #################################################################
2145
+
2146
### Return a copy of +str+ in which every '*' and '_' is replaced by the
### :md5 placeholder that EscapeTable holds for that character.
def escape_md( str )
  str.gsub( /\*|_/ ) do |symbol|
    EscapeTable[symbol][:md5]
  end
end
2152
+
2153
+
2154
# Matching constructs for tokenizing X/HTML

# Comments and processing instructions; tokenize_html matches these in one
# piece, without nesting.
HTMLCommentRegexp = %r{ <! ( -- .*? -- \s* )+ > }mx
XMLProcInstRegexp = %r{ <\? .*? \?> }mx
MetaTag = Regexp.union( HTMLCommentRegexp, XMLProcInstRegexp )

# The opening part of a tag (up to, not including, the next '<' or '>') and
# the closing angle bracket.
HTMLTagOpenRegexp = %r{ < [a-z/!$] [^<>]* }imx
HTMLTagCloseRegexp = %r{ > }x
HTMLTagPart = Regexp.union( HTMLTagOpenRegexp, HTMLTagCloseRegexp )
2162
+
2163
+ ### Break the HTML source in +str+ into a series of tokens and return
2164
+ ### them. The tokens are just 2-element Array tuples with a type and the
2165
+ ### actual content. If this function is called with a block, the type and
2166
+ ### text parts of each token will be yielded to it one at a time as they are
2167
+ ### extracted.
2168
+ def tokenize_html( str )
2169
+ depth = 0
2170
+ tokens = []
2171
+ @scanner.string = str.dup
2172
+ type, token = nil, nil
2173
+
2174
+ until @scanner.empty?
2175
+ @log.debug "Scanning from %p" % @scanner.rest
2176
+
2177
+ # Match comments and PIs without nesting
2178
+ if (( token = @scanner.scan(MetaTag) ))
2179
+ type = :tag
2180
+
2181
+ # Do nested matching for HTML tags
2182
+ elsif (( token = @scanner.scan(HTMLTagOpenRegexp) ))
2183
+ tagstart = @scanner.pos
2184
+ @log.debug " Found the start of a plain tag at %d" % tagstart
2185
+
2186
+ # Start the token with the opening angle
2187
+ depth = 1
2188
+ type = :tag
2189
+
2190
+ # Scan the rest of the tag, allowing unlimited nested <>s. If
2191
+ # the scanner runs out of text before the tag is closed, raise
2192
+ # an error.
2193
+ while depth.nonzero?
2194
+
2195
+ # Scan either an opener or a closer
2196
+ chunk = @scanner.scan( HTMLTagPart ) or
2197
+ break # AoBane Fix (refer to spec/code-block.rb)
2198
+
2199
+ @log.debug " Found another part of the tag at depth %d: %p" % [ depth, chunk ]
2200
+
2201
+ token += chunk
2202
+
2203
+ # If the last character of the token so far is a closing
2204
+ # angle bracket, decrement the depth. Otherwise increment
2205
+ # it for a nested tag.
2206
+ depth += ( token[-1, 1] == '>' ? -1 : 1 )
2207
+ @log.debug " Depth is now #{depth}"
2208
+ end
2209
+
2210
+ # Match text segments
2211
+ else
2212
+ @log.debug " Looking for a chunk of text"
2213
+ type = :text
2214
+
2215
+ # Scan forward, always matching at least one character to move
2216
+ # the pointer beyond any non-tag '<'.
2217
+ token = @scanner.scan_until( /[^<]+/m )
2218
+ end
2219
+
2220
+ @log.debug " type: %p, token: %p" % [ type, token ]
2221
+
2222
+ # If a block is given, feed it one token at a time. Add the token to
2223
+ # the token list to be returned regardless.
2224
+ if block_given?
2225
+ yield( type, token )
2226
+ end
2227
+ tokens << [ type, token ]
2228
+ end
2229
+
2230
+ return tokens
2231
+ end
2232
+
2233
+
2234
### Return +str+ as it is.
###
### The encoding of bare ampersands and of '<' characters that do not start
### a tag is commented out in this version, so nothing is replaced and the
### argument itself (not a copy) is handed back.
def encode_html( str )
  #str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w+);)/i, "&amp;" ).
  #gsub( %r{<(?![a-z/?\$!])}i, "&lt;" )
  str
end
2240
+
2241
+
2242
### Return a copy of +str+ with one level of indentation removed from the
### start of every line: either one tab or between one and TabWidth spaces.
def outdent( str )
  one_indent_level = /^(\t|[ ]{1,#{TabWidth}})/
  str.gsub( one_indent_level, '' )
end
2247
+
2248
### Return a copy of +str+ with TabWidth spaces inserted at the start of
### every line.
def indent(str)
  one_indent_level = ' ' * TabWidth
  str.gsub( /^/, one_indent_level )
end
2251
+
2252
+ end
2253
+ end