kramdown 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of kramdown might be problematic. Click here for more details.
- data/AUTHORS +1 -0
- data/COPYING +24 -0
- data/ChangeLog +1416 -0
- data/GPL +674 -0
- data/README +20 -0
- data/Rakefile +300 -0
- data/VERSION +1 -0
- data/benchmark/benchmark.rb +33 -0
- data/benchmark/mdbasics.text +306 -0
- data/benchmark/mdsyntax.text +888 -0
- data/benchmark/testing.sh +9 -0
- data/benchmark/timing.sh +10 -0
- data/bin/kramdown +26 -0
- data/doc/default.css +293 -0
- data/doc/default.template +78 -0
- data/doc/index.page +89 -0
- data/doc/installation.page +90 -0
- data/doc/news.feed +10 -0
- data/doc/news.page +27 -0
- data/doc/quickref.page +474 -0
- data/doc/syntax.page +1089 -0
- data/doc/tests.page +44 -0
- data/doc/virtual +2 -0
- data/lib/kramdown.rb +23 -0
- data/lib/kramdown/converter.rb +215 -0
- data/lib/kramdown/document.rb +150 -0
- data/lib/kramdown/error.rb +27 -0
- data/lib/kramdown/extension.rb +73 -0
- data/lib/kramdown/parser.rb +1056 -0
- data/lib/kramdown/parser/registry.rb +62 -0
- data/setup.rb +1585 -0
- data/test/run_tests.rb +58 -0
- data/test/test_files.rb +39 -0
- data/test/testcases/block/01_blank_line/spaces.html +1 -0
- data/test/testcases/block/01_blank_line/spaces.text +3 -0
- data/test/testcases/block/01_blank_line/tabs.html +1 -0
- data/test/testcases/block/01_blank_line/tabs.text +6 -0
- data/test/testcases/block/02_eob/beginning.html +1 -0
- data/test/testcases/block/02_eob/beginning.text +3 -0
- data/test/testcases/block/02_eob/end.html +1 -0
- data/test/testcases/block/02_eob/end.text +3 -0
- data/test/testcases/block/02_eob/middle.html +1 -0
- data/test/testcases/block/02_eob/middle.text +5 -0
- data/test/testcases/block/03_paragraph/indented.html +18 -0
- data/test/testcases/block/03_paragraph/indented.text +19 -0
- data/test/testcases/block/03_paragraph/no_newline_at_end.html +5 -0
- data/test/testcases/block/03_paragraph/no_newline_at_end.text +5 -0
- data/test/testcases/block/03_paragraph/one_para.html +1 -0
- data/test/testcases/block/03_paragraph/one_para.text +1 -0
- data/test/testcases/block/03_paragraph/two_para.html +4 -0
- data/test/testcases/block/03_paragraph/two_para.text +4 -0
- data/test/testcases/block/04_header/atx_header.html +26 -0
- data/test/testcases/block/04_header/atx_header.text +24 -0
- data/test/testcases/block/04_header/atx_header_no_newline_at_end.html +1 -0
- data/test/testcases/block/04_header/atx_header_no_newline_at_end.text +1 -0
- data/test/testcases/block/04_header/setext_header.html +25 -0
- data/test/testcases/block/04_header/setext_header.text +27 -0
- data/test/testcases/block/04_header/setext_header_no_newline_at_end.html +1 -0
- data/test/testcases/block/04_header/setext_header_no_newline_at_end.text +2 -0
- data/test/testcases/block/04_header/with_auto_ids.html +17 -0
- data/test/testcases/block/04_header/with_auto_ids.options +1 -0
- data/test/testcases/block/04_header/with_auto_ids.text +19 -0
- data/test/testcases/block/05_blockquote/indented.html +25 -0
- data/test/testcases/block/05_blockquote/indented.text +14 -0
- data/test/testcases/block/05_blockquote/nested.html +9 -0
- data/test/testcases/block/05_blockquote/nested.text +5 -0
- data/test/testcases/block/05_blockquote/no_newline_at_end.html +4 -0
- data/test/testcases/block/05_blockquote/no_newline_at_end.text +2 -0
- data/test/testcases/block/05_blockquote/only_first_quoted.html +8 -0
- data/test/testcases/block/05_blockquote/only_first_quoted.text +4 -0
- data/test/testcases/block/05_blockquote/with_code_blocks.html +15 -0
- data/test/testcases/block/05_blockquote/with_code_blocks.text +11 -0
- data/test/testcases/block/06_codeblock/error.html +4 -0
- data/test/testcases/block/06_codeblock/error.text +4 -0
- data/test/testcases/block/06_codeblock/no_newline_at_end.html +2 -0
- data/test/testcases/block/06_codeblock/no_newline_at_end.text +1 -0
- data/test/testcases/block/06_codeblock/normal.html +13 -0
- data/test/testcases/block/06_codeblock/normal.text +10 -0
- data/test/testcases/block/06_codeblock/tilde_syntax.html +7 -0
- data/test/testcases/block/06_codeblock/tilde_syntax.text +9 -0
- data/test/testcases/block/06_codeblock/whitespace.html +3 -0
- data/test/testcases/block/06_codeblock/whitespace.text +3 -0
- data/test/testcases/block/06_codeblock/with_blank_line.html +13 -0
- data/test/testcases/block/06_codeblock/with_blank_line.text +11 -0
- data/test/testcases/block/06_codeblock/with_eob_marker.html +6 -0
- data/test/testcases/block/06_codeblock/with_eob_marker.text +5 -0
- data/test/testcases/block/07_horizontal_rule/error.html +7 -0
- data/test/testcases/block/07_horizontal_rule/error.text +7 -0
- data/test/testcases/block/07_horizontal_rule/normal.html +19 -0
- data/test/testcases/block/07_horizontal_rule/normal.text +19 -0
- data/test/testcases/block/08_list/escaping.html +17 -0
- data/test/testcases/block/08_list/escaping.text +17 -0
- data/test/testcases/block/08_list/list_and_hr.html +9 -0
- data/test/testcases/block/08_list/list_and_hr.text +5 -0
- data/test/testcases/block/08_list/list_and_others.html +38 -0
- data/test/testcases/block/08_list/list_and_others.text +25 -0
- data/test/testcases/block/08_list/mixed.html +111 -0
- data/test/testcases/block/08_list/mixed.text +66 -0
- data/test/testcases/block/08_list/nested.html +17 -0
- data/test/testcases/block/08_list/nested.text +7 -0
- data/test/testcases/block/08_list/other_first_element.html +39 -0
- data/test/testcases/block/08_list/other_first_element.text +18 -0
- data/test/testcases/block/08_list/simple_ol.html +19 -0
- data/test/testcases/block/08_list/simple_ol.text +13 -0
- data/test/testcases/block/08_list/simple_ul.html +61 -0
- data/test/testcases/block/08_list/simple_ul.text +43 -0
- data/test/testcases/block/08_list/single_item.html +3 -0
- data/test/testcases/block/08_list/single_item.text +1 -0
- data/test/testcases/block/08_list/special_cases.html +29 -0
- data/test/testcases/block/08_list/special_cases.text +19 -0
- data/test/testcases/block/09_html/auto_parse_block_html.html +17 -0
- data/test/testcases/block/09_html/auto_parse_block_html.options +1 -0
- data/test/testcases/block/09_html/auto_parse_block_html.text +14 -0
- data/test/testcases/block/09_html/comment.html +12 -0
- data/test/testcases/block/09_html/comment.text +12 -0
- data/test/testcases/block/09_html/filtered_html.html +1 -0
- data/test/testcases/block/09_html/filtered_html.options +1 -0
- data/test/testcases/block/09_html/filtered_html.text +1 -0
- data/test/testcases/block/09_html/html_and_codeblocks.html +15 -0
- data/test/testcases/block/09_html/html_and_codeblocks.options +1 -0
- data/test/testcases/block/09_html/html_and_codeblocks.text +13 -0
- data/test/testcases/block/09_html/invalid_html_1.html +5 -0
- data/test/testcases/block/09_html/invalid_html_1.text +5 -0
- data/test/testcases/block/09_html/invalid_html_2.html +6 -0
- data/test/testcases/block/09_html/invalid_html_2.text +5 -0
- data/test/testcases/block/09_html/parse_as_raw.html +26 -0
- data/test/testcases/block/09_html/parse_as_raw.text +16 -0
- data/test/testcases/block/09_html/parse_as_span.html +12 -0
- data/test/testcases/block/09_html/parse_as_span.text +7 -0
- data/test/testcases/block/09_html/processing_instruction.html +12 -0
- data/test/testcases/block/09_html/processing_instruction.text +12 -0
- data/test/testcases/block/09_html/simple.html +78 -0
- data/test/testcases/block/09_html/simple.text +56 -0
- data/test/testcases/block/10_ald/simple.html +2 -0
- data/test/testcases/block/10_ald/simple.text +8 -0
- data/test/testcases/block/11_ial/simple.html +17 -0
- data/test/testcases/block/11_ial/simple.text +25 -0
- data/test/testcases/block/12_extension/comment.html +5 -0
- data/test/testcases/block/12_extension/comment.text +11 -0
- data/test/testcases/block/12_extension/ignored.html +6 -0
- data/test/testcases/block/12_extension/ignored.text +11 -0
- data/test/testcases/block/12_extension/kdoptions.html +15 -0
- data/test/testcases/block/12_extension/kdoptions.text +18 -0
- data/test/testcases/block/12_extension/kdoptions2.html +10 -0
- data/test/testcases/block/12_extension/kdoptions2.text +5 -0
- data/test/testcases/block/12_extension/nokramdown.html +6 -0
- data/test/testcases/block/12_extension/nokramdown.text +11 -0
- data/test/testcases/span/01_link/empty.html +3 -0
- data/test/testcases/span/01_link/empty.text +3 -0
- data/test/testcases/span/01_link/image_in_a.html +5 -0
- data/test/testcases/span/01_link/image_in_a.text +5 -0
- data/test/testcases/span/01_link/imagelinks.html +12 -0
- data/test/testcases/span/01_link/imagelinks.text +14 -0
- data/test/testcases/span/01_link/inline.html +40 -0
- data/test/testcases/span/01_link/inline.text +42 -0
- data/test/testcases/span/01_link/link_defs.html +8 -0
- data/test/testcases/span/01_link/link_defs.text +22 -0
- data/test/testcases/span/01_link/links_with_angle_brackets.html +3 -0
- data/test/testcases/span/01_link/links_with_angle_brackets.text +3 -0
- data/test/testcases/span/01_link/reference.html +32 -0
- data/test/testcases/span/01_link/reference.text +42 -0
- data/test/testcases/span/02_emphasis/empty.html +3 -0
- data/test/testcases/span/02_emphasis/empty.text +3 -0
- data/test/testcases/span/02_emphasis/errors.html +9 -0
- data/test/testcases/span/02_emphasis/errors.text +9 -0
- data/test/testcases/span/02_emphasis/nesting.html +34 -0
- data/test/testcases/span/02_emphasis/nesting.text +30 -0
- data/test/testcases/span/02_emphasis/normal.html +42 -0
- data/test/testcases/span/02_emphasis/normal.text +42 -0
- data/test/testcases/span/03_codespan/empty.html +5 -0
- data/test/testcases/span/03_codespan/empty.text +5 -0
- data/test/testcases/span/03_codespan/errors.html +1 -0
- data/test/testcases/span/03_codespan/errors.text +1 -0
- data/test/testcases/span/03_codespan/normal.html +16 -0
- data/test/testcases/span/03_codespan/normal.text +16 -0
- data/test/testcases/span/04_footnote/definitions.html +14 -0
- data/test/testcases/span/04_footnote/definitions.text +18 -0
- data/test/testcases/span/04_footnote/footnote_nr.html +12 -0
- data/test/testcases/span/04_footnote/footnote_nr.options +1 -0
- data/test/testcases/span/04_footnote/footnote_nr.text +4 -0
- data/test/testcases/span/04_footnote/markers.html +46 -0
- data/test/testcases/span/04_footnote/markers.text +26 -0
- data/test/testcases/span/05_html/normal.html +17 -0
- data/test/testcases/span/05_html/normal.text +17 -0
- data/test/testcases/span/autolinks/url_links.html +9 -0
- data/test/testcases/span/autolinks/url_links.text +9 -0
- data/test/testcases/span/escaped_chars/normal.html +33 -0
- data/test/testcases/span/escaped_chars/normal.text +33 -0
- data/test/testcases/span/ial/simple.html +5 -0
- data/test/testcases/span/ial/simple.text +5 -0
- data/test/testcases/span/line_breaks/normal.html +11 -0
- data/test/testcases/span/line_breaks/normal.text +11 -0
- data/test/testcases/span/text_substitutions/entities.html +4 -0
- data/test/testcases/span/text_substitutions/entities.text +4 -0
- data/test/testcases/span/text_substitutions/greaterthan.html +1 -0
- data/test/testcases/span/text_substitutions/greaterthan.text +1 -0
- data/test/testcases/span/text_substitutions/lowerthan.html +1 -0
- data/test/testcases/span/text_substitutions/lowerthan.text +1 -0
- data/test/testcases/span/text_substitutions/typography.html +3 -0
- data/test/testcases/span/text_substitutions/typography.text +3 -0
- metadata +259 -0
@@ -0,0 +1,73 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
|
5
|
+
#
|
6
|
+
# This file is part of kramdown.
|
7
|
+
#
|
8
|
+
# kramdown is free software: you can redistribute it and/or modify
|
9
|
+
# it under the terms of the GNU General Public License as published by
|
10
|
+
# the Free Software Foundation, either version 3 of the License, or
|
11
|
+
# (at your option) any later version.
|
12
|
+
#
|
13
|
+
# This program is distributed in the hope that it will be useful,
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
+
# GNU General Public License for more details.
|
17
|
+
#
|
18
|
+
# You should have received a copy of the GNU General Public License
|
19
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
20
|
+
#++
|
21
|
+
#
|
22
|
+
|
23
|
+
module Kramdown
|
24
|
+
|
25
|
+
# The base extension class.
|
26
|
+
#
|
27
|
+
# This class provides implementations for the default extensions defined in the kramdown
|
28
|
+
# specification.
|
29
|
+
#
|
30
|
+
# An extension is a method called <tt>parse_EXTNAME</tt> where +EXTNAME+ is the extension name.
|
31
|
+
# These methods are called with three parameters:
|
32
|
+
#
|
33
|
+
# [+parser+]
|
34
|
+
# The parser instance from which the extension method is called.
|
35
|
+
# [+opts+]
|
36
|
+
# A hash containing the options set in the extension.
|
37
|
+
# [+body+]
|
38
|
+
# A string containing the body of the extension. If no body is available, this is +nil+.
|
39
|
+
class Extension

  # Handler for the +comment+ extension: the options and the body are ignored and nothing is
  # added to the parse tree. Always returns +nil+.
  def parse_comment(parser, opts, body)
    nil
  end

  # Handler for the +nokramdown+ extension: add the +body+ as <tt>:raw</tt> Element to
  # +parser.tree+. Nothing is added if +body+ is not a String (e.g. +nil+).
  def parse_nokramdown(parser, opts, body)
    parser.tree.children << Element.new(:raw, body) if body.kind_of?(String)
  end

  # Handler for the +kdoptions+ extension: update the document options of +parser+ with the
  # values set in +opts+.
  #
  # Recognized keys are removed from +opts+ while they are processed:
  #
  # [+auto_ids+]
  #   The value 'false' (case-insensitive, surrounding whitespace ignored) disables the option,
  #   any other non-empty value enables it, an empty value leaves it untouched.
  # [+filter_html+]
  #   A whitespace separated list of names which is stored as an array of strings.
  # [+footnote_nr+]
  #   Must be convertible by Kernel#Integer; otherwise the current value is kept and a warning is
  #   added.
  #
  # For every remaining key a warning is added via +parser.warning+.
  def parse_kdoptions(parser, opts, body)
    options = parser.doc.options

    if val = opts.delete('auto_ids')
      if val.downcase.strip == 'false'
        options[:auto_ids] = false
      elsif !val.empty?
        options[:auto_ids] = true
      end
    end

    if val = opts.delete('filter_html')
      # String#split without arguments ignores leading whitespace, so a value like " b i" does
      # not produce an empty first entry.
      options[:filter_html] = val.split
    end

    if val = opts.delete('footnote_nr')
      begin
        options[:footnote_nr] = Integer(val)
      rescue ArgumentError, TypeError
        # Only the conversion failure is handled; the previous value stays in place.
        parser.warning("Invalid value '#{val}' for kramdown option 'footnote_nr'")
      end
    end

    opts.each {|k,v| parser.warning("Unknown kramdown options '#{k}'")}
  end

end
|
70
|
+
|
71
|
+
end
|
72
|
+
|
73
|
+
|
@@ -0,0 +1,1056 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
|
5
|
+
#
|
6
|
+
# This file is part of kramdown.
|
7
|
+
#
|
8
|
+
# kramdown is free software: you can redistribute it and/or modify
|
9
|
+
# it under the terms of the GNU General Public License as published by
|
10
|
+
# the Free Software Foundation, either version 3 of the License, or
|
11
|
+
# (at your option) any later version.
|
12
|
+
#
|
13
|
+
# This program is distributed in the hope that it will be useful,
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
+
# GNU General Public License for more details.
|
17
|
+
#
|
18
|
+
# You should have received a copy of the GNU General Public License
|
19
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
20
|
+
#++
|
21
|
+
#
|
22
|
+
|
23
|
+
require 'strscan'
|
24
|
+
require 'stringio'
|
25
|
+
require 'kramdown/parser/registry'
|
26
|
+
|
27
|
+
#TODO: use [[:alpha:]] in all regexp to allow parsing of international values in 1.9.1
|
28
|
+
#NOTE: use @src.pre_match only before other check/match?/... operations, otherwise the content is changed
|
29
|
+
|
30
|
+
module Kramdown
|
31
|
+
|
32
|
+
# This module contains all available parsers. Currently, there is only one parser for parsing
|
33
|
+
# documents in kramdown format.
|
34
|
+
module Parser
|
35
|
+
|
36
|
+
# Used for parsing a document in kramdown format.
|
37
|
+
class Kramdown
|
38
|
+
|
39
|
+
include ::Kramdown
|
40
|
+
|
41
|
+
attr_reader :tree
|
42
|
+
attr_reader :doc
|
43
|
+
|
44
|
+
# Set up a fresh parser for the Kramdown::Document +doc+: no source or tree yet, empty stacks
# and ID registry, and empty +:ald+, +:link_defs+ and +:footnotes+ tables in +doc.parse_infos+.
def initialize(doc)
  @doc = doc
  @src = @tree = nil
  @unclosed_html_tags, @stack = [], []
  @used_ids = {}
  [:ald, :link_defs, :footnotes].each {|key| @doc.parse_infos[key] = {}}
end
|
56
|
+
private_class_method(:new, :allocate)
|
57
|
+
|
58
|
+
|
59
|
+
# Convenience entry point: create a parser for the Kramdown::Document +doc+, parse the string
# +source+ with it and return the resulting parse tree.
def self.parse(source, doc)
  parser = new(doc)
  parser.parse(source)
end
|
63
|
+
|
64
|
+
# Parse the string +source+ and return the created tree.
#
# The block level elements are parsed first; afterwards the span level elements of the tree and
# of the content of every collected footnote are parsed.
def parse(source)
  configure_parser
  root = Element.new(:root)
  parse_blocks(root, adapt_source(source))
  update_tree(root)
  @doc.parse_infos[:footnotes].each {|_, footnote| update_tree(footnote[:content])}
  root
end
|
75
|
+
|
76
|
+
# Append the warning +text+ to the warning array of the Kramdown document and return that array.
def warning(text)
  #TODO: add position information
  @doc.warnings.push(text)
end
|
81
|
+
|
82
|
+
#######
|
83
|
+
private
|
84
|
+
#######
|
85
|
+
|
86
|
+
# Names of the block level parsers in the order in which they are tried at each position.
# Frozen because the list is shared by all parser instances and must not be modified.
BLOCK_PARSERS = [:blank_line, :codeblock, :codeblock_fenced, :blockquote, :atx_header,
                 :setext_header, :horizontal_rule, :list, :link_definition, :block_html,
                 :footnote_definition, :ald, :block_ial, :extension_block, :eob_marker, :paragraph].freeze

# Names of the span level parsers in the order in which they are tried at each position.
SPAN_PARSERS = [:emphasis, :codespan, :autolink, :span_html, :footnote_marker, :link,
                :span_ial, :html_entity, :typographic_syms, :special_html_chars, :line_break,
                :escaped_chars].freeze
|
91
|
+
|
92
|
+
# Prepare this object for parsing: look up every parser named in BLOCK_PARSERS and SPAN_PARSERS
# in the Registry, extend the object with the parser's module and remember the parser in
# +@parsers+. Raises Kramdown::Error for a name the Registry does not know at the required
# level. Finally the combined span start expression is built.
def configure_parser
  @parsers = {}
  [[BLOCK_PARSERS, :block], [SPAN_PARSERS, :span]].each do |names, level|
    names.each do |name|
      unless Registry.has_parser?(name, level)
        raise Kramdown::Error, (level == :block ? "Unknown block parser: #{name}" : "Unknown span parser: #{name}")
      end
      extend(Registry.parser(name).module)
      @parsers[name] = Registry.parser(name)
    end
  end
  @span_start = Regexp.union(*SPAN_PARSERS.map {|name| @parsers[name].start_re})
  @span_start_re = /(?=#{@span_start})/
end
|
114
|
+
|
115
|
+
# Parse all block level elements in the string +text+ into the element +el+.
#
# The current values of +@tree+, +@src+ and +@unclosed_html_tags+ are saved on +@stack+ for the
# duration of the call and restored afterwards, so the method can be called recursively.
def parse_blocks(el, text)
  @stack.push([@tree, @src, @unclosed_html_tags])
  @tree, @src, @unclosed_html_tags = el, StringScanner.new(text), []

  while !@src.eos?
    # Try the block parsers in order; the first one whose start expression matches and whose
    # method returns a true value has handled the current position.
    BLOCK_PARSERS.any? do |name|
      if @src.check(@parsers[name].start_re)
        send(@parsers[name].method)
      else
        false
      end
    end || begin
      warning('Warning: this should not occur - no block parser handled the line')
      # The newline is optional so that a last line without a trailing newline is consumed as
      # well; StringScanner#scan would otherwise return nil here.
      add_text(@src.scan(/.*\n?/))
    end
  end

  @unclosed_html_tags.reverse.each do |tag|
    warning("Automatically closing unclosed html tag '#{tag.value}'")
  end

  @tree, @src, @unclosed_html_tags = *@stack.pop
end
|
140
|
+
|
141
|
+
# Walk the children of +element+: every <tt>:text</tt> child is parsed with the span level
# parser and replaced by the elements created from it (this resets +@tree+, +@src+ and the
# +@stack+); every other child is processed recursively and gets its attributes updated from
# its IAL if it has one.
def update_tree(element)
  nodes = element.children
  nodes.map! do |node|
    unless node.type == :text
      update_tree(node)
      ial = node.options[:ial]
      update_attr_with_ial(node.options[:attr] ||= {}, ial) if ial
      next node
    end
    @stack = []
    @tree = nil
    @src = StringScanner.new(node.value)
    parse_spans(node)
    node.children
  end
  nodes.flatten!
end
|
157
|
+
|
158
|
+
# Parse all span level elements in the source string (+@src+) into the element +el+.
#
# If +stop_re+ is given, parsing stops when +stop_re+ matches at a position reached by the
# scanner. If additionally a block is given, it is called at such a position and its result
# decides whether the stop position is accepted.
#
# Returns +true+ (or the block's result) if parsing was stopped by +stop_re+, otherwise +false+.
def parse_spans(el, stop_re = nil)
  # Remember the current tree; it is restored before returning.
  @stack.push(@tree)
  @tree = el

  # Zero-width lookahead for the next interesting position: any span start, or +stop_re+.
  used_re = (stop_re.nil? ? @span_start_re : /(?=#{Regexp.union(stop_re, @span_start)})/)
  stop_re_found = false
  while !@src.eos? && !stop_re_found
    if result = @src.scan_until(used_re)
      # Everything before the matched position is plain text.
      add_text(result)
      if stop_re && (stop_re_matched = @src.check(stop_re))
        stop_re_found = (block_given? ? yield : true)
      end
      # Invoke the first span parser whose start expression matches here; +processed+ is true
      # whenever a parser was invoked, independent of that parser's return value. It stays nil
      # when the stop position was accepted.
      processed = SPAN_PARSERS.any? do |name|
        if @src.check(@parsers[name].start_re)
          send(@parsers[name].method)
          true
        else
          false
        end
      end unless stop_re_found
      if !processed && !stop_re_found
        if stop_re_matched
          # +stop_re+ matched but the block rejected it: consume one character as text so that
          # the scanner moves on.
          add_text(@src.scan(/./))
        else
          raise Kramdown::Error, 'Bug: please report!'
        end
      end
    else
      # No further span start (or stop position): without +stop_re+ the rest is plain text.
      add_text(@src.scan_until(/.*/m)) unless stop_re
      break
    end
  end

  @tree = @stack.pop

  stop_re_found
end
|
196
|
+
|
197
|
+
# Return a copy of the string +source+ that is usable by the parser: all line endings are
# converted to "\n" and the text ends with a newline.
def adapt_source(source)
  unix_text = source.gsub(/\r\n?/, "\n")
  unix_text.chomp + "\n"
end
|
201
|
+
|
202
|
+
# Helper for collecting text: if the last child of +tree+ is a text element, +text+ is appended
# to its value; otherwise a new text element is created for non-empty +text+.
def add_text(text, tree = @tree)
  tail = tree.children.last
  if tail && tail.type == :text
    tail.value << text
  elsif !text.empty?
    tree.children << Element.new(:text, text)
  end
end
|
211
|
+
|
212
|
+
end
|
213
|
+
|
214
|
+
|
215
|
+
module ParserMethods
|
216
|
+
|
217
|
+
# One level of indentation at the start of a line: a tab or four spaces.
INDENT = /^(?:\t| {4})/
# Up to three spaces, i.e. less than one level of indentation.
OPT_SPACE = / {0,3}/
|
219
|
+
|
220
|
+
|
221
|
+
# Extract all attributes from the string +str+ and store them in the hash +opts+: references
# are collected under <tt>:refs</tt>, class names are appended to the 'class' entry, an ID sets
# the 'id' entry and a key-value pair is stored under its key with escaped closing braces and
# quote characters unescaped.
def parse_attribute_list(str, opts)
  str.scan(ALD_TYPE_ANY).each do |key, sep, val, id_attr, class_attr, ref|
    if ref
      opts[:refs] ||= []
      opts[:refs] << ref
    elsif class_attr
      existing = opts['class'] || ''
      opts['class'] = (existing + " #{class_attr}").lstrip
    elsif id_attr
      opts['id'] = id_attr
    else
      opts[key] = val.gsub(/\\(\}|#{sep})/, "\\1")
    end
  end
end
|
236
|
+
|
237
|
+
# Update the +ial+ with the information from the inline attribute list +opts+.
#
# The references of +opts+ are appended to <tt>ial[:refs]</tt> (which is created if missing),
# the class names of +opts+ are appended to the 'class' entry and all other entries of +opts+
# overwrite the corresponding entries of +ial+.
def update_ial_with_ial(ial, opts)
  refs = (ial[:refs] ||= [])
  # Append the individual reference names; pushing the array itself would create a nested entry
  # (or a nil entry) that can never be resolved to an ALD.
  refs.concat(opts[:refs]) if opts[:refs]
  ial['class'] = ((ial['class'] || '') + " #{opts['class']}").lstrip if opts['class']
  opts.each {|k,v| ial[k] = v if k != :refs && k != 'class' }
end
|
243
|
+
|
244
|
+
# Update the attributes +attr+ with the information from the inline attribute list +ial+ and
# all ALDs it references.
#
# Referenced ALDs are applied first (recursively), then the class names of +ial+ are appended
# to the 'class' entry and all other string keys of +ial+ overwrite the entries of +attr+.
#
# +visited+ is used internally: it holds the ALDs on the current reference chain so that ALDs
# referencing each other (or themselves) do not lead to endless recursion.
def update_attr_with_ial(attr, ial, visited = [])
  (ial[:refs] || []).each do |ref|
    ald = @doc.parse_infos[:ald][ref]
    next unless ald
    # Identity check: an ALD already on the current chain is not applied again.
    next if visited.any? {|seen| seen.equal?(ald)}
    update_attr_with_ial(attr, ald, visited + [ald])
  end
  attr['class'] = ((attr['class'] || '') + " #{ial['class']}").lstrip if ial['class']
  ial.each {|k,v| attr[k] = v if k.kind_of?(String) && k != 'class' }
end
|
252
|
+
|
253
|
+
# Generate an alpha-numeric ID from the string +str+.
#
# All characters except ASCII letters, digits, spaces and dashes are removed, leading
# non-letters are dropped, spaces are converted to dashes and the result is downcased. If
# nothing remains, 'section' is used. If the ID was already handed out, a dash and a counter
# are appended until the result is unique; every returned ID is recorded in +@used_ids+.
def generate_id(str)
  base = str.gsub(/[^a-zA-Z0-9 -]/, '').sub(/\A[^a-zA-Z]*/, '').gsub(' ', '-').downcase
  base = 'section' if base.empty?

  unless @used_ids.has_key?(base)
    @used_ids[base] = 0
    return base
  end

  # Count up until a free ID is found; the suffixed ID is recorded as well so that a later
  # header which generates exactly this ID does not produce a duplicate.
  gen_id = nil
  loop do
    @used_ids[base] += 1
    gen_id = "#{base}-#{@used_ids[base]}"
    break unless @used_ids.has_key?(gen_id)
  end
  @used_ids[gen_id] = 0
  gen_id
end
|
264
|
+
|
265
|
+
# Helper method for obfuscating the +email+ address by using HTML entities.
#
# Every ASCII byte is replaced by its decimal numeric character reference (three digits),
# all other bytes are copied unchanged. The result has the same encoding as +email+.
def obfuscate_email(email)
  result = String.new
  email.each_byte do |b|
    # Bytes from 128 upwards belong to non-ASCII characters and must not be converted one by one.
    result << (b < 128 ? "&#%03d;" % b : b.chr)
  end
  # Appending single bytes yields a binary string on Ruby 1.9+; restore the original encoding.
  result.force_encoding(email.encoding) if result.respond_to?(:force_encoding)
  result
end
|
273
|
+
|
274
|
+
|
275
|
+
BLANK_LINE = /(?:^\s*\n)+/
|
276
|
+
|
277
|
+
# Parse the blank line at the current position: the matched text is appended to a directly
# preceding <tt>:blank</tt> element or stored in a new one. Always returns +true+.
def parse_blank_line
  blank = @src.matched
  @src.pos += @src.matched_size
  previous = @tree.children.last
  if previous && previous.type == :blank
    previous.value += blank
  else
    @tree.children << Element.new(:blank, blank)
  end
  true
end
|
287
|
+
Registry.define_parser(:block, :blank_line, BLANK_LINE, self)
|
288
|
+
|
289
|
+
|
290
|
+
EOB_MARKER = /^\^\s*?\n/
|
291
|
+
|
292
|
+
# Parse the EOB marker at the current location: the marker is skipped and nothing is added to
# the tree. Always returns +true+.
def parse_eob_marker
  @src.pos = @src.pos + @src.matched_size
  true
end
|
297
|
+
Registry.define_parser(:block, :eob_marker, EOB_MARKER, self)
|
298
|
+
|
299
|
+
|
300
|
+
PARAGRAPH_START = /^#{OPT_SPACE}[^ \t].*?\n/
|
301
|
+
|
302
|
+
# Parse the paragraph line at the current location: the line (without its newline) is appended
# to the text of a directly preceding paragraph or becomes the text of a new paragraph (without
# leading whitespace). Always returns +true+.
def parse_paragraph
  line = @src.matched
  @src.pos += @src.matched_size
  previous = @tree.children.last
  if previous && previous.type == :p
    previous.children.first.value << "\n" << line.chomp
  else
    @tree.children << Element.new(:p)
    add_text(line.lstrip.chomp, @tree.children.last)
  end
  true
end
|
313
|
+
Registry.define_parser(:block, :paragraph, PARAGRAPH_START, self)
|
314
|
+
|
315
|
+
|
316
|
+
SETEXT_HEADER_START = /^(#{OPT_SPACE}[^ \t].*?)\n(-|=)+\s*?\n/
|
317
|
+
|
318
|
+
# Parse the Setext header at the current location. Returns +false+ without consuming anything
# if the previous element is not a blank line; otherwise a <tt>:header</tt> element (level 2
# for a '-' underline, level 1 otherwise) is added to the tree and +true+ is returned.
def parse_setext_header
  previous = @tree.children.last
  return false if previous && previous.type != :blank

  @src.pos += @src.matched_size
  text = @src[1].strip
  level = (@src[2] == '-' ? 2 : 1)
  el = Element.new(:header, nil, :level => level)
  add_text(text, el)
  el.options[:attr] = {:id => generate_id(text)} if @doc.options[:auto_ids]
  @tree.children << el
  true
end
|
331
|
+
Registry.define_parser(:block, :setext_header, SETEXT_HEADER_START, self)
|
332
|
+
|
333
|
+
|
334
|
+
ATX_HEADER_START = /^\#{1,6}/
|
335
|
+
ATX_HEADER_MATCH = /^(\#{1,6})(.+?)\s*?#*\s*?\n/
|
336
|
+
|
337
|
+
# Parse the Atx header at the current location.
#
# Returns +false+ without consuming anything if the previous element is not a blank line or if
# the line does not form a complete header (e.g. a line consisting only of '#'), so that another
# block parser can handle it. Otherwise a <tt>:header</tt> element whose level is the number of
# leading '#' characters is added to the tree and +true+ is returned.
def parse_atx_header
  if @tree.children.last && @tree.children.last.type != :blank
    return false
  end
  # The start expression only checks for leading '#' characters; the full match can still fail.
  return false unless @src.scan(ATX_HEADER_MATCH)
  level, text = @src[1], @src[2].strip
  el = Element.new(:header, nil, :level => level.length)
  add_text(text, el)
  el.options[:attr] = {:id => generate_id(text)} if @doc.options[:auto_ids]
  @tree.children << el
  true
end
|
350
|
+
Registry.define_parser(:block, :atx_header, ATX_HEADER_START, self)
|
351
|
+
|
352
|
+
|
353
|
+
BLOCKQUOTE_START = /^#{OPT_SPACE}> ?/
|
354
|
+
BLOCKQUOTE_MATCH = /(^#{OPT_SPACE}>.*?\n)+/
|
355
|
+
|
356
|
+
# Parse the blockquote at the current location: the quote marker is removed from every matched
# line and the remaining text is parsed as block level content of a new <tt>:blockquote</tt>
# element. Always returns +true+.
def parse_blockquote
  quoted = @src.scan(BLOCKQUOTE_MATCH)
  inner = quoted.gsub(BLOCKQUOTE_START, '')
  el = Element.new(:blockquote)
  @tree.children << el
  parse_blocks(el, inner)
  true
end
|
364
|
+
Registry.define_parser(:block, :blockquote, BLOCKQUOTE_START, self)
|
365
|
+
|
366
|
+
|
367
|
+
CODEBLOCK_START = INDENT
|
368
|
+
CODEBLOCK_MATCH = /(?:#{INDENT}.*?\S.*?\n)+/
|
369
|
+
|
370
|
+
# Parse the indented codeblock at the current location. One level of indentation is removed
# from every line. If the two preceding elements are a codeblock followed by a blank element,
# the blank lines and the new code are appended to that codeblock and the blank element is
# removed; otherwise a new <tt>:codeblock</tt> element is added. Always returns +true+.
def parse_codeblock
  code = @src.scan(CODEBLOCK_MATCH).gsub(INDENT, '')
  siblings = @tree.children
  previous, blank = siblings[-2], siblings[-1]
  if siblings.length >= 2 && blank.type == :blank && previous.type == :codeblock
    previous.value << blank.value.gsub(INDENT, '') << code
    siblings.pop
  else
    siblings << Element.new(:codeblock, code)
  end
  true
end
|
382
|
+
Registry.define_parser(:block, :codeblock, CODEBLOCK_START, self)
|
383
|
+
|
384
|
+
|
385
|
+
FENCED_CODEBLOCK_START = /^~{3,}/
|
386
|
+
FENCED_CODEBLOCK_MATCH = /^(~{3,})\s*?\n(.*?)^\1~*\s*?\n/m
|
387
|
+
|
388
|
+
# Parse the fenced codeblock at the current location. Returns +false+ without consuming
# anything if no complete fenced block is found; otherwise the text between the fences is added
# as <tt>:codeblock</tt> element and +true+ is returned.
def parse_codeblock_fenced
  return false unless @src.check(FENCED_CODEBLOCK_MATCH)

  @src.pos += @src.matched_size
  @tree.children << Element.new(:codeblock, @src[2])
  true
end
|
398
|
+
Registry.define_parser(:block, :codeblock_fenced, FENCED_CODEBLOCK_START, self)
|
399
|
+
|
400
|
+
|
401
|
+
HR_START = /^#{OPT_SPACE}(\*|-|_) *\1 *\1 *(\1| )*\n/
|
402
|
+
|
403
|
+
# Parse the horizontal rule at the current location: the matched line is skipped and a
# <tt>:hr</tt> element is added to the tree. Always returns +true+.
def parse_horizontal_rule
  @src.pos = @src.pos + @src.matched_size
  @tree.children.push(Element.new(:hr))
  true
end
|
409
|
+
Registry.define_parser(:block, :horizontal_rule, HR_START, self)
|
410
|
+
|
411
|
+
|
412
|
+
# Start of an unordered list item: optional indentation, one of the markers '+', '*' or '-' and
# then a separator character followed by the rest of the line. Group 1 is the indentation plus
# marker, group 2 the separator and the content.
# NOTE(review): the character class [\t| ] also accepts a literal '|' as separator, not only a
# tab or a space - confirm whether that is intended.
LIST_START_UL = /^(#{OPT_SPACE}[+*-])([\t| ].*?\n)/
# Start of an ordered list item: like LIST_START_UL, but the marker is a number followed by a
# period.
LIST_START_OL = /^(#{OPT_SPACE}\d+\.)([\t| ].*?\n)/
# Start of either kind of list item.
LIST_START = /#{LIST_START_UL}|#{LIST_START_OL}/
|
415
|
+
|
416
|
+
# Parse the ordered or unordered list at the current location.
#
# Returns +false+ without consuming anything if the previous element is a paragraph. Otherwise
# the raw text of every list item is collected first, then the text of each item is parsed as
# block level content, and a <tt>:ul</tt> or <tt>:ol</tt> element with one <tt>:li</tt> child per
# item is added to the tree; +true+ is returned.
def parse_list
  if @tree.children.last && @tree.children.last.type == :p # last element must not be a paragraph
    return false
  end

  type, list_start_re = (@src.check(LIST_START_UL) ? [:ul, LIST_START_UL] : [:ol, LIST_START_OL])
  list = Element.new(type)

  # First pass: collect the raw content of each item in +item.value+.
  item = nil
  indent_re = nil
  content_re = nil
  eob_found = false
  nested_list_found = false
  while !@src.eos?
    if @src.check(HR_START)
      # A horizontal rule ends the list.
      break
    elsif @src.scan(list_start_re)
      # A new list item starts here.
      indentation, content = @src[1].length, @src[2]
      item = Element.new(:li)
      list.children << item
      if content =~ /^\s*\n/
        # No content on the marker line: use a fixed indentation of four columns.
        indentation = 4
      else
        # Expand tabs at the start of the content to spaces (tab stops every four columns) so
        # that the indentation of the content can be measured in columns.
        while content =~ /^ *\t/
          temp = content.scan(/^ */).first.length + indentation
          content.sub!(/^( *)(\t+)/) {$1 + " "*(4 - (temp % 4)) + " "*($2.length - 1)*4}
        end
        indentation += content.scan(/^ */).first.length
      end
      content.sub!(/^\s*/, '')
      item.value = content

      # Expressions for the following lines of this item: how much indentation to strip, what
      # counts as (indented) item content and what starts the next item of this list.
      indent_re = /^ {#{indentation}}/
      content_re = /^(?:(?:\t| {4}){#{indentation / 4}} {#{indentation % 4}}|(?:\t| {4}){#{indentation / 4 + 1}}).*?\n/
      list_start_re = (type == :ul ? /^( {0,#{[3, indentation - 1].min}}[+*-])([\t| ].*?\n)/ :
                       /^( {0,#{[3, indentation - 1].min}}\d+\.)([\t| ].*?\n)/)
      nested_list_found = false
    elsif result = @src.scan(content_re)
      # An indented line belonging to the current item: convert leading tabs to spaces and
      # remove the indentation of the item.
      result.sub!(/^(\t+)/) { " "*4*($1 ? $1.length : 0) }
      result.sub!(indent_re, '')
      if !nested_list_found && result =~ LIST_START
        # The line starts a nested list: parse the text collected so far; if it is not exactly
        # one paragraph, the parsed elements are discarded and the raw text is kept.
        parse_blocks(item, item.value)
        if item.children.length == 1 && item.children.first.type == :p
          item.value = ''
        else
          item.children.clear
        end
        nested_list_found = true
      end
      item.value << result
    elsif result = @src.scan(BLANK_LINE)
      nested_list_found = true
      item.value << result
    elsif @src.scan(EOB_MARKER)
      # An EOB marker ends the list and is consumed.
      eob_found = true
      break
    else
      break
    end
  end

  @tree.children << list

  # Second pass: parse the collected text of every item as block level content.
  last = nil
  list.children.each do |item|
    temp = Element.new(:temp)
    parse_blocks(temp, item.value)
    item.children += temp.children
    item.value = nil
    next if item.children.size == 0

    # If the item starts with a paragraph that is not followed by a blank element (or that is
    # only followed by the trailing blank element of the last item and no EOB marker was found),
    # the paragraph is replaced by its text element; otherwise the item is marked with
    # <tt>:first_as_block</tt>.
    if item.children.first.type == :p && (item.children.length < 2 || item.children[1].type != :blank ||
                                          (item == list.children.last && item.children.length == 2 && !eob_found))
      text = item.children.shift.children.first
      text.value += "\n" if !item.children.empty? && item.children[0].type != :blank
      item.children.unshift(text)
    else
      item.options[:first_as_block] = true
    end

    # A trailing blank element is removed from the item; the one of the last item is remembered.
    if item.children.last.type == :blank
      last = item.children.pop
    else
      last = nil
    end
  end

  # The blank element removed from the last item is added after the list unless the list was
  # ended by an EOB marker.
  @tree.children << last if !last.nil? && !eob_found

  true
end
|
508
|
+
Registry.define_parser(:block, :list, LIST_START, self)
|
509
|
+
|
510
|
+
|
511
|
+
# Punctuation characters that may be used in a link ID in addition to letters, digits and the
# space character.
PUNCTUATION_CHARS = "_.:,;!?-"
# Matches a single character that is allowed in a link ID.
LINK_ID_CHARS = /[a-zA-Z0-9 #{PUNCTUATION_CHARS}]/
# Matches a single character that is not allowed in a link ID.
LINK_ID_NON_CHARS = /[^a-zA-Z0-9 #{PUNCTUATION_CHARS}]/
# Matches a complete link definition line (optionally followed by a quoted title on the next
# line). As used by #parse_link_definition: group 1 is the link ID, group 2 the URL written in
# angle brackets, group 3 the bare URL and group 5 the title.
LINK_DEFINITION_START = /^#{OPT_SPACE}\[(#{LINK_ID_CHARS}+)\]:[ \t]*(?:<(.*?)>|([^\s]+))[ \t]*?(?:\n?[ \t]*?(["'])(.+?)\4[ \t]*?)?\n/

# Parse the link definition at the current location.
#
# The scanner is moved past the matched definition and the URL/title pair is stored under the
# lower-cased link ID in <tt>@doc.parse_infos[:link_defs]</tt>. An already existing entry for
# the same ID is overwritten after a warning has been issued. Always returns +true+.
def parse_link_definition
  @src.pos += @src.matched_size

  link_id = @src[1].downcase
  link_url = @src[2] || @src[3]
  link_title = @src[5]

  link_defs = @doc.parse_infos[:link_defs]
  if link_defs[link_id]
    warning("Duplicate link ID '#{link_id}' - overwriting")
  end
  link_defs[link_id] = [link_url, link_title]
  true
end
Registry.define_parser(:block, :link_definition, LINK_DEFINITION_START, self)
|
525
|
+
|
526
|
+
|
527
|
+
# Building blocks for attribute list definitions (ALDs).
#
# ALD_ID_CHARS matches one character of an ALD name after the first one, ALD_ID_NAME a whole
# name and ALD_ANY_CHARS one character of the attribute list body (an escaped closing brace or
# anything that is not a closing brace).
ALD_ID_CHARS = /[\w\d-]/
ALD_ANY_CHARS = /\\\}|[^\}]/
ALD_ID_NAME = /(?:\w|\d)#{ALD_ID_CHARS}*/
# The individual kinds of entries inside an attribute list: <tt>key="value"</tt> pairs,
# <tt>.class</tt> names, <tt>#id</tt> names and bare names.
ALD_TYPE_KEY_VALUE_PAIR = /(#{ALD_ID_NAME})=("|')((?:\\\}|\\\2|[^\}\2])+?)\2/
ALD_TYPE_CLASS_NAME = /\.(#{ALD_ID_NAME})/
ALD_TYPE_ID_NAME = /#(#{ALD_ID_NAME})/
ALD_TYPE_REF = /(#{ALD_ID_NAME})/
# Matches any one of the entry kinds above, delimited by whitespace or the string boundaries.
ALD_TYPE_ANY = /(?:\A|\s)(?:#{ALD_TYPE_KEY_VALUE_PAIR}|#{ALD_TYPE_ID_NAME}|#{ALD_TYPE_CLASS_NAME}|#{ALD_TYPE_REF})(?=\s|\Z)/
# Matches a whole ALD line; group 1 is the name of the definition, group 2 its attribute list.
ALD_START = /^#{OPT_SPACE}\{:(#{ALD_ID_NAME}):(#{ALD_ANY_CHARS}+)\}\s*?\n/

# Parse the attribute list definition at the current location.
#
# The attribute list text is handed to +parse_attribute_list+ together with the hash stored for
# the definition name in <tt>@doc.parse_infos[:ald]</tt>; the hash is created on first use, so
# repeated definitions with the same name are parsed into the same hash. Always returns +true+.
def parse_ald
  @src.pos += @src.matched_size

  attribute_text = @src[2]
  definition = (@doc.parse_infos[:ald][@src[1]] ||= {})
  parse_attribute_list(attribute_text, definition)
  true
end
Registry.define_parser(:block, :ald, ALD_START, self)
|
544
|
+
|
545
|
+
|
546
|
+
# Matches a block level inline attribute list line; group 1 is the attribute list text. The
# negative lookahead rejects lines starting with <tt>{::</tt>.
IAL_BLOCK_START = /^#{OPT_SPACE}\{:(?!:)(#{ALD_ANY_CHARS}+)\}\s*?\n/

# Parse the inline attribute list at the current location.
#
# The attributes are parsed into the <tt>:ial</tt> option of the last child of the current tree.
# Nothing is stored when there is no such child or when it is a <tt>:blank</tt> element; the
# matched line is consumed in every case. Always returns +true+.
def parse_block_ial
  @src.pos += @src.matched_size

  target = @tree.children.last
  if target && target.type != :blank
    parse_attribute_list(@src[1], target.options[:ial] ||= {})
  end
  true
end
Registry.define_parser(:block, :block_ial, IAL_BLOCK_START, self)
|
557
|
+
|
558
|
+
|
559
|
+
# Format string (not yet a regexp) describing the line that starts or ends an extension block.
# The <tt>%s</tt> placeholder is filled with the pattern for the extension name. Once compiled,
# group 1 is the extension name, group 2 is set when the name is followed by <tt>::</tt> and
# group 3 is the attribute list text.
#
# Because this is a double-quoted string, the whitespace class has to be written with a doubled
# backslash: a single backslash followed by +s+ is the string escape for a plain space, which
# made the pattern accept only trailing spaces while the ALD and block IAL patterns accept any
# trailing whitespace.
EXT_BLOCK_START_STR = "^#{OPT_SPACE}\\{::(%s):(:)?(#{ALD_ANY_CHARS}*)\\}\\s*?\n"
# Matches the start line of an extension block with an arbitrary extension name.
EXT_BLOCK_START = /#{EXT_BLOCK_START_STR % ALD_ID_NAME}/

# Parse the extension block at the current location.
#
# The options of the start line (and, if present, of the end line) are parsed into one hash.
# When group 2 of the start line is not set, the text up to the next line of the same extension
# is used as body. The extension is invoked as <tt>parse_NAME(self, opts, body)</tt> on
# <tt>@doc.extension</tt>; +body+ is +nil+ when group 2 was set. The block is consumed but
# ignored (with a warning) when <tt>@doc.extension</tt> has no public method of that name or
# when no end line is found. Always returns +true+.
def parse_extension_block
  @src.pos += @src.matched_size

  ext = @src[1]
  opts = {}
  body = nil
  parse_attribute_list(@src[3], opts)

  if !@doc.extension.public_methods.map {|m| m.to_s}.include?("parse_#{ext}")
    warning("No extension named '#{ext}' found - ignoring extension block")
    body = :invalid
  end

  if !@src[2]
    # the block has a body: it runs up to the next line for the very same extension name
    stop_re = /#{EXT_BLOCK_START_STR % ext}/
    if result = @src.scan_until(stop_re)
      parse_attribute_list(@src[3], opts)
      # String#sub is used instead of sub! because the latter returns nil when nothing was
      # replaced, which would silently turn the body into nil
      body = result.sub(stop_re, '') if body != :invalid
    else
      body = :invalid
      warning("No ending line for extension block '#{ext}' found - ignoring extension block")
    end
  end

  @doc.extension.send("parse_#{ext}", self, opts, body) if body != :invalid

  true
end
Registry.define_parser(:block, :extension_block, EXT_BLOCK_START, self)
|
592
|
+
|
593
|
+
|
594
|
+
# Matches a footnote definition; group 1 is the footnote name, group 2 the first line of the
# content plus all directly following lines matched by CODEBLOCK_MATCH.
FOOTNOTE_DEFINITION_START = /^#{OPT_SPACE}\[\^(#{ALD_ID_NAME})\]:\s*?(.*?\n(?:#{BLANK_LINE}?#{CODEBLOCK_MATCH})*)/

# Parse the foot note definition at the current location.
#
# The content (with everything matched by INDENT removed) is parsed as block level text into a
# new <tt>:footnote_def</tt> element, which is then stored as <tt>:content</tt> of a fresh entry
# for the footnote name in <tt>@doc.parse_infos[:footnotes]</tt>. An existing entry with the same
# name is replaced after a warning has been issued.
#
# Returns +true+ like the other block level parse methods (previously the method returned the
# created element only as a side effect of its last assignment).
def parse_footnote_definition
  @src.pos += @src.matched_size

  # read the match groups before the nested parse_blocks call so that the name and content do
  # not depend on the state of @src afterwards
  name = @src[1]
  content = @src[2].gsub(INDENT, '')

  el = Element.new(:footnote_def)
  parse_blocks(el, content)

  footnotes = @doc.parse_infos[:footnotes]
  warning("Duplicate footnote name '#{name}' - overwriting") if footnotes[name]
  footnotes[name] = {:content => el}
  true
end
Registry.define_parser(:block, :footnote_definition, FOOTNOTE_DEFINITION_START, self)
|
606
|
+
|
607
|
+
|
608
|
+
require 'rexml/parsers/baseparser'
|
609
|
+
|
610
|
+
#:stopdoc:
# The following regexps are based on the ones used by REXML, with some slight modifications.
#:startdoc:

# An HTML comment; group 1 is the comment text.
HTML_COMMENT_RE = /<!--(.*?)-->/m
# A processing instruction; group 1 is its content.
HTML_INSTRUCTION_RE = /<\?(.*?)\?>/m
# A single attribute; group 1 is the name, group 2 the quote character, group 3 the value.
HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})\s*=\s*(["'])(.*?)\2/
# An opening tag; group 1 is the tag name, group 2 the raw attribute text and group 4 is set
# when the tag ends with a slash.
HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/
# A closing tag; group 1 is the tag name.
HTML_TAG_CLOSE_RE = /<\/(#{REXML::Parsers::BaseParser::NAME_STR})\s*>/


# Tag names grouped by the way the content of the element is parsed.
HTML_PARSE_AS_BLOCK = %w[div blockquote table dl ol ul form fieldset]
HTML_PARSE_AS_SPAN = %w[a address b dd dt em h1 h2 h3 h4 h5 h6 legend li p pre span td th]
HTML_PARSE_AS_RAW = %w[script math]
# Maps a tag name to <tt>:block</tt>, <tt>:span</tt> or <tt>:raw</tt>. Names that are not listed
# above map to <tt>:span</tt>; such a name is stored in the hash the first time it is looked up.
HTML_PARSE_AS = Hash.new {|hash, tag| hash[tag] = :span}
[[HTML_PARSE_AS_BLOCK, :block], [HTML_PARSE_AS_SPAN, :span], [HTML_PARSE_AS_RAW, :raw]].each do |tags, mode|
  tags.each {|tag| HTML_PARSE_AS[tag] = mode}
end

# Names of the tags that are handled by the block level HTML parser.
HTML_BLOCK_ELEMENTS = %w[div p pre h1 h2 h3 h4 h5 h6 hr form fieldset iframe legend script dl ul ol table ins del blockquote address]

# Matches the start of an opening tag, a processing instruction, a comment or a closing tag.
HTML_BLOCK_START = /^#{OPT_SPACE}<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/
|
631
|
+
|
632
|
+
# Parse the HTML at the current position as block level HTML.
#
# Comments and processing instructions are added verbatim as <tt>:html_raw</tt> elements and the
# rest of their last line is skipped. Otherwise the current line must start with an opening tag
# whose name is listed in HTML_BLOCK_ELEMENTS or contains a colon, or with a closing tag; if it
# does not, +false+ is returned and nothing is consumed.
#
# The line is then processed tag by tag: opening tags create <tt>:html_element</tt> elements
# (tags not ending with a slash are remembered in <tt>@unclosed_html_tags</tt>), closing tags
# that match the most recently opened tag close it, and everything else is added as text to the
# element that is currently open on this line. If an element is still open at the end of the
# line, it becomes the current tree (the previous one is pushed onto <tt>@stack</tt>).
#
# Returns +true+ if something was parsed.
def parse_block_html
  if raw = (@src.scan(HTML_COMMENT_RE) || @src.scan(HTML_INSTRUCTION_RE))
    @tree.children << Element.new(:html_raw, raw, :type => :block)
    @src.scan(/.*?\n/)
    return true
  end

  starts_with_block_tag = @src.check(/^#{OPT_SPACE}#{HTML_TAG_RE}/) &&
    (HTML_BLOCK_ELEMENTS.include?(@src[1]) || @src[1] =~ /:/)
  unless starts_with_block_tag || @src.check(/^#{OPT_SPACE}#{HTML_TAG_CLOSE_RE}/)
    return false
  end

  @src.scan(/^(.*?)\n/)
  rest = @src[1]    # the still unprocessed part of the line
  current = nil     # the element opened on this line that is currently being filled
  parents = []      # the enclosing elements (or nil) of +current+ that were opened on this line

  while rest.size > 0
    open_at = rest.index(HTML_TAG_RE)
    close_at = rest.index(HTML_TAG_CLOSE_RE)
    opening_tag_is_next = open_at && (close_at.nil? || open_at < close_at)

    if opening_tag_is_next && (current.nil? || current.options[:parse_type] == :block)
      tag = rest.match(HTML_TAG_RE)
      # a tag that is not a block level tag ends the processing of the line
      break unless HTML_BLOCK_ELEMENTS.include?(tag[1]) || tag[1] =~ /:/

      add_text(tag.pre_match + "\n", current) if current
      rest = tag.post_match

      attrs = {}
      tag[2].scan(HTML_ATTRIBUTE_RE).each do |attr_name, _quote, attr_value|
        attrs[attr_name] = attr_value
      end
      el = Element.new(:html_element, tag[1], :attr => attrs, :type => :block,
                       :parse_type => HTML_PARSE_AS[tag[1]])

      (current || @tree).children << el
      unless tag[4]
        # no trailing slash: the element stays open
        @unclosed_html_tags.push(el)
        parents << current
        current = el
      end
    elsif close_at
      tag = rest.match(HTML_TAG_CLOSE_RE)
      add_text(tag.pre_match, current) if current

      rest = tag.post_match
      if @unclosed_html_tags.size > 0 && tag[1] == @unclosed_html_tags.last.value
        closed = @unclosed_html_tags.pop
        # the closed element was not opened on this line, so it is the current tree
        @tree = @stack.pop unless current
        current = parents.pop
        if closed.options[:parse_type] == :raw
          raise Kramdown::Error, "Bug: please report!" if closed.children.size > 1
          closed.children.first.type = :raw if closed.children.first
        end
      else
        if HTML_BLOCK_ELEMENTS.include?(tag[1]) && (current || @tree).options[:parse_type] == :block
          warning("Found invalidly nested HTML closing tag for '#{tag[1]}'")
        end
        # the unmatched closing tag is kept as text
        if current
          add_text(tag.to_s, current)
        else
          add_text(tag.to_s + "\n")
        end
      end
    else
      if current
        add_text(rest, current)
      else
        warning("Ignoring characters at the end of an HTML block line")
      end
      rest = ''
    end
  end

  if current
    last_child = current.children.last
    last_child.value << "\n" if last_child && last_child.type == :text

    if [:span, :raw].include?(current.options[:parse_type])
      # take everything up to the next possible block level HTML line as text
      add_text(@src.scan_until(/(?=#{HTML_BLOCK_START})|\Z/), current)
    end
    @stack.push(@tree)
    @tree = current
  end
  true
end
Registry.define_parser(:block, :block_html, HTML_BLOCK_START, self)
|
720
|
+
|
721
|
+
|
722
|
+
|
723
|
+
|
724
|
+
# Matches a backslash followed by one of the characters that can be escaped; group 1 is the
# escaped character.
#
# The hyphen is escaped inside the character class: written unescaped between the plus sign and
# the backtick it formed a character range, which made digits, upper-case letters and several
# other characters count as escapable.
ESCAPED_CHARS = /\\([\\.*_+\-`()\[\]{}#!])/
|
725
|
+
|
726
|
+
# Parse the backslash-escaped character at the current location.
#
# The scanner is moved past the escape sequence and only the escaped character itself (group 1
# of the match) is added as text.
def parse_escaped_chars
  @src.pos = @src.pos + @src.matched_size
  escaped_char = @src[1]
  add_text(escaped_char)
end
Registry.define_parser(:span, :escaped_chars, ESCAPED_CHARS, self)
|
732
|
+
|
733
|
+
|
734
|
+
# Parse the HTML entity at the current location.
#
# The scanner is moved past the entity and the matched text is added unchanged as text.
def parse_html_entity
  @src.pos = @src.pos + @src.matched_size
  entity = @src.matched
  add_text(entity)
end
Registry.define_parser(:span, :html_entity, REXML::Parsers::BaseParser::REFERENCE_RE, self)
|
740
|
+
|
741
|
+
|
742
|
+
# Matches one of the characters <tt>&</tt>, <tt>></tt> and <tt><</tt>.
SPECIAL_HTML_CHARS = /&|>|</

# Parse the special HTML characters at the current location.
#
# The scanner is moved past the character and the character is added unchanged as text.
def parse_special_html_chars
  @src.pos = @src.pos + @src.matched_size
  special_char = @src.matched
  add_text(special_char)
end
Registry.define_parser(:span, :special_html_chars, SPECIAL_HTML_CHARS, self)
|
750
|
+
|
751
|
+
|
752
|
+
# Matches a forced line break: two spaces or two backslashes directly before a newline.
#
# Two spaces are required; with a single space in the pattern every line that merely ends in one
# stray space would be turned into a line break.
LINE_BREAK = /(  |\\\\)(?=\n)/
|
753
|
+
|
754
|
+
# Parse the line break at the current location.
#
# The scanner is moved past the line break marker and a <tt>:br</tt> element is appended to the
# current tree.
def parse_line_break
  @src.pos = @src.pos + @src.matched_size
  line_break = Element.new(:br)
  @tree.children << line_break
end
Registry.define_parser(:span, :line_break, LINE_BREAK, self)
|
760
|
+
|
761
|
+
|
762
|
+
# Pairs of a character sequence and the text it is replaced with. The order is significant
# because the regexp below tries the sequences in exactly this order.
TYPOGRAPHIC_SYMS = [
  ['---', '—'], ['--', '–'], ['...', '…'],
  ['\\<<', '<<'], ['\\>>', '>>'],
  ['<< ', '« '], [' >>', ' »'],
  ['<<', '«'], ['>>', '»']
]
# Lookup table from a character sequence to its replacement.
TYPOGRAPHIC_SYMS_SUBST = TYPOGRAPHIC_SYMS.inject({}) do |table, (sequence, replacement)|
  table[sequence] = replacement
  table
end
# Matches any of the character sequences listed in TYPOGRAPHIC_SYMS.
TYPOGRAPHIC_SYMS_RE = /#{TYPOGRAPHIC_SYMS.map {|sequence, _replacement| Regexp.escape(sequence)}.join('|')}/

# Parse the typographic symbols at the current location.
#
# The scanner is moved past the matched sequence and a copy of its replacement is added as text
# (a copy, so that the string stored in the lookup table is never handed out).
def parse_typographic_syms
  @src.pos = @src.pos + @src.matched_size
  replacement = TYPOGRAPHIC_SYMS_SUBST[@src.matched]
  add_text(replacement.dup)
end
Registry.define_parser(:span, :typographic_syms, TYPOGRAPHIC_SYMS_RE, self)
|
775
|
+
|
776
|
+
|
777
|
+
# Matches an automatic link in angle brackets. Group 1 is the text between the brackets;
# group 2 is the URL scheme and is only set when the text starts with one of the listed schemes.
AUTOLINK_START = /<((mailto|https?|ftps?):.*?|.*?@.*?)>/

# Parse the autolink at the current location.
#
# An <tt>:a</tt> element is created whose +href+ attribute and text are both the matched address.
# For mail addresses (no scheme at all, or the +mailto+ scheme) the address without the scheme
# is passed through +obfuscate_email+, and the +href+ is built from the equally obfuscated
# +mailto+ scheme and the obfuscated address.
def parse_autolink
  @src.pos += @src.matched_size

  scheme = @src[2]
  text = href = @src[1]
  if scheme.nil? || scheme == 'mailto'
    address = (scheme ? @src[1].sub(/^mailto:/, '') : @src[1])
    text = obfuscate_email(address)
    mailto = obfuscate_email('mailto')
    href = "#{mailto}:#{text}"
  end

  link = Element.new(:a, nil, {:attr => {'href' => href}})
  add_text(text, link)
  @tree.children << link
end
Registry.define_parser(:span, :autolink, AUTOLINK_START, self)
|
794
|
+
|
795
|
+
|
796
|
+
# Matches a run of one or more backticks.
CODESPAN_DELIMITER = /`+/

# Parse the codespan at the current scanner location.
#
# A single backtick that has whitespace on both sides is added as plain text. Otherwise the text
# up to the next occurrence of the opening delimiter becomes a <tt>:codespan</tt> element; when
# the delimiter is longer than one backtick, one leading and one trailing space are removed from
# that text. If the delimiter does not occur again, the scanner is put back directly behind the
# opening delimiter and the delimiter is added as plain text.
def parse_codespan
  delimiter = @src.scan(CODESPAN_DELIMITER)
  single = (delimiter.length == 1)
  content_start = @src.pos

  if single && @src.pre_match =~ /\s\Z/ && @src.match?(/\s/)
    add_text(delimiter)
    return
  end

  code = @src.scan_until(/#{delimiter}/)
  unless code
    @src.pos = content_start
    return add_text(delimiter)
  end

  # scan_until returns the text including the closing delimiter
  code.sub!(/#{delimiter}\Z/, '')
  unless single
    code = code[1..-1] if code[0..0] == ' '
    code = code[0..-2] if code[-1..-1] == ' '
  end
  @tree.children << Element.new(:codespan, code)
end
Registry.define_parser(:span, :codespan, CODESPAN_DELIMITER, self)
|
823
|
+
|
824
|
+
|
825
|
+
# Matches a span level inline attribute list; group 1 is the attribute list text.
IAL_SPAN_START = /\{:(#{ALD_ANY_CHARS}+)\}/

# Parse the inline attribute list at the current location.
#
# The attributes are parsed and merged into the <tt>:ial</tt> and <tt>:attr</tt> options of the
# last child of the current tree. If there is no such child or if it is a text element, a
# warning is issued and the matched characters are added as plain text instead.
def parse_span_ial
  @src.pos += @src.matched_size

  target = @tree.children.last
  unless target && target.type != :text
    warning("Ignoring span IAL because preceding element is just text")
    return add_text(@src.matched)
  end

  attr = {}
  parse_attribute_list(@src[1], attr)
  update_ial_with_ial(target.options[:ial] ||= {}, attr)
  update_attr_with_ial(target.options[:attr] ||= {}, attr)
end
Registry.define_parser(:span, :span_ial, IAL_SPAN_START, self)
|
841
|
+
|
842
|
+
|
843
|
+
# Matches a footnote marker; group 1 is the footnote name.
FOOTNOTE_MARKER_START = /\[\^(#{ALD_ID_NAME})\]/

# Parse the footnote marker at the current location.
#
# If no footnote definition with the matched name exists, a warning is issued and the marker is
# added as plain text. Otherwise a <tt>:footnote</tt> element is created, appended to the current
# tree and remembered as <tt>:marker</tt> of the definition, together with the chain of elements
# leading to it (stored in its <tt>:stack</tt> option).
#
# A marker that was stored earlier only blocks the new one as long as every element of its
# stored chain is still a child of the element before it; in that case a warning is issued and
# the new marker is added as plain text.
def parse_footnote_marker
  @src.pos += @src.matched_size

  fn_def = @doc.parse_infos[:footnotes][@src[1]]
  unless fn_def
    warning("Footnote definition for '#{@src[1]}' not found")
    return add_text(@src.matched)
  end

  marker = fn_def[:marker]
  still_in_tree = marker && begin
    chain = marker.options[:stack]
    chain[0..-2].zip(chain[1..-1]).all? {|parent, child| parent.children.include?(child)}
  end

  if marker && still_in_tree
    warning("Footnote marker '#{@src[1]}' already appeared in document, ignoring newly found marker")
    add_text(@src.matched)
  else
    fn_def[:marker] = Element.new(:footnote, nil, :name => @src[1])
    fn_def[:marker].options[:stack] = [@stack, @tree, fn_def[:marker]].flatten.compact
    @tree.children << fn_def[:marker]
  end
end
Registry.define_parser(:span, :footnote_marker, FOOTNOTE_MARKER_START, self)
|
867
|
+
|
868
|
+
|
869
|
+
# Matches one or two asterisks or one or two underscores.
EMPHASIS_START = /(?:\*\*?|__?)/

# Parse the emphasis at the current location.
#
# A delimiter of length two starts a <tt>:strong</tt> element, a delimiter of length one an
# <tt>:em</tt> element. The delimiter is added as plain text when it is followed by whitespace
# or, for underscores, when it has letters on both sides. If no closing delimiter is found for a
# <tt>:strong</tt> element, the parse is retried as <tt>:em</tt> starting one character earlier.
# If that fails too, the delimiter is added as plain text.
def parse_emphasis
  delimiter = @src.scan(EMPHASIS_START)
  element = (delimiter.length == 2 ? :strong : :em)
  type = (delimiter =~ /_/ ? '_' : '*')
  content_start = @src.pos

  inside_word = (type == '_' && @src.pre_match =~ /[[:alpha:]]\Z/ && @src.check(/[[:alpha:]]/))
  if inside_word || @src.check(/\s/)
    add_text(delimiter)
    return
  end

  # Tries to parse the span content up to +marker+ into a new element of type +kind+. The block
  # given to parse_spans decides whether an occurrence of the marker really ends the element.
  attempt = lambda do |marker, kind|
    node = Element.new(kind)
    closing_re = /#{Regexp.escape(marker)}/
    closed = parse_spans(node, closing_re) do
      (@src.string[@src.pos-1, 1] !~ /\s/) &&
        (kind != :em || !@src.match?(/#{Regexp.escape(marker*2)}(?!#{Regexp.escape(marker)})/)) &&
        (type != '_' || !@src.match?(/#{Regexp.escape(marker)}[[:alpha:]]/)) && node.children.size > 0
    end
    [closed, node, closing_re]
  end

  found, el, stop_re = attempt.call(delimiter, element)
  if !found && element == :strong
    # step back onto the second delimiter character and try again with a single one
    @src.pos = content_start - 1
    found, el, stop_re = attempt.call(type, :em)
  end

  unless found
    @src.pos = content_start
    return add_text(delimiter)
  end

  @src.scan(stop_re)
  @tree.children << el
end
Registry.define_parser(:span, :emphasis, EMPHASIS_START, self)
|
908
|
+
|
909
|
+
|
910
|
+
# Matches the start of an opening tag, a processing instruction or a comment.
HTML_SPAN_START = /<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--)/

# Parse the HTML at the current position as span level HTML.
#
# Comments and processing instructions are added verbatim as <tt>:html_raw</tt> elements. An
# opening tag creates an <tt>:html_element</tt> element: a tag ending with a slash is added as
# is, otherwise the span content up to the matching closing tag is parsed into the element. If
# no closing tag is found, the scanner is put back directly behind the opening tag and the tag
# is added as plain text. If nothing of the above matches, a single character is added as text
# so that the scanner always advances.
def parse_span_html
  if result = @src.scan(HTML_COMMENT_RE)
    @tree.children << Element.new(:html_raw, result, :type => :span)
  elsif result = @src.scan(HTML_INSTRUCTION_RE)
    @tree.children << Element.new(:html_raw, result, :type => :span)
  elsif result = @src.scan(HTML_TAG_RE)
    reset_pos = @src.pos
    attrs = {}
    @src[2].scan(HTML_ATTRIBUTE_RE).each {|name, _sep, val| attrs[name] = val}
    el = Element.new(:html_element, @src[1], :attr => attrs, :type => :span)
    if @src[4]
      # tag ends with a slash: there is no content to parse
      @tree.children << el
    else
      stop_re = /<\/#{Regexp.escape(@src[1])}\s*>/
      if parse_spans(el, stop_re)
        @src.scan(stop_re)
        @tree.children << el
      else
        @src.pos = reset_pos
        add_text(result)
      end
    end
  else
    add_text(@src.scan(/./))
  end
end
# The span level parser is registered with the span level start pattern. It used to be registered
# with HTML_BLOCK_START, the pattern of the block level HTML parser, which left HTML_SPAN_START
# unused.
Registry.define_parser(:span, :span_html, HTML_SPAN_START, self)
|
940
|
+
|
941
|
+
|
942
|
+
# Matches an escaped or unescaped square bracket.
LINK_TEXT_BRACKET_RE = /\\\[|\\\]|\[|\]/
# Matches the (possibly empty) link ID part of a reference style link; group 1 is the ID.
LINK_INLINE_ID_RE = /\s*?\[(#{LINK_ID_CHARS}+)?\]/
# Matches the quoted title and the closing parenthesis of an inline link; group 2 is the title.
LINK_INLINE_TITLE_RE = /\s*?(["'])(.+?)\1\s*?\)/

# Matches the start of a link or an image link; a caret must not follow the bracket.
LINK_START = /!?\[(?=[^^])/

# Parse the link at the current scanner position. This method is used to parse normal links as
# well as image links.
#
# The text between the square brackets is parsed as span content. After it either a link ID in
# square brackets, nothing (the link text itself is then converted into the ID) or a URL with an
# optional title in parentheses is expected. Whenever the construct turns out not to be a valid
# link, the scanner is put back directly behind the opening bracket and the opening characters
# are added as plain text.
def parse_link
  opener = @src.scan(LINK_START)
  text_start = @src.pos

  link_type = (opener =~ /^!/ ? :img : :a)

  # no nested links allowed
  if link_type == :a
    within_link = @tree.type == :img || @tree.type == :a ||
      @stack.any? {|node, _| node && (node.type == :img || node.type == :a)}
    if within_link
      add_text(opener)
      return
    end
  end
  el = Element.new(link_type)

  # parse the link text, keeping track of nested brackets to find the closing one
  bracket_re = /\]|!?\[/
  depth = 1
  found = parse_spans(el, bracket_re) do
    case @src.matched
    when "[", "!["
      depth += 1
    when "]"
      depth -= 1
    end
    depth - el.children.select {|c| c.type == :img}.size == 0
  end
  if !found || el.children.empty?
    @src.pos = text_start
    add_text(opener)
    return
  end
  alt_text = @src.string[text_start...@src.pos]
  conv_link_id = alt_text.gsub(/(\s|\n)+/m, ' ').gsub(LINK_ID_NON_CHARS, '').downcase
  @src.scan(bracket_re)

  # reference style link or no link url
  if @src.scan(LINK_INLINE_ID_RE) || !@src.check(/\(/)
    link_id = (@src[1] || conv_link_id).downcase
    link_defs = @doc.parse_infos[:link_defs]
    if link_defs.has_key?(link_id)
      add_link(el, link_defs[link_id].first, link_defs[link_id].last, alt_text)
    else
      warning("No link definition for link ID '#{link_id}' found")
      @src.pos = text_start
      add_text(opener)
    end
    return
  end

  # link url in parentheses
  if @src.scan(/\(<(.*?)>/)
    link_url = @src[1]
    if @src.scan(/\)/)
      add_link(el, link_url, nil, alt_text)
      return
    end
  else
    link_url = ''
    delimiter_re = /\(|\)|\s/
    open_parens = 0
    # collect the URL up to the first whitespace or up to the parenthesis that closes the first
    # opening one
    while part = @src.scan_until(delimiter_re)
      link_url += part
      case @src.matched
      when /\s/
        break
      when '('
        open_parens += 1
      when ')'
        open_parens -= 1
        break if open_parens == 0
      end
    end
    # drop the opening parenthesis and the delimiter that ended the URL
    link_url = link_url[1..-2]

    if open_parens == 0
      add_link(el, link_url, nil, alt_text)
      return
    end
  end

  # only a quoted title followed by the closing parenthesis may be left now
  if @src.scan(LINK_INLINE_TITLE_RE)
    add_link(el, link_url, @src[2], alt_text)
  else
    @src.pos = text_start
    add_text(opener)
  end
end
Registry.define_parser(:span, :link, LINK_START, self)
|
1035
|
+
|
1036
|
+
|
1037
|
+
# This helper method adds the appropriate attributes to the element +el+ of type +a+ or +img+
# and the element itself to the <tt>@tree</tt>.
#
# The +title+ attribute is only set if +title+ is given. For an +a+ element +href+ is used as
# the +href+ attribute. For any other element +href+ becomes the +src+ attribute, +alt_text+
# the +alt+ attribute and all children of the element are removed.
def add_link(el, href, title, alt_text = nil)
  attributes = (el.options[:attr] ||= {})
  attributes['title'] = title if title

  if el.type == :a
    attributes['href'] = href
  else
    attributes['src'] = href
    attributes['alt'] = alt_text
    el.children.clear
  end

  @tree.children << el
end
|
1051
|
+
|
1052
|
+
end
|
1053
|
+
|
1054
|
+
end
|
1055
|
+
|
1056
|
+
end
|