kramdown 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of kramdown might be problematic. Click here for more details.
- data/ChangeLog +267 -0
- data/VERSION +1 -1
- data/benchmark/benchmark.rb +2 -1
- data/benchmark/generate_data.rb +110 -0
- data/benchmark/historic-jruby-1.4.0.dat +7 -0
- data/benchmark/historic-ruby-1.8.6.dat +7 -0
- data/benchmark/historic-ruby-1.8.7.dat +7 -0
- data/benchmark/historic-ruby-1.9.1p243.dat +7 -0
- data/benchmark/historic-ruby-1.9.2dev.dat +7 -0
- data/benchmark/static-jruby-1.4.0.dat +7 -0
- data/benchmark/static-ruby-1.8.6.dat +7 -0
- data/benchmark/static-ruby-1.8.7.dat +7 -0
- data/benchmark/static-ruby-1.9.1p243.dat +7 -0
- data/benchmark/static-ruby-1.9.2dev.dat +7 -0
- data/benchmark/testing.sh +1 -1
- data/doc/index.page +5 -5
- data/doc/installation.page +3 -3
- data/doc/quickref.page +3 -3
- data/doc/syntax.page +133 -101
- data/doc/tests.page +9 -1
- data/lib/kramdown/compatibility.rb +34 -0
- data/lib/kramdown/converter.rb +26 -8
- data/lib/kramdown/document.rb +2 -1
- data/lib/kramdown/parser.rb +1 -1192
- data/lib/kramdown/parser/kramdown.rb +272 -0
- data/lib/kramdown/parser/kramdown/attribute_list.rb +102 -0
- data/lib/kramdown/parser/kramdown/autolink.rb +42 -0
- data/lib/kramdown/parser/kramdown/blank_line.rb +43 -0
- data/lib/kramdown/parser/kramdown/blockquote.rb +42 -0
- data/lib/kramdown/parser/kramdown/codeblock.rb +62 -0
- data/lib/kramdown/parser/kramdown/codespan.rb +57 -0
- data/lib/kramdown/parser/kramdown/emphasis.rb +69 -0
- data/lib/kramdown/parser/kramdown/eob.rb +39 -0
- data/lib/kramdown/parser/kramdown/escaped_chars.rb +38 -0
- data/lib/kramdown/parser/kramdown/extension.rb +65 -0
- data/lib/kramdown/parser/kramdown/footnote.rb +72 -0
- data/lib/kramdown/parser/kramdown/header.rb +81 -0
- data/lib/kramdown/parser/kramdown/horizontal_rule.rb +39 -0
- data/lib/kramdown/parser/kramdown/html.rb +253 -0
- data/lib/kramdown/{deprecated.rb → parser/kramdown/html_entity.rb} +10 -12
- data/lib/kramdown/parser/kramdown/line_break.rb +38 -0
- data/lib/kramdown/parser/kramdown/link.rb +153 -0
- data/lib/kramdown/parser/kramdown/list.rb +225 -0
- data/lib/kramdown/parser/kramdown/paragraph.rb +44 -0
- data/lib/kramdown/parser/kramdown/typographic_symbol.rb +48 -0
- data/lib/kramdown/version.rb +1 -1
- data/test/testcases/block/09_html/comment.html +1 -0
- data/test/testcases/block/09_html/comment.text +1 -1
- data/test/testcases/block/09_html/content_model/tables.text +2 -2
- data/test/testcases/block/09_html/not_parsed.html +10 -0
- data/test/testcases/block/09_html/not_parsed.text +9 -0
- data/test/testcases/block/09_html/parse_as_raw.html +4 -0
- data/test/testcases/block/09_html/parse_as_raw.text +2 -0
- data/test/testcases/block/09_html/parse_block_html.html +4 -0
- data/test/testcases/block/09_html/parse_block_html.text +3 -0
- data/test/testcases/block/09_html/processing_instruction.html +1 -0
- data/test/testcases/block/09_html/processing_instruction.text +1 -1
- data/test/testcases/block/09_html/simple.html +8 -15
- data/test/testcases/block/09_html/simple.text +2 -12
- data/test/testcases/span/02_emphasis/normal.html +8 -4
- data/test/testcases/span/02_emphasis/normal.text +6 -2
- data/test/testcases/span/05_html/markdown_attr.html +2 -1
- data/test/testcases/span/05_html/markdown_attr.text +2 -1
- data/test/testcases/span/05_html/normal.html +6 -2
- data/test/testcases/span/05_html/normal.text +4 -0
- metadata +35 -4
- data/lib/kramdown/parser/registry.rb +0 -62
@@ -0,0 +1,81 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
|
5
|
+
#
|
6
|
+
# This file is part of kramdown.
|
7
|
+
#
|
8
|
+
# kramdown is free software: you can redistribute it and/or modify
|
9
|
+
# it under the terms of the GNU General Public License as published by
|
10
|
+
# the Free Software Foundation, either version 3 of the License, or
|
11
|
+
# (at your option) any later version.
|
12
|
+
#
|
13
|
+
# This program is distributed in the hope that it will be useful,
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
+
# GNU General Public License for more details.
|
17
|
+
#
|
18
|
+
# You should have received a copy of the GNU General Public License
|
19
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
20
|
+
#++
|
21
|
+
#
|
22
|
+
|
23
|
+
module Kramdown
|
24
|
+
module Parser
|
25
|
+
class Kramdown
|
26
|
+
|
27
|
+
# Generate an alpha-numeric ID from the string +str+.
#
# All characters except ASCII letters, digits, spaces and hyphens are removed, leading
# non-letter characters are stripped, spaces are replaced by hyphens and the result is
# downcased. If nothing is left, the ID 'section' is used.
#
# The IDs handed out so far are remembered in <tt>@used_ids</tt>. When the base ID has already
# been handed out, a numeric suffix (<tt>-1</tt>, <tt>-2</tt>, ...) is appended. The suffixed
# ID is recorded as well, so that a later string which naturally yields the same ID (e.g.
# "foo 1" after two "foo" headers) does not receive a duplicate.
#
# Returns the generated ID as a String.
def generate_id(str)
  base_id = str.gsub(/[^a-zA-Z0-9 -]/, '').gsub(/^[^a-zA-Z]*/, '').gsub(' ', '-').downcase
  base_id = 'section' if base_id.empty?
  @used_ids ||= {}

  gen_id = base_id
  # Advance the counter of the base ID until an ID is found that has not been handed out yet.
  while @used_ids.has_key?(gen_id)
    gen_id = "#{base_id}-#{@used_ids[base_id] += 1}"
  end
  # Only reached with an unused ID, so this never resets an existing counter.
  @used_ids[gen_id] = 0
  gen_id
end
|
39
|
+
|
40
|
+
HEADER_ID=/(?:[ \t]\{#((?:\w|\d)[\w\d-]*)\})?/
|
41
|
+
SETEXT_HEADER_START = /^(#{OPT_SPACE}[^ \t].*?)#{HEADER_ID}[ \t]*?\n(-|=)+\s*?\n/
|
42
|
+
|
43
|
+
# Parse the Setext header at the current location.
#
# Relies on the match data of the preceding SETEXT_HEADER_START match: group 1 is the header
# text, group 2 the optional explicit ID and group 3 the underline character. An underline of
# '-' yields a level 2 header, anything else a level 1 header.
#
# Returns +false+ without consuming input when the previous element of the current tree is
# not a blank element, +true+ otherwise.
def parse_setext_header
  previous = @tree.children.last
  return false if previous && previous.type != :blank

  @src.pos += @src.matched_size
  text = @src[1].strip
  id = @src[2]
  underline = @src[3]

  el = Element.new(:header, nil, :level => (underline == '-' ? 2 : 1))
  add_text(text, el)
  if id
    # an explicitly given ID always wins over an automatically generated one
    el.options[:attr] = {'id' => id}
  elsif @doc.options[:auto_ids]
    el.options[:attr] = {'id' => generate_id(text)}
  end
  @tree.children << el
  true
end
|
57
|
+
define_parser(:setext_header, SETEXT_HEADER_START)
|
58
|
+
|
59
|
+
|
60
|
+
ATX_HEADER_START = /^\#{1,6}/
|
61
|
+
ATX_HEADER_MATCH = /^(\#{1,6})(.+?)\s*?#*#{HEADER_ID}\s*?\n/
|
62
|
+
|
63
|
+
# Parse the Atx header at the current location.
#
# The header level is the number of leading '#' characters (group 1 of ATX_HEADER_MATCH),
# group 2 is the header text and group 3 the optional explicit ID.
#
# Returns +false+ without consuming input when the previous element of the current tree is
# not a blank element or when the line does not form a complete Atx header, +true+ otherwise.
def parse_atx_header
  previous = @tree.children.last
  return false if previous && previous.type != :blank

  # ATX_HEADER_START only checks for the leading '#' characters; a line like "#\n" passes that
  # check but does not match the full pattern. Bail out instead of reading groups of a failed
  # match (which are all nil).
  return false unless @src.scan(ATX_HEADER_MATCH)

  level, text, id = @src[1], @src[2].strip, @src[3]
  el = Element.new(:header, nil, :level => level.length)
  add_text(text, el)
  if id
    # an explicitly given ID always wins over an automatically generated one
    el.options[:attr] = {'id' => id}
  elsif @doc.options[:auto_ids]
    el.options[:attr] = {'id' => generate_id(text)}
  end
  @tree.children << el
  true
end
|
77
|
+
define_parser(:atx_header, ATX_HEADER_START)
|
78
|
+
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
|
5
|
+
#
|
6
|
+
# This file is part of kramdown.
|
7
|
+
#
|
8
|
+
# kramdown is free software: you can redistribute it and/or modify
|
9
|
+
# it under the terms of the GNU General Public License as published by
|
10
|
+
# the Free Software Foundation, either version 3 of the License, or
|
11
|
+
# (at your option) any later version.
|
12
|
+
#
|
13
|
+
# This program is distributed in the hope that it will be useful,
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
+
# GNU General Public License for more details.
|
17
|
+
#
|
18
|
+
# You should have received a copy of the GNU General Public License
|
19
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
20
|
+
#++
|
21
|
+
#
|
22
|
+
|
23
|
+
module Kramdown
|
24
|
+
module Parser
|
25
|
+
class Kramdown
|
26
|
+
|
27
|
+
HR_START = /^#{OPT_SPACE}(\*|-|_)[ \t]*\1[ \t]*\1[ \t]*(\1|[ \t])*\n/
|
28
|
+
|
29
|
+
# Parse the horizontal rule at the current location.
#
# Skips over the text of the preceding HR_START match and appends an +:hr+ element to the
# current tree. Always returns +true+.
def parse_horizontal_rule
  rule_length = @src.matched_size
  @src.pos += rule_length
  rule = Element.new(:hr)
  @tree.children << rule
  true
end
|
35
|
+
define_parser(:horizontal_rule, HR_START)
|
36
|
+
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,253 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
|
5
|
+
#
|
6
|
+
# This file is part of kramdown.
|
7
|
+
#
|
8
|
+
# kramdown is free software: you can redistribute it and/or modify
|
9
|
+
# it under the terms of the GNU General Public License as published by
|
10
|
+
# the Free Software Foundation, either version 3 of the License, or
|
11
|
+
# (at your option) any later version.
|
12
|
+
#
|
13
|
+
# This program is distributed in the hope that it will be useful,
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
+
# GNU General Public License for more details.
|
17
|
+
#
|
18
|
+
# You should have received a copy of the GNU General Public License
|
19
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
20
|
+
#++
|
21
|
+
#
|
22
|
+
|
23
|
+
require 'rexml/parsers/baseparser'
|
24
|
+
|
25
|
+
module Kramdown
|
26
|
+
module Parser
|
27
|
+
class Kramdown
|
28
|
+
|
29
|
+
#:stopdoc:
|
30
|
+
# The following regexps are based on the ones used by REXML, with some slight modifications.
|
31
|
+
#:startdoc:
|
32
|
+
HTML_COMMENT_RE = /<!--(.*?)-->/m
|
33
|
+
HTML_INSTRUCTION_RE = /<\?(.*?)\?>/m
|
34
|
+
HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})\s*=\s*(["'])(.*?)\2/m
|
35
|
+
HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/m
|
36
|
+
HTML_TAG_CLOSE_RE = /<\/(#{REXML::Parsers::BaseParser::NAME_STR})\s*>/m
|
37
|
+
|
38
|
+
|
39
|
+
HTML_PARSE_AS_BLOCK = %w{applet button blockquote colgroup dd div dl fieldset form iframe li
|
40
|
+
map noscript object ol table tbody td th thead tfoot tr ul}
|
41
|
+
HTML_PARSE_AS_SPAN = %w{a abbr acronym address b bdo big cite caption code del dfn dt em
|
42
|
+
h1 h2 h3 h4 h5 h6 i ins kbd label legend optgroup p pre q rb rbc
|
43
|
+
rp rt rtc ruby samp select small span strong sub sup tt var}
|
44
|
+
HTML_PARSE_AS_RAW = %w{script math option textarea}
|
45
|
+
|
46
|
+
HTML_PARSE_AS = Hash.new {|h,k| h[k] = :raw}
|
47
|
+
HTML_PARSE_AS_BLOCK.each {|i| HTML_PARSE_AS[i] = :block}
|
48
|
+
HTML_PARSE_AS_SPAN.each {|i| HTML_PARSE_AS[i] = :span}
|
49
|
+
HTML_PARSE_AS_RAW.each {|i| HTML_PARSE_AS[i] = :raw}
|
50
|
+
|
51
|
+
#:stopdoc:
|
52
|
+
# Some HTML elements like script belong to both categories (i.e. are valid in block and
|
53
|
+
# span HTML) and therefore don't appear here!
|
54
|
+
#:startdoc:
|
55
|
+
HTML_SPAN_ELEMENTS = %w{a abbr acronym b big bdo br button cite code del dfn em i img input
|
56
|
+
ins kbd label option q rb rbc rp rt rtc ruby samp select small span
|
57
|
+
strong sub sup textarea tt var}
|
58
|
+
HTML_BLOCK_ELEMENTS = %w{address applet button blockquote caption col colgroup dd div dl dt fieldset
|
59
|
+
form h1 h2 h3 h4 h5 h6 hr iframe legend li map ol optgroup p pre table tbody
|
60
|
+
td th thead tfoot tr ul}
|
61
|
+
HTML_ELEMENTS_WITHOUT_BODY = %w{area br col hr img input}
|
62
|
+
|
63
|
+
HTML_BLOCK_START = /^#{OPT_SPACE}<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/
|
64
|
+
HTML_RAW_START = /(?=<(#{REXML::Parsers::BaseParser::UNAME_STR}|\/))/
|
65
|
+
|
66
|
+
# Parse the HTML at the current position as block level HTML.
#
# Handles, in this order: XML comments, XML processing instructions, start tags and closing
# tags. Tags whose name is listed in HTML_SPAN_ELEMENTS are not handled here.
#
# Returns +true+ when something was consumed and +false+ when nothing matched. When a closing
# tag matches the HTML element that is currently being parsed, <tt>:stop_block_parsing</tt> is
# thrown with the value <tt>:found</tt> instead of returning.
def parse_block_html
  if (comment = @src.scan(HTML_COMMENT_RE))
    @tree.children << Element.new(:xml_comment, comment, :type => :block)
    @src.scan(/[ \t]*\n/)
    return true
  end

  if (instruction = @src.scan(HTML_INSTRUCTION_RE))
    @tree.children << Element.new(:xml_pi, instruction, :type => :block)
    @src.scan(/[ \t]*\n/)
    return true
  end

  # The tag is only checked first because span level tags must be left alone.
  if @src.check(/^#{OPT_SPACE}#{HTML_TAG_RE}/) && !HTML_SPAN_ELEMENTS.include?(@src[1])
    @src.pos += @src.matched_size
    handle_html_start_tag
    return true
  end

  is_block_close_tag = @src.check(/^#{OPT_SPACE}#{HTML_TAG_CLOSE_RE}/) && !HTML_SPAN_ELEMENTS.include?(@src[1])
  return false unless is_block_close_tag

  @src.pos += @src.matched_size
  name = @src[1]
  throw :stop_block_parsing, :found if @tree.type == :html_element && @tree.value == name

  warning("Found invalidly used HTML closing tag for '#{name}' - ignoring it")
  true
end
|
96
|
+
define_parser(:block_html, HTML_BLOCK_START)
|
97
|
+
|
98
|
+
|
99
|
+
# Return the HTML parse type defined by the string +val+ (the value of a +markdown+
# attribute):
#
# "0":: <tt>:raw</tt>
# "1":: <tt>:default</tt> (the default parsing mode of the tag should be used)
# "span":: <tt>:span</tt>
# "block":: <tt>:block</tt>
#
# If +val+ is +nil+, +nil+ is returned. For any other value a warning is issued and +nil+ is
# returned, too.
def get_parse_type(val)
  return nil if val.nil?

  parse_types = {"0" => :raw, "1" => :default, "span" => :span, "block" => :block}
  return parse_types[val] if parse_types.has_key?(val)

  warning("Invalid markdown attribute val '#{val}', using default")
  nil
end
|
114
|
+
|
115
|
+
# Process the HTML start tag that has already been scanned/checked.
#
# Relies on the match data of the preceding HTML_TAG_RE match: group 1 is the tag name, group 2
# the attribute string and group 4 is set for a self-closed tag (<tt><name /></tt>).
#
# The parse type of the element content is determined as follows: inside a raw HTML element
# everything is raw; otherwise HTML_PARSE_AS is consulted if the document option
# <tt>:parse_block_html</tt> is set (raw if not). A +markdown+ attribute on the tag overrides
# this and is removed from the attributes.
#
# The new element is appended to the current tree and, unless the tag is closed or cannot have
# a body, its content is parsed according to the parse type.
def handle_html_start_tag
  name = @src[1]
  closed = !@src[4].nil?
  attrs = {}
  @src[2].scan(HTML_ATTRIBUTE_RE).each {|attr_name, _quote, attr_val| attrs[attr_name] = attr_val}

  parent_is_raw = (@tree.type == :html_element && @tree.options[:parse_type] == :raw)

  parse_type = if parent_is_raw
                 :raw
               else
                 (@doc.options[:parse_block_html] ? HTML_PARSE_AS[name] : :raw)
               end
  if val = get_parse_type(attrs.delete('markdown'))
    parse_type = (val == :default ? HTML_PARSE_AS[name] : val)
  end

  @src.scan(/[ \t]*\n/) if parse_type == :block

  el = Element.new(:html_element, name, :attr => attrs, :type => :block, :parse_type => parse_type)
  el.options[:outer_element] = true if @tree.type != :html_element
  el.options[:parent_is_raw] = true if parent_is_raw
  @tree.children << el

  if !closed && HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
    warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it")
  elsif !closed
    if parse_type == :block
      end_tag_found = parse_blocks(el)
      if !end_tag_found
        warning("Found no end tag for '#{el.value}' - auto-closing it")
      end
    elsif parse_type == :span
      # The tag name is escaped because it may contain regexp meta characters like '.'.
      # The text returned by scan_until is used directly instead of slicing @src.string with
      # scanner positions: positions are byte offsets while String#[] counts characters on
      # Ruby 1.9+, so slicing yields wrong text once multibyte characters precede the tag.
      if result = @src.scan_until(/(?=<\/#{Regexp.escape(el.value)}\s*>)/m)
        add_text(result, el)
        @src.scan(HTML_TAG_CLOSE_RE)
      else
        add_text(@src.scan(/.*/m), el)
        warning("Found no end tag for '#{el.value}' - auto-closing it")
      end
    else
      parse_raw_html(el)
    end
    # Inside raw HTML the trailing line break belongs to the raw text and is not swallowed.
    @src.scan(/[ \t]*\n/) unless parent_is_raw
  end
end
|
161
|
+
|
162
|
+
# Parse raw HTML until the matching end tag for +el+ is found or until the end of the
# document.
#
# While parsing, +el+ is the current tree; the previous tree is saved on <tt>@stack</tt> and
# restored afterwards. Text between tags is added as <tt>:html_text</tt>, nested start tags are
# handled by +handle_html_start_tag+ and non-matching closing tags are ignored with a warning.
#
# Returns the scanner position of the last encountered tag start, or +nil+ if there was none.
def parse_raw_html(el)
  @stack.push(@tree)
  @tree = el

  finished = false
  last_tag_pos = nil
  until @src.eos? || finished
    text = @src.scan_until(HTML_RAW_START)

    if text.nil?
      # no further tag in the document: take the rest as text and give up
      add_text(@src.scan(/.*/m), @tree, :html_text)
      warning("Found no end tag for '#{@tree.value}' - auto-closing it")
      finished = true
      next
    end

    last_tag_pos = @src.pos
    add_text(text, @tree, :html_text)
    if @src.scan(HTML_TAG_RE)
      handle_html_start_tag
    elsif @src.scan(HTML_TAG_CLOSE_RE)
      if @tree.value == @src[1]
        finished = true
      else
        warning("Found invalidly used HTML closing tag for '#{@src[1]}' - ignoring it")
      end
    else
      # looked like a tag but is none: consume one character as text and go on
      add_text(@src.scan(/./), @tree, :html_text)
    end
  end

  @tree = @stack.pop
  last_tag_pos
end
|
196
|
+
|
197
|
+
|
198
|
+
HTML_SPAN_START = /<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/
|
199
|
+
|
200
|
+
# Parse the HTML at the current position as span level HTML.
#
# Handles XML comments, XML processing instructions, closing tags (ignored with a warning) and
# start tags. Start tags of block level elements (HTML_BLOCK_ELEMENTS) are consumed without
# adding an element.
#
# Whether the content of an element is parsed as span level text depends on the document
# option <tt>:parse_span_html</tt> and can be overridden with the +markdown+ attribute:
# "span" enables parsing, "0" disables it and "1" selects the default for the tag (no parsing
# for tags in HTML_PARSE_AS_RAW, parsing for all others). Block level parsing cannot be used
# here and is ignored with a warning.
def parse_span_html
  if result = @src.scan(HTML_COMMENT_RE)
    @tree.children << Element.new(:xml_comment, result, :type => :span)
  elsif result = @src.scan(HTML_INSTRUCTION_RE)
    @tree.children << Element.new(:xml_pi, result, :type => :span)
  elsif @src.scan(HTML_TAG_CLOSE_RE)
    warning("Found invalidly used HTML closing tag for '#{@src[1]}' - ignoring it")
  elsif @src.scan(HTML_TAG_RE)
    name = @src[1]
    return if HTML_BLOCK_ELEMENTS.include?(name)

    attrs = {}
    @src[2].scan(HTML_ATTRIBUTE_RE).each {|attr_name, _quote, attr_val| attrs[attr_name] = attr_val.gsub(/\n+/, ' ')}

    raw_element = HTML_PARSE_AS_RAW.include?(name)
    do_parsing = (raw_element ? false : @doc.options[:parse_span_html])
    if val = get_parse_type(attrs.delete('markdown'))
      if val == :block
        warning("Cannot use block level parsing in span level HTML tag - using default mode")
      elsif val == :span
        do_parsing = true
      elsif val == :default
        # markdown="1": use the default mode of the tag, regardless of the document option
        do_parsing = !raw_element
      elsif val == :raw
        do_parsing = false
      end
    end

    el = Element.new(:html_element, name, :attr => attrs, :type => :span)
    stop_re = /<\/#{Regexp.escape(name)}\s*>/
    if @src[4]
      # self-closed tag, there is no content to parse
      @tree.children << el
    elsif HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
      warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it")
      @tree.children << el
    else
      if parse_spans(el, stop_re, (do_parsing ? nil : [:span_html]), (do_parsing ? :text : :html_text))
        @src.scan(stop_re)
      else
        warning("Found no end tag for '#{el.value}' - auto-closing it")
        add_text(@src.scan(/.*/m))
      end
      @tree.children << el
    end
  else
    add_text(@src.scan(/./))
  end
end
|
249
|
+
define_parser(:span_html, HTML_SPAN_START)
|
250
|
+
|
251
|
+
end
|
252
|
+
end
|
253
|
+
end
|
@@ -20,22 +20,20 @@
|
|
20
20
|
#++
|
21
21
|
#
|
22
22
|
|
23
|
+
require 'rexml/parsers/baseparser'
|
24
|
+
|
23
25
|
module Kramdown
|
26
|
+
module Parser
|
27
|
+
class Kramdown
|
24
28
|
|
25
|
-
|
29
|
+
# Parse the HTML entity at the current location.
#
# Skips over the text of the preceding match and appends it as an +:entity+ element to the
# current tree.
def parse_html_entity
  entity = @src.matched
  @src.pos += @src.matched_size
  @tree.children << Element.new(:entity, entity)
end
|
34
|
+
define_parser(:html_entity, REXML::Parsers::BaseParser::REFERENCE_RE)
|
26
35
|
|
27
|
-
def parse_nokramdown(parser, opts, body)
|
28
|
-
warn("The extension 'nokramdown' is deprecated and has been renamed to 'nomarkdown'")
|
29
|
-
parse_nomarkdown(parser, opts, body)
|
30
|
-
end
|
31
36
|
|
32
|
-
def parse_kdoptions(parser, opts, body)
|
33
|
-
warn("The extension 'kdoptions' is deprecated and has been renamed to 'options'")
|
34
|
-
parse_options(parser, opts, body)
|
35
37
|
end
|
36
|
-
|
37
38
|
end
|
38
|
-
|
39
39
|
end
|
40
|
-
|
41
|
-
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
|
5
|
+
#
|
6
|
+
# This file is part of kramdown.
|
7
|
+
#
|
8
|
+
# kramdown is free software: you can redistribute it and/or modify
|
9
|
+
# it under the terms of the GNU General Public License as published by
|
10
|
+
# the Free Software Foundation, either version 3 of the License, or
|
11
|
+
# (at your option) any later version.
|
12
|
+
#
|
13
|
+
# This program is distributed in the hope that it will be useful,
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
+
# GNU General Public License for more details.
|
17
|
+
#
|
18
|
+
# You should have received a copy of the GNU General Public License
|
19
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
20
|
+
#++
|
21
|
+
#
|
22
|
+
|
23
|
+
module Kramdown
|
24
|
+
module Parser
|
25
|
+
class Kramdown
|
26
|
+
|
27
|
+
LINE_BREAK = /( |\\\\)(?=\n)/
|
28
|
+
|
29
|
+
# Parse the line break at the current location.
#
# Skips over the text of the preceding LINE_BREAK match and appends a +:br+ element to the
# current tree.
def parse_line_break
  marker_length = @src.matched_size
  @src.pos += marker_length
  line_break = Element.new(:br)
  @tree.children << line_break
end
|
34
|
+
define_parser(:line_break, LINE_BREAK)
|
35
|
+
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
|
5
|
+
#
|
6
|
+
# This file is part of kramdown.
|
7
|
+
#
|
8
|
+
# kramdown is free software: you can redistribute it and/or modify
|
9
|
+
# it under the terms of the GNU General Public License as published by
|
10
|
+
# the Free Software Foundation, either version 3 of the License, or
|
11
|
+
# (at your option) any later version.
|
12
|
+
#
|
13
|
+
# This program is distributed in the hope that it will be useful,
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
+
# GNU General Public License for more details.
|
17
|
+
#
|
18
|
+
# You should have received a copy of the GNU General Public License
|
19
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
20
|
+
#++
|
21
|
+
#
|
22
|
+
|
23
|
+
module Kramdown
|
24
|
+
module Parser
|
25
|
+
class Kramdown
|
26
|
+
|
27
|
+
PUNCTUATION_CHARS = "_.:,;!?-"
|
28
|
+
LINK_ID_CHARS = /[a-zA-Z0-9 #{PUNCTUATION_CHARS}]/
|
29
|
+
LINK_ID_NON_CHARS = /[^a-zA-Z0-9 #{PUNCTUATION_CHARS}]/
|
30
|
+
LINK_DEFINITION_START = /^#{OPT_SPACE}\[(#{LINK_ID_CHARS}+)\]:[ \t]*(?:<(.*?)>|([^\s]+))[ \t]*?(?:\n?[ \t]*?(["'])(.+?)\4[ \t]*?)?\n/
|
31
|
+
|
32
|
+
# Parse the link definition at the current location.
#
# Relies on the match data of the preceding LINK_DEFINITION_START match. The downcased link ID
# is stored together with the URL and the optional title in
# <tt>@doc.parse_infos[:link_defs]</tt>; an already existing definition for the same ID is
# overwritten with a warning. Always returns +true+.
def parse_link_definition
  @src.pos += @src.matched_size

  link_id = @src[1].downcase
  # the URL is either given in angle brackets (group 2) or bare (group 3)
  link_url = @src[2] || @src[3]
  link_title = @src[5]

  if @doc.parse_infos[:link_defs][link_id]
    warning("Duplicate link ID '#{link_id}' - overwriting")
  end
  @doc.parse_infos[:link_defs][link_id] = [link_url, link_title]
  true
end
|
40
|
+
define_parser(:link_definition, LINK_DEFINITION_START)
|
41
|
+
|
42
|
+
|
43
|
+
# This helper method adds the appropriate attributes to the element +el+ of type +a+ or +img+
# and the element itself to the <tt>@tree</tt>.
#
# A +title+ attribute is only set if +title+ is given. For an +a+ element +href+ is used as
# +href+ attribute. For any other element +href+ is used as +src+ attribute, +alt_text+ as
# +alt+ attribute and the children of the element are removed.
def add_link(el, href, title, alt_text = nil)
  attr = (el.options[:attr] ||= {})
  attr['title'] = title if title

  if el.type != :a
    attr['src'] = href
    attr['alt'] = alt_text
    el.children.clear
  else
    attr['href'] = href
  end

  @tree.children << el
end
|
57
|
+
|
58
|
+
LINK_TEXT_BRACKET_RE = /\\\[|\\\]|\[|\]/
|
59
|
+
LINK_INLINE_ID_RE = /\s*?\[(#{LINK_ID_CHARS}+)?\]/
|
60
|
+
LINK_INLINE_TITLE_RE = /\s*?(["'])(.+?)\1\s*?\)/
|
61
|
+
LINK_START = /!?\[(?=[^^])/
|
62
|
+
|
63
|
+
# Parse the link at the current scanner position. This method is used to parse normal links as
# well as image links.
#
# The link text is parsed up to the matching closing bracket. Afterwards either a reference
# style link (<tt>[text][id]</tt>, <tt>[text][]</tt> or just <tt>[text]</tt>) or an inline
# link (<tt>[text](url "title")</tt>) is recognized. Whenever no valid link can be built, the
# scanner is reset to the position after the opening bracket and the opening text is added as
# plain text.
def parse_link
  opener = @src.scan(LINK_START)
  text_start = @src.pos

  link_type = (opener =~ /^!/ ? :img : :a)

  # no nested links allowed
  if link_type == :a
    inside_link = @tree.type == :img || @tree.type == :a ||
      @stack.any? {|t, s| t && (t.type == :img || t.type == :a)}
    if inside_link
      add_text(opener)
      return
    end
  end
  el = Element.new(link_type)

  # Count opening and closing brackets to find the bracket that ends the link text.
  stop_re = /\]|!?\[/
  depth = 1
  found = parse_spans(el, stop_re) do
    case @src.matched
    when "[", "!["
      depth += 1
    when "]"
      depth -= 1
    end
    images = el.children.select {|child| child.type == :img}
    depth - images.size == 0
  end
  if !found || el.children.empty?
    @src.pos = text_start
    add_text(opener)
    return
  end
  alt_text = @src.string[text_start...@src.pos]
  conv_link_id = alt_text.gsub(/(\s|\n)+/m, ' ').gsub(LINK_ID_NON_CHARS, '').downcase
  @src.scan(stop_re)

  # reference style link or no link url
  if @src.scan(LINK_INLINE_ID_RE) || !@src.check(/\(/)
    link_id = (@src[1] || conv_link_id).downcase
    link_defs = @doc.parse_infos[:link_defs]
    if link_defs.has_key?(link_id)
      add_link(el, link_defs[link_id].first, link_defs[link_id].last, alt_text)
    else
      warning("No link definition for link ID '#{link_id}' found")
      @src.pos = text_start
      add_text(opener)
    end
    return
  end

  # link url in parentheses
  if @src.scan(/\(<(.*?)>/)
    link_url = @src[1]
    if @src.scan(/\)/)
      add_link(el, link_url, nil, alt_text)
      return
    end
  else
    # Collect the URL up to the first whitespace or the parenthesis that balances the
    # opening one.
    link_url = ''
    open_parens = 0
    while (chunk = @src.scan_until(/\(|\)|\s/))
      link_url += chunk
      case @src.matched
      when /\s/
        break
      when '('
        open_parens += 1
      when ')'
        open_parens -= 1
        break if open_parens == 0
      end
    end
    # strip the opening parenthesis and the terminating character
    link_url = link_url[1..-2]

    if open_parens == 0
      add_link(el, link_url, nil, alt_text)
      return
    end
  end

  if @src.scan(LINK_INLINE_TITLE_RE)
    add_link(el, link_url, @src[2], alt_text)
  else
    @src.pos = text_start
    add_text(opener)
  end
end
|
149
|
+
define_parser(:link, LINK_START)
|
150
|
+
|
151
|
+
end
|
152
|
+
end
|
153
|
+
end
|