kramdown 0.11.0 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of kramdown might be problematic. Click here for more details.
- data/CONTRIBUTERS +1 -1
- data/ChangeLog +532 -0
- data/README +22 -12
- data/Rakefile +9 -8
- data/VERSION +1 -1
- data/benchmark/benchmark.sh +61 -0
- data/benchmark/generate_data.rb +57 -55
- data/benchmark/testing.sh +1 -1
- data/benchmark/timing.sh +3 -3
- data/bin/kramdown +1 -2
- data/data/kramdown/document.html +2 -2
- data/data/kramdown/document.latex +2 -2
- data/doc/default.scss.css +6 -1
- data/doc/default.template +1 -1
- data/doc/documentation.page +1 -1
- data/doc/index.page +9 -7
- data/doc/installation.page +2 -3
- data/doc/links.markdown +1 -1
- data/doc/quickref.page +19 -19
- data/doc/syntax.page +117 -98
- data/doc/tests.page +8 -7
- data/lib/kramdown/compatibility.rb +2 -1
- data/lib/kramdown/converter.rb +5 -7
- data/lib/kramdown/converter/base.rb +87 -32
- data/lib/kramdown/converter/html.rb +134 -122
- data/lib/kramdown/converter/kramdown.rb +24 -25
- data/lib/kramdown/converter/latex.rb +65 -55
- data/lib/kramdown/document.rb +487 -42
- data/lib/kramdown/error.rb +3 -0
- data/lib/kramdown/options.rb +83 -28
- data/lib/kramdown/parser.rb +5 -5
- data/lib/kramdown/parser/base.rb +55 -13
- data/lib/kramdown/parser/html.rb +83 -71
- data/lib/kramdown/parser/kramdown.rb +73 -54
- data/lib/kramdown/parser/kramdown/abbreviation.rb +17 -12
- data/lib/kramdown/parser/kramdown/autolink.rb +2 -3
- data/lib/kramdown/parser/kramdown/blank_line.rb +1 -1
- data/lib/kramdown/parser/kramdown/block_boundary.rb +2 -2
- data/lib/kramdown/parser/kramdown/blockquote.rb +2 -2
- data/lib/kramdown/parser/kramdown/codeblock.rb +5 -2
- data/lib/kramdown/parser/kramdown/codespan.rb +1 -2
- data/lib/kramdown/parser/kramdown/emphasis.rb +1 -1
- data/lib/kramdown/parser/kramdown/escaped_chars.rb +1 -1
- data/lib/kramdown/parser/kramdown/extensions.rb +204 -0
- data/lib/kramdown/parser/kramdown/footnote.rb +7 -7
- data/lib/kramdown/parser/kramdown/header.rb +4 -2
- data/lib/kramdown/parser/kramdown/horizontal_rule.rb +1 -1
- data/lib/kramdown/parser/kramdown/html.rb +39 -45
- data/lib/kramdown/parser/kramdown/link.rb +19 -29
- data/lib/kramdown/parser/kramdown/list.rb +13 -13
- data/lib/kramdown/parser/kramdown/math.rb +1 -1
- data/lib/kramdown/parser/kramdown/paragraph.rb +5 -4
- data/lib/kramdown/parser/kramdown/smart_quotes.rb +1 -1
- data/lib/kramdown/parser/kramdown/table.rb +51 -12
- data/lib/kramdown/parser/markdown.rb +69 -0
- data/lib/kramdown/utils.rb +2 -2
- data/lib/kramdown/utils/entities.rb +10 -1
- data/lib/kramdown/utils/html.rb +22 -11
- data/lib/kramdown/utils/ordered_hash.rb +44 -40
- data/lib/kramdown/version.rb +1 -1
- data/man/man1/kramdown.1 +31 -4
- data/test/testcases/block/08_list/item_ial.html +1 -1
- data/test/testcases/block/11_ial/nested.html +11 -0
- data/test/testcases/block/11_ial/nested.text +15 -0
- data/test/testcases/block/13_definition_list/item_ial.html +1 -1
- data/test/testcases/block/14_table/escaping.html +52 -0
- data/test/testcases/block/14_table/escaping.text +19 -0
- data/test/testcases/block/14_table/simple.html.19 +139 -0
- data/test/testcases/block/14_table/simple.text +1 -1
- data/test/testcases/block/15_math/normal.html +13 -13
- data/test/testcases/block/16_toc/{no_toc_depth.html → no_toc.html} +0 -0
- data/test/testcases/block/16_toc/{no_toc_depth.options → no_toc.options} +0 -0
- data/test/testcases/block/16_toc/{no_toc_depth.text → no_toc.text} +0 -0
- data/test/testcases/block/16_toc/{toc_depth_2.html → toc_levels.html} +4 -4
- data/test/testcases/block/16_toc/toc_levels.options +1 -0
- data/test/testcases/block/16_toc/{toc_depth_2.text → toc_levels.text} +0 -0
- data/test/testcases/span/escaped_chars/normal.html +4 -0
- data/test/testcases/span/escaped_chars/normal.text +4 -0
- data/test/testcases/span/ial/simple.html +1 -1
- data/test/testcases/span/math/normal.html +2 -2
- metadata +20 -25
- data/benchmark/historic-jruby-1.4.0.dat +0 -7
- data/benchmark/historic-ruby-1.8.6.dat +0 -7
- data/benchmark/historic-ruby-1.8.7.dat +0 -7
- data/benchmark/historic-ruby-1.9.1p243.dat +0 -7
- data/benchmark/historic-ruby-1.9.2dev.dat +0 -7
- data/benchmark/static-jruby-1.4.0.dat +0 -7
- data/benchmark/static-ruby-1.8.6.dat +0 -7
- data/benchmark/static-ruby-1.8.7.dat +0 -7
- data/benchmark/static-ruby-1.9.1p243.dat +0 -7
- data/benchmark/static-ruby-1.9.2dev.dat +0 -7
- data/lib/kramdown/parser/kramdown/attribute_list.rb +0 -111
- data/lib/kramdown/parser/kramdown/extension.rb +0 -116
- data/test/testcases/block/16_toc/toc_depth_2.options +0 -1
data/lib/kramdown/error.rb
CHANGED
data/lib/kramdown/options.rb
CHANGED
@@ -40,27 +40,30 @@ module Kramdown
|
|
40
40
|
# ----------------------------
|
41
41
|
# :section: Option definitions
|
42
42
|
#
|
43
|
-
# This sections
|
43
|
+
# This section describes the methods that can be used on the Options module.
|
44
44
|
# ----------------------------
|
45
45
|
|
46
|
-
#
|
47
|
-
Definition = Struct.new(:name, :type, :default, :desc)
|
46
|
+
# Struct class for storing the definition of an option.
|
47
|
+
Definition = Struct.new(:name, :type, :default, :desc, :validator)
|
48
48
|
|
49
49
|
# Allowed option types.
|
50
|
-
ALLOWED_TYPES = [String, Integer, Float, Symbol, Boolean,
|
50
|
+
ALLOWED_TYPES = [String, Integer, Float, Symbol, Boolean, Object]
|
51
51
|
|
52
52
|
@options = {}
|
53
53
|
|
54
54
|
# Define a new option called +name+ (a Symbol) with the given +type+ (String, Integer, Float,
|
55
|
-
# Symbol, Boolean,
|
55
|
+
# Symbol, Boolean, Object), default value +default+ and the description +desc+. If a block is
|
56
|
+
# specified, it should validate the value and either raise an error or return a valid value.
|
56
57
|
#
|
57
|
-
# The type 'Object' should only be used
|
58
|
-
#
|
59
|
-
|
58
|
+
# The type 'Object' should only be used for complex types for which none of the other types
|
59
|
+
# suffices. A block needs to be specified when using type 'Object' and it has to cope with
|
60
|
+
# a value given as string and as the opaque type.
|
61
|
+
def self.define(name, type, default, desc, &block)
|
60
62
|
raise ArgumentError, "Option name #{name} is already used" if @options.has_key?(name)
|
61
63
|
raise ArgumentError, "Invalid option type #{type} specified" if !ALLOWED_TYPES.include?(type)
|
62
64
|
raise ArgumentError, "Invalid type for default value" if !(type === default) && !default.nil?
|
63
|
-
|
65
|
+
raise ArgumentError, "Missing validator block" if type == Object && block.nil?
|
66
|
+
@options[name] = Definition.new(name, type, default, desc, block)
|
64
67
|
end
|
65
68
|
|
66
69
|
# Return all option definitions.
|
@@ -98,21 +101,22 @@ module Kramdown
|
|
98
101
|
# String and then to the correct type.
|
99
102
|
def self.parse(name, data)
|
100
103
|
raise ArgumentError, "No option named #{name} defined" if !@options.has_key?(name)
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
data.split(/\s+/)
|
104
|
+
if !(@options[name].type === data)
|
105
|
+
data = data.to_s
|
106
|
+
data = if @options[name].type == String
|
107
|
+
data
|
108
|
+
elsif @options[name].type == Integer
|
109
|
+
Integer(data) rescue raise Kramdown::Error, "Invalid integer value for option '#{name}': '#{data}'"
|
110
|
+
elsif @options[name].type == Float
|
111
|
+
Float(data) rescue raise Kramdown::Error, "Invalid float value for option '#{name}': '#{data}'"
|
112
|
+
elsif @options[name].type == Symbol
|
113
|
+
(data.strip.empty? ? nil : data.to_sym)
|
114
|
+
elsif @options[name].type == Boolean
|
115
|
+
data.downcase.strip != 'false' && !data.empty?
|
116
|
+
end
|
115
117
|
end
|
118
|
+
data = @options[name].validator[data] if @options[name].validator
|
119
|
+
data
|
116
120
|
end
|
117
121
|
|
118
122
|
# ----------------------------
|
@@ -169,7 +173,7 @@ EOF
|
|
169
173
|
Process kramdown syntax in block HTML tags
|
170
174
|
|
171
175
|
If this option is `true`, the kramdown parser processes the content of
|
172
|
-
block HTML tags as text containing block
|
176
|
+
block HTML tags as text containing block-level elements. Since this is
|
173
177
|
not wanted normally, the default is `false`. It is normally better to
|
174
178
|
selectively enable kramdown processing via the markdown attribute.
|
175
179
|
|
@@ -181,7 +185,7 @@ EOF
|
|
181
185
|
Process kramdown syntax in span HTML tags
|
182
186
|
|
183
187
|
If this option is `true`, the kramdown parser processes the content of
|
184
|
-
span HTML tags as text containing span
|
188
|
+
span HTML tags as text containing span-level elements.
|
185
189
|
|
186
190
|
Default: true
|
187
191
|
Used by: kramdown parser
|
@@ -275,14 +279,45 @@ Default: :as_char
|
|
275
279
|
Used by: HTML converter, kramdown converter
|
276
280
|
EOF
|
277
281
|
|
278
|
-
define(:toc_depth, Integer,
|
279
|
-
Defines the maximum level of headers which will be used to generate the table of
|
282
|
+
define(:toc_depth, Integer, -1, <<EOF)
|
283
|
+
DEPRECATED: Defines the maximum level of headers which will be used to generate the table of
|
280
284
|
contents. For instance, with a value of 2, toc entries will be generated for h1
|
281
285
|
and h2 headers but not for h3, h4, etc. A value of 0 uses all header levels.
|
282
286
|
|
283
|
-
|
287
|
+
Use option toc_levels instead!
|
288
|
+
|
289
|
+
Default: -1
|
290
|
+
Used by: HTML/Latex converter
|
291
|
+
EOF
|
292
|
+
|
293
|
+
define(:toc_levels, Object, (1..6).to_a, <<EOF) do |val|
|
294
|
+
Defines the levels that are used for the table of contents
|
295
|
+
|
296
|
+
The individual levels can be specified by separating them with commas
|
297
|
+
(e.g. 1,2,3) or by using the range syntax (e.g. 1..3). Only the
|
298
|
+
specified levels are used for the table of contents.
|
299
|
+
|
300
|
+
Default: 1..6
|
284
301
|
Used by: HTML/Latex converter
|
285
302
|
EOF
|
303
|
+
if String === val
|
304
|
+
if val =~ /^(\d)\.\.(\d)$/
|
305
|
+
val = Range.new($1.to_i, $2.to_i).to_a
|
306
|
+
elsif val =~ /^\d(?:,\d)*$/
|
307
|
+
val = val.split(/,/).map {|s| s.to_i}.uniq
|
308
|
+
else
|
309
|
+
raise Kramdown::Error, "Invalid syntax for option toc_levels"
|
310
|
+
end
|
311
|
+
elsif Array === val
|
312
|
+
val = val.map {|s| s.to_i}.uniq
|
313
|
+
else
|
314
|
+
raise Kramdown::Error, "Invalid type #{val.class} for option toc_levels"
|
315
|
+
end
|
316
|
+
if val.any? {|i| !(1..6).include?(i)}
|
317
|
+
raise Kramdown::Error, "Level numbers for option toc_levels have to be integers from 1 to 6"
|
318
|
+
end
|
319
|
+
val
|
320
|
+
end
|
286
321
|
|
287
322
|
define(:line_width, Integer, 72, <<EOF)
|
288
323
|
Defines the line width to be used when outputting a document
|
@@ -291,6 +326,26 @@ Default: 72
|
|
291
326
|
Used by: kramdown converter
|
292
327
|
EOF
|
293
328
|
|
329
|
+
define(:latex_headers, Object, %w{section subsection subsubsection paragraph subparagraph subparagraph}, <<EOF) do |val|
|
330
|
+
Defines the LaTeX commands for different header levels
|
331
|
+
|
332
|
+
The commands for the header levels one to six can be specified by
|
333
|
+
separating them with commas.
|
334
|
+
|
335
|
+
Default: section,subsection,subsubsection,paragraph,subparagraph,subsubparagraph
|
336
|
+
Used by: Latex converter
|
337
|
+
EOF
|
338
|
+
if String === val
|
339
|
+
val = val.split(/,/)
|
340
|
+
elsif !(Array === val)
|
341
|
+
raise Kramdown::Error, "Invalid type #{val.class} for option latex_headers"
|
342
|
+
end
|
343
|
+
if val.size != 6
|
344
|
+
raise Kramdown::Error, "Option latex_headers needs exactly six LaTeX commands"
|
345
|
+
end
|
346
|
+
val
|
347
|
+
end
|
348
|
+
|
294
349
|
end
|
295
350
|
|
296
351
|
end
|
data/lib/kramdown/parser.rb
CHANGED
@@ -22,17 +22,17 @@
|
|
22
22
|
|
23
23
|
module Kramdown
|
24
24
|
|
25
|
-
#
|
25
|
+
# This module contains all available parsers. A parser takes an input string and converts the
|
26
|
+
# string to an element tree.
|
26
27
|
#
|
27
|
-
#
|
28
|
-
#
|
29
|
-
# * Kramdown for parsing documents in kramdown format
|
30
|
-
# * Html for parsing HTML documents
|
28
|
+
# New parsers should be derived from the Base class which provides common functionality - see its
|
29
|
+
# API documentation for how to create a custom parser class.
|
31
30
|
module Parser
|
32
31
|
|
33
32
|
autoload :Base, 'kramdown/parser/base'
|
34
33
|
autoload :Kramdown, 'kramdown/parser/kramdown'
|
35
34
|
autoload :Html, 'kramdown/parser/html'
|
35
|
+
autoload :Markdown, 'kramdown/parser/markdown'
|
36
36
|
|
37
37
|
end
|
38
38
|
|
data/lib/kramdown/parser/base.rb
CHANGED
@@ -24,37 +24,79 @@ module Kramdown
|
|
24
24
|
|
25
25
|
module Parser
|
26
26
|
|
27
|
-
# == Base class for parsers
|
27
|
+
# == \Base class for parsers
|
28
28
|
#
|
29
29
|
# This class serves as base class for parsers. It provides common methods that can/should be
|
30
30
|
# used by all parsers, especially by those using StringScanner for parsing.
|
31
31
|
#
|
32
|
+
# A parser object is used as a throw-away object, i.e. it is only used for storing the needed
|
33
|
+
# state information during parsing. Therefore one can't instantiate a parser object directly but
|
34
|
+
# only use the Base::parse method.
|
35
|
+
#
|
36
|
+
# == Implementing a parser
|
37
|
+
#
|
38
|
+
# Implementing a new parser is rather easy: just derive a new class from this class and put it
|
39
|
+
# in the Kramdown::Parser module -- the latter is needed so that the auto-detection of the new
|
40
|
+
# parser works correctly. Then you need to implement the <tt>#parse</tt> method which has to
|
41
|
+
# contain the parsing code.
|
42
|
+
#
|
43
|
+
# Have a look at the Base::parse, Base::new and Base#parse methods for additional information!
|
32
44
|
class Base
|
33
45
|
|
34
|
-
#
|
35
|
-
|
36
|
-
|
46
|
+
# The hash with the parsing options.
|
47
|
+
attr_reader :options
|
48
|
+
|
49
|
+
# The array with the parser warnings.
|
50
|
+
attr_reader :warnings
|
51
|
+
|
52
|
+
# The original source string.
|
53
|
+
attr_reader :source
|
54
|
+
|
55
|
+
# The root element of the element tree that is created from the source string.
|
56
|
+
attr_reader :root
|
57
|
+
|
58
|
+
# Initialize the parser object with the +source+ string and the parsing +options+.
|
59
|
+
#
|
60
|
+
# The <tt>@root</tt> element, the <tt>@warnings</tt> array and <tt>@text_type</tt> (specifies
|
61
|
+
# the default type for newly created text nodes) are automatically initialized.
|
62
|
+
def initialize(source, options)
|
63
|
+
@source = source
|
64
|
+
@options = Kramdown::Options.merge(options)
|
65
|
+
@root = Element.new(:root, nil, nil, :encoding => (RUBY_VERSION >= '1.9' ? source.encoding : nil))
|
66
|
+
@warnings = []
|
37
67
|
@text_type = :text
|
38
68
|
end
|
39
69
|
private_class_method(:new, :allocate)
|
40
70
|
|
41
|
-
# Parse the +source+ string into an element tree, using the
|
42
|
-
#
|
71
|
+
# Parse the +source+ string into an element tree, possibly using the parsing +options+, and
|
72
|
+
# return the root element of the element tree and an array with warning messages.
|
43
73
|
#
|
44
74
|
# Initializes a new instance of the calling class and then calls the #parse method that must
|
45
75
|
# be implemented by each subclass.
|
46
|
-
def self.parse(source,
|
47
|
-
new(
|
76
|
+
def self.parse(source, options = {})
|
77
|
+
parser = new(source, options)
|
78
|
+
parser.parse
|
79
|
+
[parser.root, parser.warnings]
|
48
80
|
end
|
49
81
|
|
82
|
+
# Parse the source string into an element tree.
|
83
|
+
#
|
84
|
+
# The parsing code should parse the source provided in <tt>@source</tt> and build an element
|
85
|
+
# tree the root of which should be <tt>@root</tt>.
|
86
|
+
#
|
87
|
+
# This is the only method that has to be implemented by sub-classes!
|
88
|
+
def parse
|
89
|
+
raise NotImplementedError
|
90
|
+
end
|
50
91
|
|
51
|
-
# Add the given warning +text+ to the warning array
|
92
|
+
# Add the given warning +text+ to the warning array.
|
52
93
|
def warning(text)
|
53
|
-
@
|
94
|
+
@warnings << text
|
54
95
|
#TODO: add position information
|
55
96
|
end
|
56
97
|
|
57
|
-
# Modify the string +source+ to be usable by the parser
|
98
|
+
# Modify the string +source+ to be usable by the parser (unifies line ending characters to
|
99
|
+
# <tt>\n</tt> and makes sure +source+ ends with a new line character).
|
58
100
|
def adapt_source(source)
|
59
101
|
source.gsub(/\r\n?/, "\n").chomp + "\n"
|
60
102
|
end
|
@@ -69,8 +111,8 @@ module Kramdown
|
|
69
111
|
end
|
70
112
|
end
|
71
113
|
|
72
|
-
# Extract the part of the StringScanner +
|
73
|
-
# method
|
114
|
+
# Extract the part of the StringScanner +strscan+ backed string specified by the +range+. This
|
115
|
+
# method works correctly under Ruby 1.8 and Ruby 1.9.
|
74
116
|
def extract_string(range, strscan)
|
75
117
|
result = nil
|
76
118
|
if RUBY_VERSION >= '1.9'
|
data/lib/kramdown/parser/html.rb
CHANGED
@@ -28,10 +28,13 @@ module Kramdown
|
|
28
28
|
module Parser
|
29
29
|
|
30
30
|
# Used for parsing a HTML document.
|
31
|
+
#
|
32
|
+
# The parsing code is in the Parser module that can also be used by other parsers.
|
31
33
|
class Html < Base
|
32
34
|
|
33
35
|
# Contains all constants that are used when parsing.
|
34
36
|
module Constants
|
37
|
+
|
35
38
|
#:stopdoc:
|
36
39
|
# The following regexps are based on the ones used by REXML, with some slight modifications.
|
37
40
|
HTML_DOCTYPE_RE = /<!DOCTYPE.*?>/m
|
@@ -43,17 +46,17 @@ module Kramdown
|
|
43
46
|
HTML_ENTITY_RE = /&([\w:][\-\w\.:]*);|&#(\d+);|&\#x([0-9a-fA-F]+);/
|
44
47
|
|
45
48
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
49
|
+
HTML_CONTENT_MODEL_BLOCK = %w{applet button blockquote body colgroup dd div dl fieldset
|
50
|
+
form iframe li map noscript object ol table tbody thead tfoot tr td ul}
|
51
|
+
HTML_CONTENT_MODEL_SPAN = %w{a abbr acronym address b bdo big cite caption del dfn dt em
|
52
|
+
h1 h2 h3 h4 h5 h6 i ins kbd label legend optgroup p q rb rbc
|
53
|
+
rp rt rtc ruby samp select small span strong sub sup th tt var}
|
54
|
+
HTML_CONTENT_MODEL_RAW = %w{script math option textarea pre code}
|
52
55
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
56
|
+
HTML_CONTENT_MODEL = Hash.new {|h,k| h[k] = :raw}
|
57
|
+
HTML_CONTENT_MODEL_BLOCK.each {|i| HTML_CONTENT_MODEL[i] = :block}
|
58
|
+
HTML_CONTENT_MODEL_SPAN.each {|i| HTML_CONTENT_MODEL[i] = :span}
|
59
|
+
HTML_CONTENT_MODEL_RAW.each {|i| HTML_CONTENT_MODEL[i] = :raw}
|
57
60
|
|
58
61
|
# Some HTML elements like script belong to both categories (i.e. are valid in block and
|
59
62
|
# span HTML) and therefore don't appear!
|
@@ -69,15 +72,18 @@ module Kramdown
|
|
69
72
|
|
70
73
|
# Contains the parsing methods. This module can be mixed into any parser to get HTML parsing
|
71
74
|
# functionality. The only thing that must be provided by the class are instance variable
|
72
|
-
# <tt>@stack</tt> for storing needed state and <tt>@src</tt> (instance of StringScanner)
|
73
|
-
# the actual parsing.
|
75
|
+
# <tt>@stack</tt> for storing the needed state and <tt>@src</tt> (instance of StringScanner)
|
76
|
+
# for the actual parsing.
|
74
77
|
module Parser
|
75
78
|
|
76
79
|
include Constants
|
77
80
|
|
78
|
-
# Process the HTML start tag that has already be
|
79
|
-
#
|
80
|
-
|
81
|
+
# Process the HTML start tag that has already been <tt>scan</tt>ned/<tt>check</tt>ed.
|
82
|
+
#
|
83
|
+
# Does the common processing steps and then yields to the caller for further processing
|
84
|
+
# (first parameter is the created element, the second parameter is +true+ if the HTML
|
85
|
+
# element is already closed, i.e. contains no body).
|
86
|
+
def handle_html_start_tag # :yields: el, closed
|
81
87
|
name = @src[1]
|
82
88
|
closed = !@src[4].nil?
|
83
89
|
attrs = Utils::OrderedHash.new
|
@@ -98,18 +104,20 @@ module Kramdown
|
|
98
104
|
end
|
99
105
|
end
|
100
106
|
|
107
|
+
# Handle the HTML script tag at the current position.
|
101
108
|
def handle_html_script_tag
|
102
109
|
curpos = @src.pos
|
103
110
|
if result = @src.scan_until(/(?=<\/script\s*>)/m)
|
104
111
|
add_text(extract_string(curpos...@src.pos, @src), @tree.children.last, :raw)
|
105
112
|
@src.scan(HTML_TAG_CLOSE_RE)
|
106
113
|
else
|
107
|
-
add_text(@src.
|
114
|
+
add_text(@src.rest, @tree.children.last, :raw)
|
115
|
+
@src.terminate
|
108
116
|
warning("Found no end tag for 'script' - auto-closing it")
|
109
117
|
end
|
110
118
|
end
|
111
119
|
|
112
|
-
HTML_RAW_START = /(?=<(#{REXML::Parsers::BaseParser::UNAME_STR}|\/|!--|\?))/
|
120
|
+
HTML_RAW_START = /(?=<(#{REXML::Parsers::BaseParser::UNAME_STR}|\/|!--|\?))/ # :nodoc:
|
113
121
|
|
114
122
|
# Parse raw HTML from the current source position, storing the found elements in +el+.
|
115
123
|
# Parsing continues until one of the following criteria are fulfilled:
|
@@ -141,11 +149,11 @@ module Kramdown
|
|
141
149
|
warning("Found invalidly used HTML closing tag for '#{@src[1]}' - ignoring it")
|
142
150
|
end
|
143
151
|
else
|
144
|
-
add_text(@src.
|
152
|
+
add_text(@src.getch, @tree, :text)
|
145
153
|
end
|
146
154
|
else
|
147
|
-
|
148
|
-
|
155
|
+
add_text(@src.rest, @tree, :text)
|
156
|
+
@src.terminate
|
149
157
|
warning("Found no end tag for '#{@tree.value}' - auto-closing it") if @tree.type == :html_element
|
150
158
|
done = true
|
151
159
|
end
|
@@ -160,6 +168,8 @@ module Kramdown
|
|
160
168
|
# Converts HTML elements to native elements if possible.
|
161
169
|
class ElementConverter
|
162
170
|
|
171
|
+
# :stopdoc:
|
172
|
+
|
163
173
|
include Constants
|
164
174
|
include ::Kramdown::Utils::Entities
|
165
175
|
|
@@ -172,14 +182,18 @@ module Kramdown
|
|
172
182
|
header h1 h2 h3 h4 h5 h6 legend li nav p section td th}
|
173
183
|
SIMPLE_ELEMENTS = %w{em strong blockquote hr br img p thead tbody tfoot tr td th ul ol dl li dl dt dd}
|
174
184
|
|
175
|
-
def initialize(
|
176
|
-
@
|
185
|
+
def initialize(root)
|
186
|
+
@root = root
|
187
|
+
end
|
188
|
+
|
189
|
+
def self.convert(root, el = root)
|
190
|
+
new(root).process(el)
|
177
191
|
end
|
178
192
|
|
179
193
|
# Convert the element +el+ and its children.
|
180
194
|
def process(el, do_conversion = true, preserve_text = false, parent = nil)
|
181
195
|
case el.type
|
182
|
-
when :xml_comment, :xml_pi
|
196
|
+
when :xml_comment, :xml_pi
|
183
197
|
ptype = if parent.nil?
|
184
198
|
'div'
|
185
199
|
else
|
@@ -191,9 +205,13 @@ module Kramdown
|
|
191
205
|
else parent.type.to_s
|
192
206
|
end
|
193
207
|
end
|
194
|
-
el.options
|
208
|
+
el.options.replace({:category => (HTML_CONTENT_MODEL[ptype] == :span ? :span : :block)})
|
195
209
|
return
|
196
210
|
when :html_element
|
211
|
+
when :root
|
212
|
+
el.children.each {|c| process(c)}
|
213
|
+
remove_whitespace_children(el)
|
214
|
+
return
|
197
215
|
else return
|
198
216
|
end
|
199
217
|
|
@@ -204,7 +222,7 @@ module Kramdown
|
|
204
222
|
if do_conversion && self.class.method_defined?(mname)
|
205
223
|
send(mname, el)
|
206
224
|
elsif do_conversion && SIMPLE_ELEMENTS.include?(type)
|
207
|
-
set_basics(el, type.intern
|
225
|
+
set_basics(el, type.intern)
|
208
226
|
process_children(el, do_conversion, preserve_text)
|
209
227
|
else
|
210
228
|
process_html_element(el, do_conversion, preserve_text)
|
@@ -245,16 +263,16 @@ module Kramdown
|
|
245
263
|
Element.new(:entity, entity(val), nil, :original => src.matched)
|
246
264
|
end
|
247
265
|
else
|
248
|
-
result << Element.new(:text, src.
|
266
|
+
result << Element.new(:text, src.rest)
|
267
|
+
src.terminate
|
249
268
|
end
|
250
269
|
end
|
251
270
|
result
|
252
271
|
end
|
253
272
|
|
254
273
|
def process_html_element(el, do_conversion = true, preserve_text = false)
|
255
|
-
el.options
|
256
|
-
|
257
|
-
}
|
274
|
+
el.options.replace(:category => HTML_SPAN_ELEMENTS.include?(el.value) ? :span : :block,
|
275
|
+
:content_model => HTML_CONTENT_MODEL[el.value])
|
258
276
|
process_children(el, do_conversion, preserve_text)
|
259
277
|
end
|
260
278
|
|
@@ -266,7 +284,7 @@ module Kramdown
|
|
266
284
|
tmp = []
|
267
285
|
last_is_p = false
|
268
286
|
el.children.each do |c|
|
269
|
-
if
|
287
|
+
if Element.category(c) != :block || c.type == :text
|
270
288
|
if !last_is_p
|
271
289
|
tmp << Element.new(:p, nil, nil, :transparent => true)
|
272
290
|
last_is_p = true
|
@@ -296,14 +314,14 @@ module Kramdown
|
|
296
314
|
el.children.delete_if do |c|
|
297
315
|
i += 1
|
298
316
|
c.type == :text && c.value.strip.empty? &&
|
299
|
-
(i == 0 || i == el.children.length - 1 || (el.children[i-1]
|
300
|
-
el.children[i+1]
|
317
|
+
(i == 0 || i == el.children.length - 1 || (Element.category(el.children[i-1]) == :block &&
|
318
|
+
Element.category(el.children[i+1]) == :block))
|
301
319
|
end
|
302
320
|
end
|
303
321
|
|
304
|
-
def set_basics(el, type,
|
322
|
+
def set_basics(el, type, opts = {})
|
305
323
|
el.type = type
|
306
|
-
el.options
|
324
|
+
el.options.replace(opts)
|
307
325
|
el.value = nil
|
308
326
|
end
|
309
327
|
|
@@ -314,7 +332,7 @@ module Kramdown
|
|
314
332
|
|
315
333
|
def convert_a(el)
|
316
334
|
if el.attr['href']
|
317
|
-
set_basics(el, :a
|
335
|
+
set_basics(el, :a)
|
318
336
|
process_children(el)
|
319
337
|
else
|
320
338
|
process_html_element(el, false)
|
@@ -322,17 +340,17 @@ module Kramdown
|
|
322
340
|
end
|
323
341
|
|
324
342
|
def convert_b(el)
|
325
|
-
set_basics(el, :strong
|
343
|
+
set_basics(el, :strong)
|
326
344
|
process_children(el)
|
327
345
|
end
|
328
346
|
|
329
347
|
def convert_i(el)
|
330
|
-
set_basics(el, :em
|
348
|
+
set_basics(el, :em)
|
331
349
|
process_children(el)
|
332
350
|
end
|
333
351
|
|
334
352
|
def convert_h1(el)
|
335
|
-
set_basics(el, :header, :
|
353
|
+
set_basics(el, :header, :level => el.value[1..1].to_i)
|
336
354
|
extract_text(el, el.options[:raw_text] = '')
|
337
355
|
process_children(el)
|
338
356
|
end
|
@@ -350,12 +368,12 @@ module Kramdown
|
|
350
368
|
mem << c.value
|
351
369
|
elsif c.type == :entity
|
352
370
|
if RUBY_VERSION >= '1.9'
|
353
|
-
mem << c.value.char.encode(@
|
371
|
+
mem << c.value.char.encode(@root.options[:encoding])
|
354
372
|
elsif [60, 62, 34, 38].include?(c.value.code_point)
|
355
373
|
mem << c.value.code_point.chr
|
356
374
|
end
|
357
375
|
elsif c.type == :smart_quote || c.type == :typographic_sym
|
358
|
-
mem << entity(c.value.to_s).char.encode(@
|
376
|
+
mem << entity(c.value.to_s).char.encode(@root.options[:encoding])
|
359
377
|
else
|
360
378
|
raise "Bug - please report"
|
361
379
|
end
|
@@ -368,9 +386,9 @@ module Kramdown
|
|
368
386
|
process_html_element(el, false, true)
|
369
387
|
else
|
370
388
|
if el.value == 'code'
|
371
|
-
set_basics(el, :codespan
|
389
|
+
set_basics(el, :codespan)
|
372
390
|
else
|
373
|
-
set_basics(el, :codeblock
|
391
|
+
set_basics(el, :codeblock)
|
374
392
|
end
|
375
393
|
el.value = result.first.value
|
376
394
|
el.children.clear
|
@@ -384,8 +402,9 @@ module Kramdown
|
|
384
402
|
return
|
385
403
|
end
|
386
404
|
process_children(el)
|
387
|
-
set_basics(el, :table
|
405
|
+
set_basics(el, :table)
|
388
406
|
el.options[:alignment] = []
|
407
|
+
|
389
408
|
calc_alignment = lambda do |c|
|
390
409
|
if c.type == :tr && el.options[:alignment].empty?
|
391
410
|
el.options[:alignment] = [:default] * c.children.length
|
@@ -395,8 +414,18 @@ module Kramdown
|
|
395
414
|
end
|
396
415
|
end
|
397
416
|
calc_alignment.call(el)
|
417
|
+
|
418
|
+
change_th_type = lambda do |c|
|
419
|
+
if c.type == :th
|
420
|
+
c.type = :td
|
421
|
+
else
|
422
|
+
c.children.each {|cc| change_th_type.call(cc)}
|
423
|
+
end
|
424
|
+
end
|
425
|
+
change_th_type.call(el)
|
426
|
+
|
398
427
|
if el.children.first.type == :tr
|
399
|
-
tbody = Element.new(:tbody
|
428
|
+
tbody = Element.new(:tbody)
|
400
429
|
tbody.children = el.children
|
401
430
|
el.children = [tbody]
|
402
431
|
end
|
@@ -427,52 +456,38 @@ module Kramdown
|
|
427
456
|
end && el.children.any? {|t| t.value == 'tbody'})
|
428
457
|
end
|
429
458
|
|
430
|
-
def
|
459
|
+
def convert_script(el)
|
431
460
|
if !is_math_tag?(el)
|
432
461
|
process_html_element(el)
|
433
462
|
else
|
434
463
|
handle_math_tag(el)
|
435
464
|
end
|
436
465
|
end
|
437
|
-
alias :convert_span :convert_div
|
438
466
|
|
439
467
|
def is_math_tag?(el)
|
440
|
-
el.attr['
|
441
|
-
el.children.size == 1 && el.children.first.type == :text
|
468
|
+
el.attr['type'].to_s =~ /\bmath\/tex\b/
|
442
469
|
end
|
443
470
|
|
444
471
|
def handle_math_tag(el)
|
445
|
-
set_basics(el, :math, (el.
|
472
|
+
set_basics(el, :math, :category => (el.attr['type'] =~ /mode=display/ ? :block : :span))
|
446
473
|
el.value = el.children.shift.value
|
447
|
-
|
448
|
-
el.attr.delete('class')
|
449
|
-
else
|
450
|
-
el.attr['class'].sub!(/\s?math/, '')
|
451
|
-
end
|
452
|
-
el.value.gsub!(/&(amp|quot|gt|lt);/) do |m|
|
453
|
-
case m
|
454
|
-
when '&' then '&'
|
455
|
-
when '"' then '"'
|
456
|
-
when '>' then '>'
|
457
|
-
when '<' then '<'
|
458
|
-
end
|
459
|
-
end
|
474
|
+
el.attr.delete('type')
|
460
475
|
end
|
476
|
+
|
461
477
|
end
|
462
478
|
|
463
479
|
include Parser
|
464
480
|
|
465
|
-
# Parse
|
466
|
-
def parse
|
467
|
-
@stack = []
|
468
|
-
@tree = Element.new(:root)
|
481
|
+
# Parse the source string provided on initialization as HTML document.
|
482
|
+
def parse
|
483
|
+
@stack, @tree = [], @root
|
469
484
|
@src = StringScanner.new(adapt_source(source))
|
470
485
|
|
471
486
|
while true
|
472
487
|
if result = @src.scan(/\s*#{HTML_INSTRUCTION_RE}/)
|
473
488
|
@tree.children << Element.new(:xml_pi, result.strip, nil, :category => :block)
|
474
489
|
elsif result = @src.scan(/\s*#{HTML_DOCTYPE_RE}/)
|
475
|
-
|
490
|
+
# ignore the doctype
|
476
491
|
elsif result = @src.scan(/\s*#{HTML_COMMENT_RE}/)
|
477
492
|
@tree.children << Element.new(:xml_comment, result.strip, nil, :category => :block)
|
478
493
|
else
|
@@ -485,10 +500,7 @@ module Kramdown
|
|
485
500
|
end
|
486
501
|
parse_raw_html(@tree, &tag_handler)
|
487
502
|
|
488
|
-
|
489
|
-
@tree.children.each {|c| ec.process(c)}
|
490
|
-
ec.remove_whitespace_children(@tree)
|
491
|
-
@tree
|
503
|
+
ElementConverter.convert(@tree)
|
492
504
|
end
|
493
505
|
|
494
506
|
end
|