kramdown 0.11.0 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of kramdown might be problematic. Click here for more details.

Files changed (94) hide show
  1. data/CONTRIBUTERS +1 -1
  2. data/ChangeLog +532 -0
  3. data/README +22 -12
  4. data/Rakefile +9 -8
  5. data/VERSION +1 -1
  6. data/benchmark/benchmark.sh +61 -0
  7. data/benchmark/generate_data.rb +57 -55
  8. data/benchmark/testing.sh +1 -1
  9. data/benchmark/timing.sh +3 -3
  10. data/bin/kramdown +1 -2
  11. data/data/kramdown/document.html +2 -2
  12. data/data/kramdown/document.latex +2 -2
  13. data/doc/default.scss.css +6 -1
  14. data/doc/default.template +1 -1
  15. data/doc/documentation.page +1 -1
  16. data/doc/index.page +9 -7
  17. data/doc/installation.page +2 -3
  18. data/doc/links.markdown +1 -1
  19. data/doc/quickref.page +19 -19
  20. data/doc/syntax.page +117 -98
  21. data/doc/tests.page +8 -7
  22. data/lib/kramdown/compatibility.rb +2 -1
  23. data/lib/kramdown/converter.rb +5 -7
  24. data/lib/kramdown/converter/base.rb +87 -32
  25. data/lib/kramdown/converter/html.rb +134 -122
  26. data/lib/kramdown/converter/kramdown.rb +24 -25
  27. data/lib/kramdown/converter/latex.rb +65 -55
  28. data/lib/kramdown/document.rb +487 -42
  29. data/lib/kramdown/error.rb +3 -0
  30. data/lib/kramdown/options.rb +83 -28
  31. data/lib/kramdown/parser.rb +5 -5
  32. data/lib/kramdown/parser/base.rb +55 -13
  33. data/lib/kramdown/parser/html.rb +83 -71
  34. data/lib/kramdown/parser/kramdown.rb +73 -54
  35. data/lib/kramdown/parser/kramdown/abbreviation.rb +17 -12
  36. data/lib/kramdown/parser/kramdown/autolink.rb +2 -3
  37. data/lib/kramdown/parser/kramdown/blank_line.rb +1 -1
  38. data/lib/kramdown/parser/kramdown/block_boundary.rb +2 -2
  39. data/lib/kramdown/parser/kramdown/blockquote.rb +2 -2
  40. data/lib/kramdown/parser/kramdown/codeblock.rb +5 -2
  41. data/lib/kramdown/parser/kramdown/codespan.rb +1 -2
  42. data/lib/kramdown/parser/kramdown/emphasis.rb +1 -1
  43. data/lib/kramdown/parser/kramdown/escaped_chars.rb +1 -1
  44. data/lib/kramdown/parser/kramdown/extensions.rb +204 -0
  45. data/lib/kramdown/parser/kramdown/footnote.rb +7 -7
  46. data/lib/kramdown/parser/kramdown/header.rb +4 -2
  47. data/lib/kramdown/parser/kramdown/horizontal_rule.rb +1 -1
  48. data/lib/kramdown/parser/kramdown/html.rb +39 -45
  49. data/lib/kramdown/parser/kramdown/link.rb +19 -29
  50. data/lib/kramdown/parser/kramdown/list.rb +13 -13
  51. data/lib/kramdown/parser/kramdown/math.rb +1 -1
  52. data/lib/kramdown/parser/kramdown/paragraph.rb +5 -4
  53. data/lib/kramdown/parser/kramdown/smart_quotes.rb +1 -1
  54. data/lib/kramdown/parser/kramdown/table.rb +51 -12
  55. data/lib/kramdown/parser/markdown.rb +69 -0
  56. data/lib/kramdown/utils.rb +2 -2
  57. data/lib/kramdown/utils/entities.rb +10 -1
  58. data/lib/kramdown/utils/html.rb +22 -11
  59. data/lib/kramdown/utils/ordered_hash.rb +44 -40
  60. data/lib/kramdown/version.rb +1 -1
  61. data/man/man1/kramdown.1 +31 -4
  62. data/test/testcases/block/08_list/item_ial.html +1 -1
  63. data/test/testcases/block/11_ial/nested.html +11 -0
  64. data/test/testcases/block/11_ial/nested.text +15 -0
  65. data/test/testcases/block/13_definition_list/item_ial.html +1 -1
  66. data/test/testcases/block/14_table/escaping.html +52 -0
  67. data/test/testcases/block/14_table/escaping.text +19 -0
  68. data/test/testcases/block/14_table/simple.html.19 +139 -0
  69. data/test/testcases/block/14_table/simple.text +1 -1
  70. data/test/testcases/block/15_math/normal.html +13 -13
  71. data/test/testcases/block/16_toc/{no_toc_depth.html → no_toc.html} +0 -0
  72. data/test/testcases/block/16_toc/{no_toc_depth.options → no_toc.options} +0 -0
  73. data/test/testcases/block/16_toc/{no_toc_depth.text → no_toc.text} +0 -0
  74. data/test/testcases/block/16_toc/{toc_depth_2.html → toc_levels.html} +4 -4
  75. data/test/testcases/block/16_toc/toc_levels.options +1 -0
  76. data/test/testcases/block/16_toc/{toc_depth_2.text → toc_levels.text} +0 -0
  77. data/test/testcases/span/escaped_chars/normal.html +4 -0
  78. data/test/testcases/span/escaped_chars/normal.text +4 -0
  79. data/test/testcases/span/ial/simple.html +1 -1
  80. data/test/testcases/span/math/normal.html +2 -2
  81. metadata +20 -25
  82. data/benchmark/historic-jruby-1.4.0.dat +0 -7
  83. data/benchmark/historic-ruby-1.8.6.dat +0 -7
  84. data/benchmark/historic-ruby-1.8.7.dat +0 -7
  85. data/benchmark/historic-ruby-1.9.1p243.dat +0 -7
  86. data/benchmark/historic-ruby-1.9.2dev.dat +0 -7
  87. data/benchmark/static-jruby-1.4.0.dat +0 -7
  88. data/benchmark/static-ruby-1.8.6.dat +0 -7
  89. data/benchmark/static-ruby-1.8.7.dat +0 -7
  90. data/benchmark/static-ruby-1.9.1p243.dat +0 -7
  91. data/benchmark/static-ruby-1.9.2dev.dat +0 -7
  92. data/lib/kramdown/parser/kramdown/attribute_list.rb +0 -111
  93. data/lib/kramdown/parser/kramdown/extension.rb +0 -116
  94. data/test/testcases/block/16_toc/toc_depth_2.options +0 -1
@@ -22,6 +22,9 @@
22
22
 
23
23
  module Kramdown
24
24
 
25
+ # This error is raised when an error condition is encountered.
26
+ #
27
+ # *Note* that this error is only raised by the support framework for the parsers and converters.
25
28
  class Error < RuntimeError; end
26
29
 
27
30
  end
@@ -40,27 +40,30 @@ module Kramdown
40
40
  # ----------------------------
41
41
  # :section: Option definitions
42
42
  #
43
- # This sections informs describes the methods that can be used on the Options module.
43
+ # This section describes the methods that can be used on the Options module.
44
44
  # ----------------------------
45
45
 
46
- # Contains the definition of an option.
47
- Definition = Struct.new(:name, :type, :default, :desc)
46
+ # Struct class for storing the definition of an option.
47
+ Definition = Struct.new(:name, :type, :default, :desc, :validator)
48
48
 
49
49
  # Allowed option types.
50
- ALLOWED_TYPES = [String, Integer, Float, Symbol, Boolean, Array, Object]
50
+ ALLOWED_TYPES = [String, Integer, Float, Symbol, Boolean, Object]
51
51
 
52
52
  @options = {}
53
53
 
54
54
  # Define a new option called +name+ (a Symbol) with the given +type+ (String, Integer, Float,
55
- # Symbol, Boolean, Array, Object), default value +default+ and the description +desc+.
55
+ # Symbol, Boolean, Object), default value +default+ and the description +desc+. If a block is
56
+ # specified, it should validate the value and either raise an error or return a valid value.
56
57
  #
57
- # The type 'Object' should only be used if none of the other types suffices because such an
58
- # option will be opaque and cannot be used, for example, by CLI command!
59
- def self.define(name, type, default, desc)
58
+ # The type 'Object' should only be used for complex types for which none of the other types
59
+ # suffices. A block needs to be specified when using type 'Object' and it has to cope with
60
+ # a value given as string and as the opaque type.
61
+ def self.define(name, type, default, desc, &block)
60
62
  raise ArgumentError, "Option name #{name} is already used" if @options.has_key?(name)
61
63
  raise ArgumentError, "Invalid option type #{type} specified" if !ALLOWED_TYPES.include?(type)
62
64
  raise ArgumentError, "Invalid type for default value" if !(type === default) && !default.nil?
63
- @options[name] = Definition.new(name, type, default, desc)
65
+ raise ArgumentError, "Missing validator block" if type == Object && block.nil?
66
+ @options[name] = Definition.new(name, type, default, desc, block)
64
67
  end
65
68
 
66
69
  # Return all option definitions.
@@ -98,21 +101,22 @@ module Kramdown
98
101
  # String and then to the correct type.
99
102
  def self.parse(name, data)
100
103
  raise ArgumentError, "No option named #{name} defined" if !@options.has_key?(name)
101
- return data if @options[name].type === data
102
- data = data.to_s
103
- if @options[name].type == String
104
- data
105
- elsif @options[name].type == Integer
106
- Integer(data)
107
- elsif @options[name].type == Float
108
- Float(data)
109
- elsif @options[name].type == Symbol
110
- (data.strip.empty? ? nil : data.to_sym)
111
- elsif @options[name].type == Boolean
112
- data.downcase.strip != 'false' && !data.empty?
113
- elsif @options[name].type == Array
114
- data.split(/\s+/)
104
+ if !(@options[name].type === data)
105
+ data = data.to_s
106
+ data = if @options[name].type == String
107
+ data
108
+ elsif @options[name].type == Integer
109
+ Integer(data) rescue raise Kramdown::Error, "Invalid integer value for option '#{name}': '#{data}'"
110
+ elsif @options[name].type == Float
111
+ Float(data) rescue raise Kramdown::Error, "Invalid float value for option '#{name}': '#{data}'"
112
+ elsif @options[name].type == Symbol
113
+ (data.strip.empty? ? nil : data.to_sym)
114
+ elsif @options[name].type == Boolean
115
+ data.downcase.strip != 'false' && !data.empty?
116
+ end
115
117
  end
118
+ data = @options[name].validator[data] if @options[name].validator
119
+ data
116
120
  end
117
121
 
118
122
  # ----------------------------
@@ -169,7 +173,7 @@ EOF
169
173
  Process kramdown syntax in block HTML tags
170
174
 
171
175
  If this option is `true`, the kramdown parser processes the content of
172
- block HTML tags as text containing block level elements. Since this is
176
+ block HTML tags as text containing block-level elements. Since this is
173
177
  not wanted normally, the default is `false`. It is normally better to
174
178
  selectively enable kramdown processing via the markdown attribute.
175
179
 
@@ -181,7 +185,7 @@ EOF
181
185
  Process kramdown syntax in span HTML tags
182
186
 
183
187
  If this option is `true`, the kramdown parser processes the content of
184
- span HTML tags as text containing span level elements.
188
+ span HTML tags as text containing span-level elements.
185
189
 
186
190
  Default: true
187
191
  Used by: kramdown parser
@@ -275,14 +279,45 @@ Default: :as_char
275
279
  Used by: HTML converter, kramdown converter
276
280
  EOF
277
281
 
278
- define(:toc_depth, Integer, 0, <<EOF)
279
- Defines the maximum level of headers which will be used to generate the table of
282
+ define(:toc_depth, Integer, -1, <<EOF)
283
+ DEPRECATED: Defines the maximum level of headers which will be used to generate the table of
280
284
  contents. For instance, with a value of 2, toc entries will be generated for h1
281
285
  and h2 headers but not for h3, h4, etc. A value of 0 uses all header levels.
282
286
 
283
- Default: 0
287
+ Use option toc_levels instead!
288
+
289
+ Default: -1
290
+ Used by: HTML/Latex converter
291
+ EOF
292
+
293
+ define(:toc_levels, Object, (1..6).to_a, <<EOF) do |val|
294
+ Defines the levels that are used for the table of contents
295
+
296
+ The individual levels can be specified by separating them with commas
297
+ (e.g. 1,2,3) or by using the range syntax (e.g. 1..3). Only the
298
+ specified levels are used for the table of contents.
299
+
300
+ Default: 1..6
284
301
  Used by: HTML/Latex converter
285
302
  EOF
303
+ if String === val
304
+ if val =~ /^(\d)\.\.(\d)$/
305
+ val = Range.new($1.to_i, $2.to_i).to_a
306
+ elsif val =~ /^\d(?:,\d)*$/
307
+ val = val.split(/,/).map {|s| s.to_i}.uniq
308
+ else
309
+ raise Kramdown::Error, "Invalid syntax for option toc_levels"
310
+ end
311
+ elsif Array === val
312
+ val = val.map {|s| s.to_i}.uniq
313
+ else
314
+ raise Kramdown::Error, "Invalid type #{val.class} for option toc_levels"
315
+ end
316
+ if val.any? {|i| !(1..6).include?(i)}
317
+ raise Kramdown::Error, "Level numbers for option toc_levels have to be integers from 1 to 6"
318
+ end
319
+ val
320
+ end
286
321
 
287
322
  define(:line_width, Integer, 72, <<EOF)
288
323
  Defines the line width to be used when outputting a document
@@ -291,6 +326,26 @@ Default: 72
291
326
  Used by: kramdown converter
292
327
  EOF
293
328
 
329
+ define(:latex_headers, Object, %w{section subsection subsubsection paragraph subparagraph subparagraph}, <<EOF) do |val|
330
+ Defines the LaTeX commands for different header levels
331
+
332
+ The commands for the header levels one to six can be specified by
333
+ separating them with commas.
334
+
335
+ Default: section,subsection,subsubsection,paragraph,subparagraph,subsubparagraph
336
+ Used by: Latex converter
337
+ EOF
338
+ if String === val
339
+ val = val.split(/,/)
340
+ elsif !(Array === val)
341
+ raise Kramdown::Error, "Invalid type #{val.class} for option latex_headers"
342
+ end
343
+ if val.size != 6
344
+ raise Kramdown::Error, "Option latex_headers needs exactly six LaTeX commands"
345
+ end
346
+ val
347
+ end
348
+
294
349
  end
295
350
 
296
351
  end
@@ -22,17 +22,17 @@
22
22
 
23
23
  module Kramdown
24
24
 
25
- # == Parser Module
25
+ # This module contains all available parsers. A parser takes an input string and converts the
26
+ # string to an element tree.
26
27
  #
27
- # This module contains all available parsers. Currently, there two parsers:
28
- #
29
- # * Kramdown for parsing documents in kramdown format
30
- # * Html for parsing HTML documents
28
+ # New parsers should be derived from the Base class which provides common functionality - see its
29
+ # API documentation for how to create a custom parser class.
31
30
  module Parser
32
31
 
33
32
  autoload :Base, 'kramdown/parser/base'
34
33
  autoload :Kramdown, 'kramdown/parser/kramdown'
35
34
  autoload :Html, 'kramdown/parser/html'
35
+ autoload :Markdown, 'kramdown/parser/markdown'
36
36
 
37
37
  end
38
38
 
@@ -24,37 +24,79 @@ module Kramdown
24
24
 
25
25
  module Parser
26
26
 
27
- # == Base class for parsers
27
+ # == \Base class for parsers
28
28
  #
29
29
  # This class serves as base class for parsers. It provides common methods that can/should be
30
30
  # used by all parsers, especially by those using StringScanner for parsing.
31
31
  #
32
+ # A parser object is used as a throw-away object, i.e. it is only used for storing the needed
33
+ # state information during parsing. Therefore one can't instantiate a parser object directly but
34
+ # only use the Base::parse method.
35
+ #
36
+ # == Implementing a parser
37
+ #
38
+ # Implementing a new parser is rather easy: just derive a new class from this class and put it
39
+ # in the Kramdown::Parser module -- the latter is needed so that the auto-detection of the new
40
+ # parser works correctly. Then you need to implement the <tt>#parse</tt> method which has to
41
+ # contain the parsing code.
42
+ #
43
+ # Have a look at the Base::parse, Base::new and Base#parse methods for additional information!
32
44
  class Base
33
45
 
34
- # Initialize the parser with the given Kramdown document +doc+.
35
- def initialize(doc)
36
- @doc = doc
46
+ # The hash with the parsing options.
47
+ attr_reader :options
48
+
49
+ # The array with the parser warnings.
50
+ attr_reader :warnings
51
+
52
+ # The original source string.
53
+ attr_reader :source
54
+
55
+ # The root element of the element tree that is created from the source string.
56
+ attr_reader :root
57
+
58
+ # Initialize the parser object with the +source+ string and the parsing +options+.
59
+ #
60
+ # The <tt>@root</tt> element, the <tt>@warnings</tt> array and <tt>@text_type</tt> (specifies
61
+ # the default type for newly created text nodes) are automatically initialized.
62
+ def initialize(source, options)
63
+ @source = source
64
+ @options = Kramdown::Options.merge(options)
65
+ @root = Element.new(:root, nil, nil, :encoding => (RUBY_VERSION >= '1.9' ? source.encoding : nil))
66
+ @warnings = []
37
67
  @text_type = :text
38
68
  end
39
69
  private_class_method(:new, :allocate)
40
70
 
41
- # Parse the +source+ string into an element tree, using the information provided by the
42
- # Kramdown document +doc+.
71
+ # Parse the +source+ string into an element tree, possibly using the parsing +options+, and
72
+ # return the root element of the element tree and an array with warning messages.
43
73
  #
44
74
  # Initializes a new instance of the calling class and then calls the #parse method that must
45
75
  # be implemented by each subclass.
46
- def self.parse(source, doc)
47
- new(doc).parse(source)
76
+ def self.parse(source, options = {})
77
+ parser = new(source, options)
78
+ parser.parse
79
+ [parser.root, parser.warnings]
48
80
  end
49
81
 
82
+ # Parse the source string into an element tree.
83
+ #
84
+ # The parsing code should parse the source provided in <tt>@source</tt> and build an element
85
+ # tree the root of which should be <tt>@root</tt>.
86
+ #
87
+ # This is the only method that has to be implemented by sub-classes!
88
+ def parse
89
+ raise NotImplementedError
90
+ end
50
91
 
51
- # Add the given warning +text+ to the warning array of the Kramdown document.
92
+ # Add the given warning +text+ to the warning array.
52
93
  def warning(text)
53
- @doc.warnings << text
94
+ @warnings << text
54
95
  #TODO: add position information
55
96
  end
56
97
 
57
- # Modify the string +source+ to be usable by the parser.
98
+ # Modify the string +source+ to be usable by the parser (unifies line ending characters to
99
+ # <tt>\n</tt> and makes sure +source+ ends with a new line character).
58
100
  def adapt_source(source)
59
101
  source.gsub(/\r\n?/, "\n").chomp + "\n"
60
102
  end
@@ -69,8 +111,8 @@ module Kramdown
69
111
  end
70
112
  end
71
113
 
72
- # Extract the part of the StringScanner +srcscan+ backed string specified by the +range+. This
73
- # method also works correctly under Ruby 1.9.
114
+ # Extract the part of the StringScanner +strscan+ backed string specified by the +range+. This
115
+ # method works correctly under Ruby 1.8 and Ruby 1.9.
74
116
  def extract_string(range, strscan)
75
117
  result = nil
76
118
  if RUBY_VERSION >= '1.9'
@@ -28,10 +28,13 @@ module Kramdown
28
28
  module Parser
29
29
 
30
30
  # Used for parsing a HTML document.
31
+ #
32
+ # The parsing code is in the Parser module that can also be used by other parsers.
31
33
  class Html < Base
32
34
 
33
35
  # Contains all constants that are used when parsing.
34
36
  module Constants
37
+
35
38
  #:stopdoc:
36
39
  # The following regexps are based on the ones used by REXML, with some slight modifications.
37
40
  HTML_DOCTYPE_RE = /<!DOCTYPE.*?>/m
@@ -43,17 +46,17 @@ module Kramdown
43
46
  HTML_ENTITY_RE = /&([\w:][\-\w\.:]*);|&#(\d+);|&\#x([0-9a-fA-F]+);/
44
47
 
45
48
 
46
- HTML_PARSE_AS_BLOCK = %w{applet button blockquote body colgroup dd div dl fieldset form iframe li
47
- map noscript object ol table tbody thead tfoot tr td ul}
48
- HTML_PARSE_AS_SPAN = %w{a abbr acronym address b bdo big cite caption del dfn dt em
49
- h1 h2 h3 h4 h5 h6 i ins kbd label legend optgroup p q rb rbc
50
- rp rt rtc ruby samp select small span strong sub sup th tt var}
51
- HTML_PARSE_AS_RAW = %w{script math option textarea pre code}
49
+ HTML_CONTENT_MODEL_BLOCK = %w{applet button blockquote body colgroup dd div dl fieldset
50
+ form iframe li map noscript object ol table tbody thead tfoot tr td ul}
51
+ HTML_CONTENT_MODEL_SPAN = %w{a abbr acronym address b bdo big cite caption del dfn dt em
52
+ h1 h2 h3 h4 h5 h6 i ins kbd label legend optgroup p q rb rbc
53
+ rp rt rtc ruby samp select small span strong sub sup th tt var}
54
+ HTML_CONTENT_MODEL_RAW = %w{script math option textarea pre code}
52
55
 
53
- HTML_PARSE_AS = Hash.new {|h,k| h[k] = :raw}
54
- HTML_PARSE_AS_BLOCK.each {|i| HTML_PARSE_AS[i] = :block}
55
- HTML_PARSE_AS_SPAN.each {|i| HTML_PARSE_AS[i] = :span}
56
- HTML_PARSE_AS_RAW.each {|i| HTML_PARSE_AS[i] = :raw}
56
+ HTML_CONTENT_MODEL = Hash.new {|h,k| h[k] = :raw}
57
+ HTML_CONTENT_MODEL_BLOCK.each {|i| HTML_CONTENT_MODEL[i] = :block}
58
+ HTML_CONTENT_MODEL_SPAN.each {|i| HTML_CONTENT_MODEL[i] = :span}
59
+ HTML_CONTENT_MODEL_RAW.each {|i| HTML_CONTENT_MODEL[i] = :raw}
57
60
 
58
61
  # Some HTML elements like script belong to both categories (i.e. are valid in block and
59
62
  # span HTML) and don't appear therefore!
@@ -69,15 +72,18 @@ module Kramdown
69
72
 
70
73
  # Contains the parsing methods. This module can be mixed into any parser to get HTML parsing
71
74
  # functionality. The only things that must be provided by the class are the instance variables
72
- # <tt>@stack</tt> for storing needed state and <tt>@src</tt> (instance of StringScanner) for
73
- # the actual parsing.
75
+ # <tt>@stack</tt> for storing the needed state and <tt>@src</tt> (instance of StringScanner)
76
+ # for the actual parsing.
74
77
  module Parser
75
78
 
76
79
  include Constants
77
80
 
78
- # Process the HTML start tag that has already be scanned/checked. Does the common processing
79
- # steps and then yields to the caller for further processing.
80
- def handle_html_start_tag
81
+ # Process the HTML start tag that has already been <tt>scan</tt>ned/<tt>check</tt>ed.
82
+ #
83
+ # Does the common processing steps and then yields to the caller for further processing
84
+ # (first parameter is the created element, the second parameter is +true+ if the HTML
85
+ # element is already closed, i.e. contains no body).
86
+ def handle_html_start_tag # :yields: el, closed
81
87
  name = @src[1]
82
88
  closed = !@src[4].nil?
83
89
  attrs = Utils::OrderedHash.new
@@ -98,18 +104,20 @@ module Kramdown
98
104
  end
99
105
  end
100
106
 
107
+ # Handle the HTML script tag at the current position.
101
108
  def handle_html_script_tag
102
109
  curpos = @src.pos
103
110
  if result = @src.scan_until(/(?=<\/script\s*>)/m)
104
111
  add_text(extract_string(curpos...@src.pos, @src), @tree.children.last, :raw)
105
112
  @src.scan(HTML_TAG_CLOSE_RE)
106
113
  else
107
- add_text(@src.scan(/.*/m), @tree.children.last, :raw)
114
+ add_text(@src.rest, @tree.children.last, :raw)
115
+ @src.terminate
108
116
  warning("Found no end tag for 'script' - auto-closing it")
109
117
  end
110
118
  end
111
119
 
112
- HTML_RAW_START = /(?=<(#{REXML::Parsers::BaseParser::UNAME_STR}|\/|!--|\?))/
120
+ HTML_RAW_START = /(?=<(#{REXML::Parsers::BaseParser::UNAME_STR}|\/|!--|\?))/ # :nodoc:
113
121
 
114
122
  # Parse raw HTML from the current source position, storing the found elements in +el+.
115
123
  # Parsing continues until one of the following criteria are fulfilled:
@@ -141,11 +149,11 @@ module Kramdown
141
149
  warning("Found invalidly used HTML closing tag for '#{@src[1]}' - ignoring it")
142
150
  end
143
151
  else
144
- add_text(@src.scan(/./), @tree, :text)
152
+ add_text(@src.getch, @tree, :text)
145
153
  end
146
154
  else
147
- result = @src.scan(/.*/m)
148
- add_text(result, @tree, :text)
155
+ add_text(@src.rest, @tree, :text)
156
+ @src.terminate
149
157
  warning("Found no end tag for '#{@tree.value}' - auto-closing it") if @tree.type == :html_element
150
158
  done = true
151
159
  end
@@ -160,6 +168,8 @@ module Kramdown
160
168
  # Converts HTML elements to native elements if possible.
161
169
  class ElementConverter
162
170
 
171
+ # :stopdoc:
172
+
163
173
  include Constants
164
174
  include ::Kramdown::Utils::Entities
165
175
 
@@ -172,14 +182,18 @@ module Kramdown
172
182
  header h1 h2 h3 h4 h5 h6 legend li nav p section td th}
173
183
  SIMPLE_ELEMENTS = %w{em strong blockquote hr br img p thead tbody tfoot tr td th ul ol dl li dl dt dd}
174
184
 
175
- def initialize(doc)
176
- @doc = doc
185
+ def initialize(root)
186
+ @root = root
187
+ end
188
+
189
+ def self.convert(root, el = root)
190
+ new(root).process(el)
177
191
  end
178
192
 
179
193
  # Convert the element +el+ and its children.
180
194
  def process(el, do_conversion = true, preserve_text = false, parent = nil)
181
195
  case el.type
182
- when :xml_comment, :xml_pi, :html_doctype
196
+ when :xml_comment, :xml_pi
183
197
  ptype = if parent.nil?
184
198
  'div'
185
199
  else
@@ -191,9 +205,13 @@ module Kramdown
191
205
  else parent.type.to_s
192
206
  end
193
207
  end
194
- el.options = {:category => HTML_PARSE_AS_SPAN.include?(ptype) ? :span : :block}
208
+ el.options.replace({:category => (HTML_CONTENT_MODEL[ptype] == :span ? :span : :block)})
195
209
  return
196
210
  when :html_element
211
+ when :root
212
+ el.children.each {|c| process(c)}
213
+ remove_whitespace_children(el)
214
+ return
197
215
  else return
198
216
  end
199
217
 
@@ -204,7 +222,7 @@ module Kramdown
204
222
  if do_conversion && self.class.method_defined?(mname)
205
223
  send(mname, el)
206
224
  elsif do_conversion && SIMPLE_ELEMENTS.include?(type)
207
- set_basics(el, type.intern, HTML_SPAN_ELEMENTS.include?(type) ? :span : :block)
225
+ set_basics(el, type.intern)
208
226
  process_children(el, do_conversion, preserve_text)
209
227
  else
210
228
  process_html_element(el, do_conversion, preserve_text)
@@ -245,16 +263,16 @@ module Kramdown
245
263
  Element.new(:entity, entity(val), nil, :original => src.matched)
246
264
  end
247
265
  else
248
- result << Element.new(:text, src.scan(/.*/m))
266
+ result << Element.new(:text, src.rest)
267
+ src.terminate
249
268
  end
250
269
  end
251
270
  result
252
271
  end
253
272
 
254
273
  def process_html_element(el, do_conversion = true, preserve_text = false)
255
- el.options = {:category => HTML_SPAN_ELEMENTS.include?(el.value) ? :span : :block,
256
- :parse_type => HTML_PARSE_AS[el.value]
257
- }
274
+ el.options.replace(:category => HTML_SPAN_ELEMENTS.include?(el.value) ? :span : :block,
275
+ :content_model => HTML_CONTENT_MODEL[el.value])
258
276
  process_children(el, do_conversion, preserve_text)
259
277
  end
260
278
 
@@ -266,7 +284,7 @@ module Kramdown
266
284
  tmp = []
267
285
  last_is_p = false
268
286
  el.children.each do |c|
269
- if c.options[:category] != :block || c.type == :text
287
+ if Element.category(c) != :block || c.type == :text
270
288
  if !last_is_p
271
289
  tmp << Element.new(:p, nil, nil, :transparent => true)
272
290
  last_is_p = true
@@ -296,14 +314,14 @@ module Kramdown
296
314
  el.children.delete_if do |c|
297
315
  i += 1
298
316
  c.type == :text && c.value.strip.empty? &&
299
- (i == 0 || i == el.children.length - 1 || (el.children[i-1].options[:category] == :block &&
300
- el.children[i+1].options[:category] == :block))
317
+ (i == 0 || i == el.children.length - 1 || (Element.category(el.children[i-1]) == :block &&
318
+ Element.category(el.children[i+1]) == :block))
301
319
  end
302
320
  end
303
321
 
304
- def set_basics(el, type, category, opts = {})
322
+ def set_basics(el, type, opts = {})
305
323
  el.type = type
306
- el.options = {:category => category}.merge(opts)
324
+ el.options.replace(opts)
307
325
  el.value = nil
308
326
  end
309
327
 
@@ -314,7 +332,7 @@ module Kramdown
314
332
 
315
333
  def convert_a(el)
316
334
  if el.attr['href']
317
- set_basics(el, :a, :span)
335
+ set_basics(el, :a)
318
336
  process_children(el)
319
337
  else
320
338
  process_html_element(el, false)
@@ -322,17 +340,17 @@ module Kramdown
322
340
  end
323
341
 
324
342
  def convert_b(el)
325
- set_basics(el, :strong, :span)
343
+ set_basics(el, :strong)
326
344
  process_children(el)
327
345
  end
328
346
 
329
347
  def convert_i(el)
330
- set_basics(el, :em, :span)
348
+ set_basics(el, :em)
331
349
  process_children(el)
332
350
  end
333
351
 
334
352
  def convert_h1(el)
335
- set_basics(el, :header, :block, :level => el.value[1..1].to_i)
353
+ set_basics(el, :header, :level => el.value[1..1].to_i)
336
354
  extract_text(el, el.options[:raw_text] = '')
337
355
  process_children(el)
338
356
  end
@@ -350,12 +368,12 @@ module Kramdown
350
368
  mem << c.value
351
369
  elsif c.type == :entity
352
370
  if RUBY_VERSION >= '1.9'
353
- mem << c.value.char.encode(@doc.parse_infos[:encoding])
371
+ mem << c.value.char.encode(@root.options[:encoding])
354
372
  elsif [60, 62, 34, 38].include?(c.value.code_point)
355
373
  mem << c.value.code_point.chr
356
374
  end
357
375
  elsif c.type == :smart_quote || c.type == :typographic_sym
358
- mem << entity(c.value.to_s).char.encode(@doc.parse_infos[:encoding])
376
+ mem << entity(c.value.to_s).char.encode(@root.options[:encoding])
359
377
  else
360
378
  raise "Bug - please report"
361
379
  end
@@ -368,9 +386,9 @@ module Kramdown
368
386
  process_html_element(el, false, true)
369
387
  else
370
388
  if el.value == 'code'
371
- set_basics(el, :codespan, :span)
389
+ set_basics(el, :codespan)
372
390
  else
373
- set_basics(el, :codeblock, :block)
391
+ set_basics(el, :codeblock)
374
392
  end
375
393
  el.value = result.first.value
376
394
  el.children.clear
@@ -384,8 +402,9 @@ module Kramdown
384
402
  return
385
403
  end
386
404
  process_children(el)
387
- set_basics(el, :table, :block)
405
+ set_basics(el, :table)
388
406
  el.options[:alignment] = []
407
+
389
408
  calc_alignment = lambda do |c|
390
409
  if c.type == :tr && el.options[:alignment].empty?
391
410
  el.options[:alignment] = [:default] * c.children.length
@@ -395,8 +414,18 @@ module Kramdown
395
414
  end
396
415
  end
397
416
  calc_alignment.call(el)
417
+
418
+ change_th_type = lambda do |c|
419
+ if c.type == :th
420
+ c.type = :td
421
+ else
422
+ c.children.each {|cc| change_th_type.call(cc)}
423
+ end
424
+ end
425
+ change_th_type.call(el)
426
+
398
427
  if el.children.first.type == :tr
399
- tbody = Element.new(:tbody, nil, nil, :category => :block)
428
+ tbody = Element.new(:tbody)
400
429
  tbody.children = el.children
401
430
  el.children = [tbody]
402
431
  end
@@ -427,52 +456,38 @@ module Kramdown
427
456
  end && el.children.any? {|t| t.value == 'tbody'})
428
457
  end
429
458
 
430
- def convert_div(el)
459
+ def convert_script(el)
431
460
  if !is_math_tag?(el)
432
461
  process_html_element(el)
433
462
  else
434
463
  handle_math_tag(el)
435
464
  end
436
465
  end
437
- alias :convert_span :convert_div
438
466
 
439
467
  def is_math_tag?(el)
440
- el.attr['class'].to_s =~ /\bmath\b/ &&
441
- el.children.size == 1 && el.children.first.type == :text
468
+ el.attr['type'].to_s =~ /\bmath\/tex\b/
442
469
  end
443
470
 
444
471
  def handle_math_tag(el)
445
- set_basics(el, :math, (el.value == 'div' ? :block : :span))
472
+ set_basics(el, :math, :category => (el.attr['type'] =~ /mode=display/ ? :block : :span))
446
473
  el.value = el.children.shift.value
447
- if el.attr['class'] =~ /^\s*math\s*$/
448
- el.attr.delete('class')
449
- else
450
- el.attr['class'].sub!(/\s?math/, '')
451
- end
452
- el.value.gsub!(/&(amp|quot|gt|lt);/) do |m|
453
- case m
454
- when '&amp;' then '&'
455
- when '&quot;' then '"'
456
- when '&gt;' then '>'
457
- when '&lt;' then '<'
458
- end
459
- end
474
+ el.attr.delete('type')
460
475
  end
476
+
461
477
  end
462
478
 
463
479
  include Parser
464
480
 
465
- # Parse +source+ as HTML document and return the created +tree+.
466
- def parse(source)
467
- @stack = []
468
- @tree = Element.new(:root)
481
+ # Parse the source string provided on initialization as HTML document.
482
+ def parse
483
+ @stack, @tree = [], @root
469
484
  @src = StringScanner.new(adapt_source(source))
470
485
 
471
486
  while true
472
487
  if result = @src.scan(/\s*#{HTML_INSTRUCTION_RE}/)
473
488
  @tree.children << Element.new(:xml_pi, result.strip, nil, :category => :block)
474
489
  elsif result = @src.scan(/\s*#{HTML_DOCTYPE_RE}/)
475
- @tree.children << Element.new(:html_doctype, result.strip, nil, :category => :block)
490
+ # ignore the doctype
476
491
  elsif result = @src.scan(/\s*#{HTML_COMMENT_RE}/)
477
492
  @tree.children << Element.new(:xml_comment, result.strip, nil, :category => :block)
478
493
  else
@@ -485,10 +500,7 @@ module Kramdown
485
500
  end
486
501
  parse_raw_html(@tree, &tag_handler)
487
502
 
488
- ec = ElementConverter.new(@doc)
489
- @tree.children.each {|c| ec.process(c)}
490
- ec.remove_whitespace_children(@tree)
491
- @tree
503
+ ElementConverter.convert(@tree)
492
504
  end
493
505
 
494
506
  end