RubyGems - kramdown - Versions diffs - 0.11.0 → 0.12.0 - Mend

kramdown 0.11.0 → 0.12.0

Potentially problematic release.

This version of kramdown might be problematic. Click here for more details.

Files changed (94) hide show

data/CONTRIBUTERS +1 -1
data/ChangeLog +532 -0
data/README +22 -12
data/Rakefile +9 -8
data/VERSION +1 -1
data/benchmark/benchmark.sh +61 -0
data/benchmark/generate_data.rb +57 -55
data/benchmark/testing.sh +1 -1
data/benchmark/timing.sh +3 -3
data/bin/kramdown +1 -2
data/data/kramdown/document.html +2 -2
data/data/kramdown/document.latex +2 -2
data/doc/default.scss.css +6 -1
data/doc/default.template +1 -1
data/doc/documentation.page +1 -1
data/doc/index.page +9 -7
data/doc/installation.page +2 -3
data/doc/links.markdown +1 -1
data/doc/quickref.page +19 -19
data/doc/syntax.page +117 -98
data/doc/tests.page +8 -7
data/lib/kramdown/compatibility.rb +2 -1
data/lib/kramdown/converter.rb +5 -7
data/lib/kramdown/converter/base.rb +87 -32
data/lib/kramdown/converter/html.rb +134 -122
data/lib/kramdown/converter/kramdown.rb +24 -25
data/lib/kramdown/converter/latex.rb +65 -55
data/lib/kramdown/document.rb +487 -42
data/lib/kramdown/error.rb +3 -0
data/lib/kramdown/options.rb +83 -28
data/lib/kramdown/parser.rb +5 -5
data/lib/kramdown/parser/base.rb +55 -13
data/lib/kramdown/parser/html.rb +83 -71
data/lib/kramdown/parser/kramdown.rb +73 -54
data/lib/kramdown/parser/kramdown/abbreviation.rb +17 -12
data/lib/kramdown/parser/kramdown/autolink.rb +2 -3
data/lib/kramdown/parser/kramdown/blank_line.rb +1 -1
data/lib/kramdown/parser/kramdown/block_boundary.rb +2 -2
data/lib/kramdown/parser/kramdown/blockquote.rb +2 -2
data/lib/kramdown/parser/kramdown/codeblock.rb +5 -2
data/lib/kramdown/parser/kramdown/codespan.rb +1 -2
data/lib/kramdown/parser/kramdown/emphasis.rb +1 -1
data/lib/kramdown/parser/kramdown/escaped_chars.rb +1 -1
data/lib/kramdown/parser/kramdown/extensions.rb +204 -0
data/lib/kramdown/parser/kramdown/footnote.rb +7 -7
data/lib/kramdown/parser/kramdown/header.rb +4 -2
data/lib/kramdown/parser/kramdown/horizontal_rule.rb +1 -1
data/lib/kramdown/parser/kramdown/html.rb +39 -45
data/lib/kramdown/parser/kramdown/link.rb +19 -29
data/lib/kramdown/parser/kramdown/list.rb +13 -13
data/lib/kramdown/parser/kramdown/math.rb +1 -1
data/lib/kramdown/parser/kramdown/paragraph.rb +5 -4
data/lib/kramdown/parser/kramdown/smart_quotes.rb +1 -1
data/lib/kramdown/parser/kramdown/table.rb +51 -12
data/lib/kramdown/parser/markdown.rb +69 -0
data/lib/kramdown/utils.rb +2 -2
data/lib/kramdown/utils/entities.rb +10 -1
data/lib/kramdown/utils/html.rb +22 -11
data/lib/kramdown/utils/ordered_hash.rb +44 -40
data/lib/kramdown/version.rb +1 -1
data/man/man1/kramdown.1 +31 -4
data/test/testcases/block/08_list/item_ial.html +1 -1
data/test/testcases/block/11_ial/nested.html +11 -0
data/test/testcases/block/11_ial/nested.text +15 -0
data/test/testcases/block/13_definition_list/item_ial.html +1 -1
data/test/testcases/block/14_table/escaping.html +52 -0
data/test/testcases/block/14_table/escaping.text +19 -0
data/test/testcases/block/14_table/simple.html.19 +139 -0
data/test/testcases/block/14_table/simple.text +1 -1
data/test/testcases/block/15_math/normal.html +13 -13
data/test/testcases/block/16_toc/{no_toc_depth.html → no_toc.html} +0 -0
data/test/testcases/block/16_toc/{no_toc_depth.options → no_toc.options} +0 -0
data/test/testcases/block/16_toc/{no_toc_depth.text → no_toc.text} +0 -0
data/test/testcases/block/16_toc/{toc_depth_2.html → toc_levels.html} +4 -4
data/test/testcases/block/16_toc/toc_levels.options +1 -0
data/test/testcases/block/16_toc/{toc_depth_2.text → toc_levels.text} +0 -0
data/test/testcases/span/escaped_chars/normal.html +4 -0
data/test/testcases/span/escaped_chars/normal.text +4 -0
data/test/testcases/span/ial/simple.html +1 -1
data/test/testcases/span/math/normal.html +2 -2
metadata +20 -25
data/benchmark/historic-jruby-1.4.0.dat +0 -7
data/benchmark/historic-ruby-1.8.6.dat +0 -7
data/benchmark/historic-ruby-1.8.7.dat +0 -7
data/benchmark/historic-ruby-1.9.1p243.dat +0 -7
data/benchmark/historic-ruby-1.9.2dev.dat +0 -7
data/benchmark/static-jruby-1.4.0.dat +0 -7
data/benchmark/static-ruby-1.8.6.dat +0 -7
data/benchmark/static-ruby-1.8.7.dat +0 -7
data/benchmark/static-ruby-1.9.1p243.dat +0 -7
data/benchmark/static-ruby-1.9.2dev.dat +0 -7
data/lib/kramdown/parser/kramdown/attribute_list.rb +0 -111
data/lib/kramdown/parser/kramdown/extension.rb +0 -116
data/test/testcases/block/16_toc/toc_depth_2.options +0 -1

@@ -22,6 +22,9 @@
 module Kramdown
+  # This error is raised when an error condition is encountered.
+  #
+  # *Note* that this error is only raised by the support framework for the parsers and converters.
   class Error < RuntimeError; end
 end

data/lib/kramdown/options.rb CHANGED

@@ -40,27 +40,30 @@ module Kramdown
     # ----------------------------
     # :section: Option definitions
     #
-    # This sections informs describes the methods that can be used on the Options module.
+    # This section describes the methods that can be used on the Options module.
     # ----------------------------
-    # Contains the definition of an option.
-    Definition = Struct.new(:name, :type, :default, :desc)
+    # Struct class for storing the definition of an option.
+    Definition = Struct.new(:name, :type, :default, :desc, :validator)
     # Allowed option types.
-    ALLOWED_TYPES = [String, Integer, Float, Symbol, Boolean, Array, Object]
+    ALLOWED_TYPES = [String, Integer, Float, Symbol, Boolean, Object]
     @options = {}
     # Define a new option called +name+ (a Symbol) with the given +type+ (String, Integer, Float,
-    # Symbol, Boolean, Array, Object), default value +default+ and the description +desc+.
+    # Symbol, Boolean, Object), default value +default+ and the description +desc+. If a block is
+    # specified, it should validate the value and either raise an error or return a valid value.
     #
-    # The type 'Object' should only be used if none of the other types suffices because such an
-    # option will be opaque and cannot be used, for example, by CLI command!
-    def self.define(name, type, default, desc)
+    # The type 'Object' should only be used for complex types for which none of the other types
+    # suffices. A block needs to be specified when using type 'Object' and it has to cope with
+    # a value given as string and as the opaque type.
+    def self.define(name, type, default, desc, &block)
       raise ArgumentError, "Option name #{name} is already used" if @options.has_key?(name)
       raise ArgumentError, "Invalid option type #{type} specified" if !ALLOWED_TYPES.include?(type)
       raise ArgumentError, "Invalid type for default value" if !(type === default) && !default.nil?
-      @options[name] = Definition.new(name, type, default, desc)
+      raise ArgumentError, "Missing validator block" if type == Object && block.nil?
+      @options[name] = Definition.new(name, type, default, desc, block)
     end
     # Return all option definitions.
@@ -98,21 +101,22 @@ module Kramdown
     # String and then to the correct type.
     def self.parse(name, data)
       raise ArgumentError, "No option named #{name} defined" if !@options.has_key?(name)
-      return data if @options[name].type === data
-      data = data.to_s
-      if @options[name].type == String
-        data
-      elsif @options[name].type == Integer
-        Integer(data)
-      elsif @options[name].type == Float
-        Float(data)
-      elsif @options[name].type == Symbol
-        (data.strip.empty? ? nil : data.to_sym)
-      elsif @options[name].type == Boolean
-        data.downcase.strip != 'false' && !data.empty?
-      elsif @options[name].type == Array
-        data.split(/\s+/)
+      if !(@options[name].type === data)
+        data = data.to_s
+        data = if @options[name].type == String
+                 data
+               elsif @options[name].type == Integer
+                 Integer(data) rescue raise Kramdown::Error, "Invalid integer value for option '#{name}': '#{data}'"
+               elsif @options[name].type == Float
+                 Float(data) rescue raise Kramdown::Error, "Invalid float value for option '#{name}': '#{data}'"
+               elsif @options[name].type == Symbol
+                 (data.strip.empty? ? nil : data.to_sym)
+               elsif @options[name].type == Boolean
+                 data.downcase.strip != 'false' && !data.empty?
+               end
       end
+      data = @options[name].validator[data] if @options[name].validator
+      data
     end
     # ----------------------------
@@ -169,7 +173,7 @@ EOF
 Process kramdown syntax in block HTML tags
 If this option is `true`, the kramdown parser processes the content of
-block HTML tags as text containing block level elements. Since this is
+block HTML tags as text containing block-level elements. Since this is
 not wanted normally, the default is `false`. It is normally better to
 selectively enable kramdown processing via the markdown attribute.
@@ -181,7 +185,7 @@ EOF
 Process kramdown syntax in span HTML tags
 If this option is `true`, the kramdown parser processes the content of
-span HTML tags as text containing span level elements.
+span HTML tags as text containing span-level elements.
 Default: true
 Used by: kramdown parser
@@ -275,14 +279,45 @@ Default: :as_char
 Used by: HTML converter, kramdown converter
 EOF
-    define(:toc_depth, Integer, 0, <<EOF)
-Defines the maximum level of headers which will be used to generate the table of
+    define(:toc_depth, Integer, -1, <<EOF)
+DEPRECATED: Defines the maximum level of headers which will be used to generate the table of
 contents. For instance, with a value of 2, toc entries will be generated for h1
 and h2 headers but not for h3, h4, etc. A value of 0 uses all header levels.
-Default: 0
+Use option toc_levels instead!
+Default: -1
+Used by: HTML/Latex converter
+EOF
+    define(:toc_levels, Object, (1..6).to_a, <<EOF) do |val|
+Defines the levels that are used for the table of contents
+The individual levels can be specified by separating them with commas
+(e.g. 1,2,3) or by using the range syntax (e.g. 1..3). Only the
+specified levels are used for the table of contents.
+Default: 1..6
 Used by: HTML/Latex converter
 EOF
+      if String === val
+        if val =~ /^(\d)\.\.(\d)$/
+          val = Range.new($1.to_i, $2.to_i).to_a
+        elsif val =~ /^\d(?:,\d)*$/
+          val = val.split(/,/).map {|s| s.to_i}.uniq
+        else
+          raise Kramdown::Error, "Invalid syntax for option toc_levels"
+        end
+      elsif Array === val
+        val = val.map {|s| s.to_i}.uniq
+      else
+        raise Kramdown::Error, "Invalid type #{val.class} for option toc_levels"
+      end
+      if val.any? {|i| !(1..6).include?(i)}
+        raise Kramdown::Error, "Level numbers for option toc_levels have to be integers from 1 to 6"
+      end
+      val
+    end
     define(:line_width, Integer, 72, <<EOF)
 Defines the line width to be used when outputting a document
@@ -291,6 +326,26 @@ Default: 72
 Used by: kramdown converter
 EOF
+    define(:latex_headers, Object, %w{section subsection subsubsection paragraph subparagraph subparagraph}, <<EOF) do |val|
+Defines the LaTeX commands for different header levels
+The commands for the header levels one to six can be specified by
+separating them with commas.
+Default: section,subsection,subsubsection,paragraph,subparagraph,subparagraph
+Used by: Latex converter
+EOF
+      if String === val
+        val = val.split(/,/)
+      elsif !(Array === val)
+        raise Kramdown::Error, "Invalid type #{val.class} for option latex_headers"
+      end
+      if val.size != 6
+        raise Kramdown::Error, "Option latex_headers needs exactly six LaTeX commands"
+      end
+      val
+    end
   end
 end

data/lib/kramdown/parser.rb CHANGED

@@ -22,17 +22,17 @@
 module Kramdown
-  # == Parser Module
+  # This module contains all available parsers. A parser takes an input string and converts the
+  # string to an element tree.
   #
-  # This module contains all available parsers. Currently, there two parsers:
-  #
-  # * Kramdown for parsing documents in kramdown format
-  # * Html for parsing HTML documents
+  # New parsers should be derived from the Base class which provides common functionality - see its
+  # API documentation for how to create a custom parser class.
   module Parser
     autoload :Base, 'kramdown/parser/base'
     autoload :Kramdown, 'kramdown/parser/kramdown'
     autoload :Html, 'kramdown/parser/html'
+    autoload :Markdown, 'kramdown/parser/markdown'
   end

data/lib/kramdown/parser/base.rb CHANGED

@@ -24,37 +24,79 @@ module Kramdown
   module Parser
-    # == Base class for parsers
+    # == \Base class for parsers
     #
     # This class serves as base class for parsers. It provides common methods that can/should be
     # used by all parsers, especially by those using StringScanner for parsing.
     #
+    # A parser object is used as a throw-away object, i.e. it is only used for storing the needed
+    # state information during parsing. Therefore one can't instantiate a parser object directly but
+    # only use the Base::parse method.
+    #
+    # == Implementing a parser
+    #
+    # Implementing a new parser is rather easy: just derive a new class from this class and put it
+    # in the Kramdown::Parser module -- the latter is needed so that the auto-detection of the new
+    # parser works correctly. Then you need to implement the <tt>#parse</tt> method which has to
+    # contain the parsing code.
+    #
+    # Have a look at the Base::parse, Base::new and Base#parse methods for additional information!
     class Base
-      # Initialize the parser with the given Kramdown document +doc+.
-      def initialize(doc)
-        @doc = doc
+      # The hash with the parsing options.
+      attr_reader :options
+      # The array with the parser warnings.
+      attr_reader :warnings
+      # The original source string.
+      attr_reader :source
+      # The root element of the element tree that is created from the source string.
+      attr_reader :root
+      # Initialize the parser object with the +source+ string and the parsing +options+.
+      #
+      # The <tt>@root</tt> element, the <tt>@warnings</tt> array and <tt>@text_type</tt> (specifies
+      # the default type for newly created text nodes) are automatically initialized.
+      def initialize(source, options)
+        @source = source
+        @options = Kramdown::Options.merge(options)
+        @root = Element.new(:root, nil, nil, :encoding => (RUBY_VERSION >= '1.9' ? source.encoding : nil))
+        @warnings = []
         @text_type = :text
       end
       private_class_method(:new, :allocate)
-      # Parse the +source+ string into an element tree, using the information provided by the
-      # Kramdown document +doc+.
+      # Parse the +source+ string into an element tree, possibly using the parsing +options+, and
+      # return the root element of the element tree and an array with warning messages.
       #
       # Initializes a new instance of the calling class and then calls the #parse method that must
       # be implemented by each subclass.
-      def self.parse(source, doc)
-        new(doc).parse(source)
+      def self.parse(source, options = {})
+        parser = new(source, options)
+        parser.parse
+        [parser.root, parser.warnings]
       end
+      # Parse the source string into an element tree.
+      #
+      # The parsing code should parse the source provided in <tt>@source</tt> and build an element
+      # tree the root of which should be <tt>@root</tt>.
+      #
+      # This is the only method that has to be implemented by sub-classes!
+      def parse
+        raise NotImplementedError
+      end
-      # Add the given warning +text+ to the warning array of the Kramdown document.
+      # Add the given warning +text+ to the warning array.
       def warning(text)
-        @doc.warnings << text
+        @warnings << text
         #TODO: add position information
       end
-      # Modify the string +source+ to be usable by the parser.
+      # Modify the string +source+ to be usable by the parser (unifies line ending characters to
+      # <tt>\n</tt> and makes sure +source+ ends with a new line character).
       def adapt_source(source)
         source.gsub(/\r\n?/, "\n").chomp + "\n"
       end
@@ -69,8 +111,8 @@ module Kramdown
         end
       end
-      # Extract the part of the StringScanner +srcscan+ backed string specified by the +range+. This
-      # method also works correctly under Ruby 1.9.
+      # Extract the part of the StringScanner +strscan+ backed string specified by the +range+. This
+      # method works correctly under Ruby 1.8 and Ruby 1.9.
       def extract_string(range, strscan)
         result = nil
         if RUBY_VERSION >= '1.9'

data/lib/kramdown/parser/html.rb CHANGED

@@ -28,10 +28,13 @@ module Kramdown
   module Parser
     # Used for parsing a HTML document.
+    #
+    # The parsing code is in the Parser module that can also be used by other parsers.
     class Html < Base
       # Contains all constants that are used when parsing.
       module Constants
         #:stopdoc:
         # The following regexps are based on the ones used by REXML, with some slight modifications.
         HTML_DOCTYPE_RE = /<!DOCTYPE.*?>/m
@@ -43,17 +46,17 @@ module Kramdown
         HTML_ENTITY_RE = /&([\w:][\-\w\.:]*);|&#(\d+);|&\#x([0-9a-fA-F]+);/
-        HTML_PARSE_AS_BLOCK = %w{applet button blockquote body colgroup dd div dl fieldset form iframe li
-                               map noscript object ol table tbody thead tfoot tr td ul}
-        HTML_PARSE_AS_SPAN  = %w{a abbr acronym address b bdo big cite caption del dfn dt em
-                               h1 h2 h3 h4 h5 h6 i ins kbd label legend optgroup p q rb rbc
-                               rp rt rtc ruby samp select small span strong sub sup th tt var}
-        HTML_PARSE_AS_RAW   = %w{script math option textarea pre code}
+        HTML_CONTENT_MODEL_BLOCK = %w{applet button blockquote body colgroup dd div dl fieldset
+             form iframe li map noscript object ol table tbody thead tfoot tr td ul}
+        HTML_CONTENT_MODEL_SPAN  = %w{a abbr acronym address b bdo big cite caption del dfn dt em
+             h1 h2 h3 h4 h5 h6 i ins kbd label legend optgroup p q rb rbc
+             rp rt rtc ruby samp select small span strong sub sup th tt var}
+        HTML_CONTENT_MODEL_RAW   = %w{script math option textarea pre code}
-        HTML_PARSE_AS = Hash.new {|h,k| h[k] = :raw}
-        HTML_PARSE_AS_BLOCK.each {|i| HTML_PARSE_AS[i] = :block}
-        HTML_PARSE_AS_SPAN.each {|i| HTML_PARSE_AS[i] = :span}
-        HTML_PARSE_AS_RAW.each {|i| HTML_PARSE_AS[i] = :raw}
+        HTML_CONTENT_MODEL = Hash.new {|h,k| h[k] = :raw}
+        HTML_CONTENT_MODEL_BLOCK.each {|i| HTML_CONTENT_MODEL[i] = :block}
+        HTML_CONTENT_MODEL_SPAN.each {|i| HTML_CONTENT_MODEL[i] = :span}
+        HTML_CONTENT_MODEL_RAW.each {|i| HTML_CONTENT_MODEL[i] = :raw}
         # Some HTML elements like script belong to both categories (i.e. are valid in block and
         # span HTML) and don't appear therefore!
@@ -69,15 +72,18 @@ module Kramdown
       # Contains the parsing methods. This module can be mixed into any parser to get HTML parsing
       # functionality. The only thing that must be provided by the class are instance variable
-      # <tt>@stack</tt> for storing needed state and <tt>@src</tt> (instance of StringScanner) for
-      # the actual parsing.
+      # <tt>@stack</tt> for storing the needed state and <tt>@src</tt> (instance of StringScanner)
+      # for the actual parsing.
       module Parser
         include Constants
-        # Process the HTML start tag that has already be scanned/checked. Does the common processing
-        # steps and then yields to the caller for further processing.
-        def handle_html_start_tag
+        # Process the HTML start tag that has already been <tt>scan</tt>ned/<tt>check</tt>ed.
+        #
+        # Does the common processing steps and then yields to the caller for further processing
+        # (first parameter is the created element, the second parameter is +true+ if the HTML
+        # element is already closed, i.e. contains no body).
+        def handle_html_start_tag # :yields: el, closed
           name = @src[1]
           closed = !@src[4].nil?
           attrs = Utils::OrderedHash.new
@@ -98,18 +104,20 @@ module Kramdown
           end
         end
+        # Handle the HTML script tag at the current position.
         def handle_html_script_tag
           curpos = @src.pos
           if result = @src.scan_until(/(?=<\/script\s*>)/m)
             add_text(extract_string(curpos...@src.pos, @src), @tree.children.last, :raw)
             @src.scan(HTML_TAG_CLOSE_RE)
           else
-            add_text(@src.scan(/.*/m), @tree.children.last, :raw)
+            add_text(@src.rest, @tree.children.last, :raw)
+            @src.terminate
             warning("Found no end tag for 'script' - auto-closing it")
           end
         end
-        HTML_RAW_START = /(?=<(#{REXML::Parsers::BaseParser::UNAME_STR}|\/|!--|\?))/
+        HTML_RAW_START = /(?=<(#{REXML::Parsers::BaseParser::UNAME_STR}|\/|!--|\?))/ # :nodoc:
         # Parse raw HTML from the current source position, storing the found elements in +el+.
         # Parsing continues until one of the following criteria are fulfilled:
@@ -141,11 +149,11 @@ module Kramdown
                   warning("Found invalidly used HTML closing tag for '#{@src[1]}' - ignoring it")
                 end
               else
-                add_text(@src.scan(/./), @tree, :text)
+                add_text(@src.getch, @tree, :text)
               end
             else
-              result = @src.scan(/.*/m)
-              add_text(result, @tree, :text)
+              add_text(@src.rest, @tree, :text)
+              @src.terminate
               warning("Found no end tag for '#{@tree.value}' - auto-closing it") if @tree.type == :html_element
               done = true
             end
@@ -160,6 +168,8 @@ module Kramdown
       # Converts HTML elements to native elements if possible.
       class ElementConverter
+        # :stopdoc:
         include Constants
         include ::Kramdown::Utils::Entities
@@ -172,14 +182,18 @@ module Kramdown
                               header h1 h2 h3 h4 h5 h6 legend li nav p section td th}
         SIMPLE_ELEMENTS = %w{em strong blockquote hr br img p thead tbody tfoot tr td th ul ol dl li dl dt dd}
-        def initialize(doc)
-          @doc = doc
+        def initialize(root)
+          @root = root
+        end
+        def self.convert(root, el = root)
+          new(root).process(el)
         end
         # Convert the element +el+ and its children.
         def process(el, do_conversion = true, preserve_text = false, parent = nil)
           case el.type
-          when :xml_comment, :xml_pi, :html_doctype
+          when :xml_comment, :xml_pi
             ptype = if parent.nil?
                       'div'
                     else
@@ -191,9 +205,13 @@ module Kramdown
                       else parent.type.to_s
                       end
                     end
-            el.options = {:category => HTML_PARSE_AS_SPAN.include?(ptype) ? :span : :block}
+            el.options.replace({:category => (HTML_CONTENT_MODEL[ptype] == :span ? :span : :block)})
             return
           when :html_element
+          when :root
+            el.children.each {|c| process(c)}
+            remove_whitespace_children(el)
+            return
           else return
           end
@@ -204,7 +222,7 @@ module Kramdown
           if do_conversion && self.class.method_defined?(mname)
             send(mname, el)
           elsif do_conversion && SIMPLE_ELEMENTS.include?(type)
-            set_basics(el, type.intern, HTML_SPAN_ELEMENTS.include?(type) ? :span : :block)
+            set_basics(el, type.intern)
             process_children(el, do_conversion, preserve_text)
           else
             process_html_element(el, do_conversion, preserve_text)
@@ -245,16 +263,16 @@ module Kramdown
                           Element.new(:entity, entity(val), nil, :original => src.matched)
                         end
             else
-              result << Element.new(:text, src.scan(/.*/m))
+              result << Element.new(:text, src.rest)
+              src.terminate
             end
           end
           result
         end
         def process_html_element(el, do_conversion = true, preserve_text = false)
-          el.options = {:category => HTML_SPAN_ELEMENTS.include?(el.value) ? :span : :block,
-            :parse_type => HTML_PARSE_AS[el.value]
-          }
+          el.options.replace(:category => HTML_SPAN_ELEMENTS.include?(el.value) ? :span : :block,
+                             :content_model => HTML_CONTENT_MODEL[el.value])
           process_children(el, do_conversion, preserve_text)
         end
@@ -266,7 +284,7 @@ module Kramdown
           tmp = []
           last_is_p = false
           el.children.each do |c|
-            if c.options[:category] != :block || c.type == :text
+            if Element.category(c) != :block || c.type == :text
               if !last_is_p
                 tmp << Element.new(:p, nil, nil, :transparent => true)
                 last_is_p = true
@@ -296,14 +314,14 @@ module Kramdown
           el.children.delete_if do |c|
             i += 1
             c.type == :text && c.value.strip.empty? &&
-              (i == 0 || i == el.children.length - 1 || (el.children[i-1].options[:category] == :block &&
-                                                         el.children[i+1].options[:category] == :block))
+              (i == 0 || i == el.children.length - 1 || (Element.category(el.children[i-1]) == :block &&
+                                                         Element.category(el.children[i+1]) == :block))
           end
         end
-        def set_basics(el, type, category, opts = {})
+        def set_basics(el, type, opts = {})
           el.type = type
-          el.options = {:category => category}.merge(opts)
+          el.options.replace(opts)
           el.value = nil
         end
@@ -314,7 +332,7 @@ module Kramdown
         def convert_a(el)
           if el.attr['href']
-            set_basics(el, :a, :span)
+            set_basics(el, :a)
             process_children(el)
           else
             process_html_element(el, false)
@@ -322,17 +340,17 @@ module Kramdown
         end
         def convert_b(el)
-          set_basics(el, :strong, :span)
+          set_basics(el, :strong)
           process_children(el)
         end
         def convert_i(el)
-          set_basics(el, :em, :span)
+          set_basics(el, :em)
           process_children(el)
         end
         def convert_h1(el)
-          set_basics(el, :header, :block, :level => el.value[1..1].to_i)
+          set_basics(el, :header, :level => el.value[1..1].to_i)
           extract_text(el, el.options[:raw_text] = '')
           process_children(el)
         end
@@ -350,12 +368,12 @@ module Kramdown
                 mem << c.value
               elsif c.type == :entity
                 if RUBY_VERSION >= '1.9'
-                  mem << c.value.char.encode(@doc.parse_infos[:encoding])
+                  mem << c.value.char.encode(@root.options[:encoding])
                 elsif [60, 62, 34, 38].include?(c.value.code_point)
                   mem << c.value.code_point.chr
                 end
               elsif c.type == :smart_quote || c.type == :typographic_sym
-                mem << entity(c.value.to_s).char.encode(@doc.parse_infos[:encoding])
+                mem << entity(c.value.to_s).char.encode(@root.options[:encoding])
               else
                 raise "Bug - please report"
               end
@@ -368,9 +386,9 @@ module Kramdown
             process_html_element(el, false, true)
           else
             if el.value == 'code'
-              set_basics(el, :codespan, :span)
+              set_basics(el, :codespan)
             else
-              set_basics(el, :codeblock, :block)
+              set_basics(el, :codeblock)
             end
             el.value = result.first.value
             el.children.clear
@@ -384,8 +402,9 @@ module Kramdown
             return
           end
           process_children(el)
-          set_basics(el, :table, :block)
+          set_basics(el, :table)
           el.options[:alignment] = []
           calc_alignment = lambda do |c|
             if c.type == :tr && el.options[:alignment].empty?
               el.options[:alignment] = [:default] * c.children.length
@@ -395,8 +414,18 @@ module Kramdown
             end
           end
           calc_alignment.call(el)
+          change_th_type = lambda do |c|
+            if c.type == :th
+              c.type = :td
+            else
+              c.children.each {|cc| change_th_type.call(cc)}
+            end
+          end
+          change_th_type.call(el)
           if el.children.first.type == :tr
-            tbody = Element.new(:tbody, nil, nil, :category => :block)
+            tbody = Element.new(:tbody)
             tbody.children = el.children
             el.children = [tbody]
           end
@@ -427,52 +456,38 @@ module Kramdown
              end && el.children.any? {|t| t.value == 'tbody'})
         end
-        def convert_div(el)
+        def convert_script(el)
           if !is_math_tag?(el)
             process_html_element(el)
           else
             handle_math_tag(el)
           end
         end
-        alias :convert_span :convert_div
         def is_math_tag?(el)
-          el.attr['class'].to_s =~ /\bmath\b/ &&
-            el.children.size == 1 && el.children.first.type == :text
+          el.attr['type'].to_s =~ /\bmath\/tex\b/
         end
         def handle_math_tag(el)
-          set_basics(el, :math, (el.value == 'div' ? :block : :span))
+          set_basics(el, :math, :category => (el.attr['type'] =~ /mode=display/ ? :block : :span))
           el.value = el.children.shift.value
-          if el.attr['class'] =~ /^\s*math\s*$/
-            el.attr.delete('class')
-          else
-            el.attr['class'].sub!(/\s?math/, '')
-          end
-          el.value.gsub!(/&(amp|quot|gt|lt);/) do |m|
-            case m
-            when '&amp;'   then '&'
-            when '&quot;'  then '"'
-            when '&gt;'    then '>'
-            when '&lt;'    then '<'
-            end
-          end
+          el.attr.delete('type')
         end
       end
       include Parser
-      # Parse +source+ as HTML document and return the created +tree+.
-      def parse(source)
-        @stack = []
-        @tree = Element.new(:root)
+      # Parse the source string provided on initialization as HTML document.
+      def parse
+        @stack, @tree = [], @root
         @src = StringScanner.new(adapt_source(source))
         while true
           if result = @src.scan(/\s*#{HTML_INSTRUCTION_RE}/)
             @tree.children << Element.new(:xml_pi, result.strip, nil, :category => :block)
           elsif result = @src.scan(/\s*#{HTML_DOCTYPE_RE}/)
-            @tree.children << Element.new(:html_doctype, result.strip, nil, :category => :block)
+            # ignore the doctype
           elsif result = @src.scan(/\s*#{HTML_COMMENT_RE}/)
             @tree.children << Element.new(:xml_comment, result.strip, nil, :category => :block)
           else
@@ -485,10 +500,7 @@ module Kramdown
         end
         parse_raw_html(@tree, &tag_handler)
-        ec = ElementConverter.new(@doc)
-        @tree.children.each {|c| ec.process(c)}
-        ec.remove_whitespace_children(@tree)
-        @tree
+        ElementConverter.convert(@tree)
       end
     end