RubyGems - kramdown - Versions diffs - 0.1.0 → 0.2.0 - Mend

kramdown 0.1.0 → 0.2.0

Potentially problematic release.

This version of kramdown might be problematic. Click here for more details.

Files changed (87) hide show

data/doc/tests.page CHANGED Viewed

@@ -34,11 +34,10 @@ fast but they do not provide additional syntax elements). As one can see below,
 currently (November 2009) ~5x faster than Maruku, ~10x faster than BlueFeather but ~30x slower than
 BlueCloth and rdiscount:
-{::nokramdown:}
 <pre><code>
 {execute_cmd: {command: "ruby -Ilib -rubygems benchmark/benchmark.rb", process_output: false, escape_html: true}}
-</code></pre>
-{::nokramdown:}
+</code>
+</pre>
 [Markdown Test Suite]: http://daringfireball.net/projects/downloads/MarkdownTest_1.0.zip
 [MDTest]: http://www.michelf.com/docs/projets/mdtest-1.0.zip

data/lib/kramdown/converter.rb CHANGED Viewed

@@ -61,6 +61,10 @@ module Kramdown
         escape_html(el.value, false)
       end
+      def convert_eob(el, inner, indent)
+        ''
+      end
       def convert_p(el, inner, indent)
         "#{' '*indent}<p#{options_for_element(el)}>#{inner}</p>\n"
       end
@@ -97,22 +101,28 @@ module Kramdown
         "#{' '*indent}<#{el.type}#{options_for_element(el)}>\n#{inner}#{' '*indent}</#{el.type}>\n"
       end
       alias :convert_ol :convert_ul
+      alias :convert_dl :convert_ul
       def convert_li(el, inner, indent)
-        output = ' '*indent << "<li" << options_for_element(el) << ">"
-        if el.options[:first_as_block]
+        output = ' '*indent << "<#{el.type}" << options_for_element(el) << ">"
+        if el.options[:first_is_block]
           output << "\n" << inner << ' '*indent
         else
           output << inner << (inner =~ /\n\Z/ ? ' '*indent : '')
         end
-        output << "</li>\n"
+        output << "</#{el.type}>\n"
+      end
+      alias :convert_dd :convert_li
+      def convert_dt(el, inner, indent)
+        "#{' '*indent}<dt#{options_for_element(el)}>#{inner}</dt>\n"
       end
       def convert_html_raw(el, inner, indent)
         el.value + (el.options[:type] == :block ? "\n" : '')
       end
-      HTML_TAGS_WITH_BODY=['div']
+      HTML_TAGS_WITH_BODY=['div', 'script']
       def convert_html_element(el, inner, indent)
         if @doc.options[:filter_html].include?(el.value)
@@ -120,15 +130,22 @@ module Kramdown
         elsif el.options[:type] == :span
           "<#{el.value}#{options_for_element(el)}" << (!inner.empty? ? ">#{inner}</#{el.value}>" : " />")
         else
-          output = ' '*indent << "<#{el.value}#{options_for_element(el)}"
-          if !inner.empty?
-            output << ">\n#{inner.chomp}\n"  << ' '*indent << "</#{el.value}>"
+          output = ''
+          output << ' '*indent if !el.options[:no_start_indent] && el.options[:parse_type] != :raw && !el.options[:parent_is_raw]
+          output << "<#{el.value}#{options_for_element(el)}"
+          if !inner.empty? && (el.options[:compact] || el.options[:parse_type] != :block)
+            output << ">#{inner}</#{el.value}>"
+          elsif !inner.empty? && (el.children.first.type == :text || el.children.first.options[:no_start_indent])
+            output << ">#{inner}" << ' '*indent << "</#{el.value}>"
+          elsif !inner.empty?
+            output << ">\n#{inner}"  << ' '*indent << "</#{el.value}>"
           elsif HTML_TAGS_WITH_BODY.include?(el.value)
             output << "></#{el.value}>"
           else
             output << " />"
           end
-          output << "\n"
+          output << "\n" if el.options[:outer_element] || (el.options[:parse_type] != :raw && !el.options[:parent_is_raw])
+          output
         end
       end
@@ -164,6 +181,19 @@ module Kramdown
       end
       alias :convert_strong :convert_em
+      def convert_entity(el, inner, indent)
+        el.value
+      end
+      TYPOGRAPHIC_SYMS = {
+        :mdash => '&mdash;', :ndash => '&ndash;', :ellipsis => '&hellip;',
+        :laquo_space => '&laquo;&nbsp;', :raquo_space => '&nbsp;&raquo;',
+        :laquo => '&laquo;', :raquo => '&raquo;'
+      }
+      def convert_typographic_sym(el, inner, indent)
+        TYPOGRAPHIC_SYMS[el.value]
+      end
       def convert_root(el, inner, indent)
         inner << footnote_content
       end
@@ -174,7 +204,7 @@ module Kramdown
         ol = Element.new(:ol)
         ol.options[:attr] = {'start' => @footnote_start} if @footnote_start != 1
         @footnotes.each do |name, data|
-          li = Element.new(:li, nil, {:attr => {:id => "fn:#{name}"}, :first_as_block => true})
+          li = Element.new(:li, nil, {:attr => {:id => "fn:#{name}"}, :first_is_block => true})
           li.children = Marshal.load(Marshal.dump(data[:content].children)) #TODO: probably remove this!!!!
           ol.children << li
@@ -204,7 +234,8 @@ module Kramdown
       ESCAPE_ALL_NOT_ENTITIES_RE = Regexp.union(REXML::Parsers::BaseParser::REFERENCE_RE, ESCAPE_ALL_RE)
       # Escape the special HTML characters in the string +str+. If +all+ is +true+ then all
-      # characters are escaped, if +all+ is +false+
+      # characters are escaped, if +all+ is +false+ then only those characters are escaped that are
+      # not part of an HTML entity.
       def escape_html(str, all = true)
         str.gsub(all ? ESCAPE_ALL_RE : ESCAPE_ALL_NOT_ENTITIES_RE) {|m| ESCAPE_MAP[m] || m}
       end

data/lib/kramdown/deprecated.rb ADDED Viewed

@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+#
+#--
+# Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
+#
+# This file is part of kramdown.
+#
+# kramdown is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#++
+#
+module Kramdown
+  class Extension
+    def parse_nokramdown(parser, opts, body)
+      warn("The extension 'nokramdown' is deprecated and has been renamed to 'nomarkdown'")
+      parse_nomarkdown(parser, opts, body)
+    end
+    def parse_kdoptions(parser, opts, body)
+      warn("The extension 'kdoptions' is deprecated and has been renamed to 'options'")
+      parse_options(parser, opts, body)
+    end
+  end
+end

data/lib/kramdown/document.rb CHANGED Viewed

@@ -20,16 +20,15 @@
 #++
 #
+require 'kramdown/version'
 require 'kramdown/error'
 require 'kramdown/parser'
 require 'kramdown/converter'
 require 'kramdown/extension'
+require 'kramdown/deprecated'
 module Kramdown
-  # The kramdown version.
-  VERSION = '0.1.0'
   # The main interface to kramdown.
   #
   # This class provides a one-stop-shop for using kramdown to convert text into various output
@@ -50,20 +49,30 @@ module Kramdown
     #
     # [:auto_ids (used by the parser)]
     #    A boolean value deciding whether automatic header ID generation is used. Default: +true+.
-    #    When using the +kdoptions+ extension, the string 'false' will be the value +false+, every
-    #    other non-empty string will be +true+.
     # [:filter_html (used by the HTML converter)]
     #    An array of HTML tag names that defines which tags should be filtered from the output. For
     #    example, if the value contains +iframe+, then all HTML +iframe+ tags are filtered out and
-    #    only the body is displayed. Default: empty array. When using the +kdoptions+ extension, the
+    #    only the body is displayed. Default: empty array. When using the +options+ extension, the
     #    string value needs to hold the HTML tag names separated by one or more spaces.
     # [:footnote_nr (used by the HTML converter)]
     #    The initial number used for creating the link to the first footnote. Default: +1+. When
-    #    using the +kdoptions+ extension, the string value needs to be a valid number.
+    #    using the +options+ extension, the string value needs to be a valid number.
+    # [:parse_block_html (used by the parser)]
+    #    A boolean value deciding whether kramdown syntax is processed in block HTML tags. Default:
+    #    +false+.
+    # [:parse_span_html (used by the parser)]
+    #    A boolean value deciding whether kramdown syntax is processed in span HTML tags. Default:
+    #    +true+.
+    #
+    # When using the +options+ extension, all boolean values can be set to false by using the
+    # string 'false' or an empty string, any other non-empty string will be converted to the value
+    # +true+.
     DEFAULT_OPTIONS={
       :footnote_nr => 1,
       :filter_html => [],
-      :auto_ids => false
+      :auto_ids => true,
+      :parse_block_html => false,
+      :parse_span_html => true
     }

data/lib/kramdown/extension.rb CHANGED Viewed

@@ -44,18 +44,14 @@ module Kramdown
     end
     # Add the body (if available) as <tt>:raw</tt> Element to the +parser.tree+.
-    def parse_nokramdown(parser, opts, body)
+    def parse_nomarkdown(parser, opts, body)
       parser.tree.children << Element.new(:raw, body) if body.kind_of?(String)
     end
     # Update the document options with the options set in +opts+.
-    def parse_kdoptions(parser, opts, body)
+    def parse_options(parser, opts, body)
       if val = opts.delete('auto_ids')
-        if val.downcase.strip == 'false'
-          parser.doc.options[:auto_ids] = false
-        elsif !val.empty?
-          parser.doc.options[:auto_ids] = true
-        end
+        parser.doc.options[:auto_ids] = boolean_value(val)
       end
       if val = opts.delete('filter_html')
         parser.doc.options[:filter_html] = val.split(/\s+/)
@@ -63,9 +59,19 @@ module Kramdown
       if val = opts.delete('footnote_nr')
         parser.doc.options[:footnote_nr] = Integer(val) rescue parser.doc.options[:footnote_nr]
       end
+      if val = opts.delete('parse_block_html')
+        parser.doc.options[:parse_block_html] = boolean_value(val)
+      end
+      if val = opts.delete('parse_span_html')
+        parser.doc.options[:parse_span_html] = boolean_value(val)
+      end
       opts.each {|k,v| parser.warning("Unknown kramdown options '#{k}'")}
     end
+    def boolean_value(val)
+      val.downcase.strip != 'false' && !val.empty?
+    end
   end
 end

data/lib/kramdown/parser.rb CHANGED Viewed

@@ -84,10 +84,10 @@ module Kramdown
       #######
       BLOCK_PARSERS = [:blank_line, :codeblock, :codeblock_fenced, :blockquote, :atx_header,
-                       :setext_header, :horizontal_rule, :list, :link_definition, :block_html,
+                       :setext_header, :horizontal_rule, :list, :definition_list, :link_definition, :block_html,
                        :footnote_definition, :ald, :block_ial, :extension_block, :eob_marker, :paragraph]
       SPAN_PARSERS =  [:emphasis, :codespan, :autolink, :span_html, :footnote_marker, :link,
-                       :span_ial, :html_entity, :typographic_syms, :special_html_chars, :line_break, :escaped_chars,]
+                       :span_ial, :html_entity, :typographic_syms, :line_break, :escaped_chars]
       # Adapt the object to allow parsing like specified in the options.
       def configure_parser
@@ -292,6 +292,7 @@ module Kramdown
       # Parse the EOB marker at the current location.
       def parse_eob_marker
         @src.pos += @src.matched_size
+        @tree.children << Element.new(:eob)
         true
       end
       Registry.define_parser(:block, :eob_marker, EOB_MARKER, self)
@@ -312,8 +313,8 @@ module Kramdown
       end
       Registry.define_parser(:block, :paragraph, PARAGRAPH_START, self)
-      SETEXT_HEADER_START = /^(#{OPT_SPACE}[^ \t].*?)\n(-|=)+\s*?\n/
+      HEADER_ID=/(?:[ \t]\{#((?:\w|\d)[\w\d-]*)\})?/
+      SETEXT_HEADER_START = /^(#{OPT_SPACE}[^ \t].*?)#{HEADER_ID}[ \t]*?\n(-|=)+\s*?\n/
       # Parse the Setext header at the current location.
       def parse_setext_header
@@ -321,10 +322,11 @@ module Kramdown
           return false
         end
         @src.pos += @src.matched_size
-        text, level = @src[1].strip, @src[2]
+        text, id, level = @src[1].strip, @src[2], @src[3]
         el = Element.new(:header, nil, :level => (level == '-' ? 2 : 1))
         add_text(text, el)
-        el.options[:attr] = {:id => generate_id(text)} if @doc.options[:auto_ids]
+        el.options[:attr] = {'id' => id} if id
+        el.options[:attr] = {'id' => generate_id(text)} if @doc.options[:auto_ids] && !id
         @tree.children << el
         true
       end
@@ -332,7 +334,7 @@ module Kramdown
       ATX_HEADER_START = /^\#{1,6}/
-      ATX_HEADER_MATCH = /^(\#{1,6})(.+?)\s*?#*\s*?\n/
+      ATX_HEADER_MATCH = /^(\#{1,6})(.+?)\s*?#*#{HEADER_ID}\s*?\n/
       # Parse the Atx header at the current location.
       def parse_atx_header
@@ -340,10 +342,11 @@ module Kramdown
           return false
         end
         result = @src.scan(ATX_HEADER_MATCH)
-        level, text = @src[1], @src[2].strip
+        level, text, id = @src[1], @src[2].strip, @src[3]
         el = Element.new(:header, nil, :level => level.length)
         add_text(text, el)
-        el.options[:attr] = {:id => generate_id(text)} if @doc.options[:auto_ids]
+        el.options[:attr] = {'id' => id} if id
+        el.options[:attr] = {'id' => generate_id(text)} if @doc.options[:auto_ids] && !id
         @tree.children << el
         true
       end
@@ -398,7 +401,7 @@ module Kramdown
       Registry.define_parser(:block, :codeblock_fenced, FENCED_CODEBLOCK_START, self)
-      HR_START = /^#{OPT_SPACE}(\*|-|_) *\1 *\1 *(\1| )*\n/
+      HR_START = /^#{OPT_SPACE}(\*|-|_)[ \t]*\1[ \t]*\1[ \t]*(\1|[ \t])*\n/
       # Parse the horizontal rule at the current location.
       def parse_horizontal_rule
@@ -431,23 +434,10 @@ module Kramdown
           if @src.check(HR_START)
             break
           elsif @src.scan(list_start_re)
-            indentation, content = @src[1].length, @src[2]
             item = Element.new(:li)
+            item.value, indentation, content_re, indent_re = parse_first_list_line(@src[1].length, @src[2])
             list.children << item
-            if content =~ /^\s*\n/
-              indentation = 4
-            else
-              while content =~ /^ *\t/
-                temp = content.scan(/^ */).first.length + indentation
-                content.sub!(/^( *)(\t+)/) {$1 + " "*(4 - (temp % 4)) + " "*($2.length - 1)*4}
-              end
-              indentation += content.scan(/^ */).first.length
-            end
-            content.sub!(/^\s*/, '')
-            item.value = content
-            indent_re = /^ {#{indentation}}/
-            content_re = /^(?:(?:\t| {4}){#{indentation / 4}} {#{indentation % 4}}|(?:\t| {4}){#{indentation / 4 + 1}}).*?\n/
             list_start_re = (type == :ul ? /^( {0,#{[3, indentation - 1].min}}[+*-])([\t| ].*?\n)/ :
                              /^( {0,#{[3, indentation - 1].min}}\d+\.)([\t| ].*?\n)/)
             nested_list_found = false
@@ -491,7 +481,7 @@ module Kramdown
             text.value += "\n" if !item.children.empty? && item.children[0].type != :blank
             item.children.unshift(text)
           else
-            item.options[:first_as_block] = true
+            item.options[:first_is_block] = true
           end
           if item.children.last.type == :blank
@@ -507,6 +497,110 @@ module Kramdown
       end
       Registry.define_parser(:block, :list, LIST_START, self)
+      def parse_first_list_line(indentation, content)
+        if content =~ /^\s*\n/
+          indentation = 4
+        else
+          while content =~ /^ *\t/
+            temp = content.scan(/^ */).first.length + indentation
+            content.sub!(/^( *)(\t+)/) {$1 + " "*(4 - (temp % 4)) + " "*($2.length - 1)*4}
+          end
+          indentation += content.scan(/^ */).first.length
+        end
+        content.sub!(/^\s*/, '')
+        indent_re = /^ {#{indentation}}/
+        content_re = /^(?:(?:\t| {4}){#{indentation / 4}} {#{indentation % 4}}|(?:\t| {4}){#{indentation / 4 + 1}}).*?\n/
+        [content, indentation, content_re, indent_re]
+      end
+      DEFINITION_LIST_START = /^(#{OPT_SPACE}:)([\t| ].*?\n)/
+      # Parse the definition list at the current location.
+      def parse_definition_list
+        children = @tree.children
+        if !children.last || (children.length == 1 && children.last.type != :p ) ||
+            (children.length >= 2 && children[-1].type != :p && (children[-1].type != :blank || children[-1].value != "\n" || children[-2].type != :p))
+          return false
+        end
+        first_as_para = false
+        deflist = Element.new(:dl)
+        para = @tree.children.pop
+        if para.type == :blank
+          para = @tree.children.pop
+          first_as_para = true
+        end
+        para.children.first.value.split("\n").each do |term|
+          el = Element.new(:dt)
+          el.children << Element.new(:text, term)
+          deflist.children << el
+        end
+        item = nil
+        indent_re = nil
+        content_re = nil
+        def_start_re = DEFINITION_LIST_START
+        while !@src.eos?
+          if @src.scan(def_start_re)
+            item = Element.new(:dd)
+            item.options[:first_as_para] = first_as_para
+            item.value, indentation, content_re, indent_re = parse_first_list_line(@src[1].length, @src[2])
+            deflist.children << item
+            def_start_re = /^( {0,#{[3, indentation - 1].min}}:)([\t| ].*?\n)/
+            first_as_para = false
+          elsif result = @src.scan(content_re)
+            result.sub!(/^(\t+)/) { " "*4*($1 ? $1.length : 0) }
+            result.sub!(indent_re, '')
+            item.value << result
+            first_as_para = false
+          elsif result = @src.scan(BLANK_LINE)
+            first_as_para = true
+            item.value << result
+          else
+            break
+          end
+        end
+        last = nil
+        deflist.children.each do |item|
+          next if item.type == :dt
+          parse_blocks(item, item.value)
+          item.value = nil
+          next if item.children.size == 0
+          if item.children.last.type == :blank
+            last = item.children.pop
+          else
+            last = nil
+          end
+          if item.children.first.type == :p && !item.options.delete(:first_as_para)
+            text = item.children.shift.children.first
+            text.value += "\n" if !item.children.empty?
+            item.children.unshift(text)
+          else
+            item.options[:first_is_block] = true
+          end
+        end
+        if @tree.children.length >= 1 && @tree.children.last.type == :dl
+          @tree.children[-1].children += deflist.children
+        elsif @tree.children.length >= 2 && @tree.children[-1].type == :blank && @tree.children[-2].type == :dl
+          @tree.children.pop
+          @tree.children[-1].children += deflist.children
+        else
+          @tree.children << deflist
+        end
+        @tree.children << last if !last.nil?
+        true
+      end
+      Registry.define_parser(:block, :definition_list, DEFINITION_LIST_START, self)
       PUNCTUATION_CHARS = "_.:,;!?-"
       LINK_ID_CHARS = /[a-zA-Z0-9 #{PUNCTUATION_CHARS}]/
@@ -612,20 +706,34 @@ module Kramdown
       #:startdoc:
       HTML_COMMENT_RE = /<!--(.*?)-->/m
       HTML_INSTRUCTION_RE = /<\?(.*?)\?>/m
-      HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})\s*=\s*(["'])(.*?)\2/
-      HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/
+      HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})\s*=\s*(["'])(.*?)\2/m
+      HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/m
       HTML_TAG_CLOSE_RE = /<\/(#{REXML::Parsers::BaseParser::NAME_STR})\s*>/
-      HTML_PARSE_AS_BLOCK = %w{div blockquote table dl ol ul form fieldset}
-      HTML_PARSE_AS_SPAN  = %w{a address b dd dt em h1 h2 h3 h4 h5 h6 legend li p pre span td th}
-      HTML_PARSE_AS_RAW   = %w{script math}
-      HTML_PARSE_AS = Hash.new {|h,k| h[k] = :span}
+      HTML_PARSE_AS_BLOCK = %w{applet button blockquote colgroup dd div dl fieldset form iframe li
+                               map noscript object ol table tbody td th thead tfoot tr ul}
+      HTML_PARSE_AS_SPAN  = %w{a abbr acronym address b bdo big cite caption code del dfn dt em
+                               h1 h2 h3 h4 h5 h6 i ins kbd label legend optgroup p pre q rb rbc
+                               rp rt rtc ruby samp select small span strong sub sup tt var}
+      HTML_PARSE_AS_RAW   = %w{script math option textarea}
+      HTML_PARSE_AS = Hash.new {|h,k| h[k] = :raw}
       HTML_PARSE_AS_BLOCK.each {|i| HTML_PARSE_AS[i] = :block}
       HTML_PARSE_AS_SPAN.each {|i| HTML_PARSE_AS[i] = :span}
       HTML_PARSE_AS_RAW.each {|i| HTML_PARSE_AS[i] = :raw}
-      HTML_BLOCK_ELEMENTS = %w[div p pre h1 h2 h3 h4 h5 h6 hr form fieldset iframe legend script dl ul ol table ins del blockquote address]
+      #:stopdoc:
+      # Some HTML elements like script belong to both categories (i.e. are valid in block and
+      # span HTML) and therefore don't appear in either list!
+      #:startdoc:
+      HTML_SPAN_ELEMENTS = %w{a abbr acronym b big bdo br button cite code del dfn em i img input
+                              ins kbd label option q rb rbc rp rt rtc ruby samp select small span
+                              strong sub sup textarea tt var}
+      HTML_BLOCK_ELEMENTS = %w{address applet button blockquote caption col colgroup dd div dl dt fieldset
+                               form h1 h2 h3 h4 h5 h6 hr iframe legend li map ol optgroup p pre table tbody
+                               td th thead tfoot tr ul}
+      HTML_ELEMENTS_WITHOUT_BODY = %w{area br col hr img input}
       HTML_BLOCK_START = /^#{OPT_SPACE}<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/
@@ -640,85 +748,125 @@ module Kramdown
           @src.scan(/.*?\n/)
           true
         else
-          if !((@src.check(/^#{OPT_SPACE}#{HTML_TAG_RE}/) && (HTML_BLOCK_ELEMENTS.include?(@src[1]) || @src[1] =~ /:/)) ||
-               @src.check(/^#{OPT_SPACE}#{HTML_TAG_CLOSE_RE}/))
-            return false
+          if (!@src.check(/^#{OPT_SPACE}#{HTML_TAG_RE}/) && !@src.check(/^#{OPT_SPACE}#{HTML_TAG_CLOSE_RE}/)) ||
+              HTML_SPAN_ELEMENTS.include?(@src[1])
+            if @tree.type == :html_element && @tree.options[:parse_type] != :block
+              add_html_text(@src.scan(/.*?\n/), @tree)
+              add_html_text(@src.scan_until(/(?=#{HTML_BLOCK_START})|\Z/), @tree)
+              return true
+            else
+              return false
+            end
           end
-          @src.scan(/^(.*?)\n/)
-          line = @src[1]
-          temp = nil
+          current_el = (@tree.type == :html_element ? @tree : nil)
+          @src.scan(/^(#{OPT_SPACE})(.*?)\n/)
+          if current_el && current_el.options[:parse_type] == :raw
+            add_html_text(@src[1], current_el)
+          end
+          line = @src[2]
           stack = []
           while line.size > 0
             index_start_tag, index_close_tag = line.index(HTML_TAG_RE), line.index(HTML_TAG_CLOSE_RE)
-            if index_start_tag && (!index_close_tag || index_start_tag < index_close_tag) && (!temp || temp.options[:parse_type] == :block)
+            if index_start_tag && (!index_close_tag || index_start_tag < index_close_tag)
               md = line.match(HTML_TAG_RE)
-              break if !(HTML_BLOCK_ELEMENTS.include?(md[1]) || md[1] =~ /:/)
-              add_text(md.pre_match + "\n", temp) if temp
               line = md.post_match
+              add_html_text(md.pre_match, current_el) if current_el
+              if HTML_SPAN_ELEMENTS.include?(md[1]) || (current_el && current_el.options[:parse_type] == :span)
+                add_html_text(md.to_s, current_el) if current_el
+                next
+              end
               attrs = {}
               md[2].scan(HTML_ATTRIBUTE_RE).each {|name,sep,val| attrs[name] = val}
-              el = Element.new(:html_element, md[1], :attr => attrs, :type => :block,
-                               :parse_type => HTML_PARSE_AS[md[1]])
-              (temp || @tree).children << el
-              if !md[4]
+              parse_type = if !current_el || current_el.options[:parse_type] != :raw
+                             (@doc.options[:parse_block_html] ? HTML_PARSE_AS[md[1]] : :raw)
+                           else
+                             :raw
+                           end
+              if val = get_parse_type(attrs.delete('markdown'))
+                parse_type = (val == :default ? HTML_PARSE_AS[md[1]] : val)
+              end
+              el = Element.new(:html_element, md[1], :attr => attrs, :type => :block, :parse_type => parse_type)
+              el.options[:no_start_indent] = true if !stack.empty?
+              el.options[:outer_element] = true if !current_el
+              el.options[:parent_is_raw] = true if current_el && current_el.options[:parse_type] == :raw
+              @tree.children << el
+              if !md[4] && HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
+                warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it")
+              elsif !md[4]
                 @unclosed_html_tags.push(el)
-                stack << temp
-                temp = el
+                @stack.push(@tree)
+                stack.push(current_el)
+                @tree = current_el = el
               end
             elsif index_close_tag
               md = line.match(HTML_TAG_CLOSE_RE)
-              add_text(md.pre_match, temp) if temp
               line = md.post_match
+              add_html_text(md.pre_match, current_el) if current_el
               if @unclosed_html_tags.size > 0 && md[1] == @unclosed_html_tags.last.value
                 el = @unclosed_html_tags.pop
-                @tree = @stack.pop unless temp
-                temp = stack.pop
-                if el.options[:parse_type] == :raw
-                  raise Kramdown::Error, "Bug: please report!" if el.children.size > 1
-                  el.children.first.type = :raw if el.children.first
-                end
+                @tree = @stack.pop
+                current_el.options[:compact] = true if stack.size > 0
+                current_el = stack.pop || (@tree.type == :html_element ? @tree : nil)
               else
-                if HTML_BLOCK_ELEMENTS.include?(md[1]) && (temp || @tree).options[:parse_type] == :block
-                  warning("Found invalidly nested HTML closing tag for '#{md[1]}'")
-                end
-                if temp
-                  add_text(md.to_s, temp)
-                else
-                  add_text(md.to_s + "\n")
+                if !HTML_SPAN_ELEMENTS.include?(md[1]) && @tree.options[:parse_type] != :span
+                  warning("Found invalidly used HTML closing tag for '#{md[1]}'")
+                elsif current_el
+                  add_html_text(md.to_s, current_el)
                 end
               end
             else
-              if temp
-                add_text(line, temp)
+              if current_el
+                line.rstrip! if current_el.options[:parse_type] == :block
+                add_html_text(line + "\n", current_el)
               else
-                warning("Ignoring characters at the end of an HTML block line")
+                add_text(line + "\n")
               end
               line = ''
             end
           end
-          if temp && temp.children.last && temp.children.last.type == :text
-            temp.children.last.value << "\n"
-          end
-          if temp
-            if temp.options[:parse_type] == :span || temp.options[:parse_type] == :raw
-              result = @src.scan_until(/(?=#{HTML_BLOCK_START})|\Z/)
-              add_text(result, temp)
-            end
-            @stack.push(@tree)
-            @tree = temp
+          if current_el && (current_el.options[:parse_type] == :span || current_el.options[:parse_type] == :raw)
+            result = @src.scan_until(/(?=#{HTML_BLOCK_START})|\Z/)
+            last = current_el.children.last
+            result = "\n" + result if last.nil? || (last.type != :text && last.type != :raw) || last.value !~ /\n\Z/
+            add_html_text(result, current_el)
           end
           true
         end
       end
       Registry.define_parser(:block, :block_html, HTML_BLOCK_START, self)
+      # Return the HTML parse type defined by the string +val+, i.e. +:raw+ when "0", +:default+
+      # when "1", +:span+ when "span" and +:block+ when "block". If +val+ is nil or not one of
+      # these strings, +nil+ is returned (a warning is issued for an invalid string).
+      def get_parse_type(val)
+        case val
+        when "0" then :raw
+        when "1" then :default
+        when "span" then :span
+        when "block" then :block
+        when NilClass then nil
+        else
+          warning("Invalid markdown attribute val '#{val}', using default")
+          nil
+        end
+      end
+      # Special version of #add_text which appends +text+ to +tree+ as a :raw element if the parse
+      # type of +tree+ is :raw and as a :text element otherwise, extending the last child if it
+      # already has that type.
+      def add_html_text(text, tree)
+        type = (tree.options[:parse_type] == :raw ? :raw : :text)
+        if tree.children.last && tree.children.last.type == type
+          tree.children.last.value << text
+        elsif !text.empty?
+          tree.children << Element.new(type, text)
+        end
+      end
       ESCAPED_CHARS = /\\([\\.*_+-`()\[\]{}#!])/
@@ -734,21 +882,11 @@ module Kramdown
       # Parse the HTML entity at the current location.
       def parse_html_entity
         @src.pos += @src.matched_size
-        add_text(@src.matched)
+        @tree.children << Element.new(:entity, @src.matched)
       end
       Registry.define_parser(:span, :html_entity, REXML::Parsers::BaseParser::REFERENCE_RE, self)
-      SPECIAL_HTML_CHARS = /&|>|</
-      # Parse the special HTML characters at the current location.
-      def parse_special_html_chars
-        @src.pos += @src.matched_size
-        add_text(@src.matched)
-      end
-      Registry.define_parser(:span, :special_html_chars, SPECIAL_HTML_CHARS, self)
       LINE_BREAK = /(  |\\\\)(?=\n)/
       # Parse the line break at the current location.
@@ -759,22 +897,27 @@ module Kramdown
       Registry.define_parser(:span, :line_break, LINE_BREAK, self)
-      TYPOGRAPHIC_SYMS = [['---', '&mdash;'], ['--', '&ndash;'], ['...', '&hellip;'],
+      TYPOGRAPHIC_SYMS = [['---', :mdash], ['--', :ndash], ['...', :ellipsis],
                           ['\\<<', '&lt;&lt;'], ['\\>>', '&gt;&gt;'],
-                          ['<< ', '&laquo;&nbsp;'], [' >>', '&nbsp;&raquo;'],
-                          ['<<', '&laquo;'], ['>>', '&raquo;']]
+                          ['<< ', :laquo_space], [' >>', :raquo_space],
+                          ['<<', :laquo], ['>>', :raquo]]
       TYPOGRAPHIC_SYMS_SUBST = Hash[*TYPOGRAPHIC_SYMS.flatten]
       TYPOGRAPHIC_SYMS_RE = /#{TYPOGRAPHIC_SYMS.map {|k,v| Regexp.escape(k)}.join('|')}/
       # Parse the typographic symbols at the current location.
       def parse_typographic_syms
         @src.pos += @src.matched_size
-        add_text(TYPOGRAPHIC_SYMS_SUBST[@src.matched].dup)
+        val = TYPOGRAPHIC_SYMS_SUBST[@src.matched]
+        if val.kind_of?(Symbol)
+          @tree.children << Element.new(:typographic_sym, val)
+        else
+          add_text(val.dup)
+        end
       end
       Registry.define_parser(:span, :typographic_syms, TYPOGRAPHIC_SYMS_RE, self)
-      AUTOLINK_START = /<((mailto|https?|ftps?):.*?|.*?@.*?)>/
+      AUTOLINK_START = /<((mailto|https?|ftps?):.*?|\S*?@\S*?)>/
       # Parse the autolink at the current location.
       def parse_autolink
@@ -916,17 +1059,42 @@ module Kramdown
         elsif result = @src.scan(HTML_INSTRUCTION_RE)
           @tree.children << Element.new(:html_raw, result, :type => :span)
         elsif result = @src.scan(HTML_TAG_RE)
+          if HTML_BLOCK_ELEMENTS.include?(@src[1])
+            add_text(result)
+            return
+          end
           reset_pos = @src.pos
           attrs = {}
-          @src[2].scan(HTML_ATTRIBUTE_RE).each {|name,sep,val| attrs[name] = val}
+          @src[2].scan(HTML_ATTRIBUTE_RE).each {|name,sep,val| attrs[name] = val.gsub(/\n+/, ' ')}
+          do_parsing = @doc.options[:parse_span_html]
+          if val = get_parse_type(attrs.delete('markdown'))
+            if val == :block
+              warning("Cannot use block level parsing in span level HTML tag - using default mode")
+            elsif val == :span || val == :default
+              do_parsing = true
+            elsif val == :raw
+              do_parsing = false
+            end
+          end
+          do_parsing = false if HTML_PARSE_AS_RAW.include?(@src[1])
           el = Element.new(:html_element, @src[1], :attr => attrs, :type => :span)
+          stop_re = /<\/#{Regexp.escape(@src[1])}\s*>/
           if @src[4]
             @tree.children << el
+          elsif HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
+            warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it")
+            @tree.children << el
           else
-            stop_re = /<\/#{Regexp.escape(@src[1])}\s*>/
             if parse_spans(el, stop_re)
+              end_pos = @src.pos
               @src.scan(stop_re)
               @tree.children << el
+              if !do_parsing
+                el.children.clear
+                el.children << Element.new(:raw, @src.string[reset_pos...end_pos])
+              end
             else
               @src.pos = reset_pos
               add_text(result)
@@ -936,7 +1104,7 @@ module Kramdown
           add_text(@src.scan(/./))
         end
       end
-      Registry.define_parser(:span, :span_html, HTML_BLOCK_START, self)
+      Registry.define_parser(:span, :span_html, HTML_SPAN_START, self)
       LINK_TEXT_BRACKET_RE = /\\\[|\\\]|\[|\]/