RubyGems - maruku - Versions diffs - 0.6.1 → 0.7.0.beta1 - Mend

maruku 0.6.1 → 0.7.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (263)

checksums.yaml +7 -0
checksums.yaml.gz.sig +0 -0
data.tar.gz.sig +0 -0
data/MIT-LICENSE.txt +20 -0
data/bin/maruku +153 -152
data/bin/marutex +2 -29
data/data/entities.xml +261 -0
data/docs/math.md +14 -18
data/lib/maruku.rb +65 -77
data/lib/maruku/attributes.rb +109 -214
data/lib/maruku/defaults.rb +45 -67
data/lib/maruku/document.rb +43 -0
data/lib/maruku/element.rb +112 -0
data/lib/maruku/errors.rb +71 -0
data/lib/maruku/ext/div.rb +105 -113
data/lib/maruku/ext/fenced_code.rb +97 -0
data/lib/maruku/ext/math.rb +22 -26
data/lib/maruku/ext/math/elements.rb +20 -26
data/lib/maruku/ext/math/mathml_engines/blahtex.rb +92 -104
data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +33 -26
data/lib/maruku/ext/math/mathml_engines/none.rb +11 -19
data/lib/maruku/ext/math/mathml_engines/ritex.rb +2 -4
data/lib/maruku/ext/math/parsing.rb +107 -113
data/lib/maruku/ext/math/to_html.rb +184 -187
data/lib/maruku/ext/math/to_latex.rb +30 -21
data/lib/maruku/helpers.rb +158 -257
data/lib/maruku/html.rb +254 -0
data/lib/maruku/input/charsource.rb +272 -319
data/lib/maruku/input/extensions.rb +62 -63
data/lib/maruku/input/html_helper.rb +220 -189
data/lib/maruku/input/linesource.rb +90 -110
data/lib/maruku/input/mdline.rb +129 -0
data/lib/maruku/input/parse_block.rb +618 -612
data/lib/maruku/input/parse_doc.rb +145 -215
data/lib/maruku/input/parse_span.rb +658 -0
data/lib/maruku/input/rubypants.rb +200 -128
data/lib/maruku/inspect_element.rb +60 -0
data/lib/maruku/maruku.rb +10 -31
data/lib/maruku/output/entity_table.rb +33 -0
data/lib/maruku/output/s5/fancy.rb +462 -462
data/lib/maruku/output/s5/to_s5.rb +115 -135
data/lib/maruku/output/to_html.rb +898 -983
data/lib/maruku/output/to_latex.rb +561 -560
data/lib/maruku/output/to_markdown.rb +207 -162
data/lib/maruku/output/to_s.rb +11 -52
data/lib/maruku/string_utils.rb +129 -179
data/lib/maruku/toc.rb +185 -196
data/lib/maruku/version.rb +33 -38
data/spec/block_docs/abbrev.md +776 -0
data/{tests/unittest → spec/block_docs}/abbreviations.md +11 -20
data/{tests/unittest → spec/block_docs}/alt.md +2 -14
data/{tests/unittest/pending → spec/block_docs}/amps.md +1 -13
data/{tests/unittest → spec/block_docs}/attributes/att2.md +0 -12
data/{tests/unittest → spec/block_docs}/attributes/att3.md +2 -14
data/{tests/unittest → spec/block_docs}/attributes/attributes.md +12 -16
data/{tests/unittest → spec/block_docs}/attributes/circular.md +0 -12
data/{tests/unittest → spec/block_docs}/attributes/default.md +1 -13
data/{tests/unittest → spec/block_docs}/blank.md +0 -12
data/{tests/unittest → spec/block_docs}/blanks_in_code.md +16 -15
data/{tests/unittest/loss.md → spec/block_docs/bug_def.md} +6 -18
data/{tests/unittest → spec/block_docs}/bug_table.md +3 -15
data/{tests/unittest → spec/block_docs}/code.md +7 -14
data/{tests/unittest → spec/block_docs}/code2.md +4 -14
data/{tests/unittest → spec/block_docs}/code3.md +12 -16
data/{tests/unittest → spec/block_docs}/data_loss.md +2 -14
data/{tests/unittest → spec/block_docs}/divs/div1.md +0 -12
data/{tests/unittest → spec/block_docs}/divs/div2.md +0 -12
data/{tests/unittest → spec/block_docs}/divs/div3_nest.md +3 -15
data/{tests/unittest → spec/block_docs}/easy.md +1 -13
data/spec/block_docs/email.md +29 -0
data/{tests/unittest/pending → spec/block_docs}/empty_cells.md +3 -15
data/{tests/unittest → spec/block_docs}/encoding/iso-8859-1.md +1 -14
data/{tests/unittest → spec/block_docs}/encoding/utf-8.md +0 -12
data/{tests/unittest → spec/block_docs}/entities.md +27 -29
data/{tests/unittest/notyet → spec/block_docs}/escape.md +2 -14
data/{tests/unittest → spec/block_docs}/escaping.md +11 -22
data/{tests/unittest → spec/block_docs}/extra_dl.md +2 -13
data/{tests/unittest → spec/block_docs}/extra_header_id.md +14 -20
data/{tests/unittest → spec/block_docs}/extra_table1.md +3 -15
data/spec/block_docs/fenced_code_blocks.md +66 -0
data/spec/block_docs/fenced_code_blocks_highlighted.md +18 -0
data/{tests/unittest → spec/block_docs}/footnotes.md +12 -24
data/spec/block_docs/footnotes2.md +78 -0
data/spec/block_docs/hard.md +25 -0
data/spec/block_docs/header_after_par.md +62 -0
data/{tests/unittest → spec/block_docs}/headers.md +10 -18
data/{tests/unittest → spec/block_docs}/hex_entities.md +7 -18
data/{tests/unittest → spec/block_docs}/hrule.md +5 -12
data/{tests/unittest → spec/block_docs}/html3.md +1 -13
data/{tests/unittest → spec/block_docs}/html4.md +2 -14
data/{tests/unittest → spec/block_docs}/html5.md +2 -14
data/spec/block_docs/html_block_in_para.md +22 -0
data/spec/block_docs/html_inline.md +25 -0
data/spec/block_docs/html_trailing.md +31 -0
data/spec/block_docs/ie.md +62 -0
data/spec/block_docs/iframe.md +29 -0
data/{tests/unittest → spec/block_docs}/images.md +22 -28
data/{tests/unittest → spec/block_docs}/images2.md +7 -17
data/{tests/unittest → spec/block_docs}/inline_html.md +37 -67
data/{tests/unittest → spec/block_docs}/inline_html2.md +1 -13
data/spec/block_docs/inline_html_beginning.md +10 -0
data/spec/block_docs/issue20.md +9 -0
data/spec/block_docs/issue26.md +22 -0
data/spec/block_docs/issue29.md +9 -0
data/spec/block_docs/issue30.md +30 -0
data/spec/block_docs/issue31.md +25 -0
data/spec/block_docs/issue40.md +40 -0
data/spec/block_docs/issue64.md +55 -0
data/spec/block_docs/issue67.md +19 -0
data/spec/block_docs/issue70.md +11 -0
data/spec/block_docs/issue72.md +17 -0
data/spec/block_docs/issue74.md +38 -0
data/spec/block_docs/issue79.md +15 -0
data/spec/block_docs/issue83.md +13 -0
data/spec/block_docs/issue85.md +25 -0
data/spec/block_docs/issue88.md +19 -0
data/spec/block_docs/issue89.md +12 -0
data/spec/block_docs/issue90.md +38 -0
data/{tests/unittest/pending → spec/block_docs}/link.md +21 -18
data/{tests/unittest → spec/block_docs}/links.md +33 -32
data/spec/block_docs/links2.md +21 -0
data/{tests/unittest → spec/block_docs}/list1.md +0 -12
data/{tests/unittest → spec/block_docs}/list12.md +2 -14
data/{tests/unittest → spec/block_docs}/list2.md +2 -14
data/spec/block_docs/list_multipara.md +42 -0
data/{tests/unittest → spec/block_docs}/lists.md +28 -29
data/{tests/unittest → spec/block_docs}/lists10.md +2 -14
data/spec/block_docs/lists11.md +23 -0
data/spec/block_docs/lists12.md +43 -0
data/spec/block_docs/lists13.md +55 -0
data/spec/block_docs/lists14.md +61 -0
data/spec/block_docs/lists15.md +36 -0
data/spec/block_docs/lists6.md +88 -0
data/spec/block_docs/lists7b.md +58 -0
data/spec/block_docs/lists9.md +53 -0
data/{tests/unittest → spec/block_docs}/lists_after_paragraph.md +19 -25
data/spec/block_docs/lists_blank.md +35 -0
data/{tests/unittest/list3.md → spec/block_docs/lists_blockquote_code.md} +2 -14
data/{tests/unittest/list4.md → spec/block_docs/lists_need_blank_line.md} +50 -21
data/spec/block_docs/lists_nested.md +44 -0
data/spec/block_docs/lists_nested_blankline.md +28 -0
data/spec/block_docs/lists_nested_deep.md +43 -0
data/{tests/unittest → spec/block_docs}/lists_ol.md +37 -54
data/spec/block_docs/lists_paraindent.md +47 -0
data/spec/block_docs/lists_tab.md +54 -0
data/spec/block_docs/loss.md +17 -0
data/spec/block_docs/math-blahtex/equations.md +30 -0
data/spec/block_docs/math-blahtex/inline.md +48 -0
data/spec/block_docs/math-blahtex/math2.md +45 -0
data/spec/block_docs/math-blahtex/table.md +25 -0
data/spec/block_docs/math/embedded_invalid_svg.md +79 -0
data/spec/block_docs/math/embedded_svg.md +97 -0
data/spec/block_docs/math/equations.md +44 -0
data/{tests/unittest → spec/block_docs}/math/inline.md +7 -19
data/spec/block_docs/math/math2.md +45 -0
data/{tests/unittest → spec/block_docs}/math/notmath.md +0 -12
data/spec/block_docs/math/raw_mathml.md +87 -0
data/spec/block_docs/math/table.md +25 -0
data/{tests/unittest → spec/block_docs}/math/table2.md +5 -17
data/{tests/unittest → spec/block_docs}/misc_sw.md +181 -118
data/{tests/unittest → spec/block_docs}/olist.md +6 -18
data/{tests/unittest → spec/block_docs}/one.md +0 -12
data/{tests/unittest → spec/block_docs}/paragraph.md +0 -12
data/{tests/unittest → spec/block_docs}/paragraph_rules/dont_merge_ref.md +4 -12
data/{tests/unittest → spec/block_docs}/paragraph_rules/tab_is_blank.md +0 -12
data/{tests/unittest → spec/block_docs}/paragraphs.md +1 -13
data/{tests/unittest → spec/block_docs}/recover/recover_links.md +4 -16
data/{tests/unittest/pending/ref.md → spec/block_docs/ref_with_period.md} +7 -16
data/spec/block_docs/ref_with_title.md +22 -0
data/{tests/unittest → spec/block_docs}/references/long_example.md +16 -23
data/{tests/unittest → spec/block_docs}/references/spaces_and_numbers.md +0 -12
data/{tests/unittest → spec/block_docs}/smartypants.md +24 -31
data/{tests/unittest → spec/block_docs}/syntax_hl.md +13 -17
data/{tests/unittest → spec/block_docs}/table_attributes.md +2 -14
data/spec/block_docs/tables.md +58 -0
data/{tests/unittest → spec/block_docs}/test.md +1 -13
data/{tests/unittest/notyet → spec/block_docs}/ticks.md +1 -13
data/spec/block_docs/toc.md +87 -0
data/{tests/unittest/notyet → spec/block_docs}/triggering.md +14 -25
data/{tests/unittest → spec/block_docs}/underscore_in_words.md +0 -12
data/{tests/unittest → spec/block_docs}/wrapping.md +4 -16
data/spec/block_docs/xml.md +33 -0
data/{tests/unittest → spec/block_docs}/xml2.md +0 -12
data/spec/block_docs/xml3.md +24 -0
data/{tests/unittest → spec/block_docs}/xml_instruction.md +9 -20
data/spec/block_spec.rb +110 -0
data/spec/cli_spec.rb +8 -0
data/spec/span_spec.rb +256 -0
data/spec/spec_helper.rb +2 -0
data/spec/to_html_utf8_spec.rb +13 -0
metadata +205 -243
metadata.gz.sig +3 -0
data/Rakefile +0 -48
data/bin/marudown +0 -29
data/bin/marutest +0 -345
data/docs/changelog.md +0 -334
data/lib/maruku/errors_management.rb +0 -92
data/lib/maruku/ext/math/latex_fix.rb +0 -12
data/lib/maruku/input/parse_span_better.rb +0 -746
data/lib/maruku/input/type_detection.rb +0 -147
data/lib/maruku/output/to_latex_entities.rb +0 -367
data/lib/maruku/output/to_latex_strings.rb +0 -64
data/lib/maruku/structures.rb +0 -167
data/lib/maruku/structures_inspect.rb +0 -87
data/lib/maruku/structures_iterators.rb +0 -61
data/lib/maruku/tests/benchmark.rb +0 -82
data/lib/maruku/tests/new_parser.rb +0 -373
data/lib/maruku/tests/tests.rb +0 -136
data/lib/maruku/usage/example1.rb +0 -33
data/tests/bugs/code_in_links.md +0 -101
data/tests/bugs/complex_escaping.md +0 -38
data/tests/math/syntax.md +0 -46
data/tests/math_usage/document.md +0 -13
data/tests/others/abbreviations.md +0 -11
data/tests/others/blank.md +0 -4
data/tests/others/code.md +0 -5
data/tests/others/code2.md +0 -8
data/tests/others/code3.md +0 -16
data/tests/others/email.md +0 -4
data/tests/others/entities.md +0 -19
data/tests/others/escaping.md +0 -16
data/tests/others/extra_dl.md +0 -101
data/tests/others/extra_header_id.md +0 -13
data/tests/others/extra_table1.md +0 -40
data/tests/others/footnotes.md +0 -17
data/tests/others/headers.md +0 -10
data/tests/others/hrule.md +0 -10
data/tests/others/images.md +0 -20
data/tests/others/inline_html.md +0 -42
data/tests/others/links.md +0 -38
data/tests/others/list1.md +0 -4
data/tests/others/list2.md +0 -5
data/tests/others/list3.md +0 -8
data/tests/others/lists.md +0 -32
data/tests/others/lists_after_paragraph.md +0 -44
data/tests/others/lists_ol.md +0 -39
data/tests/others/misc_sw.md +0 -105
data/tests/others/one.md +0 -1
data/tests/others/paragraphs.md +0 -13
data/tests/others/sss06.md +0 -352
data/tests/others/test.md +0 -4
data/tests/s5/s5profiling.md +0 -48
data/tests/unittest/bug_def.md +0 -28
data/tests/unittest/email.md +0 -32
data/tests/unittest/html2.md +0 -34
data/tests/unittest/ie.md +0 -61
data/tests/unittest/links2.md +0 -34
data/tests/unittest/lists11.md +0 -28
data/tests/unittest/lists6.md +0 -53
data/tests/unittest/lists9.md +0 -76
data/tests/unittest/math/equations.md +0 -86
data/tests/unittest/math/math2.md +0 -57
data/tests/unittest/math/table.md +0 -37
data/tests/unittest/notyet/header_after_par.md +0 -70
data/tests/unittest/red_tests/abbrev.md +0 -1388
data/tests/unittest/red_tests/lists7.md +0 -68
data/tests/unittest/red_tests/lists7b.md +0 -128
data/tests/unittest/red_tests/lists8.md +0 -76
data/tests/unittest/red_tests/xml.md +0 -70
data/tests/unittest/xml3.md +0 -38
data/tests/utf8-files/simple.md +0 -1
data/unit_test_block.sh +0 -5
data/unit_test_span.sh +0 -3

data/lib/maruku/input/linesource.rb CHANGED

@@ -1,111 +1,91 @@
-#--
-#   Copyright (C) 2006  Andrea Censi  <andrea (at) rubyforge.org>
-#
-# This file is part of Maruku.
-#
-#   Maruku is free software; you can redistribute it and/or modify
-#   it under the terms of the GNU General Public License as published by
-#   the Free Software Foundation; either version 2 of the License, or
-#   (at your option) any later version.
-#
-#   Maruku is distributed in the hope that it will be useful,
-#   but WITHOUT ANY WARRANTY; without even the implied warranty of
-#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#   GNU General Public License for more details.
-#
-#   You should have received a copy of the GNU General Public License
-#   along with Maruku; if not, write to the Free Software
-#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-#++
-module MaRuKu; module In; module Markdown; module BlockLevelParser
-# This represents a source of lines that can be consumed.
-#
-# It is the twin of CharSource.
-#
-class LineSource
-	include MaRuKu::Strings
-	attr_reader :parent
-	def initialize(lines, parent=nil, parent_offset=nil)
-		raise "NIL lines? " if not lines
-		@lines = lines
-		@lines_index = 0
-		@parent = parent
-		@parent_offset = parent_offset
-	end
-	def cur_line()  @lines[@lines_index] end
-	def next_line() @lines[@lines_index+1] end
-	def shift_line()
-		raise "Over the rainbow" if @lines_index >= @lines.size
-		l = @lines[@lines_index]
-		@lines_index += 1
-		return l
-	end
-	def ignore_line
-		raise "Over the rainbow" if @lines_index >= @lines.size
-		@lines_index += 1
-	end
-	def describe
-		s = "At line #{original_line_number(@lines_index)}\n"
-		context = 3 # lines
-		from = [@lines_index-context, 0].max
-		to   = [@lines_index+context, @lines.size-1].min
-		for i in from..to
-			prefix = (i == @lines_index) ? '--> ' : '    ';
-			l = @lines[i]
-			s += "%10s %4s|%s" %
-				[@lines[i].md_type.to_s, prefix, l]
-			s += "|\n"
-		end
-#		if @parent
-#			s << "Parent context is: \n"
-#			s << add_tabs(@parent.describe,1,'|')
-#		end
-		s
-	end
-	def original_line_number(index)
-		if @parent
-			return index + @parent.original_line_number(@parent_offset)
-		else
-			1 + index
-		end
-	end
-	def cur_index
-		@lines_index
-	end
-	# Returns the type of next line as a string
-	# breaks at first :definition
-	def tell_me_the_future
-		s = ""; num_e = 0;
-		for i in @lines_index..@lines.size-1
-			c = case @lines[i].md_type
-				when :text; "t"
-				when :empty; num_e+=1; "e"
-				when :definition; "d"
-				else "o"
-			end
-			s += c
-			break if c == "d" or num_e>1
-		end
-		s
-	end
-end # linesource
-end end end end # block
+module MaRuKu::In::Markdown::BlockLevelParser
+  # This represents a source of lines that can be consumed.
+  #
+  # It is the twin of CharSource.
+  #
+  class LineSource
+    attr_reader :parent
+    def initialize(lines, parent=nil, parent_offset=nil)
+      raise "NIL lines? " unless lines
+      @lines = lines.map {|l| l.kind_of?(MaRuKu::MDLine) ? l : MaRuKu::MDLine.new(l) }
+      @lines_index = 0
+      @parent = parent
+      @parent_offset = parent_offset
+    end
+    def cur_line
+      @lines[@lines_index]
+    end
+    def next_line
+      @lines[@lines_index + 1]
+    end
+    def shift_line
+      raise "Over the rainbow" if @lines_index >= @lines.size
+      l = @lines[@lines_index]
+      @lines_index += 1
+      l
+    end
+    def ignore_line
+      raise "Over the rainbow" if @lines_index >= @lines.size
+      @lines_index += 1
+    end
+    def describe
+      s = "At line #{original_line_number(@lines_index)}\n"
+      context = 3 # lines
+      from = [@lines_index - context, 0].max
+      to   = [@lines_index + context, @lines.size - 1].min
+      from.upto(to) do |i|
+        prefix = (i == @lines_index) ? '--> ' : '    ';
+        l = @lines[i]
+        s += "%10s %4s|%s" %
+          [@lines[i].md_type.to_s, prefix, l]
+        s += "|\n"
+      end
+      s
+    end
+    def original_line_number(index)
+      if @parent
+        index + @parent.original_line_number(@parent_offset)
+      else
+        1 + index
+      end
+    end
+    def cur_index
+      @lines_index
+    end
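+    # Returns a string with one letter per upcoming line, starting at the
+    # current line: "t" (:text), "e" (:empty), "d" (:definition), "o" (other).
+    # Stops after the first :definition or after the second :empty line.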
+    def tell_me_the_future
+      s = ""
+      num_e = 0
+      @lines_index.upto(@lines.size - 1) do |i|
+        c = case @lines[i].md_type
+            when :text; "t"
+            when :empty; num_e += 1; "e"
+            when :definition; "d"
+            else "o"
+            end
+        s << c
+        break if c == "d" or num_e > 1
+      end
+      s
+    end
+  end # linesource
+end

data/lib/maruku/input/mdline.rb ADDED

@@ -0,0 +1,129 @@
+# This code does the classification of lines for block-level parsing.
+module MaRuKu
+  # Represents a single line in a Markdown source file, as produced by
+  # LineSource.
+  class MDLine < String
+    def md_type
+      @md_type ||= line_md_type
+    end
+    # Returns the number of leading spaces on this line,
+    # considering that a tab counts as {MaRuKu::Strings::TAB_SIZE} spaces.
+    # Other leading whitespace characters are matched but not counted.
+    #
+    # @return [Fixnum]
+    def number_of_leading_spaces
+      if self =~ /\A\s+/
+        spaces = $&
+        spaces.count(" ") + spaces.count("\t") * MaRuKu::Strings::TAB_SIZE
+      else
+        0
+      end
+    end
+    def gsub!(*args)
+      # Any in-place-modification method should reset the md_type
+      @md_type = nil
+      super
+    end
+    private
+    def line_md_type
+      # The order of evaluation is important (:text is a catch-all)
+      return :text           if self =~ /\A[a-zA-Z]/
+      return :empty          if self =~ /\A\s*\z/
+      return :footnote_text  if self =~ FootnoteText
+      return :ref_definition if self =~ LinkRegex || self =~ IncompleteLink
+      return :abbreviation   if self =~ Abbreviation
+      return :definition     if self =~ Definition
+      # I had a bug with emails and urls at the beginning of the
+      # line that were mistaken for raw_html
+      return :text           if self =~ /\A[ ]{0,3}<([^:@>]+?@[^:@>]+?)>/
+      return :text           if self =~ /\A[ ]{0,3}<http:/
+      # raw html is like PHP Markdown Extra: at most three spaces before
+      return :xml_instr      if self =~ /\A\s*<\?/
+      return :raw_html       if self =~ %r{^[ ]{0,3}</?\s*\w+}
+      return :raw_html       if self =~ /\A[ ]{0,3}<\!\-\-/
+      return :header1        if self =~ /\A(=)+/
+      return :header2        if self =~ /\A([-\s])+\z/
+      return :header3        if self =~ /\A(#)+\s*\S+/
+      # at least three asterisks/hyphens/underscores on a line, and only whitespace
+      return :hrule          if self =~ /\A(\s*[\*\-_]\s*){3,}\z/
+      return :ulist          if self =~ /\A([ ]{0,3}|\t)([\*\-\+])\s+.*/
+      return :olist          if self =~ /\A([ ]{0,3}|\t)\d+\.\s+.*/
+      return :code           if number_of_leading_spaces >= 4
+      return :quote          if self =~ /\A>/
+      return :ald            if self =~ AttributeDefinitionList
+      return :ial            if self =~ InlineAttributeList
+      return :text # else, it's just text
+    end
+  end
+  # MacRuby has trouble with commented regexes, so just put the expanded form
+  # in a comment.
+  # $1 = id   $2 = attribute list
+  AttributeDefinitionList = /\A\s{0,3}\{([\w\s]+)\}:\s*(.*?)\s*\z/
+  #
+  InlineAttributeList = /\A\s{0,3}\{([:#\.].*?)\}\s*\z/
+  # Example:
+  #     ^:blah blah
+  #     ^: blah blah
+  #     ^   : blah blah
+  Definition = /\A[ ]{0,3}:\s*(\S.*)\z/
+  # %r{
+  #   ^ # begin of line
+  #   [ ]{0,3} # up to 3 spaces
+  #   : # colon
+  #   \s* # whitespace
+  #   (\S.*) # the text    = $1
+  #   $ # end of line
+  # }x
+  # Example:
+  #     *[HTML]: Hyper Text Markup Language
+  Abbreviation = /\A[ ]{0,3}\*\[([^\]]+)\]:\s*(\S.*\S)*\s*\z/
+  # %r{
+  #   ^  # begin of line
+  #   [ ]{0,3} # up to 3 spaces
+  #   \* # one asterisk
+  #   \[ # opening bracket
+  #   ([^\]]+) # any non-closing bracket:  id = $1
+  #   \] # closing bracket
+  #   :  # colon
+  #   \s* # whitespace
+  #   (\S.*\S)* #           definition=$2
+  #   \s* # strip this whitespace
+  #   $   # end of line
+  # }x
+  FootnoteText = /\A[ ]{0,3}\[(\^.+)\]:\s*(\S.*)?\z/
+  # %r{
+  #   ^  # begin of line
+  #   [ ]{0,3} # up to 3 spaces
+  #   \[(\^.+)\]: # id = $1 (including '^')
+  #   \s*(\S.*)?$    # text = $2 (optional)
+  # }x
+  # This regex is taken from BlueCloth sources
+  # Link defs are in the form: ^[id]: \n? url "optional title"
+  LinkRegex = /\A[ ]{0,3}\[([^\[\]]+)\]:[ ]*<?([^>\s]+)>?[ ]*(?:(?:(?:"([^"]+)")|(?:'([^']+)')|(?:\(([^\(\)]+)\)))\s*(.+)?)?/
+  #%r{
+  # ^[ ]{0,3}\[([^\[\]]+)\]:    # id = $1
+  #   [ ]*
+  # <?([^>\s]+)>?       # url = $2
+  #   [ ]*
+  # (?: # Titles are delimited by "quotes" or (parens).
+  #   (?:(?:"([^"]+)")|(?:'([^']+)')|(?:\(([^\(\)]+)\))) # title = $3, $4, or $5
+  #   \s*(.+)? # stuff = $6
+  # )?  # title is optional
+  #}x
+  IncompleteLink = /\A[ ]{0,3}\[([^\[\]]+?)\]:\s*\z/
+  # Table syntax: http://michelf.ca/projects/php-markdown/extra/#table
+  # | -------------:| ------------------------------ |
+  TableSeparator = /\A(?>\|?\s*\:?\-+\:?\s*\|?)+?\z/
+end

data/lib/maruku/input/parse_block.rb CHANGED

@@ -1,615 +1,621 @@
-#--
-#   Copyright (C) 2006  Andrea Censi  <andrea (at) rubyforge.org>
-#
-# This file is part of Maruku.
-#
-#   Maruku is free software; you can redistribute it and/or modify
-#   it under the terms of the GNU General Public License as published by
-#   the Free Software Foundation; either version 2 of the License, or
-#   (at your option) any later version.
-#
-#   Maruku is distributed in the hope that it will be useful,
-#   but WITHOUT ANY WARRANTY; without even the implied warranty of
-#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#   GNU General Public License for more details.
-#
-#   You should have received a copy of the GNU General Public License
-#   along with Maruku; if not, write to the Free Software
-#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-#++
+require 'set'
 module MaRuKu; module In; module Markdown; module BlockLevelParser
-	include Helpers
-	include MaRuKu::Strings
-	include MaRuKu::In::Markdown::SpanLevelParser
-	class BlockContext < Array
-		def describe
-			n = 5
-			desc = size > n ? self[-n,n] : self
-			"Last #{n} elements: "+
-			desc.map{|x| "\n -" + x.inspect}.join
-		end
-	end
-	# Splits the string and calls parse_lines_as_markdown
-	def parse_text_as_markdown(text)
-		lines =  split_lines(text)
-		src = LineSource.new(lines)
-		return parse_blocks(src)
-	end
-	# Input is a LineSource
-	def parse_blocks(src)
-		output = BlockContext.new
-		# run state machine
-		while src.cur_line
-			next if check_block_extensions(src, output, src.cur_line)
-#  Prints detected type (useful for debugging)
-#			puts "#{src.cur_line.md_type}|#{src.cur_line}"
-			case src.cur_line.md_type
-				when :empty;
-					output.push :empty
-					src.ignore_line
-				when :ial
-					m =  InlineAttributeList.match src.shift_line
-					content = m[1] ||  ""
-#					puts "Content: #{content.inspect}"
-					src2 = CharSource.new(content, src)
-					interpret_extension(src2, output, [nil])
-				when :ald
-					output.push read_ald(src)
-				when :text
-					# paragraph, or table, or definition list
-					read_text_material(src, output)
-				when :header2, :hrule
-					# hrule
-					src.shift_line
-					output.push md_hrule()
-				when :header3
-					output.push read_header3(src)
-				when :ulist, :olist
-					list_type = src.cur_line.md_type == :ulist ? :ul : :ol
-					li = read_list_item(src)
-					# append to current list if we have one
-					if output.last.kind_of?(MDElement) &&
-						output.last.node_type == list_type then
-						output.last.children << li
-					else
-						output.push md_el(list_type, [li])
-					end
-				when :quote;    output.push read_quote(src)
-				when :code;     e = read_code(src); output << e if e
-				when :raw_html; e = read_raw_html(src); output << e if e
-				when :footnote_text;   output.push read_footnote_text(src)
-				when :ref_definition;
-					if src.parent && (src.cur_index == 0)
-						read_text_material(src, output)
-					else
-						read_ref_definition(src, output)
-					end
-				when :abbreviation;    output.push read_abbreviation(src)
-				when :xml_instr;       read_xml_instruction(src, output)
-				when :metadata;
-					maruku_error "Please use the new meta-data syntax: \n"+
-					"  http://maruku.rubyforge.org/proposal.html\n", src
-					src.ignore_line
-				else # warn if we forgot something
-					md_type = src.cur_line.md_type
-					line = src.cur_line
-					maruku_error "Ignoring line '#{line}' type = #{md_type}", src
-					src.shift_line
-			end
-		end
-		merge_ial(output, src, output)
-		output.delete_if {|x| x.kind_of?(MDElement) &&
-			x.node_type == :ial}
-		# get rid of empty line markers
-		output.delete_if {|x| x == :empty}
-		# See for each list if we can omit the paragraphs and use li_span
-		# TODO: do this after
-		output.each do |c|
-			# Remove paragraphs that we can get rid of
-			if [:ul,:ol].include? c.node_type
-				if c.children.all? {|li| !li.want_my_paragraph} then
-					c.children.each do |d|
-						d.node_type = :li_span
-						d.children = d.children[0].children
-					end
-				end
-			end
-			if c.node_type == :definition_list
-				if c.children.all?{|defi| !defi.want_my_paragraph} then
-					c.children.each do |definition|
-						definition.definitions.each do |dd|
-							dd.children = dd.children[0].children
-						end
-					end
-				end
-			end
-		end
-		output
-	end
-	def read_text_material(src, output)
-		if src.cur_line =~ MightBeTableHeader and
-			(src.next_line && src.next_line =~ TableSeparator)
-			output.push read_table(src)
-		elsif [:header1,:header2].include? src.next_line.md_type
-			output.push read_header12(src)
-		elsif eventually_comes_a_def_list(src)
-		 	definition = read_definition(src)
-			if output.last.kind_of?(MDElement) &&
-				output.last.node_type == :definition_list then
-				output.last.children << definition
-			else
-				output.push md_el(:definition_list, [definition])
-			end
-		else # Start of a paragraph
-			output.push read_paragraph(src)
-		end
-	end
-	def read_ald(src)
-		if (l=src.shift_line) =~ AttributeDefinitionList
-			id = $1;   al=$2;
-			al = read_attribute_list(CharSource.new(al,src), context=nil, break_on=[nil])
-			self.ald[id] = al;
-			return md_ald(id, al)
-		else
-			maruku_error "Bug Bug:\n#{l.inspect}"
-			return nil
-		end
-	end
-	# reads a header (with ----- or ========)
-	def read_header12(src)
-		line = src.shift_line.strip
-		al = nil
-		# Check if there is an IAL
-		if new_meta_data? and line =~ /^(.*)\{(.*)\}\s*$/
-			line = $1.strip
-			ial = $2
-			al  = read_attribute_list(CharSource.new(ial,src), context=nil, break_on=[nil])
-		end
-		text = parse_lines_as_span [ line ]
-		level = src.cur_line.md_type == :header2 ? 2 : 1;
-		src.shift_line
-		return md_header(level, text, al)
-	end
-	# reads a header like '#### header ####'
-	def read_header3(src)
-		line = src.shift_line.strip
-		al = nil
-		# Check if there is an IAL
-		if new_meta_data? and line =~ /^(.*)\{(.*)\}\s*$/
-			line = $1.strip
-			ial = $2
-			al  = read_attribute_list(CharSource.new(ial,src), context=nil, break_on=[nil])
-		end
-		level = num_leading_hashes(line)
-		text = parse_lines_as_span [strip_hashes(line)]
-		return md_header(level, text, al)
-	end
-	def read_xml_instruction(src, output)
-		m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
-		raise "BugBug" if not m
-		target = m[2] || ''
-		code = m[3]
-		until code =~ /\?>/
-			code += "\n"+src.shift_line
-		end
-		if not code =~ (/\?>\s*$/)
-			garbage = (/\?>(.*)$/.match(code))[1]
-			maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n"+
-				add_tabs(code, 1, '|'), src
-		end
-		code.gsub!(/\?>\s*$/, '')
-		if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
-			result = safe_execute_code(self, code)
-			if result
-				if result.kind_of? String
-					raise "Not expected"
-				else
-					output.push(*result)
-				end
-			end
-		else
-			output.push md_xml_instr(target, code)
-		end
-	end
-	def read_raw_html(src)
-		h = HTMLHelper.new
-		begin
-			h.eat_this(l=src.shift_line)
-#			puts "\nBLOCK:\nhtml -> #{l.inspect}"
-			while src.cur_line and not h.is_finished?
-				l=src.shift_line
-#				puts "html -> #{l.inspect}"
-				h.eat_this "\n"+l
-			end
-		rescue Exception => e
-			ex = e.inspect + e.backtrace.join("\n")
-			maruku_error "Bad block-level HTML:\n#{add_tabs(ex,1,'|')}\n", src
-		end
-		if not (h.rest =~ /^\s*$/)
-			maruku_error "Could you please format this better?\n"+
-				"I see that #{h.rest.inspect} is left after the raw HTML.", src
-		end
-		raw_html = h.stuff_you_read
-		return md_html(raw_html)
-	end
-	def read_paragraph(src)
-		lines = [src.shift_line]
-		while src.cur_line
-			# :olist does not break
-			case t = src.cur_line.md_type
-				when :quote,:header3,:empty,:ref_definition,:ial #,:xml_instr,:raw_html
-					break
-				when :olist,:ulist
-					break if src.next_line.md_type == t
-			end
-			break if src.cur_line.strip.size == 0
-			break if [:header1,:header2].include? src.next_line.md_type
-			break if any_matching_block_extension?(src.cur_line)
-			lines << src.shift_line
-		end
-#		dbg_describe_ary(lines, 'PAR')
-		children = parse_lines_as_span(lines, src)
-		return md_par(children)
-	end
-	# Reads one list item, either ordered or unordered.
-	def read_list_item(src)
-		parent_offset = src.cur_index
-		item_type = src.cur_line.md_type
-		first = src.shift_line
-		indentation = spaces_before_first_char(first)
-		break_list = [:ulist, :olist, :ial]
-		# Ugly things going on inside `read_indented_content`
-		lines, want_my_paragraph =
-			read_indented_content(src,indentation, break_list, item_type)
-		# add first line
-			# Strip first '*', '-', '+' from first line
-			stripped = first[indentation, first.size-1]
-		lines.unshift stripped
-		# dbg_describe_ary(lines, 'LIST ITEM ')
-		src2 = LineSource.new(lines, src, parent_offset)
-		children = parse_blocks(src2)
-		with_par = want_my_paragraph || (children.size>1)
-		return md_li(children, with_par)
-	end
-	def read_abbreviation(src)
-		if not (l=src.shift_line) =~ Abbreviation
-			maruku_error "Bug: it's Andrea's fault. Tell him.\n#{l.inspect}"
-		end
-		abbr = $1
-		desc = $2
-		if (not abbr) or (abbr.size==0)
-			maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
-		end
-		self.abbreviations[abbr] = desc
-		return md_abbr_def(abbr, desc)
-	end
-	# Reads the text of one footnote.
-	#
-	# The first line is matched against `FootnoteText` to obtain the id and
-	# the opening text; continuation lines come from `read_indented_content`.
-	# The collected lines are parsed as blocks, wrapped with `md_footnote`,
-	# registered in `self.footnotes` under the id, and returned.
-	def read_footnote_text(src)
-		parent_offset = src.cur_index
-		first = src.shift_line
-		if not first =~ FootnoteText
-			maruku_error "Bug (it's Andrea's fault)"
-		end
-		id = $1
-		text = $2
-		# Ugly things going on inside `read_indented_content`
-		# Continuation lines are read with a fixed indentation of 4.
-		indentation = 4 #first.size-text.size
-#		puts "id =_#{id}_; text=_#{text}_ indent=#{indentation}"
-		break_list = [:footnote_text, :ref_definition, :definition, :abbreviation]
-		item_type = :footnote_text
-		lines, want_my_paragraph =
-			read_indented_content(src,indentation, break_list, item_type)
-		# add first line
-		# (only when the opening line carried non-blank text)
-		if text && text.strip != "" then lines.unshift text end
-#		dbg_describe_ary(lines, 'FOOTNOTE')
-		src2 = LineSource.new(lines, src, parent_offset)
-		children = parse_blocks(src2)
-		e = md_footnote(id, children)
-		self.footnotes[id] = e
-		return e
-	end
-	# This is the only ugly function in the code base.
-	# It is used to read list items, descriptions, footnote text
-	#
-	# Collects the lines that belong to an indented construct, removing
-	# `indentation` from each non-empty one via `strip_indent`.
-	#
-	# src::         line source to consume from
-	# indentation:: indent expected on lines that follow an empty line
-	# break_list::  line types that end the content before any empty line
-	# item_type::   line type of the enclosing item, used for the flag below
-	#
-	# Returns `[lines, want_my_paragraph]`. The flag is true when content
-	# followed an empty line, or when an empty line was seen and reading
-	# stopped on a line of type `item_type`.
-	def read_indented_content(src, indentation, break_list, item_type)
-		lines =[]
-		# collect all indented lines
-		saw_empty = false; saw_anything_after = false
-		while src.cur_line
-#			puts "Reading indent = #{indentation} #{src.cur_line.inspect}"
-			#puts "#{src.cur_line.md_type} #{src.cur_line.inspect}"
-			# Empty lines are kept as-is and remembered.
-			if src.cur_line.md_type == :empty
-				saw_empty = true
-				lines << src.shift_line
-				next
-			end
-			# after a white line
-			if saw_empty
-				# we expect things to be properly aligned
-				if (ns=number_of_leading_spaces(src.cur_line)) < indentation
-					#puts "breaking for spaces, only #{ns}: #{src.cur_line}"
-					break
-				end
-				saw_anything_after = true
-			else
-				# Before any empty line, only the listed line types end the content.
-#				if src.cur_line[0] != ?\
-					break if break_list.include? src.cur_line.md_type
-#				end
-#				break if src.cur_line.md_type != :text
-			end
-			stripped = strip_indent(src.shift_line, indentation)
-			lines << stripped
-			#puts "Accepted as #{stripped.inspect}"
-			# You are only required to indent the first line of
-			# a child paragraph.
-			if stripped.md_type == :text
-				while src.cur_line && (src.cur_line.md_type == :text)
-					lines << strip_indent(src.shift_line, indentation)
-				end
-			end
-		end
-		want_my_paragraph = saw_anything_after ||
-			(saw_empty && (src.cur_line  && (src.cur_line.md_type == item_type)))
-#		dbg_describe_ary(lines, 'LI')
-		# create a new context
-		# Trailing empty lines are dropped from the collected content.
-		while lines.last && (lines.last.md_type == :empty)
-			lines.pop
-		end
-		return lines, want_my_paragraph
-	end
-	# Reads a block quote: consumes consecutive :quote lines, passes each
-	# through `unquote`, parses the result as blocks in a nested LineSource,
-	# and returns the `md_quote` element.
-	def read_quote(src)
-		parent_offset = src.cur_index
-		lines = []
-		# collect all quote lines
-		while src.cur_line && src.cur_line.md_type == :quote
-			lines << unquote(src.shift_line)
-		end
-#		dbg_describe_ary(lines, 'QUOTE')
-		src2 = LineSource.new(lines, src, parent_offset)
-		children = parse_blocks(src2)
-		return md_quote(children)
-	end
-	# Reads an indented code block.
-	#
-	# Consumes consecutive :code and :empty lines, removing 4 columns of
-	# indent from each. Blank lines at either end are discarded.
-	# Returns the `md_codeblock` element, or nil when nothing is left.
-	def read_code(src)
-		# collect all indented lines
-		lines = []
-		while src.cur_line && ([:code, :empty].include? src.cur_line.md_type)
-			lines << strip_indent(src.shift_line, 4)
-		end
-		#while lines.last && (lines.last.md_type == :empty )
-		# Drop blank lines at the end...
-		while lines.last && lines.last.strip.size == 0
-			lines.pop
-		end
-		# ...and at the beginning.
-		while lines.first && lines.first.strip.size == 0
-			lines.shift
-		end
-		return nil if lines.empty?
-		source = lines.join("\n")
-#		dbg_describe_ary(lines, 'CODE')
-		return md_codeblock(source)
-	end
-	# Reads a series of metadata lines with empty lines in between.
-	#
-	# Empty lines are skipped, each :metadata line is run through
-	# `parse_metadata` and merged into one hash, and reading stops at the
-	# first line of any other type. Returns the accumulated hash.
-	def read_metadata(src)
-		hash = {}
-		while src.cur_line
-			case src.cur_line.md_type
-				when :empty;  src.shift_line
-				when :metadata; hash.merge! parse_metadata(src.shift_line)
-				else break
-			end
-		end
-		hash
-	end
-	# Reads a link reference definition and records it.
-	#
-	# The definition may continue on a second line indented by 1 to 3
-	# spaces. The joined text is matched against `LinkRegex`; on success the
-	# url and title are stored in `self.refs` under the sanitized id, any
-	# trailing `key=value` tokens are added to that hash, and an
-	# `md_ref_def` element is pushed onto `out`. On a failed match the
-	# problem is reported through `maruku_error` and nothing is pushed.
-	def read_ref_definition(src, out)
-		line = src.shift_line
-		# if link is incomplete, shift next line
-		if src.cur_line && !([:footnote_text, :ref_definition, :definition, :abbreviation].include? src.cur_line.md_type) &&
-			([1,2,3].include? number_of_leading_spaces(src.cur_line) )
-			line += " "+ src.shift_line
-		end
-#		puts "total= #{line}"
-		match = LinkRegex.match(line)
-		if not match
-			maruku_error "Link does not respect format: '#{line}'"
-			return
-		end
-		id = match[1]; url = match[2]; title = match[3];
-		id = sanitize_ref_id(id)
-		hash = self.refs[id] = {:url=>url,:title=>title}
-		# Fourth capture group: optional extra attributes after the title.
-		stuff=match[4]
-		if stuff
-			stuff.split.each do |couple|
-#					puts "found #{couple}"
-				k, v = couple.split('=')
-				v ||= ""
-				# Drop the surrounding characters of a value that opens with a quote.
-				if v[0,1]=='"' then v = v[1, v.size-2] end
-#					puts "key:_#{k}_ value=_#{v}_"
-				hash[k.to_sym] = v
-			end
-		end
-#			puts hash.inspect
-		out.push md_ref_def(id, url, meta={:title=>title})
-	end
-	# Splits a table row on '|' and returns the trimmed cells.
-	# Pieces that are completely empty are dropped; pieces containing only
-	# whitespace are kept and become empty cells.
-	def split_cells(s)
-#		s.strip.split('|').select{|x|x.strip.size>0}.map{|x|x.strip}
-# changed to allow empty cells
-		s.strip.split('|').select{|x|x.size>0}.map{|x|x.strip}
-	end
-	# Reads a table: a header row, a separator row, then every following
-	# line that contains a '|'.
-	#
-	# Column alignment is derived from the separator cells via `Sep`.
-	# Returns an `md_el(:table, ...)` whose children are the head cells
-	# followed by the row cells, flattened, with the alignments in its
-	# :align attribute. If the header width differs from the separator
-	# width the table is reported and replaced by `md_br`.
-	def read_table(src)
-		head = split_cells(src.shift_line).map{|s| md_el(:head_cell, parse_lines_as_span([s])) }
-		separator=split_cells(src.shift_line)
-		# $1 / $2 are capture groups of Sep; both set means centered,
-		# only $2 means right-aligned, otherwise left-aligned.
-		align = separator.map { |s|  s =~ Sep
-			if $1 and $2 then :center elsif $2 then :right else :left end }
-		num_columns = align.size
-		if head.size != num_columns
-			maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
-			tell_user "I will ignore this table."
-			# XXX try to recover
-			return md_br()
-		end
-		rows = []
-		while src.cur_line && src.cur_line =~ /\|/
-			row = split_cells(src.shift_line).map{|s|
-				md_el(:cell, parse_lines_as_span([s]))}
-			# NOTE(review): this re-checks head.size, which was validated above,
-			# so the branch cannot fire; the message suggests row.size was meant.
-			if head.size != num_columns
-				maruku_error  "Row does not have #{num_columns} columns: \n#{row.inspect}"
-				tell_user "I will ignore this table."
-				# XXX try to recover
-				return md_br()
-			end
-			rows << row
-		end
-		children = (head+rows).flatten
-		return md_el(:table, children, {:align => align})
-	end
-	# If current line is text, a definition list is coming
-	# if 1) text,empty,[text,empty]*,definition
-	#
-	# Implemented by matching the string from `src.tell_me_the_future`
-	# against /^t+e?d/: one or more `t`, an optional `e`, then `d`
-	# (presumably one letter per upcoming line type -- confirm in LineSource).
-	# Returns the match position, or nil when there is no match.
-	def eventually_comes_a_def_list(src)
-		future = src.tell_me_the_future
-		ok = future =~ %r{^t+e?d}x
-#		puts "future: #{future} - #{ok}"
-		ok
-	end
-	# Reads one definition-list entry: one or more term lines, an optional
-	# empty line, then one or more definitions.
-	#
-	# Returns an `md_el(:definition, ...)` whose children are the terms
-	# followed by the definitions, and whose attributes carry both lists
-	# plus the `want_my_paragraph` flag.
-	# Raises RuntimeError when the input ends after the terms or when no
-	# :definition line follows them.
-	def read_definition(src)
-		# Read one or more terms
-		terms = []
-		while  src.cur_line &&  src.cur_line.md_type == :text
-			terms << md_el(:definition_term, parse_lines_as_span([src.shift_line]))
-		end
-#		dbg_describe_ary(terms, 'DT')
-		want_my_paragraph = false
-		raise "Chunky Bacon!" if not src.cur_line
-		# one optional empty
-		if src.cur_line.md_type == :empty
-			want_my_paragraph = true
-			src.shift_line
-		end
-		raise "Chunky Bacon!" if src.cur_line.md_type != :definition
-		# Read one or more definitions
-		definitions = []
-		while src.cur_line && src.cur_line.md_type == :definition
-			parent_offset = src.cur_index
-			first = src.shift_line
-			# Keep only the first capture group of Definition from the opening line.
-			first =~ Definition
-			first = $1
-			# I know, it's ugly!!!
-			lines, w_m_p =
-				read_indented_content(src,4, [:definition], :definition)
-			want_my_paragraph ||= w_m_p
-			lines.unshift first
-#			dbg_describe_ary(lines, 'DD')
-			src2 = LineSource.new(lines, src, parent_offset)
-			children = parse_blocks(src2)
-			definitions << md_el(:definition_data, children)
-		end
-		return md_el(:definition, terms+definitions, {
-			:terms => terms,
-			:definitions => definitions,
-			:want_my_paragraph => want_my_paragraph})
-	end
-end # BlockLevelParser
-end # MaRuKu
-end
-end
+  include Helpers
+  include MaRuKu::Strings
+  include MaRuKu::In::Markdown::SpanLevelParser
+  # Array of parsed block elements that can summarise its own tail.
+  class BlockContext < Array
+    # Returns a string listing the +inspect+ form of the last (at most)
+    # five elements, each on its own line prefixed with " -".
+    def describe
+      limit = 5
+      listing = last(limit).map { |element| "\n -#{element.inspect}" }
+      "Last #{limit} elements: #{listing.join}"
+    end
+  end
+  # Splits +text+ into lines, wraps them in a LineSource and parses them
+  # as block-level Markdown. Returns the result of parse_blocks.
+  def parse_text_as_markdown(text)
+    parse_blocks(LineSource.new(split_lines(text)))
+  end
+  # Parses every remaining line of +src+ (a LineSource) into block-level
+  # elements.
+  #
+  # Each iteration first offers the current line to the block extensions;
+  # if none takes it, the line's +md_type+ selects the matching +read_*+
+  # helper. After the loop, +merge_ial+ is applied, :ial elements and
+  # :empty markers are removed, and paragraph wrappers are unwrapped in
+  # lists and definition lists where no child reports +want_my_paragraph+.
+  #
+  # Returns the BlockContext holding the parsed elements.
+  def parse_blocks(src)
+    output = BlockContext.new
+    # run state machine
+    while src.cur_line
+      # A block extension that handles the line short-circuits the dispatch.
+      next if check_block_extensions(src, output, src.cur_line)
+      md_type = src.cur_line.md_type
+      # Prints detected type (useful for debugging)
+      #puts "parse_blocks #{md_type}|#{src.cur_line}"
+      case md_type
+      when :empty
+        # Keep a marker for the empty line; the markers are removed below.
+        output << :empty
+        src.ignore_line
+      when :ial
+        m = InlineAttributeList.match src.shift_line
+        content = m[1] || ""
+        src2 = CharSource.new(content, src)
+        interpret_extension(src2, output)
+      when :ald
+        output << read_ald(src)
+      when :text
+        # paragraph, or table, or definition list
+        read_text_material(src, output)
+      when :header2, :hrule
+        # A :header2 line that reaches this point is emitted as a
+        # horizontal rule, exactly like :hrule.
+        src.shift_line
+        output << md_hrule
+      when :header3
+        output << read_header3(src)
+      when :ulist, :olist
+        list_type = (md_type == :ulist) ? :ul : :ol
+        li = read_list_item(src)
+        # append to current list if we have one
+        if output.last.kind_of?(MDElement) &&
+            output.last.node_type == list_type then
+          output.last.children << li
+        else
+          output << md_el(list_type, li)
+        end
+      when :quote
+        output << read_quote(src)
+      when :code
+        # read_code returns nil when the block held only blank lines.
+        e = read_code(src)
+        output << e if e
+      when :raw_html
+        # More extra hacky stuff - if there's more than just HTML, we either wrap it
+        # in a paragraph or break it up depending on whether it's an inline element or not
+        e = read_raw_html(src)
+        unless e.empty?
+          # Inline elements (other than svg and math) are wrapped in a
+          # paragraph together with the children of the second element
+          # returned by read_raw_html, when there is one.
+          if e.first.parsed_html &&
+              (first_node_name = e.first.parsed_html.first_node_name) &&
+              HTML_INLINE_ELEMS.include?(first_node_name) &&
+              !%w(svg math).include?(first_node_name)
+            content = [e.first]
+            if e.size > 1
+              content.concat(e[1].children)
+            end
+            output << md_par(content)
+          else
+            output.concat(e)
+          end
+        end
+      when :footnote_text
+        output << read_footnote_text(src)
+      when :ref_definition
+        # On the very first line of a source that has a parent, the line
+        # is handled as text material rather than as a reference definition.
+        if src.parent && src.cur_index == 0
+          read_text_material(src, output)
+        else
+          read_ref_definition(src, output)
+        end
+      when :abbreviation
+        output << read_abbreviation(src)
+      when :xml_instr
+        read_xml_instruction(src, output)
+      else # warn if we forgot something
+        line = src.cur_line
+        maruku_error "Ignoring line '#{line}' type = #{md_type}", src
+        src.shift_line
+      end
+    end
+    merge_ial(output, src, output)
+    output.delete_if {|x| x.kind_of?(MDElement) && x.node_type == :ial }
+    # get rid of empty line markers
+    output.delete_if {|x| x == :empty }
+    # See for each list if we can omit the paragraphs
+    # TODO: do this after
+    output.each do |c|
+      # Remove paragraphs that we can get rid of
+      if [:ul, :ol].include?(c.node_type) && c.children.none?(&:want_my_paragraph)
+        c.children.each do |d|
+          # Replace a leading paragraph by its own children.
+          if d.children.first && d.children.first.node_type == :paragraph
+            d.children = d.children.first.children + d.children[1..-1]
+          end
+        end
+      elsif c.node_type == :definition_list && c.children.none?(&:want_my_paragraph)
+        c.children.each do |definition|
+          definition.definitions.each do |dd|
+            # Same unwrapping for each definition body.
+            if dd.children.first.node_type == :paragraph
+              dd.children = dd.children.first.children + dd.children[1..-1]
+            end
+          end
+        end
+      end
+    end
+    output
+  end
+  # Decides what a block that starts on a text line is, reads it, and
+  # appends the result to +output+. Checked in this order: table, setext
+  # header, definition list entry, plain paragraph.
+  def read_text_material(src, output)
+    # A pipe on this line followed by a separator row means a table.
+    looks_like_table = src.cur_line.include?('|') &&
+      src.next_line &&
+      src.next_line.rstrip =~ TableSeparator
+    return output << read_table(src) if looks_like_table
+    # A following :header1/:header2 line makes this line a header.
+    if src.next_line && [:header1, :header2].include?(src.next_line.md_type)
+      return output << read_header12(src)
+    end
+    return output << read_paragraph(src) unless eventually_comes_a_def_list(src)
+    definition = read_definition(src)
+    previous = output.last
+    # Extend the definition list we are already in, or open a new one.
+    if previous.kind_of?(MDElement) && previous.node_type == :definition_list
+      previous.children << definition
+    else
+      output << md_el(:definition_list, definition)
+    end
+  end
+  # Reads one attribute definition list line.
+  # On a match against AttributeDefinitionList, the parsed attribute list
+  # is stored in +self.ald+ under its id and the +md_ald+ element is
+  # returned. Otherwise the problem is reported and nil is returned.
+  def read_ald(src)
+    line = src.shift_line
+    unless line =~ AttributeDefinitionList
+      maruku_error "Bug Bug:\n#{line.inspect}"
+      return nil
+    end
+    id = $1
+    attributes = read_attribute_list(CharSource.new($2, src))
+    self.ald[id] = attributes
+    md_ald(id, attributes)
+  end
+  # Reads a two-line header: a text line underlined by a :header1 or
+  # :header2 line (===== or -----).
+  #
+  # With new-style metadata enabled, a trailing "{...}" on the text line
+  # is parsed as an inline attribute list. If removing it leaves no text,
+  # the braces are restored as the literal header text and the attributes
+  # are discarded.
+  # NOTE(review): when the text is empty and no "{...}" matched, the
+  # header text becomes the literal "{}" -- confirm this is intended.
+  def read_header12(src)
+    title = src.shift_line.strip
+    ial = nil
+    attributes = nil
+    if new_meta_data? && (found = /^(.*?)\{(.*?)\}\s*$/.match(title))
+      title = found[1].strip
+      ial = found[2]
+      attributes = read_attribute_list(CharSource.new(ial, src))
+    end
+    text = parse_span title
+    if text.empty?
+      text = "{#{ial}}"
+      attributes = nil
+    end
+    # The underline is still the current line; it selects the level.
+    underline_type = src.cur_line.md_type
+    src.shift_line
+    md_header(underline_type == :header2 ? 2 : 1, text, attributes)
+  end
+  # Reads an ATX header such as '#### header ####'.
+  #
+  # The level is the number of leading '#'. Leading and trailing runs of
+  # '#' are removed before the text is parsed. With new-style metadata
+  # enabled, a trailing "{...}" is parsed as an inline attribute list. If
+  # no text remains, the braces are restored as the literal header text
+  # and the attributes are discarded.
+  # NOTE(review): when the text is empty and no "{...}" matched, the
+  # header text becomes the literal "{}" -- confirm this is intended.
+  def read_header3(src)
+    raw = src.shift_line.strip
+    ial = nil
+    attributes = nil
+    if new_meta_data? && (found = /^(.*?)\{(.*?)\}\s*$/.match(raw))
+      raw = found[1].strip
+      ial = found[2]
+      attributes = read_attribute_list(CharSource.new(ial, src))
+    end
+    level = raw[/^#+/].size
+    without_hashes = raw.gsub(/\A#+|#+\z/, '')
+    text = parse_span without_hashes
+    return md_header(level, text, attributes) unless text.empty?
+    md_header(level, "{#{ial}}", nil)
+  end
+  # Reads an XML processing instruction ("<?target ... ?>"), which may
+  # span several lines, and appends the result to +output+.
+  #
+  # For the 'mrk' target with MaRuKu::Globals[:unsafe_features] enabled,
+  # the code is run through +safe_execute_code+ and its non-String result
+  # is pushed onto +output+. Any other instruction is stored as an
+  # +md_xml_instr+ element.
+  #
+  # An instruction that is never closed by '?>' is reported through
+  # +maruku_error+ and kept with all remaining lines as its code, instead
+  # of reading past the end of the input.
+  #
+  # Raises RuntimeError if the first line is not a processing instruction
+  # or if the executed code returns a String.
+  def read_xml_instruction(src, output)
+    m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
+    raise "BugBug" unless m
+    target = m[2] || ''
+    code = m[3]
+    terminated = true
+    until code.include?('?>')
+      # Stop at end of input rather than shifting a line that is not there.
+      unless src.cur_line
+        terminated = false
+        break
+      end
+      code << "\n" << src.shift_line
+    end
+    if !terminated
+      maruku_error "XML instruction is not closed by '?>':\n" +
+        code.gsub(/^/, '|'), src
+    elsif code !~ /\?>\s*$/
+      garbage = (/\?>(.*)$/.match(code))[1]
+      maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n" +
+        code.gsub(/^/, '|'), src
+    end
+    # Drop the closing marker (a no-op for an unterminated instruction).
+    code.gsub!(/\?>\s*$/, '')
+    if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
+      result = safe_execute_code(self, code)
+      if result
+        if result.kind_of? String
+          raise "Not expected"
+        else
+          output.push(*result)
+        end
+      end
+    else
+      output << md_xml_instr(target, code)
+    end
+  end
+  # Reads a block of raw HTML, feeding lines to an HTMLHelper until it
+  # reports being finished or the input ends. Failures while reading are
+  # reported through +maruku_error+.
+  #
+  # Returns an array that starts with the +md_html+ element. Non-blank
+  # text left over after the HTML is appended as well: wrapped in a
+  # paragraph of spans when the first tag is an inline element, otherwise
+  # as the blocks obtained by parsing it as Markdown.
+  def read_raw_html(src)
+    helper = HTMLHelper.new
+    begin
+      helper.eat_this(src.shift_line)
+      helper.eat_this("\n" + src.shift_line) while src.cur_line && !helper.is_finished?
+    rescue => e
+      maruku_error "Bad block-level HTML:\n#{e.inspect.gsub(/^/, '|')}\n", src
+    end
+    leftover = helper.rest =~ /^\s*$/ ? nil : helper.rest
+    raw_html = helper.stuff_you_read
+    inline = HTML_INLINE_ELEMS.include?(helper.first_tag)
+    return [md_html(raw_html)] unless leftover
+    remainder = inline ? parse_span(leftover) : parse_text_as_markdown(leftover)
+    # Keep the leading space of the leftover text when it parsed to a string.
+    if leftover.start_with?(' ') && remainder[0].is_a?(String)
+      remainder[0] = ' ' + remainder[0]
+    end
+    return [md_html(raw_html), md_par(remainder)] if inline
+    [md_html(raw_html)] + remainder
+  end
+  # Reads a paragraph: the current line plus every following line up to
+  # the first one that ends it. The collected lines are parsed together
+  # as span-level content and returned as an +md_par+ element.
+  def read_paragraph(src)
+    lines = [src.shift_line]
+    while src.cur_line
+      case t = src.cur_line.md_type
+      # These line types always end the paragraph.
+      when :quote, :header3, :empty, :ref_definition, :ial, :xml_instr
+        break
+      # A list marker ends the paragraph only when it is the last line or
+      # the following line has the same list type.
+      when :olist, :ulist
+        break if !src.next_line || src.next_line.md_type == t
+      when :raw_html
+        # This is a pretty awful hack to handle inline HTML
+        # but it means double-parsing HTML.
+        html = parse_span([src.cur_line], src)
+        unless html.empty? || html.first.is_a?(String)
+          if html.first.parsed_html
+            first_node_name = html.first.parsed_html.first_node_name
+          end
+        end
+        # HTML that opens with a non-inline element ends the paragraph.
+        break if first_node_name && !HTML_INLINE_ELEMS.include?(first_node_name)
+      end
+      break if src.cur_line.strip.empty?
+      # Leave the line alone if it is the text of an underlined header.
+      break if src.next_line && [:header1, :header2].include?(src.next_line.md_type)
+      break if any_matching_block_extension?(src.cur_line)
+      lines << src.shift_line
+    end
+    children = parse_span(lines, src)
+    md_par(children)
+  end
+  # Reads a single ordered or unordered list item.
+  #
+  # The first line supplies the indentation and, optionally, an inline
+  # attribute list. The marker is cut from the first line, the
+  # continuation lines come from read_indented_content, and everything is
+  # parsed as blocks in a nested LineSource. Returns the +md_li+ element.
+  def read_list_item(src)
+    parent_offset = src.cur_index
+    item_type = src.cur_line.md_type
+    opening = src.shift_line
+    indent, ial = spaces_before_first_char(opening)
+    attributes = ial ? read_attribute_list(CharSource.new(ial, src)) : nil
+    # 3 extra columns on top of the IAL text -- presumably its delimiters;
+    # confirm against spaces_before_first_char.
+    ial_offset = ial ? ial.length + 3 : 0
+    rest, want_my_paragraph =
+      read_indented_content(src, indent, [], item_type, ial_offset)
+    # Put the first line back in front, minus its '*', '-', '+' or number.
+    rest.unshift opening[indent, opening.size - 1]
+    children = parse_blocks(LineSource.new(rest, src, parent_offset))
+    md_li(children, want_my_paragraph, attributes)
+  end
+  # Reads one abbreviation definition line, records the description in
+  # +self.abbreviations+ under the abbreviation, and returns the
+  # +md_abbr_def+ element. A line that does not match Abbreviation, or an
+  # empty abbreviation, is reported through +maruku_error+.
+  def read_abbreviation(src)
+    line = src.shift_line
+    matched = line =~ Abbreviation
+    abbr = $1
+    desc = $2
+    maruku_error "Bug: it's Andrea's fault. Tell him.\n#{line.inspect}" unless matched
+    if abbr.nil? || abbr.empty?
+      maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
+    end
+    self.abbreviations[abbr] = desc
+    md_abbr_def(abbr, desc)
+  end
+  # Reads the text of one footnote.
+  #
+  # The first line is matched against FootnoteText for the id and the
+  # opening text; continuation lines are read with an indentation of 4.
+  # The lines are parsed as blocks, wrapped with +md_footnote+, stored in
+  # +self.footnotes+ under the id, and the element is returned.
+  def read_footnote_text(src)
+    parent_offset = src.cur_index
+    opening = src.shift_line
+    matched = opening =~ FootnoteText
+    id = $1
+    text = $2 || ''
+    maruku_error "Bug (it's Andrea's fault)" unless matched
+    terminators = [:footnote_text, :ref_definition, :definition, :abbreviation]
+    body = read_indented_content(src, 4, terminators, :footnote_text).first
+    # Put the opening text first, unless it is blank.
+    body.unshift text unless text.strip.empty?
+    children = parse_blocks(LineSource.new(body, src, parent_offset))
+    footnote = md_footnote(id, children)
+    self.footnotes[id] = footnote
+    footnote
+  end
+  # This is the only ugly function in the code base.
+  # It is used to read list items, descriptions, footnote text
+  #
+  # Collects the lines that belong to an indented construct; each
+  # collected line has +indentation+ removed via +strip_indent+.
+  #
+  # src::         line source to consume from
+  # indentation:: indent of the construct's content
+  # break_list::  line type(s) that end the content before any empty line;
+  #               a single symbol is accepted and wrapped in an array
+  # item_type::   line type of the enclosing item, used for the flag below
+  # ial_offset::  subtracted from +indentation+ to get the minimum number
+  #               of leading spaces (+len+) used in the checks
+  #
+  # Returns +[lines, want_my_paragraph]+. The flag is true when content
+  # followed an empty line, or when an empty line was seen and reading
+  # stopped on a line of type +item_type+.
+  def read_indented_content(src, indentation, break_list, item_type, ial_offset=0)
+    lines = []
+    # collect all indented lines
+    saw_empty = false
+    saw_anything_after = false
+    break_list = Array(break_list)
+    len = indentation - ial_offset
+    while src.cur_line
+      num_leading_spaces = src.cur_line.number_of_leading_spaces
+      # An under-indented line that is neither text nor empty ends the content.
+      break if num_leading_spaces < len && ![:text, :empty].include?(src.cur_line.md_type)
+      # From here on the line is classified by its type after de-indenting.
+      line = strip_indent(src.cur_line, indentation)
+      md_type = line.md_type
+      if md_type == :empty
+        saw_empty = true
+        lines << line
+        src.shift_line
+        next
+      end
+      # Unquestioningly grab anything that's deeper-indented
+      if md_type != :code && num_leading_spaces > len
+        lines << line
+        src.shift_line
+        next
+      end
+      # after a white line
+      if saw_empty
+        # we expect things to be properly aligned
+        break if num_leading_spaces < len
+        saw_anything_after = true
+      else
+        break if break_list.include?(md_type)
+      end
+      lines << line
+      src.shift_line
+      # You are only required to indent the first line of
+      # a child paragraph.
+      if md_type == :text
+        while src.cur_line && src.cur_line.md_type == :text
+          lines << strip_indent(src.shift_line, indentation)
+        end
+      end
+    end
+    # TODO fix this
+    want_my_paragraph = saw_anything_after ||
+      (saw_empty && src.cur_line && src.cur_line.md_type == item_type)
+    # create a new context
+    # Trailing empty lines are dropped from the collected content.
+    while lines.last && lines.last.md_type == :empty
+      lines.pop
+    end
+    return lines, want_my_paragraph
+  end
+  # Reads a block quote: consumes consecutive :quote lines, passes each
+  # through +unquote+, parses the result as blocks in a nested LineSource,
+  # and returns the +md_quote+ element.
+  def read_quote(src)
+    parent_offset = src.cur_index
+    quoted = []
+    quoted << unquote(src.shift_line) while src.cur_line && src.cur_line.md_type == :quote
+    md_quote(parse_blocks(LineSource.new(quoted, src, parent_offset)))
+  end
+  # Reads an indented code block.
+  #
+  # Consumes consecutive :code and :empty lines, removing 4 columns of
+  # indent from each, then discards blank lines at both ends.
+  # Returns the +md_codeblock+ element, or nil when nothing is left.
+  def read_code(src)
+    code_lines = []
+    while src.cur_line && [:code, :empty].include?(src.cur_line.md_type)
+      code_lines << strip_indent(src.shift_line, 4)
+    end
+    code_lines.pop while code_lines.last && code_lines.last.strip.empty?
+    code_lines.shift while code_lines.first && code_lines.first.strip.empty?
+    code_lines.empty? ? nil : md_codeblock(code_lines.join("\n"))
+  end
+  # Reads a link reference definition and records it.
+  #
+  # The definition may continue on a second line indented by 1 to 3
+  # spaces. The joined text is matched against LinkRegex; on success the
+  # url and title are stored in +self.refs+ under the sanitized id, any
+  # trailing key=value tokens are added to that hash, and an
+  # +md_ref_def+ element is appended to +out+.
+  #
+  # A line that does not match is reported through +maruku_error+ and the
+  # method returns nil without touching +out+ or +self.refs+.
+  def read_ref_definition(src, out)
+    line = src.shift_line
+    # if link is incomplete, shift next line
+    if src.cur_line &&
+        ![:footnote_text, :ref_definition, :definition, :abbreviation].include?(src.cur_line.md_type) &&
+        (1..3).include?(src.cur_line.number_of_leading_spaces)
+      line << " " << src.shift_line
+    end
+    match = LinkRegex.match(line)
+    unless match
+      # Return unconditionally: chaining with `and return` only returned
+      # when maruku_error happened to give back a truthy value.
+      maruku_error "Link does not respect format: '#{line}'"
+      return
+    end
+    id = match[1]
+    url = match[2]
+    # The title is taken from whichever of these groups matched.
+    title = match[3] || match[4] || match[5]
+    id = sanitize_ref_id(id)
+    hash = self.refs[id] = {
+      :url => url,
+      :title => title
+    }
+    stuff = (match[6] || '')
+    stuff.split.each do |couple|
+      k, v = couple.split('=')
+      # A token made only of '=' splits to nothing; there is no key to store.
+      next unless k
+      v ||= ""
+      v = v[1..-2] if v.start_with?('"') # strip quotes
+      hash[k.to_sym] = v
+    end
+    out << md_ref_def(id, url, :title => title)
+  end
+  # Breaks a table row into cells: splits on '|', drops pieces that are
+  # completely empty, and trims whitespace around the rest (so a piece
+  # holding only spaces becomes an empty cell).
+  def split_cells(s)
+    s.split('|').each_with_object([]) do |piece, cells|
+      cells << piece.strip unless piece.empty?
+    end
+  end
+  # Reads a table: a header row, a separator row, then every following
+  # line that contains a '|'.
+  #
+  # Returns an +md_el(:table, ...)+ whose children are the head cells
+  # followed by all row cells, flattened, with the column alignments in
+  # its :align attribute.
+  #
+  # The header and every row must have as many cells as the separator
+  # row. On a mismatch the problem is reported through +maruku_error+ and
+  # +tell_user+, and +md_br+ is returned in place of the table.
+  def read_table(src)
+    head = split_cells(src.shift_line).map do |s|
+      md_el(:head_cell, parse_span(s))
+    end
+    separator = split_cells(src.shift_line)
+    align = separator.map do |s|
+      # ex: :-------------------:
+      # If the separator starts and ends with a colon,
+      # center the cell. If it's on the right, right-align,
+      # otherwise left-align.
+      starts = s.start_with? ':'
+      ends = s.end_with? ':'
+      if starts && ends
+        :center
+      elsif ends
+        :right
+      else
+        :left
+      end
+    end
+    num_columns = align.size
+    if head.size != num_columns
+      maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
+      tell_user "I will ignore this table."
+      # XXX try to recover
+      return md_br
+    end
+    rows = []
+    while src.cur_line && src.cur_line =~ /\|/
+      row = split_cells(src.shift_line).map do |s|
+        md_el(:cell, parse_span(s))
+      end
+      # Validate this row itself; the header was already checked above.
+      # The cells are stored flattened, so one ragged row would misalign
+      # every cell after it.
+      if row.size != num_columns
+        maruku_error  "Row does not have #{num_columns} columns: \n#{row.inspect}"
+        tell_user "I will ignore this table."
+        # XXX try to recover
+        return md_br
+      end
+      rows << row
+    end
+    children = (head + rows).flatten
+    md_el(:table, children, { :align => align })
+  end
+  # Tells whether a definition list starts at the current line.
+  #
+  # The string from +src.tell_me_the_future+ must begin with one or more
+  # `t`, an optional `e`, then `d` (presumably one letter per upcoming
+  # line type: text, empty, definition -- confirm in LineSource).
+  # Returns the match position, or nil when there is no match.
+  def eventually_comes_a_def_list(src)
+    upcoming = src.tell_me_the_future
+    upcoming =~ %r{^t+e?d}x
+  end
+  # Reads one definition-list entry: one or more term lines, an optional
+  # empty line, then one or more definitions.
+  #
+  # Returns an +md_el(:definition, ...)+ whose children are the terms
+  # followed by the definitions, and whose attributes carry both lists
+  # plus the +want_my_paragraph+ flag.
+  # Raises RuntimeError when the input ends after the terms or when no
+  # :definition line follows them.
+  def read_definition(src)
+    terms = []
+    while src.cur_line && src.cur_line.md_type == :text
+      terms << md_el(:definition_term, parse_span(src.shift_line))
+    end
+    raise "Chunky Bacon!" unless src.cur_line
+    # A single empty line may separate the terms from the definitions.
+    want_my_paragraph = src.cur_line.md_type == :empty
+    src.shift_line if want_my_paragraph
+    raise "Chunky Bacon!" unless src.cur_line.md_type == :definition
+    definitions = []
+    while src.cur_line && src.cur_line.md_type == :definition
+      parent_offset = src.cur_index
+      # Keep only the first capture group of Definition from the opening line.
+      src.shift_line =~ Definition
+      opening = $1
+      body, wants_par = read_indented_content(src, 4, :definition, :definition)
+      want_my_paragraph ||= wants_par
+      body.unshift opening
+      children = parse_blocks(LineSource.new(body, src, parent_offset))
+      definitions << md_el(:definition_data, children)
+    end
+    attributes = {
+      :terms => terms,
+      :definitions => definitions,
+      :want_my_paragraph => want_my_paragraph
+    }
+    md_el(:definition, terms + definitions, attributes)
+  end
+end end end end