RubyGems - maruku - Versions diffs - 0.6.0 → 0.7.3 - Mend

maruku 0.6.0 → 0.7.3

Files changed (290) hide show

checksums.yaml +7 -0
data/MIT-LICENSE.txt +20 -0
data/bin/maruku +153 -152
data/bin/marutex +2 -29
data/data/entities.xml +261 -0
data/docs/markdown_syntax.md +9 -21
data/docs/math.md +14 -18
data/lib/maruku.rb +65 -78
data/lib/maruku/attributes.rb +109 -214
data/lib/maruku/defaults.rb +45 -67
data/lib/maruku/document.rb +44 -0
data/lib/maruku/element.rb +138 -0
data/lib/maruku/errors.rb +80 -0
data/lib/maruku/ext/div.rb +105 -113
data/lib/maruku/ext/fenced_code.rb +97 -0
data/lib/maruku/ext/math.rb +22 -26
data/lib/maruku/ext/math/elements.rb +20 -26
data/lib/maruku/ext/math/mathml_engines/blahtex.rb +92 -104
data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +33 -26
data/lib/maruku/ext/math/mathml_engines/none.rb +11 -19
data/lib/maruku/ext/math/mathml_engines/ritex.rb +2 -4
data/lib/maruku/ext/math/parsing.rb +121 -115
data/lib/maruku/ext/math/to_html.rb +202 -187
data/lib/maruku/ext/math/to_latex.rb +34 -21
data/lib/maruku/helpers.rb +158 -257
data/lib/maruku/html.rb +251 -0
data/lib/maruku/input/charsource.rb +272 -319
data/lib/maruku/input/extensions.rb +62 -63
data/lib/maruku/input/html_helper.rb +233 -189
data/lib/maruku/input/linesource.rb +90 -110
data/lib/maruku/input/mdline.rb +131 -0
data/lib/maruku/input/parse_block.rb +736 -613
data/lib/maruku/input/parse_doc.rb +145 -217
data/lib/maruku/input/parse_span.rb +740 -0
data/lib/maruku/inspect_element.rb +60 -0
data/lib/maruku/maruku.rb +14 -30
data/lib/maruku/output/entity_table.rb +37 -0
data/lib/maruku/output/s5/fancy.rb +462 -462
data/lib/maruku/output/s5/to_s5.rb +115 -135
data/lib/maruku/output/to_html.rb +907 -983
data/lib/maruku/output/to_latex.rb +571 -563
data/lib/maruku/output/to_markdown.rb +207 -162
data/lib/maruku/output/to_s.rb +10 -52
data/lib/maruku/string_utils.rb +129 -179
data/lib/maruku/toc.rb +185 -196
data/lib/maruku/version.rb +33 -38
data/spec/block_docs/abbrev.md +776 -0
data/{tests/unittest → spec/block_docs}/abbreviations.md +11 -20
data/spec/block_docs/abbreviations2.md +27 -0
data/{tests/unittest → spec/block_docs}/alt.md +2 -14
data/{tests/unittest/pending → spec/block_docs}/amps.md +1 -13
data/spec/block_docs/attribute_sanitize.md +22 -0
data/{tests/unittest → spec/block_docs}/attributes/att2.md +0 -12
data/{tests/unittest → spec/block_docs}/attributes/att3.md +2 -14
data/{tests/unittest → spec/block_docs}/attributes/attributes.md +12 -16
data/{tests/unittest → spec/block_docs}/attributes/circular.md +0 -12
data/{tests/unittest → spec/block_docs}/attributes/default.md +1 -13
data/spec/block_docs/atx_headers.md +22 -0
data/spec/block_docs/auto_cdata.md +48 -0
data/spec/block_docs/bad_cites.md +30 -0
data/spec/block_docs/bad_divrefs.md +30 -0
data/{tests/unittest → spec/block_docs}/blank.md +0 -12
data/{tests/unittest → spec/block_docs}/blanks_in_code.md +16 -15
data/spec/block_docs/block_quotes.md +66 -0
data/{tests/unittest/loss.md → spec/block_docs/bug_def.md} +6 -18
data/{tests/unittest → spec/block_docs}/bug_table.md +7 -19
data/spec/block_docs/cites.md +37 -0
data/{tests/unittest → spec/block_docs}/code.md +7 -14
data/{tests/unittest → spec/block_docs}/code2.md +4 -14
data/{tests/unittest → spec/block_docs}/code3.md +12 -16
data/spec/block_docs/code4.md +79 -0
data/{tests/unittest → spec/block_docs}/data_loss.md +2 -14
data/spec/block_docs/div_without_newline.md +16 -0
data/{tests/unittest → spec/block_docs}/divs/div1.md +0 -12
data/{tests/unittest → spec/block_docs}/divs/div2.md +0 -12
data/{tests/unittest → spec/block_docs}/divs/div3_nest.md +3 -15
data/{tests/unittest → spec/block_docs}/easy.md +1 -13
data/spec/block_docs/email.md +29 -0
data/spec/block_docs/empty_cells.md +31 -0
data/{tests/unittest → spec/block_docs}/encoding/iso-8859-1.md +1 -14
data/{tests/unittest → spec/block_docs}/encoding/utf-8.md +0 -12
data/{tests/unittest → spec/block_docs}/entities.md +33 -41
data/{tests/unittest/notyet → spec/block_docs}/escape.md +2 -14
data/{tests/unittest → spec/block_docs}/escaping.md +11 -22
data/{tests/unittest → spec/block_docs}/extra_dl.md +2 -13
data/{tests/unittest → spec/block_docs}/extra_header_id.md +14 -20
data/{tests/unittest → spec/block_docs}/extra_table1.md +9 -21
data/spec/block_docs/fenced_code_blocks.md +58 -0
data/spec/block_docs/fenced_code_blocks_highlighted.md +17 -0
data/{tests/unittest → spec/block_docs}/footnotes.md +12 -24
data/spec/block_docs/footnotes2.md +82 -0
data/spec/block_docs/hard.md +25 -0
data/spec/block_docs/header_after_par.md +62 -0
data/{tests/unittest → spec/block_docs}/headers.md +10 -18
data/{tests/unittest → spec/block_docs}/hex_entities.md +7 -18
data/{tests/unittest → spec/block_docs}/hrule.md +5 -12
data/{tests/unittest → spec/block_docs}/html3.md +1 -13
data/{tests/unittest → spec/block_docs}/html4.md +2 -14
data/{tests/unittest → spec/block_docs}/html5.md +2 -14
data/spec/block_docs/html_block_in_para.md +22 -0
data/spec/block_docs/html_inline.md +25 -0
data/spec/block_docs/html_trailing.md +31 -0
data/spec/block_docs/ie.md +62 -0
data/spec/block_docs/iframe.md +29 -0
data/spec/block_docs/ignore_bad_header.md +9 -0
data/{tests/unittest → spec/block_docs}/images.md +22 -28
data/{tests/unittest → spec/block_docs}/images2.md +7 -17
data/{tests/unittest → spec/block_docs}/inline_html.md +37 -67
data/{tests/unittest → spec/block_docs}/inline_html2.md +1 -13
data/spec/block_docs/inline_html_beginning.md +10 -0
data/spec/block_docs/issue106.md +78 -0
data/spec/block_docs/issue115.md +20 -0
data/spec/block_docs/issue117.md +13 -0
data/spec/block_docs/issue120.md +48 -0
data/spec/block_docs/issue123.md +11 -0
data/spec/block_docs/issue124.md +16 -0
data/spec/block_docs/issue126.md +9 -0
data/spec/block_docs/issue130.md +11 -0
data/spec/block_docs/issue20.md +9 -0
data/spec/block_docs/issue26.md +22 -0
data/spec/block_docs/issue29.md +9 -0
data/spec/block_docs/issue30.md +30 -0
data/spec/block_docs/issue31.md +25 -0
data/spec/block_docs/issue40.md +52 -0
data/spec/block_docs/issue64.md +55 -0
data/spec/block_docs/issue67.md +19 -0
data/spec/block_docs/issue70.md +11 -0
data/spec/block_docs/issue72.md +17 -0
data/spec/block_docs/issue74.md +38 -0
data/spec/block_docs/issue79.md +15 -0
data/spec/block_docs/issue83.md +13 -0
data/spec/block_docs/issue85.md +25 -0
data/spec/block_docs/issue88.md +19 -0
data/spec/block_docs/issue89.md +12 -0
data/spec/block_docs/issue90.md +38 -0
data/{tests/unittest/pending → spec/block_docs}/link.md +21 -18
data/{tests/unittest → spec/block_docs}/links.md +33 -32
data/spec/block_docs/links2.md +21 -0
data/{tests/unittest → spec/block_docs}/list1.md +0 -12
data/{tests/unittest → spec/block_docs}/list12.md +2 -14
data/{tests/unittest → spec/block_docs}/list2.md +2 -14
data/spec/block_docs/list_multipara.md +42 -0
data/{tests/unittest → spec/block_docs}/lists.md +28 -29
data/{tests/unittest → spec/block_docs}/lists10.md +2 -14
data/spec/block_docs/lists11.md +23 -0
data/spec/block_docs/lists12.md +43 -0
data/spec/block_docs/lists13.md +55 -0
data/spec/block_docs/lists14.md +61 -0
data/spec/block_docs/lists15.md +36 -0
data/spec/block_docs/lists6.md +88 -0
data/spec/block_docs/lists7b.md +58 -0
data/spec/block_docs/lists9.md +53 -0
data/{tests/unittest → spec/block_docs}/lists_after_paragraph.md +19 -25
data/spec/block_docs/lists_blank.md +35 -0
data/{tests/unittest/list3.md → spec/block_docs/lists_blockquote_code.md} +2 -14
data/{tests/unittest/list4.md → spec/block_docs/lists_need_blank_line.md} +44 -29
data/spec/block_docs/lists_nested.md +44 -0
data/spec/block_docs/lists_nested_blankline.md +34 -0
data/spec/block_docs/lists_nested_deep.md +43 -0
data/spec/block_docs/lists_ol.md +129 -0
data/spec/block_docs/lists_ol2.md +147 -0
data/spec/block_docs/lists_paraindent.md +42 -0
data/spec/block_docs/lists_tab.md +54 -0
data/spec/block_docs/loss.md +17 -0
data/spec/block_docs/math-blahtex/equations.md +29 -0
data/spec/block_docs/math-blahtex/inline.md +48 -0
data/spec/block_docs/math-blahtex/math2.md +52 -0
data/spec/block_docs/math-blahtex/table.md +25 -0
data/spec/block_docs/math/embedded_invalid_svg.md +108 -0
data/spec/block_docs/math/embedded_svg.md +136 -0
data/spec/block_docs/math/equations.md +49 -0
data/spec/block_docs/math/inline.md +46 -0
data/spec/block_docs/math/math2.md +53 -0
data/{tests/unittest → spec/block_docs}/math/notmath.md +0 -12
data/spec/block_docs/math/raw_mathml.md +87 -0
data/spec/block_docs/math/spaces_after_inline_math.md +17 -0
data/spec/block_docs/math/table.md +25 -0
data/{tests/unittest → spec/block_docs}/math/table2.md +11 -23
data/{tests/unittest → spec/block_docs}/misc_sw.md +184 -121
data/{tests/unittest → spec/block_docs}/olist.md +6 -18
data/{tests/unittest → spec/block_docs}/one.md +0 -12
data/{tests/unittest → spec/block_docs}/paragraph.md +0 -12
data/{tests/unittest → spec/block_docs}/paragraph_rules/dont_merge_ref.md +4 -12
data/{tests/unittest → spec/block_docs}/paragraph_rules/tab_is_blank.md +0 -12
data/{tests/unittest → spec/block_docs}/paragraphs.md +1 -13
data/{tests/unittest → spec/block_docs}/recover/recover_links.md +4 -16
data/{tests/unittest/pending/ref.md → spec/block_docs/ref_with_period.md} +7 -16
data/spec/block_docs/ref_with_title.md +22 -0
data/{tests/unittest → spec/block_docs}/references/long_example.md +16 -23
data/{tests/unittest → spec/block_docs}/references/spaces_and_numbers.md +0 -12
data/{tests/unittest → spec/block_docs}/smartypants.md +24 -31
data/{tests/unittest → spec/block_docs}/syntax_hl.md +13 -17
data/{tests/unittest → spec/block_docs}/table_attributes.md +6 -20
data/spec/block_docs/table_colspan.md +41 -0
data/spec/block_docs/tables.md +47 -0
data/spec/block_docs/tables2.md +74 -0
data/{tests/unittest → spec/block_docs}/test.md +1 -13
data/{tests/unittest/notyet → spec/block_docs}/ticks.md +1 -13
data/spec/block_docs/toc.md +87 -0
data/{tests/unittest/notyet → spec/block_docs}/triggering.md +14 -25
data/{tests/unittest → spec/block_docs}/underscore_in_words.md +0 -12
data/{tests/unittest → spec/block_docs}/wrapping.md +4 -16
data/spec/block_docs/xml.md +33 -0
data/spec/block_docs/xml3.md +24 -0
data/spec/block_docs/xml_comments.md +32 -0
data/{tests/unittest → spec/block_docs}/xml_instruction.md +9 -20
data/spec/block_spec.rb +110 -0
data/spec/cli_spec.rb +8 -0
data/spec/span_spec.rb +263 -0
data/spec/spec_helper.rb +3 -0
data/spec/to_html_utf8_spec.rb +13 -0
metadata +218 -202
data/Rakefile +0 -73
data/bin/marudown +0 -29
data/bin/marutest +0 -345
data/docs/changelog.md +0 -334
data/lib/maruku/errors_management.rb +0 -92
data/lib/maruku/ext/math/latex_fix.rb +0 -12
data/lib/maruku/input/parse_span_better.rb +0 -746
data/lib/maruku/input/rubypants.rb +0 -225
data/lib/maruku/input/type_detection.rb +0 -147
data/lib/maruku/output/to_latex_entities.rb +0 -367
data/lib/maruku/output/to_latex_strings.rb +0 -64
data/lib/maruku/structures.rb +0 -167
data/lib/maruku/structures_inspect.rb +0 -87
data/lib/maruku/structures_iterators.rb +0 -61
data/lib/maruku/tests/benchmark.rb +0 -82
data/lib/maruku/tests/new_parser.rb +0 -373
data/lib/maruku/tests/tests.rb +0 -136
data/lib/maruku/usage/example1.rb +0 -33
data/maruku_gem.rb +0 -33
data/tests/bugs/code_in_links.md +0 -101
data/tests/bugs/complex_escaping.md +0 -38
data/tests/math/syntax.md +0 -46
data/tests/math_usage/document.md +0 -13
data/tests/others/abbreviations.md +0 -11
data/tests/others/blank.md +0 -4
data/tests/others/code.md +0 -5
data/tests/others/code2.md +0 -8
data/tests/others/code3.md +0 -16
data/tests/others/email.md +0 -4
data/tests/others/entities.md +0 -19
data/tests/others/escaping.md +0 -16
data/tests/others/extra_dl.md +0 -101
data/tests/others/extra_header_id.md +0 -13
data/tests/others/extra_table1.md +0 -40
data/tests/others/footnotes.md +0 -17
data/tests/others/headers.md +0 -10
data/tests/others/hrule.md +0 -10
data/tests/others/images.md +0 -20
data/tests/others/inline_html.md +0 -42
data/tests/others/links.md +0 -38
data/tests/others/list1.md +0 -4
data/tests/others/list2.md +0 -5
data/tests/others/list3.md +0 -8
data/tests/others/lists.md +0 -32
data/tests/others/lists_after_paragraph.md +0 -44
data/tests/others/lists_ol.md +0 -39
data/tests/others/misc_sw.md +0 -105
data/tests/others/one.md +0 -1
data/tests/others/paragraphs.md +0 -13
data/tests/others/sss06.md +0 -352
data/tests/others/test.md +0 -4
data/tests/s5/s5profiling.md +0 -48
data/tests/unittest/bug_def.md +0 -28
data/tests/unittest/email.md +0 -32
data/tests/unittest/hang.md +0 -29
data/tests/unittest/html2.md +0 -34
data/tests/unittest/ie.md +0 -61
data/tests/unittest/links2.md +0 -34
data/tests/unittest/lists11.md +0 -28
data/tests/unittest/lists6.md +0 -53
data/tests/unittest/lists9.md +0 -76
data/tests/unittest/lists_ol.md +0 -274
data/tests/unittest/math/equations.md +0 -86
data/tests/unittest/math/inline.md +0 -58
data/tests/unittest/math/math2.md +0 -57
data/tests/unittest/math/table.md +0 -37
data/tests/unittest/notyet/header_after_par.md +0 -70
data/tests/unittest/pending/empty_cells.md +0 -49
data/tests/unittest/red_tests/abbrev.md +0 -1388
data/tests/unittest/red_tests/lists7.md +0 -68
data/tests/unittest/red_tests/lists7b.md +0 -128
data/tests/unittest/red_tests/lists8.md +0 -76
data/tests/unittest/red_tests/xml.md +0 -70
data/tests/unittest/xml2.md +0 -31
data/tests/unittest/xml3.md +0 -38
data/tests/utf8-files/simple.md +0 -1
data/unit_test_block.sh +0 -5
data/unit_test_span.sh +0 -3

data/lib/maruku/input/linesource.rb CHANGED

@@ -1,111 +1,91 @@
-#--
-#   Copyright (C) 2006  Andrea Censi  <andrea (at) rubyforge.org>
-#
-# This file is part of Maruku.
-#
-#   Maruku is free software; you can redistribute it and/or modify
-#   it under the terms of the GNU General Public License as published by
-#   the Free Software Foundation; either version 2 of the License, or
-#   (at your option) any later version.
-#
-#   Maruku is distributed in the hope that it will be useful,
-#   but WITHOUT ANY WARRANTY; without even the implied warranty of
-#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#   GNU General Public License for more details.
-#
-#   You should have received a copy of the GNU General Public License
-#   along with Maruku; if not, write to the Free Software
-#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-#++
-module MaRuKu; module In; module Markdown; module BlockLevelParser
-# This represents a source of lines that can be consumed.
-#
-# It is the twin of CharSource.
-#
-class LineSource
-	include MaRuKu::Strings
-	attr_reader :parent
-	def initialize(lines, parent=nil, parent_offset=nil)
-		raise "NIL lines? " if not lines
-		@lines = lines
-		@lines_index = 0
-		@parent = parent
-		@parent_offset = parent_offset
-	end
-	def cur_line()  @lines[@lines_index] end
-	def next_line() @lines[@lines_index+1] end
-	def shift_line()
-		raise "Over the rainbow" if @lines_index >= @lines.size
-		l = @lines[@lines_index]
-		@lines_index += 1
-		return l
-	end
-	def ignore_line
-		raise "Over the rainbow" if @lines_index >= @lines.size
-		@lines_index += 1
-	end
-	def describe
-		s = "At line #{original_line_number(@lines_index)}\n"
-		context = 3 # lines
-		from = [@lines_index-context, 0].max
-		to   = [@lines_index+context, @lines.size-1].min
-		for i in from..to
-			prefix = (i == @lines_index) ? '--> ' : '    ';
-			l = @lines[i]
-			s += "%10s %4s|%s" %
-				[@lines[i].md_type.to_s, prefix, l]
-			s += "|\n"
-		end
-#		if @parent
-#			s << "Parent context is: \n"
-#			s << add_tabs(@parent.describe,1,'|')
-#		end
-		s
-	end
-	def original_line_number(index)
-		if @parent
-			return index + @parent.original_line_number(@parent_offset)
-		else
-			1 + index
-		end
-	end
-	def cur_index
-		@lines_index
-	end
-	# Returns the type of next line as a string
-	# breaks at first :definition
-	def tell_me_the_future
-		s = ""; num_e = 0;
-		for i in @lines_index..@lines.size-1
-			c = case @lines[i].md_type
-				when :text; "t"
-				when :empty; num_e+=1; "e"
-				when :definition; "d"
-				else "o"
-			end
-			s += c
-			break if c == "d" or num_e>1
-		end
-		s
-	end
-end # linesource
-end end end end # block
+module MaRuKu::In::Markdown::BlockLevelParser
+  # This represents a source of lines that can be consumed.
+  #
+  # It is the twin of CharSource.
+  #
+  class LineSource
+    attr_reader :parent
+    def initialize(lines, parent=nil, parent_offset=nil)
+      raise "NIL lines? " unless lines
+      @lines = lines.map {|l| l.kind_of?(MaRuKu::MDLine) ? l : MaRuKu::MDLine.new(l) }
+      @lines_index = 0
+      @parent = parent
+      @parent_offset = parent_offset
+    end
+    def cur_line
+      @lines[@lines_index]
+    end
+    def next_line
+      @lines[@lines_index + 1]
+    end
+    def shift_line
+      raise "Over the rainbow" if @lines_index >= @lines.size
+      l = @lines[@lines_index]
+      @lines_index += 1
+      l
+    end
+    def ignore_line
+      raise "Over the rainbow" if @lines_index >= @lines.size
+      @lines_index += 1
+    end
+    def describe
+      s = "At line #{original_line_number(@lines_index)}\n"
+      context = 3 # lines
+      from = [@lines_index - context, 0].max
+      to   = [@lines_index + context, @lines.size - 1].min
+      from.upto(to) do |i|
+        prefix = (i == @lines_index) ? '--> ' : '    ';
+        l = @lines[i]
+        s += "%10s %4s|%s" %
+          [@lines[i].md_type.to_s, prefix, l]
+        s += "|\n"
+      end
+      s
+    end
+    def original_line_number(index)
+      if @parent
+        index + @parent.original_line_number(@parent_offset)
+      else
+        1 + index
+      end
+    end
+    def cur_index
+      @lines_index
+    end
+    # Returns the type of next line as a string
+    # breaks at first :definition
+    def tell_me_the_future
+      s = ""
+      num_e = 0
+      @lines_index.upto(@lines.size - 1) do |i|
+        c = case @lines[i].md_type
+            when :text; "t"
+            when :empty; num_e += 1; "e"
+            when :definition; "d"
+            else "o"
+            end
+        s << c
+        break if c == "d" or num_e > 1
+      end
+      s
+    end
+  end # linesource
+end

data/lib/maruku/input/mdline.rb ADDED

@@ -0,0 +1,131 @@
+# This code does the classification of lines for block-level parsing.
+module MaRuKu
+  # Represents a single line in a Markdown source file, as produced by
+  # LineSource.
+  class MDLine < String
+    def md_type
+      @md_type ||= line_md_type
+    end
+    # Returns the number of leading spaces on this string,
+    # considering that a tab counts as {MaRuKu::Strings::TAB_SIZE} spaces.
+    #
+    # @param s [String]
+    # @return [Fixnum]
+    def number_of_leading_spaces
+      if self =~ /\A\s+/
+        spaces = $&
+        spaces.count(" ") + spaces.count("\t") * MaRuKu::Strings::TAB_SIZE
+      else
+        0
+      end
+    end
+    def gsub!(*args)
+      # Any in-place-modification method should reset the md_type
+      @md_type = nil
+      super
+    end
+    private
+    def line_md_type
+      # The order of evaluation is important (:text is a catch-all)
+      return :text           if self =~ /\A[a-zA-Z]/
+      return :empty          if self =~ /\A\s*\z/
+      return :footnote_text  if self =~ FootnoteText
+      return :ref_definition if self =~ LinkRegex || self =~ IncompleteLink
+      return :abbreviation   if self =~ Abbreviation
+      return :definition     if self =~ Definition
+      # I had a bug with emails and urls at the beginning of the
+      # line that were mistaken for raw_html
+      return :text           if self =~ /\A[ ]{0,3}#{EMailAddress}/
+      return :text           if self =~ /\A[ ]{0,3}<\w+:\/\//
+      # raw html is like PHP Markdown Extra: at most three spaces before
+      return :xml_instr      if self =~ /\A\s*<\?/
+      return :raw_html       if self =~ %r{\A[ ]{0,3}</?\s*\w+}
+      return :raw_html       if self =~ /\A[ ]{0,3}<\!\-\-/
+      return :header1        if self =~ /\A(=)+/
+      return :header2        if self =~ /\A([-\s])+\z/
+      return :header3        if self =~ /\A(#)+\s*\S+/
+      # at least three asterisks/hyphens/underscores on a line, and only whitespace
+      return :hrule          if self =~ /\A(\s*[\*\-_]\s*){3,}\z/
+      return :ulist          if self =~ /\A[ ]{0,3}([\*\-\+])\s+.*/
+      return :olist          if self =~ /\A[ ]{0,3}\d+\.\s+.*/
+      return :code           if number_of_leading_spaces >= 4
+      return :quote          if self =~ /\A>/
+      return :ald            if self =~ AttributeDefinitionList
+      return :ial            if self =~ InlineAttributeList
+      return :text # else, it's just text
+    end
+  end
+  # MacRuby has trouble with commented regexes, so just put the expanded form
+  # in a comment.
+  # $1 = id   $2 = attribute list
+  AttributeDefinitionList = /\A\s{0,3}\{([\w\s]+)\}:\s*(.*?)\s*\z/
+  #
+  InlineAttributeList = /\A\s{0,3}\{([:#\.].*?)\}\s*\z/
+  # Example:
+  #     ^:blah blah
+  #     ^: blah blah
+  #     ^   : blah blah
+  Definition = /\A[ ]{0,3}:\s*(\S.*)\z/
+  # %r{
+  #   ^ # begin of line
+  #   [ ]{0,3} # up to 3 spaces
+  #   : # colon
+  #   \s* # whitespace
+  #   (\S.*) # the text    = $1
+  #   $ # end of line
+  # }x
+  # Example:
+  #     *[HTML]: Hyper Text Markup Language
+  Abbreviation = /\A[ ]{0,3}\*\[([^\]]+)\]:\s*(\S.*\S)*\s*\z/
+  # %r{
+  #   ^  # begin of line
+  #   [ ]{0,3} # up to 3 spaces
+  #   \* # one asterisk
+  #   \[ # opening bracket
+  #   ([^\]]+) # any non-closing bracket:  id = $1
+  #   \] # closing bracket
+  #   :  # colon
+  #   \s* # whitespace
+  #   (\S.*\S)* #           definition=$2
+  #   \s* # strip this whitespace
+  #   $   # end of line
+  # }x
+  FootnoteText = /\A[ ]{0,3}\[(\^.+)\]:\s*(\S.*)?\z/
+  # %r{
+  #   ^  # begin of line
+  #   [ ]{0,3} # up to 3 spaces
+  #   \[(\^.+)\]: # id = $1 (including '^')
+  #   \s*(\S.*)?$    # text = $2 (not obb.)
+  # }x
+  # This regex is taken from BlueCloth sources
+  # Link defs are in the form: ^[id]: \n? url "optional title"
+  LinkRegex = /\A[ ]{0,3}\[([^\[\]]+)\]:[ ]*<?([^>\s]+)>?[ ]*(?:(?:(?:"([^"]+)")|(?:'([^']+)')|(?:\(([^\(\)]+)\)))\s*(.+)?)?/
+  #%r{
+  # ^[ ]{0,3}\[([^\[\]]+)\]:    # id = $1
+  #   [ ]*
+  # <?([^>\s]+)>?       # url = $2
+  #   [ ]*
+  # (?: # Titles are delimited by "quotes" or (parens).
+  #   (?:(?:"([^"]+)")|(?:'([^']+)')|(?:\(([^\(\)]+)\))) # title = $3, $4, or $5
+  #   \s*(.+)? # stuff = $6
+  # )?  # title is optional
+  #}x
+  IncompleteLink = /\A[ ]{0,3}\[([^\[\]]+?)\]:\s*\z/
+  # Table syntax: http://michelf.ca/projects/php-markdown/extra/#table
+  # | -------------:| ------------------------------ |
+  TableSeparator = /\A(?>\|?\s*\:?\-+\:?\s*\|?)+?\z/
+  EMailAddress = /<([^:@>]+?@[^:@>]+?)>/
+end

data/lib/maruku/input/parse_block.rb CHANGED

@@ -1,615 +1,738 @@
-#--
-#   Copyright (C) 2006  Andrea Censi  <andrea (at) rubyforge.org>
-#
-# This file is part of Maruku.
-#
-#   Maruku is free software; you can redistribute it and/or modify
-#   it under the terms of the GNU General Public License as published by
-#   the Free Software Foundation; either version 2 of the License, or
-#   (at your option) any later version.
-#
-#   Maruku is distributed in the hope that it will be useful,
-#   but WITHOUT ANY WARRANTY; without even the implied warranty of
-#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#   GNU General Public License for more details.
-#
-#   You should have received a copy of the GNU General Public License
-#   along with Maruku; if not, write to the Free Software
-#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-#++
 module MaRuKu; module In; module Markdown; module BlockLevelParser
-	include Helpers
-	include MaRuKu::Strings
-	include MaRuKu::In::Markdown::SpanLevelParser
-	class BlockContext < Array
-		def describe
-			n = 5
-			desc = size > n ? self[-n,n] : self
-			"Last #{n} elements: "+
-			desc.map{|x| "\n -" + x.inspect}.join
-		end
-	end
-	# Splits the string and calls parse_lines_as_markdown
-	def parse_text_as_markdown(text)
-		lines =  split_lines(text)
-		src = LineSource.new(lines)
-		return parse_blocks(src)
-	end
-	# Input is a LineSource
-	def parse_blocks(src)
-		output = BlockContext.new
-		# run state machine
-		while src.cur_line
-			next if check_block_extensions(src, output, src.cur_line)
-#  Prints detected type (useful for debugging)
-#			puts "#{src.cur_line.md_type}|#{src.cur_line}"
-			case src.cur_line.md_type
-				when :empty;
-					output.push :empty
-					src.ignore_line
-				when :ial
-					m =  InlineAttributeList.match src.shift_line
-					content = m[1] ||  ""
-#					puts "Content: #{content.inspect}"
-					src2 = CharSource.new(content, src)
-					interpret_extension(src2, output, [nil])
-				when :ald
-					output.push read_ald(src)
-				when :text
-					# paragraph, or table, or definition list
-					read_text_material(src, output)
-				when :header2, :hrule
-					# hrule
-					src.shift_line
-					output.push md_hrule()
-				when :header3
-					output.push read_header3(src)
-				when :ulist, :olist
-					list_type = src.cur_line.md_type == :ulist ? :ul : :ol
-					li = read_list_item(src)
-					# append to current list if we have one
-					if output.last.kind_of?(MDElement) &&
-						output.last.node_type == list_type then
-						output.last.children << li
-					else
-						output.push md_el(list_type, [li])
-					end
-				when :quote;    output.push read_quote(src)
-				when :code;     e = read_code(src); output << e if e
-				when :raw_html; e = read_raw_html(src); output << e if e
-				when :footnote_text;   output.push read_footnote_text(src)
-				when :ref_definition;
-					if src.parent && (src.cur_index == 0)
-						read_text_material(src, output)
-					else
-						read_ref_definition(src, output)
-					end
-				when :abbreviation;    output.push read_abbreviation(src)
-				when :xml_instr;       read_xml_instruction(src, output)
-				when :metadata;
-					maruku_error "Please use the new meta-data syntax: \n"+
-					"  http://maruku.rubyforge.org/proposal.html\n", src
-					src.ignore_line
-				else # warn if we forgot something
-					md_type = src.cur_line.md_type
-					line = src.cur_line
-					maruku_error "Ignoring line '#{line}' type = #{md_type}", src
-					src.shift_line
-			end
-		end
-		merge_ial(output, src, output)
-		output.delete_if {|x| x.kind_of?(MDElement) &&
-			x.node_type == :ial}
-		# get rid of empty line markers
-		output.delete_if {|x| x == :empty}
-		# See for each list if we can omit the paragraphs and use li_span
-		# TODO: do this after
-		output.each do |c|
-			# Remove paragraphs that we can get rid of
-			if [:ul,:ol].include? c.node_type
-				if c.children.all? {|li| !li.want_my_paragraph} then
-					c.children.each do |d|
-						d.node_type = :li_span
-						d.children = d.children[0].children
-					end
-				end
-			end
-			if c.node_type == :definition_list
-				if c.children.all?{|defi| !defi.want_my_paragraph} then
-					c.children.each do |definition|
-						definition.definitions.each do |dd|
-							dd.children = dd.children[0].children
-						end
-					end
-				end
-			end
-		end
-		output
-	end
-	def read_text_material(src, output)
-		if src.cur_line =~ MightBeTableHeader and
-			(src.next_line && src.next_line =~ TableSeparator)
-			output.push read_table(src)
-		elsif [:header1,:header2].include? src.next_line.md_type
-			output.push read_header12(src)
-		elsif eventually_comes_a_def_list(src)
-		 	definition = read_definition(src)
-			if output.last.kind_of?(MDElement) &&
-				output.last.node_type == :definition_list then
-				output.last.children << definition
-			else
-				output.push md_el(:definition_list, [definition])
-			end
-		else # Start of a paragraph
-			output.push read_paragraph(src)
-		end
-	end
-	def read_ald(src)
-		if (l=src.shift_line) =~ AttributeDefinitionList
-			id = $1;   al=$2;
-			al = read_attribute_list(CharSource.new(al,src), context=nil, break_on=[nil])
-			self.ald[id] = al;
-			return md_ald(id, al)
-		else
-			maruku_error "Bug Bug:\n#{l.inspect}"
-			return nil
-		end
-	end
-	# reads a header (with ----- or ========)
-	def read_header12(src)
-		line = src.shift_line.strip
-		al = nil
-		# Check if there is an IAL
-		if new_meta_data? and line =~ /^(.*)\{(.*)\}\s*$/
-			line = $1.strip
-			ial = $2
-			al  = read_attribute_list(CharSource.new(ial,src), context=nil, break_on=[nil])
-		end
-		text = parse_lines_as_span [ line ]
-		level = src.cur_line.md_type == :header2 ? 2 : 1;
-		src.shift_line
-		return md_header(level, text, al)
-	end
-	# reads a header like '#### header ####'
-	def read_header3(src)
-		line = src.shift_line.strip
-		al = nil
-		# Check if there is an IAL
-		if new_meta_data? and line =~ /^(.*)\{(.*)\}\s*$/
-			line = $1.strip
-			ial = $2
-			al  = read_attribute_list(CharSource.new(ial,src), context=nil, break_on=[nil])
-		end
-		level = num_leading_hashes(line)
-		text = parse_lines_as_span [strip_hashes(line)]
-		return md_header(level, text, al)
-	end
-	def read_xml_instruction(src, output)
-		m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
-		raise "BugBug" if not m
-		target = m[2] || ''
-		code = m[3]
-		until code =~ /\?>/
-			code += "\n"+src.shift_line
-		end
-		if not code =~ (/\?>\s*$/)
-			garbage = (/\?>(.*)$/.match(code))[1]
-			maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n"+
-				add_tabs(code, 1, '|'), src
-		end
-		code.gsub!(/\?>\s*$/, '')
-		if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
-			result = safe_execute_code(self, code)
-			if result
-				if result.kind_of? String
-					raise "Not expected"
-				else
-					output.push(*result)
-				end
-			end
-		else
-			output.push md_xml_instr(target, code)
-		end
-	end
-	def read_raw_html(src)
-		h = HTMLHelper.new
-		begin
-			h.eat_this(l=src.shift_line)
-#			puts "\nBLOCK:\nhtml -> #{l.inspect}"
-			while src.cur_line and not h.is_finished?
-				l=src.shift_line
-#				puts "html -> #{l.inspect}"
-				h.eat_this "\n"+l
-			end
-		rescue Exception => e
-			ex = e.inspect + e.backtrace.join("\n")
-			maruku_error "Bad block-level HTML:\n#{add_tabs(ex,1,'|')}\n", src
-		end
-		if not (h.rest =~ /^\s*$/)
-			maruku_error "Could you please format this better?\n"+
-				"I see that #{h.rest.inspect} is left after the raw HTML.", src
-		end
-		raw_html = h.stuff_you_read
-		return md_html(raw_html)
-	end
-	def read_paragraph(src)
-		lines = [src.shift_line]
-		while src.cur_line
-			# :olist does not break
-			case t = src.cur_line.md_type
-				when :quote,:header3,:empty,:ref_definition,:ial #,:xml_instr,:raw_html
-					break
-				when :olist,:ulist
-					break if src.next_line.md_type == t
-			end
-			break if src.cur_line.strip.size == 0
-			break if [:header1,:header2].include? src.next_line.md_type
-			break if any_matching_block_extension?(src.cur_line)
-			lines << src.shift_line
-		end
-#		dbg_describe_ary(lines, 'PAR')
-		children = parse_lines_as_span(lines, src)
-		return md_par(children)
-	end
-	# Reads one list item, either ordered or unordered.
-	def read_list_item(src)
-		parent_offset = src.cur_index
-		item_type = src.cur_line.md_type
-		first = src.shift_line
-		indentation = spaces_before_first_char(first)
-		break_list = [:ulist, :olist, :ial]
-		# Ugly things going on inside `read_indented_content`
-		lines, want_my_paragraph =
-			read_indented_content(src,indentation, break_list, item_type)
-		# add first line
-			# Strip first '*', '-', '+' from first line
-			stripped = first[indentation, first.size-1]
-		lines.unshift stripped
-		# dbg_describe_ary(lines, 'LIST ITEM ')
-		src2 = LineSource.new(lines, src, parent_offset)
-		children = parse_blocks(src2)
-		with_par = want_my_paragraph || (children.size>1)
-		return md_li(children, with_par)
-	end
-	def read_abbreviation(src)
-		if not (l=src.shift_line) =~ Abbreviation
-			maruku_error "Bug: it's Andrea's fault. Tell him.\n#{l.inspect}"
-		end
-		abbr = $1
-		desc = $2
-		if (not abbr) or (abbr.size==0)
-			maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
-		end
-		self.abbreviations[abbr] = desc
-		return md_abbr_def(abbr, desc)
-	end
-	def read_footnote_text(src)
-		parent_offset = src.cur_index
-		first = src.shift_line
-		if not first =~ FootnoteText
-			maruku_error "Bug (it's Andrea's fault)"
-		end
-		id = $1
-		text = $2
-		# Ugly things going on inside `read_indented_content`
-		indentation = 4 #first.size-text.size
-#		puts "id =_#{id}_; text=_#{text}_ indent=#{indentation}"
-		break_list = [:footnote_text, :ref_definition, :definition, :abbreviation]
-		item_type = :footnote_text
-		lines, want_my_paragraph =
-			read_indented_content(src,indentation, break_list, item_type)
-		# add first line
-		if text && text.strip != "" then lines.unshift text end
-#		dbg_describe_ary(lines, 'FOOTNOTE')
-		src2 = LineSource.new(lines, src, parent_offset)
-		children = parse_blocks(src2)
-		e = md_footnote(id, children)
-		self.footnotes[id] = e
-		return e
-	end
-	# This is the only ugly function in the code base.
-	# It is used to read list items, descriptions, footnote text
-	def read_indented_content(src, indentation, break_list, item_type)
-		lines =[]
-		# collect all indented lines
-		saw_empty = false; saw_anything_after = false
-		while src.cur_line
-#			puts "Reading indent = #{indentation} #{src.cur_line.inspect}"
-			#puts "#{src.cur_line.md_type} #{src.cur_line.inspect}"
-			if src.cur_line.md_type == :empty
-				saw_empty = true
-				lines << src.shift_line
-				next
-			end
-			# after a white line
-			if saw_empty
-				# we expect things to be properly aligned
-				if (ns=number_of_leading_spaces(src.cur_line)) < indentation
-					#puts "breaking for spaces, only #{ns}: #{src.cur_line}"
-					break
-				end
-				saw_anything_after = true
-			else
-#				if src.cur_line[0] != ?\
-					break if break_list.include? src.cur_line.md_type
-#				end
-#				break if src.cur_line.md_type != :text
-			end
-			stripped = strip_indent(src.shift_line, indentation)
-			lines << stripped
-			#puts "Accepted as #{stripped.inspect}"
-			# You are only required to indent the first line of
-			# a child paragraph.
-			if stripped.md_type == :text
-				while src.cur_line && (src.cur_line.md_type == :text)
-					lines << strip_indent(src.shift_line, indentation)
-				end
-			end
-		end
-		want_my_paragraph = saw_anything_after ||
-			(saw_empty && (src.cur_line  && (src.cur_line.md_type == item_type)))
-#		dbg_describe_ary(lines, 'LI')
-		# create a new context
-		while lines.last && (lines.last.md_type == :empty)
-			lines.pop
-		end
-		return lines, want_my_paragraph
-	end
-	def read_quote(src)
-		parent_offset = src.cur_index
-		lines = []
-		# collect all indented lines
-		while src.cur_line && src.cur_line.md_type == :quote
-			lines << unquote(src.shift_line)
-		end
-#		dbg_describe_ary(lines, 'QUOTE')
-		src2 = LineSource.new(lines, src, parent_offset)
-		children = parse_blocks(src2)
-		return md_quote(children)
-	end
-	def read_code(src)
-		# collect all indented lines
-		lines = []
-		while src.cur_line && ([:code, :empty].include? src.cur_line.md_type)
-			lines << strip_indent(src.shift_line, 4)
-		end
-		#while lines.last && (lines.last.md_type == :empty )
-		while lines.last && lines.last.strip.size == 0
-			lines.pop
-		end
-		while lines.first && lines.first.strip.size == 0
-			lines.shift
-		end
-		return nil if lines.empty?
-		source = lines.join("\n")
-#		dbg_describe_ary(lines, 'CODE')
-		return md_codeblock(source)
-	end
-	# Reads a series of metadata lines with empty lines in between
-	def read_metadata(src)
-		hash = {}
-		while src.cur_line
-			case src.cur_line.md_type
-				when :empty;  src.shift_line
-				when :metadata; hash.merge! parse_metadata(src.shift_line)
-				else break
-			end
-		end
-		hash
-	end
-	def read_ref_definition(src, out)
-		line = src.shift_line
-		# if link is incomplete, shift next line
-		if src.cur_line && !([:footnote_text, :ref_definition, :definition, :abbreviation].include? src.cur_line.md_type) &&
-			([1,2,3].include? number_of_leading_spaces(src.cur_line) )
-			line += " "+ src.shift_line
-		end
-#		puts "total= #{line}"
-		match = LinkRegex.match(line)
-		if not match
-			maruku_error "Link does not respect format: '#{line}'"
-			return
-		end
-		id = match[1]; url = match[2]; title = match[3];
-		id = sanitize_ref_id(id)
-		hash = self.refs[id] = {:url=>url,:title=>title}
-		stuff=match[4]
-		if stuff
-			stuff.split.each do |couple|
-#					puts "found #{couple}"
-				k, v = couple.split('=')
-				v ||= ""
-				if v[0,1]=='"' then v = v[1, v.size-2] end
-#					puts "key:_#{k}_ value=_#{v}_"
-				hash[k.to_sym] = v
-			end
-		end
-#			puts hash.inspect
-		out.push md_ref_def(id, url, meta={:title=>title})
-	end
-	def split_cells(s)
-#		s.strip.split('|').select{|x|x.strip.size>0}.map{|x|x.strip}
-# changed to allow empty cells
-		s.strip.split('|').select{|x|x.size>0}.map{|x|x.strip}
-	end
-	def read_table(src)
-		head = split_cells(src.shift_line).map{|s| md_el(:head_cell, parse_lines_as_span([s])) }
-		separator=split_cells(src.shift_line)
-		align = separator.map { |s|  s =~ Sep
-			if $1 and $2 then :center elsif $2 then :right else :left end }
-		num_columns = align.size
-		if head.size != num_columns
-			maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
-			tell_user "I will ignore this table."
-			# XXX try to recover
-			return md_br()
-		end
-		rows = []
-		while src.cur_line && src.cur_line =~ /\|/
-			row = split_cells(src.shift_line).map{|s|
-				md_el(:cell, parse_lines_as_span([s]))}
-			if head.size != num_columns
-				maruku_error  "Row does not have #{num_columns} columns: \n#{row.inspect}"
-				tell_user "I will ignore this table."
-				# XXX try to recover
-				return md_br()
-			end
-			rows << row
-		end
-		children = (head+rows).flatten
-		return md_el(:table, children, {:align => align})
-	end
-	# If current line is text, a definition list is coming
-	# if 1) text,empty,[text,empty]*,definition
-	def eventually_comes_a_def_list(src)
-		future = src.tell_me_the_future
-		ok = future =~ %r{^t+e?d}x
-#		puts "future: #{future} - #{ok}"
-		ok
-	end
-	def read_definition(src)
-		# Read one or more terms
-		terms = []
-		while  src.cur_line &&  src.cur_line.md_type == :text
-			terms << md_el(:definition_term, parse_lines_as_span([src.shift_line]))
-		end
-#		dbg_describe_ary(terms, 'DT')
-		want_my_paragraph = false
-		raise "Chunky Bacon!" if not src.cur_line
-		# one optional empty
-		if src.cur_line.md_type == :empty
-			want_my_paragraph = true
-			src.shift_line
-		end
-		raise "Chunky Bacon!" if src.cur_line.md_type != :definition
-		# Read one or more definitions
-		definitions = []
-		while src.cur_line && src.cur_line.md_type == :definition
-			parent_offset = src.cur_index
-			first = src.shift_line
-			first =~ Definition
-			first = $1
-			# I know, it's ugly!!!
-			lines, w_m_p =
-				read_indented_content(src,4, [:definition], :definition)
-			want_my_paragraph ||= w_m_p
-			lines.unshift first
-#			dbg_describe_ary(lines, 'DD')
-			src2 = LineSource.new(lines, src, parent_offset)
-			children = parse_blocks(src2)
-			definitions << md_el(:definition_data, children)
-		end
-		return md_el(:definition, terms+definitions, {
-			:terms => terms,
-			:definitions => definitions,
-			:want_my_paragraph => want_my_paragraph})
-	end
-end # BlockLevelParser
-end # MaRuKu
-end
-end
+  include Helpers
+  include MaRuKu::Strings
+  include MaRuKu::In::Markdown::SpanLevelParser
+  # Array of the block-level elements parsed so far, with a helper for
+  # debugging output.
+  class BlockContext < Array
+    # Returns a String listing the inspect form of (at most) the last five
+    # elements, one per line.
+    def describe
+      limit = 5
+      listing = last(limit).map { |elem| "\n -#{elem.inspect}" }.join
+      "Last #{limit} elements: #{listing}"
+    end
+  end
+  # Splits +text+ into lines, wraps them in a LineSource and returns
+  # whatever #parse_blocks produces for that source.
+  def parse_text_as_markdown(text)
+    parse_blocks(LineSource.new(split_lines(text)))
+  end
+  # Runs the block-level state machine over +src+ (a LineSource) and returns
+  # a BlockContext with the parsed elements.
+  #
+  # Every iteration first offers the current line to the block extensions and
+  # otherwise dispatches on the line's md_type. Once the input is consumed,
+  # IALs are merged, IAL elements and :empty markers are dropped, and the
+  # paragraphs wrapping list items / definition data are unwrapped when no
+  # child asked to keep them (want_my_paragraph).
+  def parse_blocks(src)
+    output = BlockContext.new
+    # run state machine
+    while src.cur_line
+      next if check_block_extensions(src, output, src.cur_line)
+      md_type = src.cur_line.md_type
+      # Prints detected type (useful for debugging)
+      #puts "parse_blocks #{md_type}|#{src.cur_line}"
+      case md_type
+      when :empty
+        output << :empty
+        src.ignore_line
+      when :ial
+        m = InlineAttributeList.match src.shift_line
+        content = m[1] || ""
+        src2 = CharSource.new(content, src)
+        interpret_extension(src2, output)
+      when :ald
+        # read_ald returns nil for a malformed line. Never store that nil:
+        # the post-processing below calls node_type on every element.
+        ald = read_ald(src)
+        output << ald if ald
+      when :text
+        # paragraph, or table, or definition list
+        read_text_material(src, output)
+      when :header2, :hrule
+        # hrule
+        src.shift_line
+        output << md_hrule
+      when :header3
+        output << read_header3(src)
+      when :ulist, :olist
+        list_type = (md_type == :ulist) ? :ul : :ol
+        li = read_list_item(src)
+        # append to current list if we have one
+        if output.last.kind_of?(MDElement) &&
+            output.last.node_type == list_type then
+          output.last.children << li
+        else
+          output << md_el(list_type, li)
+        end
+      when :quote
+        output << read_quote(src)
+      when :code
+        e = read_code(src)
+        output << e if e
+      when :raw_html
+        # More extra hacky stuff - if there's more than just HTML, we either wrap it
+        # in a paragraph or break it up depending on whether it's an inline element or not
+        e = read_raw_html(src)
+        unless e.empty?
+          if e.first.parsed_html &&
+              (first_node_name = e.first.parsed_html.first_node_name) &&
+              HTML_INLINE_ELEMS.include?(first_node_name) &&
+              !%w(svg math).include?(first_node_name)
+            content = [e.first]
+            if e.size > 1
+              content.concat(e[1].children)
+            end
+            output << md_par(content)
+          else
+            output.concat(e)
+          end
+        end
+      when :footnote_text
+        output << read_footnote_text(src)
+      when :ref_definition
+        # At the very first line of a nested source the line is handed to
+        # read_text_material instead of being read as a reference definition.
+        if src.parent && src.cur_index == 0
+          read_text_material(src, output)
+        else
+          read_ref_definition(src, output)
+        end
+      when :abbreviation
+        output << read_abbreviation(src)
+      when :xml_instr
+        read_xml_instruction(src, output)
+      else # unhandled line type at this level
+        # Just treat it as raw text
+        read_text_material(src, output)
+      end
+    end
+    merge_ial(output, src, output)
+    output.delete_if do |x|
+      # Strip out IAL
+      (x.kind_of?(MDElement) && x.node_type == :ial) ||
+      # get rid of empty line markers
+      x == :empty
+    end
+    # See for each list if we can omit the paragraphs
+    # TODO: do this after
+    output.each do |c|
+      # Remove paragraphs that we can get rid of
+      if [:ul, :ol].include?(c.node_type) && c.children.none?(&:want_my_paragraph)
+        c.children.each do |d|
+          if d.children.first && d.children.first.node_type == :paragraph
+            d.children = d.children.first.children + d.children[1..-1]
+          end
+        end
+      elsif c.node_type == :definition_list && c.children.none?(&:want_my_paragraph)
+        c.children.each do |definition|
+          definition.definitions.each do |dd|
+            # Same nil guard as the list branch: definition data may be empty.
+            first_child = dd.children.first
+            if first_child && first_child.node_type == :paragraph
+              dd.children = first_child.children + dd.children[1..-1]
+            end
+          end
+        end
+      end
+    end
+    output
+  end
+  # Decides what a text-like line starts (table, underlined header,
+  # definition list or plain paragraph), reads it from +src+ and adds the
+  # result to +output+.
+  def read_text_material(src, output)
+    # if contains a pipe, it could be a table header
+    if src.cur_line.include?('|') && src.next_line && src.next_line.rstrip =~ TableSeparator
+      return output << read_table(src)
+    end
+    # text followed by a header1/header2 line is an underlined header
+    if src.next_line && [:header1, :header2].include?(src.next_line.md_type)
+      return output << read_header12(src)
+    end
+    # Start of a paragraph, unless a definition list is coming
+    return output.concat(read_paragraph(src)) unless eventually_comes_a_def_list(src)
+    definition = read_definition(src)
+    previous = output.last
+    # consecutive definitions are collected into a single definition list
+    if previous.kind_of?(MDElement) && previous.node_type == :definition_list
+      previous.children << definition
+    else
+      output << md_el(:definition_list, definition)
+    end
+  end
+  # Reads one attribute definition list line, records its attribute list
+  # under its id in self.ald and returns the element built by md_ald.
+  # Reports an error and returns nil when the line does not match
+  # AttributeDefinitionList.
+  def read_ald(src)
+    line = src.shift_line
+    unless line =~ AttributeDefinitionList
+      maruku_error "Bug Bug:\n#{line.inspect}"
+      return nil
+    end
+    ald_id, raw_attributes = $1, $2
+    attributes = read_attribute_list(CharSource.new(raw_attributes, src))
+    self.ald[ald_id] = attributes
+    md_ald(ald_id, attributes)
+  end
+  # Reads a header written as a text line followed by an underline
+  # (----- or ========). Consumes both lines and returns the header element;
+  # the level is 2 when the underline's md_type is :header2, otherwise 1.
+  #
+  # When new_meta_data? is true, a trailing "{...}" is read as an inline
+  # attribute list. If nothing but the braces was on the line, the braces
+  # are kept as literal header text and the attributes are dropped.
+  def read_header12(src)
+    line = src.shift_line.strip
+    al = nil
+    ial = nil
+    # Check if there is an IAL
+    if new_meta_data? && line =~ /^(.*?)\{(.*?)\}\s*$/
+      line = $1.strip
+      ial = $2
+      al = read_attribute_list(CharSource.new(ial, src))
+    end
+    text = parse_span line
+    # Fall back to the literal braces only when an IAL was actually stripped;
+    # without this check an empty header would get the bogus text "{}".
+    if text.empty? && ial
+      text = "{#{ial}}"
+      al = nil
+    end
+    level = src.cur_line.md_type == :header2 ? 2 : 1
+    src.shift_line
+    md_header(level, text, al)
+  end
+  # Reads a header like '#### header ####' and returns the header element.
+  # The level is the number of leading '#' characters; with more than six
+  # of them the whole line is returned as a paragraph instead.
+  #
+  # When new_meta_data? is true, a trailing "{...}" is read as an inline
+  # attribute list. If nothing but the braces follows the '#' marks, the
+  # braces are kept as literal header text and the attributes are dropped.
+  def read_header3(src)
+    line = src.shift_line.strip
+    al = nil
+    ial = nil
+    # Check if there is an IAL
+    if new_meta_data? && line =~ /^(.*?)\{(.*?)\}\s*$/
+      line = $1.strip
+      ial = $2
+      al = read_attribute_list(CharSource.new(ial, src))
+    end
+    level = line[/^#+/].size
+    if level > 6
+      text = parse_span line
+      return md_par(text, al)
+    end
+    text = parse_span line.gsub(/\A#+|#+\z/, '')
+    # Fall back to the literal braces only when an IAL was actually stripped;
+    # without this check a bare "##" would get the bogus text "{}".
+    if text.empty? && ial
+      text = "{#{ial}}"
+      al = nil
+    end
+    md_header(level, text, al)
+  end
+  # Reads an XML processing instruction ("<?target ... ?>"), which may span
+  # several lines, and appends the result to +output+.
+  #
+  # A "mrk" target is run through safe_execute_code when
+  # MaRuKu::Globals[:unsafe_features] is set; any other instruction is stored
+  # as the element built by md_xml_instr. Text after the closing "?>" and an
+  # instruction that is never closed are both reported via maruku_error.
+  #
+  # Raises RuntimeError ("BugBug") if the first line is not an instruction.
+  def read_xml_instruction(src, output)
+    m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
+    raise "BugBug" unless m
+    target = m[2] || ''
+    code = m[3]
+    # Gather continuation lines until the closing marker shows up. Stop at
+    # the end of the input instead of appending nil to the string.
+    until code.include?('?>')
+      unless src.cur_line
+        maruku_error "Unterminated XML instruction:\n" +
+          code.gsub(/^/, '|'), src
+        break
+      end
+      code << "\n" << src.shift_line
+    end
+    if code.include?('?>') && code !~ /\?>\s*$/
+      garbage = (/\?>(.*)$/.match(code))[1]
+      maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n" +
+        code.gsub(/^/, '|'), src
+    end
+    code.gsub!(/\?>\s*$/, '')
+    if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
+      result = safe_execute_code(self, code)
+      if result
+        if result.kind_of? String
+          raise "Not expected"
+        else
+          output.push(*result)
+        end
+      end
+    else
+      output << md_xml_instr(target, code)
+    end
+  end
+  # Reads a block of raw HTML from +src+, feeding lines to an HTMLHelper
+  # until it reports being finished or the input ends.
+  #
+  # Returns an Array whose first entry is the raw HTML element. Non-blank
+  # text the helper left over is parsed as well: as a span wrapped in a
+  # paragraph when the first tag is an inline element, as block-level
+  # markdown appended to the array otherwise.
+  def read_raw_html(src)
+    helper = HTMLHelper.new
+    begin
+      helper.eat_this(src.shift_line)
+      while src.cur_line && !helper.is_finished?
+        helper.eat_this "\n" + src.shift_line
+      end
+    rescue => e
+      maruku_error "Bad block-level HTML:\n#{e.inspect.gsub(/^/, '|')}\n", src
+    end
+    leftover = helper.rest
+    leftover = nil if leftover =~ /^\s*$/
+    raw_html = helper.stuff_you_read
+    inline = HTML_INLINE_ELEMS.include?(helper.first_tag)
+    return [md_html(raw_html)] unless leftover
+    remainder = inline ? parse_span(leftover) : parse_text_as_markdown(leftover)
+    # keep the space that separated the HTML from the text following it
+    if leftover.start_with?(' ') && remainder[0].is_a?(String)
+      remainder[0] = ' ' + remainder[0]
+    end
+    return [md_html(raw_html), md_par(remainder)] if inline
+    [md_html(raw_html)] + remainder
+  end
+  # Reads the lines of one paragraph from +src+, parses them as a span and
+  # returns the elements produced by #pick_apart_non_inline_html.
+  #
+  # The first line is always taken. Reading stops at a line of one of the
+  # md_types listed below, at a blank line, when the following line is a
+  # header1/header2 line, or when a block extension matches the line.
+  def read_paragraph(src)
+    # :olist does not break
+    paragraph_enders = [:quote, :header3, :empty, :ref_definition, :ial, :xml_instr]
+    lines = [src.shift_line]
+    while (current = src.cur_line)
+      break if paragraph_enders.include?(current.md_type)
+      break if current.strip.empty?
+      following = src.next_line
+      break if following && [:header1, :header2].include?(following.md_type)
+      break if any_matching_block_extension?(current)
+      lines << src.shift_line
+    end
+    pick_apart_non_inline_html(parse_span(lines, src))
+  end
+  # Splits the children of a would-be paragraph around non-inline HTML
+  # elements: every such element is emitted on its own, and each run of
+  # other children in between becomes a paragraph of its own.
+  # Returns the resulting Array of elements.
+  def pick_apart_non_inline_html(children)
+    result = []
+    pending = []
+    children.each do |node|
+      unless element_is_non_inline_html?(node)
+        pending << node
+        next
+      end
+      unless pending.empty?
+        # Fix up paragraphs before non-inline elements having an extra space:
+        # the last character of a trailing string is dropped in place.
+        tail = pending.last
+        tail.replace(tail[0..-2]) if tail.is_a?(String) && !tail.empty?
+        result << md_par(pending)
+        pending = []
+      end
+      result << node
+    end
+    result << md_par(pending) unless pending.empty?
+    result
+  end
+  # Tells whether +elem+ is a raw HTML element with parsed HTML whose first
+  # node is not one of HTML_INLINE_ELEMS. Anything that is not such an
+  # element yields false; a missing first node name yields a falsy value.
+  def element_is_non_inline_html?(elem)
+    return false unless elem.is_a?(MDElement) && elem.node_type == :raw_html && elem.parsed_html
+    root_name = elem.parsed_html.first_node_name
+    root_name && !HTML_INLINE_ELEMS.include?(root_name)
+  end
+  # Reads one list item, either ordered or unordered, starting at the
+  # current line of +src+ and returns the element built by md_li.
+  #
+  # The continuation lines come from read_indented_content; together with
+  # the first line (list marker cut off) they are parsed as a nested block
+  # source. An attribute list is read when spaces_before_first_char reports
+  # IAL text for the first line.
+  def read_list_item(src)
+    parent_offset = src.cur_index
+    item_type = src.cur_line.md_type
+    first = src.shift_line
+    indentation, ial = spaces_before_first_char(first)
+    al = ial ? read_attribute_list(CharSource.new(ial, src)) : nil
+    ial_offset = ial ? ial.length + 3 : 0
+    body, want_my_paragraph = read_indented_content(src, indentation, [], item_type, ial_offset)
+    # in case there is a second line and this line starts a new list, format it.
+    starts_sublist = !body.empty? &&
+      [:ulist, :olist].include?(MaRuKu::MDLine.new(body.first).md_type)
+    body.unshift "" if starts_sublist
+    # add first line: expand each tab up to the next multiple of TAB_SIZE,
+    # then strip first '*', '-', '+' from it
+    expanded = first.gsub(/([^\t]*)(\t)/) do
+      prefix = Regexp.last_match(1)
+      prefix + " " * (TAB_SIZE - prefix.length % TAB_SIZE)
+    end
+    body.unshift expanded[indentation, expanded.size - 1]
+    children = parse_blocks(LineSource.new(body, src, parent_offset))
+    md_li(children, want_my_paragraph, al)
+  end
+  # Reads one abbreviation definition line, stores the description under
+  # the abbreviation in self.abbreviations and returns the element built by
+  # md_abbr_def. A line that does not match Abbreviation, or an empty
+  # abbreviation, is reported through maruku_error.
+  def read_abbreviation(src)
+    line = src.shift_line
+    maruku_error "Bug: it's Andrea's fault. Tell him.\n#{line.inspect}" unless line =~ Abbreviation
+    abbr, desc = $1, $2
+    if abbr.nil? || abbr.empty?
+      maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
+    end
+    self.abbreviations[abbr] = desc
+    md_abbr_def(abbr, desc)
+  end
+  # Reads the text of one footnote: the first line (matched against
+  # FootnoteText for the id and the initial text) plus its continuation
+  # lines, parsed as a nested block source. The resulting element is stored
+  # in self.footnotes under the id and returned.
+  def read_footnote_text(src)
+    parent_offset = src.cur_index
+    first = src.shift_line
+    maruku_error "Bug (it's Andrea's fault)" unless first =~ FootnoteText
+    id = $1
+    text = $2 || ''
+    # continuation lines are read with a fixed indentation of 4
+    body, _ = read_indented_content(
+      src, 4, [:footnote_text, :ref_definition, :definition, :abbreviation], :footnote_text)
+    # add first line
+    body.unshift text unless text.strip.empty?
+    footnote = md_footnote(id, parse_blocks(LineSource.new(body, src, parent_offset)))
+    self.footnotes[id] = footnote
+    footnote
+  end
+  # This is the only ugly function in the code base.
+  # It is used to read list items, descriptions, footnote text
+  #
+  # Collects from +src+ the lines that continue an item whose first line the
+  # caller has already consumed. Collected lines go through strip_indent.
+  #
+  # src         - line source, read through cur_line / shift_line
+  # indentation - amount handed to strip_indent for the collected lines
+  # break_list  - md_type (or Array of md_types) that end the item as long
+  #               as no empty line has been seen
+  # item_type   - md_type of the item being read (only used to compute
+  #               want_my_paragraph)
+  # ial_offset  - subtracted from indentation to get the alignment
+  #               threshold +len+
+  #
+  # Returns [lines, want_my_paragraph]. Trailing :empty lines are removed
+  # from lines. want_my_paragraph is truthy when a non-empty line was
+  # accepted after an empty one, or when an empty line was seen and the line
+  # the reader stopped at has md_type item_type.
+  def read_indented_content(src, indentation, break_list, item_type, ial_offset=0)
+    lines = []
+    # collect all indented lines
+    saw_empty = false
+    saw_anything_after = false
+    # accept a single md_type as well as an array of them
+    break_list = Array(break_list)
+    # minimum number of leading spaces for a line to count as aligned
+    len = indentation - ial_offset
+    while src.cur_line
+      num_leading_spaces = src.cur_line.number_of_leading_spaces
+      # an under-indented line ends the item unless it is text, empty or code
+      break if num_leading_spaces < len && ![:text, :empty, :code].include?(src.cur_line.md_type)
+      # from here on md_type is the type of the line *after* stripping
+      line = strip_indent(src.cur_line, indentation)
+      md_type = line.md_type
+      if md_type == :empty
+        saw_empty = true
+        lines << line
+        src.shift_line
+        next
+      end
+      # Unquestioningly grab anything that's deeper-indented
+      if md_type != :code && num_leading_spaces > len
+        lines << line
+        src.shift_line
+        next
+      end
+      # after a white line
+      if saw_empty
+        # we expect things to be properly aligned
+        break if num_leading_spaces < len
+        saw_anything_after = true
+      else
+        break if break_list.include?(md_type)
+      end
+      # code indented more than len+6 is re-stripped from the original line,
+      # removing all but four of its leading spaces
+      if md_type == :code && num_leading_spaces > len+6
+        lines << strip_indent(src.cur_line, num_leading_spaces-4)
+        src.shift_line
+        next
+      end
+      lines << line
+      src.shift_line
+      # You are only required to indent the first line of
+      # a child paragraph.
+      if md_type == :text
+        # note: these following lines are classified by their unstripped md_type
+        while src.cur_line && src.cur_line.md_type == :text
+          lines << strip_indent(src.shift_line, indentation)
+        end
+      end
+    end
+    # TODO fix this
+    want_my_paragraph = saw_anything_after ||
+      (saw_empty && src.cur_line && src.cur_line.md_type == item_type)
+    # create a new context
+    # (trailing empty lines are dropped before the lines are handed back)
+    while lines.last && lines.last.md_type == :empty
+      lines.pop
+    end
+    return lines, want_my_paragraph
+  end
+  # Reads a block quote from +src+ and returns the element built by
+  # md_quote. Text and quote lines are taken as they come; an empty line is
+  # taken only when the line after it is a quote line. Every collected line
+  # goes through unquote and the result is parsed as a nested block source.
+  def read_quote(src)
+    parent_offset = src.cur_index
+    lines = []
+    loop do
+      current = src.cur_line
+      break unless current
+      kind = current.md_type
+      continues = [:text, :quote].include?(kind) ||
+        (kind == :empty && src.next_line && src.next_line.md_type == :quote)
+      break unless continues
+      lines << unquote(src.shift_line)
+    end
+    md_quote(parse_blocks(LineSource.new(lines, src, parent_offset)))
+  end
+  # Reads a run of :code and :empty lines from +src+, passes each through
+  # strip_indent(line, 4), trims blank lines at both ends and returns the
+  # code block element for the remaining text. Returns nil when nothing but
+  # blank lines was read.
+  def read_code(src)
+    lines = []
+    while src.cur_line && [:code, :empty].include?(src.cur_line.md_type)
+      lines << strip_indent(src.shift_line, 4)
+    end
+    lines.pop while lines.last && lines.last.strip.size == 0
+    lines.shift while lines.first && lines.first.strip.size == 0
+    return nil if lines.empty?
+    md_codeblock(lines.join("\n"))
+  end
+  # Reads one link reference definition (optionally continued on a second
+  # line indented by one to three spaces), stores url, title and any extra
+  # key=value attributes in self.refs under the sanitized id, and appends
+  # the element built by md_ref_def to +out+.
+  #
+  # Returns nil without touching +out+ or self.refs when the text does not
+  # match LinkRegex; the problem is reported through maruku_error.
+  def read_ref_definition(src, out)
+    line = src.shift_line
+    # if link is incomplete, shift next line
+    if src.cur_line &&
+        ![:footnote_text, :ref_definition, :definition, :abbreviation].include?(src.cur_line.md_type) &&
+        (1..3).include?(src.cur_line.number_of_leading_spaces)
+      line << " " << src.shift_line
+    end
+    match = LinkRegex.match(line)
+    unless match
+      # Return unconditionally: "maruku_error ... and return" only returned
+      # when maruku_error gave back a truthy value, and fell through to
+      # match[1] on nil otherwise.
+      maruku_error "Link does not respect format: '#{line}'"
+      return
+    end
+    id = match[1]
+    url = match[2]
+    title = match[3] || match[4] || match[5]
+    id = sanitize_ref_id(id)
+    hash = self.refs[id] = {
+      :url => url,
+      :title => title
+    }
+    stuff = (match[6] || '')
+    stuff.split.each do |couple|
+      k, v = couple.split('=')
+      # a pair made only of '=' characters splits into nothing; skip it
+      next unless k
+      v ||= ""
+      v = v[1..-2] if v.start_with?('"') # strip quotes
+      hash[k.to_sym] = v
+    end
+    out << md_ref_def(id, url, :title => title)
+  end
+  # Splits one table row +s+ on '|' and returns the cells as an Array.
+  #
+  # By default empty cells are dropped and the remaining ones are stripped.
+  # With +allowBlank+ set, blank cells are kept and nothing is stripped;
+  # only the empty strings produced by a leading and/or trailing pipe are
+  # removed.
+  def split_cells(s, allowBlank = false)
+    return s.split('|').reject(&:empty?).map(&:strip) unless allowBlank
+    cells = s.split('|', -1)
+    if /^[|].*[|]$/ =~ s # handle the simple and decorated table cases
+      cells[1..-2] # pipes on both sides: keep only the inner cells
+    elsif /^.*[|]$/ =~ s
+      cells[0..-2] # trailing pipe only: drop the empty last cell
+    else
+      cells
+    end
+  end
+  # Reads a table from +src+: a header row, a separator row giving the
+  # column alignment, and every following line that contains a pipe.
+  #
+  # Returns a :table element whose children are the header row followed by
+  # the body rows and whose :align entry lists one alignment per column.
+  # In a body row an empty cell widens the previous cell through a
+  # "colspan" attribute. If the header or any row does not have the
+  # expected number of columns, the error is reported and the element built
+  # by md_br is returned instead.
+  def read_table(src)
+    head = split_cells(src.shift_line).map { |text| md_el(:head_cell, parse_span(text)) }
+    align = split_cells(src.shift_line).map do |sep|
+      # ex: :-------------------:
+      # If the separator starts and ends with a colon,
+      # center the cell. If it's on the right, right-align,
+      # otherwise left-align.
+      left_colon = sep.start_with? ':'
+      right_colon = sep.end_with? ':'
+      if sep.empty? then nil # blank
+      elsif left_colon && right_colon then :center
+      elsif right_colon then :right
+      else :left
+      end
+    end
+    align.pop if align[-1].nil? # trailing blank
+    num_columns = align.size
+    head.pop if head.size == num_columns + 1 && head[-1].al.size == 0 # trailing blank
+    if head.size != num_columns
+      maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
+      tell_user "I will ignore this table."
+      # XXX try to recover
+      return md_br
+    end
+    rows = []
+    while src.cur_line && src.cur_line.include?('|')
+      row = []
+      colspan = 1
+      last_cell = nil
+      split_cells(src.shift_line, true).each do |text|
+        unless text.empty?
+          colspan = 1
+          last_cell = md_el(:cell, parse_span(text))
+          row << last_cell
+          next
+        end
+        # empty cells increase the colspan of the previous cell
+        colspan += 1
+        attrs = (last_cell && last_cell.al) || AttributeList.new
+        span_attr = attrs.size > 0 ? find_colspan(attrs) : nil
+        if span_attr
+          span_attr[1] = colspan.to_s
+        else
+          # also handles the case of an empty attribute list
+          attrs.push(["colspan", colspan.to_s])
+        end
+      end
+      #
+      # sanity check - make sure the current row has the right number of columns (including spans)
+      #                If not, dump the table and return a break
+      #
+      width = count_columns(row)
+      if width == head.size + 1 && row[-1].al.size == 0 # trailing blank cell
+        row.pop
+        width -= 1
+      end
+      if head.size != width
+        maruku_error "Row does not have #{head.size} columns: \n#{row.inspect} - #{width}"
+        tell_user "I will ignore this table."
+        # XXX need to recover
+        return md_br
+      end
+      rows << row
+    end
+    rows.unshift(head) # put the header row on the processed table
+    md_el(:table, rows, { :align => align })
+  end
+  #
+  # Returns the number of columns +row+ occupies: a cell whose attribute
+  # list carries a colspan counts for that many columns, any other cell
+  # counts for one.
+  #
+  def count_columns(row)
+    row.inject(0) do |total, cell|
+      span = (cell.al && cell.al.size > 0) ? find_colspan(cell.al) : nil
+      total + (span ? span[1].to_i : 1)
+    end
+  end
+  #
+  # Returns the first entry of the attribute list +al+ whose first item is
+  # the string "colspan", or nil when there is none.
+  #
+  def find_colspan(al)
+    al.find do |entry|
+      entry[0] == "colspan"
+    end
+  end
+  # If current line is text, a definition list is coming when the string
+  # returned by src.tell_me_the_future has a line starting with one or more
+  # 't', an optional 'e' and then a 'd' (presumably one letter per upcoming
+  # line: text, empty, definition - confirm against tell_me_the_future).
+  # Returns the match position, or nil when there is no match.
+  def eventually_comes_a_def_list(src)
+    upcoming = src.tell_me_the_future
+    upcoming =~ %r{^t+e?d}x
+  end
+  # Reads one definition from +src+: one or more term lines, one optional
+  # empty line, then one or more definition bodies. Each body is the text
+  # captured by Definition plus its continuation lines, parsed as a nested
+  # block source. Returns a :definition element whose children are the
+  # terms followed by the definitions.
+  #
+  # Raises RuntimeError ("Chunky Bacon!") when the input ends after the
+  # terms or when no definition line follows them.
+  def read_definition(src)
+    # Read one or more terms
+    terms = []
+    while src.cur_line && src.cur_line.md_type == :text
+      terms << md_el(:definition_term, parse_span(src.shift_line))
+    end
+    raise "Chunky Bacon!" unless src.cur_line
+    # one optional empty
+    want_my_paragraph = src.cur_line.md_type == :empty
+    src.shift_line if want_my_paragraph
+    raise "Chunky Bacon!" unless src.cur_line.md_type == :definition
+    # Read one or more definitions
+    definitions = []
+    while src.cur_line && src.cur_line.md_type == :definition
+      parent_offset = src.cur_index
+      src.shift_line =~ Definition
+      head_text = $1
+      body, keep_paragraph = read_indented_content(src, 4, :definition, :definition)
+      want_my_paragraph ||= keep_paragraph
+      body.unshift head_text
+      children = parse_blocks(LineSource.new(body, src, parent_offset))
+      definitions << md_el(:definition_data, children)
+    end
+    meta = {
+      :terms => terms,
+      :definitions => definitions,
+      :want_my_paragraph => want_my_paragraph
+    }
+    md_el(:definition, terms + definitions, meta)
+  end
+end end end end