RubyGems - maruku - Versions diffs - 0.6.1 → 0.7.0.beta1 - Mend

maruku 0.6.1 → 0.7.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (263) hide show

checksums.yaml +7 -0
checksums.yaml.gz.sig +0 -0
data.tar.gz.sig +0 -0
data/MIT-LICENSE.txt +20 -0
data/bin/maruku +153 -152
data/bin/marutex +2 -29
data/data/entities.xml +261 -0
data/docs/math.md +14 -18
data/lib/maruku.rb +65 -77
data/lib/maruku/attributes.rb +109 -214
data/lib/maruku/defaults.rb +45 -67
data/lib/maruku/document.rb +43 -0
data/lib/maruku/element.rb +112 -0
data/lib/maruku/errors.rb +71 -0
data/lib/maruku/ext/div.rb +105 -113
data/lib/maruku/ext/fenced_code.rb +97 -0
data/lib/maruku/ext/math.rb +22 -26
data/lib/maruku/ext/math/elements.rb +20 -26
data/lib/maruku/ext/math/mathml_engines/blahtex.rb +92 -104
data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +33 -26
data/lib/maruku/ext/math/mathml_engines/none.rb +11 -19
data/lib/maruku/ext/math/mathml_engines/ritex.rb +2 -4
data/lib/maruku/ext/math/parsing.rb +107 -113
data/lib/maruku/ext/math/to_html.rb +184 -187
data/lib/maruku/ext/math/to_latex.rb +30 -21
data/lib/maruku/helpers.rb +158 -257
data/lib/maruku/html.rb +254 -0
data/lib/maruku/input/charsource.rb +272 -319
data/lib/maruku/input/extensions.rb +62 -63
data/lib/maruku/input/html_helper.rb +220 -189
data/lib/maruku/input/linesource.rb +90 -110
data/lib/maruku/input/mdline.rb +129 -0
data/lib/maruku/input/parse_block.rb +618 -612
data/lib/maruku/input/parse_doc.rb +145 -215
data/lib/maruku/input/parse_span.rb +658 -0
data/lib/maruku/input/rubypants.rb +200 -128
data/lib/maruku/inspect_element.rb +60 -0
data/lib/maruku/maruku.rb +10 -31
data/lib/maruku/output/entity_table.rb +33 -0
data/lib/maruku/output/s5/fancy.rb +462 -462
data/lib/maruku/output/s5/to_s5.rb +115 -135
data/lib/maruku/output/to_html.rb +898 -983
data/lib/maruku/output/to_latex.rb +561 -560
data/lib/maruku/output/to_markdown.rb +207 -162
data/lib/maruku/output/to_s.rb +11 -52
data/lib/maruku/string_utils.rb +129 -179
data/lib/maruku/toc.rb +185 -196
data/lib/maruku/version.rb +33 -38
data/spec/block_docs/abbrev.md +776 -0
data/{tests/unittest → spec/block_docs}/abbreviations.md +11 -20
data/{tests/unittest → spec/block_docs}/alt.md +2 -14
data/{tests/unittest/pending → spec/block_docs}/amps.md +1 -13
data/{tests/unittest → spec/block_docs}/attributes/att2.md +0 -12
data/{tests/unittest → spec/block_docs}/attributes/att3.md +2 -14
data/{tests/unittest → spec/block_docs}/attributes/attributes.md +12 -16
data/{tests/unittest → spec/block_docs}/attributes/circular.md +0 -12
data/{tests/unittest → spec/block_docs}/attributes/default.md +1 -13
data/{tests/unittest → spec/block_docs}/blank.md +0 -12
data/{tests/unittest → spec/block_docs}/blanks_in_code.md +16 -15
data/{tests/unittest/loss.md → spec/block_docs/bug_def.md} +6 -18
data/{tests/unittest → spec/block_docs}/bug_table.md +3 -15
data/{tests/unittest → spec/block_docs}/code.md +7 -14
data/{tests/unittest → spec/block_docs}/code2.md +4 -14
data/{tests/unittest → spec/block_docs}/code3.md +12 -16
data/{tests/unittest → spec/block_docs}/data_loss.md +2 -14
data/{tests/unittest → spec/block_docs}/divs/div1.md +0 -12
data/{tests/unittest → spec/block_docs}/divs/div2.md +0 -12
data/{tests/unittest → spec/block_docs}/divs/div3_nest.md +3 -15
data/{tests/unittest → spec/block_docs}/easy.md +1 -13
data/spec/block_docs/email.md +29 -0
data/{tests/unittest/pending → spec/block_docs}/empty_cells.md +3 -15
data/{tests/unittest → spec/block_docs}/encoding/iso-8859-1.md +1 -14
data/{tests/unittest → spec/block_docs}/encoding/utf-8.md +0 -12
data/{tests/unittest → spec/block_docs}/entities.md +27 -29
data/{tests/unittest/notyet → spec/block_docs}/escape.md +2 -14
data/{tests/unittest → spec/block_docs}/escaping.md +11 -22
data/{tests/unittest → spec/block_docs}/extra_dl.md +2 -13
data/{tests/unittest → spec/block_docs}/extra_header_id.md +14 -20
data/{tests/unittest → spec/block_docs}/extra_table1.md +3 -15
data/spec/block_docs/fenced_code_blocks.md +66 -0
data/spec/block_docs/fenced_code_blocks_highlighted.md +18 -0
data/{tests/unittest → spec/block_docs}/footnotes.md +12 -24
data/spec/block_docs/footnotes2.md +78 -0
data/spec/block_docs/hard.md +25 -0
data/spec/block_docs/header_after_par.md +62 -0
data/{tests/unittest → spec/block_docs}/headers.md +10 -18
data/{tests/unittest → spec/block_docs}/hex_entities.md +7 -18
data/{tests/unittest → spec/block_docs}/hrule.md +5 -12
data/{tests/unittest → spec/block_docs}/html3.md +1 -13
data/{tests/unittest → spec/block_docs}/html4.md +2 -14
data/{tests/unittest → spec/block_docs}/html5.md +2 -14
data/spec/block_docs/html_block_in_para.md +22 -0
data/spec/block_docs/html_inline.md +25 -0
data/spec/block_docs/html_trailing.md +31 -0
data/spec/block_docs/ie.md +62 -0
data/spec/block_docs/iframe.md +29 -0
data/{tests/unittest → spec/block_docs}/images.md +22 -28
data/{tests/unittest → spec/block_docs}/images2.md +7 -17
data/{tests/unittest → spec/block_docs}/inline_html.md +37 -67
data/{tests/unittest → spec/block_docs}/inline_html2.md +1 -13
data/spec/block_docs/inline_html_beginning.md +10 -0
data/spec/block_docs/issue20.md +9 -0
data/spec/block_docs/issue26.md +22 -0
data/spec/block_docs/issue29.md +9 -0
data/spec/block_docs/issue30.md +30 -0
data/spec/block_docs/issue31.md +25 -0
data/spec/block_docs/issue40.md +40 -0
data/spec/block_docs/issue64.md +55 -0
data/spec/block_docs/issue67.md +19 -0
data/spec/block_docs/issue70.md +11 -0
data/spec/block_docs/issue72.md +17 -0
data/spec/block_docs/issue74.md +38 -0
data/spec/block_docs/issue79.md +15 -0
data/spec/block_docs/issue83.md +13 -0
data/spec/block_docs/issue85.md +25 -0
data/spec/block_docs/issue88.md +19 -0
data/spec/block_docs/issue89.md +12 -0
data/spec/block_docs/issue90.md +38 -0
data/{tests/unittest/pending → spec/block_docs}/link.md +21 -18
data/{tests/unittest → spec/block_docs}/links.md +33 -32
data/spec/block_docs/links2.md +21 -0
data/{tests/unittest → spec/block_docs}/list1.md +0 -12
data/{tests/unittest → spec/block_docs}/list12.md +2 -14
data/{tests/unittest → spec/block_docs}/list2.md +2 -14
data/spec/block_docs/list_multipara.md +42 -0
data/{tests/unittest → spec/block_docs}/lists.md +28 -29
data/{tests/unittest → spec/block_docs}/lists10.md +2 -14
data/spec/block_docs/lists11.md +23 -0
data/spec/block_docs/lists12.md +43 -0
data/spec/block_docs/lists13.md +55 -0
data/spec/block_docs/lists14.md +61 -0
data/spec/block_docs/lists15.md +36 -0
data/spec/block_docs/lists6.md +88 -0
data/spec/block_docs/lists7b.md +58 -0
data/spec/block_docs/lists9.md +53 -0
data/{tests/unittest → spec/block_docs}/lists_after_paragraph.md +19 -25
data/spec/block_docs/lists_blank.md +35 -0
data/{tests/unittest/list3.md → spec/block_docs/lists_blockquote_code.md} +2 -14
data/{tests/unittest/list4.md → spec/block_docs/lists_need_blank_line.md} +50 -21
data/spec/block_docs/lists_nested.md +44 -0
data/spec/block_docs/lists_nested_blankline.md +28 -0
data/spec/block_docs/lists_nested_deep.md +43 -0
data/{tests/unittest → spec/block_docs}/lists_ol.md +37 -54
data/spec/block_docs/lists_paraindent.md +47 -0
data/spec/block_docs/lists_tab.md +54 -0
data/spec/block_docs/loss.md +17 -0
data/spec/block_docs/math-blahtex/equations.md +30 -0
data/spec/block_docs/math-blahtex/inline.md +48 -0
data/spec/block_docs/math-blahtex/math2.md +45 -0
data/spec/block_docs/math-blahtex/table.md +25 -0
data/spec/block_docs/math/embedded_invalid_svg.md +79 -0
data/spec/block_docs/math/embedded_svg.md +97 -0
data/spec/block_docs/math/equations.md +44 -0
data/{tests/unittest → spec/block_docs}/math/inline.md +7 -19
data/spec/block_docs/math/math2.md +45 -0
data/{tests/unittest → spec/block_docs}/math/notmath.md +0 -12
data/spec/block_docs/math/raw_mathml.md +87 -0
data/spec/block_docs/math/table.md +25 -0
data/{tests/unittest → spec/block_docs}/math/table2.md +5 -17
data/{tests/unittest → spec/block_docs}/misc_sw.md +181 -118
data/{tests/unittest → spec/block_docs}/olist.md +6 -18
data/{tests/unittest → spec/block_docs}/one.md +0 -12
data/{tests/unittest → spec/block_docs}/paragraph.md +0 -12
data/{tests/unittest → spec/block_docs}/paragraph_rules/dont_merge_ref.md +4 -12
data/{tests/unittest → spec/block_docs}/paragraph_rules/tab_is_blank.md +0 -12
data/{tests/unittest → spec/block_docs}/paragraphs.md +1 -13
data/{tests/unittest → spec/block_docs}/recover/recover_links.md +4 -16
data/{tests/unittest/pending/ref.md → spec/block_docs/ref_with_period.md} +7 -16
data/spec/block_docs/ref_with_title.md +22 -0
data/{tests/unittest → spec/block_docs}/references/long_example.md +16 -23
data/{tests/unittest → spec/block_docs}/references/spaces_and_numbers.md +0 -12
data/{tests/unittest → spec/block_docs}/smartypants.md +24 -31
data/{tests/unittest → spec/block_docs}/syntax_hl.md +13 -17
data/{tests/unittest → spec/block_docs}/table_attributes.md +2 -14
data/spec/block_docs/tables.md +58 -0
data/{tests/unittest → spec/block_docs}/test.md +1 -13
data/{tests/unittest/notyet → spec/block_docs}/ticks.md +1 -13
data/spec/block_docs/toc.md +87 -0
data/{tests/unittest/notyet → spec/block_docs}/triggering.md +14 -25
data/{tests/unittest → spec/block_docs}/underscore_in_words.md +0 -12
data/{tests/unittest → spec/block_docs}/wrapping.md +4 -16
data/spec/block_docs/xml.md +33 -0
data/{tests/unittest → spec/block_docs}/xml2.md +0 -12
data/spec/block_docs/xml3.md +24 -0
data/{tests/unittest → spec/block_docs}/xml_instruction.md +9 -20
data/spec/block_spec.rb +110 -0
data/spec/cli_spec.rb +8 -0
data/spec/span_spec.rb +256 -0
data/spec/spec_helper.rb +2 -0
data/spec/to_html_utf8_spec.rb +13 -0
metadata +205 -243
metadata.gz.sig +3 -0
data/Rakefile +0 -48
data/bin/marudown +0 -29
data/bin/marutest +0 -345
data/docs/changelog.md +0 -334
data/lib/maruku/errors_management.rb +0 -92
data/lib/maruku/ext/math/latex_fix.rb +0 -12
data/lib/maruku/input/parse_span_better.rb +0 -746
data/lib/maruku/input/type_detection.rb +0 -147
data/lib/maruku/output/to_latex_entities.rb +0 -367
data/lib/maruku/output/to_latex_strings.rb +0 -64
data/lib/maruku/structures.rb +0 -167
data/lib/maruku/structures_inspect.rb +0 -87
data/lib/maruku/structures_iterators.rb +0 -61
data/lib/maruku/tests/benchmark.rb +0 -82
data/lib/maruku/tests/new_parser.rb +0 -373
data/lib/maruku/tests/tests.rb +0 -136
data/lib/maruku/usage/example1.rb +0 -33
data/tests/bugs/code_in_links.md +0 -101
data/tests/bugs/complex_escaping.md +0 -38
data/tests/math/syntax.md +0 -46
data/tests/math_usage/document.md +0 -13
data/tests/others/abbreviations.md +0 -11
data/tests/others/blank.md +0 -4
data/tests/others/code.md +0 -5
data/tests/others/code2.md +0 -8
data/tests/others/code3.md +0 -16
data/tests/others/email.md +0 -4
data/tests/others/entities.md +0 -19
data/tests/others/escaping.md +0 -16
data/tests/others/extra_dl.md +0 -101
data/tests/others/extra_header_id.md +0 -13
data/tests/others/extra_table1.md +0 -40
data/tests/others/footnotes.md +0 -17
data/tests/others/headers.md +0 -10
data/tests/others/hrule.md +0 -10
data/tests/others/images.md +0 -20
data/tests/others/inline_html.md +0 -42
data/tests/others/links.md +0 -38
data/tests/others/list1.md +0 -4
data/tests/others/list2.md +0 -5
data/tests/others/list3.md +0 -8
data/tests/others/lists.md +0 -32
data/tests/others/lists_after_paragraph.md +0 -44
data/tests/others/lists_ol.md +0 -39
data/tests/others/misc_sw.md +0 -105
data/tests/others/one.md +0 -1
data/tests/others/paragraphs.md +0 -13
data/tests/others/sss06.md +0 -352
data/tests/others/test.md +0 -4
data/tests/s5/s5profiling.md +0 -48
data/tests/unittest/bug_def.md +0 -28
data/tests/unittest/email.md +0 -32
data/tests/unittest/html2.md +0 -34
data/tests/unittest/ie.md +0 -61
data/tests/unittest/links2.md +0 -34
data/tests/unittest/lists11.md +0 -28
data/tests/unittest/lists6.md +0 -53
data/tests/unittest/lists9.md +0 -76
data/tests/unittest/math/equations.md +0 -86
data/tests/unittest/math/math2.md +0 -57
data/tests/unittest/math/table.md +0 -37
data/tests/unittest/notyet/header_after_par.md +0 -70
data/tests/unittest/red_tests/abbrev.md +0 -1388
data/tests/unittest/red_tests/lists7.md +0 -68
data/tests/unittest/red_tests/lists7b.md +0 -128
data/tests/unittest/red_tests/lists8.md +0 -76
data/tests/unittest/red_tests/xml.md +0 -70
data/tests/unittest/xml3.md +0 -38
data/tests/utf8-files/simple.md +0 -1
data/unit_test_block.sh +0 -5
data/unit_test_span.sh +0 -3

data/lib/maruku/input/parse_doc.rb CHANGED

@@ -1,33 +1,16 @@
-#--
-#   Copyright (C) 2006  Andrea Censi  <andrea (at) rubyforge.org>
-#
-# This file is part of Maruku.
-#
-#   Maruku is free software; you can redistribute it and/or modify
-#   it under the terms of the GNU General Public License as published by
-#   the Free Software Foundation; either version 2 of the License, or
-#   (at your option) any later version.
-#
-#   Maruku is distributed in the hope that it will be useful,
-#   but WITHOUT ANY WARRANTY; without even the implied warranty of
-#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#   GNU General Public License for more details.
-#
-#   You should have received a copy of the GNU General Public License
-#   along with Maruku; if not, write to the Free Software
-#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-#++
-module MaRuKu; module In; module Markdown; module BlockLevelParser
-	def parse_doc(s)
-		# FIXME \r\n => \n
-		meta2 =  parse_email_headers(s)
-		data = meta2[:data]
-		meta2.delete :data
-		self.attributes.merge! meta2
+require 'strscan'
+require 'cgi'
+module MaRuKu::In::Markdown::BlockLevelParser
+  def parse_doc(s)
+    # Remove BOM if it is present
+    s = s.sub(/^\xEF\xBB\xBF/u, '')
+    meta2 = parse_email_headers(s)
+    data = meta2.delete :data
+    self.attributes.merge! meta2
 =begin maruku_doc
 Attribute: encoding
 Scope:     document
@@ -37,50 +20,40 @@ If the `encoding` attribute is specified, then the content
 will be converted from the specified encoding to UTF-8.
 =end
-		enc = self.attributes[:encoding]
-		self.attributes.delete :encoding
-		if enc && enc.downcase != 'utf-8'
-			# Switch to ruby 1.9 String#encode
-			# with backward 1.8 compatibility
-			if data.respond_to?(:encode!)
-				data.encode!('UTF-8', enc)
-			else
-				require 'iconv'
-				data = Iconv.new('utf-8', enc).iconv(data)
-			end
-		end
-		@children = parse_text_as_markdown(data)
-		if true #markdown_extra?
-			self.search_abbreviations
-			self.substitute_markdown_inside_raw_html
-		end
-		toc = create_toc
-		# use title if not set
-		if not self.attributes[:title] and toc.header_element
-			title = toc.header_element.to_s
-			self.attributes[:title]  = title
-#			puts "Set document title to #{title}"
-		end
-		# save for later use
-		self.toc = toc
-		# Now do the attributes magic
-		each_element do |e|
-			# default attribute list
-			if default = self.ald[e.node_type.to_s]
-				expand_attribute_list(default, e.attributes)
-			end
-			expand_attribute_list(e.al, e.attributes)
-#			puts "#{e.node_type}: #{e.attributes.inspect}"
-		end
+    enc = self.attributes.delete(:encoding) || 'utf-8'
+    if enc.downcase != 'utf-8'
+      # Switch to ruby 1.9 String#encode
+      # with backward 1.8 compatibility
+      if data.respond_to?(:encode!)
+        data.encode!('UTF-8', enc)
+      else
+        require 'iconv'
+        data = Iconv.new('utf-8', enc).iconv(data)
+      end
+    end
+    @children = parse_text_as_markdown(data)
+    if markdown_extra?
+      self.search_abbreviations
+      self.substitute_markdown_inside_raw_html
+    end
+    self.toc = create_toc
+    # use title if not set
+    self.attributes[:title] ||= toc.header_element.children.join if toc.header_element
+    # Now do the attributes magic
+    each_element do |e|
+      # default attribute list
+      if default = self.ald[e.node_type.to_s]
+        expand_attribute_list(default, e.attributes)
+      end
+      expand_attribute_list(e.al, e.attributes)
+#     puts "#{e.node_type}: #{e.attributes.inspect}"
+    end
 =begin maruku_doc
 Attribute: unsafe_features
 Scope:     global
@@ -89,144 +62,101 @@ Summary:   Enables execution of XML instructions.
 Disabled by default because of security concerns.
 =end
-		if Maruku::Globals[:unsafe_features]
-			self.execute_code_blocks
-			# TODO: remove executed code blocks
-		end
-	end
-	# Expands an attribute list in an Hash
-	def expand_attribute_list(al, result)
-		al.each do |k, v|
-			case k
-			when :class
-				if not result[:class]
-					result[:class] = v
-				else
-					result[:class] += " " + v
-				end
-			when :id; result[:id] = v
-			when :ref;
-				if self.ald[v]
-					already = (result[:expanded_references] ||= [])
-					if not already.include?(v)
-						already.push v
-						expand_attribute_list(self.ald[v], result)
-					else
-						already.push  v
-						maruku_error "Circular reference between labels.\n\n"+
-						"Label #{v.inspect} calls itself via recursion.\nThe recursion is "+
-							(already.map{|x| x.inspect}.join(' => '))
-					end
-				else
-					if not result[:unresolved_references]
-						result[:unresolved_references] = v
-					else
-						result[:unresolved_references] << " #{v}"
-					end
-				#	$stderr.puts "Unresolved reference #{v.inspect} (avail: #{self.ald.keys.inspect})"
-					result[v.to_sym] = true
-				end
-			else
-				result[k.to_sym]=v
-			end
-		end
-	end
-	def safe_execute_code(object, code)
-		begin
-			return object.instance_eval(code)
-		rescue Exception => e
-			maruku_error "Exception while executing this:\n"+
-				add_tabs(code, 1, ">")+
-				"\nThe error was:\n"+
-				add_tabs(e.inspect+"\n"+e.caller.join("\n"), 1, "|")
-		rescue RuntimeError => e
-			maruku_error "2: Exception while executing this:\n"+
-				add_tabs(code, 1, ">")+
-				"\nThe error was:\n"+
-				add_tabs(e.inspect, 1, "|")
-		rescue SyntaxError => e
-			maruku_error "2: Exception while executing this:\n"+
-				add_tabs(code, 1, ">")+
-				"\nThe error was:\n"+
-				add_tabs(e.inspect, 1, "|")
-		end
-		nil
-	end
-	def execute_code_blocks
-		self.each_element(:xml_instr) do |e|
-			if e.target == 'maruku'
-				result = safe_execute_code(e, e.code)
-				if result.kind_of?(String)
-					puts "Result is : #{result.inspect}"
-				end
-			end
-		end
-	end
-	def search_abbreviations
-		self.abbreviations.each do |abbrev, title|
-			reg = Regexp.new(Regexp.escape(abbrev))
-			self.replace_each_string do |s|
-				# bug if many abbreviations are present (agorf)
-				if m = reg.match(s)
-					e = md_abbr(abbrev.dup, title ? title.dup : nil)
-					[m.pre_match, e, m.post_match]
-				else
-					s
-				end
-			end
-		end
-	end
-	include REXML
-	# (PHP Markdown extra) Search for elements that have
-	# markdown=1 or markdown=block defined
-	def substitute_markdown_inside_raw_html
-		self.each_element(:raw_html) do |e|
-			doc = e.instance_variable_get :@parsed_html
-			if doc # valid html
-				# parse block-level markdown elements in these HTML tags
-				block_tags = ['div']
-				# use xpath to find elements with 'markdown' attribute
-				XPath.match(doc, "//*[attribute::markdown]" ).each do |e|
-#					puts "Found #{e}"
-					# should we parse block-level or span-level?
-					how = e.attributes['markdown']
-					parse_blocks = (how == 'block') || block_tags.include?(e.name)
-					# Select all text elements of e
-					XPath.match(e, "//text()" ).each { |original_text|
-						s = original_text.value.strip
-						if s.size > 0
-					#	    puts "Parsing #{s.inspect} as blocks: #{parse_blocks}  (#{e.name}, #{e.attributes['markdown']})  "
-							el = md_el(:dummy,
-							 	parse_blocks ? parse_text_as_markdown(s) :
-							                  parse_lines_as_span([s]) )
-							p = original_text.parent
-							el.children_to_html.each do |x|
-								p.insert_before(original_text, x)
-							end
-							p.delete(original_text)
-						end
-					}
-          # remove 'markdown' attribute
-          e.delete_attribute 'markdown'
-				end
-			end
-		end
-	end
-end end end end
+    if Maruku::Globals[:unsafe_features]
+      self.execute_code_blocks
+      # TODO: remove executed code blocks
+    end
+  end
+  # Expands the attribute list +al+ into the Hash +result+ (modified in place), following :ref entries through self.ald
+  def expand_attribute_list(al, result)
+    al.each do |k, v|
+      case k
+      when :class
+        if result[:class]
+          result[:class] << " " << v # further classes are appended, space-separated, in place
+        else
+          result[:class] = v # NOTE(review): stores v itself, so the in-place << above also changes the string held by the source list -- confirm this is intended
+        end
+      when :id
+        result[:id] = v # a later id overwrites an earlier one
+      when :ref
+        if self.ald[v] # v names an attribute list definition known to the document
+          already = (result[:expanded_references] ||= []) # labels expanded so far for this result
+          if !already.include?(v)
+            already << v
+            expand_attribute_list(self.ald[v], result) # recurse into the referenced list
+          else
+            already << v # recorded again so the reported chain ends with the repeated label
+            maruku_error "Circular reference between labels.\n\n" +
+            "Label #{v.inspect} calls itself via recursion.\nThe recursion is " +
+              already.map(&:inspect).join(' => ')
+          end
+        else
+          if result[:unresolved_references]
+            result[:unresolved_references] << " " << v # same in-place append pattern as :class
+          else
+            result[:unresolved_references] = v
+          end
+          # $stderr.puts "Unresolved reference #{v.inspect} (avail: #{self.ald.keys.inspect})"
+          result[v.to_sym] = true # an unknown label is also kept as a bare flag
+        end
+      else
+        result[k.to_sym] = v # any other key is copied over under its symbol form
+      end
+    end
+  end
+  def safe_execute_code(object, code) # evaluates +code+ with +object+ as self and returns the result
+    begin
+      object.instance_eval(code) # runs arbitrary code; the only caller in view is gated by Globals[:unsafe_features]
+    rescue StandardError, ScriptError => e # ScriptError is listed so that SyntaxError in +code+ is handled too
+      maruku_error "Exception while executing this:\n" +
+        code.gsub(/^/, ">") + # prefix every line of the code with ">"
+        "\nThe error was:\n" +
+        (e.inspect + "\n" + e.caller.join("\n")).gsub(/^/, "|") # NOTE(review): Exception has no public #caller (Kernel#caller is private); #backtrace was probably intended -- confirm
+      nil # reached only if maruku_error returns normally
+    end
+  end
+  def execute_code_blocks # runs the code of every :xml_instr element whose target is 'maruku'
+    each_element(:xml_instr) do |e|
+      if e.target == 'maruku'
+        result = safe_execute_code(e, e.code) # the element itself is the evaluation context
+        if result.kind_of?(String)
+          puts "Result is : #{result.inspect}" # String results are only printed to stdout, not inserted here
+        end
+      end
+    end
+  end
+  def search_abbreviations # replaces each occurrence of a known abbreviation in text with an md_abbr element
+    abbreviations.each do |abbrev, title|
+      reg = Regexp.new(Regexp.escape(abbrev)) # literal match of the abbreviation text
+      replace_each_string do |s|
+        # bug if many abbreviations are present (agorf)
+        p = StringScanner.new(s)
+        a = [] # pieces that replace s: plain strings and abbreviation elements
+        until p.eos?
+          o = ''
+          o << p.getch until p.scan(reg) or p.eos? # gather characters until the abbreviation matches at the cursor or input ends
+          a << o unless o.empty?
+          a << md_abbr(abbrev.dup, title ? title.dup : nil) if p.matched == abbrev # only when the scan above actually matched
+        end
+        a
+      end
+    end
+  end
+  # (PHP Markdown Extra) Searches the raw HTML elements for tags that have
+  # markdown=1 or markdown=block defined; the work is delegated to the parsed HTML object.
+  def substitute_markdown_inside_raw_html
+    each_element(:raw_html) do |e|
+      html = e.parsed_html
+      next unless html # nothing to do when the element has no parsed HTML
+      html.process_markdown_inside_elements(self)
+    end
+  end
+end

data/lib/maruku/input/parse_span.rb ADDED

@@ -0,0 +1,658 @@
+module MaRuKu::In::Markdown::SpanLevelParser
+  include MaRuKu::Helpers
+  EscapedCharInText = '\\`*_{}[]()#.!|:+->'.split(//) # one-character strings; parse_span passes this as the escapable set
+  EscapedCharInQuotes = EscapedCharInText + ["'", '"'] # the same set plus both quote characters; used by read_quoted
+  EscapedCharInInlineCode = ['\\', '`']
+  IgnoreWikiLinks = MaRuKu::Globals[:ignore_wikilinks] # read once, when this module body is evaluated
+  def parse_span(string, parent=nil) # entry point: parses +string+ (a String or a list of lines) as span-level markdown
+    string = Array(string).join("\n") unless string.kind_of? String # non-String input is joined with newlines
+    src = MaRuKu::In::Markdown::SpanLevelParser::CharSource.new(string, parent)
+    read_span(src, EscapedCharInText, [nil]) # [nil] as exit char: stop when the source runs out (cur_char is nil)
+  end
+  # This is the main loop for reading span elements.
+  #
+  # It's long, but not *complex* or difficult to understand: each pass looks
+  # at the current character of +src+ and dispatches on it, collecting the
+  # output in a SpanContext; the method returns educate(con.elements).
+  def read_span(src, escaped, exit_on_chars=nil, exit_on_strings=nil)
+    escaped = Array(escaped)
+    con = SpanContext.new
+    c = d = nil
+    while true
+      c = src.cur_char
+      # This is only an optimization which cuts 50% of the time used.
+      # (but you can't use a-zA-Z in exit_on_chars)
+      if c && c =~ /a-zA-Z0-9/ # NOTE(review): no [] here, so this matches the literal text "a-zA-Z0-9"; if cur_char is a single character the fast path never runs -- /[a-zA-Z0-9]/ was probably intended, confirm
+        con.push_char src.shift_char
+        next
+      end
+      break if Array(exit_on_chars).include?(c)
+      if Array(exit_on_strings).any? {|x| src.cur_chars_are x }
+        # Special case: bold nested in italic
+        break unless !(['*', '_'] & Array(exit_on_strings)).empty? &&
+          ['**', '__'].include?(src.cur_chars(2)) &&
+          !['***', '___'].include?(src.cur_chars(3))
+      end
+      # check if there are extensions
+      next if check_span_extensions(src, con)
+      case c = src.cur_char
+      when ' '
+        if src.cur_chars_are "  \n" # two spaces before a newline: hard line break
+          src.ignore_chars(3)
+          con.push_element md_br
+          next
+        else
+          src.ignore_char
+          con.push_space
+        end
+      when "\n", "\t"
+        src.ignore_char
+        con.push_space
+      when '`'
+        read_inline_code(src, con)
+      when '<'
+        # It could be:
+        # 1) HTML "<div ..."
+        # 2) HTML "<!-- ..."
+        # 3) url "<http:// ", "<ftp:// ..."
+        # 4) email "<andrea@... ", "<mailto:andrea@..."
+        # 5) on itself! "a < b  "
+        # 6) Start of <<guillemets>>
+        case d = src.next_char
+        when '<'  # guillemets
+          src.ignore_chars(2)
+          con.push_char '<'
+          con.push_char '<'
+        when '!'
+          if src.cur_chars_are '<!--'
+            read_inline_html(src, con)
+          else
+            con.push_char src.shift_char
+          end
+        when '?'
+          read_xml_instr_span(src, con)
+        when ' ', "\t"
+          con.push_char src.shift_char
+        else
+          if src.next_matches(/<mailto:/) ||
+              src.next_matches(/<[\w\.]+\@/)
+            read_email_el(src, con)
+          elsif src.next_matches(/<\w+:/)
+            read_url_el(src, con)
+          elsif src.next_matches(/<\w/)
+            #puts "This is HTML: #{src.cur_chars(20)}"
+            read_inline_html(src, con)
+          else
+            #puts "This is NOT HTML: #{src.cur_chars(20)}"
+            con.push_char src.shift_char
+          end
+        end
+      when "\\"
+        d = src.next_char
+        if d == "'"
+          src.ignore_chars(2)
+          con.push_element md_entity('apos')
+        elsif d == '"'
+          src.ignore_chars(2)
+          con.push_element md_entity('quot')
+        elsif escaped.include? d
+          src.ignore_chars(2)
+          con.push_char d
+        else
+          con.push_char src.shift_char
+        end
+      when '['
+        if markdown_extra? && src.next_char == '^'
+          read_footnote_ref(src,con)
+        elsif IgnoreWikiLinks && src.next_char == '['
+          con.push_char src.shift_char
+          con.push_char src.shift_char
+        else
+          read_link(src, con)
+        end
+      when '!'
+        if src.next_char == '['
+          read_image(src, con)
+        else
+          con.push_char src.shift_char
+        end
+      when '&'
+        # named references
+        if m = src.read_regexp(/\&(\w+);/)
+          con.push_element md_entity(m[1])
+          # numeric
+        elsif m = src.read_regexp(/\&\#(x)?(\w+);/)
+          num = m[1] ? m[2].hex : m[2].to_i
+          con.push_element md_entity(num)
+        else
+          con.push_char src.shift_char
+        end
+      when '*'
+        if !src.next_char
+          maruku_error "Opening * as last char.", src, con, 'Treating as literal'
+          con.push_char src.shift_char
+        else
+          follows = src.cur_chars(4)
+          if follows =~ /^\*\*\*[^\s\*]/
+            con.push_element read_emstrong(src, '***')
+          elsif follows  =~ /^\*\*[^\s\*]/
+            con.push_element read_strong(src, '**')
+          elsif follows =~ /^\*[^\s\*]/
+            con.push_element read_em(src, '*')
+          else # * is just a normal char
+            con.push_char src.shift_char
+          end
+        end
+      when '_'
+        if !src.next_char
+          maruku_error "Opening _ as last char", src, con, 'Treating as literal'
+          con.push_char src.shift_char
+        else
+          # we don't want "mod_ruby" to start an emphasis
+          # so we start one only if
+          # 1) there's nothing else in the span (first char)
+          # or 2) the last char was a space
+          # or 3) the current string is empty
+          #if con.elements.empty? ||
+          if con.is_end?
+            # also, we check the next characters
+            follows = src.cur_chars(4)
+            if  follows =~ /^\_\_\_[^\s\_]/
+              con.push_element read_emstrong(src, '___')
+            elsif follows  =~ /^\_\_[^\s\_]/
+              con.push_element read_strong(src, '__')
+            elsif follows =~ /^\_[^\s\_]/
+              con.push_element read_em(src, '_')
+            else # _ is just a normal char
+              con.push_char src.shift_char
+            end
+          else
+            # _ is just a normal char
+            con.push_char src.shift_char
+          end
+        end
+      when '{' # extension
+        if ['#', '.', ':'].include? src.next_char
+          src.ignore_char # {
+          interpret_extension(src, con, '}')
+          src.ignore_char # }
+        else
+          con.push_char src.shift_char
+        end
+      when nil
+        maruku_error( ("Unclosed span (waiting for %s" +
+                       "#{exit_on_strings.inspect})") %
+                      [ exit_on_chars ? "#{exit_on_chars.inspect} or" : "" ],
+                      src, con)
+        break
+      else # normal text
+        con.push_char src.shift_char
+      end # end case
+    end # end while true
+    con.push_string_if_present
+    # Assign IAL to elements
+    merge_ial(con.elements, src, con)
+    # Remove leading space
+    if (s = con.elements.first).kind_of? String
+      if s[0, 1] == ' '
+        con.elements[0] = s[1..-1]
+      end
+      con.elements.shift if s.empty? # NOTE(review): s is still the untrimmed string, so a lone " " leaves an empty "" as first element -- confirm whether that is intended
+    end
+    # Remove final spaces
+    if (s = con.elements.last).kind_of? String
+      s.chop! if s[-1, 1] == ' ' # trimmed in place, so the emptiness check below sees the result
+      con.elements.pop if s.empty?
+    end
+    educate(con.elements)
+  end
+  def read_xml_instr_span(src, con) # reads "<?target code ?>" and pushes an md_xml_instr element onto con
+    src.ignore_chars(2) # starting <?
+    # read target <?target code... ?>
+    target = if m = src.read_regexp(/^(\w+)/)
+               m[1]
+             else
+               # XML instructions are invalid without a target
+               ''
+             end
+    delim = "?>"
+    code = read_simple(src, nil, nil, delim) # everything up to the closing delimiter, no escapes
+    src.ignore_chars delim.size
+    code = (code || "").strip # read_simple may return nil for an empty body
+    con.push_element md_xml_instr(target, code)
+  end
+  # Start: cursor on character **after** '{'
+  # End: cursor on '}' or EOF
+  def interpret_extension(src, con, break_on_chars=nil)
+    case src.cur_char
+    when ':'
+      src.ignore_char # :
+      extension_meta(src, con, break_on_chars)
+    when '#', '.'
+      extension_meta(src, con, break_on_chars) # '#' and '.' are left in place for the attribute list reader
+    else
+      stuff = read_simple(src, '}', break_on_chars) # NOTE(review): this consumes the text before extension_meta below reads from src -- confirm the intent
+      if stuff =~ /^(\w+\s|[^\w])/
+        extension_id = $1.strip
+        maruku_recover "I don't know what to do with extension '#{extension_id}'\n" +
+          "I will treat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
+      else
+        maruku_recover "I will treat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
+      end
+      extension_meta(src, con, break_on_chars)
+    end
+  end
+  def extension_meta(src, con, break_on_chars=nil) # reads an attribute list: a named definition (ALD) or an inline list (IAL)
+    if m = src.read_regexp(/([^\s\:\"\'}]+?):/) # "name:" prefix means this defines a reusable list
+      name = m[1]
+      al = read_attribute_list(src, con, break_on_chars)
+      self.doc.ald[name] = al # registered on the document so :ref entries can find it
+      con.push md_ald(name, al)
+    else
+      al = read_attribute_list(src, con, break_on_chars)
+      con.push md_ial(al)
+    end
+  end
+  def read_url_el(src,con) # reads "<url>" and pushes an md_url element onto con
+    src.ignore_char # leading <
+    url = read_simple(src, nil, '>') # no escapes; stops at '>'
+    src.ignore_char # closing >
+    con.push_element md_url(url)
+  end
+  def read_email_el(src,con) # reads "<address>" or "<mailto:address>" and pushes an md_email element onto con
+    src.ignore_char # leading <
+    mail = read_simple(src, nil, '>')
+    src.ignore_char # closing >
+    address = mail.gsub(/^mailto:/, '') # drop the optional mailto: prefix
+    con.push_element md_email(address)
+  end
+  def read_url(src, break_on)
+    if ["'", '"'].include? src.cur_char
+      maruku_error 'Invalid char for url', src
+    end
+    url = read_simple(src, nil, break_on) || ''
+    if url[0, 1] == '<' && url[-1, 1] == '>'
+      url = url[1, url.size-2]
+    end
+    return nil if url.empty?
+    url
+  end
+  def read_quoted_or_unquoted(src, con, escaped, exit_on_chars)
+    case src.cur_char
+    when "'", '"'
+      read_quoted(src, con)
+    else
+      read_simple(src, escaped, exit_on_chars, nil, false)
+    end
+  end
+  # Tries to read a quoted value. If stream does not
+  # start with ' or ", returns nil.
+  def read_quoted(src, con)
+    case src.cur_char
+    when "'", '"'
+      quote_char = src.shift_char # opening quote
+      string = read_simple(src, EscapedCharInQuotes, quote_char)
+      src.ignore_char # closing quote
+      string
+    else
+      nil
+    end
+  end
+  # Reads a simple string (no formatting) until one of break_on_chars,
+  # while escaping the escaped.
+  # If the string is empty, it returns nil.
+  # By default, raises on error if the string terminates unexpectedly. This can be
+  # by setting the last argument to false.
+  def read_simple(src, escaped, exit_on_chars=nil, exit_on_strings=nil, warn=true)
+    text = ""
+    escaped = Array(escaped)
+    exit_on_chars = Array(exit_on_chars)
+    exit_on_strings = Array(exit_on_strings)
+    while true
+      c = src.cur_char
+      break if exit_on_chars.include?(c)
+      break if exit_on_strings.any? {|x| src.cur_chars_are x }
+      case c
+      when nil
+        if warn
+          maruku_error "String finished while reading (break on " +
+            "#{exit_on_chars.inspect})" +
+            " already read: #{text.inspect}", src
+        end
+        break
+      when "\\"
+        d = src.next_char
+        if escaped.include? d
+          src.ignore_chars(2)
+          text << d
+        else
+          text << src.shift_char
+        end
+      else
+        text << src.shift_char
+      end
+    end
+    text.empty? ? nil : text
+  end
+  def read_em(src, delim)
+    src.ignore_char
+    children = read_span(src, EscapedCharInText, nil, delim)
+    src.ignore_char
+    md_em(children)
+  end
+  def read_strong(src, delim)
+    src.ignore_chars(2)
+    children = read_span(src, EscapedCharInText, nil, delim)
+    src.ignore_chars(2)
+    md_strong(children)
+  end
+  def read_emstrong(src, delim)
+    src.ignore_chars(3)
+    children = read_span(src, EscapedCharInText, nil, delim)
+    src.ignore_chars(3)
+    md_emstrong(children)
+  end
+  # Reads a bracketed id "[refid]". Consumes also both brackets.
+  def read_ref_id(src, con)
+    src.ignore_char # [
+    if m = src.read_regexp(/([^\]]*?)\]/)
+      m[1]
+    else
+      nil
+    end
+  end
+  def read_footnote_ref(src,con)
+    ref = read_ref_id(src,con)
+    con.push_element md_foot_ref(ref)
+  end
+  # Reads a run of inline HTML from src and pushes it onto con as an md_html
+  # element. The remaining buffer is fed to an HTMLHelper one character at a
+  # time until the helper reports that it is finished.
+  # If anything raises while doing so, the error is reported and a single
+  # character is moved from src to con as plain text so parsing can go on.
+  def read_inline_html(src, con)
+    h = HTMLHelper.new
+    begin
+      # This is our current buffer in the context
+      next_stuff = src.current_remaining_buffer
+      consumed = 0
+      while true
+        # Ran out of input before the helper considered the HTML complete.
+        if consumed >= next_stuff.size
+          maruku_error "Malformed HTML starting at #{next_stuff.inspect}", src, con
+          break
+        end
+        h.eat_this next_stuff[consumed].chr
+        consumed += 1
+        break if h.is_finished?
+      end
+      # Advance the source past everything handed to the helper.
+      src.ignore_chars(consumed)
+      con.push_element md_html(h.stuff_you_read)
+    rescue => e
+      maruku_error "Bad html: \n" +
+        e.inspect.gsub(/^/, '>'), src, con, "I will try to continue after bad HTML."
+      # Recover by treating the current character as ordinary text.
+      con.push_char src.shift_char
+    end
+  end
+  def read_inline_code(src, con)
+    # Count the number of ticks
+    num_ticks = 0
+    while src.cur_char == '`'
+      num_ticks += 1
+      src.ignore_char
+    end
+    # We will read until this string
+    end_string = "`" * num_ticks
+    code = read_simple(src, nil, nil, end_string)
+    # We didn't find a closing batch!
+    if !code || src.cur_char != '`'
+      con.push_element(end_string + (code || '')) and return
+    end
+    # We didn't find a closing batch!
+    if !code || src.cur_char != '`'
+      con.push_element(end_string + (code || ''))
+      return
+    end
+    #   puts "Now I expects #{num_ticks} ticks: #{src.cur_chars(10).inspect}"
+    src.ignore_chars num_ticks
+    # Ignore at most one space
+    if num_ticks > 1 && code[0, 1] == ' '
+      code = code[1..-1]
+    end
+    # drop last space
+    if num_ticks > 1 && code[-1, 1] == ' '
+      code = code[0..-2]
+    end
+    #   puts "Read `` code: #{code.inspect}; after: #{src.cur_chars(10).inspect} "
+    con.push_element md_code(code)
+  end
+  def read_link(src, con)
+    # we read the string and see what happens
+    src.ignore_char # opening bracket
+    children = read_span(src, EscapedCharInText, ']')
+    src.ignore_char # closing bracket
+    # ignore space
+    if src.cur_char == ' ' && ['[', '('].include?(src.next_char)
+      src.shift_char
+    end
+    case src.cur_char
+    when '('
+      src.ignore_char # opening (
+      src.consume_whitespace
+      url = read_url(src, [' ', "\t", ")"]) || ''
+      src.consume_whitespace
+      title = nil
+      if src.cur_char != ')' # we have a title
+        quote_char = src.cur_char
+        title = read_quoted(src, con)
+        if not title
+          maruku_error 'Must quote title', src, con
+        else
+          # Tries to read a title with quotes: ![a](url "ti"tle")
+          # this is the most ugly thing in Markdown
+          unless src.next_matches(/\s*\)/)
+            # if there is not a closing par ), then read
+            # the rest and guess it's title with quotes
+            rest = read_simple(src, nil, ')', nil)
+            # chop the closing char
+            rest.chop!
+            title << quote_char << rest
+          end
+        end
+      end
+      src.consume_whitespace
+      closing = src.shift_char # closing )
+      if closing != ')'
+        maruku_error 'Unclosed link', src, con, "No closing ): I will not create" +
+          " the link for #{children.inspect}"
+        con.push_elements children
+        return
+      end
+      con.push_element md_im_link(children, url, title)
+    when '[' # link ref
+      ref_id = read_ref_id(src, con)
+      if ref_id
+        con.push_element md_link(children, ref_id)
+      else
+        maruku_error "Could not read ref_id", src, con, "I will not create the link for " +
+          "#{children.inspect}"
+        con.push_elements children
+        return
+      end
+    else # empty [link]
+      con.push_element md_link(children, nil)
+    end
+  end # read link
+  def read_image(src, con)
+    src.ignore_chars(2) # opening "!["
+    alt_text = read_span(src, EscapedCharInText, ']')
+    src.ignore_char # closing bracket
+    # ignore space
+    if src.cur_char == ' ' && ['[', '('].include?(src.next_char)
+      src.ignore_char
+    end
+    case src.cur_char
+    when '('
+      src.ignore_char # opening (
+      src.consume_whitespace
+      url = read_url(src, [' ', "\t", ')'])
+      unless url
+        maruku_error "Could not read url from #{src.cur_chars(10).inspect}", src, con
+      end
+      src.consume_whitespace
+      title = nil
+      if src.cur_char != ')' # we have a title
+        quote_char = src.cur_char
+        title = read_quoted(src, con)
+        if !title
+          maruku_error 'Must quote title', src, con
+        else
+          # Tries to read a title with quotes: ![a](url "ti"tle")
+          # this is the most ugly thing in Markdown
+          if !src.next_matches(/\s*\)/)
+            # if there is not a closing par ), then read
+            # the rest and guess it's title with quotes
+            rest = read_simple(src, nil, ')', nil)
+            # chop the closing char
+            rest.chop!
+            title << quote_char << rest
+          end
+        end
+      end
+      src.consume_whitespace
+      closing = src.shift_char # closing )
+      if closing != ')'
+        maruku_error "Unclosed link: '#{closing}'" +
+          " Read url=#{url.inspect} title=#{title.inspect}", src, con
+      end
+      con.push_element md_im_image(alt_text, url, title)
+    when '[' # link ref
+      ref_id = read_ref_id(src, con)
+      if !ref_id # TODO: check around
+        maruku_error 'Reference not closed.', src, con
+        ref_id = ""
+      end
+      con.push_element md_image(alt_text, ref_id)
+    else # no stuff
+      ref_id = alt_text.join
+      con.push_element md_image(alt_text, ref_id)
+    end
+  end # read link
+  class SpanContext
+    # Read elements
+    attr_accessor :elements
+    def initialize
+      @elements = []
+      @cur_string = ''
+    end
+    def push_element(e)
+      raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} " unless
+        e.kind_of?(String) || e.kind_of?(MaRuKu::MDElement)
+      push_string_if_present
+      @elements << e
+    end
+    alias push push_element
+    def push_elements(a)
+      a.each do |e|
+        if e.kind_of? String
+          @cur_string << e
+        else
+          push_element e
+        end
+      end
+    end
+    def is_end?
+      @cur_string.empty? || @cur_string =~ /\s\z/
+    end
+    def push_string_if_present
+      unless @cur_string.empty?
+        @elements << @cur_string
+        @cur_string = ''
+      end
+    end
+    def push_char(c)
+      @cur_string << c
+    end
+    # push space into current string if
+    # there isn't one
+    def push_space
+      @cur_string << ' ' unless @cur_string[-1, 1] == ' '
+    end
+    def describe
+      lines = @elements.map{|x| x.inspect }.join("\n")
+      s = "Elements read in span: \n" +
+        lines.gsub(/^/, ' -') + "\n"
+      s += "Current string: \n  #{@cur_string.inspect}\n" unless  @cur_string.empty?
+      s
+    end
+  end
+end