maruku 0.2.13 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/maruku +23 -15
- data/bin/maruku0.3 +37 -0
- data/bin/marutest +277 -0
- data/docs/changelog-0.3.html +99 -0
- data/docs/changelog-0.3.md +84 -0
- data/docs/faq.html +46 -0
- data/docs/faq.md +32 -0
- data/docs/index.html +629 -64
- data/docs/markdown_extra2.html +67 -14
- data/docs/markdown_syntax.html +631 -94
- data/docs/markdown_syntax_2.html +152 -0
- data/docs/maruku.html +629 -64
- data/docs/maruku.md +108 -105
- data/docs/proposal.html +362 -55
- data/docs/proposal.md +133 -169
- data/docs/todo.html +30 -0
- data/lib/maruku.rb +13 -3
- data/lib/maruku/errors_management.rb +75 -0
- data/lib/maruku/helpers.rb +164 -0
- data/lib/maruku/html_helper.rb +33 -13
- data/lib/maruku/parse_block.rb +89 -92
- data/lib/maruku/parse_doc.rb +43 -18
- data/lib/maruku/parse_span.rb +17 -46
- data/lib/maruku/parse_span_better.rb +681 -0
- data/lib/maruku/string_utils.rb +17 -10
- data/lib/maruku/structures.rb +62 -35
- data/lib/maruku/structures_iterators.rb +39 -0
- data/lib/maruku/tests/benchmark.rb +12 -4
- data/lib/maruku/tests/new_parser.rb +318 -0
- data/lib/maruku/to_html.rb +113 -44
- data/lib/maruku/to_latex.rb +32 -14
- data/lib/maruku/to_markdown.rb +110 -0
- data/lib/maruku/toc.rb +35 -1
- data/lib/maruku/version.rb +10 -1
- data/lib/test.rb +29 -0
- data/tests/others/escaping.md +6 -4
- data/tests/others/links.md +1 -1
- data/tests/others/lists_after_paragraph.md +44 -0
- data/tests/unittest/abbreviations.md +71 -0
- data/tests/unittest/blank.md +43 -0
- data/tests/unittest/blanks_in_code.md +131 -0
- data/tests/unittest/code.md +64 -0
- data/tests/unittest/code2.md +59 -0
- data/tests/unittest/code3.md +121 -0
- data/tests/unittest/easy.md +36 -0
- data/tests/unittest/email.md +39 -0
- data/tests/unittest/encoding/iso-8859-1.md +9 -0
- data/tests/unittest/encoding/utf-8.md +38 -0
- data/tests/unittest/entities.md +174 -0
- data/tests/unittest/escaping.md +97 -0
- data/tests/unittest/extra_dl.md +81 -0
- data/tests/unittest/extra_header_id.md +96 -0
- data/tests/unittest/extra_table1.md +78 -0
- data/tests/unittest/footnotes.md +120 -0
- data/tests/unittest/headers.md +64 -0
- data/tests/unittest/hrule.md +77 -0
- data/tests/unittest/images.md +114 -0
- data/tests/unittest/inline_html.md +185 -0
- data/tests/unittest/links.md +162 -0
- data/tests/unittest/list1.md +80 -0
- data/tests/unittest/list2.md +75 -0
- data/tests/unittest/list3.md +111 -0
- data/tests/unittest/list4.md +43 -0
- data/tests/unittest/lists.md +262 -0
- data/tests/unittest/lists_after_paragraph.md +280 -0
- data/tests/unittest/lists_ol.md +323 -0
- data/tests/unittest/misc_sw.md +751 -0
- data/tests/unittest/notyet/escape.md +46 -0
- data/tests/unittest/notyet/header_after_par.md +85 -0
- data/tests/unittest/notyet/ticks.md +67 -0
- data/tests/unittest/notyet/triggering.md +210 -0
- data/tests/unittest/one.md +33 -0
- data/tests/unittest/paragraph.md +34 -0
- data/tests/unittest/paragraph_rules/dont_merge_ref.md +60 -0
- data/tests/unittest/paragraph_rules/tab_is_blank.md +43 -0
- data/tests/unittest/paragraphs.md +84 -0
- data/tests/unittest/recover/recover_links.md +32 -0
- data/tests/unittest/references/long_example.md +87 -0
- data/tests/unittest/references/spaces_and_numbers.md +27 -0
- data/tests/unittest/syntax_hl.md +99 -0
- data/tests/unittest/test.md +36 -0
- data/tests/unittest/wrapping.md +88 -0
- data/tests/utf8-files/simple.md +1 -0
- metadata +139 -86
- data/lib/maruku/maruku.rb +0 -50
- data/tests/a.md +0 -10
    
        data/lib/maruku.rb
    CHANGED
    
    | @@ -18,10 +18,15 @@ | |
| 18 18 |  | 
| 19 19 | 
             
            # Structures definition
         | 
| 20 20 | 
             
            require 'maruku/structures'
         | 
| 21 | 
            +
            # Less typing
         | 
| 22 | 
            +
            require 'maruku/helpers'
         | 
| 21 23 |  | 
| 22 24 | 
             
            # Code for parsing whole Markdown documents
         | 
| 23 25 | 
             
            require 'maruku/parse_doc'
         | 
| 24 26 |  | 
| 27 | 
            +
            # Ugly things kept in a closet
         | 
| 28 | 
            +
            require 'maruku/string_utils'
         | 
| 29 | 
            +
             | 
| 25 30 | 
             
            # A class for reading and sanitizing inline HTML
         | 
| 26 31 | 
             
            require 'maruku/html_helper'
         | 
| 27 32 |  | 
| @@ -29,10 +34,12 @@ require 'maruku/html_helper' | |
| 29 34 | 
             
            require 'maruku/parse_block'
         | 
| 30 35 |  | 
| 31 36 | 
             
            # Code for parsing Markdown span-level elements
         | 
| 32 | 
            -
            require 'maruku/ | 
| 37 | 
            +
            require 'maruku/parse_span_better'
         | 
| 38 | 
            +
             | 
| 39 | 
            +
            require 'maruku/structures_iterators'
         | 
| 40 | 
            +
             | 
| 41 | 
            +
            require 'maruku/errors_management'
         | 
| 33 42 |  | 
| 34 | 
            -
            # Ugly things kept in a closet
         | 
| 35 | 
            -
            require 'maruku/string_utils'
         | 
| 36 43 |  | 
| 37 44 | 
             
            # Code for creating a table of contents
         | 
| 38 45 | 
             
            require 'maruku/toc'
         | 
| @@ -49,5 +56,8 @@ require 'maruku/to_latex' | |
| 49 56 | 
             
            require 'maruku/to_latex_strings'
         | 
| 50 57 | 
             
            require 'maruku/to_latex_entities'
         | 
| 51 58 |  | 
| 59 | 
            +
            # Pretty print
         | 
| 60 | 
            +
            require 'maruku/to_markdown'
         | 
| 61 | 
            +
             | 
| 52 62 | 
             
            # Exporting to text: strips all formatting (not complete)
         | 
| 53 63 | 
             
            require 'maruku/to_s'
         | 
| @@ -0,0 +1,75 @@ | |
| 1 | 
            +
             | 
| 2 | 
            +
             | 
| 3 | 
            +
            # Any method that detects formatting error calls the
         | 
| 4 | 
            +
            # error() method. 
         | 
| 5 | 
            +
            # if @meta[:on_error] == 
         | 
| 6 | 
            +
            #
         | 
| 7 | 
            +
            # - :warning   write on the standard err (or @error_stream if defined), 
         | 
| 8 | 
            +
            #              then do your best.
         | 
| 9 | 
            +
            # - :ignore    be shy and try to continue
         | 
| 10 | 
            +
            # - :raise     raises a MarukuException
         | 
| 11 | 
            +
            #
         | 
| 12 | 
            +
            # default is :raise
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            class MarukuException < RuntimeError
         | 
| 15 | 
            +
            	
         | 
| 16 | 
            +
            end
         | 
| 17 | 
            +
             | 
| 18 | 
            +
             | 
| 19 | 
            +
            module MarukuErrors
         | 
| 20 | 
            +
            	Default_on_error = :warning
         | 
| 21 | 
            +
            	
         | 
| 22 | 
            +
            	def maruku_error(s,src=nil,con=nil)
         | 
| 23 | 
            +
            		policy = @doc ? (@doc.meta[:on_error] || Default_on_error) : :raise
         | 
| 24 | 
            +
            		
         | 
| 25 | 
            +
            		case policy
         | 
| 26 | 
            +
            		when :ignore 
         | 
| 27 | 
            +
            		when :raise
         | 
| 28 | 
            +
            			raise_error describe_error(s,src,con)
         | 
| 29 | 
            +
            		when :warning
         | 
| 30 | 
            +
            			tell_user describe_error(s,src,con)
         | 
| 31 | 
            +
            		end
         | 
| 32 | 
            +
            	end
         | 
| 33 | 
            +
            	
         | 
| 34 | 
            +
            	alias error maruku_error
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            	def raise_error(s)
         | 
| 37 | 
            +
            		raise MarukuException, s, caller
         | 
| 38 | 
            +
            	end
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            	def tell_user(s)
         | 
| 41 | 
            +
            		n = 75
         | 
| 42 | 
            +
            		(@error_stream || $stderr) <<
         | 
| 43 | 
            +
            		" "+"_"*n << "\n"<<
         | 
| 44 | 
            +
            		"| Maruku tells you (#{caller[0]})\n" << 
         | 
| 45 | 
            +
            		"+"+"-"*n +"\n"+
         | 
| 46 | 
            +
            		add_tabs(s,1,'| ') << "\n" <<
         | 
| 47 | 
            +
            		"+" << "-"*n << "\n" <<
         | 
| 48 | 
            +
            		add_tabs(caller.join("\n"),1,'!') << "\n" <<
         | 
| 49 | 
            +
            		"\\" << "_"*n << "\n"
         | 
| 50 | 
            +
            	end
         | 
| 51 | 
            +
            	
         | 
| 52 | 
            +
            	def set_error_stream(os)
         | 
| 53 | 
            +
            		@error_stream = os
         | 
| 54 | 
            +
            	end
         | 
| 55 | 
            +
             | 
| 56 | 
            +
            	def describe_error(s,src,con)
         | 
| 57 | 
            +
            		t = s
         | 
| 58 | 
            +
            		if src
         | 
| 59 | 
            +
            			t += "\n#{src.describe}\n"
         | 
| 60 | 
            +
            		end
         | 
| 61 | 
            +
            		if con
         | 
| 62 | 
            +
            			t += "\n#{con.describe}\n"
         | 
| 63 | 
            +
            		end
         | 
| 64 | 
            +
            		t
         | 
| 65 | 
            +
            	end
         | 
| 66 | 
            +
            	
         | 
| 67 | 
            +
            end
         | 
| 68 | 
            +
             | 
| 69 | 
            +
            class MDElement
         | 
| 70 | 
            +
            	include MarukuErrors
         | 
| 71 | 
            +
            end
         | 
| 72 | 
            +
             | 
| 73 | 
            +
             | 
| 74 | 
            +
             | 
| 75 | 
            +
             | 
| @@ -0,0 +1,164 @@ | |
| 1 | 
            +
             | 
| 2 | 
            +
             | 
| 3 | 
            +
            # A series of helper functions for creating elements
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            module Helpers
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            	def md_el(node_type, children=[], meta={})
         | 
| 8 | 
            +
            		e=MDElement.new(node_type, children, meta)
         | 
| 9 | 
            +
            		e.doc = self
         | 
| 10 | 
            +
            		e
         | 
| 11 | 
            +
            	end
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            	def md_code(code)
         | 
| 14 | 
            +
            		md_el(:inline_code, [], {:raw_code => code})
         | 
| 15 | 
            +
            	end
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            	def md_par(children, meta={})
         | 
| 18 | 
            +
            		md_el(:paragraph, [], meta)
         | 
| 19 | 
            +
            	end
         | 
| 20 | 
            +
            	
         | 
| 21 | 
            +
            	def md_html(raw_html)
         | 
| 22 | 
            +
            		e = md_el(:raw_html, [], {:raw_html=>raw_html})
         | 
| 23 | 
            +
            		begin
         | 
| 24 | 
            +
            			# remove newlines and whitespace at begin
         | 
| 25 | 
            +
            			# end end of string, or else REXML gets confused
         | 
| 26 | 
            +
            			raw_html = raw_html.gsub(/\A\s*</,'<').
         | 
| 27 | 
            +
            			                    gsub(/>[\s\n]*\Z/,'>')
         | 
| 28 | 
            +
            			e.instance_variable_set :@parsed_html,
         | 
| 29 | 
            +
            			 	REXML::Document.new(raw_html)
         | 
| 30 | 
            +
            		
         | 
| 31 | 
            +
            		rescue Exception => ex
         | 
| 32 | 
            +
            			tell_user "Malformed block of HTML:\n"+
         | 
| 33 | 
            +
            			add_tabs(raw_html,1,'|')
         | 
| 34 | 
            +
            #			"  #{raw_html.inspect}\n\n"+ex.inspect
         | 
| 35 | 
            +
            		end
         | 
| 36 | 
            +
            		e
         | 
| 37 | 
            +
            	end
         | 
| 38 | 
            +
            		
         | 
| 39 | 
            +
            	def md_link(children, ref_id)
         | 
| 40 | 
            +
            		md_el(:link, children, {:ref_id=>ref_id.downcase})
         | 
| 41 | 
            +
            	end
         | 
| 42 | 
            +
            	
         | 
| 43 | 
            +
            	def md_im_link(children, url, title=nil)
         | 
| 44 | 
            +
            		md_el(:link, children, {:url=>url,:title=>title})
         | 
| 45 | 
            +
            	end
         | 
| 46 | 
            +
            	
         | 
| 47 | 
            +
            	
         | 
| 48 | 
            +
            	def md_image(children, ref_id)
         | 
| 49 | 
            +
            		md_el(:image, children, {:ref_id=>ref_id})
         | 
| 50 | 
            +
            	end
         | 
| 51 | 
            +
            	
         | 
| 52 | 
            +
            	def md_im_image(children, url, title=nil)
         | 
| 53 | 
            +
            		md_el(:image, children, {:url=>url,:title=>title})
         | 
| 54 | 
            +
            	end
         | 
| 55 | 
            +
             | 
| 56 | 
            +
            	def md_em(children)
         | 
| 57 | 
            +
            		md_el(:emphasis, [children].flatten)
         | 
| 58 | 
            +
            	end
         | 
| 59 | 
            +
             | 
| 60 | 
            +
            	def md_strong(children)
         | 
| 61 | 
            +
            		md_el(:strong, [children].flatten)
         | 
| 62 | 
            +
            	end
         | 
| 63 | 
            +
             | 
| 64 | 
            +
            	def md_emstrong(children)
         | 
| 65 | 
            +
            		md_strong(md_em(children))
         | 
| 66 | 
            +
            	end
         | 
| 67 | 
            +
             | 
| 68 | 
            +
            	# <http://www.example.com/>
         | 
| 69 | 
            +
            	def md_url(url)
         | 
| 70 | 
            +
            		md_el(:immediate_link, [], {:url=>url})
         | 
| 71 | 
            +
            	end
         | 
| 72 | 
            +
            	
         | 
| 73 | 
            +
            	# <andrea@rubyforge.org>
         | 
| 74 | 
            +
            	# <mailto:andrea@rubyforge.org>
         | 
| 75 | 
            +
            	def md_email(email)
         | 
| 76 | 
            +
            		md_el(:email_address, [], {:email=>email})
         | 
| 77 | 
            +
            	end
         | 
| 78 | 
            +
            	
         | 
| 79 | 
            +
            	def md_entity(entity_name)
         | 
| 80 | 
            +
            		md_el(:entity, [], {:entity_name=>entity_name})
         | 
| 81 | 
            +
            	end
         | 
| 82 | 
            +
            	
         | 
| 83 | 
            +
            	# Markdown extra
         | 
| 84 | 
            +
            	def md_foot_ref(ref_id)
         | 
| 85 | 
            +
            		md_el(:footnote_reference, [], {:footnote_id=>ref_id})
         | 
| 86 | 
            +
            	end
         | 
| 87 | 
            +
            	
         | 
| 88 | 
            +
            	def md_par(children, meta={})
         | 
| 89 | 
            +
            		md_el(:paragraph, children, meta)
         | 
| 90 | 
            +
            	end
         | 
| 91 | 
            +
             | 
| 92 | 
            +
            	# [1]: http://url [properties]
         | 
| 93 | 
            +
            	def md_ref_def(ref_id, url, title=nil, meta={})
         | 
| 94 | 
            +
            		meta[:url] = url
         | 
| 95 | 
            +
            		meta[:ref_id] = ref_id
         | 
| 96 | 
            +
            		meta[:title] = title if title
         | 
| 97 | 
            +
            		md_el(:ref_definition, [], meta)
         | 
| 98 | 
            +
            	end
         | 
| 99 | 
            +
            end
         | 
| 100 | 
            +
             | 
| 101 | 
            +
            class MDElement	
         | 
| 102 | 
            +
            	# outputs abbreviated form 
         | 
| 103 | 
            +
            	def inspect2 
         | 
| 104 | 
            +
            		case @node_type
         | 
| 105 | 
            +
            		when :paragraph
         | 
| 106 | 
            +
            			"md_par(%s)" % children_inspect
         | 
| 107 | 
            +
            		when :footnote_reference
         | 
| 108 | 
            +
            			"md_foot_ref(%s)" % @meta[:footnote_id].inspect
         | 
| 109 | 
            +
            		when :entity
         | 
| 110 | 
            +
            			"md_entity(%s)" % @meta[:entity_name].inspect
         | 
| 111 | 
            +
            		when :email_address
         | 
| 112 | 
            +
            			"md_email(%s)" % @meta[:email].inspect
         | 
| 113 | 
            +
            		when :inline_code
         | 
| 114 | 
            +
            			"md_code(%s)" % @meta[:raw_code].inspect
         | 
| 115 | 
            +
            		when :raw_html
         | 
| 116 | 
            +
            			"md_html(%s)" % @meta[:raw_html].inspect
         | 
| 117 | 
            +
            		when :emphasis 
         | 
| 118 | 
            +
            			"md_em(%s)" % children_inspect
         | 
| 119 | 
            +
            		when :strong
         | 
| 120 | 
            +
            			"md_strong(%s)" % children_inspect
         | 
| 121 | 
            +
            		when :immediate_link
         | 
| 122 | 
            +
            			"md_url(%s)" % @meta[:url].inspect
         | 
| 123 | 
            +
            		when :image
         | 
| 124 | 
            +
            			if @meta[:ref_id]
         | 
| 125 | 
            +
            				"md_image(%s,%s)" % [
         | 
| 126 | 
            +
            					children_inspect, @meta[:ref_id].inspect]
         | 
| 127 | 
            +
            			else
         | 
| 128 | 
            +
            				"md_im_image(%s, %s %s)" % [
         | 
| 129 | 
            +
            					children_inspect, @meta[:url].inspect,
         | 
| 130 | 
            +
            					(title=@meta[:title]) ? (", "+ title.inspect) : ""
         | 
| 131 | 
            +
            				]
         | 
| 132 | 
            +
            			end
         | 
| 133 | 
            +
            		when :link
         | 
| 134 | 
            +
            			if @meta[:ref_id]
         | 
| 135 | 
            +
            				"md_link(%s,%s)" % [
         | 
| 136 | 
            +
            					children_inspect, @meta[:ref_id].inspect]
         | 
| 137 | 
            +
            			else
         | 
| 138 | 
            +
            				"md_im_link(%s, %s %s)" % [
         | 
| 139 | 
            +
            					children_inspect, @meta[:url].inspect,
         | 
| 140 | 
            +
            					(title=@meta[:title]) ? (", "+ title.inspect) : ""
         | 
| 141 | 
            +
            				]
         | 
| 142 | 
            +
            			end
         | 
| 143 | 
            +
            		when :ref_definition
         | 
| 144 | 
            +
            			"md_ref_def(%s, %s %s)" % 
         | 
| 145 | 
            +
            				[
         | 
| 146 | 
            +
            					@meta[:ref_id].inspect, 
         | 
| 147 | 
            +
            					@meta[:url].inspect,
         | 
| 148 | 
            +
            					@meta[:title] ? ","+@meta[:title].inspect : ""
         | 
| 149 | 
            +
            				]
         | 
| 150 | 
            +
            		else
         | 
| 151 | 
            +
            			nil
         | 
| 152 | 
            +
            		end
         | 
| 153 | 
            +
            	end
         | 
| 154 | 
            +
            	
         | 
| 155 | 
            +
            end
         | 
| 156 | 
            +
             | 
| 157 | 
            +
             | 
| 158 | 
            +
             | 
| 159 | 
            +
             | 
| 160 | 
            +
             | 
| 161 | 
            +
             | 
| 162 | 
            +
             | 
| 163 | 
            +
             | 
| 164 | 
            +
             | 
    
        data/lib/maruku/html_helper.rb
    CHANGED
    
    | @@ -1,16 +1,21 @@ | |
| 1 1 |  | 
| 2 2 | 
             
            class Maruku
         | 
| 3 3 |  | 
| 4 | 
            -
            #  | 
| 4 | 
            +
            # This class helps me read and sanitize HTML blocks
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            # I tried to do this with REXML, but wasn't able to. (suggestions?)
         | 
| 5 7 |  | 
| 6 8 | 
             
            	class HTMLHelper
         | 
| 7 | 
            -
            		 | 
| 9 | 
            +
            		include MarukuStrings
         | 
| 10 | 
            +
            		
         | 
| 11 | 
            +
            		Tag = %r{^<(/)?(\w+)\s*([^>]*)>}m
         | 
| 8 12 | 
             
            		EverythingElse = %r{^[^<]+}m
         | 
| 9 13 | 
             
            		CommentStart = %r{^<!--}x
         | 
| 10 14 | 
             
            		CommentEnd = %r{^.*-->}
         | 
| 11 15 | 
             
            		TO_SANITIZE = ['img','hr'] 
         | 
| 12 16 |  | 
| 13 | 
            -
            		attr_accessor :inside_comment
         | 
| 17 | 
            +
            #		attr_accessor :inside_comment
         | 
| 18 | 
            +
            		attr_reader :rest
         | 
| 14 19 |  | 
| 15 20 | 
             
            		def initialize 
         | 
| 16 21 | 
             
            			@rest = ""
         | 
| @@ -21,8 +26,8 @@ class Maruku | |
| 21 26 | 
             
            		end
         | 
| 22 27 |  | 
| 23 28 | 
             
            		def eat_this(line)
         | 
| 24 | 
            -
            			@rest = line | 
| 25 | 
            -
            			
         | 
| 29 | 
            +
            			@rest = line  + @rest
         | 
| 30 | 
            +
            			things_read = 0
         | 
| 26 31 | 
             
            			until @rest.empty?
         | 
| 27 32 | 
             
            				if @inside_comment
         | 
| 28 33 | 
             
            					if @m = CommentEnd.match(@rest)
         | 
| @@ -35,10 +40,12 @@ class Maruku | |
| 35 40 | 
             
            					end
         | 
| 36 41 | 
             
            				else
         | 
| 37 42 | 
             
            					if @m = CommentStart.match(@rest)
         | 
| 43 | 
            +
            						things_read += 1
         | 
| 38 44 | 
             
            						@inside_comment = true
         | 
| 39 45 | 
             
            						@already += @m.pre_match + @m.to_s
         | 
| 40 46 | 
             
            						@rest = @m.post_match
         | 
| 41 47 | 
             
            					elsif @m = Tag.match(@rest)
         | 
| 48 | 
            +
            						things_read += 1
         | 
| 42 49 | 
             
            						@already += @m.pre_match
         | 
| 43 50 | 
             
            						@rest = @m.post_match
         | 
| 44 51 |  | 
| @@ -53,7 +60,13 @@ class Maruku | |
| 53 60 | 
             
            						end
         | 
| 54 61 |  | 
| 55 62 | 
             
            						if TO_SANITIZE.include? tag 
         | 
| 56 | 
            -
            							 | 
| 63 | 
            +
            							attributes.strip!
         | 
| 64 | 
            +
            					#		puts "Attributes: #{attributes.inspect}"
         | 
| 65 | 
            +
            							if attributes.size > 0
         | 
| 66 | 
            +
            								@already +=  '<%s %s />' % [tag, attributes]
         | 
| 67 | 
            +
            							else
         | 
| 68 | 
            +
            								@already +=  '<%s />' % [tag]
         | 
| 69 | 
            +
            							end
         | 
| 57 70 | 
             
            						elsif is_closing
         | 
| 58 71 | 
             
            							@already += @m.to_s
         | 
| 59 72 | 
             
            							if @tag_stack.last != tag
         | 
| @@ -73,29 +86,36 @@ class Maruku | |
| 73 86 | 
             
            						@already += @m.pre_match + @m.to_s
         | 
| 74 87 | 
             
            						@rest = @m.post_match
         | 
| 75 88 | 
             
            					else
         | 
| 76 | 
            -
             | 
| 89 | 
            +
            						error "Malformed HTML: not complete: #{@rest.inspect}"
         | 
| 77 90 | 
             
            					end
         | 
| 78 91 | 
             
            				end # not inside comment
         | 
| 79 92 |  | 
| 80 93 | 
             
            #				puts inspect
         | 
| 81 94 | 
             
            #				puts "Read: #{@tag_stack.inspect}"
         | 
| 95 | 
            +
            				break if is_finished? and things_read>0	
         | 
| 82 96 | 
             
            			end
         | 
| 83 97 | 
             
            		end
         | 
| 84 98 |  | 
| 85 99 |  | 
| 86 100 | 
             
            		def error(s)
         | 
| 87 | 
            -
            			raise "Error: #{s} "+ inspect
         | 
| 101 | 
            +
            			raise RuntimeError, "Error: #{s} "+ inspect, caller
         | 
| 88 102 | 
             
            		end
         | 
| 89 103 |  | 
| 90 | 
            -
            		def inspect; "HTML READER\n comment=#{inside_comment} "+
         | 
| 91 | 
            -
            			"match=#{@m.to_s.inspect}"+
         | 
| 92 | 
            -
            			" | 
| 93 | 
            -
            			" | 
| 94 | 
            -
            			"\n | 
| 104 | 
            +
            		def inspect; "HTML READER\n comment=#{@inside_comment} "+
         | 
| 105 | 
            +
            			"match=#{@m.to_s.inspect}\n"+
         | 
| 106 | 
            +
            			"Tag stack = #{@tag_stack.inspect} \n"+
         | 
| 107 | 
            +
            			"Before:\n"+
         | 
| 108 | 
            +
            			add_tabs(@already,1,'|')+"\n"+
         | 
| 109 | 
            +
            			"After:\n"+
         | 
| 110 | 
            +
            			add_tabs(@rest,1,'|')+"\n"
         | 
| 111 | 
            +
            			
         | 
| 95 112 | 
             
            		end
         | 
| 113 | 
            +
            		
         | 
| 114 | 
            +
            		
         | 
| 96 115 | 
             
            		def stuff_you_read
         | 
| 97 116 | 
             
            			@already
         | 
| 98 117 | 
             
            		end
         | 
| 118 | 
            +
            		
         | 
| 99 119 | 
             
            		def is_finished?
         | 
| 100 120 | 
             
            			not @inside_comment and @tag_stack.empty?
         | 
| 101 121 | 
             
            		end
         | 
    
        data/lib/maruku/parse_block.rb
    CHANGED
    
    | @@ -17,9 +17,11 @@ | |
| 17 17 | 
             
            #   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
         | 
| 18 18 |  | 
| 19 19 | 
             
            class Maruku
         | 
| 20 | 
            +
            	include Helpers
         | 
| 21 | 
            +
            	
         | 
| 20 22 | 
             
            	# Splits the string and calls parse_lines_as_markdown
         | 
| 21 23 | 
             
            	def parse_text_as_markdown(text)
         | 
| 22 | 
            -
            		lines =  split_lines(text)
         | 
| 24 | 
            +
            		lines =  Maruku.split_lines(text)
         | 
| 23 25 | 
             
            		parse_lines_as_markdown(lines)
         | 
| 24 26 | 
             
            	end
         | 
| 25 27 |  | 
| @@ -74,7 +76,7 @@ class Maruku | |
| 74 76 |  | 
| 75 77 | 
             
            				# these do not produce output
         | 
| 76 78 | 
             
            				when :footnote_text; read_footnote_text
         | 
| 77 | 
            -
            				when : | 
| 79 | 
            +
            				when :ref_definition;  output << read_ref_definition
         | 
| 78 80 | 
             
            				when :abbreviation;  read_abbreviation
         | 
| 79 81 | 
             
            				when :metadata;      just_read_metadata = read_metadata
         | 
| 80 82 |  | 
| @@ -82,7 +84,7 @@ class Maruku | |
| 82 84 | 
             
            				else
         | 
| 83 85 | 
             
            					node_type = cur_line_node_type
         | 
| 84 86 | 
             
            					line = shift_line
         | 
| 85 | 
            -
             | 
| 87 | 
            +
            					tell_user "Ignoring line '#{line}' type = #{node_type}"
         | 
| 86 88 | 
             
            			end
         | 
| 87 89 |  | 
| 88 90 | 
             
            			if current_metadata and output.last
         | 
| @@ -122,10 +124,8 @@ class Maruku | |
| 122 124 | 
             
            		output
         | 
| 123 125 | 
             
            	end
         | 
| 124 126 |  | 
| 125 | 
            -
            	def create_md_element(node_type, children=[])
         | 
| 126 | 
            -
            		e = MDElement.new
         | 
| 127 | 
            -
            		e.node_type = node_type
         | 
| 128 | 
            -
            		e.children = children
         | 
| 127 | 
            +
            	def create_md_element(node_type, children=[], meta = {})
         | 
| 128 | 
            +
            		e = MDElement.new(node_type, children, meta)
         | 
| 129 129 | 
             
            		e.doc = self
         | 
| 130 130 | 
             
            		e
         | 
| 131 131 | 
             
            	end
         | 
| @@ -134,7 +134,8 @@ class Maruku | |
| 134 134 | 
             
            	def cur_line_node_type; line_node_type top.first  end
         | 
| 135 135 | 
             
            	def cur_line; top.empty? ? nil : top.first end
         | 
| 136 136 | 
             
            	def next_line; top.empty? ? nil : top[1] end
         | 
| 137 | 
            -
            	def next_line_node_type | 
| 137 | 
            +
            	def next_line_node_type
         | 
| 138 | 
            +
            		(top.size >= 2) ? line_node_type(top[1]) : nil end
         | 
| 138 139 | 
             
            	def shift_line; top.shift; end
         | 
| 139 140 |  | 
| 140 141 | 
             
            	# reads a header (with ----- or ========)
         | 
| @@ -191,41 +192,33 @@ class Maruku | |
| 191 192 | 
             
            			while cur_line and not h.is_finished? 
         | 
| 192 193 | 
             
            				l=shift_line
         | 
| 193 194 | 
             
            #				puts "html -> #{l.inspect}"
         | 
| 194 | 
            -
            				h.eat_this l
         | 
| 195 | 
            +
            				h.eat_this "\n"+l
         | 
| 195 196 | 
             
            			end
         | 
| 196 197 | 
             
            		rescue Exception => e
         | 
| 197 | 
            -
            			 | 
| 198 | 
            +
            			tell_user e.inspect + e.backtrace.join("\n")
         | 
| 198 199 | 
             
            #			puts h.inspect
         | 
| 199 200 | 
             
            		end
         | 
| 200 201 |  | 
| 201 202 | 
             
            		raw_html = h.stuff_you_read
         | 
| 202 203 |  | 
| 203 | 
            -
            		 | 
| 204 | 
            -
             | 
| 205 | 
            -
            		begin
         | 
| 206 | 
            -
            			# remove newlines and whitespace at begin
         | 
| 207 | 
            -
            			# end end of string, or else REXML gets confused
         | 
| 208 | 
            -
            			raw_html = raw_html.gsub(/\A\s*</,'<').
         | 
| 209 | 
            -
            			                    gsub(/>[\s\n]*\Z/,'>')
         | 
| 210 | 
            -
            			e.meta[:parsed_html] = Document.new(raw_html)
         | 
| 211 | 
            -
            		rescue 
         | 
| 212 | 
            -
            			#$stderr.puts "Malformed block of HTML:\n#{raw_html}"
         | 
| 213 | 
            -
            			#puts h.inspect
         | 
| 214 | 
            -
            		end
         | 
| 215 | 
            -
            		
         | 
| 216 | 
            -
            		e.meta[:raw_html] = raw_html
         | 
| 217 | 
            -
            		e
         | 
| 204 | 
            +
            		md_html(raw_html)
         | 
| 218 205 | 
             
            	end
         | 
| 219 206 |  | 
| 220 207 | 
             
            	def read_paragraph
         | 
| 221 208 | 
             
            		lines = []
         | 
| 222 | 
            -
            		while cur_line  | 
| 209 | 
            +
            		while cur_line 
         | 
| 210 | 
            +
            			break if [:quote,:header3,:empty,:raw_html,:ref_definition].include?(
         | 
| 211 | 
            +
            				cur_line_node_type)
         | 
| 212 | 
            +
            			break if cur_line.strip.size == 0
         | 
| 213 | 
            +
            			
         | 
| 214 | 
            +
            			break if [:header1,:header2].include? next_line_node_type
         | 
| 215 | 
            +
            			
         | 
| 223 216 | 
             
            			lines << shift_line
         | 
| 224 217 | 
             
            		end
         | 
| 225 218 | 
             
            #		dbg_describe_ary(lines, 'PAR')
         | 
| 226 219 | 
             
            		children = parse_lines_as_span(lines)
         | 
| 227 220 |  | 
| 228 | 
            -
            		 | 
| 221 | 
            +
            		md_par(children)
         | 
| 229 222 | 
             
            	end
         | 
| 230 223 |  | 
| 231 224 |  | 
| @@ -291,56 +284,56 @@ class Maruku | |
| 291 284 | 
             
            	# This is the only ugly function in the code base.
         | 
| 292 285 | 
             
            	# It is used to read list items, descriptions, footnote text
         | 
| 293 286 | 
             
            	def read_indented_content(indentation, break_list, item_type)
         | 
| 294 | 
            -
             | 
| 295 | 
            -
             | 
| 296 | 
            -
             | 
| 297 | 
            -
             | 
| 298 | 
            -
             | 
| 299 | 
            -
             | 
| 300 | 
            -
             | 
| 301 | 
            -
             | 
| 302 | 
            -
             | 
| 303 | 
            -
             | 
| 304 | 
            -
             | 
| 305 | 
            -
             | 
| 306 | 
            -
             | 
| 307 | 
            -
             | 
| 287 | 
            +
            		lines =[]
         | 
| 288 | 
            +
            		# collect all indented lines
         | 
| 289 | 
            +
            		saw_empty = false; saw_anything_after = false
         | 
| 290 | 
            +
            		while cur_line 
         | 
| 291 | 
            +
            			if cur_line_node_type == :empty
         | 
| 292 | 
            +
            				saw_empty = true
         | 
| 293 | 
            +
            				lines << shift_line
         | 
| 294 | 
            +
            				next
         | 
| 295 | 
            +
            			end
         | 
| 296 | 
            +
            		
         | 
| 297 | 
            +
            			# after a white line
         | 
| 298 | 
            +
            			if saw_empty
         | 
| 299 | 
            +
            				# we expect things to be properly aligned
         | 
| 300 | 
            +
            				if number_of_leading_spaces(cur_line) < indentation
         | 
| 308 301 | 
             
            #						debug "breaking for spaces: #{cur_line}"
         | 
| 309 | 
            -
             | 
| 310 | 
            -
            					end
         | 
| 311 | 
            -
            					saw_anything_after = true
         | 
| 312 | 
            -
            				else
         | 
| 313 | 
            -
            					break if break_list.include? cur_line_node_type
         | 
| 314 | 
            -
            	#				break if cur_line_node_type != :text
         | 
| 315 | 
            -
            				end
         | 
| 316 | 
            -
            			
         | 
| 317 | 
            -
            	#			debug "Accepted '#{cur_line}'"
         | 
| 318 | 
            -
             | 
| 319 | 
            -
            				stripped = strip_indent(shift_line, indentation)
         | 
| 320 | 
            -
            				lines << stripped
         | 
| 321 | 
            -
            			
         | 
| 322 | 
            -
            				# You are only required to indent the first line of 
         | 
| 323 | 
            -
            				# a child paragraph.
         | 
| 324 | 
            -
            				if line_node_type(stripped) == :text
         | 
| 325 | 
            -
            					while cur_line && (cur_line_node_type == :text)
         | 
| 326 | 
            -
            						lines << strip_indent(shift_line, indentation)
         | 
| 327 | 
            -
            					end
         | 
| 302 | 
            +
            					break
         | 
| 328 303 | 
             
            				end
         | 
| 304 | 
            +
            				saw_anything_after = true
         | 
| 305 | 
            +
            			else
         | 
| 306 | 
            +
            				break if break_list.include? cur_line_node_type
         | 
| 307 | 
            +
            #				break if cur_line_node_type != :text
         | 
| 329 308 | 
             
            			end
         | 
| 330 | 
            -
             | 
| 331 | 
            -
            			want_my_paragraph = saw_anything_after || 
         | 
| 332 | 
            -
            				(saw_empty && (cur_line  && (cur_line_node_type == item_type))) 
         | 
| 333 309 |  | 
| 334 | 
            -
             | 
| 335 | 
            -
             | 
| 310 | 
            +
            #			debug "Accepted '#{cur_line}'"
         | 
| 311 | 
            +
             | 
| 312 | 
            +
            			stripped = strip_indent(shift_line, indentation)
         | 
| 313 | 
            +
            			lines << stripped
         | 
| 336 314 |  | 
| 337 | 
            -
            			 | 
| 338 | 
            -
             | 
| 315 | 
            +
            			# You are only required to indent the first line of 
         | 
| 316 | 
            +
            			# a child paragraph.
         | 
| 317 | 
            +
            			if line_node_type(stripped) == :text
         | 
| 318 | 
            +
            				while cur_line && (cur_line_node_type == :text)
         | 
| 319 | 
            +
            					lines << strip_indent(shift_line, indentation)
         | 
| 320 | 
            +
            				end
         | 
| 339 321 | 
             
            			end
         | 
| 340 | 
            -
            			
         | 
| 341 | 
            -
            			return lines, want_my_paragraph
         | 
| 342 322 | 
             
            		end
         | 
| 343 323 |  | 
| 324 | 
            +
            		want_my_paragraph = saw_anything_after || 
         | 
| 325 | 
            +
            			(saw_empty && (cur_line  && (cur_line_node_type == item_type))) 
         | 
| 326 | 
            +
            	
         | 
| 327 | 
            +
            #		dbg_describe_ary(lines, 'LI')
         | 
| 328 | 
            +
            		# create a new context 
         | 
| 329 | 
            +
            	
         | 
| 330 | 
            +
            		while lines.last && (line_node_type(lines.last) == :empty)
         | 
| 331 | 
            +
            			lines.pop
         | 
| 332 | 
            +
            		end
         | 
| 333 | 
            +
            		
         | 
| 334 | 
            +
            		return lines, want_my_paragraph
         | 
| 335 | 
            +
            	end
         | 
| 336 | 
            +
             | 
| 344 337 |  | 
| 345 338 | 
             
            	def read_quote
         | 
| 346 339 | 
             
            		lines = []
         | 
| @@ -413,40 +406,42 @@ class Maruku | |
| 413 406 |  | 
| 414 407 |  | 
| 415 408 |  | 
| 416 | 
            -
            	def  | 
| 409 | 
            +
            	def read_ref_definition
         | 
| 417 410 | 
             
            		line = shift_line
         | 
| 418 411 |  | 
| 419 412 | 
             
            		# if link is incomplete, shift next line
         | 
| 420 | 
            -
            		 | 
| 413 | 
            +
            		if cur_line && (cur_line_node_type != :ref_definition) && 
         | 
| 421 414 | 
             
            			([1,2,3].include? number_of_leading_spaces(cur_line) )
         | 
| 422 415 | 
             
            			line += " "+ shift_line
         | 
| 423 416 | 
             
            		end
         | 
| 424 417 |  | 
| 425 418 | 
             
            #		puts "total= #{line}"
         | 
| 426 419 |  | 
| 427 | 
            -
            		 | 
| 428 | 
            -
             | 
| 429 | 
            -
            			 | 
| 430 | 
            -
             | 
| 431 | 
            -
             | 
| 432 | 
            -
             | 
| 433 | 
            -
             | 
| 434 | 
            -
             | 
| 435 | 
            -
             | 
| 436 | 
            -
             | 
| 420 | 
            +
            		match = LinkRegex.match(line)
         | 
| 421 | 
            +
            		if not match
         | 
| 422 | 
            +
            			error "Link does not respect format: '#{line}'"
         | 
| 423 | 
            +
            		end
         | 
| 424 | 
            +
            		
         | 
| 425 | 
            +
            		id = match[1]; url = match[2]; title = match[3]; 
         | 
| 426 | 
            +
            		id = id.strip.downcase
         | 
| 427 | 
            +
            		
         | 
| 428 | 
            +
            		hash = self.refs[id] = {:url=>url,:title=>title}
         | 
| 429 | 
            +
            		
         | 
| 430 | 
            +
            		stuff=match[4]
         | 
| 431 | 
            +
            		
         | 
| 432 | 
            +
            		if stuff
         | 
| 433 | 
            +
            			stuff.split.each do |couple|
         | 
| 437 434 | 
             
            #					puts "found #{couple}"
         | 
| 438 | 
            -
             | 
| 439 | 
            -
             | 
| 440 | 
            -
             | 
| 435 | 
            +
            				k, v = couple.split('=')
         | 
| 436 | 
            +
            				v ||= ""
         | 
| 437 | 
            +
            				if v[0,1]=='"' then v = v[1, v.size-2] end
         | 
| 441 438 | 
             
            #					puts "key:_#{k}_ value=_#{v}_"
         | 
| 442 | 
            -
             | 
| 443 | 
            -
            				end
         | 
| 439 | 
            +
            				hash[k.to_sym] = v
         | 
| 444 440 | 
             
            			end
         | 
| 445 | 
            -
            #			puts hash.inspect
         | 
| 446 | 
            -
            			
         | 
| 447 | 
            -
            		else
         | 
| 448 | 
            -
            			raise "Link does not respect format: '#{line}'"
         | 
| 449 441 | 
             
            		end
         | 
| 442 | 
            +
            #			puts hash.inspect
         | 
| 443 | 
            +
            		
         | 
| 444 | 
            +
            		md_ref_def(id, url, meta={:title=>title})
         | 
| 450 445 | 
             
            	end
         | 
| 451 446 |  | 
| 452 447 | 
             
            	def read_table
         | 
| @@ -466,7 +461,8 @@ class Maruku | |
| 466 461 | 
             
            		num_columns = align.size
         | 
| 467 462 |  | 
| 468 463 | 
             
            		if head.size != num_columns
         | 
| 469 | 
            -
            			 | 
| 464 | 
            +
            			error "Head does not have #{num_columns} columns: \n#{head.inspect}"
         | 
| 465 | 
            +
            			# XXX try to recover
         | 
| 470 466 | 
             
            			return create_md_element(:linebreak)
         | 
| 471 467 | 
             
            		end
         | 
| 472 468 |  | 
| @@ -476,7 +472,8 @@ class Maruku | |
| 476 472 | 
             
            			row = split_cells(shift_line).map{|s|
         | 
| 477 473 | 
             
            				create_md_element(:cell, parse_lines_as_span([s]))}
         | 
| 478 474 | 
             
            			if head.size != num_columns
         | 
| 479 | 
            -
            				 | 
| 475 | 
            +
            				error  "Row does not have #{num_columns} columns: \n#{row.inspect}"
         | 
| 476 | 
            +
            				# XXX try to recover
         | 
| 480 477 | 
             
            				return create_md_element(:linebreak)
         | 
| 481 478 | 
             
            			end
         | 
| 482 479 | 
             
            			rows << row
         |