RubyGems - maruku - Versions diffs - 0.3.0 → 0.4.0 - Mend

maruku 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (108)

data/bin/{maruku0.3 → marudown} +6 -14
data/bin/maruku +1 -1
data/bin/marutest +37 -9
data/docs/TOFIX.html +22 -0
data/docs/TOFIX.md +3 -0
data/docs/changelog-0.2.13.html +30 -0
data/docs/changelog-0.2.13.md +6 -0
data/docs/changelog-0.3.html +19 -5
data/docs/faq.html +51 -40
data/docs/faq.md +3 -3
data/docs/hidden_o_n_squared.md +10 -0
data/docs/index.html +84 -396
data/docs/markdown_syntax.html +139 -330
data/docs/markdown_syntax.md +80 -93
data/docs/maruku.html +84 -396
data/docs/maruku.md +88 -158
data/docs/proposal.html +13 -106
data/docs/proposal.md +3 -3
data/docs/todo.html +38 -28
data/lib/maruku.rb +77 -11
data/lib/maruku/attributes.rb +186 -0
data/lib/maruku/defaults.rb +40 -0
data/lib/maruku/errors_management.rb +55 -39
data/lib/maruku/helpers.rb +156 -72
data/lib/maruku/input/charsource.rb +319 -0
data/lib/maruku/{html_helper.rb → input/html_helper.rb} +30 -9
data/lib/maruku/input/linesource.rb +111 -0
data/lib/maruku/input/parse_block.rb +562 -0
data/lib/maruku/{parse_doc.rb → input/parse_doc.rb} +60 -28
data/lib/maruku/{parse_span_better.rb → input/parse_span_better.rb} +226 -256
data/lib/maruku/input/type_detection.rb +137 -0
data/lib/maruku/maruku.rb +33 -0
data/lib/maruku/{to_html.rb → output/to_html.rb} +151 -132
data/lib/maruku/{to_latex.rb → output/to_latex.rb} +31 -35
data/lib/maruku/{to_latex_entities.rb → output/to_latex_entities.rb} +25 -3
data/lib/maruku/output/to_latex_strings.rb +64 -0
data/lib/maruku/output/to_markdown.rb +164 -0
data/lib/maruku/{to_s.rb → output/to_s.rb} +6 -0
data/lib/maruku/string_utils.rb +12 -181
data/lib/maruku/structures.rb +91 -67
data/lib/maruku/structures_inspect.rb +78 -0
data/lib/maruku/structures_iterators.rb +24 -2
data/lib/maruku/tests/benchmark.rb +41 -9
data/lib/maruku/tests/new_parser.rb +317 -286
data/lib/maruku/tests/tests.rb +20 -0
data/lib/maruku/toc.rb +64 -64
data/lib/maruku/usage/example1.rb +33 -0
data/lib/maruku/version.rb +8 -2
data/tests/unittest/abbreviations.md +27 -16
data/tests/unittest/attributes/attributes.md +89 -0
data/tests/unittest/attributes/circular.md +51 -0
data/tests/unittest/attributes/default.md +47 -0
data/tests/unittest/blank.md +10 -6
data/tests/unittest/blanks_in_code.md +26 -26
data/tests/unittest/code.md +9 -9
data/tests/unittest/code2.md +12 -13
data/tests/unittest/code3.md +34 -34
data/tests/unittest/easy.md +9 -7
data/tests/unittest/email.md +9 -7
data/tests/unittest/encoding/iso-8859-1.md +41 -4
data/tests/unittest/encoding/utf-8.md +6 -5
data/tests/unittest/entities.md +52 -80
data/tests/unittest/escaping.md +47 -35
data/tests/unittest/extra_dl.md +19 -29
data/tests/unittest/extra_header_id.md +31 -24
data/tests/unittest/extra_table1.md +14 -32
data/tests/unittest/footnotes.md +58 -42
data/tests/unittest/headers.md +11 -11
data/tests/unittest/hrule.md +14 -24
data/tests/unittest/images.md +41 -26
data/tests/unittest/inline_html.md +104 -56
data/tests/unittest/inline_html2.md +38 -0
data/tests/unittest/links.md +74 -33
data/tests/unittest/list1.md +18 -15
data/tests/unittest/list2.md +31 -13
data/tests/unittest/list3.md +29 -28
data/tests/unittest/list4.md +103 -12
data/tests/unittest/lists.md +86 -53
data/tests/unittest/lists6.md +53 -0
data/tests/unittest/lists7.md +31 -0
data/tests/unittest/lists_after_paragraph.md +105 -71
data/tests/unittest/lists_ol.md +149 -73
data/tests/unittest/misc_sw.md +366 -326
data/tests/unittest/notyet/escape.md +10 -10
data/tests/unittest/notyet/header_after_par.md +20 -14
data/tests/unittest/notyet/ticks.md +8 -35
data/tests/unittest/notyet/triggering.md +72 -45
data/tests/unittest/olist.md +78 -0
data/tests/unittest/one.md +5 -3
data/tests/unittest/paragraph.md +5 -3
data/tests/unittest/paragraph_rules/dont_merge_ref.md +15 -9
data/tests/unittest/paragraph_rules/tab_is_blank.md +9 -5
data/tests/unittest/paragraphs.md +21 -26
data/tests/unittest/recover/recover_links.md +6 -5
data/tests/unittest/references/long_example.md +39 -30
data/tests/unittest/references/spaces_and_numbers.md +2 -2
data/tests/unittest/syntax_hl.md +33 -31
data/tests/unittest/test.md +4 -6
data/tests/unittest/wrapping.md +43 -26
metadata +160 -139
data/docs/markdown_extra2.html +0 -87
data/docs/markdown_extra2.md +0 -83
data/docs/markdown_syntax_2.html +0 -152
data/lib/maruku/parse_block.rb +0 -564
data/lib/maruku/parse_span.rb +0 -451
data/lib/maruku/to_latex_strings.rb +0 -59
data/lib/maruku/to_markdown.rb +0 -110
data/lib/test.rb +0 -29

data/lib/maruku/parse_span.rb DELETED

@@ -1,451 +0,0 @@
-#   Copyright (C) 2006  Andrea Censi  <andrea (at) rubyforge.org>
-#
-# This file is part of Maruku.
-#
-#   Maruku is free software; you can redistribute it and/or modify
-#   it under the terms of the GNU General Public License as published by
-#   the Free Software Foundation; either version 2 of the License, or
-#   (at your option) any later version.
-#
-#   Maruku is distributed in the hope that it will be useful,
-#   but WITHOUT ANY WARRANTY; without even the implied warranty of
-#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#   GNU General Public License for more details.
-#
-#   You should have received a copy of the GNU General Public License
-#   along with Maruku; if not, write to the Free Software
-#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-# There are two black-magic methods `match_couple_of` and `map_match`,
-# defined at the end of the file, that make the function
-# `parse_lines_as_span` so elegant.
-class Maruku
-	# Takes care of all span-level formatting, links, images, etc.
-	#
-	# Lines must not contain block-level elements.
-	def parse_lines_as_span(lines)
-		# first, get rid of linebreaks
-		res = resolve_linebreaks(lines)
-		span = MDElement.new(:dummy, res)
-		# encode all escapes
-		span.replace_each_string { |s| s.escape_md_special }
-# The order of processing is significant:
-# 1. inline code
-# 2. immediate links
-# 3. inline HTML
-# 4. everything else
-		# search for ``code`` markers
-		span.match_couple_of('``') { |children, match1, match2|
-			e = create_md_element(:inline_code)
-			# this is now opaque to processing
-			e.meta[:raw_code] = children.join('').it_was_a_code_block
-			e
-		}
-		# Search for `single tick`  code markers
-		span.match_couple_of('`') { |children, match1, match2|
-			e = create_md_element(:inline_code)
-			# this is now opaque to processing
-			e.meta[:raw_code] = children.join('').it_was_a_code_block
-			# this is now opaque to processing
-			e
-		}
-		# Detect any immediate link: <http://www.google.com>
-		# we expect an http: or something: at the beginning
-		span.map_match( /<(\w+:[^\>]+)>/) { |match|
-			url = match[1]
-			e = create_md_element(:immediate_link, [])
-			e.meta[:url] = url
-			e
-		}
-		# Search for inline HTML (the support is pretty basic for now)
-		# this searches for a matching block
-		inlineHTML1 = %r{
-			(   # put everything in 1
-			<   # open
-			(\w+) # opening tag in 2
-			>   # close
-			.*  # anything
-			</\2> # match closing tag
-			)
-		}x
-		# this searches for only one block
-		inlineHTML2 = %r{
-			(   # put everything in 1
-			<   # open
-			\w+ #
-			    # close
-			[^<>]*  # anything except
-			/> # closing tag
-			)
-		}x
-		for reg in [inlineHTML1, inlineHTML2]
-			span.map_match(reg) { |match|
-				raw_html = match[1]
-				convert_raw_html_in_list(raw_html)
-			}
-		end
-		# Detect footnotes references: [^1]
-		span.map_match(/\[(\^[^\]]+)\]/) { |match|
-			id = match[1].strip.downcase
-			e = create_md_element(:footnote_reference)
-			e.meta[:footnote_id] = id
-			e
-		}
-		# Detect any image like ![Alt text][url]
-		span.map_match(/\!\[([^\]]+)\]\s?\[([^\]]*)\]/) { |match|
-			alt = match[1]
-			id = match[2].strip.downcase
-			if id.size == 0
-				id = text.strip.downcase
-			end
-			e = create_md_element(:image)
-			e.meta[:ref_id] = id
-			e
-		}
-		# Detect any immage with immediate url: ![Alt](url "title")
-		# a dummy ref is created and put in the symbol table
-		link1 = /!\[([^\]]+)\]\s?\(([^\s\)]*)(?:\s+["'](.*)["'])?\)/
-		span.map_match(link1) { |match|
-			alt = match[1]
-			url = match[2]
-			title = match[3]
-			url = url.strip
-			# create a dummy id
-			id="dummy_#{@refs.size}"
-			@refs[id] = {:url=>url, :title=>title}
-			e = create_md_element(:image)
-			e.meta[:ref_id] = id
-			e
-		}
-		# an id reference: "[id]",  "[ id  ]"
-		reg_id_ref = %r{
-			\[ # opening bracket
-			([^\]]*) # 0 or more non-closing bracket (this is too permissive)
-			\] # closing bracket
-			}x
-		# validates a url, only $1 is set to the url
- 		reg_url =
-			/((?:\w+):\/\/(?:\w+:{0,1}\w*@)?(?:\S+)(?::[0-9]+)?(?:\/|\/([\w#!:.?+=&%@!\-\/]))?)/
-		reg_url = %r{([^\s\]\)]+)}
-		# A string enclosed in quotes.
-		reg_title = %r{
-			" # opening
-			[^"]*   # anything = 1
-			" # closing
-			}x
-		# [bah](http://www.google.com "Google.com"),
-		# [bah](http://www.google.com),
-		# [empty]()
-		reg_url_and_title = %r{
-			\(  # opening
-			\s* # whitespace
-			#{reg_url}?  # url = 1 might be  empty
-			(?:\s+["'](.*)["'])? # optional title  = 2
-			\s* # whitespace
-			\) # closing
-		}x
-		# Detect a link like ![Alt text][id]
-		span.map_match(/\[([^\]]+)\]\s?\[([^\]]*)\]/) { |match|
-			text = match[1]
-			id = match[2].strip.downcase
-			if id.size == 0
-				id = text.strip.downcase
-			end
-			children = parse_lines_as_span(text)
-			e = create_md_element(:link, children)
-			e.meta[:ref_id] = id
-			e
-		}
-		# Detect any immage with immediate url: ![Alt](url "title")
-		# a dummy ref is created and put in the symbol table
-		link1 = /!\[([^\]]+)\]\s?\(([^\s\)]*)(?:\s+["'](.*)["'])?\)/
-		span.map_match(link1) { |match|
-			text = match[1]
-			children = parse_lines_as_span(text)
-			url = match[2]
-			title = match[3]
-			url = url.strip
-			# create a dummy id
-			id="dummy_#{@refs.size}"
-			@refs[id] = {:url=>url, :title=>title}
-			@refs[id][:title] = title if title
-			e = create_md_element(:link, children)
-			e.meta[:ref_id] = id
-			e
-		}
-		# Detect any link like [Google engine][google]
-		span.match_couple_of('[',  # opening bracket
-			%r{\]                   # closing bracket
-			[ ]?                    # optional whitespace
-			#{reg_id_ref} # ref id, with $1 being the reference
-			}x
-				) { |children, match1, match2|
-			id = match2[1]
-			id = id.strip.downcase
-			if id.size == 0
-				id = children.join.strip.downcase
-			end
-			e = create_md_element(:link, children)
-			e.meta[:ref_id] = id
-			e
-		}
-		# Detect any link with immediate url: [Google](http://www.google.com)
-		# XXX Note that the url can be empty: [Empty]()
-		# a dummy ref is created and put in the symbol table
-		span.match_couple_of('[',  # opening bracket
-				%r{\]                   # closing bracket
-				[ ]?                    # optional whitespace
-				#{reg_url_and_title}    # ref id, with $1 being the url and $2 being the title
-				}x
-					) { |children, match1, match2|
-			url   = match2[1]
-			title = match2[3] # XXX? Is it a bug? I would use [2]
-			# create a dummy id
-			id="dummy_#{@refs.size}"
-			@refs[id] = {:url=>url}
-			@refs[id][:title] = title if title
-			e = create_md_element(:link, children)
-			e.meta[:ref_id] = id
-			e
-		}
-		# Detect an email address <andrea@invalid.it>
-		span.map_match(EMailAddress) { |match|
-			email = match[1]
-			e = create_md_element(:email_address, [])
-			e.meta[:email] = email
-			e
-		}
-		# Detect HTML entitis
-		span.map_match(/&([\w\d]+);/) { |match|
-			entity_name = match[1]
-			e = create_md_element(:entity, [])
-			e.meta[:entity_name] = entity_name
-			e
-		}
-		# And now the easy stuff
-		# search for ***strong and em***
-		span.match_couple_of('***') { |children,m1,m2|
-			create_md_element(:strong, [create_md_element(:emphasis, children)] ) }
-		span.match_couple_of('___') { |children,m1,m2|
-			create_md_element(:strong, [create_md_element(:emphasis, children)] ) }
-		# search for **strong**
-		span.match_couple_of('**') { |children,m1,m2|  create_md_element(:strong,   children) }
-		# search for __strong__
-		span.match_couple_of('__') { |children,m1,m2|  create_md_element(:strong,   children) }
-		# search for *emphasis*
-		span.match_couple_of('*')  { |children,m1,m2|  create_md_element(:emphasis, children) }
-		# search for _emphasis_
-		span.match_couple_of('_')  { |children,m1,m2|  create_md_element(:emphasis, children) }
-		# finally, unescape the special characters
-		span.replace_each_string { |s|  s.unescape_md_special}
-		span.children
-	end
-	# returns array containing Strings or :linebreak elements
-	def resolve_linebreaks(lines)
-		res = []
-		s = ""
-		lines.each do |l|
-			s += (s.size>0 ? " " : "") + l.strip
-			if force_linebreak?(l)
-				res << s
-				res << create_md_element(:linebreak)
-				s = ""
-			end
-		end
-		res << s if s.size > 0
-		res
-	end
-	# raw_html is something like
-	#  <em> A</em> dopwkk *maruk* <em>A</em>
-	def convert_raw_html_in_list(raw_html)
-		e = create_md_element(:raw_html)
-		e.meta[:raw_html]  = raw_html
-		begin
-			e.meta[:parsed_html] = Document.new(raw_html)
-		rescue
-			$stderr.puts "convert_raw_html_in_list Malformed HTML:\n#{raw_html}"
-		end
-		e
-	end
-end
-# And now the black magic that makes the part above so elegant
-class MDElement
-	# Try to match the regexp to each string in the hierarchy
-	# (using `replace_each_string`). If the regexp match, eliminate
-	# the matching string and substitute it with the pre_match, the
-	# result of the block, and the post_match
-	#
-	#   ..., matched_string, ... -> ..., pre_match, block.call(match), post_match
-	#
-	# the block might return arrays.
-	#
-	def map_match(regexp, &block)
-		replace_each_string { |s|
-			processed = []
-			while (match = regexp.match(s))
-				# save the pre_match
-				processed << match.pre_match if match.pre_match && match.pre_match.size>0
-				# transform match
-				result = block.call(match)
-				# and append as processed
-				[*result].each do |e| processed << e end
-				# go on with the rest of the string
-				s = match.post_match
-			end
-			processed << s if s.size > 0
-			processed
-		}
-	end
-	# Finds couple of delimiters in a hierarchy of Strings and MDElements
-	#
-	# Open and close are two delimiters (like '[' and ']'), or two Regexp.
-	#
-	# If you don't pass close, it defaults to open.
-	#
-	# Each block is called with |contained children, match1, match2|
-	def match_couple_of(open, close=nil, &block)
-		close = close || open
-		 open_regexp =  open.kind_of?(Regexp) ?  open : Regexp.new(Regexp.escape(open))
-		close_regexp = close.kind_of?(Regexp) ? close : Regexp.new(Regexp.escape(close))
-		# Do the same to children first
-		for c in @children; if c.kind_of? MDElement
-			c.match_couple_of(open_regexp, close_regexp, &block)
-		end end
-		processed_children = []
-		until @children.empty?
-			c = @children.shift
-			if c.kind_of? String
-				match1 = open_regexp.match(c)
-				if not match1
-					processed_children << c
-				else # we found opening, now search closing
-#					puts "Found opening (#{marker}) in #{c.inspect}"
-					# pre match is processed
-					processed_children.push match1.pre_match if
-						match1.pre_match && match1.pre_match.size > 0
-					# we will process again the post_match
-					@children.unshift match1.post_match if
-						match1.post_match && match1.post_match.size>0
-					contained = []; found_closing = false
-					until @children.empty?  || found_closing
-						c = @children.shift
-						if c.kind_of? String
-							match2 = close_regexp.match(c)
-							if not match2
-								contained << c
-							else
-								# we found closing
-								found_closing = true
-								# pre match is contained
-								contained.push match2.pre_match if
-									match2.pre_match && match2.pre_match.size>0
-								# we will process again the post_match
-								@children.unshift match2.post_match if
-									match2.post_match && match2.post_match.size>0
-								# And now we call the block
-								substitute = block.call(contained, match1, match2)
-								processed_children  << substitute
-#								puts "Found closing (#{marker}) in #{c.inspect}"
-#								puts "Children: #{contained.inspect}"
-#								puts "Substitute: #{substitute.inspect}"
-							end
-						else
-							contained << c
-						end
-					end
-					if not found_closing
-						# $stderr.puts "##### Could not find closing for #{open}, #{close} -- ignoring"
-						processed_children << match1.to_s
-						contained.reverse.each do |c|
-							@children.unshift c
-						end
-					end
-				end
-			else
-				processed_children << c
-			end
-		end
-		raise "BugBug" unless @children.empty?
-		rebuilt = []
-		# rebuild strings
-		processed_children.each do |c|
-			if c.kind_of?(String) && rebuilt.last && rebuilt.last.kind_of?(String)
-				rebuilt.last << c
-			else
-				rebuilt << c
-			end
-		end
-		@children = rebuilt
-	end
-end

data/lib/maruku/to_latex_strings.rb DELETED

@@ -1,59 +0,0 @@
-class String
-	# These are TeX's special characters
-	LATEX_ADD_SLASH = [ '{', '}', '$', '&', '#', '_', '%'].map{|x|x[0]}
-	# These, we transform to {\tt \char<ascii code>}
-	LATEX_TO_CHARCODE = [ '^', '~', '>','<'].map{|x|x[0]}
-	def int_to_string(char)
-		tmp = "0"; tmp[0]=char; tmp;
-	end
-	def escape_to_latex(s)
-		s2 = ""
-		s.each_byte do |b|
-			if LATEX_TO_CHARCODE.include? b
-				s2 += "{\\tt \\char#{b}}"
-			elsif LATEX_ADD_SLASH.include? b
-				s2 += "\\"
-				s2 += int_to_string(b)
-			elsif b == "\\"[0]
-			# there is no backslash in cmr10 fonts
-				s2 += "$\\backslash$"
-			else
-				s2 += int_to_string(b)
-			end
-		end
-		s2
-	end
-	# escapes special characters
-	def to_latex
-		s = self
-		s = escape_to_latex(s)
-#		puts "Before: #{s.inspect}"
-#		puts "after: #{s.inspect}"
-		OtherGoodies.each do |k, v|
-			s.gsub!(k, v)
-		end
-		s
-	end
-	# other things that are good on the eyes
-	OtherGoodies = {
-		/(\s)LaTeX/ => '\1\\LaTeX\\xspace ', # XXX not if already \latex
-#		'HTML' => '\\textsc{html}\\xspace ',
-#		'PDF' => '\\textsc{pdf}\\xspace '
-	}
-end