RubyGems - maruku - Versions diffs - 0.2 - Mend

maruku 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

data/bin/maruku +25 -0
data/bin/marutex +29 -0
data/docs/Makefile +25 -0
data/docs/char_codes.xml +884 -0
data/docs/color-package-demo.aux +1 -0
data/docs/color-package-demo.log +127 -0
data/docs/color-package-demo.tex +149 -0
data/docs/index.html +74 -0
data/docs/markdown_syntax.aux +13 -0
data/docs/markdown_syntax.html +266 -0
data/docs/markdown_syntax.log +287 -0
data/docs/markdown_syntax.md +920 -0
data/docs/markdown_syntax.out +0 -0
data/docs/markdown_syntax.pdf +0 -0
data/docs/markdown_syntax.tex +1203 -0
data/docs/maruku.aux +13 -0
data/docs/maruku.html +74 -0
data/docs/maruku.log +294 -0
data/docs/maruku.md +394 -0
data/docs/maruku.out +0 -0
data/docs/maruku.pdf +0 -0
data/docs/maruku.tex +548 -0
data/docs/style.css +65 -0
data/docs/todo.md +12 -0
data/lib/maruku.rb +20 -0
data/lib/maruku/parse_block.rb +577 -0
data/lib/maruku/parse_span.rb +336 -0
data/lib/maruku/string_utils.rb +270 -0
data/lib/maruku/structures.rb +31 -0
data/lib/maruku/to_html.rb +430 -0
data/lib/maruku/to_latex.rb +345 -0
data/lib/maruku/to_latex_strings.rb +330 -0
data/tests/abbreviations.md +11 -0
data/tests/blank.md +4 -0
data/tests/code.md +5 -0
data/tests/code2.md +8 -0
data/tests/code3.md +16 -0
data/tests/email.md +4 -0
data/tests/entities.md +19 -0
data/tests/escaping.md +14 -0
data/tests/extra_dl.md +101 -0
data/tests/extra_header_id.md +13 -0
data/tests/extra_table1.md +40 -0
data/tests/footnotes.md +17 -0
data/tests/headers.md +10 -0
data/tests/hrule.md +10 -0
data/tests/images.md +20 -0
data/tests/inline_html.md +35 -0
data/tests/links.md +31 -0
data/tests/list1.md +4 -0
data/tests/list2.md +5 -0
data/tests/list3.md +8 -0
data/tests/lists.md +32 -0
data/tests/lists_ol.md +39 -0
data/tests/misc_sw.md +105 -0
data/tests/one.md +1 -0
data/tests/paragraphs.md +13 -0
data/tests/sss06.md +352 -0
data/tests/test.md +4 -0
metadata +113 -0

data/lib/maruku/parse_span.rb ADDED

@@ -0,0 +1,336 @@
+# There are two black-magic methods `match_couple_of` and `map_match`,
+# defined at the end of the file, that make the function
+# `parse_lines_as_span` so elegant.
+class Maruku
+	# Takes care of all span-level formatting, links, images, etc.
+	#
+	# Lines must not contain block-level elements.
+	def parse_lines_as_span(lines)
+		# first, get rid of linebreaks
+		res = resolve_linebreaks(lines)
+		span = MDElement.new
+		span.children = res
+		# then, encode all escapes
+		span.replace_each_string { |s| s.escape_md_special }
+		# search for ``code`` markers
+		span.match_couple_of('``') { |children|
+			e = create_md_element(:inline_code)
+			e.meta[:raw_code] = children.join('') # this is now opaque to processing
+			e
+		}
+		# Search for `single tick`  code markers
+		span.match_couple_of('`') { |children|
+			e = create_md_element(:inline_code)
+			e.meta[:raw_code] = children.join('').unescape_md_special
+			# this is now opaque to processing
+			e
+		}
+		# Detect any immediate link: <http://www.google.com>
+		# we expect an http: or something: at the beginning
+		span.map_match( /<(\w+:[^\>]+)>/) { |match|
+			url = match[1]
+			e = create_md_element(:immediate_link, [])
+			e.meta[:url] = url
+			e
+		}
+		# Search for inline HTML (the support is pretty basic for now)
+		# this searches for a matching block
+		inlineHTML1 = %r{
+			(   # put everything in 1
+			<   # open
+			(\w+) # opening tag in 2
+			>   # close
+			.*  # anything
+			</\2> # match closing tag
+			)
+		}x
+		# this searches for only one block
+		inlineHTML2 = %r{
+			(   # put everything in 1
+			<   # open
+			\w+ #
+			    # close
+			[^<>]*  # anything except
+			/> # closing tag
+			)
+		}x
+		for reg in [inlineHTML1, inlineHTML2]
+			span.map_match(reg) { |match|
+				raw_html = (match[1] || raise("No html?"))
+				e = create_md_element(:raw_html)
+				e.meta[:raw_html]  = raw_html
+				begin
+					e.meta[:parsed_html] = Document.new(raw_html)
+				rescue
+					$stderr.puts "Malformed HTML:\n#{raw_html}"
+				end
+				e
+			}
+		end
+		# Detect footnotes references: [^1]
+		span.map_match(/\[(\^[^\]]+)\]/) { |match|
+			id = match[1].strip.downcase
+			e = create_md_element(:footnote_reference)
+			e.meta[:footnote_id] = id
+			e
+		}
+		# Detect any image like ![Alt text][url]
+		span.map_match(/\!\[([^\]]+)\]\s?\[([^\]]*)\]/) { |match|
+			alt = match[1]
+			id = match[2].strip.downcase
+			if id.size == 0
+				id = text.strip.downcase
+			end
+			e = create_md_element(:image)
+			e.meta[:ref_id] = id
+			e
+		}
+		# Detect any immage with immediate url: ![Alt](url "title")
+		# a dummy ref is created and put in the symbol table
+		link1 = /!\[([^\]]+)\]\s?\(([^\s\)]*)(?:\s+["'](.*)["'])?\)/
+		span.map_match(link1) { |match|
+			alt = match[1]
+			url = match[2]
+			title = match[3]
+			url = url.strip
+			# create a dummy id
+			id="dummy_#{@refs.size}"
+			@refs[id] = {:url=>url, :title=>title}
+			e = create_md_element(:image)
+			e.meta[:ref_id] = id
+			e
+		}
+		# Detect any link like [Google engine][google]
+		span.map_match(/\[([^\]]+)\]\s?\[([^\]]*)\]/) { |match|
+			text = match[1]
+			id = match[2]
+			id = id.strip.downcase
+			if id.size == 0
+				id = text.strip.downcase
+			end
+			e = create_md_element(:link, [text])
+			e.meta[:ref_id] = id
+			e
+		}
+		# Detect any link with immediate url: [Google](http://www.google.com)
+		# a dummy ref is created and put in the symbol table
+		span.map_match(/\[([^\]]+)\]\s?\(([^\)]*)\)/) { |match|
+			text = match[1]
+			url = match[2]
+			url = url.strip.downcase
+			# create a dummy id
+			id="dummy_#{@refs.size}"
+			@refs[id] = {:url=>url}
+			e = create_md_element(:link, [text])
+			e.meta[:ref_id] = id
+			e
+		}
+		# Detect an email address <andrea@invalid.it>
+		span.map_match( /<([^:]+@[^:]+)>/) { |match|
+			email = match[1]
+			e = create_md_element(:email_address, [])
+			e.meta[:email] = email
+			e
+		}
+		# And now the easy stuff
+		# search for **strong**
+		span.match_couple_of('**') { |children|  create_md_element(:strong,   children) }
+		# search for __strong__
+		span.match_couple_of('__') { |children|  create_md_element(:strong,   children) }
+		# search for *emphasis*
+		span.match_couple_of('*')  { |children|  create_md_element(:emphasis, children) }
+		# search for _emphasis_
+		span.match_couple_of('_')  { |children|  create_md_element(:emphasis, children) }
+		# finally, unescape the special characters
+		span.replace_each_string { |s|  s.unescape_md_special}
+		span.children
+	end
+	# returns array containing Strings or :linebreak elements
+	def resolve_linebreaks(lines)
+		res = []
+		s = ""
+		lines.each do |l|
+			s += (s.size>0 ? " " : "") + l.strip
+			if force_linebreak?(l)
+				res << s
+				res << create_md_element(:linebreak)
+				s = ""
+			end
+		end
+		res << s if s.size > 0
+		res
+	end
+end
+# And now the black magic that makes the part above so elegant
+class MDElement
+	# yields to each element of specified node_type
+	def each_element(e_node_type, &block)
+		@children.each do |c|
+			if c.kind_of? MDElement
+				if (not e_node_type) || (e_node_type == c.node_type)
+					block.call c
+				end
+				c.each_element(e_node_type, &block)
+			end
+		end
+	end
+	# Apply passed block to each String in the hierarchy.
+	def replace_each_string(&block)
+		for c in @children
+			if c.kind_of? MDElement
+				c.replace_each_string(&block)
+			end
+		end
+		processed = []
+		until @children.empty?
+			c = @children.shift
+			if c.kind_of? String
+				result = block.call(c)
+				[*result].each do |e| processed << e end
+			else
+				processed << c
+			end
+		end
+		@children = processed
+	end
+	# Try to match the regexp to each string in the hierarchy
+	# (using `replace_each_string`). If the regexp match, eliminate
+	# the matching string and substitute it with the pre_match, the
+	# result of the block, and the post_match
+	#
+	#   ..., matched_string, ... -> ..., pre_match, block.call(match), post_match
+	#
+	# the block might return arrays.
+	#
+	def map_match(regexp, &block)
+		replace_each_string { |s|
+			processed = []
+			while (match = regexp.match(s))
+				# save the pre_match
+				processed << match.pre_match if match.pre_match && match.pre_match.size>0
+				# transform match
+				result = block.call(match)
+				# and append as processed
+				[*result].each do |e| processed << e end
+				# go on with the rest of the string
+				s = match.post_match
+			end
+			processed << s if s.size > 0
+			processed
+		}
+	end
+	# Finds couple of delimiters in a hierarchy of Strings and MDElements
+	def match_couple_of(marker, &block) # replaces each `marker ... marker` pair with block.call(enclosed children)
+		regexp = Regexp.new(Regexp.escape(marker)) # match the marker text literally
+		for c in @children; if c.kind_of? MDElement
+			c.match_couple_of(marker, &block) # nested elements are processed first
+		end end
+		processed_children = []
+		until @children.empty? # @children doubles as the work queue
+			c = @children.shift
+			if c.kind_of? String
+				match = regexp.match(c)
+				if not match
+					processed_children << c
+				else # we found opening, now search closing
+#					puts "Found opening (#{marker}) in #{c.inspect}"
+					# pre match is processed
+					processed_children.push match.pre_match if
+						match.pre_match && match.pre_match.size > 0
+					# we will process again the post_match
+					@children.unshift match.post_match if
+						match.post_match && match.post_match.size>0
+					contained = []; found_closing = false # nodes collected between the two markers
+					until @children.empty?  || found_closing
+						c = @children.shift
+						if c.kind_of? String
+							match = regexp.match(c)
+							if not match
+								contained << c
+							else
+								# we found closing
+								found_closing = true
+								# pre match is contained
+								contained.push match.pre_match if
+									match.pre_match && match.pre_match.size>0
+								# we will process again the post_match
+								@children.unshift match.post_match if
+									match.post_match && match.post_match.size>0
+								# And now we call the block
+								substitute = block.call(contained)
+								processed_children  << substitute
+#								puts "Found closing (#{marker}) in #{c.inspect}"
+#								puts "Children: #{contained.inspect}"
+#								puts "Substitute: #{substitute.inspect}"
+							end
+						else
+							contained << c # non-String nodes between the markers are kept as they are
+						end
+					end
+					if not found_closing
+						$stderr.puts "##### Could not find closing for #{marker}"
+						processed_children << "?" # the unmatched opening marker is emitted as "?"
+						contained.reverse.each do |c| # put the collected nodes back, in order, to be scanned again
+							@children.unshift c
+						end
+					end
+				end
+			else
+				processed_children << c
+			end
+		end
+		@children = processed_children
+	end
+end

data/lib/maruku/string_utils.rb ADDED

@@ -0,0 +1,270 @@
+class Maruku
+	# Split a string into lines, and chomps the newline
+	def split_lines(s)
+		a = []
+		s.each_line do |l|
+			l = l.chomp
+			a << l
+		end
+		a
+	end
+	## This parses email headers. Returns an hash. hash['data'] is the message
+	def parse_email_headers(s)
+		keys={}
+		match = (s =~ /((\w+: .*\n)+)\n/)
+		if match != 0
+			keys[:data] = s
+		else
+			keys[:data] = $'
+			headers = $1
+			headers.split("\n").each do |l|
+				k, v = l.split(':')
+				keys[k.strip.downcase.to_sym] = v.strip
+			end
+		end
+		keys
+	end
+	# Returns the number of leading spaces, considering that
+	# a tab counts as `TabSize` spaces.
+	def number_of_leading_spaces(s)
+		n=0; i=0;
+		while i < s.size
+			c = s[i,1]
+			if c == ' '
+				i+=1; n+=1;
+			elsif c == "\t"
+				i+=1; n+=TabSize;
+			else
+				break
+			end
+		end
+		n
+	end
+	# This returns the position of the first real char in a list item
+	#
+	# For example:
+	#     '*Hello' # => 1
+	#     '* Hello' # => 2
+	#     ' * Hello' # => 3
+	#     ' *   Hello' # => 5
+	#     '1.Hello' # => 2
+	#     ' 1.  Hello' # => 5
+	def spaces_before_first_char(s)
+		case line_node_type(s)
+		when :ulist
+			i=0;
+			# skip whitespace
+			while s[i,1] =~ /\s/; i+=1 end
+			# skip indicator
+			i+=1
+			# skip whitespace
+			while s[i,1] =~ /\s/; i+=1 end
+						#
+			# while i < s.size
+			# 	break if not [' ',"\t",'*','-'].include? s[i,1]
+			# 	i += 1
+			# end
+			return i
+		when :olist
+			i=0;
+			# skip whitespace
+			while s[i,1] =~ /\s/; i+=1 end
+			# skip digits
+			while s[i,1] =~ /\d/; i+=1 end
+			# skip dot
+			i+=1
+			# skip whitespace
+			while s[i,1] =~ /\s/; i+=1 end
+			return i
+		end
+	end
+	# Counts the number of leading '#' in the string
+	def num_leading_hashes(s)
+		i=0;
+		while i<(s.size-1) && (s[i,1]=='#'); i+=1 end
+		i
+	end
+	# Strips initial and final hashes
+	def strip_hashes(s)
+		s = s[num_leading_hashes(s), s.size]
+		i = s.size-1
+		while i > 0 && (s[i,1] =~ /(#|\s)/); i-=1; end
+		s[0, i+1].strip
+	end
+	# removes initial quote
+	def unquote(s)
+		s.gsub(/^>\s?/,'')
+	end
+	# toglie al massimo n caratteri
+	def strip_indent(s, n)
+		i = 0
+		while i < s.size && n>0
+			c = s[i,1]
+			if c == ' '
+				n-=1;
+			elsif c == "\t"
+				n-=TabSize;
+			else
+				break
+			end
+			i+=1
+		end
+		s[i, s.size-1]
+	end
+	def debug(s)
+		$stderr.puts s
+	end
+	def dbg_describe_ary(a, prefix='')
+		i = 0
+		a.each do |l|
+			$stderr.puts "#{prefix} (#{i+=1})##{l}#"
+		end
+	end
+	def force_linebreak?(l)
+		l =~ /  $/
+	end
+	def line_node_type(l)
+		# raw html is like PHP Markdown Extra: at most three spaces before
+		return :code     if number_of_leading_spaces(l)>=4
+		return :footnote_text      if l =~ FootnoteText
+		return :ref if l =~ LinkRegex or l=~ IncompleteLink
+		return :abbreviation if l =~ Abbreviation
+		return :definition if l =~ Definition
+		return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+}
+		return :ulist    if l =~ /^\s?(\*|-)\s+.*\w+/
+		return :olist    if l =~ /^\s?\d\..*\w+/
+		return :empty    if l.strip.size == 0
+		return :header1  if l =~ /^(=)+/
+		return :header2  if l =~ /^([-\s])+$/
+		return :header3  if l =~ /^(#)+\s*\S+/
+		# at least three asterisks on a line, and only whitespace
+		return :hrule    if l =~ /^(\s*\*\s*){3,1000}$/
+		return :hrule    if l =~ /^(\s*-\s*){3,1000}$/ # or hyphens
+		return :quote    if l =~ /^>/
+		return :metadata if l =~ /^@/
+		return :text
+	end
+	# Example:
+	#     ^:blah blah
+	#     ^: blah blah
+	#     ^   : blah blah
+	Definition = %r{
+		^ # begin of line
+		[ ]{0,3} # up to 3 spaces
+		: # colon
+		\s* # whitespace
+		(\S.*) # the text    = $1
+		$ # end of line
+	}x
+	# Abbreviation definition line. Example:
+	#     *[HTML]: Hyper Text Markup Language
+	Abbreviation = %r{
+		^  # begin of line
+		\* # one asterisk
+		\[ # opening bracket
+		([^\]]+) # any non-closing bracket:  id = $1
+		\] # closing bracket
+		:  # colon
+		\s* # whitespace
+		(\S.*\S)* #           definition=$2
+		\s* # strip this whitespace
+		$   # end of line
+	}x
+	FootnoteText = %r{
+		^\s*\[(\^.+)\]: # id = $1 (including '^')
+		\s*(\S.*)?$    # text = $2 (not obb.)
+	}x # footnote definition line: "[^id]:" optionally followed by text
+	# This regex is taken from BlueCloth sources
+	# Link defs are in the form: ^[id]: \n? url "optional title"
+	LinkRegex = %r{
+		^[ ]*\[(.+)\]:		# id = $1
+		  [ ]*
+		<?(\S+)>?				# url = $2
+		  [ ]*
+		(?:# Titles are delimited by "quotes" or (parens).
+			["(']
+			(.+?)			# title = $3
+			[")']			# Matching ) or "
+			\s*(.+)?   # stuff = $4
+		)?	# title is optional
+	  }x
+	IncompleteLink = %r{^\s*\[(.+)\]:\s*$} # "[id]:" with nothing but whitespace after the colon
+	HeaderWithId = /^(.*)\{\#([\w_-]+)\}\s*$/ # header text ($1) ending with a {#id} marker (id = $2)
+	TabSize = 4; # number of columns one tab counts for when measuring indentation
+	# A line containing a pipe could be a table header
+	MightBeTableHeader = %r{\|}
+	# One separator cell: dashes with an optional colon on either side, e.g. -------------:
+	Sep = /\s*(\:)?\s*-+\s*(\:)?\s*/
+	# A whole separator row, e.g. | -------------:| ------------------------------ |
+	TableSeparator = %r{^(\|?#{Sep}\|?)+\s*$}
+end
+class String
+	S = 240
+	MarkdownEscaped =
+		[["\\",S+0],
+		 ['`',S+1],
+		 ['*',S+2],
+		['_',S+3],['{',S+4],['}',S+5],['[',S+6],[']',S+7],
+		['(',S+8],[')',S+9],['#',S+10],['.',S+11],
+		['!',S+12],
+		# PHP Markdown extra
+		['|',S+13],[':',S+14]]
+	MarkdownAdd = 200
+	def escape_md_special!
+		MarkdownEscaped.each do |c|
+			escape_sequence = "\\#{c[0]}"
+			#puts "Escaping -#{escape_sequence}-"
+			escaped ="0"; escaped[0]=c[1]
+			gsub!(escape_sequence, escaped)
+		end
+		# But if you surround an * or _ with spaces,
+		# it’ll be treated as a literal asterisk or underscore.
+		gsub!(/\s\*(\s|$)/, [S+2].pack('c'))
+		gsub!(/\s_(\s|$)/,  [S+2].pack('c'))
+		self
+	end
+	def unescape_md_special!
+		for i in 0..size-1
+			for e in MarkdownEscaped
+				if self[i] == e[1]
+					self[i,1] = e[0]
+				end
+			end
+		end
+		self
+	end
+	def unescape_md_special; dup.unescape_md_special! end
+	def   escape_md_special; dup.  escape_md_special! end
+end