RubyGems - maruku - Versions diffs - 0.7.0 → 0.7.1 - Mend

maruku 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65)

checksums.yaml +4 -4
checksums.yaml.gz.sig +0 -0
data.tar.gz.sig +0 -0
data/docs/markdown_syntax.md +9 -21
data/lib/maruku/defaults.rb +1 -1
data/lib/maruku/element.rb +18 -3
data/lib/maruku/ext/fenced_code.rb +1 -1
data/lib/maruku/ext/math/mathml_engines/blahtex.rb +1 -1
data/lib/maruku/ext/math/to_html.rb +2 -9
data/lib/maruku/html.rb +5 -8
data/lib/maruku/input/html_helper.rb +94 -81
data/lib/maruku/input/mdline.rb +6 -4
data/lib/maruku/input/parse_block.rb +145 -37
data/lib/maruku/input/parse_span.rb +7 -8
data/lib/maruku/input/rubypants.rb +22 -9
data/lib/maruku/maruku.rb +5 -0
data/lib/maruku/output/to_html.rb +15 -6
data/lib/maruku/output/to_latex.rb +9 -3
data/lib/maruku/output/to_s.rb +0 -1
data/lib/maruku/string_utils.rb +2 -2
data/lib/maruku/version.rb +1 -1
data/spec/block_docs/abbrev.md +18 -18
data/spec/block_docs/attribute_sanitize.md +22 -0
data/spec/block_docs/auto_cdata.md +48 -0
data/spec/block_docs/bug_table.md +4 -4
data/spec/block_docs/code4.md +79 -0
data/spec/block_docs/div_without_newline.md +16 -0
data/spec/block_docs/empty_cells.md +3 -9
data/spec/block_docs/entities.md +6 -12
data/spec/block_docs/extra_table1.md +6 -6
data/spec/block_docs/fenced_code_blocks.md +12 -20
data/spec/block_docs/fenced_code_blocks_highlighted.md +1 -2
data/spec/block_docs/footnotes2.md +4 -1
data/spec/block_docs/ignore_bad_header.md +9 -0
data/spec/block_docs/issue106.md +78 -0
data/spec/block_docs/issue115.md +20 -0
data/spec/block_docs/issue117.md +13 -0
data/spec/block_docs/issue120.md +48 -0
data/spec/block_docs/issue123.md +11 -0
data/spec/block_docs/issue124.md +16 -0
data/spec/block_docs/issue40.md +24 -12
data/spec/block_docs/issue89.md +1 -1
data/spec/block_docs/lists_nested_blankline.md +14 -8
data/spec/block_docs/lists_ol.md +5 -5
data/spec/block_docs/lists_paraindent.md +6 -11
data/spec/block_docs/math-blahtex/equations.md +12 -13
data/spec/block_docs/math-blahtex/math2.md +9 -2
data/spec/block_docs/math/embedded_invalid_svg.md +31 -2
data/spec/block_docs/math/embedded_svg.md +41 -2
data/spec/block_docs/math/equations.md +7 -2
data/spec/block_docs/math/inline.md +2 -2
data/spec/block_docs/math/math2.md +9 -1
data/spec/block_docs/math/spaces_after_inline_math.md +17 -0
data/spec/block_docs/math/table.md +2 -2
data/spec/block_docs/math/table2.md +6 -6
data/spec/block_docs/table_attributes.md +4 -6
data/spec/block_docs/table_colspan.md +41 -0
data/spec/block_docs/tables.md +10 -21
data/spec/block_docs/tables2.md +74 -0
data/spec/block_docs/xml_comments.md +32 -0
data/spec/span_spec.rb +1 -1
data/spec/spec_helper.rb +1 -0
metadata +42 -28
metadata.gz.sig +3 -3
data/spec/block_docs/xml2.md +0 -19

data/lib/maruku/input/mdline.rb CHANGED

@@ -40,19 +40,19 @@ module MaRuKu
       return :definition     if self =~ Definition
       # I had a bug with emails and urls at the beginning of the
       # line that were mistaken for raw_html
-      return :text           if self =~ /\A[ ]{0,3}<([^:@>]+?@[^:@>]+?)>/
+      return :text           if self =~ /\A[ ]{0,3}#{EMailAddress}/
       return :text           if self =~ /\A[ ]{0,3}<http:/
       # raw html is like PHP Markdown Extra: at most three spaces before
       return :xml_instr      if self =~ /\A\s*<\?/
-      return :raw_html       if self =~ %r{^[ ]{0,3}</?\s*\w+}
+      return :raw_html       if self =~ %r{\A[ ]{0,3}</?\s*\w+}
       return :raw_html       if self =~ /\A[ ]{0,3}<\!\-\-/
       return :header1        if self =~ /\A(=)+/
       return :header2        if self =~ /\A([-\s])+\z/
       return :header3        if self =~ /\A(#)+\s*\S+/
       # at least three asterisks/hyphens/underscores on a line, and only whitespace
       return :hrule          if self =~ /\A(\s*[\*\-_]\s*){3,}\z/
-      return :ulist          if self =~ /\A([ ]{0,3}|\t)([\*\-\+])\s+.*/
-      return :olist          if self =~ /\A([ ]{0,3}|\t)\d+\.\s+.*/
+      return :ulist          if self =~ /\A[ ]{0,3}([\*\-\+])\s+.*/
+      return :olist          if self =~ /\A[ ]{0,3}\d+\.\s+.*/
       return :code           if number_of_leading_spaces >= 4
       return :quote          if self =~ /\A>/
       return :ald            if self =~ AttributeDefinitionList
@@ -126,4 +126,6 @@ module MaRuKu
   # Table syntax: http://michelf.ca/projects/php-markdown/extra/#table
   # | -------------:| ------------------------------ |
   TableSeparator = /\A(?>\|?\s*\:?\-+\:?\s*\|?)+?\z/
+  EMailAddress = /<([^:@>]+?@[^:@>]+?)>/
 end

data/lib/maruku/input/parse_block.rb CHANGED

@@ -1,5 +1,3 @@
-require 'set'
 module MaRuKu; module In; module Markdown; module BlockLevelParser
   include Helpers
@@ -99,18 +97,19 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
         output << read_abbreviation(src)
       when :xml_instr
         read_xml_instruction(src, output)
-      else # warn if we forgot something
-        line = src.cur_line
-        maruku_error "Ignoring line '#{line}' type = #{md_type}", src
-        src.shift_line
+      else # unhandled line type at this level
+        # Just treat it as raw text
+        read_text_material(src, output)
       end
     end
     merge_ial(output, src, output)
-    output.delete_if {|x| x.kind_of?(MDElement) && x.node_type == :ial }
-    # get rid of empty line markers
-    output.delete_if {|x| x == :empty }
+    output.delete_if do |x|
+      # Strip out IAL
+      (x.kind_of?(MDElement) && x.node_type == :ial) ||
+      # get rid of empty line markers
+      x == :empty
+    end
     # See for each list if we can omit the paragraphs
     # TODO: do this after
@@ -152,7 +151,7 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
         output << md_el(:definition_list, definition)
       end
     else # Start of a paragraph
-      output << read_paragraph(src)
+      output.concat read_paragraph(src)
     end
   end
@@ -278,16 +277,6 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
         break
       when :olist, :ulist
         break if !src.next_line || src.next_line.md_type == t
-      when :raw_html
-        # This is a pretty awful hack to handle inline HTML
-        # but it means double-parsing HMTL.
-        html = parse_span([src.cur_line], src)
-        unless html.empty? || html.first.is_a?(String)
-          if html.first.parsed_html
-            first_node_name = html.first.parsed_html.first_node_name
-          end
-        end
-        break if first_node_name && !HTML_INLINE_ELEMS.include?(first_node_name)
       end
       break if src.cur_line.strip.empty?
       break if src.next_line && [:header1, :header2].include?(src.next_line.md_type)
@@ -297,7 +286,48 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
     end
     children = parse_span(lines, src)
-    md_par(children)
+    pick_apart_non_inline_html(children)
+  end
+  # If there are non-inline HTML tags in the paragraph, break them out into
+  # their own elements and make paragraphs out of everything else.
+  def pick_apart_non_inline_html(children)
+    output = []
+    para_children = []
+    children.each do |child|
+      if element_is_non_inline_html?(child)
+        unless para_children.empty?
+          # Fix up paragraphs before non-inline elements having an extra space
+          last_child = para_children.last
+          if last_child.is_a?(String) && !last_child.empty?
+            last_child.replace last_child[0..-2]
+          end
+          output << md_par(para_children)
+          para_children = []
+        end
+        output << child
+      else
+        para_children << child
+      end
+    end
+    unless para_children.empty?
+      output << md_par(para_children)
+    end
+    output
+  end
+  # Is the given element an HTML element whose root is not an inline element?
+  def element_is_non_inline_html?(elem)
+    if elem.is_a?(MDElement) && elem.node_type == :raw_html && elem.parsed_html
+      first_node_name = elem.parsed_html.first_node_name
+      first_node_name && !HTML_INLINE_ELEMS.include?(elem.parsed_html.first_node_name)
+    else
+      false
+    end
   end
   # Reads one list item, either ordered or unordered.
@@ -310,14 +340,18 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
     indentation, ial = spaces_before_first_char(first)
     al = read_attribute_list(CharSource.new(ial, src)) if ial
     ial_offset = ial ? ial.length + 3 : 0
-    lines, want_my_paragraph =
-      read_indented_content(src, indentation, [], item_type, ial_offset)
+    lines, want_my_paragraph = read_indented_content(src, indentation, [], item_type, ial_offset)
+    # in case there is a second line and this line starts a new list, format it.
+    if !lines.empty? && [:ulist, :olist].include?(MaRuKu::MDLine.new(lines.first).md_type)
+      lines.unshift ""
+    end
     # add first line
     # Strip first '*', '-', '+' from first line
-    stripped = first[indentation, first.size - 1]
+    first_changed = first.gsub(/([^\t]*)(\t)/) { $1 + " " * (TAB_SIZE - $1.length % TAB_SIZE) }
+    stripped = first_changed[indentation, first_changed.size - 1]
     lines.unshift stripped
     src2 = LineSource.new(lines, src, parent_offset)
     children = parse_blocks(src2)
@@ -512,8 +546,18 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
     out << md_ref_def(id, url, :title => title)
   end
-  def split_cells(s)
-    s.split('|').reject(&:empty?).map(&:strip)
+  def split_cells(s, allowBlank = false)
+    if allowBlank
+      if /^[|].*[|]$/ =~ s # handle the simple and decorated table cases
+        s.split('|', -1)[1..-2]   # allow blank cells, but only keep the inner elements of the cells
+      elsif /^.*[|]$/ =~ s
+        s.split('|', -1)[0..-2]   # allow blank cells, but only keep the inner elements of the cells
+      else
+        s.split('|', -1)
+      end
+    else
+      s.split('|').reject(&:empty?).map(&:strip)
+    end
   end
   def read_table(src)
@@ -530,7 +574,9 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
       # otherwise left-align.
       starts = s.start_with? ':'
       ends = s.end_with? ':'
-      if starts && ends
+      if s.empty? # blank
+        nil
+      elsif starts && ends
         :center
       elsif ends
         :right
@@ -539,8 +585,11 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
       end
     end
+    align.pop if align[-1].nil? # trailing blank
     num_columns = align.size
+    head.pop if head.size == num_columns + 1 && head[-1].al.size == 0 # trailing blank
     if head.size != num_columns
       maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
       tell_user "I will ignore this table."
@@ -549,23 +598,82 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
     end
     rows = []
-    while src.cur_line && src.cur_line =~ /\|/
-      row = split_cells(src.shift_line).map do |s|
-        md_el(:cell, parse_span(s))
+    while src.cur_line && src.cur_line.include?('|')
+      row = []
+      colCount = 0
+      colspan = 1
+      currElem = nil
+      currIdx = 0
+      split_cells(src.shift_line, true).map do |s|
+        if s.empty?
+          # empty cells increase the colspan of the previous cell
+          found = false
+          colspan +=  1
+          al = (currElem &&currElem.al) || AttributeList.new
+          if al.size > 0
+            elem = find_colspan(al)
+            if elem != nil
+              elem[1] = colspan.to_s
+              found = true
+            end
+          end
+          al.push(["colspan", colspan.to_s]) unless found # also handles the case of and empty attribute list
+        else
+          colspan = 1
+          row[currIdx] = md_el(:cell, parse_span(s))
+          currElem = row[currIdx]
+          currIdx += 1
+        end
       end
+      #
+      # sanity check - make sure the current row has the right number of columns (including spans)
+      #                If not, dump the table and return a break
+      #
+      num_columns = count_columns(row)
+      if num_columns == head.size + 1 && row[-1].al.size == 0 #trailing blank cell
+        row.pop
+        num_columns -= 1
+      end
       if head.size != num_columns
-        maruku_error  "Row does not have #{num_columns} columns: \n#{row.inspect}"
+        maruku_error  "Row does not have #{head.size} columns: \n#{row.inspect} - #{num_columns}"
         tell_user "I will ignore this table."
-        # XXX try to recover
+        # XXX need to recover
         return md_br
       end
       rows << row
     end
+    rows.unshift(head) # put the header row on the processed table
+    md_el(:table, rows, { :align => align })
+  end
+  #
+  # count the actual number of elements in a row taking into account colspans
+  #
+  def count_columns(row)
+    colCount = 0
+    row.each do |cell|
+      if cell.al && cell.al.size > 0
+        al = find_colspan(cell.al)
+        if al != nil
+          colCount += al[1].to_i
+        else
+          colCount += 1
+        end
+      else
+        colCount += 1
+      end
+    end
+    colCount
+  end
-    children = (head + rows).flatten
-    md_el(:table, children, { :align => align })
+  #
+  # Search an attribute list looking for a colspan
+  #
+  def find_colspan(al)
+    al.find {|alElem| alElem[0] == "colspan" }
   end
   # If current line is text, a definition list is coming

data/lib/maruku/input/parse_span.rb CHANGED

@@ -335,7 +335,7 @@ module MaRuKu::In::Markdown::SpanLevelParser
     end
   end
-  # Reads a simple string (no formatting) until one of break_on_chars,
+  # Reads a simple string (no formatting) until one of exit_on_chars,
   # while escaping the escaped.
   # If the string is empty, it returns nil.
   # By default, raises on error if the string terminates unexpectedly. This can be
@@ -355,7 +355,7 @@ module MaRuKu::In::Markdown::SpanLevelParser
       when nil
         if warn
           maruku_error "String finished while reading (break on " +
-            "#{exit_on_chars.inspect})" +
+            "#{(exit_on_chars + exit_on_strings).inspect})" +
             " already read: #{text.inspect}", src
         end
         break
@@ -447,6 +447,11 @@ module MaRuKu::In::Markdown::SpanLevelParser
     # We will read until this string
     end_string = "`" * num_ticks
+    # Try to handle empty single-ticks
+    if num_ticks > 1 && !src.next_matches(/.*#{Regexp.escape(end_string)}/)
+      con.push_element(end_string) and return
+    end
     code = read_simple(src, nil, nil, end_string)
     # We didn't find a closing batch!
@@ -454,12 +459,6 @@ module MaRuKu::In::Markdown::SpanLevelParser
       con.push_element(end_string + (code || '')) and return
     end
-    # We didn't find a closing batch!
-    if !code || src.cur_char != '`'
-      con.push_element(end_string + (code || ''))
-      return
-    end
     #   puts "Now I expects #{num_ticks} ticks: #{src.cur_chars(10).inspect}"
     src.ignore_chars num_ticks

data/lib/maruku/input/rubypants.rb CHANGED

@@ -164,8 +164,17 @@ module MaRuKu::In::Markdown::SpanLevelParser
     # Replace all matches in the input at once with the
     # same elements from "replacement".
     def apply(first, input, output)
-      intersperse(first.split(pattern), replacement).each do |x|
-        append_to_output(output, x)
+      split = first.split(pattern)
+      if split.empty?
+        first.scan(pattern).size.times do
+          clone_elems(replacement).each do |x|
+            append_to_output(output, x)
+          end
+        end
+      else
+        intersperse(first.split(pattern), replacement).each do |x|
+          append_to_output(output, x)
+        end
       end
     end
@@ -174,18 +183,22 @@ module MaRuKu::In::Markdown::SpanLevelParser
     # Sort of like "join" - places the elements in "elem"
     # between each adjacent element in the array.
     def intersperse(ary, elem)
-      return ary if ary.length <= 1
+      return clone_elems(elem) if ary.empty?
+      return ary if ary.length == 1
       h, *t = ary
       t.inject([h]) do |r, e|
-        entities = elem.map do |el|
-          en = el.clone
-          en.doc = doc
-          en
-        end
-        r.concat entities
+        r.concat clone_elems(elem)
         r << e
       end
     end
+    def clone_elems(elems)
+      elems.map do |el|
+        en = el.clone
+        en.doc = doc
+        en
+      end
+    end
   end
   # A more complex rule that uses a capture group from the

data/lib/maruku/maruku.rb CHANGED

@@ -9,4 +9,9 @@ class Maruku < MaRuKu::MDDocument
     self.attributes.merge! meta
     parse_doc(s) if s
   end
+  def to_s
+    warn "Maruku#to_s is deprecated and will be removed or changed in a near-future version of Maruku."
+    super
+  end
 end

data/lib/maruku/output/to_html.rb CHANGED

@@ -4,6 +4,14 @@ require 'cgi'
 # This module groups all functions related to HTML export.
 module MaRuKu::Out::HTML
+  # Escape text for use in HTML (content or attributes) by running it through
+  # standard XML escaping (quotes and angle brackets and ampersands)
+  def self.escapeHTML(text)
+    CGI.escapeHTML(text)
+    # TODO: When we drop Rubies < 1.9.3, re-add .gsub(/[^[:print:]\n\r\t]/, '') to
+    # get rid of non-printable control characters.
+  end
   # A simple class to represent an HTML element for output.
   class HTMLElement
     attr_accessor :name
@@ -88,7 +96,7 @@ module MaRuKu::Out::HTML
   # Helper to create a text node
   def xtext(text)
-    CGI.escapeHTML(text)
+    MaRuKu::Out::HTML.escapeHTML(text)
   end
   # Helper to create an element
@@ -386,15 +394,14 @@ module MaRuKu::Out::HTML
   # Pretty much the same as the HTMLElement constructor except it
   # copies standard attributes out of the Maruku Element's attributes hash.
   def html_element(name, content="", attributes={})
-    if attributes.empty? && content.is_a?(Hash)
-      attributes = content
-    end
+    attributes = content if attributes.empty? && content.is_a?(Hash)
     Array(HTML4Attributes[name]).each do |att|
       if v = @attributes[att]
-        attributes[att.to_s] = v.to_s
+        attributes[att.to_s] = MaRuKu::Out::HTML.escapeHTML(v.to_s)
       end
     end
     content = yield if block_given?
     HTMLElement.new(name, attributes, content)
@@ -798,7 +805,9 @@ module MaRuKu::Out::HTML
   def to_html_table
     num_columns = self.align.size
-    head, *rows = @children.each_slice(num_columns).to_a
+    # The table data is passed as a multi-dimensional array
+    # we just need to split the head from the body
+    head, *rows = @children
     table = html_element('table')
     thead = xelem('thead')