RubyGems - maruku - Versions diffs - 0.6.0 → 0.7.3 - Mend

maruku 0.6.0 → 0.7.3

Files changed (290) hide show

checksums.yaml +7 -0
data/MIT-LICENSE.txt +20 -0
data/bin/maruku +153 -152
data/bin/marutex +2 -29
data/data/entities.xml +261 -0
data/docs/markdown_syntax.md +9 -21
data/docs/math.md +14 -18
data/lib/maruku.rb +65 -78
data/lib/maruku/attributes.rb +109 -214
data/lib/maruku/defaults.rb +45 -67
data/lib/maruku/document.rb +44 -0
data/lib/maruku/element.rb +138 -0
data/lib/maruku/errors.rb +80 -0
data/lib/maruku/ext/div.rb +105 -113
data/lib/maruku/ext/fenced_code.rb +97 -0
data/lib/maruku/ext/math.rb +22 -26
data/lib/maruku/ext/math/elements.rb +20 -26
data/lib/maruku/ext/math/mathml_engines/blahtex.rb +92 -104
data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +33 -26
data/lib/maruku/ext/math/mathml_engines/none.rb +11 -19
data/lib/maruku/ext/math/mathml_engines/ritex.rb +2 -4
data/lib/maruku/ext/math/parsing.rb +121 -115
data/lib/maruku/ext/math/to_html.rb +202 -187
data/lib/maruku/ext/math/to_latex.rb +34 -21
data/lib/maruku/helpers.rb +158 -257
data/lib/maruku/html.rb +251 -0
data/lib/maruku/input/charsource.rb +272 -319
data/lib/maruku/input/extensions.rb +62 -63
data/lib/maruku/input/html_helper.rb +233 -189
data/lib/maruku/input/linesource.rb +90 -110
data/lib/maruku/input/mdline.rb +131 -0
data/lib/maruku/input/parse_block.rb +736 -613
data/lib/maruku/input/parse_doc.rb +145 -217
data/lib/maruku/input/parse_span.rb +740 -0
data/lib/maruku/inspect_element.rb +60 -0
data/lib/maruku/maruku.rb +14 -30
data/lib/maruku/output/entity_table.rb +37 -0
data/lib/maruku/output/s5/fancy.rb +462 -462
data/lib/maruku/output/s5/to_s5.rb +115 -135
data/lib/maruku/output/to_html.rb +907 -983
data/lib/maruku/output/to_latex.rb +571 -563
data/lib/maruku/output/to_markdown.rb +207 -162
data/lib/maruku/output/to_s.rb +10 -52
data/lib/maruku/string_utils.rb +129 -179
data/lib/maruku/toc.rb +185 -196
data/lib/maruku/version.rb +33 -38
data/spec/block_docs/abbrev.md +776 -0
data/{tests/unittest → spec/block_docs}/abbreviations.md +11 -20
data/spec/block_docs/abbreviations2.md +27 -0
data/{tests/unittest → spec/block_docs}/alt.md +2 -14
data/{tests/unittest/pending → spec/block_docs}/amps.md +1 -13
data/spec/block_docs/attribute_sanitize.md +22 -0
data/{tests/unittest → spec/block_docs}/attributes/att2.md +0 -12
data/{tests/unittest → spec/block_docs}/attributes/att3.md +2 -14
data/{tests/unittest → spec/block_docs}/attributes/attributes.md +12 -16
data/{tests/unittest → spec/block_docs}/attributes/circular.md +0 -12
data/{tests/unittest → spec/block_docs}/attributes/default.md +1 -13
data/spec/block_docs/atx_headers.md +22 -0
data/spec/block_docs/auto_cdata.md +48 -0
data/spec/block_docs/bad_cites.md +30 -0
data/spec/block_docs/bad_divrefs.md +30 -0
data/{tests/unittest → spec/block_docs}/blank.md +0 -12
data/{tests/unittest → spec/block_docs}/blanks_in_code.md +16 -15
data/spec/block_docs/block_quotes.md +66 -0
data/{tests/unittest/loss.md → spec/block_docs/bug_def.md} +6 -18
data/{tests/unittest → spec/block_docs}/bug_table.md +7 -19
data/spec/block_docs/cites.md +37 -0
data/{tests/unittest → spec/block_docs}/code.md +7 -14
data/{tests/unittest → spec/block_docs}/code2.md +4 -14
data/{tests/unittest → spec/block_docs}/code3.md +12 -16
data/spec/block_docs/code4.md +79 -0
data/{tests/unittest → spec/block_docs}/data_loss.md +2 -14
data/spec/block_docs/div_without_newline.md +16 -0
data/{tests/unittest → spec/block_docs}/divs/div1.md +0 -12
data/{tests/unittest → spec/block_docs}/divs/div2.md +0 -12
data/{tests/unittest → spec/block_docs}/divs/div3_nest.md +3 -15
data/{tests/unittest → spec/block_docs}/easy.md +1 -13
data/spec/block_docs/email.md +29 -0
data/spec/block_docs/empty_cells.md +31 -0
data/{tests/unittest → spec/block_docs}/encoding/iso-8859-1.md +1 -14
data/{tests/unittest → spec/block_docs}/encoding/utf-8.md +0 -12
data/{tests/unittest → spec/block_docs}/entities.md +33 -41
data/{tests/unittest/notyet → spec/block_docs}/escape.md +2 -14
data/{tests/unittest → spec/block_docs}/escaping.md +11 -22
data/{tests/unittest → spec/block_docs}/extra_dl.md +2 -13
data/{tests/unittest → spec/block_docs}/extra_header_id.md +14 -20
data/{tests/unittest → spec/block_docs}/extra_table1.md +9 -21
data/spec/block_docs/fenced_code_blocks.md +58 -0
data/spec/block_docs/fenced_code_blocks_highlighted.md +17 -0
data/{tests/unittest → spec/block_docs}/footnotes.md +12 -24
data/spec/block_docs/footnotes2.md +82 -0
data/spec/block_docs/hard.md +25 -0
data/spec/block_docs/header_after_par.md +62 -0
data/{tests/unittest → spec/block_docs}/headers.md +10 -18
data/{tests/unittest → spec/block_docs}/hex_entities.md +7 -18
data/{tests/unittest → spec/block_docs}/hrule.md +5 -12
data/{tests/unittest → spec/block_docs}/html3.md +1 -13
data/{tests/unittest → spec/block_docs}/html4.md +2 -14
data/{tests/unittest → spec/block_docs}/html5.md +2 -14
data/spec/block_docs/html_block_in_para.md +22 -0
data/spec/block_docs/html_inline.md +25 -0
data/spec/block_docs/html_trailing.md +31 -0
data/spec/block_docs/ie.md +62 -0
data/spec/block_docs/iframe.md +29 -0
data/spec/block_docs/ignore_bad_header.md +9 -0
data/{tests/unittest → spec/block_docs}/images.md +22 -28
data/{tests/unittest → spec/block_docs}/images2.md +7 -17
data/{tests/unittest → spec/block_docs}/inline_html.md +37 -67
data/{tests/unittest → spec/block_docs}/inline_html2.md +1 -13
data/spec/block_docs/inline_html_beginning.md +10 -0
data/spec/block_docs/issue106.md +78 -0
data/spec/block_docs/issue115.md +20 -0
data/spec/block_docs/issue117.md +13 -0
data/spec/block_docs/issue120.md +48 -0
data/spec/block_docs/issue123.md +11 -0
data/spec/block_docs/issue124.md +16 -0
data/spec/block_docs/issue126.md +9 -0
data/spec/block_docs/issue130.md +11 -0
data/spec/block_docs/issue20.md +9 -0
data/spec/block_docs/issue26.md +22 -0
data/spec/block_docs/issue29.md +9 -0
data/spec/block_docs/issue30.md +30 -0
data/spec/block_docs/issue31.md +25 -0
data/spec/block_docs/issue40.md +52 -0
data/spec/block_docs/issue64.md +55 -0
data/spec/block_docs/issue67.md +19 -0
data/spec/block_docs/issue70.md +11 -0
data/spec/block_docs/issue72.md +17 -0
data/spec/block_docs/issue74.md +38 -0
data/spec/block_docs/issue79.md +15 -0
data/spec/block_docs/issue83.md +13 -0
data/spec/block_docs/issue85.md +25 -0
data/spec/block_docs/issue88.md +19 -0
data/spec/block_docs/issue89.md +12 -0
data/spec/block_docs/issue90.md +38 -0
data/{tests/unittest/pending → spec/block_docs}/link.md +21 -18
data/{tests/unittest → spec/block_docs}/links.md +33 -32
data/spec/block_docs/links2.md +21 -0
data/{tests/unittest → spec/block_docs}/list1.md +0 -12
data/{tests/unittest → spec/block_docs}/list12.md +2 -14
data/{tests/unittest → spec/block_docs}/list2.md +2 -14
data/spec/block_docs/list_multipara.md +42 -0
data/{tests/unittest → spec/block_docs}/lists.md +28 -29
data/{tests/unittest → spec/block_docs}/lists10.md +2 -14
data/spec/block_docs/lists11.md +23 -0
data/spec/block_docs/lists12.md +43 -0
data/spec/block_docs/lists13.md +55 -0
data/spec/block_docs/lists14.md +61 -0
data/spec/block_docs/lists15.md +36 -0
data/spec/block_docs/lists6.md +88 -0
data/spec/block_docs/lists7b.md +58 -0
data/spec/block_docs/lists9.md +53 -0
data/{tests/unittest → spec/block_docs}/lists_after_paragraph.md +19 -25
data/spec/block_docs/lists_blank.md +35 -0
data/{tests/unittest/list3.md → spec/block_docs/lists_blockquote_code.md} +2 -14
data/{tests/unittest/list4.md → spec/block_docs/lists_need_blank_line.md} +44 -29
data/spec/block_docs/lists_nested.md +44 -0
data/spec/block_docs/lists_nested_blankline.md +34 -0
data/spec/block_docs/lists_nested_deep.md +43 -0
data/spec/block_docs/lists_ol.md +129 -0
data/spec/block_docs/lists_ol2.md +147 -0
data/spec/block_docs/lists_paraindent.md +42 -0
data/spec/block_docs/lists_tab.md +54 -0
data/spec/block_docs/loss.md +17 -0
data/spec/block_docs/math-blahtex/equations.md +29 -0
data/spec/block_docs/math-blahtex/inline.md +48 -0
data/spec/block_docs/math-blahtex/math2.md +52 -0
data/spec/block_docs/math-blahtex/table.md +25 -0
data/spec/block_docs/math/embedded_invalid_svg.md +108 -0
data/spec/block_docs/math/embedded_svg.md +136 -0
data/spec/block_docs/math/equations.md +49 -0
data/spec/block_docs/math/inline.md +46 -0
data/spec/block_docs/math/math2.md +53 -0
data/{tests/unittest → spec/block_docs}/math/notmath.md +0 -12
data/spec/block_docs/math/raw_mathml.md +87 -0
data/spec/block_docs/math/spaces_after_inline_math.md +17 -0
data/spec/block_docs/math/table.md +25 -0
data/{tests/unittest → spec/block_docs}/math/table2.md +11 -23
data/{tests/unittest → spec/block_docs}/misc_sw.md +184 -121
data/{tests/unittest → spec/block_docs}/olist.md +6 -18
data/{tests/unittest → spec/block_docs}/one.md +0 -12
data/{tests/unittest → spec/block_docs}/paragraph.md +0 -12
data/{tests/unittest → spec/block_docs}/paragraph_rules/dont_merge_ref.md +4 -12
data/{tests/unittest → spec/block_docs}/paragraph_rules/tab_is_blank.md +0 -12
data/{tests/unittest → spec/block_docs}/paragraphs.md +1 -13
data/{tests/unittest → spec/block_docs}/recover/recover_links.md +4 -16
data/{tests/unittest/pending/ref.md → spec/block_docs/ref_with_period.md} +7 -16
data/spec/block_docs/ref_with_title.md +22 -0
data/{tests/unittest → spec/block_docs}/references/long_example.md +16 -23
data/{tests/unittest → spec/block_docs}/references/spaces_and_numbers.md +0 -12
data/{tests/unittest → spec/block_docs}/smartypants.md +24 -31
data/{tests/unittest → spec/block_docs}/syntax_hl.md +13 -17
data/{tests/unittest → spec/block_docs}/table_attributes.md +6 -20
data/spec/block_docs/table_colspan.md +41 -0
data/spec/block_docs/tables.md +47 -0
data/spec/block_docs/tables2.md +74 -0
data/{tests/unittest → spec/block_docs}/test.md +1 -13
data/{tests/unittest/notyet → spec/block_docs}/ticks.md +1 -13
data/spec/block_docs/toc.md +87 -0
data/{tests/unittest/notyet → spec/block_docs}/triggering.md +14 -25
data/{tests/unittest → spec/block_docs}/underscore_in_words.md +0 -12
data/{tests/unittest → spec/block_docs}/wrapping.md +4 -16
data/spec/block_docs/xml.md +33 -0
data/spec/block_docs/xml3.md +24 -0
data/spec/block_docs/xml_comments.md +32 -0
data/{tests/unittest → spec/block_docs}/xml_instruction.md +9 -20
data/spec/block_spec.rb +110 -0
data/spec/cli_spec.rb +8 -0
data/spec/span_spec.rb +263 -0
data/spec/spec_helper.rb +3 -0
data/spec/to_html_utf8_spec.rb +13 -0
metadata +218 -202
data/Rakefile +0 -73
data/bin/marudown +0 -29
data/bin/marutest +0 -345
data/docs/changelog.md +0 -334
data/lib/maruku/errors_management.rb +0 -92
data/lib/maruku/ext/math/latex_fix.rb +0 -12
data/lib/maruku/input/parse_span_better.rb +0 -746
data/lib/maruku/input/rubypants.rb +0 -225
data/lib/maruku/input/type_detection.rb +0 -147
data/lib/maruku/output/to_latex_entities.rb +0 -367
data/lib/maruku/output/to_latex_strings.rb +0 -64
data/lib/maruku/structures.rb +0 -167
data/lib/maruku/structures_inspect.rb +0 -87
data/lib/maruku/structures_iterators.rb +0 -61
data/lib/maruku/tests/benchmark.rb +0 -82
data/lib/maruku/tests/new_parser.rb +0 -373
data/lib/maruku/tests/tests.rb +0 -136
data/lib/maruku/usage/example1.rb +0 -33
data/maruku_gem.rb +0 -33
data/tests/bugs/code_in_links.md +0 -101
data/tests/bugs/complex_escaping.md +0 -38
data/tests/math/syntax.md +0 -46
data/tests/math_usage/document.md +0 -13
data/tests/others/abbreviations.md +0 -11
data/tests/others/blank.md +0 -4
data/tests/others/code.md +0 -5
data/tests/others/code2.md +0 -8
data/tests/others/code3.md +0 -16
data/tests/others/email.md +0 -4
data/tests/others/entities.md +0 -19
data/tests/others/escaping.md +0 -16
data/tests/others/extra_dl.md +0 -101
data/tests/others/extra_header_id.md +0 -13
data/tests/others/extra_table1.md +0 -40
data/tests/others/footnotes.md +0 -17
data/tests/others/headers.md +0 -10
data/tests/others/hrule.md +0 -10
data/tests/others/images.md +0 -20
data/tests/others/inline_html.md +0 -42
data/tests/others/links.md +0 -38
data/tests/others/list1.md +0 -4
data/tests/others/list2.md +0 -5
data/tests/others/list3.md +0 -8
data/tests/others/lists.md +0 -32
data/tests/others/lists_after_paragraph.md +0 -44
data/tests/others/lists_ol.md +0 -39
data/tests/others/misc_sw.md +0 -105
data/tests/others/one.md +0 -1
data/tests/others/paragraphs.md +0 -13
data/tests/others/sss06.md +0 -352
data/tests/others/test.md +0 -4
data/tests/s5/s5profiling.md +0 -48
data/tests/unittest/bug_def.md +0 -28
data/tests/unittest/email.md +0 -32
data/tests/unittest/hang.md +0 -29
data/tests/unittest/html2.md +0 -34
data/tests/unittest/ie.md +0 -61
data/tests/unittest/links2.md +0 -34
data/tests/unittest/lists11.md +0 -28
data/tests/unittest/lists6.md +0 -53
data/tests/unittest/lists9.md +0 -76
data/tests/unittest/lists_ol.md +0 -274
data/tests/unittest/math/equations.md +0 -86
data/tests/unittest/math/inline.md +0 -58
data/tests/unittest/math/math2.md +0 -57
data/tests/unittest/math/table.md +0 -37
data/tests/unittest/notyet/header_after_par.md +0 -70
data/tests/unittest/pending/empty_cells.md +0 -49
data/tests/unittest/red_tests/abbrev.md +0 -1388
data/tests/unittest/red_tests/lists7.md +0 -68
data/tests/unittest/red_tests/lists7b.md +0 -128
data/tests/unittest/red_tests/lists8.md +0 -76
data/tests/unittest/red_tests/xml.md +0 -70
data/tests/unittest/xml2.md +0 -31
data/tests/unittest/xml3.md +0 -38
data/tests/utf8-files/simple.md +0 -1
data/unit_test_block.sh +0 -5
data/unit_test_span.sh +0 -3

@@ -0,0 +1,251 @@
+require 'set'
+$warned_nokogiri = false
+module MaRuKu
+  HTML_INLINE_ELEMS = Set.new %w[a abbr acronym audio b bdi bdo big br button canvas caption cite code
+    col colgroup command datalist del details dfn dir em fieldset font form i img input ins
+    kbd label legend mark meter optgroup option progress q rp rt ruby s samp select small
+    source span strike strong sub summary sup tbody td tfoot th thead time tr track tt u var video wbr
+    animate animateColor animateMotion animateTransform circle clipPath defs desc ellipse
+    feGaussianBlur filter font-face font-face-name font-face-src foreignObject g glyph hkern
+    linearGradient line marker mask metadata missing-glyph mpath path pattern polygon polyline
+    radialGradient rect set stop svg switch text textPath title tspan use
+    annotation annotation-xml maction math menclose merror mfrac mfenced mi mmultiscripts mn mo
+    mover mpadded mphantom mprescripts mroot mrow mspace msqrt mstyle msub msubsup msup mtable
+    mtd mtext mtr munder munderover none semantics]
+  # Parse block-level markdown elements in these HTML tags
+  BLOCK_TAGS = Set.new %w[div section]
+  # This gets mixed into HTML MDElement nodes to hold the parsed document fragment
+  module HTMLElement
+    attr_accessor :parsed_html
+  end
+  # This is just a factory, not an actual class
+  module HTMLFragment
+    # HTMLFragment.new produces a concrete HTMLFragment implementation
+    # that is either a NokogiriHTMLFragment or a REXMLHTMLFragment.
+    def self.new(raw_html)
+      if !$warned_nokogiri && MaRuKu::Globals[:html_parser] == 'nokogiri'
+        begin
+          require 'nokogiri'
+          return NokogiriHTMLFragment.new(raw_html)
+        rescue LoadError
+          warn "Nokogiri could not be loaded. Falling back to REXML."
+          $warned_nokogiri = true
+        end
+      end
+      require 'rexml/document'
+      REXMLHTMLFragment.new(raw_html)
+    end
+  end
+  # Nokogiri backend for HTML handling
+  class NokogiriHTMLFragment
+    def initialize(raw_html)
+      # Wrap our HTML in a dummy document with a doctype (just
+      # for the entity references)
+      wrapped = '<!DOCTYPE html PUBLIC
+  "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
+  "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
+<html>' + raw_html.strip + '</html>'
+      d = Nokogiri::XML::Document.parse(wrapped) {|c| c.nonet }
+      @fragment = d.root
+    end
+    # @return The name of the first child element in the fragment.
+    def first_node_name
+      first_child = @fragment.children.first
+      first_child ? first_child.name : nil
+    end
+    # Add a class to the children of this fragment
+    def add_class(class_name)
+      @fragment.children.each do |c|
+        c['class'] = ((c['class']||'').split(' ') + [class_name]).join(' ')
+      end
+    end
+    # Process markdown within the contents of some elements and
+    # replace their contents with the processed version.
+    #
+    # @param doc [MaRuKu::MDDocument] A document to process.
+    def process_markdown_inside_elements(doc)
+      # find span elements or elements with 'markdown' attribute
+      elts = @fragment.css("[markdown]")
+      d = @fragment.children.first
+      if d && HTML_INLINE_ELEMS.include?(d.name)
+        elts << d unless d.attribute('markdown')
+        elts += span_descendents(d)
+      end
+      elts.each do |e|
+        how = e['markdown']
+        e.remove_attribute('markdown')
+        next if "0" == how # user requests no markdown parsing inside
+        parse_blocks = (how == 'block') || BLOCK_TAGS.include?(e.name)
+        # Select all text children of e
+        e.xpath("./text()").each do |original_text|
+          s = MaRuKu::Out::HTML.escapeHTML(original_text.text)
+          unless s.strip.empty?
+            parsed = parse_blocks ? doc.parse_text_as_markdown(s) : doc.parse_span(s)
+            # restore leading and trailing spaces
+            padding = /\A(\s*).*?(\s*)\z/.match(s)
+            parsed = [padding[1]] + parsed + [padding[2]] if padding
+            el = doc.md_el(:dummy, parsed)
+            # Nokogiri collapses consecutive Text nodes, so replace it by a dummy element
+            guard = Nokogiri::XML::Element.new('guard', @fragment)
+            original_text.replace(guard)
+            el.children_to_html.each do |x|
+              guard.before(x.to_s)
+            end
+            guard.remove
+          end
+        end
+      end
+    end
+    # Convert this fragment to an HTML or XHTML string.
+    # @return [String]
+    def to_html
+      output_options = Nokogiri::XML::Node::SaveOptions::DEFAULT_XHTML ^
+        Nokogiri::XML::Node::SaveOptions::FORMAT
+      @fragment.children.inject("") do |out, child|
+        out << child.serialize(:save_with => output_options, :encoding => 'UTF-8')
+      end
+    end
+    private
+    # Get all span-level descendents of the given element, recursively,
+    # as a flat NodeSet.
+    #
+    # @param e [Nokogiri::XML::Node] An element.
+    # @return [Nokogiri::XML::NodeSet]
+    def span_descendents(e)
+      ns = Nokogiri::XML::NodeSet.new(Nokogiri::XML::Document.new)
+      e.element_children.inject(ns) do |descendents, c|
+        if HTML_INLINE_ELEMS.include?(c.name)
+          descendents << c
+          descendents += span_descendents(c)
+        end
+        descendents
+      end
+    end
+  end
+  # An HTMLFragment implementation using REXML
+  class REXMLHTMLFragment
+    def initialize(raw_html)
+      wrapped = '<!DOCTYPE html PUBLIC
+  "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
+  "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
+<html>' + raw_html.strip + '</html>'
+      @fragment = REXML::Document.new(wrapped).root
+    end
+    # The name of the first element in the fragment
+    def first_node_name
+      first_child = @fragment.children.first
+      (first_child && first_child.respond_to?(:name)) ? first_child.name : nil
+    end
+    # Add a class to the children of this fragment
+    def add_class(class_name)
+      @fragment.each_element do |c|
+        c.attributes['class'] = ((c.attributes['class']||'').split(' ') + [class_name]).join(' ')
+      end
+    end
+    # Process markdown within the contents of some elements and
+    # replace their contents with the processed version.
+    def process_markdown_inside_elements(doc)
+      elts = []
+      @fragment.each_element('//*[@markdown]') do |e|
+        elts << e
+      end
+      d = @fragment.children.first
+      if d && HTML_INLINE_ELEMS.include?(first_node_name)
+        elts << d unless d.attributes['markdown']
+        elts += span_descendents(d)
+      end
+      # find span elements or elements with 'markdown' attribute
+      elts.each do |e|
+        # should we parse block-level or span-level?
+        how = e.attributes['markdown']
+        e.attributes.delete('markdown')
+        next if "0" == how # user requests no markdown parsing inside
+        parse_blocks = (how == 'block') || BLOCK_TAGS.include?(e.name)
+        # Select all text children of e
+        e.texts.each do |original_text|
+          s = MaRuKu::Out::HTML.escapeHTML(original_text.value)
+          unless s.strip.empty?
+            # TODO extract common functionality
+            parsed = parse_blocks ? doc.parse_text_as_markdown(s) : doc.parse_span(s)
+            # restore leading and trailing spaces
+            padding = /\A(\s*).*?(\s*)\z/.match(s)
+            parsed = [padding[1]] + parsed + [padding[2]] if padding
+            el = doc.md_el(:dummy, parsed)
+            new_html = "<dummy>"
+            el.children_to_html.each do |x|
+              new_html << x.to_s
+            end
+            new_html << "</dummy>"
+            newdoc = REXML::Document.new(new_html).root
+            p = original_text.parent
+            newdoc.children.each do |c|
+              p.insert_before(original_text, c)
+            end
+            p.delete(original_text)
+          end
+        end
+      end
+    end
+    def to_html
+      formatter = REXML::Formatters::Default.new(true)
+      @fragment.children.inject("") do |out, child|
+        out << formatter.write(child, '')
+      end
+    end
+    private
+    # Get all span-level descendents of the given element, recursively,
+    # as an Array.
+    #
+    # @param e [REXML::Element] An element.
+    # @return [Array]
+    def span_descendents(e)
+      descendents = []
+      e.each_element do |c|
+        name = c.respond_to?(:name) ? c.name : nil
+        if name && HTML_INLINE_ELEMS.include?(c.name)
+          descendents << c
+          descendents += span_descendents(c)
+        end
+      end
+    end
+  end
+end

data/lib/maruku/input/charsource.rb CHANGED

@@ -1,326 +1,279 @@
-#--
-#   Copyright (C) 2006  Andrea Censi  <andrea (at) rubyforge.org>
-#
-# This file is part of Maruku.
-#
-#   Maruku is free software; you can redistribute it and/or modify
-#   it under the terms of the GNU General Public License as published by
-#   the Free Software Foundation; either version 2 of the License, or
-#   (at your option) any later version.
-#
-#   Maruku is distributed in the hope that it will be useful,
-#   but WITHOUT ANY WARRANTY; without even the implied warranty of
-#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#   GNU General Public License for more details.
-#
-#   You should have received a copy of the GNU General Public License
-#   along with Maruku; if not, write to the Free Software
-#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-#++
-module MaRuKu; module In; module Markdown; module SpanLevelParser
-# a string scanner coded by me
-class CharSourceManual; end
-# a wrapper around StringScanner
-class CharSourceStrscan; end
-# A debug scanner that checks the correctness of both
-# by comparing their output
-class CharSourceDebug; end
-# Choose!
-CharSource = CharSourceManual     # faster! 58ms vs. 65ms
-#CharSource = CharSourceStrscan
-#CharSource = CharSourceDebug
-class CharSourceManual
-	include MaRuKu::Strings
-	def initialize(s, parent=nil)
-		raise "Passed #{s.class}" if not s.kind_of? String
-		@buffer = s
-		@buffer_index = 0
-		@parent = parent
-	end
-	# Return current char as a FixNum (or nil).
-	def cur_char; @buffer[@buffer_index]   end
-	# Return the next n chars as a String.
-	def cur_chars(n); @buffer[@buffer_index,n]  end
-	# Return the char after current char as a FixNum (or nil).
-	def next_char; @buffer[@buffer_index+1] end
-	def shift_char
-		c = @buffer[@buffer_index]
-		@buffer_index+=1
-		c
-	end
-	def ignore_char
-		@buffer_index+=1
-		nil
-	end
-	def ignore_chars(n)
-		@buffer_index+=n
-		nil
-	end
-	def current_remaining_buffer
-		@buffer[@buffer_index, @buffer.size-@buffer_index]
-	end
-	def cur_chars_are(string)
-		# There is a bug here
-		if false
-			r2 = /^.{#{@buffer_index}}#{Regexp.escape string}/m
-			@buffer =~ r2
-		else
-			cur_chars(string.size) == string
-		end
-	end
-	def next_matches(r)
-		r2 = /^.{#{@buffer_index}}#{r}/m
-		md = r2.match @buffer
-		return !!md
-	end
-	def read_regexp3(r)
-		r2 = /^.{#{@buffer_index}}#{r}/m
-		m = r2.match @buffer
-		if m
-			consumed = m.to_s.size - @buffer_index
-#			puts "Consumed #{consumed} chars (entire is #{m.to_s.inspect})"
-			ignore_chars consumed
-		else
-#			puts "Could not read regexp #{r2.inspect} from buffer "+
-#			" index=#{@buffer_index}"
-#			puts "Cur chars = #{cur_chars(20).inspect}"
-#			puts "Matches? = #{cur_chars(20) =~ r}"
-		end
-		m
-	end
-		def read_regexp(r)
-			r2 = /^#{r}/
-			rest = current_remaining_buffer
-			m = r2.match(rest)
-			if m
-				@buffer_index += m.to_s.size
-#				puts "#{r} matched #{rest.inspect}: #{m.to_s.inspect}"
-			end
-			return m
-		end
-	def consume_whitespace
-		while c = cur_char
-		  if (c == ?\s || c == ?\t)
-#				puts "ignoring #{c}"
-				ignore_char
-			else
-#				puts "#{c} is not ws: "<<c
-				break
-			end
-		end
-	end
-	def read_text_chars(out)
-		s = @buffer.size; c=nil
-		while @buffer_index < s && (c=@buffer[@buffer_index]) &&
-			 ((c>=?a && c<=?z) || (c>=?A && c<=?Z))
-				out << c
-				@buffer_index += 1
-		end
-	end
-	def describe
-		s = describe_pos(@buffer, @buffer_index)
-		if @parent
-			s += "\n\n" + @parent.describe
-		end
-		s
-	end
-	include SpanLevelParser
-end
+require 'strscan'
-def describe_pos(buffer, buffer_index)
-	len = 75
-	num_before = [len/2, buffer_index].min
-	num_after = [len/2, buffer.size-buffer_index].min
-	num_before_max = buffer_index
-	num_after_max = buffer.size-buffer_index
-#		puts "num #{num_before} #{num_after}"
-	num_before = [num_before_max, len-num_after].min
-	num_after  = [num_after_max, len-num_before].min
-#		puts "num #{num_before} #{num_after}"
-	index_start = [buffer_index - num_before, 0].max
-	index_end   = [buffer_index + num_after, buffer.size].min
-	size = index_end- index_start
-#		puts "- #{index_start} #{size}"
-	str = buffer[index_start, size]
-	str.gsub!("\n",'N')
-	str.gsub!("\t",'T')
-	if index_end == buffer.size
-		str += "EOF"
-	end
-	pre_s = buffer_index-index_start
-	pre_s = [pre_s, 0].max
-	pre_s2 = [len-pre_s,0].max
-#		puts "pre_S = #{pre_s}"
-	pre =" "*(pre_s)
-	"-"*len+"\n"+
-	str + "\n" +
-	"-"*pre_s + "|" + "-"*(pre_s2)+"\n"+
-#		pre + "|\n"+
-	pre + "+--- Byte #{buffer_index}\n"+
-	"Shown bytes [#{index_start} to #{size}] of #{buffer.size}:\n"+
-	add_tabs(buffer,1,">")
-#		"CharSource: At character #{@buffer_index} of block "+
-#		" beginning with:\n    #{@buffer[0,50].inspect} ...\n"+
-#		" before: \n     ... #{cur_chars(50).inspect} ... "
-end
+module MaRuKu::In::Markdown::SpanLevelParser
+  # a string scanner coded by me
+  class CharSourceManual; end
-require 'strscan'
+  # a wrapper around StringScanner
+  class CharSourceStrscan; end
-class CharSourceStrscan
-	include SpanLevelParser
-	include MaRuKu::Strings
-	def initialize(s, parent=nil)
-		@s = StringScanner.new(s)
-		@parent = parent
-	end
-	# Return current char as a FixNum (or nil).
-	def cur_char
-		 @s.peek(1)[0]
-	end
-	# Return the next n chars as a String.
-	def cur_chars(n);
-		@s.peek(n)
-	end
-	# Return the char after current char as a FixNum (or nil).
-	def next_char;
-		@s.peek(2)[1]
-	end
-	def shift_char
-		(@s.get_byte)[0]
-	end
-	def ignore_char
-		@s.get_byte
-		nil
-	end
-	def ignore_chars(n)
-		n.times do @s.get_byte end
-		nil
-	end
-	def current_remaining_buffer
-		@s.rest #nil #@buffer[@buffer_index, @buffer.size-@buffer_index]
-	end
-	def cur_chars_are(string)
-		cur_chars(string.size) == string
-	end
-	def next_matches(r)
-		len = @s.match?(r)
-		return !!len
-	end
-	def read_regexp(r)
-		string = @s.scan(r)
-		if string
-			return r.match(string)
-		else
-			return nil
-		end
-	end
-	def consume_whitespace
-		@s.scan(/\s+/)
-		nil
-	end
-	def describe
-		describe_pos(@s.string, @s.pos)
-	end
-end
+  # A debug scanner that checks the correctness of both
+  # by comparing their output
+  class CharSourceDebug; end
+  # Choose!
-class CharSourceDebug
-	def initialize(s, parent)
-		@a = CharSourceManual.new(s, parent)
-		@b = CharSourceStrscan.new(s, parent)
-	end
-	def method_missing(methodname, *args)
-		a_bef = @a.describe
-		b_bef = @b.describe
-		a = @a.send(methodname, *args)
-		b = @b.send(methodname, *args)
-#		if methodname == :describe
-#			return a
-#		end
-		if a.kind_of? MatchData
-			if a.to_a != b.to_a
-				puts "called: #{methodname}(#{args})"
-				puts "Matchdata:\na = #{a.to_a.inspect}\nb = #{b.to_a.inspect}"
-				puts "AFTER: "+@a.describe
-				puts "AFTER: "+@b.describe
-				puts "BEFORE: "+a_bef
-				puts "BEFORE: "+b_bef
-				puts caller.join("\n")
-				exit
-			end
-		else
-			if a!=b
-				puts "called: #{methodname}(#{args})"
-				puts "Attenzione!\na = #{a.inspect}\nb = #{b.inspect}"
-				puts ""+@a.describe
-				puts ""+@b.describe
-				puts caller.join("\n")
-				exit
-			end
-		end
-		if @a.cur_char != @b.cur_char
-			puts "Fuori sincronia dopo #{methodname}(#{args})"
-			puts ""+@a.describe
-			puts ""+@b.describe
-			exit
-		end
-		return a
-	end
-end
+  CharSource = CharSourceManual     # faster! 58ms vs. 65ms
+  #CharSource = CharSourceStrscan   # Faster on LONG documents. But StringScanner is buggy in Rubinius
+  #CharSource = CharSourceDebug
+  class CharSourceManual
+    def initialize(s, parent=nil)
+      raise "Passed #{s.class}" if not s.kind_of? String
+      @buffer = s
+      @buffer_index = 0
+      @parent = parent
+    end
+    # Return current char as a String (or nil).
+    def cur_char
+      cur_chars(1)
+    end
+    # Return the next n chars as a String.
+    def cur_chars(n)
+      return nil if @buffer_index >= @buffer.size
+      @buffer[@buffer_index, n]
+    end
+    # Return the char after current char as a String (or nil).
+    def next_char
+      return nil if @buffer_index + 1 >= @buffer.size
+      @buffer[@buffer_index + 1, 1]
+    end
+    def shift_char
+      c = cur_char
+      @buffer_index += 1
+      c
+    end
+    def ignore_char
+      @buffer_index += 1
+    end
+    def ignore_chars(n)
+      @buffer_index += n
+    end
+    def current_remaining_buffer
+      @buffer[@buffer_index, @buffer.size - @buffer_index]
+    end
+    def cur_chars_are(string)
+      cur_chars(string.size) == string
+    end
+    def next_matches(r)
+      r2 = /^.{#{@buffer_index}}#{r}/m
+      r2.match @buffer
+    end
+    def read_regexp(r)
+      r2 = /^#{r}/
+      rest = current_remaining_buffer
+      m = r2.match(rest)
+      if m
+        @buffer_index += m.to_s.size
+      end
+      m
+    end
+    def consume_whitespace
+      while c = cur_char
+        break unless (c == ' ' || c == "\t")
+        ignore_char
+      end
+    end
+    def describe
+      s = describe_pos(@buffer, @buffer_index)
+      if @parent
+        s += "\n\n" + @parent.describe
+      end
+      s
+    end
+    def describe_pos(buffer, buffer_index)
+      len = 75
+      num_before = [len/2, buffer_index].min
+      num_after = [len/2, buffer.size - buffer_index].min
+      num_before_max = buffer_index
+      num_after_max = buffer.size - buffer_index
+      num_before = [num_before_max, len - num_after].min
+      num_after  = [num_after_max, len - num_before].min
+      index_start = [buffer_index - num_before, 0].max
+      index_end   = [buffer_index + num_after, buffer.size].min
+      size = index_end - index_start
+      str = buffer[index_start, size]
+      str.gsub!("\n", 'N')
+      str.gsub!("\t", 'T')
+      if index_end == buffer.size
+        str += "EOF"
+      end
+      pre_s = buffer_index - index_start
+      pre_s = [pre_s, 0].max
+      pre_s2 = [len - pre_s, 0].max
+      pre = " " * pre_s
+      "-" * len + "\n" +
+        str + "\n" +
+        "-" * pre_s + "|" + "-" * pre_s2 + "\n" +
+        pre + "+--- Byte #{buffer_index}\n"+
-end end end end
+        "Shown bytes [#{index_start} to #{size}] of #{buffer.size}:\n"+
+        buffer.gsub(/^/, ">")
+    end
+  end
+  class CharSourceStrscan
+    def initialize(s, parent=nil)
+      @scanner = StringScanner.new(s)
+      @size = s.size
+    end
+    # Return current char as a String (or nil).
+    def cur_char
+      @scanner.peek(1)[0]
+    end
+    # Return the next n chars as a String.
+    def cur_chars(n)
+      @scanner.peek(n)
+    end
+    # Return the char after current char as a String (or nil).
+    def next_char
+      @scanner.peek(2)[1]
+    end
+    # Return a character as a String, advancing the pointer.
+    def shift_char
+      @scanner.getch[0]
+    end
+    # Advance the pointer
+    def ignore_char
+      @scanner.getch
+    end
+    # Advance the pointer by n
+    def ignore_chars(n)
+      n.times { @scanner.getch }
+    end
+    # Return the rest of the string
+    def current_remaining_buffer
+      @scanner.rest
+    end
+    # Returns true if string matches what we're pointing to
+    def cur_chars_are(string)
+      @scanner.peek(string.size) == string
+    end
+    # Returns true if Regexp r matches what we're pointing to
+    def next_matches(r)
+      @scanner.check(r)
+    end
+    def read_regexp(r)
+      r.match(@scanner.scan(r))
+    end
+    def consume_whitespace
+      @scanner.skip(/\s+/)
+    end
+    def describe
+      len = 75
+      num_before = [len/2, @scanner.pos].min
+      num_after = [len/2, @scanner.rest_size].min
+      num_before_max = @scanner.pos
+      num_after_max = @scanner.rest_size
+      num_before = [num_before_max, len - num_after].min
+      num_after  = [num_after_max, len - num_before].min
+      index_start = [@scanner.pos - num_before, 0].max
+      index_end   = [@scanner.pos + num_after, @size].min
+      size = index_end - index_start
+      str = @scanner.string[index_start, size]
+      str.gsub!("\n", 'N')
+      str.gsub!("\t", 'T')
+      if index_end == @size
+        str += "EOF"
+      end
+      pre_s = @scanner.pos - index_start
+      pre_s = [pre_s, 0].max
+      pre_s2 = [len-pre_s, 0].max
+      pre = " " * pre_s
+      "-" * len + "\n" +
+        str + "\n" +
+        "-" * pre_s + "|" + "-" * pre_s2 + "\n" +
+        pre + "+--- Byte #{@scanner.pos}\n" +
+        "Shown bytes [#{index_start} to #{size}] of #{@size}:\n" +
+        @scanner.string.gsub(/^/, ">")
+    end
+  end
+  class CharSourceDebug
+    def initialize(s, parent)
+      @a = CharSourceManual.new(s, parent)
+      @b = CharSourceStrscan.new(s, parent)
+    end
+    def method_missing(methodname, *args)
+      a_bef = @a.describe
+      b_bef = @b.describe
+      a = @a.send(methodname, *args)
+      b = @b.send(methodname, *args)
+      if a.kind_of? MatchData
+        if a.to_a != b.to_a
+          puts "called: #{methodname}(#{args})"
+          puts "Matchdata:\na = #{a.to_a.inspect}\nb = #{b.to_a.inspect}"
+          puts "AFTER: " + @a.describe
+          puts "AFTER: " + @b.describe
+          puts "BEFORE: " + a_bef
+          puts "BEFORE: " + b_bef
+          puts caller.join("\n")
+          exit
+        end
+      else
+        if a != b
+          puts "called: #{methodname}(#{args})"
+          puts "Attenzione!\na = #{a.inspect}\nb = #{b.inspect}"
+          puts "" + @a.describe
+          puts "" + @b.describe
+          puts caller.join("\n")
+          exit
+        end
+      end
+      if @a.cur_char != @b.cur_char
+        puts "Fuori sincronia dopo #{methodname}(#{args})"
+        puts "" + @a.describe
+        puts "" + @b.describe
+        exit
+      end
+      return a
+    end
+  end
+end