RubyGems - maruku - Versions diffs - 0.6.1 → 0.7.0.beta1 - Mend

maruku 0.6.1 → 0.7.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (263) hide show

checksums.yaml +7 -0
checksums.yaml.gz.sig +0 -0
data.tar.gz.sig +0 -0
data/MIT-LICENSE.txt +20 -0
data/bin/maruku +153 -152
data/bin/marutex +2 -29
data/data/entities.xml +261 -0
data/docs/math.md +14 -18
data/lib/maruku.rb +65 -77
data/lib/maruku/attributes.rb +109 -214
data/lib/maruku/defaults.rb +45 -67
data/lib/maruku/document.rb +43 -0
data/lib/maruku/element.rb +112 -0
data/lib/maruku/errors.rb +71 -0
data/lib/maruku/ext/div.rb +105 -113
data/lib/maruku/ext/fenced_code.rb +97 -0
data/lib/maruku/ext/math.rb +22 -26
data/lib/maruku/ext/math/elements.rb +20 -26
data/lib/maruku/ext/math/mathml_engines/blahtex.rb +92 -104
data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +33 -26
data/lib/maruku/ext/math/mathml_engines/none.rb +11 -19
data/lib/maruku/ext/math/mathml_engines/ritex.rb +2 -4
data/lib/maruku/ext/math/parsing.rb +107 -113
data/lib/maruku/ext/math/to_html.rb +184 -187
data/lib/maruku/ext/math/to_latex.rb +30 -21
data/lib/maruku/helpers.rb +158 -257
data/lib/maruku/html.rb +254 -0
data/lib/maruku/input/charsource.rb +272 -319
data/lib/maruku/input/extensions.rb +62 -63
data/lib/maruku/input/html_helper.rb +220 -189
data/lib/maruku/input/linesource.rb +90 -110
data/lib/maruku/input/mdline.rb +129 -0
data/lib/maruku/input/parse_block.rb +618 -612
data/lib/maruku/input/parse_doc.rb +145 -215
data/lib/maruku/input/parse_span.rb +658 -0
data/lib/maruku/input/rubypants.rb +200 -128
data/lib/maruku/inspect_element.rb +60 -0
data/lib/maruku/maruku.rb +10 -31
data/lib/maruku/output/entity_table.rb +33 -0
data/lib/maruku/output/s5/fancy.rb +462 -462
data/lib/maruku/output/s5/to_s5.rb +115 -135
data/lib/maruku/output/to_html.rb +898 -983
data/lib/maruku/output/to_latex.rb +561 -560
data/lib/maruku/output/to_markdown.rb +207 -162
data/lib/maruku/output/to_s.rb +11 -52
data/lib/maruku/string_utils.rb +129 -179
data/lib/maruku/toc.rb +185 -196
data/lib/maruku/version.rb +33 -38
data/spec/block_docs/abbrev.md +776 -0
data/{tests/unittest → spec/block_docs}/abbreviations.md +11 -20
data/{tests/unittest → spec/block_docs}/alt.md +2 -14
data/{tests/unittest/pending → spec/block_docs}/amps.md +1 -13
data/{tests/unittest → spec/block_docs}/attributes/att2.md +0 -12
data/{tests/unittest → spec/block_docs}/attributes/att3.md +2 -14
data/{tests/unittest → spec/block_docs}/attributes/attributes.md +12 -16
data/{tests/unittest → spec/block_docs}/attributes/circular.md +0 -12
data/{tests/unittest → spec/block_docs}/attributes/default.md +1 -13
data/{tests/unittest → spec/block_docs}/blank.md +0 -12
data/{tests/unittest → spec/block_docs}/blanks_in_code.md +16 -15
data/{tests/unittest/loss.md → spec/block_docs/bug_def.md} +6 -18
data/{tests/unittest → spec/block_docs}/bug_table.md +3 -15
data/{tests/unittest → spec/block_docs}/code.md +7 -14
data/{tests/unittest → spec/block_docs}/code2.md +4 -14
data/{tests/unittest → spec/block_docs}/code3.md +12 -16
data/{tests/unittest → spec/block_docs}/data_loss.md +2 -14
data/{tests/unittest → spec/block_docs}/divs/div1.md +0 -12
data/{tests/unittest → spec/block_docs}/divs/div2.md +0 -12
data/{tests/unittest → spec/block_docs}/divs/div3_nest.md +3 -15
data/{tests/unittest → spec/block_docs}/easy.md +1 -13
data/spec/block_docs/email.md +29 -0
data/{tests/unittest/pending → spec/block_docs}/empty_cells.md +3 -15
data/{tests/unittest → spec/block_docs}/encoding/iso-8859-1.md +1 -14
data/{tests/unittest → spec/block_docs}/encoding/utf-8.md +0 -12
data/{tests/unittest → spec/block_docs}/entities.md +27 -29
data/{tests/unittest/notyet → spec/block_docs}/escape.md +2 -14
data/{tests/unittest → spec/block_docs}/escaping.md +11 -22
data/{tests/unittest → spec/block_docs}/extra_dl.md +2 -13
data/{tests/unittest → spec/block_docs}/extra_header_id.md +14 -20
data/{tests/unittest → spec/block_docs}/extra_table1.md +3 -15
data/spec/block_docs/fenced_code_blocks.md +66 -0
data/spec/block_docs/fenced_code_blocks_highlighted.md +18 -0
data/{tests/unittest → spec/block_docs}/footnotes.md +12 -24
data/spec/block_docs/footnotes2.md +78 -0
data/spec/block_docs/hard.md +25 -0
data/spec/block_docs/header_after_par.md +62 -0
data/{tests/unittest → spec/block_docs}/headers.md +10 -18
data/{tests/unittest → spec/block_docs}/hex_entities.md +7 -18
data/{tests/unittest → spec/block_docs}/hrule.md +5 -12
data/{tests/unittest → spec/block_docs}/html3.md +1 -13
data/{tests/unittest → spec/block_docs}/html4.md +2 -14
data/{tests/unittest → spec/block_docs}/html5.md +2 -14
data/spec/block_docs/html_block_in_para.md +22 -0
data/spec/block_docs/html_inline.md +25 -0
data/spec/block_docs/html_trailing.md +31 -0
data/spec/block_docs/ie.md +62 -0
data/spec/block_docs/iframe.md +29 -0
data/{tests/unittest → spec/block_docs}/images.md +22 -28
data/{tests/unittest → spec/block_docs}/images2.md +7 -17
data/{tests/unittest → spec/block_docs}/inline_html.md +37 -67
data/{tests/unittest → spec/block_docs}/inline_html2.md +1 -13
data/spec/block_docs/inline_html_beginning.md +10 -0
data/spec/block_docs/issue20.md +9 -0
data/spec/block_docs/issue26.md +22 -0
data/spec/block_docs/issue29.md +9 -0
data/spec/block_docs/issue30.md +30 -0
data/spec/block_docs/issue31.md +25 -0
data/spec/block_docs/issue40.md +40 -0
data/spec/block_docs/issue64.md +55 -0
data/spec/block_docs/issue67.md +19 -0
data/spec/block_docs/issue70.md +11 -0
data/spec/block_docs/issue72.md +17 -0
data/spec/block_docs/issue74.md +38 -0
data/spec/block_docs/issue79.md +15 -0
data/spec/block_docs/issue83.md +13 -0
data/spec/block_docs/issue85.md +25 -0
data/spec/block_docs/issue88.md +19 -0
data/spec/block_docs/issue89.md +12 -0
data/spec/block_docs/issue90.md +38 -0
data/{tests/unittest/pending → spec/block_docs}/link.md +21 -18
data/{tests/unittest → spec/block_docs}/links.md +33 -32
data/spec/block_docs/links2.md +21 -0
data/{tests/unittest → spec/block_docs}/list1.md +0 -12
data/{tests/unittest → spec/block_docs}/list12.md +2 -14
data/{tests/unittest → spec/block_docs}/list2.md +2 -14
data/spec/block_docs/list_multipara.md +42 -0
data/{tests/unittest → spec/block_docs}/lists.md +28 -29
data/{tests/unittest → spec/block_docs}/lists10.md +2 -14
data/spec/block_docs/lists11.md +23 -0
data/spec/block_docs/lists12.md +43 -0
data/spec/block_docs/lists13.md +55 -0
data/spec/block_docs/lists14.md +61 -0
data/spec/block_docs/lists15.md +36 -0
data/spec/block_docs/lists6.md +88 -0
data/spec/block_docs/lists7b.md +58 -0
data/spec/block_docs/lists9.md +53 -0
data/{tests/unittest → spec/block_docs}/lists_after_paragraph.md +19 -25
data/spec/block_docs/lists_blank.md +35 -0
data/{tests/unittest/list3.md → spec/block_docs/lists_blockquote_code.md} +2 -14
data/{tests/unittest/list4.md → spec/block_docs/lists_need_blank_line.md} +50 -21
data/spec/block_docs/lists_nested.md +44 -0
data/spec/block_docs/lists_nested_blankline.md +28 -0
data/spec/block_docs/lists_nested_deep.md +43 -0
data/{tests/unittest → spec/block_docs}/lists_ol.md +37 -54
data/spec/block_docs/lists_paraindent.md +47 -0
data/spec/block_docs/lists_tab.md +54 -0
data/spec/block_docs/loss.md +17 -0
data/spec/block_docs/math-blahtex/equations.md +30 -0
data/spec/block_docs/math-blahtex/inline.md +48 -0
data/spec/block_docs/math-blahtex/math2.md +45 -0
data/spec/block_docs/math-blahtex/table.md +25 -0
data/spec/block_docs/math/embedded_invalid_svg.md +79 -0
data/spec/block_docs/math/embedded_svg.md +97 -0
data/spec/block_docs/math/equations.md +44 -0
data/{tests/unittest → spec/block_docs}/math/inline.md +7 -19
data/spec/block_docs/math/math2.md +45 -0
data/{tests/unittest → spec/block_docs}/math/notmath.md +0 -12
data/spec/block_docs/math/raw_mathml.md +87 -0
data/spec/block_docs/math/table.md +25 -0
data/{tests/unittest → spec/block_docs}/math/table2.md +5 -17
data/{tests/unittest → spec/block_docs}/misc_sw.md +181 -118
data/{tests/unittest → spec/block_docs}/olist.md +6 -18
data/{tests/unittest → spec/block_docs}/one.md +0 -12
data/{tests/unittest → spec/block_docs}/paragraph.md +0 -12
data/{tests/unittest → spec/block_docs}/paragraph_rules/dont_merge_ref.md +4 -12
data/{tests/unittest → spec/block_docs}/paragraph_rules/tab_is_blank.md +0 -12
data/{tests/unittest → spec/block_docs}/paragraphs.md +1 -13
data/{tests/unittest → spec/block_docs}/recover/recover_links.md +4 -16
data/{tests/unittest/pending/ref.md → spec/block_docs/ref_with_period.md} +7 -16
data/spec/block_docs/ref_with_title.md +22 -0
data/{tests/unittest → spec/block_docs}/references/long_example.md +16 -23
data/{tests/unittest → spec/block_docs}/references/spaces_and_numbers.md +0 -12
data/{tests/unittest → spec/block_docs}/smartypants.md +24 -31
data/{tests/unittest → spec/block_docs}/syntax_hl.md +13 -17
data/{tests/unittest → spec/block_docs}/table_attributes.md +2 -14
data/spec/block_docs/tables.md +58 -0
data/{tests/unittest → spec/block_docs}/test.md +1 -13
data/{tests/unittest/notyet → spec/block_docs}/ticks.md +1 -13
data/spec/block_docs/toc.md +87 -0
data/{tests/unittest/notyet → spec/block_docs}/triggering.md +14 -25
data/{tests/unittest → spec/block_docs}/underscore_in_words.md +0 -12
data/{tests/unittest → spec/block_docs}/wrapping.md +4 -16
data/spec/block_docs/xml.md +33 -0
data/{tests/unittest → spec/block_docs}/xml2.md +0 -12
data/spec/block_docs/xml3.md +24 -0
data/{tests/unittest → spec/block_docs}/xml_instruction.md +9 -20
data/spec/block_spec.rb +110 -0
data/spec/cli_spec.rb +8 -0
data/spec/span_spec.rb +256 -0
data/spec/spec_helper.rb +2 -0
data/spec/to_html_utf8_spec.rb +13 -0
metadata +205 -243
metadata.gz.sig +3 -0
data/Rakefile +0 -48
data/bin/marudown +0 -29
data/bin/marutest +0 -345
data/docs/changelog.md +0 -334
data/lib/maruku/errors_management.rb +0 -92
data/lib/maruku/ext/math/latex_fix.rb +0 -12
data/lib/maruku/input/parse_span_better.rb +0 -746
data/lib/maruku/input/type_detection.rb +0 -147
data/lib/maruku/output/to_latex_entities.rb +0 -367
data/lib/maruku/output/to_latex_strings.rb +0 -64
data/lib/maruku/structures.rb +0 -167
data/lib/maruku/structures_inspect.rb +0 -87
data/lib/maruku/structures_iterators.rb +0 -61
data/lib/maruku/tests/benchmark.rb +0 -82
data/lib/maruku/tests/new_parser.rb +0 -373
data/lib/maruku/tests/tests.rb +0 -136
data/lib/maruku/usage/example1.rb +0 -33
data/tests/bugs/code_in_links.md +0 -101
data/tests/bugs/complex_escaping.md +0 -38
data/tests/math/syntax.md +0 -46
data/tests/math_usage/document.md +0 -13
data/tests/others/abbreviations.md +0 -11
data/tests/others/blank.md +0 -4
data/tests/others/code.md +0 -5
data/tests/others/code2.md +0 -8
data/tests/others/code3.md +0 -16
data/tests/others/email.md +0 -4
data/tests/others/entities.md +0 -19
data/tests/others/escaping.md +0 -16
data/tests/others/extra_dl.md +0 -101
data/tests/others/extra_header_id.md +0 -13
data/tests/others/extra_table1.md +0 -40
data/tests/others/footnotes.md +0 -17
data/tests/others/headers.md +0 -10
data/tests/others/hrule.md +0 -10
data/tests/others/images.md +0 -20
data/tests/others/inline_html.md +0 -42
data/tests/others/links.md +0 -38
data/tests/others/list1.md +0 -4
data/tests/others/list2.md +0 -5
data/tests/others/list3.md +0 -8
data/tests/others/lists.md +0 -32
data/tests/others/lists_after_paragraph.md +0 -44
data/tests/others/lists_ol.md +0 -39
data/tests/others/misc_sw.md +0 -105
data/tests/others/one.md +0 -1
data/tests/others/paragraphs.md +0 -13
data/tests/others/sss06.md +0 -352
data/tests/others/test.md +0 -4
data/tests/s5/s5profiling.md +0 -48
data/tests/unittest/bug_def.md +0 -28
data/tests/unittest/email.md +0 -32
data/tests/unittest/html2.md +0 -34
data/tests/unittest/ie.md +0 -61
data/tests/unittest/links2.md +0 -34
data/tests/unittest/lists11.md +0 -28
data/tests/unittest/lists6.md +0 -53
data/tests/unittest/lists9.md +0 -76
data/tests/unittest/math/equations.md +0 -86
data/tests/unittest/math/math2.md +0 -57
data/tests/unittest/math/table.md +0 -37
data/tests/unittest/notyet/header_after_par.md +0 -70
data/tests/unittest/red_tests/abbrev.md +0 -1388
data/tests/unittest/red_tests/lists7.md +0 -68
data/tests/unittest/red_tests/lists7b.md +0 -128
data/tests/unittest/red_tests/lists8.md +0 -76
data/tests/unittest/red_tests/xml.md +0 -70
data/tests/unittest/xml3.md +0 -38
data/tests/utf8-files/simple.md +0 -1
data/unit_test_block.sh +0 -5
data/unit_test_span.sh +0 -3

data/lib/maruku/html.rb ADDED

@@ -0,0 +1,254 @@
+require 'set'
+$warned_nokogiri = false
+module MaRuKu
+  HTML_INLINE_ELEMS = Set.new %w[a abbr acronym audio b bdi bdo big br button canvas caption cite code
+    col colgroup command datalist del details dfn dir em fieldset font form i img input ins
+    kbd label legend mark meter optgroup option progress q rp rt ruby s samp section select small
+    source span strike strong sub summary sup tbody td tfoot th thead time tr track tt u var video wbr
+    animate animateColor animateMotion animateTransform circle clipPath defs desc ellipse
+    feGaussianBlur filter font-face font-face-name font-face-src foreignObject g glyph hkern
+    linearGradient line marker mask metadata missing-glyph mpath path pattern polygon polyline
+    radialGradient rect set stop svg switch text textPath title tspan use
+    annotation annotation-xml maction math menclose merror mfrac mfenced mi mmultiscripts mn mo
+    mover mpadded mphantom mprescripts mroot mrow mspace msqrt mstyle msub msubsup msup mtable
+    mtd mtext mtr munder munderover none semantics]
+  # Parse block-level markdown elements in these HTML tags
+  BLOCK_TAGS = %w(div)
+  # This gets mixed into HTML MDElement nodes to hold the parsed document fragment
+  module HTMLElement
+    attr_accessor :parsed_html
+  end
+  # This is just a factory, not an actual class
+  module HTMLFragment
+    # HTMLFragment.new produces a concrete HTMLFragment implementation
+    # that is either a NokogiriHTMLFragment or a REXMLHTMLFragment.
+    #
+    # Nokogiri is only attempted when MaRuKu::Globals[:html_parser] is
+    # 'nokogiri' and an earlier attempt has not already failed: after a
+    # LoadError the global $warned_nokogiri flag is set, so every later
+    # call goes straight to REXML without warning again.
+    #
+    # @param raw_html [String] HTML source handed to the backend constructor
+    # @return [NokogiriHTMLFragment, REXMLHTMLFragment]
+    def self.new(raw_html)
+      if !$warned_nokogiri && MaRuKu::Globals[:html_parser] == 'nokogiri'
+        begin
+          require 'nokogiri'
+          return NokogiriHTMLFragment.new(raw_html)
+        rescue LoadError
+          warn "Nokogiri could not be loaded. Falling back to REXML."
+          $warned_nokogiri = true
+        end
+      end
+      # REXML is loaded lazily, only when it is actually the chosen backend.
+      require 'rexml/document'
+      REXMLHTMLFragment.new(raw_html)
+    end
+  end
+  # Nokogiri backend for HTML handling
+  class NokogiriHTMLFragment
+    def initialize(raw_html)
+      # Wrap our HTML in a dummy document with a doctype (just
+      # for the entity references)
+      wrapped = '<!DOCTYPE html PUBLIC
+  "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
+  "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
+<html>' + raw_html.strip + '</html>'
+      d = Nokogiri::XML::Document.parse(wrapped) {|c| c.nonet }
+      @fragment = d.root
+    end
+    # Name of the fragment's first child node.
+    #
+    # @return the name of the first child, or nil when the fragment has no children
+    def first_node_name
+      node = @fragment.children.first
+      return nil unless node
+      node.name
+    end
+    # Add a class to the children of this fragment
+    def add_class(class_name)
+      @fragment.children.each do |c|
+        c['class'] = ((c['class']||'').split(' ') + [class_name]).join(' ')
+      end
+    end
+    # Process markdown within the contents of some elements and
+    # replace their contents with the processed version.
+    #
+    # @param doc [MaRuKu::MDDocument] A document to process.
+    def process_markdown_inside_elements(doc)
+      # find span elements or elements with 'markdown' attribute
+      elts = @fragment.css("[markdown]")
+      d = @fragment.children.first
+      if d && HTML_INLINE_ELEMS.include?(d.name)
+        elts << d unless d.attribute('markdown')
+        elts += span_descendents(d)
+      end
+      elts.each do |e|
+        how = e['markdown']
+        e.remove_attribute('markdown')
+        next if "0" == how # user requests no markdown parsing inside
+        parse_blocks = (how == 'block') || BLOCK_TAGS.include?(e.name)
+        # Select all text children of e
+        e.xpath("./text()").each do |original_text|
+          s = CGI.escapeHTML(original_text.text)
+          unless s.strip.empty?
+            parsed = parse_blocks ? doc.parse_text_as_markdown(s) : doc.parse_span(s)
+            # restore leading and trailing spaces
+            padding = /\A(\s*).*?(\s*)\z/.match(s)
+            parsed = [padding[1]] + parsed + [padding[2]] if padding
+            el = doc.md_el(:dummy, parsed)
+            # Nokogiri collapses consecutive Text nodes, so replace it by a dummy element
+            guard = Nokogiri::XML::Element.new('guard', @fragment)
+            original_text.replace(guard)
+            el.children_to_html.each do |x|
+              guard.before(x.to_s)
+            end
+            guard.remove
+          end
+        end
+      end
+    end
+    # Convert this fragment to an HTML or XHTML string.
+    # @return [String]
+    def to_html
+      output_options = Nokogiri::XML::Node::SaveOptions::DEFAULT_XHTML ^
+        Nokogiri::XML::Node::SaveOptions::FORMAT
+      @fragment.children.inject("") do |out, child|
+        out << child.serialize(:save_with => output_options, :encoding => 'UTF-8')
+      end
+    end
+    private
+    # Get all span-level descendents of the given element, recursively,
+    # as a flat NodeSet.
+    #
+    # @param e [Nokogiri::XML::Node] An element.
+    # @return [Nokogiri::XML::NodeSet]
+    def span_descendents(e)
+      ns = Nokogiri::XML::NodeSet.new(Nokogiri::XML::Document.new)
+      e.element_children.inject(ns) do |descendents, c|
+        if HTML_INLINE_ELEMS.include?(c.name)
+          descendents << c
+          descendents += span_descendents(c)
+        end
+        descendents
+      end
+    end
+  end
+  # An HTMLFragment implementation using REXML
+  class REXMLHTMLFragment
+    def initialize(raw_html)
+      wrapped = '<!DOCTYPE html PUBLIC
+  "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
+  "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
+<html>' + raw_html.strip + '</html>'
+      @fragment = REXML::Document.new(wrapped).root
+    end
+    # The name of the first element in the fragment
+    def first_node_name
+      first_child = @fragment.children.first
+      (first_child && first_child.respond_to?(:name)) ? first_child.name : nil
+    end
+    # Add a class to the children of this fragment
+    def add_class(class_name)
+      @fragment.each_element do |c|
+        c.attributes['class'] = ((c.attributes['class']||'').split(' ') + [class_name]).join(' ')
+      end
+    end
+    # Process markdown within the contents of some elements and
+    # replace their contents with the processed version.
+    def process_markdown_inside_elements(doc)
+      # parse block-level markdown elements in these HTML tags
+      block_tags = ['div']
+      elts = []
+      @fragment.each_element('//*[@markdown]') do |e|
+        elts << e
+      end
+      d = @fragment.children.first
+      if d && HTML_INLINE_ELEMS.include?(first_node_name)
+        elts << d unless d.attributes['markdown']
+        elts += span_descendents(d)
+      end
+      # find span elements or elements with 'markdown' attribute
+      elts.each do |e|
+        # should we parse block-level or span-level?
+        how = e.attributes['markdown']
+        e.attributes.delete('markdown')
+        next if "0" == how # user requests no markdown parsing inside
+        parse_blocks = (how == 'block') || block_tags.include?(e.name)
+        # Select all text children of e
+        e.texts.each do |original_text|
+          s = CGI.escapeHTML(original_text.value)
+          unless s.strip.empty?
+            # TODO extract common functionality
+            parsed = parse_blocks ? doc.parse_text_as_markdown(s) : doc.parse_span(s)
+            # restore leading and trailing spaces
+            padding = /\A(\s*).*?(\s*)\z/.match(s)
+            parsed = [padding[1]] + parsed + [padding[2]] if padding
+            el = doc.md_el(:dummy, parsed)
+            new_html = "<dummy>"
+            el.children_to_html.each do |x|
+              new_html << x.to_s
+            end
+            new_html << "</dummy>"
+            newdoc = REXML::Document.new(new_html).root
+            p = original_text.parent
+            newdoc.children.each do |c|
+              p.insert_before(original_text, c)
+            end
+            p.delete(original_text)
+          end
+        end
+      end
+    end
+    def to_html
+      formatter = REXML::Formatters::Default.new(true)
+      @fragment.children.inject("") do |out, child|
+        out << formatter.write(child, '')
+      end
+    end
+    private
+    # Get all span-level descendents of the given element, recursively,
+    # as an Array.
+    #
+    # @param e [REXML::Element] An element.
+    # @return [Array]
+    def span_descendents(e)
+      descendents = []
+      e.each_element do |c|
+        name = c.respond_to?(:name) ? c.name : nil
+        if name && HTML_INLINE_ELEMS.include?(name)
+          descendents << c
+          # Recurse so nested inline elements are collected into the same flat list.
+          descendents.concat(span_descendents(c))
+        end
+      end
+      # Return the accumulator explicitly; otherwise the method's value is
+      # whatever each_element returns rather than the collected elements.
+      descendents
+    end
+  end
+end

data/lib/maruku/input/charsource.rb CHANGED

@@ -1,326 +1,279 @@
-#--
-#   Copyright (C) 2006  Andrea Censi  <andrea (at) rubyforge.org>
-#
-# This file is part of Maruku.
-#
-#   Maruku is free software; you can redistribute it and/or modify
-#   it under the terms of the GNU General Public License as published by
-#   the Free Software Foundation; either version 2 of the License, or
-#   (at your option) any later version.
-#
-#   Maruku is distributed in the hope that it will be useful,
-#   but WITHOUT ANY WARRANTY; without even the implied warranty of
-#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#   GNU General Public License for more details.
-#
-#   You should have received a copy of the GNU General Public License
-#   along with Maruku; if not, write to the Free Software
-#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-#++
-module MaRuKu; module In; module Markdown; module SpanLevelParser
-# a string scanner coded by me
-class CharSourceManual; end
-# a wrapper around StringScanner
-class CharSourceStrscan; end
-# A debug scanner that checks the correctness of both
-# by comparing their output
-class CharSourceDebug; end
-# Choose!
-CharSource = CharSourceManual     # faster! 58ms vs. 65ms
-#CharSource = CharSourceStrscan
-#CharSource = CharSourceDebug
-class CharSourceManual
-	include MaRuKu::Strings
-	def initialize(s, parent=nil)
-		raise "Passed #{s.class}" if not s.kind_of? String
-		@buffer = s
-		@buffer_index = 0
-		@parent = parent
-	end
-	# Return current char as a FixNum (or nil).
-	def cur_char; @buffer[@buffer_index]   end
-	# Return the next n chars as a String.
-	def cur_chars(n); @buffer[@buffer_index,n]  end
-	# Return the char after current char as a FixNum (or nil).
-	def next_char; @buffer[@buffer_index+1] end
-	def shift_char
-		c = @buffer[@buffer_index]
-		@buffer_index+=1
-		c
-	end
-	def ignore_char
-		@buffer_index+=1
-		nil
-	end
-	def ignore_chars(n)
-		@buffer_index+=n
-		nil
-	end
-	def current_remaining_buffer
-		@buffer[@buffer_index, @buffer.size-@buffer_index]
-	end
-	def cur_chars_are(string)
-		# There is a bug here
-		if false
-			r2 = /^.{#{@buffer_index}}#{Regexp.escape string}/m
-			@buffer =~ r2
-		else
-			cur_chars(string.size) == string
-		end
-	end
-	def next_matches(r)
-		r2 = /^.{#{@buffer_index}}#{r}/m
-		md = r2.match @buffer
-		return !!md
-	end
-	def read_regexp3(r)
-		r2 = /^.{#{@buffer_index}}#{r}/m
-		m = r2.match @buffer
-		if m
-			consumed = m.to_s.size - @buffer_index
-#			puts "Consumed #{consumed} chars (entire is #{m.to_s.inspect})"
-			ignore_chars consumed
-		else
-#			puts "Could not read regexp #{r2.inspect} from buffer "+
-#			" index=#{@buffer_index}"
-#			puts "Cur chars = #{cur_chars(20).inspect}"
-#			puts "Matches? = #{cur_chars(20) =~ r}"
-		end
-		m
-	end
-		def read_regexp(r)
-			r2 = /^#{r}/
-			rest = current_remaining_buffer
-			m = r2.match(rest)
-			if m
-				@buffer_index += m.to_s.size
-#				puts "#{r} matched #{rest.inspect}: #{m.to_s.inspect}"
-			end
-			return m
-		end
-	def consume_whitespace
-		while c = cur_char
-		  if (c == ?\s || c == ?\t)
-#				puts "ignoring #{c}"
-				ignore_char
-			else
-#				puts "#{c} is not ws: "<<c
-				break
-			end
-		end
-	end
-	def read_text_chars(out)
-		s = @buffer.size; c=nil
-		while @buffer_index < s && (c=@buffer[@buffer_index]) &&
-			 ((c>=?a && c<=?z) || (c>=?A && c<=?Z))
-				out << c
-				@buffer_index += 1
-		end
-	end
-	def describe
-		s = describe_pos(@buffer, @buffer_index)
-		if @parent
-			s += "\n\n" + @parent.describe
-		end
-		s
-	end
-	include SpanLevelParser
-end
+require 'strscan'
-def describe_pos(buffer, buffer_index)
-	len = 75
-	num_before = [len/2, buffer_index].min
-	num_after = [len/2, buffer.size-buffer_index].min
-	num_before_max = buffer_index
-	num_after_max = buffer.size-buffer_index
-#		puts "num #{num_before} #{num_after}"
-	num_before = [num_before_max, len-num_after].min
-	num_after  = [num_after_max, len-num_before].min
-#		puts "num #{num_before} #{num_after}"
-	index_start = [buffer_index - num_before, 0].max
-	index_end   = [buffer_index + num_after, buffer.size].min
-	size = index_end- index_start
-#		puts "- #{index_start} #{size}"
-	str = buffer[index_start, size]
-	str.gsub!("\n",'N')
-	str.gsub!("\t",'T')
-	if index_end == buffer.size
-		str += "EOF"
-	end
-	pre_s = buffer_index-index_start
-	pre_s = [pre_s, 0].max
-	pre_s2 = [len-pre_s,0].max
-#		puts "pre_S = #{pre_s}"
-	pre =" "*(pre_s)
-	"-"*len+"\n"+
-	str + "\n" +
-	"-"*pre_s + "|" + "-"*(pre_s2)+"\n"+
-#		pre + "|\n"+
-	pre + "+--- Byte #{buffer_index}\n"+
-	"Shown bytes [#{index_start} to #{size}] of #{buffer.size}:\n"+
-	add_tabs(buffer,1,">")
-#		"CharSource: At character #{@buffer_index} of block "+
-#		" beginning with:\n    #{@buffer[0,50].inspect} ...\n"+
-#		" before: \n     ... #{cur_chars(50).inspect} ... "
-end
+module MaRuKu::In::Markdown::SpanLevelParser
+  # a string scanner coded by me
+  class CharSourceManual; end
-require 'strscan'
+  # a wrapper around StringScanner
+  class CharSourceStrscan; end
-class CharSourceStrscan
-	include SpanLevelParser
-	include MaRuKu::Strings
-	def initialize(s, parent=nil)
-		@s = StringScanner.new(s)
-		@parent = parent
-	end
-	# Return current char as a FixNum (or nil).
-	def cur_char
-		 @s.peek(1)[0]
-	end
-	# Return the next n chars as a String.
-	def cur_chars(n);
-		@s.peek(n)
-	end
-	# Return the char after current char as a FixNum (or nil).
-	def next_char;
-		@s.peek(2)[1]
-	end
-	def shift_char
-		(@s.get_byte)[0]
-	end
-	def ignore_char
-		@s.get_byte
-		nil
-	end
-	def ignore_chars(n)
-		n.times do @s.get_byte end
-		nil
-	end
-	def current_remaining_buffer
-		@s.rest #nil #@buffer[@buffer_index, @buffer.size-@buffer_index]
-	end
-	def cur_chars_are(string)
-		cur_chars(string.size) == string
-	end
-	def next_matches(r)
-		len = @s.match?(r)
-		return !!len
-	end
-	def read_regexp(r)
-		string = @s.scan(r)
-		if string
-			return r.match(string)
-		else
-			return nil
-		end
-	end
-	def consume_whitespace
-		@s.scan(/\s+/)
-		nil
-	end
-	def describe
-		describe_pos(@s.string, @s.pos)
-	end
-end
+  # A debug scanner that checks the correctness of both
+  # by comparing their output
+  class CharSourceDebug; end
+  # Choose!
-class CharSourceDebug
-	def initialize(s, parent)
-		@a = CharSourceManual.new(s, parent)
-		@b = CharSourceStrscan.new(s, parent)
-	end
-	def method_missing(methodname, *args)
-		a_bef = @a.describe
-		b_bef = @b.describe
-		a = @a.send(methodname, *args)
-		b = @b.send(methodname, *args)
-#		if methodname == :describe
-#			return a
-#		end
-		if a.kind_of? MatchData
-			if a.to_a != b.to_a
-				puts "called: #{methodname}(#{args})"
-				puts "Matchdata:\na = #{a.to_a.inspect}\nb = #{b.to_a.inspect}"
-				puts "AFTER: "+@a.describe
-				puts "AFTER: "+@b.describe
-				puts "BEFORE: "+a_bef
-				puts "BEFORE: "+b_bef
-				puts caller.join("\n")
-				exit
-			end
-		else
-			if a!=b
-				puts "called: #{methodname}(#{args})"
-				puts "Attenzione!\na = #{a.inspect}\nb = #{b.inspect}"
-				puts ""+@a.describe
-				puts ""+@b.describe
-				puts caller.join("\n")
-				exit
-			end
-		end
-		if @a.cur_char != @b.cur_char
-			puts "Fuori sincronia dopo #{methodname}(#{args})"
-			puts ""+@a.describe
-			puts ""+@b.describe
-			exit
-		end
-		return a
-	end
-end
+  CharSource = CharSourceManual     # faster! 58ms vs. 65ms
+  #CharSource = CharSourceStrscan   # Faster on LONG documents. But StringScanner is buggy in Rubinius
+  #CharSource = CharSourceDebug
+  class CharSourceManual
+    def initialize(s, parent=nil)
+      raise "Passed #{s.class}" if not s.kind_of? String
+      @buffer = s
+      @buffer_index = 0
+      @parent = parent
+    end
+    # Return current char as a String (or nil).
+    def cur_char
+      cur_chars(1)
+    end
+    # Return the next n chars as a String.
+    def cur_chars(n)
+      return nil if @buffer_index >= @buffer.size
+      @buffer[@buffer_index, n]
+    end
+    # Return the char after current char as a String (or nil).
+    def next_char
+      return nil if @buffer_index + 1 >= @buffer.size
+      @buffer[@buffer_index + 1, 1]
+    end
+    def shift_char
+      c = cur_char
+      @buffer_index += 1
+      c
+    end
+    def ignore_char
+      @buffer_index += 1
+    end
+    def ignore_chars(n)
+      @buffer_index += n
+    end
+    def current_remaining_buffer
+      @buffer[@buffer_index, @buffer.size - @buffer_index]
+    end
+    def cur_chars_are(string)
+      cur_chars(string.size) == string
+    end
+    def next_matches(r)
+      r2 = /^.{#{@buffer_index}}#{r}/m
+      r2.match @buffer
+    end
+    # Try to match Regexp r at the current position. On success the pointer
+    # is advanced past the matched text.
+    #
+    # @param r [Regexp] pattern to match at the current position
+    # @return [MatchData, nil] the match, or nil (pointer unchanged) on failure
+    def read_regexp(r)
+      # Anchor with \A rather than ^: ^ also matches right after any newline,
+      # which would let the pattern match on a later line of the remaining
+      # buffer while the pointer only advances by the length of the match.
+      r2 = /\A#{r}/
+      rest = current_remaining_buffer
+      m = r2.match(rest)
+      if m
+        @buffer_index += m.to_s.size
+      end
+      m
+    end
+    # Advance the pointer past any run of spaces and tabs at the current position.
+    def consume_whitespace
+      ignore_char while [' ', "\t"].include?(cur_char)
+    end
+    def describe
+      s = describe_pos(@buffer, @buffer_index)
+      if @parent
+        s += "\n\n" + @parent.describe
+      end
+      s
+    end
+    def describe_pos(buffer, buffer_index)
+      len = 75
+      num_before = [len/2, buffer_index].min
+      num_after = [len/2, buffer.size - buffer_index].min
+      num_before_max = buffer_index
+      num_after_max = buffer.size - buffer_index
+      num_before = [num_before_max, len - num_after].min
+      num_after  = [num_after_max, len - num_before].min
+      index_start = [buffer_index - num_before, 0].max
+      index_end   = [buffer_index + num_after, buffer.size].min
+      size = index_end - index_start
+      str = buffer[index_start, size]
+      str.gsub!("\n", 'N')
+      str.gsub!("\t", 'T')
+      if index_end == buffer.size
+        str += "EOF"
+      end
+      pre_s = buffer_index - index_start
+      pre_s = [pre_s, 0].max
+      pre_s2 = [len - pre_s, 0].max
+      pre = " " * pre_s
+      "-" * len + "\n" +
+        str + "\n" +
+        "-" * pre_s + "|" + "-" * pre_s2 + "\n" +
+        pre + "+--- Byte #{buffer_index}\n"+
-end end end end
+        "Shown bytes [#{index_start} to #{size}] of #{buffer.size}:\n"+
+        buffer.gsub(/^/, ">")
+    end
+  end
+  class CharSourceStrscan
+    def initialize(s, parent=nil)
+      @scanner = StringScanner.new(s)
+      @size = s.size
+    end
+    # Return current char as a String (or nil).
+    def cur_char
+      @scanner.peek(1)[0]
+    end
+    # Return the next n chars as a String.
+    def cur_chars(n)
+      @scanner.peek(n)
+    end
+    # Return the char after current char as a String (or nil).
+    def next_char
+      @scanner.peek(2)[1]
+    end
+    # Return a character as a String, advancing the pointer.
+    def shift_char
+      # getch returns nil at end of input; guard so nil is returned there
+      # (as CharSourceManual#shift_char does) instead of calling [] on nil.
+      c = @scanner.getch
+      c && c[0]
+    end
+    # Advance the pointer
+    def ignore_char
+      @scanner.getch
+    end
+    # Advance the pointer by n
+    def ignore_chars(n)
+      n.times { @scanner.getch }
+    end
+    # Return the rest of the string
+    def current_remaining_buffer
+      @scanner.rest
+    end
+    # Returns true if string matches what we're pointing to
+    def cur_chars_are(string)
+      @scanner.peek(string.size) == string
+    end
+    # Returns true if Regexp r matches what we're pointing to
+    def next_matches(r)
+      @scanner.check(r)
+    end
+    def read_regexp(r)
+      r.match(@scanner.scan(r))
+    end
+    # Skip over any run of whitespace at the current position.
+    # NOTE(review): \s also matches newlines, whereas
+    # CharSourceManual#consume_whitespace only skips spaces and tabs --
+    # confirm whether the difference between the two backends is intended.
+    def consume_whitespace
+      @scanner.skip(/\s+/)
+    end
+    def describe
+      len = 75
+      num_before = [len/2, @scanner.pos].min
+      num_after = [len/2, @scanner.rest_size].min
+      num_before_max = @scanner.pos
+      num_after_max = @scanner.rest_size
+      num_before = [num_before_max, len - num_after].min
+      num_after  = [num_after_max, len - num_before].min
+      index_start = [@scanner.pos - num_before, 0].max
+      index_end   = [@scanner.pos + num_after, @size].min
+      size = index_end - index_start
+      str = @scanner.string[index_start, size]
+      str.gsub!("\n", 'N')
+      str.gsub!("\t", 'T')
+      if index_end == @size
+        str += "EOF"
+      end
+      pre_s = @scanner.pos - index_start
+      pre_s = [pre_s, 0].max
+      pre_s2 = [len-pre_s, 0].max
+      pre = " " * pre_s
+      "-" * len + "\n" +
+        str + "\n" +
+        "-" * pre_s + "|" + "-" * pre_s2 + "\n" +
+        pre + "+--- Byte #{@scanner.pos}\n" +
+        "Shown bytes [#{index_start} to #{size}] of #{@size}:\n" +
+        @scanner.string.gsub(/^/, ">")
+    end
+  end
+  class CharSourceDebug
+    def initialize(s, parent)
+      @a = CharSourceManual.new(s, parent)
+      @b = CharSourceStrscan.new(s, parent)
+    end
+    # Forward every call to both scanners (@a manual, @b StringScanner based),
+    # compare what they return, and terminate the process with a diagnostic
+    # dump on the first disagreement.
+    #
+    # @param methodname [Symbol] the scanner method being invoked
+    # @param args [Array] arguments passed through to both scanners
+    # @return the result produced by the manual scanner (@a)
+    def method_missing(methodname, *args)
+      # Capture the state of both scanners before the call for the report.
+      a_bef = @a.describe
+      b_bef = @b.describe
+      a = @a.send(methodname, *args)
+      b = @b.send(methodname, *args)
+      if a.kind_of? MatchData
+        # MatchData results are compared through their captured groups.
+        if a.to_a != b.to_a
+          puts "called: #{methodname}(#{args})"
+          puts "Matchdata:\na = #{a.to_a.inspect}\nb = #{b.to_a.inspect}"
+          puts "AFTER: " + @a.describe
+          puts "AFTER: " + @b.describe
+          puts "BEFORE: " + a_bef
+          puts "BEFORE: " + b_bef
+          puts caller.join("\n")
+          exit
+        end
+      else
+        if a != b
+          puts "called: #{methodname}(#{args})"
+          # "Attenzione!" is Italian for "Warning!"
+          puts "Attenzione!\na = #{a.inspect}\nb = #{b.inspect}"
+          puts "" + @a.describe
+          puts "" + @b.describe
+          puts caller.join("\n")
+          exit
+        end
+      end
+      # Even when the return values agree, both scanners must now point at
+      # the same character.
+      if @a.cur_char != @b.cur_char
+        # "Fuori sincronia dopo" is Italian for "Out of sync after"
+        puts "Fuori sincronia dopo #{methodname}(#{args})"
+        puts "" + @a.describe
+        puts "" + @b.describe
+        exit
+      end
+      return a
+    end
+  end
+end