maruku 0.6.0 → 0.7.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/MIT-LICENSE.txt +20 -0
- data/bin/maruku +153 -152
- data/bin/marutex +2 -29
- data/data/entities.xml +261 -0
- data/docs/markdown_syntax.md +9 -21
- data/docs/math.md +14 -18
- data/lib/maruku.rb +65 -78
- data/lib/maruku/attributes.rb +109 -214
- data/lib/maruku/defaults.rb +45 -67
- data/lib/maruku/document.rb +44 -0
- data/lib/maruku/element.rb +138 -0
- data/lib/maruku/errors.rb +80 -0
- data/lib/maruku/ext/div.rb +105 -113
- data/lib/maruku/ext/fenced_code.rb +97 -0
- data/lib/maruku/ext/math.rb +22 -26
- data/lib/maruku/ext/math/elements.rb +20 -26
- data/lib/maruku/ext/math/mathml_engines/blahtex.rb +92 -104
- data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +33 -26
- data/lib/maruku/ext/math/mathml_engines/none.rb +11 -19
- data/lib/maruku/ext/math/mathml_engines/ritex.rb +2 -4
- data/lib/maruku/ext/math/parsing.rb +121 -115
- data/lib/maruku/ext/math/to_html.rb +202 -187
- data/lib/maruku/ext/math/to_latex.rb +34 -21
- data/lib/maruku/helpers.rb +158 -257
- data/lib/maruku/html.rb +251 -0
- data/lib/maruku/input/charsource.rb +272 -319
- data/lib/maruku/input/extensions.rb +62 -63
- data/lib/maruku/input/html_helper.rb +233 -189
- data/lib/maruku/input/linesource.rb +90 -110
- data/lib/maruku/input/mdline.rb +131 -0
- data/lib/maruku/input/parse_block.rb +736 -613
- data/lib/maruku/input/parse_doc.rb +145 -217
- data/lib/maruku/input/parse_span.rb +740 -0
- data/lib/maruku/inspect_element.rb +60 -0
- data/lib/maruku/maruku.rb +14 -30
- data/lib/maruku/output/entity_table.rb +37 -0
- data/lib/maruku/output/s5/fancy.rb +462 -462
- data/lib/maruku/output/s5/to_s5.rb +115 -135
- data/lib/maruku/output/to_html.rb +907 -983
- data/lib/maruku/output/to_latex.rb +571 -563
- data/lib/maruku/output/to_markdown.rb +207 -162
- data/lib/maruku/output/to_s.rb +10 -52
- data/lib/maruku/string_utils.rb +129 -179
- data/lib/maruku/toc.rb +185 -196
- data/lib/maruku/version.rb +33 -38
- data/spec/block_docs/abbrev.md +776 -0
- data/{tests/unittest → spec/block_docs}/abbreviations.md +11 -20
- data/spec/block_docs/abbreviations2.md +27 -0
- data/{tests/unittest → spec/block_docs}/alt.md +2 -14
- data/{tests/unittest/pending → spec/block_docs}/amps.md +1 -13
- data/spec/block_docs/attribute_sanitize.md +22 -0
- data/{tests/unittest → spec/block_docs}/attributes/att2.md +0 -12
- data/{tests/unittest → spec/block_docs}/attributes/att3.md +2 -14
- data/{tests/unittest → spec/block_docs}/attributes/attributes.md +12 -16
- data/{tests/unittest → spec/block_docs}/attributes/circular.md +0 -12
- data/{tests/unittest → spec/block_docs}/attributes/default.md +1 -13
- data/spec/block_docs/atx_headers.md +22 -0
- data/spec/block_docs/auto_cdata.md +48 -0
- data/spec/block_docs/bad_cites.md +30 -0
- data/spec/block_docs/bad_divrefs.md +30 -0
- data/{tests/unittest → spec/block_docs}/blank.md +0 -12
- data/{tests/unittest → spec/block_docs}/blanks_in_code.md +16 -15
- data/spec/block_docs/block_quotes.md +66 -0
- data/{tests/unittest/loss.md → spec/block_docs/bug_def.md} +6 -18
- data/{tests/unittest → spec/block_docs}/bug_table.md +7 -19
- data/spec/block_docs/cites.md +37 -0
- data/{tests/unittest → spec/block_docs}/code.md +7 -14
- data/{tests/unittest → spec/block_docs}/code2.md +4 -14
- data/{tests/unittest → spec/block_docs}/code3.md +12 -16
- data/spec/block_docs/code4.md +79 -0
- data/{tests/unittest → spec/block_docs}/data_loss.md +2 -14
- data/spec/block_docs/div_without_newline.md +16 -0
- data/{tests/unittest → spec/block_docs}/divs/div1.md +0 -12
- data/{tests/unittest → spec/block_docs}/divs/div2.md +0 -12
- data/{tests/unittest → spec/block_docs}/divs/div3_nest.md +3 -15
- data/{tests/unittest → spec/block_docs}/easy.md +1 -13
- data/spec/block_docs/email.md +29 -0
- data/spec/block_docs/empty_cells.md +31 -0
- data/{tests/unittest → spec/block_docs}/encoding/iso-8859-1.md +1 -14
- data/{tests/unittest → spec/block_docs}/encoding/utf-8.md +0 -12
- data/{tests/unittest → spec/block_docs}/entities.md +33 -41
- data/{tests/unittest/notyet → spec/block_docs}/escape.md +2 -14
- data/{tests/unittest → spec/block_docs}/escaping.md +11 -22
- data/{tests/unittest → spec/block_docs}/extra_dl.md +2 -13
- data/{tests/unittest → spec/block_docs}/extra_header_id.md +14 -20
- data/{tests/unittest → spec/block_docs}/extra_table1.md +9 -21
- data/spec/block_docs/fenced_code_blocks.md +58 -0
- data/spec/block_docs/fenced_code_blocks_highlighted.md +17 -0
- data/{tests/unittest → spec/block_docs}/footnotes.md +12 -24
- data/spec/block_docs/footnotes2.md +82 -0
- data/spec/block_docs/hard.md +25 -0
- data/spec/block_docs/header_after_par.md +62 -0
- data/{tests/unittest → spec/block_docs}/headers.md +10 -18
- data/{tests/unittest → spec/block_docs}/hex_entities.md +7 -18
- data/{tests/unittest → spec/block_docs}/hrule.md +5 -12
- data/{tests/unittest → spec/block_docs}/html3.md +1 -13
- data/{tests/unittest → spec/block_docs}/html4.md +2 -14
- data/{tests/unittest → spec/block_docs}/html5.md +2 -14
- data/spec/block_docs/html_block_in_para.md +22 -0
- data/spec/block_docs/html_inline.md +25 -0
- data/spec/block_docs/html_trailing.md +31 -0
- data/spec/block_docs/ie.md +62 -0
- data/spec/block_docs/iframe.md +29 -0
- data/spec/block_docs/ignore_bad_header.md +9 -0
- data/{tests/unittest → spec/block_docs}/images.md +22 -28
- data/{tests/unittest → spec/block_docs}/images2.md +7 -17
- data/{tests/unittest → spec/block_docs}/inline_html.md +37 -67
- data/{tests/unittest → spec/block_docs}/inline_html2.md +1 -13
- data/spec/block_docs/inline_html_beginning.md +10 -0
- data/spec/block_docs/issue106.md +78 -0
- data/spec/block_docs/issue115.md +20 -0
- data/spec/block_docs/issue117.md +13 -0
- data/spec/block_docs/issue120.md +48 -0
- data/spec/block_docs/issue123.md +11 -0
- data/spec/block_docs/issue124.md +16 -0
- data/spec/block_docs/issue126.md +9 -0
- data/spec/block_docs/issue130.md +11 -0
- data/spec/block_docs/issue20.md +9 -0
- data/spec/block_docs/issue26.md +22 -0
- data/spec/block_docs/issue29.md +9 -0
- data/spec/block_docs/issue30.md +30 -0
- data/spec/block_docs/issue31.md +25 -0
- data/spec/block_docs/issue40.md +52 -0
- data/spec/block_docs/issue64.md +55 -0
- data/spec/block_docs/issue67.md +19 -0
- data/spec/block_docs/issue70.md +11 -0
- data/spec/block_docs/issue72.md +17 -0
- data/spec/block_docs/issue74.md +38 -0
- data/spec/block_docs/issue79.md +15 -0
- data/spec/block_docs/issue83.md +13 -0
- data/spec/block_docs/issue85.md +25 -0
- data/spec/block_docs/issue88.md +19 -0
- data/spec/block_docs/issue89.md +12 -0
- data/spec/block_docs/issue90.md +38 -0
- data/{tests/unittest/pending → spec/block_docs}/link.md +21 -18
- data/{tests/unittest → spec/block_docs}/links.md +33 -32
- data/spec/block_docs/links2.md +21 -0
- data/{tests/unittest → spec/block_docs}/list1.md +0 -12
- data/{tests/unittest → spec/block_docs}/list12.md +2 -14
- data/{tests/unittest → spec/block_docs}/list2.md +2 -14
- data/spec/block_docs/list_multipara.md +42 -0
- data/{tests/unittest → spec/block_docs}/lists.md +28 -29
- data/{tests/unittest → spec/block_docs}/lists10.md +2 -14
- data/spec/block_docs/lists11.md +23 -0
- data/spec/block_docs/lists12.md +43 -0
- data/spec/block_docs/lists13.md +55 -0
- data/spec/block_docs/lists14.md +61 -0
- data/spec/block_docs/lists15.md +36 -0
- data/spec/block_docs/lists6.md +88 -0
- data/spec/block_docs/lists7b.md +58 -0
- data/spec/block_docs/lists9.md +53 -0
- data/{tests/unittest → spec/block_docs}/lists_after_paragraph.md +19 -25
- data/spec/block_docs/lists_blank.md +35 -0
- data/{tests/unittest/list3.md → spec/block_docs/lists_blockquote_code.md} +2 -14
- data/{tests/unittest/list4.md → spec/block_docs/lists_need_blank_line.md} +44 -29
- data/spec/block_docs/lists_nested.md +44 -0
- data/spec/block_docs/lists_nested_blankline.md +34 -0
- data/spec/block_docs/lists_nested_deep.md +43 -0
- data/spec/block_docs/lists_ol.md +129 -0
- data/spec/block_docs/lists_ol2.md +147 -0
- data/spec/block_docs/lists_paraindent.md +42 -0
- data/spec/block_docs/lists_tab.md +54 -0
- data/spec/block_docs/loss.md +17 -0
- data/spec/block_docs/math-blahtex/equations.md +29 -0
- data/spec/block_docs/math-blahtex/inline.md +48 -0
- data/spec/block_docs/math-blahtex/math2.md +52 -0
- data/spec/block_docs/math-blahtex/table.md +25 -0
- data/spec/block_docs/math/embedded_invalid_svg.md +108 -0
- data/spec/block_docs/math/embedded_svg.md +136 -0
- data/spec/block_docs/math/equations.md +49 -0
- data/spec/block_docs/math/inline.md +46 -0
- data/spec/block_docs/math/math2.md +53 -0
- data/{tests/unittest → spec/block_docs}/math/notmath.md +0 -12
- data/spec/block_docs/math/raw_mathml.md +87 -0
- data/spec/block_docs/math/spaces_after_inline_math.md +17 -0
- data/spec/block_docs/math/table.md +25 -0
- data/{tests/unittest → spec/block_docs}/math/table2.md +11 -23
- data/{tests/unittest → spec/block_docs}/misc_sw.md +184 -121
- data/{tests/unittest → spec/block_docs}/olist.md +6 -18
- data/{tests/unittest → spec/block_docs}/one.md +0 -12
- data/{tests/unittest → spec/block_docs}/paragraph.md +0 -12
- data/{tests/unittest → spec/block_docs}/paragraph_rules/dont_merge_ref.md +4 -12
- data/{tests/unittest → spec/block_docs}/paragraph_rules/tab_is_blank.md +0 -12
- data/{tests/unittest → spec/block_docs}/paragraphs.md +1 -13
- data/{tests/unittest → spec/block_docs}/recover/recover_links.md +4 -16
- data/{tests/unittest/pending/ref.md → spec/block_docs/ref_with_period.md} +7 -16
- data/spec/block_docs/ref_with_title.md +22 -0
- data/{tests/unittest → spec/block_docs}/references/long_example.md +16 -23
- data/{tests/unittest → spec/block_docs}/references/spaces_and_numbers.md +0 -12
- data/{tests/unittest → spec/block_docs}/smartypants.md +24 -31
- data/{tests/unittest → spec/block_docs}/syntax_hl.md +13 -17
- data/{tests/unittest → spec/block_docs}/table_attributes.md +6 -20
- data/spec/block_docs/table_colspan.md +41 -0
- data/spec/block_docs/tables.md +47 -0
- data/spec/block_docs/tables2.md +74 -0
- data/{tests/unittest → spec/block_docs}/test.md +1 -13
- data/{tests/unittest/notyet → spec/block_docs}/ticks.md +1 -13
- data/spec/block_docs/toc.md +87 -0
- data/{tests/unittest/notyet → spec/block_docs}/triggering.md +14 -25
- data/{tests/unittest → spec/block_docs}/underscore_in_words.md +0 -12
- data/{tests/unittest → spec/block_docs}/wrapping.md +4 -16
- data/spec/block_docs/xml.md +33 -0
- data/spec/block_docs/xml3.md +24 -0
- data/spec/block_docs/xml_comments.md +32 -0
- data/{tests/unittest → spec/block_docs}/xml_instruction.md +9 -20
- data/spec/block_spec.rb +110 -0
- data/spec/cli_spec.rb +8 -0
- data/spec/span_spec.rb +263 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/to_html_utf8_spec.rb +13 -0
- metadata +218 -202
- data/Rakefile +0 -73
- data/bin/marudown +0 -29
- data/bin/marutest +0 -345
- data/docs/changelog.md +0 -334
- data/lib/maruku/errors_management.rb +0 -92
- data/lib/maruku/ext/math/latex_fix.rb +0 -12
- data/lib/maruku/input/parse_span_better.rb +0 -746
- data/lib/maruku/input/rubypants.rb +0 -225
- data/lib/maruku/input/type_detection.rb +0 -147
- data/lib/maruku/output/to_latex_entities.rb +0 -367
- data/lib/maruku/output/to_latex_strings.rb +0 -64
- data/lib/maruku/structures.rb +0 -167
- data/lib/maruku/structures_inspect.rb +0 -87
- data/lib/maruku/structures_iterators.rb +0 -61
- data/lib/maruku/tests/benchmark.rb +0 -82
- data/lib/maruku/tests/new_parser.rb +0 -373
- data/lib/maruku/tests/tests.rb +0 -136
- data/lib/maruku/usage/example1.rb +0 -33
- data/maruku_gem.rb +0 -33
- data/tests/bugs/code_in_links.md +0 -101
- data/tests/bugs/complex_escaping.md +0 -38
- data/tests/math/syntax.md +0 -46
- data/tests/math_usage/document.md +0 -13
- data/tests/others/abbreviations.md +0 -11
- data/tests/others/blank.md +0 -4
- data/tests/others/code.md +0 -5
- data/tests/others/code2.md +0 -8
- data/tests/others/code3.md +0 -16
- data/tests/others/email.md +0 -4
- data/tests/others/entities.md +0 -19
- data/tests/others/escaping.md +0 -16
- data/tests/others/extra_dl.md +0 -101
- data/tests/others/extra_header_id.md +0 -13
- data/tests/others/extra_table1.md +0 -40
- data/tests/others/footnotes.md +0 -17
- data/tests/others/headers.md +0 -10
- data/tests/others/hrule.md +0 -10
- data/tests/others/images.md +0 -20
- data/tests/others/inline_html.md +0 -42
- data/tests/others/links.md +0 -38
- data/tests/others/list1.md +0 -4
- data/tests/others/list2.md +0 -5
- data/tests/others/list3.md +0 -8
- data/tests/others/lists.md +0 -32
- data/tests/others/lists_after_paragraph.md +0 -44
- data/tests/others/lists_ol.md +0 -39
- data/tests/others/misc_sw.md +0 -105
- data/tests/others/one.md +0 -1
- data/tests/others/paragraphs.md +0 -13
- data/tests/others/sss06.md +0 -352
- data/tests/others/test.md +0 -4
- data/tests/s5/s5profiling.md +0 -48
- data/tests/unittest/bug_def.md +0 -28
- data/tests/unittest/email.md +0 -32
- data/tests/unittest/hang.md +0 -29
- data/tests/unittest/html2.md +0 -34
- data/tests/unittest/ie.md +0 -61
- data/tests/unittest/links2.md +0 -34
- data/tests/unittest/lists11.md +0 -28
- data/tests/unittest/lists6.md +0 -53
- data/tests/unittest/lists9.md +0 -76
- data/tests/unittest/lists_ol.md +0 -274
- data/tests/unittest/math/equations.md +0 -86
- data/tests/unittest/math/inline.md +0 -58
- data/tests/unittest/math/math2.md +0 -57
- data/tests/unittest/math/table.md +0 -37
- data/tests/unittest/notyet/header_after_par.md +0 -70
- data/tests/unittest/pending/empty_cells.md +0 -49
- data/tests/unittest/red_tests/abbrev.md +0 -1388
- data/tests/unittest/red_tests/lists7.md +0 -68
- data/tests/unittest/red_tests/lists7b.md +0 -128
- data/tests/unittest/red_tests/lists8.md +0 -76
- data/tests/unittest/red_tests/xml.md +0 -70
- data/tests/unittest/xml2.md +0 -31
- data/tests/unittest/xml3.md +0 -38
- data/tests/utf8-files/simple.md +0 -1
- data/unit_test_block.sh +0 -5
- data/unit_test_span.sh +0 -3
data/lib/maruku/html.rb
ADDED
@@ -0,0 +1,251 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
$warned_nokogiri = false
|
4
|
+
|
5
|
+
module MaRuKu
|
6
|
+
# Names of elements that are treated as span-level (inline). The fragment
# classes consult this set to decide whether the first element of a raw
# HTML fragment, and its descendants, get their text parsed as markdown.
# The list is kept in three runs: (X)HTML names, SVG names, MathML names.
xhtml_inline = %w[
  a abbr acronym audio b bdi bdo big br button canvas caption cite code
  col colgroup command datalist del details dfn dir em fieldset font form i img input ins
  kbd label legend mark meter optgroup option progress q rp rt ruby s samp select small
  source span strike strong sub summary sup tbody td tfoot th thead time tr track tt u var video wbr
]
svg_inline = %w[
  animate animateColor animateMotion animateTransform circle clipPath defs desc ellipse
  feGaussianBlur filter font-face font-face-name font-face-src foreignObject g glyph hkern
  linearGradient line marker mask metadata missing-glyph mpath path pattern polygon polyline
  radialGradient rect set stop svg switch text textPath title tspan use
]
mathml_inline = %w[
  annotation annotation-xml maction math menclose merror mfrac mfenced mi mmultiscripts mn mo
  mover mpadded mphantom mprescripts mroot mrow mspace msqrt mstyle msub msubsup msup mtable
  mtd mtext mtr munder munderover none semantics
]
HTML_INLINE_ELEMS = Set.new(xhtml_inline + svg_inline + mathml_inline)

# Text inside these tags is parsed as block-level markdown (not span-level).
BLOCK_TAGS = Set.new(%w[div section])
|
20
|
+
|
21
|
+
# This gets mixed into HTML MDElement nodes to hold the parsed document fragment
|
22
|
+
# Mixin that gives a node a readable and writable +parsed_html+ slot,
# used to hold the parsed document fragment.
module HTMLElement
  attr_reader :parsed_html
  attr_writer :parsed_html
end
|
25
|
+
|
26
|
+
# This is just a factory, not an actual class
|
27
|
+
# Factory module (not a real class): HTMLFragment.new hands back a concrete
# fragment object, either a NokogiriHTMLFragment or a REXMLHTMLFragment.
module HTMLFragment
  # Build a fragment for +raw_html+ using the configured backend.
  #
  # Nokogiri is used only when MaRuKu::Globals[:html_parser] is 'nokogiri'
  # and no earlier attempt to load it has failed; otherwise REXML is used.
  #
  # @param raw_html [String] the raw HTML source
  # @return [NokogiriHTMLFragment, REXMLHTMLFragment]
  def self.new(raw_html)
    via_nokogiri = nokogiri_fragment(raw_html)
    return via_nokogiri if via_nokogiri

    # Loaded lazily so REXML is only pulled in when it is actually needed.
    require 'rexml/document'
    REXMLHTMLFragment.new(raw_html)
  end

  # Try the Nokogiri backend. Returns nil when Nokogiri is not selected,
  # was already found to be unavailable, or fails to load now (in which
  # case a warning is printed once and the failure is remembered).
  def self.nokogiri_fragment(raw_html)
    return nil if $warned_nokogiri
    return nil unless MaRuKu::Globals[:html_parser] == 'nokogiri'

    begin
      require 'nokogiri'
      NokogiriHTMLFragment.new(raw_html)
    rescue LoadError
      warn "Nokogiri could not be loaded. Falling back to REXML."
      $warned_nokogiri = true
      nil
    end
  end
  private_class_method :nokogiri_fragment
end
|
46
|
+
|
47
|
+
# Nokogiri backend for HTML handling
|
48
|
+
# HTML fragment backend built on Nokogiri.
class NokogiriHTMLFragment
  # Parse +raw_html+ into a fragment.
  #
  # The HTML is wrapped in a dummy <html> document carrying a doctype
  # (needed only so entity references resolve); the <html> root element
  # becomes the fragment.
  #
  # @param raw_html [String]
  def initialize(raw_html)
    doctype = [
      '<!DOCTYPE html PUBLIC',
      '"-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"',
      '"http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">'
    ].join("\n")
    wrapped = doctype + "\n<html>" + raw_html.strip + '</html>'

    # nonet: parse option that forbids network access during parsing.
    document = Nokogiri::XML::Document.parse(wrapped) { |config| config.nonet }
    @fragment = document.root
  end

  # @return The name of the first child node in the fragment, or nil
  #   when the fragment has no children.
  def first_node_name
    head = @fragment.children.first
    head && head.name
  end

  # Append +class_name+ to the class attribute of each child of this
  # fragment, keeping any classes that are already present.
  def add_class(class_name)
    @fragment.children.each do |child|
      existing = (child['class'] || '').split(' ')
      child['class'] = (existing + [class_name]).join(' ')
    end
  end

  # Process markdown within the contents of some elements and
  # replace their contents with the processed version.
  #
  # Elements are selected when they carry a 'markdown' attribute, or when
  # they are the fragment's first child (or one of its span-level
  # descendants) and that first child is a span-level element.
  #
  # @param doc [MaRuKu::MDDocument] A document to process.
  def process_markdown_inside_elements(doc)
    targets = @fragment.css("[markdown]")

    head = @fragment.children.first
    if head && HTML_INLINE_ELEMS.include?(head.name)
      targets << head unless head.attribute('markdown')
      targets += span_descendents(head)
    end

    targets.each do |element|
      mode = element['markdown']
      element.remove_attribute('markdown')

      # markdown="0": the user asked for no markdown parsing inside.
      next if mode == "0"

      as_blocks = (mode == 'block') || BLOCK_TAGS.include?(element.name)

      # Only the direct text children of the element are processed.
      element.xpath("./text()").each do |text_node|
        render_markdown_text(doc, text_node, as_blocks)
      end
    end
  end

  # Convert this fragment to an HTML or XHTML string.
  # @return [String]
  def to_html
    # XOR against FORMAT is meant to switch the FORMAT flag off in the
    # default XHTML save options.
    save_options = Nokogiri::XML::Node::SaveOptions::DEFAULT_XHTML ^
                   Nokogiri::XML::Node::SaveOptions::FORMAT
    @fragment.children.each_with_object("") do |child, out|
      out << child.serialize(:save_with => save_options, :encoding => 'UTF-8')
    end
  end

  private

  # Replace one text node by the HTML produced from parsing it as markdown.
  # Text that is blank after escaping is left untouched.
  #
  # @param doc [MaRuKu::MDDocument] document used for parsing
  # @param text_node [Nokogiri::XML::Node] the text node to replace
  # @param as_blocks [Boolean] parse block-level (true) or span-level (false)
  def render_markdown_text(doc, text_node, as_blocks)
    source = MaRuKu::Out::HTML.escapeHTML(text_node.text)
    return if source.strip.empty?

    parsed = if as_blocks
               doc.parse_text_as_markdown(source)
             else
               doc.parse_span(source)
             end

    # restore leading and trailing spaces
    padding = /\A(\s*).*?(\s*)\z/.match(source)
    parsed = [padding[1]] + parsed + [padding[2]] if padding

    wrapper = doc.md_el(:dummy, parsed)

    # Nokogiri collapses consecutive Text nodes, so the text is first
    # swapped for a placeholder element and the output is inserted before it.
    guard = Nokogiri::XML::Element.new('guard', @fragment)
    text_node.replace(guard)
    wrapper.children_to_html.each { |piece| guard.before(piece.to_s) }
    guard.remove
  end

  # Get all span-level descendents of the given element, recursively,
  # as a flat NodeSet. Children that are not span-level are skipped
  # together with everything below them.
  #
  # @param e [Nokogiri::XML::Node] An element.
  # @return [Nokogiri::XML::NodeSet]
  def span_descendents(e)
    found = Nokogiri::XML::NodeSet.new(Nokogiri::XML::Document.new)
    e.element_children.each do |child|
      next unless HTML_INLINE_ELEMS.include?(child.name)

      found << child
      found += span_descendents(child)
    end
    found
  end
end
|
147
|
+
|
148
|
+
# An HTMLFragment implementation using REXML
|
149
|
+
# An HTMLFragment implementation using REXML
class REXMLHTMLFragment
  # Parse +raw_html+ into a fragment.
  #
  # The HTML is wrapped in a dummy <html> document with a doctype; the
  # <html> root element becomes the fragment.
  #
  # @param raw_html [String]
  def initialize(raw_html)
    doctype = [
      '<!DOCTYPE html PUBLIC',
      '"-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"',
      '"http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">'
    ].join("\n")
    wrapped = doctype + "\n<html>" + raw_html.strip + '</html>'

    @fragment = REXML::Document.new(wrapped).root
  end

  # The name of the first node in the fragment, or nil when the fragment
  # is empty or its first node has no name (e.g. a text node).
  def first_node_name
    first_child = @fragment.children.first
    (first_child && first_child.respond_to?(:name)) ? first_child.name : nil
  end

  # Add a class to the child elements of this fragment, keeping any
  # classes that are already present.
  def add_class(class_name)
    @fragment.each_element do |c|
      c.attributes['class'] = ((c.attributes['class'] || '').split(' ') + [class_name]).join(' ')
    end
  end

  # Process markdown within the contents of some elements and
  # replace their contents with the processed version.
  #
  # Elements are selected when they carry a 'markdown' attribute, or when
  # they are the fragment's first child (or one of its span-level
  # descendants) and that first child is a span-level element.
  #
  # @param doc the document object used to parse markdown; it must respond
  #   to parse_text_as_markdown, parse_span and md_el.
  def process_markdown_inside_elements(doc)
    elts = []
    @fragment.each_element('//*[@markdown]') do |e|
      elts << e
    end

    d = @fragment.children.first
    if d && HTML_INLINE_ELEMS.include?(first_node_name)
      elts << d unless d.attributes['markdown']
      elts += span_descendents(d)
    end

    elts.each do |e|
      # should we parse block-level or span-level?
      how = e.attributes['markdown']
      e.attributes.delete('markdown')

      next if "0" == how # user requests no markdown parsing inside
      parse_blocks = (how == 'block') || BLOCK_TAGS.include?(e.name)

      # Select all text children of e
      e.texts.each do |original_text|
        s = MaRuKu::Out::HTML.escapeHTML(original_text.value)
        next if s.strip.empty?

        # TODO extract common functionality
        parsed = parse_blocks ? doc.parse_text_as_markdown(s) : doc.parse_span(s)
        # restore leading and trailing spaces
        padding = /\A(\s*).*?(\s*)\z/.match(s)
        parsed = [padding[1]] + parsed + [padding[2]] if padding

        el = doc.md_el(:dummy, parsed)

        # Re-parse the generated HTML under a throwaway <dummy> root so
        # its nodes can be moved into the real tree.
        new_html = "<dummy>"
        el.children_to_html.each do |x|
          new_html << x.to_s
        end
        new_html << "</dummy>"

        newdoc = REXML::Document.new(new_html).root

        # Put the new nodes where the text was, then drop the old text.
        parent = original_text.parent
        newdoc.children.each do |c|
          parent.insert_before(original_text, c)
        end

        parent.delete(original_text)
      end
    end
  end

  # Convert this fragment to a string by serializing each child in turn.
  # @return [String]
  def to_html
    formatter = REXML::Formatters::Default.new(true)
    @fragment.children.inject("") do |out, child|
      out << formatter.write(child, '')
    end
  end

  private

  # Get all span-level descendents of the given element, recursively,
  # as an Array. Children that are not span-level are skipped together
  # with everything below them.
  #
  # @param e [REXML::Element] An element.
  # @return [Array]
  def span_descendents(e)
    descendents = []
    e.each_element do |c|
      next unless HTML_INLINE_ELEMS.include?(c.name)

      descendents << c
      descendents.concat(span_descendents(c))
    end
    # Return the accumulator explicitly: the value of each_element is not
    # the filtered, recursive list that callers expect.
    descendents
  end
end
|
251
|
+
end
|
@@ -1,326 +1,279 @@
|
|
1
|
-
|
2
|
-
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
|
3
|
-
#
|
4
|
-
# This file is part of Maruku.
|
5
|
-
#
|
6
|
-
# Maruku is free software; you can redistribute it and/or modify
|
7
|
-
# it under the terms of the GNU General Public License as published by
|
8
|
-
# the Free Software Foundation; either version 2 of the License, or
|
9
|
-
# (at your option) any later version.
|
10
|
-
#
|
11
|
-
# Maruku is distributed in the hope that it will be useful,
|
12
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
# GNU General Public License for more details.
|
15
|
-
#
|
16
|
-
# You should have received a copy of the GNU General Public License
|
17
|
-
# along with Maruku; if not, write to the Free Software
|
18
|
-
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
19
|
-
#++
|
20
|
-
|
21
|
-
|
22
|
-
module MaRuKu; module In; module Markdown; module SpanLevelParser
|
23
|
-
|
24
|
-
# a string scanner coded by me
|
25
|
-
class CharSourceManual; end
|
26
|
-
|
27
|
-
# a wrapper around StringScanner
|
28
|
-
class CharSourceStrscan; end
|
29
|
-
|
30
|
-
# A debug scanner that checks the correctness of both
|
31
|
-
# by comparing their output
|
32
|
-
class CharSourceDebug; end
|
33
|
-
|
34
|
-
# Choose!
|
35
|
-
|
36
|
-
CharSource = CharSourceManual # faster! 58ms vs. 65ms
|
37
|
-
#CharSource = CharSourceStrscan
|
38
|
-
#CharSource = CharSourceDebug
|
39
|
-
|
40
|
-
|
41
|
-
class CharSourceManual
|
42
|
-
include MaRuKu::Strings
|
43
|
-
|
44
|
-
def initialize(s, parent=nil)
|
45
|
-
raise "Passed #{s.class}" if not s.kind_of? String
|
46
|
-
@buffer = s
|
47
|
-
@buffer_index = 0
|
48
|
-
@parent = parent
|
49
|
-
end
|
50
|
-
|
51
|
-
# Return current char as a FixNum (or nil).
|
52
|
-
def cur_char; @buffer[@buffer_index] end
|
53
|
-
|
54
|
-
# Return the next n chars as a String.
|
55
|
-
def cur_chars(n); @buffer[@buffer_index,n] end
|
56
|
-
|
57
|
-
# Return the char after current char as a FixNum (or nil).
|
58
|
-
def next_char; @buffer[@buffer_index+1] end
|
59
|
-
|
60
|
-
def shift_char
|
61
|
-
c = @buffer[@buffer_index]
|
62
|
-
@buffer_index+=1
|
63
|
-
c
|
64
|
-
end
|
65
|
-
|
66
|
-
def ignore_char
|
67
|
-
@buffer_index+=1
|
68
|
-
nil
|
69
|
-
end
|
70
|
-
|
71
|
-
def ignore_chars(n)
|
72
|
-
@buffer_index+=n
|
73
|
-
nil
|
74
|
-
end
|
75
|
-
|
76
|
-
def current_remaining_buffer
|
77
|
-
@buffer[@buffer_index, @buffer.size-@buffer_index]
|
78
|
-
end
|
79
|
-
|
80
|
-
def cur_chars_are(string)
|
81
|
-
# There is a bug here
|
82
|
-
if false
|
83
|
-
r2 = /^.{#{@buffer_index}}#{Regexp.escape string}/m
|
84
|
-
@buffer =~ r2
|
85
|
-
else
|
86
|
-
cur_chars(string.size) == string
|
87
|
-
end
|
88
|
-
end
|
89
|
-
|
90
|
-
def next_matches(r)
|
91
|
-
r2 = /^.{#{@buffer_index}}#{r}/m
|
92
|
-
md = r2.match @buffer
|
93
|
-
return !!md
|
94
|
-
end
|
95
|
-
|
96
|
-
def read_regexp3(r)
|
97
|
-
r2 = /^.{#{@buffer_index}}#{r}/m
|
98
|
-
m = r2.match @buffer
|
99
|
-
if m
|
100
|
-
consumed = m.to_s.size - @buffer_index
|
101
|
-
# puts "Consumed #{consumed} chars (entire is #{m.to_s.inspect})"
|
102
|
-
ignore_chars consumed
|
103
|
-
else
|
104
|
-
# puts "Could not read regexp #{r2.inspect} from buffer "+
|
105
|
-
# " index=#{@buffer_index}"
|
106
|
-
# puts "Cur chars = #{cur_chars(20).inspect}"
|
107
|
-
# puts "Matches? = #{cur_chars(20) =~ r}"
|
108
|
-
end
|
109
|
-
m
|
110
|
-
end
|
111
|
-
|
112
|
-
def read_regexp(r)
|
113
|
-
r2 = /^#{r}/
|
114
|
-
rest = current_remaining_buffer
|
115
|
-
m = r2.match(rest)
|
116
|
-
if m
|
117
|
-
@buffer_index += m.to_s.size
|
118
|
-
# puts "#{r} matched #{rest.inspect}: #{m.to_s.inspect}"
|
119
|
-
end
|
120
|
-
return m
|
121
|
-
end
|
122
|
-
|
123
|
-
def consume_whitespace
|
124
|
-
while c = cur_char
|
125
|
-
if (c == ?\s || c == ?\t)
|
126
|
-
# puts "ignoring #{c}"
|
127
|
-
ignore_char
|
128
|
-
else
|
129
|
-
# puts "#{c} is not ws: "<<c
|
130
|
-
break
|
131
|
-
end
|
132
|
-
end
|
133
|
-
end
|
134
|
-
|
135
|
-
def read_text_chars(out)
|
136
|
-
s = @buffer.size; c=nil
|
137
|
-
while @buffer_index < s && (c=@buffer[@buffer_index]) &&
|
138
|
-
((c>=?a && c<=?z) || (c>=?A && c<=?Z))
|
139
|
-
out << c
|
140
|
-
@buffer_index += 1
|
141
|
-
end
|
142
|
-
end
|
143
|
-
|
144
|
-
def describe
|
145
|
-
s = describe_pos(@buffer, @buffer_index)
|
146
|
-
if @parent
|
147
|
-
s += "\n\n" + @parent.describe
|
148
|
-
end
|
149
|
-
s
|
150
|
-
end
|
151
|
-
include SpanLevelParser
|
152
|
-
end
|
1
|
+
require 'strscan'
|
153
2
|
|
154
|
-
|
155
|
-
len = 75
|
156
|
-
num_before = [len/2, buffer_index].min
|
157
|
-
num_after = [len/2, buffer.size-buffer_index].min
|
158
|
-
num_before_max = buffer_index
|
159
|
-
num_after_max = buffer.size-buffer_index
|
160
|
-
|
161
|
-
# puts "num #{num_before} #{num_after}"
|
162
|
-
num_before = [num_before_max, len-num_after].min
|
163
|
-
num_after = [num_after_max, len-num_before].min
|
164
|
-
# puts "num #{num_before} #{num_after}"
|
165
|
-
|
166
|
-
index_start = [buffer_index - num_before, 0].max
|
167
|
-
index_end = [buffer_index + num_after, buffer.size].min
|
168
|
-
|
169
|
-
size = index_end- index_start
|
170
|
-
|
171
|
-
# puts "- #{index_start} #{size}"
|
172
|
-
|
173
|
-
str = buffer[index_start, size]
|
174
|
-
str.gsub!("\n",'N')
|
175
|
-
str.gsub!("\t",'T')
|
176
|
-
|
177
|
-
if index_end == buffer.size
|
178
|
-
str += "EOF"
|
179
|
-
end
|
180
|
-
|
181
|
-
pre_s = buffer_index-index_start
|
182
|
-
pre_s = [pre_s, 0].max
|
183
|
-
pre_s2 = [len-pre_s,0].max
|
184
|
-
# puts "pre_S = #{pre_s}"
|
185
|
-
pre =" "*(pre_s)
|
186
|
-
|
187
|
-
"-"*len+"\n"+
|
188
|
-
str + "\n" +
|
189
|
-
"-"*pre_s + "|" + "-"*(pre_s2)+"\n"+
|
190
|
-
# pre + "|\n"+
|
191
|
-
pre + "+--- Byte #{buffer_index}\n"+
|
192
|
-
|
193
|
-
"Shown bytes [#{index_start} to #{size}] of #{buffer.size}:\n"+
|
194
|
-
add_tabs(buffer,1,">")
|
195
|
-
|
196
|
-
# "CharSource: At character #{@buffer_index} of block "+
|
197
|
-
# " beginning with:\n #{@buffer[0,50].inspect} ...\n"+
|
198
|
-
# " before: \n ... #{cur_chars(50).inspect} ... "
|
199
|
-
end
|
3
|
+
module MaRuKu::In::Markdown::SpanLevelParser
|
200
4
|
|
5
|
+
# a string scanner coded by me
|
6
|
+
class CharSourceManual; end
|
201
7
|
|
202
|
-
|
8
|
+
# a wrapper around StringScanner
|
9
|
+
class CharSourceStrscan; end
|
203
10
|
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
def initialize(s, parent=nil)
|
209
|
-
@s = StringScanner.new(s)
|
210
|
-
@parent = parent
|
211
|
-
end
|
212
|
-
|
213
|
-
# Return current char as a FixNum (or nil).
|
214
|
-
def cur_char
|
215
|
-
@s.peek(1)[0]
|
216
|
-
end
|
217
|
-
|
218
|
-
# Return the next n chars as a String.
|
219
|
-
def cur_chars(n);
|
220
|
-
@s.peek(n)
|
221
|
-
end
|
222
|
-
|
223
|
-
# Return the char after current char as a FixNum (or nil).
|
224
|
-
def next_char;
|
225
|
-
@s.peek(2)[1]
|
226
|
-
end
|
227
|
-
|
228
|
-
def shift_char
|
229
|
-
(@s.get_byte)[0]
|
230
|
-
end
|
231
|
-
|
232
|
-
def ignore_char
|
233
|
-
@s.get_byte
|
234
|
-
nil
|
235
|
-
end
|
236
|
-
|
237
|
-
def ignore_chars(n)
|
238
|
-
n.times do @s.get_byte end
|
239
|
-
nil
|
240
|
-
end
|
241
|
-
|
242
|
-
def current_remaining_buffer
|
243
|
-
@s.rest #nil #@buffer[@buffer_index, @buffer.size-@buffer_index]
|
244
|
-
end
|
245
|
-
|
246
|
-
def cur_chars_are(string)
|
247
|
-
cur_chars(string.size) == string
|
248
|
-
end
|
249
|
-
|
250
|
-
def next_matches(r)
|
251
|
-
len = @s.match?(r)
|
252
|
-
return !!len
|
253
|
-
end
|
254
|
-
|
255
|
-
def read_regexp(r)
|
256
|
-
string = @s.scan(r)
|
257
|
-
if string
|
258
|
-
return r.match(string)
|
259
|
-
else
|
260
|
-
return nil
|
261
|
-
end
|
262
|
-
end
|
263
|
-
|
264
|
-
def consume_whitespace
|
265
|
-
@s.scan(/\s+/)
|
266
|
-
nil
|
267
|
-
end
|
268
|
-
|
269
|
-
def describe
|
270
|
-
describe_pos(@s.string, @s.pos)
|
271
|
-
end
|
272
|
-
|
273
|
-
end
|
11
|
+
# Debugging scanner that checks the two implementations against each other
# by comparing their output. This is only a forward declaration; the
# methods are added when the class is reopened later in this module.
class CharSourceDebug
end
|
274
14
|
|
15
|
+
# Choose!
|
275
16
|
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
#
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
17
|
+
# Scanner implementation bound to the name CharSource.
# Exactly one of the assignments below is meant to be active; the other
# candidates are kept as comments so they can be switched in by hand.
CharSource = CharSourceManual # faster! 58ms vs. 65ms
#CharSource = CharSourceStrscan # Faster on LONG documents. But StringScanner is buggy in Rubinius
#CharSource = CharSourceDebug
|
20
|
+
|
21
|
+
|
22
|
+
class CharSourceManual
|
23
|
+
def initialize(s, parent=nil)
|
24
|
+
raise "Passed #{s.class}" if not s.kind_of? String
|
25
|
+
@buffer = s
|
26
|
+
@buffer_index = 0
|
27
|
+
@parent = parent
|
28
|
+
end
|
29
|
+
|
30
|
+
# Return current char as a String (or nil).
|
31
|
+
def cur_char
|
32
|
+
cur_chars(1)
|
33
|
+
end
|
34
|
+
|
35
|
+
# Return the next n chars as a String.
|
36
|
+
def cur_chars(n)
|
37
|
+
return nil if @buffer_index >= @buffer.size
|
38
|
+
@buffer[@buffer_index, n]
|
39
|
+
end
|
40
|
+
|
41
|
+
# Return the char after current char as a String (or nil).
|
42
|
+
def next_char
|
43
|
+
return nil if @buffer_index + 1 >= @buffer.size
|
44
|
+
@buffer[@buffer_index + 1, 1]
|
45
|
+
end
|
46
|
+
|
47
|
+
def shift_char
|
48
|
+
c = cur_char
|
49
|
+
@buffer_index += 1
|
50
|
+
c
|
51
|
+
end
|
52
|
+
|
53
|
+
def ignore_char
|
54
|
+
@buffer_index += 1
|
55
|
+
end
|
56
|
+
|
57
|
+
def ignore_chars(n)
|
58
|
+
@buffer_index += n
|
59
|
+
end
|
60
|
+
|
61
|
+
def current_remaining_buffer
|
62
|
+
@buffer[@buffer_index, @buffer.size - @buffer_index]
|
63
|
+
end
|
64
|
+
|
65
|
+
def cur_chars_are(string)
|
66
|
+
cur_chars(string.size) == string
|
67
|
+
end
|
68
|
+
|
69
|
+
def next_matches(r)
|
70
|
+
r2 = /^.{#{@buffer_index}}#{r}/m
|
71
|
+
r2.match @buffer
|
72
|
+
end
|
73
|
+
|
74
|
+
def read_regexp(r)
|
75
|
+
r2 = /^#{r}/
|
76
|
+
rest = current_remaining_buffer
|
77
|
+
m = r2.match(rest)
|
78
|
+
if m
|
79
|
+
@buffer_index += m.to_s.size
|
80
|
+
end
|
81
|
+
m
|
82
|
+
end
|
83
|
+
|
84
|
+
def consume_whitespace
|
85
|
+
while c = cur_char
|
86
|
+
break unless (c == ' ' || c == "\t")
|
87
|
+
ignore_char
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
def describe
|
92
|
+
s = describe_pos(@buffer, @buffer_index)
|
93
|
+
if @parent
|
94
|
+
s += "\n\n" + @parent.describe
|
95
|
+
end
|
96
|
+
s
|
97
|
+
end
|
98
|
+
|
99
|
+
def describe_pos(buffer, buffer_index)
|
100
|
+
len = 75
|
101
|
+
num_before = [len/2, buffer_index].min
|
102
|
+
num_after = [len/2, buffer.size - buffer_index].min
|
103
|
+
num_before_max = buffer_index
|
104
|
+
num_after_max = buffer.size - buffer_index
|
105
|
+
|
106
|
+
num_before = [num_before_max, len - num_after].min
|
107
|
+
num_after = [num_after_max, len - num_before].min
|
108
|
+
|
109
|
+
index_start = [buffer_index - num_before, 0].max
|
110
|
+
index_end = [buffer_index + num_after, buffer.size].min
|
111
|
+
|
112
|
+
size = index_end - index_start
|
113
|
+
|
114
|
+
str = buffer[index_start, size]
|
115
|
+
str.gsub!("\n", 'N')
|
116
|
+
str.gsub!("\t", 'T')
|
117
|
+
|
118
|
+
if index_end == buffer.size
|
119
|
+
str += "EOF"
|
120
|
+
end
|
121
|
+
|
122
|
+
pre_s = buffer_index - index_start
|
123
|
+
pre_s = [pre_s, 0].max
|
124
|
+
pre_s2 = [len - pre_s, 0].max
|
125
|
+
pre = " " * pre_s
|
126
|
+
|
127
|
+
"-" * len + "\n" +
|
128
|
+
str + "\n" +
|
129
|
+
"-" * pre_s + "|" + "-" * pre_s2 + "\n" +
|
130
|
+
pre + "+--- Byte #{buffer_index}\n"+
|
325
131
|
|
326
|
-
|
132
|
+
"Shown bytes [#{index_start} to #{size}] of #{buffer.size}:\n"+
|
133
|
+
buffer.gsub(/^/, ">")
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
# Scanner with the same interface as CharSourceManual, backed by the
# standard library's StringScanner.
#
# StringScanner#peek, #pos and #rest_size count bytes while #getch and
# String#size count characters; every method here works in characters so
# that multibyte text is handled consistently.
class CharSourceStrscan

  # s::      the String to scan
  # parent:: optional enclosing source; when given, its #describe output
  #          is appended to this scanner's #describe output
  def initialize(s, parent=nil)
    @scanner = StringScanner.new(s)
    @size = s.size
    @parent = parent
  end

  # Return current char as a String (or nil at the end of the input).
  def cur_char
    # An anchored check returns one whole character; peek(1) would return
    # a single byte.
    @scanner.check(/./m)
  end

  # Return the next n chars as a String ("" at the end of the input).
  def cur_chars(n)
    @scanner.rest[0, n]
  end

  # Return the char after current char as a String (or nil).
  def next_char
    pair = @scanner.check(/../m)
    pair && pair[1]
  end

  # Return a character as a String, advancing the pointer.
  # Returns nil at the end of the input.
  def shift_char
    @scanner.getch
  end

  # Advance the pointer by one character. Always returns nil.
  def ignore_char
    @scanner.getch
    nil
  end

  # Advance the pointer by n characters. Always returns nil.
  def ignore_chars(n)
    n.times { @scanner.getch }
    nil
  end

  # Return the rest of the string
  def current_remaining_buffer
    @scanner.rest
  end

  # Returns true if string matches what we're pointing to
  def cur_chars_are(string)
    # peek takes a byte count, hence bytesize rather than size.
    @scanner.peek(string.bytesize) == string
  end

  # Returns a truthy value (the matched String) if Regexp r matches what
  # we're pointing to, nil otherwise. The pointer is not moved.
  def next_matches(r)
    @scanner.check(r)
  end

  # Consume text matching Regexp r at the pointer. Returns the MatchData
  # obtained by re-matching r against the consumed text, or nil when r
  # does not match (pointer unchanged).
  # NOTE(review): a pattern that depends on text outside the consumed part
  # (lookahead/lookbehind) may fail to re-match here - confirm that no
  # caller passes one.
  def read_regexp(r)
    matched = @scanner.scan(r)
    matched && r.match(matched)
  end

  # Skip any run of whitespace (as matched by \s). Always returns nil.
  def consume_whitespace
    @scanner.skip(/\s+/)
    nil
  end

  # Return a multi-line description of the current position: a window of
  # at most 75 characters around the pointer, a marker line, and the whole
  # input with every line prefixed by ">". The parent's description is
  # appended when a parent was given.
  def describe
    len = 75
    # Work with the character position: it is used to slice the String
    # and is compared with @size, both of which count characters.
    pos = @scanner.charpos
    remaining = @size - pos

    num_after = [len/2, remaining].min
    num_before = [pos, len - num_after].min
    num_after = [remaining, len - num_before].min

    index_start = [pos - num_before, 0].max
    index_end = [pos + num_after, @size].min

    size = index_end - index_start

    # The slice is a new String, so the substitutions leave the input intact.
    str = @scanner.string[index_start, size]
    str.gsub!("\n", 'N')
    str.gsub!("\t", 'T')

    if index_end == @size
      str += "EOF"
    end

    pre_s = [pos - index_start, 0].max
    pre_s2 = [len - pre_s, 0].max
    pre = " " * pre_s

    s = "-" * len + "\n" +
      str + "\n" +
      "-" * pre_s + "|" + "-" * pre_s2 + "\n" +
      pre + "+--- Byte #{pos}\n" +
      # Report the end of the window, not its length.
      "Shown bytes [#{index_start} to #{index_end}] of #{@size}:\n" +
      @scanner.string.gsub(/^/, ">")
    s += "\n\n" + @parent.describe if @parent
    s
  end
end
|
233
|
+
|
234
|
+
# Debugging scanner: runs a CharSourceManual and a CharSourceStrscan side
# by side on the same input, forwards every call to both and compares the
# results. On any disagreement it prints a report and terminates the
# process with Kernel#exit.
class CharSourceDebug
  # s::      the String to scan
  # parent:: optional enclosing source, passed on to both scanners
  #          (defaults to nil, like the other scanner classes)
  def initialize(s, parent=nil)
    @a = CharSourceManual.new(s, parent)
    @b = CharSourceStrscan.new(s, parent)
  end

  # Forward +methodname+ to both scanners, verify that they returned the
  # same result and still point at the same character, and return the
  # manual scanner's result.
  def method_missing(methodname, *args)
    a_bef = @a.describe
    b_bef = @b.describe

    a = @a.send(methodname, *args)
    b = @b.send(methodname, *args)

    if a.kind_of? MatchData
      # One scanner may report a successful match as MatchData and the
      # other as the matched String; String has no to_a, so in that case
      # both simply count as "matched". Otherwise compare the captures.
      same = b.kind_of?(String) || a.to_a == b.to_a
      unless same
        puts "called: #{methodname}(#{args})"
        puts "Matchdata:\na = #{a.to_a.inspect}\nb = #{b.to_a.inspect}"
        puts "AFTER: " + @a.describe
        puts "AFTER: " + @b.describe
        puts "BEFORE: " + a_bef
        puts "BEFORE: " + b_bef
        puts caller.join("\n")
        exit
      end
    else
      if a != b
        puts "called: #{methodname}(#{args})"
        puts "Attenzione!\na = #{a.inspect}\nb = #{b.inspect}"
        puts "" + @a.describe
        puts "" + @b.describe
        puts caller.join("\n")
        exit
      end
    end

    # Both scanners must be positioned on the same character afterwards.
    if @a.cur_char != @b.cur_char
      puts "Fuori sincronia dopo #{methodname}(#{args})"
      puts "" + @a.describe
      puts "" + @b.describe
      exit
    end

    return a
  end

  # Keep respond_to? truthful for the calls method_missing forwards.
  def respond_to_missing?(methodname, include_private = false)
    (@a.respond_to?(methodname, include_private) &&
      @b.respond_to?(methodname, include_private)) || super
  end
end
|
279
|
+
end
|