maruku 0.6.1 → 0.7.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/MIT-LICENSE.txt +20 -0
- data/bin/maruku +153 -152
- data/bin/marutex +2 -29
- data/data/entities.xml +261 -0
- data/docs/math.md +14 -18
- data/lib/maruku.rb +65 -77
- data/lib/maruku/attributes.rb +109 -214
- data/lib/maruku/defaults.rb +45 -67
- data/lib/maruku/document.rb +43 -0
- data/lib/maruku/element.rb +112 -0
- data/lib/maruku/errors.rb +71 -0
- data/lib/maruku/ext/div.rb +105 -113
- data/lib/maruku/ext/fenced_code.rb +97 -0
- data/lib/maruku/ext/math.rb +22 -26
- data/lib/maruku/ext/math/elements.rb +20 -26
- data/lib/maruku/ext/math/mathml_engines/blahtex.rb +92 -104
- data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +33 -26
- data/lib/maruku/ext/math/mathml_engines/none.rb +11 -19
- data/lib/maruku/ext/math/mathml_engines/ritex.rb +2 -4
- data/lib/maruku/ext/math/parsing.rb +107 -113
- data/lib/maruku/ext/math/to_html.rb +184 -187
- data/lib/maruku/ext/math/to_latex.rb +30 -21
- data/lib/maruku/helpers.rb +158 -257
- data/lib/maruku/html.rb +254 -0
- data/lib/maruku/input/charsource.rb +272 -319
- data/lib/maruku/input/extensions.rb +62 -63
- data/lib/maruku/input/html_helper.rb +220 -189
- data/lib/maruku/input/linesource.rb +90 -110
- data/lib/maruku/input/mdline.rb +129 -0
- data/lib/maruku/input/parse_block.rb +618 -612
- data/lib/maruku/input/parse_doc.rb +145 -215
- data/lib/maruku/input/parse_span.rb +658 -0
- data/lib/maruku/input/rubypants.rb +200 -128
- data/lib/maruku/inspect_element.rb +60 -0
- data/lib/maruku/maruku.rb +10 -31
- data/lib/maruku/output/entity_table.rb +33 -0
- data/lib/maruku/output/s5/fancy.rb +462 -462
- data/lib/maruku/output/s5/to_s5.rb +115 -135
- data/lib/maruku/output/to_html.rb +898 -983
- data/lib/maruku/output/to_latex.rb +561 -560
- data/lib/maruku/output/to_markdown.rb +207 -162
- data/lib/maruku/output/to_s.rb +11 -52
- data/lib/maruku/string_utils.rb +129 -179
- data/lib/maruku/toc.rb +185 -196
- data/lib/maruku/version.rb +33 -38
- data/spec/block_docs/abbrev.md +776 -0
- data/{tests/unittest → spec/block_docs}/abbreviations.md +11 -20
- data/{tests/unittest → spec/block_docs}/alt.md +2 -14
- data/{tests/unittest/pending → spec/block_docs}/amps.md +1 -13
- data/{tests/unittest → spec/block_docs}/attributes/att2.md +0 -12
- data/{tests/unittest → spec/block_docs}/attributes/att3.md +2 -14
- data/{tests/unittest → spec/block_docs}/attributes/attributes.md +12 -16
- data/{tests/unittest → spec/block_docs}/attributes/circular.md +0 -12
- data/{tests/unittest → spec/block_docs}/attributes/default.md +1 -13
- data/{tests/unittest → spec/block_docs}/blank.md +0 -12
- data/{tests/unittest → spec/block_docs}/blanks_in_code.md +16 -15
- data/{tests/unittest/loss.md → spec/block_docs/bug_def.md} +6 -18
- data/{tests/unittest → spec/block_docs}/bug_table.md +3 -15
- data/{tests/unittest → spec/block_docs}/code.md +7 -14
- data/{tests/unittest → spec/block_docs}/code2.md +4 -14
- data/{tests/unittest → spec/block_docs}/code3.md +12 -16
- data/{tests/unittest → spec/block_docs}/data_loss.md +2 -14
- data/{tests/unittest → spec/block_docs}/divs/div1.md +0 -12
- data/{tests/unittest → spec/block_docs}/divs/div2.md +0 -12
- data/{tests/unittest → spec/block_docs}/divs/div3_nest.md +3 -15
- data/{tests/unittest → spec/block_docs}/easy.md +1 -13
- data/spec/block_docs/email.md +29 -0
- data/{tests/unittest/pending → spec/block_docs}/empty_cells.md +3 -15
- data/{tests/unittest → spec/block_docs}/encoding/iso-8859-1.md +1 -14
- data/{tests/unittest → spec/block_docs}/encoding/utf-8.md +0 -12
- data/{tests/unittest → spec/block_docs}/entities.md +27 -29
- data/{tests/unittest/notyet → spec/block_docs}/escape.md +2 -14
- data/{tests/unittest → spec/block_docs}/escaping.md +11 -22
- data/{tests/unittest → spec/block_docs}/extra_dl.md +2 -13
- data/{tests/unittest → spec/block_docs}/extra_header_id.md +14 -20
- data/{tests/unittest → spec/block_docs}/extra_table1.md +3 -15
- data/spec/block_docs/fenced_code_blocks.md +66 -0
- data/spec/block_docs/fenced_code_blocks_highlighted.md +18 -0
- data/{tests/unittest → spec/block_docs}/footnotes.md +12 -24
- data/spec/block_docs/footnotes2.md +78 -0
- data/spec/block_docs/hard.md +25 -0
- data/spec/block_docs/header_after_par.md +62 -0
- data/{tests/unittest → spec/block_docs}/headers.md +10 -18
- data/{tests/unittest → spec/block_docs}/hex_entities.md +7 -18
- data/{tests/unittest → spec/block_docs}/hrule.md +5 -12
- data/{tests/unittest → spec/block_docs}/html3.md +1 -13
- data/{tests/unittest → spec/block_docs}/html4.md +2 -14
- data/{tests/unittest → spec/block_docs}/html5.md +2 -14
- data/spec/block_docs/html_block_in_para.md +22 -0
- data/spec/block_docs/html_inline.md +25 -0
- data/spec/block_docs/html_trailing.md +31 -0
- data/spec/block_docs/ie.md +62 -0
- data/spec/block_docs/iframe.md +29 -0
- data/{tests/unittest → spec/block_docs}/images.md +22 -28
- data/{tests/unittest → spec/block_docs}/images2.md +7 -17
- data/{tests/unittest → spec/block_docs}/inline_html.md +37 -67
- data/{tests/unittest → spec/block_docs}/inline_html2.md +1 -13
- data/spec/block_docs/inline_html_beginning.md +10 -0
- data/spec/block_docs/issue20.md +9 -0
- data/spec/block_docs/issue26.md +22 -0
- data/spec/block_docs/issue29.md +9 -0
- data/spec/block_docs/issue30.md +30 -0
- data/spec/block_docs/issue31.md +25 -0
- data/spec/block_docs/issue40.md +40 -0
- data/spec/block_docs/issue64.md +55 -0
- data/spec/block_docs/issue67.md +19 -0
- data/spec/block_docs/issue70.md +11 -0
- data/spec/block_docs/issue72.md +17 -0
- data/spec/block_docs/issue74.md +38 -0
- data/spec/block_docs/issue79.md +15 -0
- data/spec/block_docs/issue83.md +13 -0
- data/spec/block_docs/issue85.md +25 -0
- data/spec/block_docs/issue88.md +19 -0
- data/spec/block_docs/issue89.md +12 -0
- data/spec/block_docs/issue90.md +38 -0
- data/{tests/unittest/pending → spec/block_docs}/link.md +21 -18
- data/{tests/unittest → spec/block_docs}/links.md +33 -32
- data/spec/block_docs/links2.md +21 -0
- data/{tests/unittest → spec/block_docs}/list1.md +0 -12
- data/{tests/unittest → spec/block_docs}/list12.md +2 -14
- data/{tests/unittest → spec/block_docs}/list2.md +2 -14
- data/spec/block_docs/list_multipara.md +42 -0
- data/{tests/unittest → spec/block_docs}/lists.md +28 -29
- data/{tests/unittest → spec/block_docs}/lists10.md +2 -14
- data/spec/block_docs/lists11.md +23 -0
- data/spec/block_docs/lists12.md +43 -0
- data/spec/block_docs/lists13.md +55 -0
- data/spec/block_docs/lists14.md +61 -0
- data/spec/block_docs/lists15.md +36 -0
- data/spec/block_docs/lists6.md +88 -0
- data/spec/block_docs/lists7b.md +58 -0
- data/spec/block_docs/lists9.md +53 -0
- data/{tests/unittest → spec/block_docs}/lists_after_paragraph.md +19 -25
- data/spec/block_docs/lists_blank.md +35 -0
- data/{tests/unittest/list3.md → spec/block_docs/lists_blockquote_code.md} +2 -14
- data/{tests/unittest/list4.md → spec/block_docs/lists_need_blank_line.md} +50 -21
- data/spec/block_docs/lists_nested.md +44 -0
- data/spec/block_docs/lists_nested_blankline.md +28 -0
- data/spec/block_docs/lists_nested_deep.md +43 -0
- data/{tests/unittest → spec/block_docs}/lists_ol.md +37 -54
- data/spec/block_docs/lists_paraindent.md +47 -0
- data/spec/block_docs/lists_tab.md +54 -0
- data/spec/block_docs/loss.md +17 -0
- data/spec/block_docs/math-blahtex/equations.md +30 -0
- data/spec/block_docs/math-blahtex/inline.md +48 -0
- data/spec/block_docs/math-blahtex/math2.md +45 -0
- data/spec/block_docs/math-blahtex/table.md +25 -0
- data/spec/block_docs/math/embedded_invalid_svg.md +79 -0
- data/spec/block_docs/math/embedded_svg.md +97 -0
- data/spec/block_docs/math/equations.md +44 -0
- data/{tests/unittest → spec/block_docs}/math/inline.md +7 -19
- data/spec/block_docs/math/math2.md +45 -0
- data/{tests/unittest → spec/block_docs}/math/notmath.md +0 -12
- data/spec/block_docs/math/raw_mathml.md +87 -0
- data/spec/block_docs/math/table.md +25 -0
- data/{tests/unittest → spec/block_docs}/math/table2.md +5 -17
- data/{tests/unittest → spec/block_docs}/misc_sw.md +181 -118
- data/{tests/unittest → spec/block_docs}/olist.md +6 -18
- data/{tests/unittest → spec/block_docs}/one.md +0 -12
- data/{tests/unittest → spec/block_docs}/paragraph.md +0 -12
- data/{tests/unittest → spec/block_docs}/paragraph_rules/dont_merge_ref.md +4 -12
- data/{tests/unittest → spec/block_docs}/paragraph_rules/tab_is_blank.md +0 -12
- data/{tests/unittest → spec/block_docs}/paragraphs.md +1 -13
- data/{tests/unittest → spec/block_docs}/recover/recover_links.md +4 -16
- data/{tests/unittest/pending/ref.md → spec/block_docs/ref_with_period.md} +7 -16
- data/spec/block_docs/ref_with_title.md +22 -0
- data/{tests/unittest → spec/block_docs}/references/long_example.md +16 -23
- data/{tests/unittest → spec/block_docs}/references/spaces_and_numbers.md +0 -12
- data/{tests/unittest → spec/block_docs}/smartypants.md +24 -31
- data/{tests/unittest → spec/block_docs}/syntax_hl.md +13 -17
- data/{tests/unittest → spec/block_docs}/table_attributes.md +2 -14
- data/spec/block_docs/tables.md +58 -0
- data/{tests/unittest → spec/block_docs}/test.md +1 -13
- data/{tests/unittest/notyet → spec/block_docs}/ticks.md +1 -13
- data/spec/block_docs/toc.md +87 -0
- data/{tests/unittest/notyet → spec/block_docs}/triggering.md +14 -25
- data/{tests/unittest → spec/block_docs}/underscore_in_words.md +0 -12
- data/{tests/unittest → spec/block_docs}/wrapping.md +4 -16
- data/spec/block_docs/xml.md +33 -0
- data/{tests/unittest → spec/block_docs}/xml2.md +0 -12
- data/spec/block_docs/xml3.md +24 -0
- data/{tests/unittest → spec/block_docs}/xml_instruction.md +9 -20
- data/spec/block_spec.rb +110 -0
- data/spec/cli_spec.rb +8 -0
- data/spec/span_spec.rb +256 -0
- data/spec/spec_helper.rb +2 -0
- data/spec/to_html_utf8_spec.rb +13 -0
- metadata +205 -243
- metadata.gz.sig +3 -0
- data/Rakefile +0 -48
- data/bin/marudown +0 -29
- data/bin/marutest +0 -345
- data/docs/changelog.md +0 -334
- data/lib/maruku/errors_management.rb +0 -92
- data/lib/maruku/ext/math/latex_fix.rb +0 -12
- data/lib/maruku/input/parse_span_better.rb +0 -746
- data/lib/maruku/input/type_detection.rb +0 -147
- data/lib/maruku/output/to_latex_entities.rb +0 -367
- data/lib/maruku/output/to_latex_strings.rb +0 -64
- data/lib/maruku/structures.rb +0 -167
- data/lib/maruku/structures_inspect.rb +0 -87
- data/lib/maruku/structures_iterators.rb +0 -61
- data/lib/maruku/tests/benchmark.rb +0 -82
- data/lib/maruku/tests/new_parser.rb +0 -373
- data/lib/maruku/tests/tests.rb +0 -136
- data/lib/maruku/usage/example1.rb +0 -33
- data/tests/bugs/code_in_links.md +0 -101
- data/tests/bugs/complex_escaping.md +0 -38
- data/tests/math/syntax.md +0 -46
- data/tests/math_usage/document.md +0 -13
- data/tests/others/abbreviations.md +0 -11
- data/tests/others/blank.md +0 -4
- data/tests/others/code.md +0 -5
- data/tests/others/code2.md +0 -8
- data/tests/others/code3.md +0 -16
- data/tests/others/email.md +0 -4
- data/tests/others/entities.md +0 -19
- data/tests/others/escaping.md +0 -16
- data/tests/others/extra_dl.md +0 -101
- data/tests/others/extra_header_id.md +0 -13
- data/tests/others/extra_table1.md +0 -40
- data/tests/others/footnotes.md +0 -17
- data/tests/others/headers.md +0 -10
- data/tests/others/hrule.md +0 -10
- data/tests/others/images.md +0 -20
- data/tests/others/inline_html.md +0 -42
- data/tests/others/links.md +0 -38
- data/tests/others/list1.md +0 -4
- data/tests/others/list2.md +0 -5
- data/tests/others/list3.md +0 -8
- data/tests/others/lists.md +0 -32
- data/tests/others/lists_after_paragraph.md +0 -44
- data/tests/others/lists_ol.md +0 -39
- data/tests/others/misc_sw.md +0 -105
- data/tests/others/one.md +0 -1
- data/tests/others/paragraphs.md +0 -13
- data/tests/others/sss06.md +0 -352
- data/tests/others/test.md +0 -4
- data/tests/s5/s5profiling.md +0 -48
- data/tests/unittest/bug_def.md +0 -28
- data/tests/unittest/email.md +0 -32
- data/tests/unittest/html2.md +0 -34
- data/tests/unittest/ie.md +0 -61
- data/tests/unittest/links2.md +0 -34
- data/tests/unittest/lists11.md +0 -28
- data/tests/unittest/lists6.md +0 -53
- data/tests/unittest/lists9.md +0 -76
- data/tests/unittest/math/equations.md +0 -86
- data/tests/unittest/math/math2.md +0 -57
- data/tests/unittest/math/table.md +0 -37
- data/tests/unittest/notyet/header_after_par.md +0 -70
- data/tests/unittest/red_tests/abbrev.md +0 -1388
- data/tests/unittest/red_tests/lists7.md +0 -68
- data/tests/unittest/red_tests/lists7b.md +0 -128
- data/tests/unittest/red_tests/lists8.md +0 -76
- data/tests/unittest/red_tests/xml.md +0 -70
- data/tests/unittest/xml3.md +0 -38
- data/tests/utf8-files/simple.md +0 -1
- data/unit_test_block.sh +0 -5
- data/unit_test_span.sh +0 -3
@@ -1,111 +1,91 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
#
|
4
|
-
#
|
5
|
-
#
|
6
|
-
#
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
# Returns the type of next line as a string
|
92
|
-
# breaks at first :definition
|
93
|
-
def tell_me_the_future
|
94
|
-
s = ""; num_e = 0;
|
95
|
-
for i in @lines_index..@lines.size-1
|
96
|
-
c = case @lines[i].md_type
|
97
|
-
when :text; "t"
|
98
|
-
when :empty; num_e+=1; "e"
|
99
|
-
when :definition; "d"
|
100
|
-
else "o"
|
101
|
-
end
|
102
|
-
s += c
|
103
|
-
break if c == "d" or num_e>1
|
104
|
-
end
|
105
|
-
s
|
106
|
-
end
|
107
|
-
|
108
|
-
end # linesource
|
109
|
-
|
110
|
-
end end end end # block
|
1
|
+
module MaRuKu::In::Markdown::BlockLevelParser
|
2
|
+
|
3
|
+
# This represents a source of lines that can be consumed.
|
4
|
+
#
|
5
|
+
# It is the twin of CharSource.
|
6
|
+
#
|
7
|
+
|
8
|
+
class LineSource
|
9
|
+
attr_reader :parent
|
10
|
+
|
11
|
+
def initialize(lines, parent=nil, parent_offset=nil)
|
12
|
+
raise "NIL lines? " unless lines
|
13
|
+
@lines = lines.map {|l| l.kind_of?(MaRuKu::MDLine) ? l : MaRuKu::MDLine.new(l) }
|
14
|
+
@lines_index = 0
|
15
|
+
@parent = parent
|
16
|
+
@parent_offset = parent_offset
|
17
|
+
end
|
18
|
+
|
19
|
+
def cur_line
|
20
|
+
@lines[@lines_index]
|
21
|
+
end
|
22
|
+
|
23
|
+
def next_line
|
24
|
+
@lines[@lines_index + 1]
|
25
|
+
end
|
26
|
+
|
27
|
+
def shift_line
|
28
|
+
raise "Over the rainbow" if @lines_index >= @lines.size
|
29
|
+
l = @lines[@lines_index]
|
30
|
+
@lines_index += 1
|
31
|
+
l
|
32
|
+
end
|
33
|
+
|
34
|
+
def ignore_line
|
35
|
+
raise "Over the rainbow" if @lines_index >= @lines.size
|
36
|
+
@lines_index += 1
|
37
|
+
end
|
38
|
+
|
39
|
+
def describe
|
40
|
+
s = "At line #{original_line_number(@lines_index)}\n"
|
41
|
+
|
42
|
+
context = 3 # lines
|
43
|
+
from = [@lines_index - context, 0].max
|
44
|
+
to = [@lines_index + context, @lines.size - 1].min
|
45
|
+
|
46
|
+
from.upto(to) do |i|
|
47
|
+
prefix = (i == @lines_index) ? '--> ' : ' ';
|
48
|
+
l = @lines[i]
|
49
|
+
s += "%10s %4s|%s" %
|
50
|
+
[@lines[i].md_type.to_s, prefix, l]
|
51
|
+
|
52
|
+
s += "|\n"
|
53
|
+
end
|
54
|
+
|
55
|
+
s
|
56
|
+
end
|
57
|
+
|
58
|
+
def original_line_number(index)
|
59
|
+
if @parent
|
60
|
+
index + @parent.original_line_number(@parent_offset)
|
61
|
+
else
|
62
|
+
1 + index
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
def cur_index
|
67
|
+
@lines_index
|
68
|
+
end
|
69
|
+
|
70
|
+
# Returns the types of the upcoming lines as a string, one character per line (t = text, e = empty, d = definition, o = other);
|
71
|
+
# stops after the first :definition line or after the second empty line
|
72
|
+
def tell_me_the_future
|
73
|
+
s = ""
|
74
|
+
num_e = 0
|
75
|
+
|
76
|
+
@lines_index.upto(@lines.size - 1) do |i|
|
77
|
+
c = case @lines[i].md_type
|
78
|
+
when :text; "t"
|
79
|
+
when :empty; num_e += 1; "e"
|
80
|
+
when :definition; "d"
|
81
|
+
else "o"
|
82
|
+
end
|
83
|
+
s << c
|
84
|
+
break if c == "d" or num_e > 1
|
85
|
+
end
|
86
|
+
s
|
87
|
+
end
|
88
|
+
|
89
|
+
end # linesource
|
90
|
+
end
|
111
91
|
|
@@ -0,0 +1,129 @@
|
|
1
|
+
# This code does the classification of lines for block-level parsing.
|
2
|
+
module MaRuKu
|
3
|
+
|
4
|
+
# Represents a single line in a Markdown source file, as produced by
|
5
|
+
# LineSource.
|
6
|
+
class MDLine < String
|
7
|
+
def md_type
|
8
|
+
@md_type ||= line_md_type
|
9
|
+
end
|
10
|
+
|
11
|
+
# Returns the number of leading spaces on this string,
|
12
|
+
# considering that a tab counts as {MaRuKu::Strings::TAB_SIZE} spaces.
|
13
|
+
#
|
14
|
+
# Takes no arguments; the count is computed on the receiver itself.
|
15
|
+
# @return [Integer]
|
16
|
+
def number_of_leading_spaces
|
17
|
+
if self =~ /\A\s+/
|
18
|
+
spaces = $&
|
19
|
+
spaces.count(" ") + spaces.count("\t") * MaRuKu::Strings::TAB_SIZE
|
20
|
+
else
|
21
|
+
0
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def gsub!(*args)
|
26
|
+
# Any in-place-modification method should reset the md_type
|
27
|
+
@md_type = nil
|
28
|
+
super
|
29
|
+
end
|
30
|
+
|
31
|
+
private
|
32
|
+
|
33
|
+
def line_md_type
|
34
|
+
# The order of evaluation is important (:text is a catch-all)
|
35
|
+
return :text if self =~ /\A[a-zA-Z]/
|
36
|
+
return :empty if self =~ /\A\s*\z/
|
37
|
+
return :footnote_text if self =~ FootnoteText
|
38
|
+
return :ref_definition if self =~ LinkRegex || self =~ IncompleteLink
|
39
|
+
return :abbreviation if self =~ Abbreviation
|
40
|
+
return :definition if self =~ Definition
|
41
|
+
# I had a bug with emails and urls at the beginning of the
|
42
|
+
# line that were mistaken for raw_html
|
43
|
+
return :text if self =~ /\A[ ]{0,3}<([^:@>]+?@[^:@>]+?)>/
|
44
|
+
return :text if self =~ /\A[ ]{0,3}<http:/
|
45
|
+
# raw html is like PHP Markdown Extra: at most three spaces before
|
46
|
+
return :xml_instr if self =~ /\A\s*<\?/
|
47
|
+
return :raw_html if self =~ %r{^[ ]{0,3}</?\s*\w+}
|
48
|
+
return :raw_html if self =~ /\A[ ]{0,3}<\!\-\-/
|
49
|
+
return :header1 if self =~ /\A(=)+/
|
50
|
+
return :header2 if self =~ /\A([-\s])+\z/
|
51
|
+
return :header3 if self =~ /\A(#)+\s*\S+/
|
52
|
+
# at least three asterisks/hyphens/underscores on a line, and only whitespace
|
53
|
+
return :hrule if self =~ /\A(\s*[\*\-_]\s*){3,}\z/
|
54
|
+
return :ulist if self =~ /\A([ ]{0,3}|\t)([\*\-\+])\s+.*/
|
55
|
+
return :olist if self =~ /\A([ ]{0,3}|\t)\d+\.\s+.*/
|
56
|
+
return :code if number_of_leading_spaces >= 4
|
57
|
+
return :quote if self =~ /\A>/
|
58
|
+
return :ald if self =~ AttributeDefinitionList
|
59
|
+
return :ial if self =~ InlineAttributeList
|
60
|
+
return :text # else, it's just text
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
# MacRuby has trouble with commented regexes, so just put the expanded form
|
65
|
+
# in a comment.
|
66
|
+
|
67
|
+
# $1 = id $2 = attribute list
|
68
|
+
AttributeDefinitionList = /\A\s{0,3}\{([\w\s]+)\}:\s*(.*?)\s*\z/
|
69
|
+
#
|
70
|
+
InlineAttributeList = /\A\s{0,3}\{([:#\.].*?)\}\s*\z/
|
71
|
+
# Example:
|
72
|
+
# ^:blah blah
|
73
|
+
# ^: blah blah
|
74
|
+
# ^ : blah blah
|
75
|
+
Definition = /\A[ ]{0,3}:\s*(\S.*)\z/
|
76
|
+
# %r{
|
77
|
+
# ^ # begin of line
|
78
|
+
# [ ]{0,3} # up to 3 spaces
|
79
|
+
# : # colon
|
80
|
+
# \s* # whitespace
|
81
|
+
# (\S.*) # the text = $1
|
82
|
+
# $ # end of line
|
83
|
+
# }x
|
84
|
+
|
85
|
+
# Example:
|
86
|
+
# *[HTML]: Hyper Text Markup Language
|
87
|
+
Abbreviation = /\A[ ]{0,3}\*\[([^\]]+)\]:\s*(\S.*\S)*\s*\z/
|
88
|
+
# %r{
|
89
|
+
# ^ # begin of line
|
90
|
+
# [ ]{0,3} # up to 3 spaces
|
91
|
+
# \* # one asterisk
|
92
|
+
# \[ # opening bracket
|
93
|
+
# ([^\]]+) # any non-closing bracket: id = $1
|
94
|
+
# \] # closing bracket
|
95
|
+
# : # colon
|
96
|
+
# \s* # whitespace
|
97
|
+
# (\S.*\S)* # definition=$2
|
98
|
+
# \s* # strip this whitespace
|
99
|
+
# $ # end of line
|
100
|
+
# }x
|
101
|
+
|
102
|
+
FootnoteText = /\A[ ]{0,3}\[(\^.+)\]:\s*(\S.*)?\z/
|
103
|
+
# %r{
|
104
|
+
# ^ # begin of line
|
105
|
+
# [ ]{0,3} # up to 3 spaces
|
106
|
+
# \[(\^.+)\]: # id = $1 (including '^')
|
107
|
+
# \s*(\S.*)?$ # text = $2 (optional)
|
108
|
+
# }x
|
109
|
+
|
110
|
+
# This regex is taken from BlueCloth sources
|
111
|
+
# Link defs are in the form: ^[id]: \n? url "optional title"
|
112
|
+
LinkRegex = /\A[ ]{0,3}\[([^\[\]]+)\]:[ ]*<?([^>\s]+)>?[ ]*(?:(?:(?:"([^"]+)")|(?:'([^']+)')|(?:\(([^\(\)]+)\)))\s*(.+)?)?/
|
113
|
+
#%r{
|
114
|
+
# ^[ ]{0,3}\[([^\[\]]+)\]: # id = $1
|
115
|
+
# [ ]*
|
116
|
+
# <?([^>\s]+)>? # url = $2
|
117
|
+
# [ ]*
|
118
|
+
# (?: # Titles are delimited by "quotes" or (parens).
|
119
|
+
# (?:(?:"([^"]+)")|(?:'([^']+)')|(?:\(([^\(\)]+)\))) # title = $3, $4, or $5
|
120
|
+
# \s*(.+)? # stuff = $6
|
121
|
+
# )? # title is optional
|
122
|
+
#}x
|
123
|
+
|
124
|
+
IncompleteLink = /\A[ ]{0,3}\[([^\[\]]+?)\]:\s*\z/
|
125
|
+
|
126
|
+
# Table syntax: http://michelf.ca/projects/php-markdown/extra/#table
|
127
|
+
# | -------------:| ------------------------------ |
|
128
|
+
TableSeparator = /\A(?>\|?\s*\:?\-+\:?\s*\|?)+?\z/
|
129
|
+
end
|
@@ -1,615 +1,621 @@
|
|
1
|
-
|
2
|
-
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
|
3
|
-
#
|
4
|
-
# This file is part of Maruku.
|
5
|
-
#
|
6
|
-
# Maruku is free software; you can redistribute it and/or modify
|
7
|
-
# it under the terms of the GNU General Public License as published by
|
8
|
-
# the Free Software Foundation; either version 2 of the License, or
|
9
|
-
# (at your option) any later version.
|
10
|
-
#
|
11
|
-
# Maruku is distributed in the hope that it will be useful,
|
12
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
# GNU General Public License for more details.
|
15
|
-
#
|
16
|
-
# You should have received a copy of the GNU General Public License
|
17
|
-
# along with Maruku; if not, write to the Free Software
|
18
|
-
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
19
|
-
#++
|
20
|
-
|
1
|
+
require 'set'
|
21
2
|
|
22
3
|
module MaRuKu; module In; module Markdown; module BlockLevelParser
|
23
4
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
#
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
#
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
#
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
#
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
5
|
+
include Helpers
|
6
|
+
include MaRuKu::Strings
|
7
|
+
include MaRuKu::In::Markdown::SpanLevelParser
|
8
|
+
|
9
|
+
class BlockContext < Array
|
10
|
+
def describe
|
11
|
+
n = 5
|
12
|
+
desc = size > n ? self[-n, n] : self
|
13
|
+
"Last #{n} elements: " +
|
14
|
+
desc.map {|x| "\n -" + x.inspect }.join
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
# Splits the text into lines, wraps them in a LineSource and calls parse_blocks
|
19
|
+
def parse_text_as_markdown(text)
|
20
|
+
lines = split_lines(text)
|
21
|
+
src = LineSource.new(lines)
|
22
|
+
parse_blocks(src)
|
23
|
+
end
|
24
|
+
|
25
|
+
# Input is a LineSource
|
26
|
+
def parse_blocks(src)
|
27
|
+
output = BlockContext.new
|
28
|
+
|
29
|
+
# run state machine
|
30
|
+
while src.cur_line
|
31
|
+
next if check_block_extensions(src, output, src.cur_line)
|
32
|
+
|
33
|
+
md_type = src.cur_line.md_type
|
34
|
+
|
35
|
+
# Prints detected type (useful for debugging)
|
36
|
+
#puts "parse_blocks #{md_type}|#{src.cur_line}"
|
37
|
+
case md_type
|
38
|
+
when :empty
|
39
|
+
output << :empty
|
40
|
+
src.ignore_line
|
41
|
+
when :ial
|
42
|
+
m = InlineAttributeList.match src.shift_line
|
43
|
+
content = m[1] || ""
|
44
|
+
src2 = CharSource.new(content, src)
|
45
|
+
interpret_extension(src2, output)
|
46
|
+
when :ald
|
47
|
+
output << read_ald(src)
|
48
|
+
when :text
|
49
|
+
# paragraph, or table, or definition list
|
50
|
+
read_text_material(src, output)
|
51
|
+
when :header2, :hrule
|
52
|
+
# hrule
|
53
|
+
src.shift_line
|
54
|
+
output << md_hrule
|
55
|
+
when :header3
|
56
|
+
output << read_header3(src)
|
57
|
+
when :ulist, :olist
|
58
|
+
list_type = (md_type == :ulist) ? :ul : :ol
|
59
|
+
li = read_list_item(src)
|
60
|
+
# append to current list if we have one
|
61
|
+
if output.last.kind_of?(MDElement) &&
|
62
|
+
output.last.node_type == list_type then
|
63
|
+
output.last.children << li
|
64
|
+
else
|
65
|
+
output << md_el(list_type, li)
|
66
|
+
end
|
67
|
+
when :quote
|
68
|
+
output << read_quote(src)
|
69
|
+
when :code
|
70
|
+
e = read_code(src)
|
71
|
+
output << e if e
|
72
|
+
when :raw_html
|
73
|
+
# More extra hacky stuff - if there's more than just HTML, we either wrap it
|
74
|
+
# in a paragraph or break it up depending on whether it's an inline element or not
|
75
|
+
e = read_raw_html(src)
|
76
|
+
unless e.empty?
|
77
|
+
if e.first.parsed_html &&
|
78
|
+
(first_node_name = e.first.parsed_html.first_node_name) &&
|
79
|
+
HTML_INLINE_ELEMS.include?(first_node_name) &&
|
80
|
+
!%w(svg math).include?(first_node_name)
|
81
|
+
content = [e.first]
|
82
|
+
if e.size > 1
|
83
|
+
content.concat(e[1].children)
|
84
|
+
end
|
85
|
+
output << md_par(content)
|
86
|
+
else
|
87
|
+
output.concat(e)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
when :footnote_text
|
91
|
+
output << read_footnote_text(src)
|
92
|
+
when :ref_definition
|
93
|
+
if src.parent && src.cur_index == 0
|
94
|
+
read_text_material(src, output)
|
95
|
+
else
|
96
|
+
read_ref_definition(src, output)
|
97
|
+
end
|
98
|
+
when :abbreviation
|
99
|
+
output << read_abbreviation(src)
|
100
|
+
when :xml_instr
|
101
|
+
read_xml_instruction(src, output)
|
102
|
+
else # warn if we forgot something
|
103
|
+
line = src.cur_line
|
104
|
+
maruku_error "Ignoring line '#{line}' type = #{md_type}", src
|
105
|
+
src.shift_line
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
merge_ial(output, src, output)
|
110
|
+
output.delete_if {|x| x.kind_of?(MDElement) && x.node_type == :ial }
|
111
|
+
|
112
|
+
# get rid of empty line markers
|
113
|
+
output.delete_if {|x| x == :empty }
|
114
|
+
|
115
|
+
# See for each list if we can omit the paragraphs
|
116
|
+
# TODO: do this after
|
117
|
+
output.each do |c|
|
118
|
+
# Remove paragraphs that we can get rid of
|
119
|
+
if [:ul, :ol].include?(c.node_type) && c.children.none?(&:want_my_paragraph)
|
120
|
+
c.children.each do |d|
|
121
|
+
if d.children.first && d.children.first.node_type == :paragraph
|
122
|
+
d.children = d.children.first.children + d.children[1..-1]
|
123
|
+
end
|
124
|
+
end
|
125
|
+
elsif c.node_type == :definition_list && c.children.none?(&:want_my_paragraph)
|
126
|
+
c.children.each do |definition|
|
127
|
+
definition.definitions.each do |dd|
|
128
|
+
if dd.children.first.node_type == :paragraph
|
129
|
+
dd.children = dd.children.first.children + dd.children[1..-1]
|
130
|
+
end
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
output
|
137
|
+
end
|
138
|
+
|
139
|
+
# Dispatches on what a text line turns out to start: a table, an underlined
# header, a definition-list entry, or a plain paragraph. The element that is
# read is appended to +output+ (a definition is merged into a directly
# preceding :definition_list element when there is one).
def read_text_material(src, output)
  upcoming = src.next_line

  # A pipe on this line plus a separator row below means a table header.
  looks_like_table =
    src.cur_line.include?('|') && upcoming && upcoming.rstrip =~ TableSeparator
  return output << read_table(src) if looks_like_table

  if upcoming && [:header1, :header2].include?(upcoming.md_type)
    return output << read_header12(src)
  end

  # Anything that is not the start of a definition list is a paragraph.
  return output << read_paragraph(src) unless eventually_comes_a_def_list(src)

  definition = read_definition(src)
  previous = output.last
  if previous.kind_of?(MDElement) && previous.node_type == :definition_list
    previous.children << definition
  else
    output << md_el(:definition_list, definition)
  end
end
|
158
|
+
|
159
|
+
# Consumes one line holding an attribute definition list (ALD), records the
# parsed attribute list under its id in +self.ald+ and returns the element
# built by +md_ald+. Reports an error and returns nil when the line does not
# match AttributeDefinitionList.
def read_ald(src)
  line = src.shift_line
  unless line =~ AttributeDefinitionList
    maruku_error "Bug Bug:\n#{line.inspect}"
    return nil
  end

  name = Regexp.last_match(1)
  attributes = read_attribute_list(CharSource.new(Regexp.last_match(2), src))
  self.ald[name] = attributes
  md_ald(name, attributes)
end
|
170
|
+
|
171
|
+
# Reads a two-line header: a title line followed by an underline
# (----- or ========). The level is 2 when the underline is typed :header2,
# otherwise 1. With new meta data enabled, a trailing "{...}" on the title
# is parsed as an inline attribute list.
def read_header12(src)
  title = src.shift_line.strip
  ial = nil
  attributes = nil

  # Check if there is an IAL
  if new_meta_data? && title =~ /^(.*?)\{(.*?)\}\s*$/
    title, ial = Regexp.last_match(1).strip, Regexp.last_match(2)
    attributes = read_attribute_list(CharSource.new(ial, src))
  end

  spans = parse_span(title)
  if spans.empty?
    # Nothing but braces on the line: keep them as literal header text.
    spans = "{#{ial}}"
    attributes = nil
  end

  level = src.cur_line.md_type == :header2 ? 2 : 1
  src.shift_line # drop the underline
  md_header(level, spans, attributes)
end
|
190
|
+
|
191
|
+
# Reads a one-line header like '#### header ####'. The level is the number
# of leading '#' characters; leading and trailing runs of '#' are removed
# from the text. With new meta data enabled, a trailing "{...}" is parsed
# as an inline attribute list.
def read_header3(src)
  raw = src.shift_line.strip
  ial = nil
  attributes = nil

  # Check if there is an IAL
  if new_meta_data? && raw =~ /^(.*?)\{(.*?)\}\s*$/
    raw, ial = Regexp.last_match(1).strip, Regexp.last_match(2)
    attributes = read_attribute_list(CharSource.new(ial, src))
  end

  level = raw[/^#+/].size
  spans = parse_span(raw.gsub(/\A#+|#+\z/, ''))
  if spans.empty?
    # Nothing but braces after the hashes: keep them as literal header text.
    spans = "{#{ial}}"
    attributes = nil
  end

  md_header(level, spans, attributes)
end
|
209
|
+
|
210
|
+
# Reads an XML processing instruction ("<?target code ?>"), which may span
# several lines, and appends the result to +output+.
#
# * When the target is 'mrk' and MaRuKu::Globals[:unsafe_features] is set,
#   the code is run through +safe_execute_code+ and whatever it returns
#   (which must not be a String) is pushed onto +output+.
# * Otherwise an element built by +md_xml_instr+ is appended.
#
# Text following the closing '?>' on its line is reported as an error.
# If the input ends before '?>' is found, that is reported as an error too
# and the text collected so far is used as the instruction body, instead of
# reading past the end of the source.
#
# Raises RuntimeError ("BugBug") if the first line is not an instruction at
# all, and ("Not expected") if executed 'mrk' code returns a String.
def read_xml_instruction(src, output)
  m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
  raise "BugBug" unless m
  target = m[2] || ''
  code = m[3]

  until code.include?('?>')
    unless src.cur_line
      # Ran out of lines: stop here rather than shifting past the end.
      maruku_error "XML instruction is not closed by '?>':\n" +
        code.gsub(/^/, '|'), src
      break
    end
    code << "\n" << src.shift_line
  end

  if code.include?('?>') && code !~ /\?>\s*$/
    garbage = (/\?>(.*)$/.match(code))[1]
    maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n" +
      code.gsub(/^/, '|'), src
  end
  code.gsub!(/\?>\s*$/, '')

  if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
    result = safe_execute_code(self, code)
    if result
      if result.kind_of? String
        raise "Not expected"
      else
        output.push(*result)
      end
    end
  else
    output << md_xml_instr(target, code)
  end
end
|
238
|
+
|
239
|
+
# Feeds lines to an HTMLHelper until it reports the HTML block as finished
# (or the source runs out) and returns an array of elements: the raw HTML
# element, followed by whatever non-blank text trailed the block. Trailing
# text is parsed as spans and wrapped in a paragraph when the first tag is
# an inline element, and parsed as Markdown blocks otherwise.
def read_raw_html(src)
  helper = HTMLHelper.new
  begin
    helper.eat_this(src.shift_line)
    while src.cur_line && !helper.is_finished?
      helper.eat_this("\n" + src.shift_line)
    end
  rescue => e
    maruku_error "Bad block-level HTML:\n#{e.inspect.gsub(/^/, '|')}\n", src
  end

  # Only keep the leftover text if it is more than whitespace.
  leftover = helper.rest =~ /^\s*$/ ? nil : helper.rest
  raw_html = helper.stuff_you_read
  inline = HTML_INLINE_ELEMS.include?(helper.first_tag)

  return [md_html(raw_html)] unless leftover

  trailing = inline ? parse_span(leftover) : parse_text_as_markdown(leftover)
  # Restore the leading space between the HTML and the text after it.
  if leftover.start_with?(' ') && trailing[0].is_a?(String)
    trailing[0] = ' ' + trailing[0]
  end

  if inline
    [md_html(raw_html), md_par(trailing)]
  else
    [md_html(raw_html)] + trailing
  end
end
|
271
|
+
|
272
|
+
# Collects the lines of one paragraph, starting with the current line, and
# returns them as a paragraph element. Collection stops at the first line
# that starts another block; see the individual checks below.
def read_paragraph(src)
  collected = [src.shift_line]
  first_node_name = nil

  while (current = src.cur_line)
    kind = current.md_type
    case kind
    when :quote, :header3, :empty, :ref_definition, :ial, :xml_instr
      break
    when :olist, :ulist
      # A lone list-looking line does not break the paragraph; it does when
      # it is the last line or the next line is a list line of the same type.
      following = src.next_line
      break if !following || following.md_type == kind
    when :raw_html
      # This is a pretty awful hack to handle inline HTML,
      # but it means double-parsing HTML.
      spans = parse_span([current], src)
      unless spans.empty? || spans.first.is_a?(String)
        parsed = spans.first.parsed_html
        first_node_name = parsed.first_node_name if parsed
      end
      break if first_node_name && !HTML_INLINE_ELEMS.include?(first_node_name)
    end

    break if current.strip.empty?
    # The current line is the title of an underlined header.
    break if src.next_line && [:header1, :header2].include?(src.next_line.md_type)
    break if any_matching_block_extension?(current)

    collected << src.shift_line
  end

  md_par(parse_span(collected, src))
end
|
302
|
+
|
303
|
+
# Reads one list item, either ordered or unordered, and returns the element
# built by +md_li+. The item's first line (minus its marker) and its
# indented continuation lines are parsed as a nested block source.
def read_list_item(src)
  start_index = src.cur_index
  kind = src.cur_line.md_type
  head = src.shift_line

  indent, ial = spaces_before_first_char(head)
  if ial
    attributes = read_attribute_list(CharSource.new(ial, src))
    # NOTE(review): the extra 3 presumably covers the braces and a
    # separator around the IAL text — confirm against spaces_before_first_char.
    ial_offset = ial.length + 3
  else
    attributes = nil
    ial_offset = 0
  end

  body, want_my_paragraph =
    read_indented_content(src, indent, [], kind, ial_offset)

  # Put the first line back in front, with the list marker cut off.
  body.unshift(head[indent, head.size - 1])

  children = parse_blocks(LineSource.new(body, src, start_index))
  md_li(children, want_my_paragraph, attributes)
end
|
326
|
+
|
327
|
+
# Consumes one abbreviation definition line, stores the description under
# the abbreviation in +self.abbreviations+ and returns the element built by
# +md_abbr_def+. A line that does not match Abbreviation, or that yields an
# empty abbreviation, is reported through +maruku_error+.
def read_abbreviation(src)
  line = src.shift_line
  matched = line =~ Abbreviation
  maruku_error "Bug: it's Andrea's fault. Tell him.\n#{line.inspect}" unless matched

  abbr = Regexp.last_match(1)
  desc = Regexp.last_match(2)

  if abbr.nil? || abbr.empty?
    maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
  end

  self.abbreviations[abbr] = desc
  md_abbr_def(abbr, desc)
end
|
343
|
+
|
344
|
+
# Reads a footnote definition: the marker line plus its indented
# continuation lines, parsed as nested blocks. The resulting footnote
# element is registered in +self.footnotes+ under its id and returned.
def read_footnote_text(src)
  start_index = src.cur_index
  head = src.shift_line

  maruku_error "Bug (it's Andrea's fault)" unless head =~ FootnoteText

  id = Regexp.last_match(1)
  text = Regexp.last_match(2) || ''

  # Continuation lines use a fixed indentation of 4.
  stoppers = [:footnote_text, :ref_definition, :definition, :abbreviation]
  body = read_indented_content(src, 4, stoppers, :footnote_text).first

  # The text after the marker is the first line of the body, unless blank.
  body.unshift(text) unless text.strip.empty?

  children = parse_blocks(LineSource.new(body, src, start_index))
  self.footnotes[id] = md_footnote(id, children)
end
|
374
|
+
|
375
|
+
|
376
|
+
# Collects the continuation lines of an indented construct; it is used to
# read list items, descriptions and footnote text.
#
# src         - line source to consume from
# indentation - number of columns stripped from every collected line
# break_list  - line type(s) that end the content when no empty line has
#               been seen yet
# item_type   - line type of the enclosing item, used for want_my_paragraph
# ial_offset  - subtracted from +indentation+ to get the minimum number of
#               leading spaces a line needs to count as aligned
#
# Returns [lines, want_my_paragraph]. Trailing empty lines are dropped from
# +lines+. +want_my_paragraph+ is truthy when content followed an empty
# line, or when an empty line was seen and the next line is another
# +item_type+ line.
def read_indented_content(src, indentation, break_list, item_type, ial_offset=0)
  lines = []
  saw_empty = false
  saw_anything_after = false
  break_list = Array(break_list)
  required = indentation - ial_offset

  while (raw = src.cur_line)
    leading = raw.number_of_leading_spaces
    # Under-indented lines end the content unless they are text or empty.
    break if leading < required && ![:text, :empty].include?(raw.md_type)

    line = strip_indent(raw, indentation)
    kind = line.md_type

    blank = kind == :empty
    saw_empty = true if blank
    # Unquestioningly grab anything that's deeper-indented
    deeper = kind != :code && leading > required

    unless blank || deeper
      if saw_empty
        # after a white line we expect things to be properly aligned
        break if leading < required
        saw_anything_after = true
      elsif break_list.include?(kind)
        break
      end
    end

    lines << line
    src.shift_line
    next if blank || deeper

    # You are only required to indent the first line of
    # a child paragraph.
    if kind == :text
      while src.cur_line && src.cur_line.md_type == :text
        lines << strip_indent(src.shift_line, indentation)
      end
    end
  end

  # TODO fix this
  want_my_paragraph = saw_anything_after ||
    (saw_empty && src.cur_line && src.cur_line.md_type == item_type)

  lines.pop while lines.last && lines.last.md_type == :empty

  [lines, want_my_paragraph]
end
|
440
|
+
|
441
|
+
|
442
|
+
# Reads a run of consecutive :quote lines, removes the quote marker from
# each with +unquote+, parses the result as nested blocks and returns the
# element built by +md_quote+.
def read_quote(src)
  start_index = src.cur_index

  quoted = []
  quoted << unquote(src.shift_line) while src.cur_line && src.cur_line.md_type == :quote

  md_quote(parse_blocks(LineSource.new(quoted, src, start_index)))
end
|
455
|
+
|
456
|
+
# Reads a run of :code and :empty lines, strips 4 columns of indentation
# from each, trims blank lines from both ends and returns a code block
# element. Returns nil when nothing but blank lines was collected.
def read_code(src)
  blank = lambda { |text| text.strip.size == 0 }

  body = []
  body << strip_indent(src.shift_line, 4) while src.cur_line && [:code, :empty].include?(src.cur_line.md_type)

  body.pop while body.last && blank.call(body.last)
  body.shift while body.first && blank.call(body.first)

  body.empty? ? nil : md_codeblock(body.join("\n"))
end
|
478
|
+
|
479
|
+
# Reads a link reference definition, stores it in +self.refs+ under its
# sanitized id and appends the element built by +md_ref_def+ to +out+.
#
# The definition may continue on the following line when that line is
# indented by 1 to 3 spaces and is not itself a footnote, reference,
# definition or abbreviation line. Extra "key=value" pairs captured by the
# sixth group of LinkRegex are stored in the ref hash under symbol keys,
# with surrounding double quotes removed from the value.
#
# Returns +out+, or nil (after reporting an error) when the line does not
# match LinkRegex.
def read_ref_definition(src, out)
  line = src.shift_line

  # if link is incomplete, shift next line
  if src.cur_line &&
     ![:footnote_text, :ref_definition, :definition, :abbreviation].include?(src.cur_line.md_type) &&
     (1..3).include?(src.cur_line.number_of_leading_spaces)
    line << " " << src.shift_line
  end

  match = LinkRegex.match(line)
  unless match
    # Return unconditionally: `maruku_error ... and return` only returned
    # when maruku_error happened to give back a truthy value, and otherwise
    # fell through to `match[1]` on nil.
    maruku_error "Link does not respect format: '#{line}'"
    return
  end

  id = match[1]
  url = match[2]
  title = match[3] || match[4] || match[5]
  id = sanitize_ref_id(id)

  hash = self.refs[id] = {
    :url => url,
    :title => title
  }

  stuff = (match[6] || '')
  stuff.split.each do |couple|
    k, v = couple.split('=')
    v ||= ""
    v = v[1..-2] if v.start_with?('"') # strip quotes
    hash[k.to_sym] = v
  end

  out << md_ref_def(id, url, :title => title)
end
|
514
|
+
|
515
|
+
# Splits a table row on '|' and returns the cell texts with surrounding
# whitespace removed. Pieces that are empty before stripping (such as the
# one in front of a leading pipe) are dropped; whitespace-only pieces are
# kept as empty cells.
def split_cells(s)
  cells = []
  s.split('|').each do |piece|
    cells << piece.strip unless piece.empty?
  end
  cells
end
|
518
|
+
|
519
|
+
# Reads a table: a header row, a separator row and every following line
# that contains a pipe. Returns a :table element whose children are the
# head cells followed by the body cells (flattened), with the per-column
# alignment stored under :align.
#
# If the header or any body row does not have as many cells as the
# separator row, an error is reported and the element from +md_br+ is
# returned in place of the table.
def read_table(src)
  head = split_cells(src.shift_line).map do |s|
    md_el(:head_cell, parse_span(s))
  end

  separator = split_cells(src.shift_line)

  align = separator.map do |s|
    # ex: :-------------------:
    # If the separator starts and ends with a colon,
    # center the cell. If it's on the right, right-align,
    # otherwise left-align.
    starts = s.start_with? ':'
    ends = s.end_with? ':'
    if starts && ends
      :center
    elsif ends
      :right
    else
      :left
    end
  end

  num_columns = align.size

  if head.size != num_columns
    maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
    tell_user "I will ignore this table."
    # XXX try to recover
    return md_br
  end

  rows = []

  while src.cur_line && src.cur_line =~ /\|/
    row = split_cells(src.shift_line).map do |s|
      md_el(:cell, parse_span(s))
    end

    # Validate the row just read; checking head.size here could never fail
    # because the header was already validated above.
    if row.size != num_columns
      maruku_error "Row does not have #{num_columns} columns: \n#{row.inspect}"
      tell_user "I will ignore this table."
      # XXX try to recover
      return md_br
    end
    rows << row
  end

  children = (head + rows).flatten
  md_el(:table, children, { :align => align })
end
|
570
|
+
|
571
|
+
# Tells whether a definition list starts at the current line, by matching
# the look-ahead string from +src.tell_me_the_future+ against
# "one or more t, an optional e, then d" (presumably one letter per
# upcoming line type: text, empty, definition — confirm against LineSource).
# Returns the match position (0) when it does, nil otherwise.
def eventually_comes_a_def_list(src)
  def_list_ahead = /^t+e?d/x
  src.tell_me_the_future =~ def_list_ahead
end
|
576
|
+
|
577
|
+
# Reads one definition-list entry: one or more term lines, an optional
# empty line, then one or more definitions, each with indented continuation
# lines that are parsed as nested blocks. Returns a :definition element
# whose children are the terms followed by the definitions.
#
# Raises RuntimeError ("Chunky Bacon!") when the source ends right after
# the terms or when no definition line follows them.
def read_definition(src)
  # Read one or more terms
  terms = []
  while src.cur_line && src.cur_line.md_type == :text
    terms << md_el(:definition_term, parse_span(src.shift_line))
  end

  raise "Chunky Bacon!" unless src.cur_line

  # one optional empty line; it makes the definitions keep their paragraphs
  want_my_paragraph = src.cur_line.md_type == :empty
  src.shift_line if want_my_paragraph

  raise "Chunky Bacon!" unless src.cur_line.md_type == :definition

  # Read one or more definitions
  definitions = []
  while src.cur_line && src.cur_line.md_type == :definition
    start_index = src.cur_index

    src.shift_line =~ Definition
    opening = Regexp.last_match(1)

    body, keeps_paragraph = read_indented_content(src, 4, :definition, :definition)
    want_my_paragraph ||= keeps_paragraph

    body.unshift(opening)

    children = parse_blocks(LineSource.new(body, src, start_index))
    definitions << md_el(:definition_data, children)
  end

  meta = {
    :terms => terms,
    :definitions => definitions,
    :want_my_paragraph => want_my_paragraph
  }
  md_el(:definition, terms + definitions, meta)
end
|
621
|
+
end end end end
|