maruku 0.6.1 → 0.7.0.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/MIT-LICENSE.txt +20 -0
- data/bin/maruku +153 -152
- data/bin/marutex +2 -29
- data/data/entities.xml +261 -0
- data/docs/math.md +14 -18
- data/lib/maruku.rb +65 -77
- data/lib/maruku/attributes.rb +109 -214
- data/lib/maruku/defaults.rb +45 -67
- data/lib/maruku/document.rb +43 -0
- data/lib/maruku/element.rb +112 -0
- data/lib/maruku/errors.rb +71 -0
- data/lib/maruku/ext/div.rb +105 -113
- data/lib/maruku/ext/fenced_code.rb +97 -0
- data/lib/maruku/ext/math.rb +22 -26
- data/lib/maruku/ext/math/elements.rb +20 -26
- data/lib/maruku/ext/math/mathml_engines/blahtex.rb +92 -104
- data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +33 -26
- data/lib/maruku/ext/math/mathml_engines/none.rb +11 -19
- data/lib/maruku/ext/math/mathml_engines/ritex.rb +2 -4
- data/lib/maruku/ext/math/parsing.rb +107 -113
- data/lib/maruku/ext/math/to_html.rb +184 -187
- data/lib/maruku/ext/math/to_latex.rb +30 -21
- data/lib/maruku/helpers.rb +158 -257
- data/lib/maruku/html.rb +254 -0
- data/lib/maruku/input/charsource.rb +272 -319
- data/lib/maruku/input/extensions.rb +62 -63
- data/lib/maruku/input/html_helper.rb +220 -189
- data/lib/maruku/input/linesource.rb +90 -110
- data/lib/maruku/input/mdline.rb +129 -0
- data/lib/maruku/input/parse_block.rb +618 -612
- data/lib/maruku/input/parse_doc.rb +145 -215
- data/lib/maruku/input/parse_span.rb +658 -0
- data/lib/maruku/input/rubypants.rb +200 -128
- data/lib/maruku/inspect_element.rb +60 -0
- data/lib/maruku/maruku.rb +10 -31
- data/lib/maruku/output/entity_table.rb +33 -0
- data/lib/maruku/output/s5/fancy.rb +462 -462
- data/lib/maruku/output/s5/to_s5.rb +115 -135
- data/lib/maruku/output/to_html.rb +898 -983
- data/lib/maruku/output/to_latex.rb +561 -560
- data/lib/maruku/output/to_markdown.rb +207 -162
- data/lib/maruku/output/to_s.rb +11 -52
- data/lib/maruku/string_utils.rb +129 -179
- data/lib/maruku/toc.rb +185 -196
- data/lib/maruku/version.rb +33 -38
- data/spec/block_docs/abbrev.md +776 -0
- data/{tests/unittest → spec/block_docs}/abbreviations.md +11 -20
- data/{tests/unittest → spec/block_docs}/alt.md +2 -14
- data/{tests/unittest/pending → spec/block_docs}/amps.md +1 -13
- data/{tests/unittest → spec/block_docs}/attributes/att2.md +0 -12
- data/{tests/unittest → spec/block_docs}/attributes/att3.md +2 -14
- data/{tests/unittest → spec/block_docs}/attributes/attributes.md +12 -16
- data/{tests/unittest → spec/block_docs}/attributes/circular.md +0 -12
- data/{tests/unittest → spec/block_docs}/attributes/default.md +1 -13
- data/{tests/unittest → spec/block_docs}/blank.md +0 -12
- data/{tests/unittest → spec/block_docs}/blanks_in_code.md +16 -15
- data/{tests/unittest/loss.md → spec/block_docs/bug_def.md} +6 -18
- data/{tests/unittest → spec/block_docs}/bug_table.md +3 -15
- data/{tests/unittest → spec/block_docs}/code.md +7 -14
- data/{tests/unittest → spec/block_docs}/code2.md +4 -14
- data/{tests/unittest → spec/block_docs}/code3.md +12 -16
- data/{tests/unittest → spec/block_docs}/data_loss.md +2 -14
- data/{tests/unittest → spec/block_docs}/divs/div1.md +0 -12
- data/{tests/unittest → spec/block_docs}/divs/div2.md +0 -12
- data/{tests/unittest → spec/block_docs}/divs/div3_nest.md +3 -15
- data/{tests/unittest → spec/block_docs}/easy.md +1 -13
- data/spec/block_docs/email.md +29 -0
- data/{tests/unittest/pending → spec/block_docs}/empty_cells.md +3 -15
- data/{tests/unittest → spec/block_docs}/encoding/iso-8859-1.md +1 -14
- data/{tests/unittest → spec/block_docs}/encoding/utf-8.md +0 -12
- data/{tests/unittest → spec/block_docs}/entities.md +27 -29
- data/{tests/unittest/notyet → spec/block_docs}/escape.md +2 -14
- data/{tests/unittest → spec/block_docs}/escaping.md +11 -22
- data/{tests/unittest → spec/block_docs}/extra_dl.md +2 -13
- data/{tests/unittest → spec/block_docs}/extra_header_id.md +14 -20
- data/{tests/unittest → spec/block_docs}/extra_table1.md +3 -15
- data/spec/block_docs/fenced_code_blocks.md +66 -0
- data/spec/block_docs/fenced_code_blocks_highlighted.md +18 -0
- data/{tests/unittest → spec/block_docs}/footnotes.md +12 -24
- data/spec/block_docs/footnotes2.md +78 -0
- data/spec/block_docs/hard.md +25 -0
- data/spec/block_docs/header_after_par.md +62 -0
- data/{tests/unittest → spec/block_docs}/headers.md +10 -18
- data/{tests/unittest → spec/block_docs}/hex_entities.md +7 -18
- data/{tests/unittest → spec/block_docs}/hrule.md +5 -12
- data/{tests/unittest → spec/block_docs}/html3.md +1 -13
- data/{tests/unittest → spec/block_docs}/html4.md +2 -14
- data/{tests/unittest → spec/block_docs}/html5.md +2 -14
- data/spec/block_docs/html_block_in_para.md +22 -0
- data/spec/block_docs/html_inline.md +25 -0
- data/spec/block_docs/html_trailing.md +31 -0
- data/spec/block_docs/ie.md +62 -0
- data/spec/block_docs/iframe.md +29 -0
- data/{tests/unittest → spec/block_docs}/images.md +22 -28
- data/{tests/unittest → spec/block_docs}/images2.md +7 -17
- data/{tests/unittest → spec/block_docs}/inline_html.md +37 -67
- data/{tests/unittest → spec/block_docs}/inline_html2.md +1 -13
- data/spec/block_docs/inline_html_beginning.md +10 -0
- data/spec/block_docs/issue20.md +9 -0
- data/spec/block_docs/issue26.md +22 -0
- data/spec/block_docs/issue29.md +9 -0
- data/spec/block_docs/issue30.md +30 -0
- data/spec/block_docs/issue31.md +25 -0
- data/spec/block_docs/issue40.md +40 -0
- data/spec/block_docs/issue64.md +55 -0
- data/spec/block_docs/issue67.md +19 -0
- data/spec/block_docs/issue70.md +11 -0
- data/spec/block_docs/issue72.md +17 -0
- data/spec/block_docs/issue74.md +38 -0
- data/spec/block_docs/issue79.md +15 -0
- data/spec/block_docs/issue83.md +13 -0
- data/spec/block_docs/issue85.md +25 -0
- data/spec/block_docs/issue88.md +19 -0
- data/spec/block_docs/issue89.md +12 -0
- data/spec/block_docs/issue90.md +38 -0
- data/{tests/unittest/pending → spec/block_docs}/link.md +21 -18
- data/{tests/unittest → spec/block_docs}/links.md +33 -32
- data/spec/block_docs/links2.md +21 -0
- data/{tests/unittest → spec/block_docs}/list1.md +0 -12
- data/{tests/unittest → spec/block_docs}/list12.md +2 -14
- data/{tests/unittest → spec/block_docs}/list2.md +2 -14
- data/spec/block_docs/list_multipara.md +42 -0
- data/{tests/unittest → spec/block_docs}/lists.md +28 -29
- data/{tests/unittest → spec/block_docs}/lists10.md +2 -14
- data/spec/block_docs/lists11.md +23 -0
- data/spec/block_docs/lists12.md +43 -0
- data/spec/block_docs/lists13.md +55 -0
- data/spec/block_docs/lists14.md +61 -0
- data/spec/block_docs/lists15.md +36 -0
- data/spec/block_docs/lists6.md +88 -0
- data/spec/block_docs/lists7b.md +58 -0
- data/spec/block_docs/lists9.md +53 -0
- data/{tests/unittest → spec/block_docs}/lists_after_paragraph.md +19 -25
- data/spec/block_docs/lists_blank.md +35 -0
- data/{tests/unittest/list3.md → spec/block_docs/lists_blockquote_code.md} +2 -14
- data/{tests/unittest/list4.md → spec/block_docs/lists_need_blank_line.md} +50 -21
- data/spec/block_docs/lists_nested.md +44 -0
- data/spec/block_docs/lists_nested_blankline.md +28 -0
- data/spec/block_docs/lists_nested_deep.md +43 -0
- data/{tests/unittest → spec/block_docs}/lists_ol.md +37 -54
- data/spec/block_docs/lists_paraindent.md +47 -0
- data/spec/block_docs/lists_tab.md +54 -0
- data/spec/block_docs/loss.md +17 -0
- data/spec/block_docs/math-blahtex/equations.md +30 -0
- data/spec/block_docs/math-blahtex/inline.md +48 -0
- data/spec/block_docs/math-blahtex/math2.md +45 -0
- data/spec/block_docs/math-blahtex/table.md +25 -0
- data/spec/block_docs/math/embedded_invalid_svg.md +79 -0
- data/spec/block_docs/math/embedded_svg.md +97 -0
- data/spec/block_docs/math/equations.md +44 -0
- data/{tests/unittest → spec/block_docs}/math/inline.md +7 -19
- data/spec/block_docs/math/math2.md +45 -0
- data/{tests/unittest → spec/block_docs}/math/notmath.md +0 -12
- data/spec/block_docs/math/raw_mathml.md +87 -0
- data/spec/block_docs/math/table.md +25 -0
- data/{tests/unittest → spec/block_docs}/math/table2.md +5 -17
- data/{tests/unittest → spec/block_docs}/misc_sw.md +181 -118
- data/{tests/unittest → spec/block_docs}/olist.md +6 -18
- data/{tests/unittest → spec/block_docs}/one.md +0 -12
- data/{tests/unittest → spec/block_docs}/paragraph.md +0 -12
- data/{tests/unittest → spec/block_docs}/paragraph_rules/dont_merge_ref.md +4 -12
- data/{tests/unittest → spec/block_docs}/paragraph_rules/tab_is_blank.md +0 -12
- data/{tests/unittest → spec/block_docs}/paragraphs.md +1 -13
- data/{tests/unittest → spec/block_docs}/recover/recover_links.md +4 -16
- data/{tests/unittest/pending/ref.md → spec/block_docs/ref_with_period.md} +7 -16
- data/spec/block_docs/ref_with_title.md +22 -0
- data/{tests/unittest → spec/block_docs}/references/long_example.md +16 -23
- data/{tests/unittest → spec/block_docs}/references/spaces_and_numbers.md +0 -12
- data/{tests/unittest → spec/block_docs}/smartypants.md +24 -31
- data/{tests/unittest → spec/block_docs}/syntax_hl.md +13 -17
- data/{tests/unittest → spec/block_docs}/table_attributes.md +2 -14
- data/spec/block_docs/tables.md +58 -0
- data/{tests/unittest → spec/block_docs}/test.md +1 -13
- data/{tests/unittest/notyet → spec/block_docs}/ticks.md +1 -13
- data/spec/block_docs/toc.md +87 -0
- data/{tests/unittest/notyet → spec/block_docs}/triggering.md +14 -25
- data/{tests/unittest → spec/block_docs}/underscore_in_words.md +0 -12
- data/{tests/unittest → spec/block_docs}/wrapping.md +4 -16
- data/spec/block_docs/xml.md +33 -0
- data/{tests/unittest → spec/block_docs}/xml2.md +0 -12
- data/spec/block_docs/xml3.md +24 -0
- data/{tests/unittest → spec/block_docs}/xml_instruction.md +9 -20
- data/spec/block_spec.rb +110 -0
- data/spec/cli_spec.rb +8 -0
- data/spec/span_spec.rb +256 -0
- data/spec/spec_helper.rb +2 -0
- data/spec/to_html_utf8_spec.rb +13 -0
- metadata +205 -243
- metadata.gz.sig +3 -0
- data/Rakefile +0 -48
- data/bin/marudown +0 -29
- data/bin/marutest +0 -345
- data/docs/changelog.md +0 -334
- data/lib/maruku/errors_management.rb +0 -92
- data/lib/maruku/ext/math/latex_fix.rb +0 -12
- data/lib/maruku/input/parse_span_better.rb +0 -746
- data/lib/maruku/input/type_detection.rb +0 -147
- data/lib/maruku/output/to_latex_entities.rb +0 -367
- data/lib/maruku/output/to_latex_strings.rb +0 -64
- data/lib/maruku/structures.rb +0 -167
- data/lib/maruku/structures_inspect.rb +0 -87
- data/lib/maruku/structures_iterators.rb +0 -61
- data/lib/maruku/tests/benchmark.rb +0 -82
- data/lib/maruku/tests/new_parser.rb +0 -373
- data/lib/maruku/tests/tests.rb +0 -136
- data/lib/maruku/usage/example1.rb +0 -33
- data/tests/bugs/code_in_links.md +0 -101
- data/tests/bugs/complex_escaping.md +0 -38
- data/tests/math/syntax.md +0 -46
- data/tests/math_usage/document.md +0 -13
- data/tests/others/abbreviations.md +0 -11
- data/tests/others/blank.md +0 -4
- data/tests/others/code.md +0 -5
- data/tests/others/code2.md +0 -8
- data/tests/others/code3.md +0 -16
- data/tests/others/email.md +0 -4
- data/tests/others/entities.md +0 -19
- data/tests/others/escaping.md +0 -16
- data/tests/others/extra_dl.md +0 -101
- data/tests/others/extra_header_id.md +0 -13
- data/tests/others/extra_table1.md +0 -40
- data/tests/others/footnotes.md +0 -17
- data/tests/others/headers.md +0 -10
- data/tests/others/hrule.md +0 -10
- data/tests/others/images.md +0 -20
- data/tests/others/inline_html.md +0 -42
- data/tests/others/links.md +0 -38
- data/tests/others/list1.md +0 -4
- data/tests/others/list2.md +0 -5
- data/tests/others/list3.md +0 -8
- data/tests/others/lists.md +0 -32
- data/tests/others/lists_after_paragraph.md +0 -44
- data/tests/others/lists_ol.md +0 -39
- data/tests/others/misc_sw.md +0 -105
- data/tests/others/one.md +0 -1
- data/tests/others/paragraphs.md +0 -13
- data/tests/others/sss06.md +0 -352
- data/tests/others/test.md +0 -4
- data/tests/s5/s5profiling.md +0 -48
- data/tests/unittest/bug_def.md +0 -28
- data/tests/unittest/email.md +0 -32
- data/tests/unittest/html2.md +0 -34
- data/tests/unittest/ie.md +0 -61
- data/tests/unittest/links2.md +0 -34
- data/tests/unittest/lists11.md +0 -28
- data/tests/unittest/lists6.md +0 -53
- data/tests/unittest/lists9.md +0 -76
- data/tests/unittest/math/equations.md +0 -86
- data/tests/unittest/math/math2.md +0 -57
- data/tests/unittest/math/table.md +0 -37
- data/tests/unittest/notyet/header_after_par.md +0 -70
- data/tests/unittest/red_tests/abbrev.md +0 -1388
- data/tests/unittest/red_tests/lists7.md +0 -68
- data/tests/unittest/red_tests/lists7b.md +0 -128
- data/tests/unittest/red_tests/lists8.md +0 -76
- data/tests/unittest/red_tests/xml.md +0 -70
- data/tests/unittest/xml3.md +0 -38
- data/tests/utf8-files/simple.md +0 -1
- data/unit_test_block.sh +0 -5
- data/unit_test_span.sh +0 -3
@@ -1,111 +1,91 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
#
|
4
|
-
#
|
5
|
-
#
|
6
|
-
#
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
# Returns the type of next line as a string
|
92
|
-
# breaks at first :definition
|
93
|
-
def tell_me_the_future
|
94
|
-
s = ""; num_e = 0;
|
95
|
-
for i in @lines_index..@lines.size-1
|
96
|
-
c = case @lines[i].md_type
|
97
|
-
when :text; "t"
|
98
|
-
when :empty; num_e+=1; "e"
|
99
|
-
when :definition; "d"
|
100
|
-
else "o"
|
101
|
-
end
|
102
|
-
s += c
|
103
|
-
break if c == "d" or num_e>1
|
104
|
-
end
|
105
|
-
s
|
106
|
-
end
|
107
|
-
|
108
|
-
end # linesource
|
109
|
-
|
110
|
-
end end end end # block
|
1
|
+
module MaRuKu; module In; module Markdown; module BlockLevelParser

  # This represents a source of lines that can be consumed.
  #
  # It is the twin of CharSource.
  #
  # Every line is stored as a MaRuKu::MDLine so that its block-level type
  # (+md_type+) can be queried. A LineSource may be nested inside a parent
  # LineSource; in that case +parent_offset+ is the index in the parent at
  # which this source starts, and is used to report original line numbers.
  class LineSource
    attr_reader :parent

    # @param lines [Array<String, MaRuKu::MDLine>] the lines to consume;
    #   plain strings are wrapped in MaRuKu::MDLine
    # @param parent [LineSource, nil] the enclosing source, if any
    # @param parent_offset [Integer, nil] index of this source's first line
    #   within +parent+
    # @raise [RuntimeError] if +lines+ is nil
    def initialize(lines, parent=nil, parent_offset=nil)
      raise "NIL lines? " unless lines
      @lines = lines.map {|l| l.kind_of?(MaRuKu::MDLine) ? l : MaRuKu::MDLine.new(l) }
      @lines_index = 0
      @parent = parent
      @parent_offset = parent_offset
    end

    # @return [MaRuKu::MDLine, nil] the line at the cursor, or nil when the
    #   source is exhausted
    def cur_line
      @lines[@lines_index]
    end

    # @return [MaRuKu::MDLine, nil] the line after the cursor, without
    #   consuming anything
    def next_line
      @lines[@lines_index + 1]
    end

    # Consumes the current line and returns it.
    #
    # @return [MaRuKu::MDLine]
    # @raise [RuntimeError] if the source is already exhausted
    def shift_line
      raise "Over the rainbow" if @lines_index >= @lines.size
      l = @lines[@lines_index]
      @lines_index += 1
      l
    end

    # Consumes the current line without returning it.
    #
    # @raise [RuntimeError] if the source is already exhausted
    def ignore_line
      raise "Over the rainbow" if @lines_index >= @lines.size
      @lines_index += 1
    end

    # Builds a human-readable dump of the lines around the cursor (up to
    # three before and after), each tagged with its md_type; the current
    # line is marked with an arrow.
    #
    # @return [String]
    def describe
      s = "At line #{original_line_number(@lines_index)}\n"

      context = 3 # lines
      from = [@lines_index - context, 0].max
      to = [@lines_index + context, @lines.size - 1].min

      from.upto(to) do |i|
        prefix = (i == @lines_index) ? '--> ' : ' '
        l = @lines[i]
        # Append in place instead of reallocating the string on every line.
        s << "%10s %4s|%s" % [l.md_type.to_s, prefix, l]
        s << "|\n"
      end

      s
    end

    # Maps an index in this source to a line number in the outermost source
    # (1-based at the root).
    #
    # @param index [Integer] index into this source's lines
    # @return [Integer]
    def original_line_number(index)
      if @parent
        # parent_offset defaults to nil; fall back to 0 so that describe
        # (used while reporting errors) does not itself raise.
        index + @parent.original_line_number(@parent_offset || 0)
      else
        1 + index
      end
    end

    # @return [Integer] the index of the current line
    def cur_index
      @lines_index
    end

    # Returns the types of the upcoming lines as a string, one character per
    # line: "t" for :text, "e" for :empty, "d" for :definition and "o" for
    # anything else. Scanning stops after the first :definition line or
    # after the second :empty line.
    #
    # @return [String]
    def tell_me_the_future
      s = ""
      num_e = 0

      @lines_index.upto(@lines.size - 1) do |i|
        c = case @lines[i].md_type
            when :text; "t"
            when :empty; num_e += 1; "e"
            when :definition; "d"
            else "o"
            end
        s << c
        break if c == "d" || num_e > 1
      end
      s
    end

  end # linesource
end; end; end; end
|
111
91
|
|
@@ -0,0 +1,129 @@
|
|
1
|
+
# This code does the classification of lines for block-level parsing.
module MaRuKu

  # Represents a single line in a Markdown source file, as produced by
  # LineSource.
  class MDLine < String
    # The block-level type of this line (:text, :empty, :code, ...).
    # The result is memoized until the line is modified through gsub!.
    #
    # @return [Symbol]
    def md_type
      @md_type ||= line_md_type
    end

    # Returns the number of leading spaces on this string,
    # considering that a tab counts as {MaRuKu::Strings::TAB_SIZE} spaces.
    #
    # @return [Integer]
    def number_of_leading_spaces
      if self =~ /\A\s+/
        spaces = $&
        spaces.count(" ") + spaces.count("\t") * MaRuKu::Strings::TAB_SIZE
      else
        0
      end
    end

    def gsub!(*args)
      # Any in-place-modification method should reset the md_type
      @md_type = nil
      super
    end

    private

    # Classifies the line by trying each rule in turn; the first match wins.
    #
    # @return [Symbol]
    def line_md_type
      # The order of evaluation is important (:text is a catch-all)
      return :text           if self =~ /\A[a-zA-Z]/
      return :empty          if self =~ /\A\s*\z/
      return :footnote_text  if self =~ FootnoteText
      return :ref_definition if self =~ LinkRegex || self =~ IncompleteLink
      return :abbreviation   if self =~ Abbreviation
      return :definition     if self =~ Definition
      # I had a bug with emails and urls at the beginning of the
      # line that were mistaken for raw_html
      return :text           if self =~ /\A[ ]{0,3}<([^:@>]+?@[^:@>]+?)>/
      # https autolinks need the same protection as http ones, otherwise
      # "<https://...>" falls through to the raw_html rule below.
      return :text           if self =~ /\A[ ]{0,3}<https?:/
      # raw html is like PHP Markdown Extra: at most three spaces before
      return :xml_instr      if self =~ /\A\s*<\?/
      # Anchored with \A like every other rule (was ^, which matches after
      # any embedded newline).
      return :raw_html       if self =~ %r{\A[ ]{0,3}</?\s*\w+}
      return :raw_html       if self =~ /\A[ ]{0,3}<\!\-\-/
      return :header1        if self =~ /\A(=)+/
      return :header2        if self =~ /\A([-\s])+\z/
      return :header3        if self =~ /\A(#)+\s*\S+/
      # at least three asterisks/hyphens/underscores on a line, and only whitespace
      return :hrule          if self =~ /\A(\s*[\*\-_]\s*){3,}\z/
      return :ulist          if self =~ /\A([ ]{0,3}|\t)([\*\-\+])\s+.*/
      return :olist          if self =~ /\A([ ]{0,3}|\t)\d+\.\s+.*/
      return :code           if number_of_leading_spaces >= 4
      return :quote          if self =~ /\A>/
      return :ald            if self =~ AttributeDefinitionList
      return :ial            if self =~ InlineAttributeList
      return :text # else, it's just text
    end
  end

  # MacRuby has trouble with commented regexes, so just put the expanded form
  # in a comment.

  # $1 = id   $2 = attribute list
  AttributeDefinitionList = /\A\s{0,3}\{([\w\s]+)\}:\s*(.*?)\s*\z/
  # $1 = attribute list (must start with ':', '#' or '.')
  InlineAttributeList = /\A\s{0,3}\{([:#\.].*?)\}\s*\z/
  # Example:
  #     ^:blah blah
  #     ^: blah blah
  #     ^   : blah blah
  Definition = /\A[ ]{0,3}:\s*(\S.*)\z/
  # %r{
  #   ^         # begin of line
  #   [ ]{0,3}  # up to 3 spaces
  #   :         # colon
  #   \s*       # whitespace
  #   (\S.*)    # the text = $1
  #   $         # end of line
  # }x

  # Example:
  #     *[HTML]: Hyper Text Markup Language
  Abbreviation = /\A[ ]{0,3}\*\[([^\]]+)\]:\s*(\S.*\S)*\s*\z/
  # %r{
  #   ^         # begin of line
  #   [ ]{0,3}  # up to 3 spaces
  #   \*        # one asterisk
  #   \[        # opening bracket
  #   ([^\]]+)  # any non-closing bracket: id = $1
  #   \]        # closing bracket
  #   :         # colon
  #   \s*       # whitespace
  #   (\S.*\S)* # definition=$2
  #   \s*       # strip this whitespace
  #   $         # end of line
  # }x

  FootnoteText = /\A[ ]{0,3}\[(\^.+)\]:\s*(\S.*)?\z/
  # %r{
  #   ^            # begin of line
  #   [ ]{0,3}     # up to 3 spaces
  #   \[(\^.+)\]:  # id = $1 (including '^')
  #   \s*(\S.*)?$  # text = $2 (optional)
  # }x

  # This regex is taken from BlueCloth sources
  # Link defs are in the form: ^[id]: \n? url "optional title"
  LinkRegex = /\A[ ]{0,3}\[([^\[\]]+)\]:[ ]*<?([^>\s]+)>?[ ]*(?:(?:(?:"([^"]+)")|(?:'([^']+)')|(?:\(([^\(\)]+)\)))\s*(.+)?)?/
  #%r{
  #   ^[ ]{0,3}\[([^\[\]]+)\]:  # id = $1
  #   [ ]*
  #   <?([^>\s]+)>?             # url = $2
  #   [ ]*
  #   (?:                       # Titles are delimited by "quotes" or (parens).
  #     (?:(?:"([^"]+)")|(?:'([^']+)')|(?:\(([^\(\)]+)\))) # title = $3, $4, or $5
  #     \s*(.+)?                # stuff = $6
  #   )?                        # title is optional
  #}x

  # A reference label with nothing after the colon: $1 = id
  IncompleteLink = /\A[ ]{0,3}\[([^\[\]]+?)\]:\s*\z/

  # Table syntax: http://michelf.ca/projects/php-markdown/extra/#table
  #   | -------------:| ------------------------------ |
  TableSeparator = /\A(?>\|?\s*\:?\-+\:?\s*\|?)+?\z/
end
|
@@ -1,615 +1,621 @@
|
|
1
|
-
|
2
|
-
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
|
3
|
-
#
|
4
|
-
# This file is part of Maruku.
|
5
|
-
#
|
6
|
-
# Maruku is free software; you can redistribute it and/or modify
|
7
|
-
# it under the terms of the GNU General Public License as published by
|
8
|
-
# the Free Software Foundation; either version 2 of the License, or
|
9
|
-
# (at your option) any later version.
|
10
|
-
#
|
11
|
-
# Maruku is distributed in the hope that it will be useful,
|
12
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
# GNU General Public License for more details.
|
15
|
-
#
|
16
|
-
# You should have received a copy of the GNU General Public License
|
17
|
-
# along with Maruku; if not, write to the Free Software
|
18
|
-
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
19
|
-
#++
|
20
|
-
|
1
|
+
require 'set'
|
21
2
|
|
22
3
|
module MaRuKu; module In; module Markdown; module BlockLevelParser
|
23
4
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
#
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
#
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
#
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
#
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
5
|
+
include Helpers
|
6
|
+
include MaRuKu::Strings
|
7
|
+
include MaRuKu::In::Markdown::SpanLevelParser
|
8
|
+
|
9
|
+
# The list of block-level items collected while parsing.
class BlockContext < Array
  # Describes the tail of the context for diagnostics: the header line
  # followed by the +inspect+ form of (at most) the last five elements,
  # one per line.
  #
  # @return [String]
  def describe
    n = 5
    # Array#last(n) already copes with arrays shorter than n.
    "Last #{n} elements: " +
      last(n).map {|x| "\n -" + x.inspect }.join
  end
end
|
17
|
+
|
18
|
+
# Splits the text into lines, wraps them in a LineSource and hands that
# to parse_blocks; returns whatever parse_blocks returns.
def parse_text_as_markdown(text)
  parse_blocks(LineSource.new(split_lines(text)))
end
|
24
|
+
|
25
|
+
# Runs the block-level state machine over a LineSource and returns the
# resulting BlockContext.
#
# Each iteration first offers the current line to check_block_extensions
# (NOTE(review): when that returns a truthy value the loop continues
# without advancing, so the extension is presumably expected to consume
# input itself). Otherwise it dispatches on the line's md_type. After the
# loop, IALs are merged, :ial elements and :empty markers are dropped, and
# the leading paragraph of list items / definitions is unwrapped when no
# child of the list asks to keep its paragraph.
#
# Input is a LineSource
def parse_blocks(src)
  output = BlockContext.new

  # run state machine
  while src.cur_line
    next if check_block_extensions(src, output, src.cur_line)

    md_type = src.cur_line.md_type

    # Prints detected type (useful for debugging)
    #puts "parse_blocks #{md_type}|#{src.cur_line}"
    case md_type
    when :empty
      output << :empty
      src.ignore_line
    when :ial
      m = InlineAttributeList.match src.shift_line
      content = m[1] || ""
      src2 = CharSource.new(content, src)
      interpret_extension(src2, output)
    when :ald
      # read_ald returns nil when the line cannot be parsed; do not push
      # that nil into the output (same guard as the :code branch).
      ald = read_ald(src)
      output << ald if ald
    when :text
      # paragraph, or table, or definition list
      read_text_material(src, output)
    when :header2, :hrule
      # hrule
      src.shift_line
      output << md_hrule
    when :header3
      output << read_header3(src)
    when :ulist, :olist
      list_type = (md_type == :ulist) ? :ul : :ol
      li = read_list_item(src)
      # append to current list if we have one
      if output.last.kind_of?(MDElement) &&
          output.last.node_type == list_type then
        output.last.children << li
      else
        output << md_el(list_type, li)
      end
    when :quote
      output << read_quote(src)
    when :code
      e = read_code(src)
      output << e if e
    when :raw_html
      # More extra hacky stuff - if there's more than just HTML, we either wrap it
      # in a paragraph or break it up depending on whether it's an inline element or not
      e = read_raw_html(src)
      unless e.empty?
        if e.first.parsed_html &&
            (first_node_name = e.first.parsed_html.first_node_name) &&
            HTML_INLINE_ELEMS.include?(first_node_name) &&
            !%w(svg math).include?(first_node_name)
          content = [e.first]
          if e.size > 1
            content.concat(e[1].children)
          end
          output << md_par(content)
        else
          output.concat(e)
        end
      end
    when :footnote_text
      output << read_footnote_text(src)
    when :ref_definition
      # At the very start of a nested source the line is handled as text
      # material instead of as a reference definition.
      if src.parent && src.cur_index == 0
        read_text_material(src, output)
      else
        read_ref_definition(src, output)
      end
    when :abbreviation
      output << read_abbreviation(src)
    when :xml_instr
      read_xml_instruction(src, output)
    else # warn if we forgot something
      line = src.cur_line
      maruku_error "Ignoring line '#{line}' type = #{md_type}", src
      src.shift_line
    end
  end

  merge_ial(output, src, output)
  output.delete_if {|x| x.kind_of?(MDElement) && x.node_type == :ial }

  # get rid of empty line markers
  output.delete_if {|x| x == :empty }

  # See for each list if we can omit the paragraphs
  # TODO: do this after
  output.each do |c|
    # Remove paragraphs that we can get rid of
    if [:ul, :ol].include?(c.node_type) && c.children.none?(&:want_my_paragraph)
      c.children.each do |d|
        if d.children.first && d.children.first.node_type == :paragraph
          d.children = d.children.first.children + d.children[1..-1]
        end
      end
    elsif c.node_type == :definition_list && c.children.none?(&:want_my_paragraph)
      c.children.each do |definition|
        definition.definitions.each do |dd|
          # Guard against an empty definition, as the list branch does.
          if dd.children.first && dd.children.first.node_type == :paragraph
            dd.children = dd.children.first.children + dd.children[1..-1]
          end
        end
      end
    end
  end

  output
end
|
138
|
+
|
139
|
+
# Dispatches a line of plain text to the reader for the construct it starts.
#
# Checked in this order:
# 1. table        - the current line contains a pipe and the next line
#                   (right-stripped) matches TableSeparator
# 2. header       - the next line is typed :header1 or :header2
# 3. definition   - eventually_comes_a_def_list(src) is truthy; the new
#                   definition is appended to a :definition_list that is
#                   already the last element of +output+, otherwise a new
#                   list is started
# 4. paragraph    - everything else
#
# src    - line source positioned on the text line
# output - array of parsed elements; it is appended to in place
def read_text_material(src, output)
  table_ahead = src.cur_line.include?('|') &&
                src.next_line &&
                src.next_line.rstrip =~ TableSeparator
  return output << read_table(src) if table_ahead

  following = src.next_line
  if following && [:header1, :header2].include?(following.md_type)
    return output << read_header12(src)
  end

  # Anything that is not the start of a definition list is a paragraph.
  return output << read_paragraph(src) unless eventually_comes_a_def_list(src)

  entry = read_definition(src)
  previous = output.last
  if previous.kind_of?(MDElement) && previous.node_type == :definition_list
    # Consecutive definitions share one list.
    previous.children << entry
  else
    output << md_el(:definition_list, entry)
  end
end
|
158
|
+
|
159
|
+
# Reads one attribute definition list (ALD) line.
#
# The current line is consumed and matched against AttributeDefinitionList.
# On a match, capture 1 is used as the identifier and capture 2 is parsed
# with read_attribute_list; the result is stored in +ald+ under the
# identifier and wrapped with md_ald.
#
# Returns the md_ald element, or nil (after reporting through maruku_error)
# when the line does not match.
def read_ald(src)
  line = src.shift_line
  unless line =~ AttributeDefinitionList
    maruku_error "Bug Bug:\n#{line.inspect}"
    return nil
  end

  name = Regexp.last_match(1)
  raw_attributes = Regexp.last_match(2)
  attributes = read_attribute_list(CharSource.new(raw_attributes, src))
  self.ald[name] = attributes
  md_ald(name, attributes)
end
|
170
|
+
|
171
|
+
# reads a header (with ----- or ========)
|
172
|
+
# Reads a two-line header: a text line followed by an underline line.
#
# The text line is consumed first. When new_meta_data? is true and the line
# ends in a "{...}" group, that group is parsed as an inline attribute list
# (IAL) and removed from the header text. If removing it leaves no text at
# all, the braces are taken to be the header text itself and no attributes
# are attached.
#
# The level is 2 when the underline line is typed :header2, otherwise 1.
# The underline line is consumed as well.
#
# Returns the element built by md_header(level, text, al).
def read_header12(src)
  line = src.shift_line.strip
  al = nil
  ial = nil

  # Check if there is an IAL
  if new_meta_data? && line =~ /^(.*?)\{(.*?)\}\s*$/
    heading, ial = $1, $2
    line = heading.strip
    al = read_attribute_list(CharSource.new(ial, src))
  end

  text = parse_span line
  # Only restore the braces when an IAL was actually extracted; without
  # this guard an empty header would be rendered as the literal "{}".
  if text.empty? && ial
    text = "{#{ial}}"
    al = nil
  end

  level = src.cur_line.md_type == :header2 ? 2 : 1
  src.shift_line
  md_header(level, text, al)
end
|
190
|
+
|
191
|
+
# reads a header like '#### header ####'
|
192
|
+
# Reads a single-line header such as '#### header ####'.
#
# The line is consumed. When new_meta_data? is true and the line ends in a
# "{...}" group, that group is parsed as an inline attribute list (IAL) and
# removed from the header text. The level is the number of leading '#'
# characters; leading and trailing runs of '#' are removed from the text.
# If removing an IAL leaves no text at all, the braces are taken to be the
# header text itself and no attributes are attached.
#
# Returns the element built by md_header(level, text, al).
def read_header3(src)
  line = src.shift_line.strip
  al = nil
  ial = nil

  # Check if there is an IAL
  if new_meta_data? && line =~ /^(.*?)\{(.*?)\}\s*$/
    heading, ial = $1, $2
    line = heading.strip
    al = read_attribute_list(CharSource.new(ial, src))
  end

  level = line[/^#+/].size
  text = parse_span line.gsub(/\A#+|#+\z/, '')
  # Only restore the braces when an IAL was actually extracted; without
  # this guard a header such as "##" would be rendered as the literal "{}".
  if text.empty? && ial
    text = "{#{ial}}"
    al = nil
  end

  md_header(level, text, al)
end
|
209
|
+
|
210
|
+
# Reads an XML processing instruction ("<?target ... ?>"), which may span
# several lines, and appends the result to +output+.
#
# The optional word right after "<?" is the target. Following lines are
# consumed until one contains "?>" or the input runs out. Text after the
# closing "?>" is reported through maruku_error; so is an instruction that
# is never closed (the collected text is then used as-is instead of reading
# past the end of the source).
#
# When the target is 'mrk' and MaRuKu::Globals[:unsafe_features] is set,
# the code is run with safe_execute_code and a non-String, truthy result is
# pushed onto +output+. Otherwise an md_xml_instr element is appended.
#
# Raises RuntimeError ("BugBug") if the current line is not an instruction,
# and ("Not expected") if executed code returns a String.
def read_xml_instruction(src, output)
  m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
  raise "BugBug" unless m
  target = m[2] || ''
  code = m[3]

  # Stop at end of input instead of shifting lines that do not exist.
  while !code.include?('?>') && src.cur_line
    code << "\n" << src.shift_line
  end

  if !code.include?('?>')
    maruku_error "Unterminated XML instruction:\n" +
      code.gsub(/^/, '|'), src
  elsif code !~ /\?>\s*$/
    garbage = (/\?>(.*)$/.match(code))[1]
    maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n" +
      code.gsub(/^/, '|'), src
  end
  code.gsub!(/\?>\s*$/, '')

  if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
    result = safe_execute_code(self, code)
    if result
      raise "Not expected" if result.kind_of? String
      output.push(*result)
    end
  else
    output << md_xml_instr(target, code)
  end
end
|
238
|
+
|
239
|
+
# Reads a block of raw HTML starting at the current line.
#
# Lines are fed to an HTMLHelper until it reports it is finished or the
# source runs out; any exception raised while doing so is reported through
# maruku_error and reading stops there.
#
# Returns an array whose first element is md_html of what the helper read.
# If the helper has non-blank text left over, that text is parsed too:
# - first tag listed in HTML_INLINE_ELEMS: parsed with parse_span and
#   wrapped in a paragraph
# - otherwise: parsed with parse_text_as_markdown and appended element by
#   element
# A leading space of the leftover text is put back in front of the first
# parsed piece when that piece is a String.
def read_raw_html(src)
  helper = HTMLHelper.new
  begin
    helper.eat_this(src.shift_line)
    helper.eat_this("\n" + src.shift_line) while src.cur_line && !helper.is_finished?
  rescue => e
    maruku_error "Bad block-level HTML:\n#{e.inspect.gsub(/^/, '|')}\n", src
  end

  leftover = nil
  leftover = helper.rest unless helper.rest =~ /^\s*$/
  html_text = helper.stuff_you_read
  inline = HTML_INLINE_ELEMS.include?(helper.first_tag)

  return [md_html(html_text)] unless leftover

  tail = if inline
    parse_span(leftover)
  else
    parse_text_as_markdown(leftover)
  end
  if leftover.start_with?(' ') && tail[0].is_a?(String)
    tail[0] = ' ' + tail[0]
  end

  if inline
    [md_html(html_text), md_par(tail)]
  else
    [md_html(html_text)] + tail
  end
end
|
271
|
+
|
272
|
+
# Reads a paragraph: the current line plus every following line that does
# not start another block.
#
# A following line ends the paragraph when:
# - it is typed :quote, :header3, :empty, :ref_definition, :ial or
#   :xml_instr
# - it is typed :olist or :ulist and is either the last line or followed by
#   a line of the same type
# - it is typed :raw_html and its first parsed node is not listed in
#   HTML_INLINE_ELEMS
# - it is blank, the line after it is typed :header1 or :header2, or a
#   block extension matches it
#
# Returns a paragraph element built from parse_span over the gathered
# lines.
def read_paragraph(src)
  collected = [src.shift_line]
  first_node_name = nil

  while (current = src.cur_line)
    kind = current.md_type

    if [:quote, :header3, :empty, :ref_definition, :ial, :xml_instr].include?(kind)
      break
    elsif kind == :olist || kind == :ulist
      # A lone list-looking line inside text is kept as paragraph text.
      upcoming = src.next_line
      break if !upcoming || upcoming.md_type == kind
    elsif kind == :raw_html
      # This is a pretty awful hack to handle inline HTML
      # but it means double-parsing HTML.
      fragments = parse_span([current], src)
      leading = fragments.first
      if !fragments.empty? && !leading.is_a?(String) && leading.parsed_html
        first_node_name = leading.parsed_html.first_node_name
      end
      break if first_node_name && !HTML_INLINE_ELEMS.include?(first_node_name)
    end

    upcoming = src.next_line
    break if current.strip.empty? ||
             (upcoming && [:header1, :header2].include?(upcoming.md_type)) ||
             any_matching_block_extension?(current)

    collected << src.shift_line
  end

  md_par(parse_span(collected, src))
end
|
302
|
+
|
303
|
+
# Reads one list item, either ordered or unordered.
|
304
|
+
# Reads one list item (ordered or unordered) starting at the current line.
#
# spaces_before_first_char gives the width of the item marker and, if
# present, the text of an inline attribute list. The indented continuation
# lines are gathered with read_indented_content, the marker is cut off the
# first line, and the resulting lines are parsed as blocks in a nested
# LineSource.
#
# Returns the element built by md_li(children, want_my_paragraph, al).
def read_list_item(src)
  start_index = src.cur_index
  kind = src.cur_line.md_type
  head_line = src.shift_line

  indent, raw_ial = spaces_before_first_char(head_line)
  if raw_ial
    attributes = read_attribute_list(CharSource.new(raw_ial, src))
    skip = raw_ial.length + 3
  else
    attributes = nil
    skip = 0
  end

  body, wants_paragraph = read_indented_content(src, indent, [], kind, skip)

  # Put the first line back in front, without its '*', '-', '+' or number.
  body.unshift(head_line[indent, head_line.size - 1])

  item_source = LineSource.new(body, src, start_index)
  md_li(parse_blocks(item_source), wants_paragraph, attributes)
end
|
326
|
+
|
327
|
+
# Reads one abbreviation definition line.
#
# The current line is consumed and matched against Abbreviation; capture 1
# is the abbreviation and capture 2 its description. A line that does not
# match, or a missing/empty abbreviation, is reported through maruku_error
# but processing continues. The pair is stored in +abbreviations+.
#
# Returns the element built by md_abbr_def(abbr, desc).
def read_abbreviation(src)
  line = src.shift_line
  recognised = line =~ Abbreviation
  term = Regexp.last_match(1)
  meaning = Regexp.last_match(2)

  maruku_error "Bug: it's Andrea's fault. Tell him.\n#{line.inspect}" unless recognised

  if term.nil? || term.empty?
    maruku_error "Bad abbrev. abbr=#{term.inspect} desc=#{meaning.inspect}"
  end

  self.abbreviations[term] = meaning
  md_abbr_def(term, meaning)
end
|
343
|
+
|
344
|
+
# Reads the text of one footnote, starting at its defining line.
#
# The current line is consumed and matched against FootnoteText; capture 1
# is the footnote id and capture 2 the text on the defining line (empty
# when absent). A mismatch is reported through maruku_error but processing
# continues. Continuation lines are gathered with read_indented_content
# using a fixed indentation of 4, and the whole body is parsed as blocks in
# a nested LineSource.
#
# Returns the md_footnote element, which is also stored in +footnotes+
# under the id.
def read_footnote_text(src)
  start_index = src.cur_index
  opening = src.shift_line

  maruku_error "Bug (it's Andrea's fault)" unless opening =~ FootnoteText
  id = Regexp.last_match(1)
  inline_text = Regexp.last_match(2) || ''

  stoppers = [:footnote_text, :ref_definition, :definition, :abbreviation]
  body, _ = read_indented_content(src, 4, stoppers, :footnote_text)

  # The text on the defining line is the first line of the body.
  body.unshift(inline_text) unless inline_text.strip.empty?

  footnote = md_footnote(id, parse_blocks(LineSource.new(body, src, start_index)))
  self.footnotes[id] = footnote
  footnote
end
|
374
|
+
|
375
|
+
|
376
|
+
# This is the only ugly function in the code base.
|
377
|
+
# It is used to read list items, descriptions, footnote text
|
378
|
+
# Gathers the continuation lines that belong to a list item, definition or
# footnote, with +indentation+ columns stripped from each.
#
# src         - line source positioned just after the item's first line
# indentation - number of columns to strip from each gathered line
# break_list  - line type(s) that end the content when no empty line has
#               been seen yet
# item_type   - line type of the enclosing item, used for the
#               paragraph decision below
# ial_offset  - subtracted from +indentation+ to get the number of leading
#               spaces a line needs in order to count as aligned
#
# Returns [lines, want_my_paragraph]. Trailing empty lines are dropped.
# want_my_paragraph is truthy when content followed an empty line, or when
# an empty line was seen and the line that stopped the scan has type
# +item_type+.
def read_indented_content(src, indentation, break_list, item_type, ial_offset=0)
  gathered = []
  blank_seen = false
  content_after_blank = false
  stoppers = Array(break_list)
  required = indentation - ial_offset

  while (current = src.cur_line)
    spaces = current.number_of_leading_spaces
    # Under-indented lines only continue the item if they are text or empty.
    break if spaces < required && ![:text, :empty].include?(current.md_type)

    stripped = strip_indent(current, indentation)
    kind = stripped.md_type

    # Empty lines and anything deeper-indented (other than code) are
    # grabbed unquestioningly.
    blank_seen = true if kind == :empty
    if kind == :empty || (kind != :code && spaces > required)
      gathered << stripped
      src.shift_line
      next
    end

    if blank_seen
      # after a white line we expect things to be properly aligned
      break if spaces < required
      content_after_blank = true
    elsif stoppers.include?(kind)
      break
    end

    gathered << stripped
    src.shift_line

    # You are only required to indent the first line of
    # a child paragraph.
    if kind == :text
      gathered << strip_indent(src.shift_line, indentation) while src.cur_line && src.cur_line.md_type == :text
    end
  end

  # TODO fix this
  want_my_paragraph = content_after_blank ||
    (blank_seen && src.cur_line && src.cur_line.md_type == item_type)

  gathered.pop while gathered.last && gathered.last.md_type == :empty

  return gathered, want_my_paragraph
end
|
440
|
+
|
441
|
+
|
442
|
+
# Reads a block quote: every consecutive line typed :quote.
#
# Each line is passed through unquote, and the resulting lines are parsed
# as blocks in a nested LineSource that remembers where the quote started.
#
# Returns the element built by md_quote(children).
def read_quote(src)
  start_index = src.cur_index

  quoted = []
  loop do
    current = src.cur_line
    break unless current && current.md_type == :quote
    quoted << unquote(src.shift_line)
  end

  md_quote(parse_blocks(LineSource.new(quoted, src, start_index)))
end
|
455
|
+
|
456
|
+
# Reads an indented code block: every consecutive line typed :code or
# :empty, with 4 columns of indentation stripped from each.
#
# Blank lines at the start and end of the block are dropped.
#
# Returns the element built by md_codeblock from the lines joined with
# newlines, or nil when nothing but blank lines was read.
def read_code(src)
  body = []
  loop do
    current = src.cur_line
    break unless current && [:code, :empty].include?(current.md_type)
    body << strip_indent(src.shift_line, 4)
  end

  body.pop while body.last && body.last.strip.empty?
  body.shift while body.first && body.first.strip.empty?

  return nil if body.empty?

  md_codeblock(body.join("\n"))
end
|
478
|
+
|
479
|
+
# Reads a link reference definition and records it.
#
# The current line is consumed. If the next line is indented by 1 to 3
# spaces and is not itself a footnote, reference, definition or
# abbreviation line, it is treated as the continuation of an incomplete
# link and joined on with a single space.
#
# The joined line is matched against LinkRegex. Captures 1 and 2 are the id
# and URL, the first non-nil of captures 3-5 is the title, and capture 6
# holds extra whitespace-separated key=value attributes (values may be
# double-quoted). Everything is stored in +refs+ under the sanitized id,
# and an md_ref_def element is appended to +out+.
#
# Returns +out+, or nil (after reporting through maruku_error) when the
# line does not have the expected format.
def read_ref_definition(src, out)
  line = src.shift_line

  # if link is incomplete, shift next line
  following = src.cur_line
  if following &&
     ![:footnote_text, :ref_definition, :definition, :abbreviation].include?(following.md_type) &&
     (1..3).include?(following.number_of_leading_spaces)
    line << " " << src.shift_line
  end

  match = LinkRegex.match(line)
  unless match
    # Return unconditionally: chaining with `and return` would fall through
    # to the code below whenever maruku_error returns nil or false.
    maruku_error "Link does not respect format: '#{line}'"
    return
  end

  id = sanitize_ref_id(match[1])
  url = match[2]
  title = match[3] || match[4] || match[5]

  hash = self.refs[id] = {
    :url => url,
    :title => title
  }

  stuff = (match[6] || '')
  stuff.split.each do |couple|
    # Split on the first '=' only, so values may themselves contain '='.
    k, v = couple.split('=', 2)
    next if k.nil? || k.empty?
    v ||= ""
    v = v[1..-2] if v.start_with?('"') # strip quotes
    hash[k.to_sym] = v
  end

  out << md_ref_def(id, url, :title => title)
end
|
514
|
+
|
515
|
+
# Splits a table row on '|' into its cells.
#
# Pieces that are completely empty (such as the one before a leading pipe)
# are dropped; the remaining pieces are stripped of surrounding whitespace.
#
# Returns an array of strings.
def split_cells(s)
  cells = []
  s.split('|').each do |piece|
    cells << piece.strip unless piece.empty?
  end
  cells
end
|
518
|
+
|
519
|
+
# Reads a table: a header row, a separator row, then every following line
# that contains a pipe.
#
# The separator row fixes the number of columns and each column's
# alignment: ':---:' centers, '---:' right-aligns, anything else
# left-aligns. If the header row or any body row has a different number of
# cells, the problem is reported and the table is abandoned.
#
# Returns a :table element whose children are the header cells followed by
# all body cells, with the alignments under :align; or md_br when the table
# is malformed.
def read_table(src)
  head = split_cells(src.shift_line).map do |s|
    md_el(:head_cell, parse_span(s))
  end

  separator = split_cells(src.shift_line)

  align = separator.map do |s|
    # ex: :-------------------:
    starts = s.start_with? ':'
    ends = s.end_with? ':'
    if starts && ends
      :center
    elsif ends
      :right
    else
      :left
    end
  end

  num_columns = align.size

  if head.size != num_columns
    maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
    tell_user "I will ignore this table."
    # XXX try to recover
    return md_br
  end

  rows = []

  while src.cur_line && src.cur_line =~ /\|/
    row = split_cells(src.shift_line).map do |s|
      md_el(:cell, parse_span(s))
    end

    # Check the row itself; the header was already validated above.
    if row.size != num_columns
      maruku_error "Row does not have #{num_columns} columns: \n#{row.inspect}"
      tell_user "I will ignore this table."
      # XXX try to recover
      return md_br
    end
    rows << row
  end

  children = (head + rows).flatten
  md_el(:table, children, { :align => align })
end
|
570
|
+
|
571
|
+
# If current line is text, a definition list is coming
|
572
|
+
# if 1) text,empty,[text,empty]*,definition
|
573
|
+
# Tells whether the upcoming lines start a definition list.
#
# The look-ahead string from src.tell_me_the_future must begin with one or
# more 't', an optional 'e', then a 'd' (presumably one letter per upcoming
# line type: text, empty, definition - confirm against tell_me_the_future).
#
# Returns 0 when it does and nil otherwise.
def eventually_comes_a_def_list(src)
  upcoming = src.tell_me_the_future
  upcoming =~ %r{^t+e?d}x
end
|
576
|
+
|
577
|
+
# Reads one entry of a definition list: one or more term lines, an optional
# empty line, then one or more definitions.
#
# Each consecutive :text line becomes a :definition_term element. Each
# :definition line starts a definition whose first line is capture 1 of
# Definition and whose continuation lines come from read_indented_content;
# the body is parsed as blocks into a :definition_data element.
#
# Returns a :definition element whose children are the terms followed by
# the definitions, with :terms, :definitions and :want_my_paragraph in its
# meta. :want_my_paragraph is true when an empty line separated terms from
# definitions, otherwise it takes the value reported by
# read_indented_content.
#
# Raises RuntimeError ("Chunky Bacon!") when the input ends after the terms
# or the terms are not followed by a definition line.
def read_definition(src)
  # Read one or more terms
  terms = []
  while src.cur_line && src.cur_line.md_type == :text
    terms << md_el(:definition_term, parse_span(src.shift_line))
  end

  raise "Chunky Bacon!" unless src.cur_line

  # one optional empty
  want_my_paragraph = src.cur_line.md_type == :empty
  src.shift_line if want_my_paragraph

  raise "Chunky Bacon!" unless src.cur_line.md_type == :definition

  # Read one or more definitions
  definitions = []
  while src.cur_line && src.cur_line.md_type == :definition
    start_index = src.cur_index

    src.shift_line =~ Definition
    opening = Regexp.last_match(1)

    body, wants = read_indented_content(src, 4, :definition, :definition)
    want_my_paragraph ||= wants

    body.unshift(opening)
    nested = LineSource.new(body, src, start_index)
    definitions << md_el(:definition_data, parse_blocks(nested))
  end

  meta = {
    :terms => terms,
    :definitions => definitions,
    :want_my_paragraph => want_my_paragraph
  }
  md_el(:definition, terms + definitions, meta)
end
|
621
|
+
end end end end
|