maruku 0.6.1 → 0.7.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (263) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/MIT-LICENSE.txt +20 -0
  5. data/bin/maruku +153 -152
  6. data/bin/marutex +2 -29
  7. data/data/entities.xml +261 -0
  8. data/docs/math.md +14 -18
  9. data/lib/maruku.rb +65 -77
  10. data/lib/maruku/attributes.rb +109 -214
  11. data/lib/maruku/defaults.rb +45 -67
  12. data/lib/maruku/document.rb +43 -0
  13. data/lib/maruku/element.rb +112 -0
  14. data/lib/maruku/errors.rb +71 -0
  15. data/lib/maruku/ext/div.rb +105 -113
  16. data/lib/maruku/ext/fenced_code.rb +97 -0
  17. data/lib/maruku/ext/math.rb +22 -26
  18. data/lib/maruku/ext/math/elements.rb +20 -26
  19. data/lib/maruku/ext/math/mathml_engines/blahtex.rb +92 -104
  20. data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +33 -26
  21. data/lib/maruku/ext/math/mathml_engines/none.rb +11 -19
  22. data/lib/maruku/ext/math/mathml_engines/ritex.rb +2 -4
  23. data/lib/maruku/ext/math/parsing.rb +107 -113
  24. data/lib/maruku/ext/math/to_html.rb +184 -187
  25. data/lib/maruku/ext/math/to_latex.rb +30 -21
  26. data/lib/maruku/helpers.rb +158 -257
  27. data/lib/maruku/html.rb +254 -0
  28. data/lib/maruku/input/charsource.rb +272 -319
  29. data/lib/maruku/input/extensions.rb +62 -63
  30. data/lib/maruku/input/html_helper.rb +220 -189
  31. data/lib/maruku/input/linesource.rb +90 -110
  32. data/lib/maruku/input/mdline.rb +129 -0
  33. data/lib/maruku/input/parse_block.rb +618 -612
  34. data/lib/maruku/input/parse_doc.rb +145 -215
  35. data/lib/maruku/input/parse_span.rb +658 -0
  36. data/lib/maruku/input/rubypants.rb +200 -128
  37. data/lib/maruku/inspect_element.rb +60 -0
  38. data/lib/maruku/maruku.rb +10 -31
  39. data/lib/maruku/output/entity_table.rb +33 -0
  40. data/lib/maruku/output/s5/fancy.rb +462 -462
  41. data/lib/maruku/output/s5/to_s5.rb +115 -135
  42. data/lib/maruku/output/to_html.rb +898 -983
  43. data/lib/maruku/output/to_latex.rb +561 -560
  44. data/lib/maruku/output/to_markdown.rb +207 -162
  45. data/lib/maruku/output/to_s.rb +11 -52
  46. data/lib/maruku/string_utils.rb +129 -179
  47. data/lib/maruku/toc.rb +185 -196
  48. data/lib/maruku/version.rb +33 -38
  49. data/spec/block_docs/abbrev.md +776 -0
  50. data/{tests/unittest → spec/block_docs}/abbreviations.md +11 -20
  51. data/{tests/unittest → spec/block_docs}/alt.md +2 -14
  52. data/{tests/unittest/pending → spec/block_docs}/amps.md +1 -13
  53. data/{tests/unittest → spec/block_docs}/attributes/att2.md +0 -12
  54. data/{tests/unittest → spec/block_docs}/attributes/att3.md +2 -14
  55. data/{tests/unittest → spec/block_docs}/attributes/attributes.md +12 -16
  56. data/{tests/unittest → spec/block_docs}/attributes/circular.md +0 -12
  57. data/{tests/unittest → spec/block_docs}/attributes/default.md +1 -13
  58. data/{tests/unittest → spec/block_docs}/blank.md +0 -12
  59. data/{tests/unittest → spec/block_docs}/blanks_in_code.md +16 -15
  60. data/{tests/unittest/loss.md → spec/block_docs/bug_def.md} +6 -18
  61. data/{tests/unittest → spec/block_docs}/bug_table.md +3 -15
  62. data/{tests/unittest → spec/block_docs}/code.md +7 -14
  63. data/{tests/unittest → spec/block_docs}/code2.md +4 -14
  64. data/{tests/unittest → spec/block_docs}/code3.md +12 -16
  65. data/{tests/unittest → spec/block_docs}/data_loss.md +2 -14
  66. data/{tests/unittest → spec/block_docs}/divs/div1.md +0 -12
  67. data/{tests/unittest → spec/block_docs}/divs/div2.md +0 -12
  68. data/{tests/unittest → spec/block_docs}/divs/div3_nest.md +3 -15
  69. data/{tests/unittest → spec/block_docs}/easy.md +1 -13
  70. data/spec/block_docs/email.md +29 -0
  71. data/{tests/unittest/pending → spec/block_docs}/empty_cells.md +3 -15
  72. data/{tests/unittest → spec/block_docs}/encoding/iso-8859-1.md +1 -14
  73. data/{tests/unittest → spec/block_docs}/encoding/utf-8.md +0 -12
  74. data/{tests/unittest → spec/block_docs}/entities.md +27 -29
  75. data/{tests/unittest/notyet → spec/block_docs}/escape.md +2 -14
  76. data/{tests/unittest → spec/block_docs}/escaping.md +11 -22
  77. data/{tests/unittest → spec/block_docs}/extra_dl.md +2 -13
  78. data/{tests/unittest → spec/block_docs}/extra_header_id.md +14 -20
  79. data/{tests/unittest → spec/block_docs}/extra_table1.md +3 -15
  80. data/spec/block_docs/fenced_code_blocks.md +66 -0
  81. data/spec/block_docs/fenced_code_blocks_highlighted.md +18 -0
  82. data/{tests/unittest → spec/block_docs}/footnotes.md +12 -24
  83. data/spec/block_docs/footnotes2.md +78 -0
  84. data/spec/block_docs/hard.md +25 -0
  85. data/spec/block_docs/header_after_par.md +62 -0
  86. data/{tests/unittest → spec/block_docs}/headers.md +10 -18
  87. data/{tests/unittest → spec/block_docs}/hex_entities.md +7 -18
  88. data/{tests/unittest → spec/block_docs}/hrule.md +5 -12
  89. data/{tests/unittest → spec/block_docs}/html3.md +1 -13
  90. data/{tests/unittest → spec/block_docs}/html4.md +2 -14
  91. data/{tests/unittest → spec/block_docs}/html5.md +2 -14
  92. data/spec/block_docs/html_block_in_para.md +22 -0
  93. data/spec/block_docs/html_inline.md +25 -0
  94. data/spec/block_docs/html_trailing.md +31 -0
  95. data/spec/block_docs/ie.md +62 -0
  96. data/spec/block_docs/iframe.md +29 -0
  97. data/{tests/unittest → spec/block_docs}/images.md +22 -28
  98. data/{tests/unittest → spec/block_docs}/images2.md +7 -17
  99. data/{tests/unittest → spec/block_docs}/inline_html.md +37 -67
  100. data/{tests/unittest → spec/block_docs}/inline_html2.md +1 -13
  101. data/spec/block_docs/inline_html_beginning.md +10 -0
  102. data/spec/block_docs/issue20.md +9 -0
  103. data/spec/block_docs/issue26.md +22 -0
  104. data/spec/block_docs/issue29.md +9 -0
  105. data/spec/block_docs/issue30.md +30 -0
  106. data/spec/block_docs/issue31.md +25 -0
  107. data/spec/block_docs/issue40.md +40 -0
  108. data/spec/block_docs/issue64.md +55 -0
  109. data/spec/block_docs/issue67.md +19 -0
  110. data/spec/block_docs/issue70.md +11 -0
  111. data/spec/block_docs/issue72.md +17 -0
  112. data/spec/block_docs/issue74.md +38 -0
  113. data/spec/block_docs/issue79.md +15 -0
  114. data/spec/block_docs/issue83.md +13 -0
  115. data/spec/block_docs/issue85.md +25 -0
  116. data/spec/block_docs/issue88.md +19 -0
  117. data/spec/block_docs/issue89.md +12 -0
  118. data/spec/block_docs/issue90.md +38 -0
  119. data/{tests/unittest/pending → spec/block_docs}/link.md +21 -18
  120. data/{tests/unittest → spec/block_docs}/links.md +33 -32
  121. data/spec/block_docs/links2.md +21 -0
  122. data/{tests/unittest → spec/block_docs}/list1.md +0 -12
  123. data/{tests/unittest → spec/block_docs}/list12.md +2 -14
  124. data/{tests/unittest → spec/block_docs}/list2.md +2 -14
  125. data/spec/block_docs/list_multipara.md +42 -0
  126. data/{tests/unittest → spec/block_docs}/lists.md +28 -29
  127. data/{tests/unittest → spec/block_docs}/lists10.md +2 -14
  128. data/spec/block_docs/lists11.md +23 -0
  129. data/spec/block_docs/lists12.md +43 -0
  130. data/spec/block_docs/lists13.md +55 -0
  131. data/spec/block_docs/lists14.md +61 -0
  132. data/spec/block_docs/lists15.md +36 -0
  133. data/spec/block_docs/lists6.md +88 -0
  134. data/spec/block_docs/lists7b.md +58 -0
  135. data/spec/block_docs/lists9.md +53 -0
  136. data/{tests/unittest → spec/block_docs}/lists_after_paragraph.md +19 -25
  137. data/spec/block_docs/lists_blank.md +35 -0
  138. data/{tests/unittest/list3.md → spec/block_docs/lists_blockquote_code.md} +2 -14
  139. data/{tests/unittest/list4.md → spec/block_docs/lists_need_blank_line.md} +50 -21
  140. data/spec/block_docs/lists_nested.md +44 -0
  141. data/spec/block_docs/lists_nested_blankline.md +28 -0
  142. data/spec/block_docs/lists_nested_deep.md +43 -0
  143. data/{tests/unittest → spec/block_docs}/lists_ol.md +37 -54
  144. data/spec/block_docs/lists_paraindent.md +47 -0
  145. data/spec/block_docs/lists_tab.md +54 -0
  146. data/spec/block_docs/loss.md +17 -0
  147. data/spec/block_docs/math-blahtex/equations.md +30 -0
  148. data/spec/block_docs/math-blahtex/inline.md +48 -0
  149. data/spec/block_docs/math-blahtex/math2.md +45 -0
  150. data/spec/block_docs/math-blahtex/table.md +25 -0
  151. data/spec/block_docs/math/embedded_invalid_svg.md +79 -0
  152. data/spec/block_docs/math/embedded_svg.md +97 -0
  153. data/spec/block_docs/math/equations.md +44 -0
  154. data/{tests/unittest → spec/block_docs}/math/inline.md +7 -19
  155. data/spec/block_docs/math/math2.md +45 -0
  156. data/{tests/unittest → spec/block_docs}/math/notmath.md +0 -12
  157. data/spec/block_docs/math/raw_mathml.md +87 -0
  158. data/spec/block_docs/math/table.md +25 -0
  159. data/{tests/unittest → spec/block_docs}/math/table2.md +5 -17
  160. data/{tests/unittest → spec/block_docs}/misc_sw.md +181 -118
  161. data/{tests/unittest → spec/block_docs}/olist.md +6 -18
  162. data/{tests/unittest → spec/block_docs}/one.md +0 -12
  163. data/{tests/unittest → spec/block_docs}/paragraph.md +0 -12
  164. data/{tests/unittest → spec/block_docs}/paragraph_rules/dont_merge_ref.md +4 -12
  165. data/{tests/unittest → spec/block_docs}/paragraph_rules/tab_is_blank.md +0 -12
  166. data/{tests/unittest → spec/block_docs}/paragraphs.md +1 -13
  167. data/{tests/unittest → spec/block_docs}/recover/recover_links.md +4 -16
  168. data/{tests/unittest/pending/ref.md → spec/block_docs/ref_with_period.md} +7 -16
  169. data/spec/block_docs/ref_with_title.md +22 -0
  170. data/{tests/unittest → spec/block_docs}/references/long_example.md +16 -23
  171. data/{tests/unittest → spec/block_docs}/references/spaces_and_numbers.md +0 -12
  172. data/{tests/unittest → spec/block_docs}/smartypants.md +24 -31
  173. data/{tests/unittest → spec/block_docs}/syntax_hl.md +13 -17
  174. data/{tests/unittest → spec/block_docs}/table_attributes.md +2 -14
  175. data/spec/block_docs/tables.md +58 -0
  176. data/{tests/unittest → spec/block_docs}/test.md +1 -13
  177. data/{tests/unittest/notyet → spec/block_docs}/ticks.md +1 -13
  178. data/spec/block_docs/toc.md +87 -0
  179. data/{tests/unittest/notyet → spec/block_docs}/triggering.md +14 -25
  180. data/{tests/unittest → spec/block_docs}/underscore_in_words.md +0 -12
  181. data/{tests/unittest → spec/block_docs}/wrapping.md +4 -16
  182. data/spec/block_docs/xml.md +33 -0
  183. data/{tests/unittest → spec/block_docs}/xml2.md +0 -12
  184. data/spec/block_docs/xml3.md +24 -0
  185. data/{tests/unittest → spec/block_docs}/xml_instruction.md +9 -20
  186. data/spec/block_spec.rb +110 -0
  187. data/spec/cli_spec.rb +8 -0
  188. data/spec/span_spec.rb +256 -0
  189. data/spec/spec_helper.rb +2 -0
  190. data/spec/to_html_utf8_spec.rb +13 -0
  191. metadata +205 -243
  192. metadata.gz.sig +3 -0
  193. data/Rakefile +0 -48
  194. data/bin/marudown +0 -29
  195. data/bin/marutest +0 -345
  196. data/docs/changelog.md +0 -334
  197. data/lib/maruku/errors_management.rb +0 -92
  198. data/lib/maruku/ext/math/latex_fix.rb +0 -12
  199. data/lib/maruku/input/parse_span_better.rb +0 -746
  200. data/lib/maruku/input/type_detection.rb +0 -147
  201. data/lib/maruku/output/to_latex_entities.rb +0 -367
  202. data/lib/maruku/output/to_latex_strings.rb +0 -64
  203. data/lib/maruku/structures.rb +0 -167
  204. data/lib/maruku/structures_inspect.rb +0 -87
  205. data/lib/maruku/structures_iterators.rb +0 -61
  206. data/lib/maruku/tests/benchmark.rb +0 -82
  207. data/lib/maruku/tests/new_parser.rb +0 -373
  208. data/lib/maruku/tests/tests.rb +0 -136
  209. data/lib/maruku/usage/example1.rb +0 -33
  210. data/tests/bugs/code_in_links.md +0 -101
  211. data/tests/bugs/complex_escaping.md +0 -38
  212. data/tests/math/syntax.md +0 -46
  213. data/tests/math_usage/document.md +0 -13
  214. data/tests/others/abbreviations.md +0 -11
  215. data/tests/others/blank.md +0 -4
  216. data/tests/others/code.md +0 -5
  217. data/tests/others/code2.md +0 -8
  218. data/tests/others/code3.md +0 -16
  219. data/tests/others/email.md +0 -4
  220. data/tests/others/entities.md +0 -19
  221. data/tests/others/escaping.md +0 -16
  222. data/tests/others/extra_dl.md +0 -101
  223. data/tests/others/extra_header_id.md +0 -13
  224. data/tests/others/extra_table1.md +0 -40
  225. data/tests/others/footnotes.md +0 -17
  226. data/tests/others/headers.md +0 -10
  227. data/tests/others/hrule.md +0 -10
  228. data/tests/others/images.md +0 -20
  229. data/tests/others/inline_html.md +0 -42
  230. data/tests/others/links.md +0 -38
  231. data/tests/others/list1.md +0 -4
  232. data/tests/others/list2.md +0 -5
  233. data/tests/others/list3.md +0 -8
  234. data/tests/others/lists.md +0 -32
  235. data/tests/others/lists_after_paragraph.md +0 -44
  236. data/tests/others/lists_ol.md +0 -39
  237. data/tests/others/misc_sw.md +0 -105
  238. data/tests/others/one.md +0 -1
  239. data/tests/others/paragraphs.md +0 -13
  240. data/tests/others/sss06.md +0 -352
  241. data/tests/others/test.md +0 -4
  242. data/tests/s5/s5profiling.md +0 -48
  243. data/tests/unittest/bug_def.md +0 -28
  244. data/tests/unittest/email.md +0 -32
  245. data/tests/unittest/html2.md +0 -34
  246. data/tests/unittest/ie.md +0 -61
  247. data/tests/unittest/links2.md +0 -34
  248. data/tests/unittest/lists11.md +0 -28
  249. data/tests/unittest/lists6.md +0 -53
  250. data/tests/unittest/lists9.md +0 -76
  251. data/tests/unittest/math/equations.md +0 -86
  252. data/tests/unittest/math/math2.md +0 -57
  253. data/tests/unittest/math/table.md +0 -37
  254. data/tests/unittest/notyet/header_after_par.md +0 -70
  255. data/tests/unittest/red_tests/abbrev.md +0 -1388
  256. data/tests/unittest/red_tests/lists7.md +0 -68
  257. data/tests/unittest/red_tests/lists7b.md +0 -128
  258. data/tests/unittest/red_tests/lists8.md +0 -76
  259. data/tests/unittest/red_tests/xml.md +0 -70
  260. data/tests/unittest/xml3.md +0 -38
  261. data/tests/utf8-files/simple.md +0 -1
  262. data/unit_test_block.sh +0 -5
  263. data/unit_test_span.sh +0 -3
@@ -1,111 +1,91 @@
1
- #--
2
- # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
- #
4
- # This file is part of Maruku.
5
- #
6
- # Maruku is free software; you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation; either version 2 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # Maruku is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with Maruku; if not, write to the Free Software
18
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
- #++
20
-
21
-
22
- module MaRuKu; module In; module Markdown; module BlockLevelParser
23
-
24
- # This represents a source of lines that can be consumed.
25
- #
26
- # It is the twin of CharSource.
27
- #
28
-
29
- class LineSource
30
- include MaRuKu::Strings
31
- attr_reader :parent
32
-
33
- def initialize(lines, parent=nil, parent_offset=nil)
34
- raise "NIL lines? " if not lines
35
- @lines = lines
36
- @lines_index = 0
37
- @parent = parent
38
- @parent_offset = parent_offset
39
- end
40
-
41
- def cur_line() @lines[@lines_index] end
42
- def next_line() @lines[@lines_index+1] end
43
-
44
- def shift_line()
45
- raise "Over the rainbow" if @lines_index >= @lines.size
46
- l = @lines[@lines_index]
47
- @lines_index += 1
48
- return l
49
- end
50
-
51
- def ignore_line
52
- raise "Over the rainbow" if @lines_index >= @lines.size
53
- @lines_index += 1
54
- end
55
-
56
- def describe
57
- s = "At line #{original_line_number(@lines_index)}\n"
58
-
59
- context = 3 # lines
60
- from = [@lines_index-context, 0].max
61
- to = [@lines_index+context, @lines.size-1].min
62
-
63
- for i in from..to
64
- prefix = (i == @lines_index) ? '--> ' : ' ';
65
- l = @lines[i]
66
- s += "%10s %4s|%s" %
67
- [@lines[i].md_type.to_s, prefix, l]
68
-
69
- s += "|\n"
70
- end
71
-
72
- # if @parent
73
- # s << "Parent context is: \n"
74
- # s << add_tabs(@parent.describe,1,'|')
75
- # end
76
- s
77
- end
78
-
79
- def original_line_number(index)
80
- if @parent
81
- return index + @parent.original_line_number(@parent_offset)
82
- else
83
- 1 + index
84
- end
85
- end
86
-
87
- def cur_index
88
- @lines_index
89
- end
90
-
91
- # Returns the type of next line as a string
92
- # breaks at first :definition
93
- def tell_me_the_future
94
- s = ""; num_e = 0;
95
- for i in @lines_index..@lines.size-1
96
- c = case @lines[i].md_type
97
- when :text; "t"
98
- when :empty; num_e+=1; "e"
99
- when :definition; "d"
100
- else "o"
101
- end
102
- s += c
103
- break if c == "d" or num_e>1
104
- end
105
- s
106
- end
107
-
108
- end # linesource
109
-
110
- end end end end # block
1
module MaRuKu::In::Markdown::BlockLevelParser

  # A forward-only cursor over a list of lines.
  #
  # It is the twin of CharSource.
  class LineSource
    attr_reader :parent

    # lines         - the lines to serve; every element that is not already
    #                 a MaRuKu::MDLine is wrapped in one.
    # parent        - optional enclosing LineSource.
    # parent_offset - index handed to parent.original_line_number when
    #                 translating our indices back to the original input.
    #
    # Raises a RuntimeError when lines is nil/false.
    def initialize(lines, parent=nil, parent_offset=nil)
      raise "NIL lines? " unless lines
      @lines = lines.map do |line|
        line.kind_of?(MaRuKu::MDLine) ? line : MaRuKu::MDLine.new(line)
      end
      @lines_index = 0
      @parent = parent
      @parent_offset = parent_offset
    end

    # The line under the cursor, or nil once everything was consumed.
    def cur_line
      @lines[@lines_index]
    end

    # The line right after the cursor, or nil if there is none.
    def next_line
      @lines[@lines_index + 1]
    end

    # Consumes the current line and returns it.
    # Raises a RuntimeError when the cursor is already past the last line.
    def shift_line
      raise "Over the rainbow" if @lines_index >= @lines.size
      @lines_index += 1
      @lines[@lines_index - 1]
    end

    # Consumes the current line without handing it back.
    # Raises a RuntimeError when the cursor is already past the last line.
    def ignore_line
      raise "Over the rainbow" if @lines_index >= @lines.size
      @lines_index += 1
    end

    # Builds a human-readable dump of the lines around the cursor (up to
    # three on each side), each tagged with its md_type; the current line
    # is marked with an arrow.
    def describe
      report = "At line #{original_line_number(@lines_index)}\n"

      radius = 3 # lines
      first = [@lines_index - radius, 0].max
      last = [@lines_index + radius, @lines.size - 1].min

      (first..last).each do |i|
        marker = i == @lines_index ? '--> ' : ' '
        line = @lines[i]
        report << ("%10s %4s|%s" % [line.md_type.to_s, marker, line])
        report << "|\n"
      end

      report
    end

    # Maps an index of this source to a line number of the original input:
    # 1-based at the top level, otherwise offset by the parent's number for
    # @parent_offset.
    def original_line_number(index)
      return 1 + index unless @parent
      index + @parent.original_line_number(@parent_offset)
    end

    # Index of the line under the cursor.
    def cur_index
      @lines_index
    end

    # Returns the type of next line as a string
    # breaks at first :definition
    #
    # One character per upcoming line: "t" (text), "e" (empty),
    # "d" (definition), "o" (anything else). Scanning also stops after the
    # second empty line.
    def tell_me_the_future
      summary = ""
      empties = 0

      (@lines_index...@lines.size).each do |i|
        code =
          case @lines[i].md_type
          when :text then "t"
          when :empty
            empties += 1
            "e"
          when :definition then "d"
          else "o"
          end
        summary << code
        break if code == "d" || empties > 1
      end

      summary
    end

  end # linesource
end
111
91
 
@@ -0,0 +1,129 @@
1
# This code does the classification of lines for block-level parsing.
module MaRuKu

  # Represents a single line in a Markdown source file, as produced by
  # LineSource.
  class MDLine < String
    # The block-level type of this line (:text, :empty, :ulist, ...).
    # The result of line_md_type is cached until gsub! is called.
    #
    # @return [Symbol]
    def md_type
      @md_type ||= line_md_type
    end

    # Returns the number of leading spaces on this string,
    # considering that a tab counts as {MaRuKu::Strings::TAB_SIZE} spaces.
    # Leading whitespace other than spaces and tabs counts as zero.
    #
    # @return [Integer]
    def number_of_leading_spaces
      if self =~ /\A\s+/
        spaces = $&
        spaces.count(" ") + spaces.count("\t") * MaRuKu::Strings::TAB_SIZE
      else
        0
      end
    end

    # In-place substitution that also drops the cached md_type, so the line
    # is re-classified on the next md_type call.
    #
    # Only gsub! is covered: other in-place mutators (sub!, replace, <<, ...)
    # are not overridden here and leave a previously cached type untouched.
    #
    # NOTE(review): because this wrapper sits between the caller and
    # String#gsub!, a block passed by the caller probably cannot rely on
    # $1/$~ -- verify before using the block form with capture globals.
    def gsub!(*args)
      @md_type = nil
      super
    end

    private

    # Classifies the line by trying each pattern in turn.
    #
    # @return [Symbol]
    def line_md_type
      # The order of evaluation is important (:text is a catch-all)
      return :text if self =~ /\A[a-zA-Z]/
      return :empty if self =~ /\A\s*\z/
      return :footnote_text if self =~ FootnoteText
      return :ref_definition if self =~ LinkRegex || self =~ IncompleteLink
      return :abbreviation if self =~ Abbreviation
      return :definition if self =~ Definition
      # I had a bug with emails and urls at the beginning of the
      # line that were mistaken for raw_html
      return :text if self =~ /\A[ ]{0,3}<([^:@>]+?@[^:@>]+?)>/
      # Any "<scheme://" autolink (https, ftp, ...) is text, not only
      # "<http:"; otherwise it would match the raw_html pattern below.
      return :text if self =~ %r{\A[ ]{0,3}<(?:http:|\w+://)}
      # raw html is like PHP Markdown Extra: at most three spaces before
      return :xml_instr if self =~ /\A\s*<\?/
      return :raw_html if self =~ %r{^[ ]{0,3}</?\s*\w+}
      return :raw_html if self =~ /\A[ ]{0,3}<\!\-\-/
      return :header1 if self =~ /\A(=)+/
      return :header2 if self =~ /\A([-\s])+\z/
      return :header3 if self =~ /\A(#)+\s*\S+/
      # at least three asterisks/hyphens/underscores on a line, and only whitespace
      return :hrule if self =~ /\A(\s*[\*\-_]\s*){3,}\z/
      return :ulist if self =~ /\A([ ]{0,3}|\t)([\*\-\+])\s+.*/
      return :olist if self =~ /\A([ ]{0,3}|\t)\d+\.\s+.*/
      return :code if number_of_leading_spaces >= 4
      return :quote if self =~ /\A>/
      return :ald if self =~ AttributeDefinitionList
      return :ial if self =~ InlineAttributeList
      return :text # else, it's just text
    end
  end

  # MacRuby has trouble with commented regexes, so just put the expanded form
  # in a comment.

  # $1 = id $2 = attribute list
  AttributeDefinitionList = /\A\s{0,3}\{([\w\s]+)\}:\s*(.*?)\s*\z/
  # $1 = attribute list (starts with ':', '#' or '.')
  InlineAttributeList = /\A\s{0,3}\{([:#\.].*?)\}\s*\z/
  # Example:
  #   ^:blah blah
  #   ^: blah blah
  #   ^ : blah blah
  Definition = /\A[ ]{0,3}:\s*(\S.*)\z/
  # %r{
  #   ^         # begin of line
  #   [ ]{0,3}  # up to 3 spaces
  #   :         # colon
  #   \s*       # whitespace
  #   (\S.*)    # the text = $1
  #   $         # end of line
  # }x

  # Example:
  #   *[HTML]: Hyper Text Markup Language
  Abbreviation = /\A[ ]{0,3}\*\[([^\]]+)\]:\s*(\S.*\S)*\s*\z/
  # %r{
  #   ^          # begin of line
  #   [ ]{0,3}   # up to 3 spaces
  #   \*         # one asterisk
  #   \[         # opening bracket
  #   ([^\]]+)   # any non-closing bracket: id = $1
  #   \]         # closing bracket
  #   :          # colon
  #   \s*        # whitespace
  #   (\S.*\S)*  # definition=$2
  #   \s*        # strip this whitespace
  #   $          # end of line
  # }x

  FootnoteText = /\A[ ]{0,3}\[(\^.+)\]:\s*(\S.*)?\z/
  # %r{
  #   ^             # begin of line
  #   [ ]{0,3}      # up to 3 spaces
  #   \[(\^.+)\]:   # id = $1 (including '^')
  #   \s*(\S.*)?$   # text = $2 (optional)
  # }x

  # This regex is taken from BlueCloth sources
  # Link defs are in the form: ^[id]: \n? url "optional title"
  LinkRegex = /\A[ ]{0,3}\[([^\[\]]+)\]:[ ]*<?([^>\s]+)>?[ ]*(?:(?:(?:"([^"]+)")|(?:'([^']+)')|(?:\(([^\(\)]+)\)))\s*(.+)?)?/
  #%r{
  #   ^[ ]{0,3}\[([^\[\]]+)\]:   # id = $1
  #   [ ]*
  #   <?([^>\s]+)>?              # url = $2
  #   [ ]*
  #   (?:                        # Titles are delimited by "quotes" or (parens).
  #     (?:(?:"([^"]+)")|(?:'([^']+)')|(?:\(([^\(\)]+)\)))  # title = $3, $4, or $5
  #     \s*(.+)?                 # stuff = $6
  #   )?                         # title is optional
  #}x

  IncompleteLink = /\A[ ]{0,3}\[([^\[\]]+?)\]:\s*\z/

  # Table syntax: http://michelf.ca/projects/php-markdown/extra/#table
  #   | -------------:| ------------------------------ |
  TableSeparator = /\A(?>\|?\s*\:?\-+\:?\s*\|?)+?\z/
end
@@ -1,615 +1,621 @@
1
- #--
2
- # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
- #
4
- # This file is part of Maruku.
5
- #
6
- # Maruku is free software; you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation; either version 2 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # Maruku is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with Maruku; if not, write to the Free Software
18
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
- #++
20
-
1
+ require 'set'
21
2
 
22
3
  module MaRuKu; module In; module Markdown; module BlockLevelParser
23
4
 
24
- include Helpers
25
- include MaRuKu::Strings
26
- include MaRuKu::In::Markdown::SpanLevelParser
27
-
28
- class BlockContext < Array
29
- def describe
30
- n = 5
31
- desc = size > n ? self[-n,n] : self
32
- "Last #{n} elements: "+
33
- desc.map{|x| "\n -" + x.inspect}.join
34
- end
35
- end
36
-
37
- # Splits the string and calls parse_lines_as_markdown
38
- def parse_text_as_markdown(text)
39
- lines = split_lines(text)
40
- src = LineSource.new(lines)
41
- return parse_blocks(src)
42
- end
43
-
44
- # Input is a LineSource
45
- def parse_blocks(src)
46
- output = BlockContext.new
47
-
48
- # run state machine
49
- while src.cur_line
50
-
51
- next if check_block_extensions(src, output, src.cur_line)
52
-
53
- # Prints detected type (useful for debugging)
54
- # puts "#{src.cur_line.md_type}|#{src.cur_line}"
55
- case src.cur_line.md_type
56
- when :empty;
57
- output.push :empty
58
- src.ignore_line
59
- when :ial
60
- m = InlineAttributeList.match src.shift_line
61
- content = m[1] || ""
62
- # puts "Content: #{content.inspect}"
63
- src2 = CharSource.new(content, src)
64
- interpret_extension(src2, output, [nil])
65
- when :ald
66
- output.push read_ald(src)
67
- when :text
68
- # paragraph, or table, or definition list
69
- read_text_material(src, output)
70
- when :header2, :hrule
71
- # hrule
72
- src.shift_line
73
- output.push md_hrule()
74
- when :header3
75
- output.push read_header3(src)
76
- when :ulist, :olist
77
- list_type = src.cur_line.md_type == :ulist ? :ul : :ol
78
- li = read_list_item(src)
79
- # append to current list if we have one
80
- if output.last.kind_of?(MDElement) &&
81
- output.last.node_type == list_type then
82
- output.last.children << li
83
- else
84
- output.push md_el(list_type, [li])
85
- end
86
- when :quote; output.push read_quote(src)
87
- when :code; e = read_code(src); output << e if e
88
- when :raw_html; e = read_raw_html(src); output << e if e
89
-
90
- when :footnote_text; output.push read_footnote_text(src)
91
- when :ref_definition;
92
- if src.parent && (src.cur_index == 0)
93
- read_text_material(src, output)
94
- else
95
- read_ref_definition(src, output)
96
- end
97
- when :abbreviation; output.push read_abbreviation(src)
98
- when :xml_instr; read_xml_instruction(src, output)
99
- when :metadata;
100
- maruku_error "Please use the new meta-data syntax: \n"+
101
- " http://maruku.rubyforge.org/proposal.html\n", src
102
- src.ignore_line
103
- else # warn if we forgot something
104
- md_type = src.cur_line.md_type
105
- line = src.cur_line
106
- maruku_error "Ignoring line '#{line}' type = #{md_type}", src
107
- src.shift_line
108
- end
109
- end
110
-
111
- merge_ial(output, src, output)
112
- output.delete_if {|x| x.kind_of?(MDElement) &&
113
- x.node_type == :ial}
114
-
115
- # get rid of empty line markers
116
- output.delete_if {|x| x == :empty}
117
- # See for each list if we can omit the paragraphs and use li_span
118
- # TODO: do this after
119
- output.each do |c|
120
- # Remove paragraphs that we can get rid of
121
- if [:ul,:ol].include? c.node_type
122
- if c.children.all? {|li| !li.want_my_paragraph} then
123
- c.children.each do |d|
124
- d.node_type = :li_span
125
- d.children = d.children[0].children
126
- end
127
- end
128
- end
129
- if c.node_type == :definition_list
130
- if c.children.all?{|defi| !defi.want_my_paragraph} then
131
- c.children.each do |definition|
132
- definition.definitions.each do |dd|
133
- dd.children = dd.children[0].children
134
- end
135
- end
136
- end
137
- end
138
- end
139
-
140
- output
141
- end
142
-
143
- def read_text_material(src, output)
144
- if src.cur_line =~ MightBeTableHeader and
145
- (src.next_line && src.next_line =~ TableSeparator)
146
- output.push read_table(src)
147
- elsif [:header1,:header2].include? src.next_line.md_type
148
- output.push read_header12(src)
149
- elsif eventually_comes_a_def_list(src)
150
- definition = read_definition(src)
151
- if output.last.kind_of?(MDElement) &&
152
- output.last.node_type == :definition_list then
153
- output.last.children << definition
154
- else
155
- output.push md_el(:definition_list, [definition])
156
- end
157
- else # Start of a paragraph
158
- output.push read_paragraph(src)
159
- end
160
- end
161
-
162
-
163
- def read_ald(src)
164
- if (l=src.shift_line) =~ AttributeDefinitionList
165
- id = $1; al=$2;
166
- al = read_attribute_list(CharSource.new(al,src), context=nil, break_on=[nil])
167
- self.ald[id] = al;
168
- return md_ald(id, al)
169
- else
170
- maruku_error "Bug Bug:\n#{l.inspect}"
171
- return nil
172
- end
173
- end
174
-
175
- # reads a header (with ----- or ========)
176
- def read_header12(src)
177
- line = src.shift_line.strip
178
- al = nil
179
- # Check if there is an IAL
180
- if new_meta_data? and line =~ /^(.*)\{(.*)\}\s*$/
181
- line = $1.strip
182
- ial = $2
183
- al = read_attribute_list(CharSource.new(ial,src), context=nil, break_on=[nil])
184
- end
185
- text = parse_lines_as_span [ line ]
186
- level = src.cur_line.md_type == :header2 ? 2 : 1;
187
- src.shift_line
188
- return md_header(level, text, al)
189
- end
190
-
191
- # reads a header like '#### header ####'
192
- def read_header3(src)
193
- line = src.shift_line.strip
194
- al = nil
195
- # Check if there is an IAL
196
- if new_meta_data? and line =~ /^(.*)\{(.*)\}\s*$/
197
- line = $1.strip
198
- ial = $2
199
- al = read_attribute_list(CharSource.new(ial,src), context=nil, break_on=[nil])
200
- end
201
- level = num_leading_hashes(line)
202
- text = parse_lines_as_span [strip_hashes(line)]
203
- return md_header(level, text, al)
204
- end
205
-
206
- def read_xml_instruction(src, output)
207
- m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
208
- raise "BugBug" if not m
209
- target = m[2] || ''
210
- code = m[3]
211
- until code =~ /\?>/
212
- code += "\n"+src.shift_line
213
- end
214
- if not code =~ (/\?>\s*$/)
215
- garbage = (/\?>(.*)$/.match(code))[1]
216
- maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n"+
217
- add_tabs(code, 1, '|'), src
218
- end
219
- code.gsub!(/\?>\s*$/, '')
220
-
221
- if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
222
- result = safe_execute_code(self, code)
223
- if result
224
- if result.kind_of? String
225
- raise "Not expected"
226
- else
227
- output.push(*result)
228
- end
229
- end
230
- else
231
- output.push md_xml_instr(target, code)
232
- end
233
- end
234
-
235
- def read_raw_html(src)
236
- h = HTMLHelper.new
237
- begin
238
- h.eat_this(l=src.shift_line)
239
- # puts "\nBLOCK:\nhtml -> #{l.inspect}"
240
- while src.cur_line and not h.is_finished?
241
- l=src.shift_line
242
- # puts "html -> #{l.inspect}"
243
- h.eat_this "\n"+l
244
- end
245
- rescue Exception => e
246
- ex = e.inspect + e.backtrace.join("\n")
247
- maruku_error "Bad block-level HTML:\n#{add_tabs(ex,1,'|')}\n", src
248
- end
249
- if not (h.rest =~ /^\s*$/)
250
- maruku_error "Could you please format this better?\n"+
251
- "I see that #{h.rest.inspect} is left after the raw HTML.", src
252
- end
253
- raw_html = h.stuff_you_read
254
-
255
- return md_html(raw_html)
256
- end
257
-
258
- def read_paragraph(src)
259
- lines = [src.shift_line]
260
- while src.cur_line
261
- # :olist does not break
262
- case t = src.cur_line.md_type
263
- when :quote,:header3,:empty,:ref_definition,:ial #,:xml_instr,:raw_html
264
- break
265
- when :olist,:ulist
266
- break if src.next_line.md_type == t
267
- end
268
- break if src.cur_line.strip.size == 0
269
- break if [:header1,:header2].include? src.next_line.md_type
270
- break if any_matching_block_extension?(src.cur_line)
271
-
272
- lines << src.shift_line
273
- end
274
- # dbg_describe_ary(lines, 'PAR')
275
- children = parse_lines_as_span(lines, src)
276
-
277
- return md_par(children)
278
- end
279
-
280
- # Reads one list item, either ordered or unordered.
281
- def read_list_item(src)
282
- parent_offset = src.cur_index
283
-
284
- item_type = src.cur_line.md_type
285
- first = src.shift_line
286
-
287
- indentation = spaces_before_first_char(first)
288
- break_list = [:ulist, :olist, :ial]
289
- # Ugly things going on inside `read_indented_content`
290
- lines, want_my_paragraph =
291
- read_indented_content(src,indentation, break_list, item_type)
292
-
293
- # add first line
294
- # Strip first '*', '-', '+' from first line
295
- stripped = first[indentation, first.size-1]
296
- lines.unshift stripped
297
-
298
- # dbg_describe_ary(lines, 'LIST ITEM ')
299
-
300
- src2 = LineSource.new(lines, src, parent_offset)
301
- children = parse_blocks(src2)
302
- with_par = want_my_paragraph || (children.size>1)
303
-
304
- return md_li(children, with_par)
305
- end
306
-
307
- def read_abbreviation(src)
308
- if not (l=src.shift_line) =~ Abbreviation
309
- maruku_error "Bug: it's Andrea's fault. Tell him.\n#{l.inspect}"
310
- end
311
-
312
- abbr = $1
313
- desc = $2
314
-
315
- if (not abbr) or (abbr.size==0)
316
- maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
317
- end
318
-
319
- self.abbreviations[abbr] = desc
320
-
321
- return md_abbr_def(abbr, desc)
322
- end
323
-
324
- def read_footnote_text(src)
325
- parent_offset = src.cur_index
326
-
327
- first = src.shift_line
328
-
329
- if not first =~ FootnoteText
330
- maruku_error "Bug (it's Andrea's fault)"
331
- end
332
-
333
- id = $1
334
- text = $2
335
-
336
- # Ugly things going on inside `read_indented_content`
337
- indentation = 4 #first.size-text.size
338
-
339
- # puts "id =_#{id}_; text=_#{text}_ indent=#{indentation}"
340
-
341
- break_list = [:footnote_text, :ref_definition, :definition, :abbreviation]
342
- item_type = :footnote_text
343
- lines, want_my_paragraph =
344
- read_indented_content(src,indentation, break_list, item_type)
345
-
346
- # add first line
347
- if text && text.strip != "" then lines.unshift text end
348
-
349
- # dbg_describe_ary(lines, 'FOOTNOTE')
350
- src2 = LineSource.new(lines, src, parent_offset)
351
- children = parse_blocks(src2)
352
-
353
- e = md_footnote(id, children)
354
- self.footnotes[id] = e
355
- return e
356
- end
357
-
358
-
359
- # This is the only ugly function in the code base.
360
- # It is used to read list items, descriptions, footnote text
361
- def read_indented_content(src, indentation, break_list, item_type)
362
- lines =[]
363
- # collect all indented lines
364
- saw_empty = false; saw_anything_after = false
365
- while src.cur_line
366
- # puts "Reading indent = #{indentation} #{src.cur_line.inspect}"
367
- #puts "#{src.cur_line.md_type} #{src.cur_line.inspect}"
368
- if src.cur_line.md_type == :empty
369
- saw_empty = true
370
- lines << src.shift_line
371
- next
372
- end
373
-
374
- # after a white line
375
- if saw_empty
376
- # we expect things to be properly aligned
377
- if (ns=number_of_leading_spaces(src.cur_line)) < indentation
378
- #puts "breaking for spaces, only #{ns}: #{src.cur_line}"
379
- break
380
- end
381
- saw_anything_after = true
382
- else
383
- # if src.cur_line[0] != ?\
384
- break if break_list.include? src.cur_line.md_type
385
- # end
386
- # break if src.cur_line.md_type != :text
387
- end
388
-
389
-
390
- stripped = strip_indent(src.shift_line, indentation)
391
- lines << stripped
392
-
393
- #puts "Accepted as #{stripped.inspect}"
394
-
395
- # You are only required to indent the first line of
396
- # a child paragraph.
397
- if stripped.md_type == :text
398
- while src.cur_line && (src.cur_line.md_type == :text)
399
- lines << strip_indent(src.shift_line, indentation)
400
- end
401
- end
402
- end
403
-
404
- want_my_paragraph = saw_anything_after ||
405
- (saw_empty && (src.cur_line && (src.cur_line.md_type == item_type)))
406
-
407
- # dbg_describe_ary(lines, 'LI')
408
- # create a new context
409
-
410
- while lines.last && (lines.last.md_type == :empty)
411
- lines.pop
412
- end
413
-
414
- return lines, want_my_paragraph
415
- end
416
-
417
-
418
- def read_quote(src)
419
- parent_offset = src.cur_index
420
-
421
- lines = []
422
- # collect all indented lines
423
- while src.cur_line && src.cur_line.md_type == :quote
424
- lines << unquote(src.shift_line)
425
- end
426
- # dbg_describe_ary(lines, 'QUOTE')
427
-
428
- src2 = LineSource.new(lines, src, parent_offset)
429
- children = parse_blocks(src2)
430
- return md_quote(children)
431
- end
432
-
433
- def read_code(src)
434
- # collect all indented lines
435
- lines = []
436
- while src.cur_line && ([:code, :empty].include? src.cur_line.md_type)
437
- lines << strip_indent(src.shift_line, 4)
438
- end
439
-
440
- #while lines.last && (lines.last.md_type == :empty )
441
- while lines.last && lines.last.strip.size == 0
442
- lines.pop
443
- end
444
-
445
- while lines.first && lines.first.strip.size == 0
446
- lines.shift
447
- end
448
-
449
- return nil if lines.empty?
450
-
451
- source = lines.join("\n")
452
-
453
- # dbg_describe_ary(lines, 'CODE')
454
-
455
- return md_codeblock(source)
456
- end
457
-
458
- # Reads a series of metadata lines with empty lines in between
459
- def read_metadata(src)
460
- hash = {}
461
- while src.cur_line
462
- case src.cur_line.md_type
463
- when :empty; src.shift_line
464
- when :metadata; hash.merge! parse_metadata(src.shift_line)
465
- else break
466
- end
467
- end
468
- hash
469
- end
470
-
471
-
472
- def read_ref_definition(src, out)
473
- line = src.shift_line
474
-
475
-
476
- # if link is incomplete, shift next line
477
- if src.cur_line && !([:footnote_text, :ref_definition, :definition, :abbreviation].include? src.cur_line.md_type) &&
478
- ([1,2,3].include? number_of_leading_spaces(src.cur_line) )
479
- line += " "+ src.shift_line
480
- end
481
-
482
- # puts "total= #{line}"
483
-
484
- match = LinkRegex.match(line)
485
- if not match
486
- maruku_error "Link does not respect format: '#{line}'"
487
- return
488
- end
489
-
490
- id = match[1]; url = match[2]; title = match[3];
491
- id = sanitize_ref_id(id)
492
-
493
- hash = self.refs[id] = {:url=>url,:title=>title}
494
-
495
- stuff=match[4]
496
-
497
- if stuff
498
- stuff.split.each do |couple|
499
- # puts "found #{couple}"
500
- k, v = couple.split('=')
501
- v ||= ""
502
- if v[0,1]=='"' then v = v[1, v.size-2] end
503
- # puts "key:_#{k}_ value=_#{v}_"
504
- hash[k.to_sym] = v
505
- end
506
- end
507
- # puts hash.inspect
508
-
509
- out.push md_ref_def(id, url, meta={:title=>title})
510
- end
511
-
512
- def split_cells(s)
513
- # s.strip.split('|').select{|x|x.strip.size>0}.map{|x|x.strip}
514
- # changed to allow empty cells
515
- s.strip.split('|').select{|x|x.size>0}.map{|x|x.strip}
516
- end
517
-
518
- def read_table(src)
519
- head = split_cells(src.shift_line).map{|s| md_el(:head_cell, parse_lines_as_span([s])) }
520
-
521
- separator=split_cells(src.shift_line)
522
-
523
- align = separator.map { |s| s =~ Sep
524
- if $1 and $2 then :center elsif $2 then :right else :left end }
525
-
526
- num_columns = align.size
527
-
528
- if head.size != num_columns
529
- maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
530
- tell_user "I will ignore this table."
531
- # XXX try to recover
532
- return md_br()
533
- end
534
-
535
- rows = []
536
-
537
- while src.cur_line && src.cur_line =~ /\|/
538
- row = split_cells(src.shift_line).map{|s|
539
- md_el(:cell, parse_lines_as_span([s]))}
540
- if head.size != num_columns
541
- maruku_error "Row does not have #{num_columns} columns: \n#{row.inspect}"
542
- tell_user "I will ignore this table."
543
- # XXX try to recover
544
- return md_br()
545
- end
546
- rows << row
547
- end
548
-
549
- children = (head+rows).flatten
550
- return md_el(:table, children, {:align => align})
551
- end
552
-
553
- # If current line is text, a definition list is coming
554
- # if 1) text,empty,[text,empty]*,definition
555
-
556
- def eventually_comes_a_def_list(src)
557
- future = src.tell_me_the_future
558
- ok = future =~ %r{^t+e?d}x
559
- # puts "future: #{future} - #{ok}"
560
- ok
561
- end
562
-
563
-
564
- def read_definition(src)
565
- # Read one or more terms
566
- terms = []
567
- while src.cur_line && src.cur_line.md_type == :text
568
- terms << md_el(:definition_term, parse_lines_as_span([src.shift_line]))
569
- end
570
- # dbg_describe_ary(terms, 'DT')
571
-
572
- want_my_paragraph = false
573
-
574
- raise "Chunky Bacon!" if not src.cur_line
575
-
576
- # one optional empty
577
- if src.cur_line.md_type == :empty
578
- want_my_paragraph = true
579
- src.shift_line
580
- end
581
-
582
- raise "Chunky Bacon!" if src.cur_line.md_type != :definition
583
-
584
- # Read one or more definitions
585
- definitions = []
586
- while src.cur_line && src.cur_line.md_type == :definition
587
- parent_offset = src.cur_index
588
-
589
- first = src.shift_line
590
- first =~ Definition
591
- first = $1
592
-
593
- # I know, it's ugly!!!
594
-
595
- lines, w_m_p =
596
- read_indented_content(src,4, [:definition], :definition)
597
- want_my_paragraph ||= w_m_p
598
-
599
- lines.unshift first
600
-
601
- # dbg_describe_ary(lines, 'DD')
602
- src2 = LineSource.new(lines, src, parent_offset)
603
- children = parse_blocks(src2)
604
- definitions << md_el(:definition_data, children)
605
- end
606
-
607
- return md_el(:definition, terms+definitions, {
608
- :terms => terms,
609
- :definitions => definitions,
610
- :want_my_paragraph => want_my_paragraph})
611
- end
612
- end # BlockLevelParser
613
- end # MaRuKu
614
- end
615
- end
5
+ include Helpers
6
+ include MaRuKu::Strings
7
+ include MaRuKu::In::Markdown::SpanLevelParser
8
+
9
+ class BlockContext < Array
10
+ def describe
11
+ n = 5
12
+ desc = size > n ? self[-n, n] : self
13
+ "Last #{n} elements: " +
14
+ desc.map {|x| "\n -" + x.inspect }.join
15
+ end
16
+ end
17
+
18
+ # Splits the string and calls parse_lines_as_markdown
19
+ def parse_text_as_markdown(text)
20
+ lines = split_lines(text)
21
+ src = LineSource.new(lines)
22
+ parse_blocks(src)
23
+ end
24
+
25
+ # Input is a LineSource
26
+ def parse_blocks(src)
27
+ output = BlockContext.new
28
+
29
+ # run state machine
30
+ while src.cur_line
31
+ next if check_block_extensions(src, output, src.cur_line)
32
+
33
+ md_type = src.cur_line.md_type
34
+
35
+ # Prints detected type (useful for debugging)
36
+ #puts "parse_blocks #{md_type}|#{src.cur_line}"
37
+ case md_type
38
+ when :empty
39
+ output << :empty
40
+ src.ignore_line
41
+ when :ial
42
+ m = InlineAttributeList.match src.shift_line
43
+ content = m[1] || ""
44
+ src2 = CharSource.new(content, src)
45
+ interpret_extension(src2, output)
46
+ when :ald
47
+ output << read_ald(src)
48
+ when :text
49
+ # paragraph, or table, or definition list
50
+ read_text_material(src, output)
51
+ when :header2, :hrule
52
+ # hrule
53
+ src.shift_line
54
+ output << md_hrule
55
+ when :header3
56
+ output << read_header3(src)
57
+ when :ulist, :olist
58
+ list_type = (md_type == :ulist) ? :ul : :ol
59
+ li = read_list_item(src)
60
+ # append to current list if we have one
61
+ if output.last.kind_of?(MDElement) &&
62
+ output.last.node_type == list_type then
63
+ output.last.children << li
64
+ else
65
+ output << md_el(list_type, li)
66
+ end
67
+ when :quote
68
+ output << read_quote(src)
69
+ when :code
70
+ e = read_code(src)
71
+ output << e if e
72
+ when :raw_html
73
+ # More extra hacky stuff - if there's more than just HTML, we either wrap it
74
+ # in a paragraph or break it up depending on whether it's an inline element or not
75
+ e = read_raw_html(src)
76
+ unless e.empty?
77
+ if e.first.parsed_html &&
78
+ (first_node_name = e.first.parsed_html.first_node_name) &&
79
+ HTML_INLINE_ELEMS.include?(first_node_name) &&
80
+ !%w(svg math).include?(first_node_name)
81
+ content = [e.first]
82
+ if e.size > 1
83
+ content.concat(e[1].children)
84
+ end
85
+ output << md_par(content)
86
+ else
87
+ output.concat(e)
88
+ end
89
+ end
90
+ when :footnote_text
91
+ output << read_footnote_text(src)
92
+ when :ref_definition
93
+ if src.parent && src.cur_index == 0
94
+ read_text_material(src, output)
95
+ else
96
+ read_ref_definition(src, output)
97
+ end
98
+ when :abbreviation
99
+ output << read_abbreviation(src)
100
+ when :xml_instr
101
+ read_xml_instruction(src, output)
102
+ else # warn if we forgot something
103
+ line = src.cur_line
104
+ maruku_error "Ignoring line '#{line}' type = #{md_type}", src
105
+ src.shift_line
106
+ end
107
+ end
108
+
109
+ merge_ial(output, src, output)
110
+ output.delete_if {|x| x.kind_of?(MDElement) && x.node_type == :ial }
111
+
112
+ # get rid of empty line markers
113
+ output.delete_if {|x| x == :empty }
114
+
115
+ # See for each list if we can omit the paragraphs
116
+ # TODO: do this after
117
+ output.each do |c|
118
+ # Remove paragraphs that we can get rid of
119
+ if [:ul, :ol].include?(c.node_type) && c.children.none?(&:want_my_paragraph)
120
+ c.children.each do |d|
121
+ if d.children.first && d.children.first.node_type == :paragraph
122
+ d.children = d.children.first.children + d.children[1..-1]
123
+ end
124
+ end
125
+ elsif c.node_type == :definition_list && c.children.none?(&:want_my_paragraph)
126
+ c.children.each do |definition|
127
+ definition.definitions.each do |dd|
128
+ if dd.children.first.node_type == :paragraph
129
+ dd.children = dd.children.first.children + dd.children[1..-1]
130
+ end
131
+ end
132
+ end
133
+ end
134
+ end
135
+
136
+ output
137
+ end
138
+
139
+ def read_text_material(src, output)
140
+ if src.cur_line.include?('|') && # if contains a pipe, it could be a table header
141
+ src.next_line &&
142
+ src.next_line.rstrip =~ TableSeparator
143
+ output << read_table(src)
144
+ elsif src.next_line && [:header1, :header2].include?(src.next_line.md_type)
145
+ output << read_header12(src)
146
+ elsif eventually_comes_a_def_list(src)
147
+ definition = read_definition(src)
148
+ if output.last.kind_of?(MDElement) &&
149
+ output.last.node_type == :definition_list then
150
+ output.last.children << definition
151
+ else
152
+ output << md_el(:definition_list, definition)
153
+ end
154
+ else # Start of a paragraph
155
+ output << read_paragraph(src)
156
+ end
157
+ end
158
+
159
+ def read_ald(src)
160
+ if (l = src.shift_line) =~ AttributeDefinitionList
161
+ id = $1
162
+ al = read_attribute_list(CharSource.new($2, src))
163
+ self.ald[id] = al;
164
+ md_ald(id, al)
165
+ else
166
+ maruku_error "Bug Bug:\n#{l.inspect}"
167
+ nil
168
+ end
169
+ end
170
+
171
+ # reads a header (with ----- or ========)
172
+ def read_header12(src)
173
+ line = src.shift_line.strip
174
+ al = nil
175
+ # Check if there is an IAL
176
+ if new_meta_data? and line =~ /^(.*?)\{(.*?)\}\s*$/
177
+ line = $1.strip
178
+ ial = $2
179
+ al = read_attribute_list(CharSource.new(ial, src))
180
+ end
181
+ text = parse_span line
182
+ if text.empty?
183
+ text = "{#{ial}}"
184
+ al = nil
185
+ end
186
+ level = src.cur_line.md_type == :header2 ? 2 : 1;
187
+ src.shift_line
188
+ md_header(level, text, al)
189
+ end
190
+
191
+ # reads a header like '#### header ####'
192
+ def read_header3(src)
193
+ line = src.shift_line.strip
194
+ al = nil
195
+ # Check if there is an IAL
196
+ if new_meta_data? and line =~ /^(.*?)\{(.*?)\}\s*$/
197
+ line = $1.strip
198
+ ial = $2
199
+ al = read_attribute_list(CharSource.new(ial, src))
200
+ end
201
+ level = line[/^#+/].size
202
+ text = parse_span line.gsub(/\A#+|#+\z/, '')
203
+ if text.empty?
204
+ text = "{#{ial}}"
205
+ al = nil
206
+ end
207
+ md_header(level, text, al)
208
+ end
209
+
210
+ def read_xml_instruction(src, output)
211
+ m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
212
+ raise "BugBug" unless m
213
+ target = m[2] || ''
214
+ code = m[3]
215
+ until code.include?('?>')
216
+ code << "\n" << src.shift_line
217
+ end
218
+ unless code =~ /\?>\s*$/
219
+ garbage = (/\?>(.*)$/.match(code))[1]
220
+ maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n" +
221
+ code.gsub(/^/, '|'), src
222
+ end
223
+ code.gsub!(/\?>\s*$/, '')
224
+
225
+ if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
226
+ result = safe_execute_code(self, code)
227
+ if result
228
+ if result.kind_of? String
229
+ raise "Not expected"
230
+ else
231
+ output.push(*result)
232
+ end
233
+ end
234
+ else
235
+ output << md_xml_instr(target, code)
236
+ end
237
+ end
238
+
239
+ def read_raw_html(src)
240
+ extra_line = nil
241
+ h = HTMLHelper.new
242
+ begin
243
+ l = src.shift_line
244
+ h.eat_this(l)
245
+ # puts "\nBLOCK:\nhtml -> #{l.inspect}"
246
+ while src.cur_line && !h.is_finished?
247
+ l = src.shift_line
248
+ # puts "html -> #{l.inspect}"
249
+ h.eat_this "\n" + l
250
+ end
251
+ rescue => e
252
+ maruku_error "Bad block-level HTML:\n#{e.inspect.gsub(/^/, '|')}\n", src
253
+ end
254
+ unless h.rest =~ /^\s*$/
255
+ extra_line = h.rest
256
+ end
257
+ raw_html = h.stuff_you_read
258
+
259
+ is_inline = HTML_INLINE_ELEMS.include?(h.first_tag)
260
+
261
+ if extra_line
262
+ remainder = is_inline ? parse_span(extra_line) : parse_text_as_markdown(extra_line)
263
+ if extra_line.start_with?(' ')
264
+ remainder[0] = ' ' + remainder[0] if remainder[0].is_a?(String)
265
+ end
266
+ is_inline ? [md_html(raw_html), md_par(remainder)] : [md_html(raw_html)] + remainder
267
+ else
268
+ [md_html(raw_html)]
269
+ end
270
+ end
271
+
272
+ def read_paragraph(src)
273
+ lines = [src.shift_line]
274
+ while src.cur_line
275
+ # :olist does not break
276
+ case t = src.cur_line.md_type
277
+ when :quote, :header3, :empty, :ref_definition, :ial, :xml_instr
278
+ break
279
+ when :olist, :ulist
280
+ break if !src.next_line || src.next_line.md_type == t
281
+ when :raw_html
282
+ # This is a pretty awful hack to handle inline HTML
283
+ # but it means double-parsing HMTL.
284
+ html = parse_span([src.cur_line], src)
285
+ unless html.empty? || html.first.is_a?(String)
286
+ if html.first.parsed_html
287
+ first_node_name = html.first.parsed_html.first_node_name
288
+ end
289
+ end
290
+ break if first_node_name && !HTML_INLINE_ELEMS.include?(first_node_name)
291
+ end
292
+ break if src.cur_line.strip.empty?
293
+ break if src.next_line && [:header1, :header2].include?(src.next_line.md_type)
294
+ break if any_matching_block_extension?(src.cur_line)
295
+
296
+ lines << src.shift_line
297
+ end
298
+ children = parse_span(lines, src)
299
+
300
+ md_par(children)
301
+ end
302
+
303
+ # Reads one list item, either ordered or unordered.
304
+ def read_list_item(src)
305
+ parent_offset = src.cur_index
306
+
307
+ item_type = src.cur_line.md_type
308
+ first = src.shift_line
309
+
310
+ indentation, ial = spaces_before_first_char(first)
311
+ al = read_attribute_list(CharSource.new(ial, src)) if ial
312
+ ial_offset = ial ? ial.length + 3 : 0
313
+ lines, want_my_paragraph =
314
+ read_indented_content(src, indentation, [], item_type, ial_offset)
315
+
316
+ # add first line
317
+ # Strip first '*', '-', '+' from first line
318
+ stripped = first[indentation, first.size - 1]
319
+ lines.unshift stripped
320
+
321
+ src2 = LineSource.new(lines, src, parent_offset)
322
+ children = parse_blocks(src2)
323
+
324
+ md_li(children, want_my_paragraph, al)
325
+ end
326
+
327
+ def read_abbreviation(src)
328
+ unless (l = src.shift_line) =~ Abbreviation
329
+ maruku_error "Bug: it's Andrea's fault. Tell him.\n#{l.inspect}"
330
+ end
331
+
332
+ abbr = $1
333
+ desc = $2
334
+
335
+ if !abbr || abbr.empty?
336
+ maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
337
+ end
338
+
339
+ self.abbreviations[abbr] = desc
340
+
341
+ md_abbr_def(abbr, desc)
342
+ end
343
+
344
+ def read_footnote_text(src)
345
+ parent_offset = src.cur_index
346
+
347
+ first = src.shift_line
348
+
349
+ unless first =~ FootnoteText
350
+ maruku_error "Bug (it's Andrea's fault)"
351
+ end
352
+
353
+ id = $1
354
+ text = $2 || ''
355
+
356
+ indentation = 4 #first.size-text.size
357
+
358
+ # puts "id =_#{id}_; text=_#{text}_ indent=#{indentation}"
359
+
360
+ break_list = [:footnote_text, :ref_definition, :definition, :abbreviation]
361
+ item_type = :footnote_text
362
+ lines, _ = read_indented_content(src, indentation, break_list, item_type)
363
+
364
+ # add first line
365
+ lines.unshift text unless text.strip.empty?
366
+
367
+ src2 = LineSource.new(lines, src, parent_offset)
368
+ children = parse_blocks(src2)
369
+
370
+ e = md_footnote(id, children)
371
+ self.footnotes[id] = e
372
+ e
373
+ end
374
+
375
+
376
+ # This is the only ugly function in the code base.
377
+ # It is used to read list items, descriptions, footnote text
378
+ def read_indented_content(src, indentation, break_list, item_type, ial_offset=0)
379
+ lines = []
380
+ # collect all indented lines
381
+ saw_empty = false
382
+ saw_anything_after = false
383
+ break_list = Array(break_list)
384
+ len = indentation - ial_offset
385
+
386
+ while src.cur_line
387
+ num_leading_spaces = src.cur_line.number_of_leading_spaces
388
+ break if num_leading_spaces < len && ![:text, :empty].include?(src.cur_line.md_type)
389
+
390
+ line = strip_indent(src.cur_line, indentation)
391
+ md_type = line.md_type
392
+
393
+ if md_type == :empty
394
+ saw_empty = true
395
+ lines << line
396
+ src.shift_line
397
+ next
398
+ end
399
+
400
+ # Unquestioningly grab anything that's deeper-indented
401
+ if md_type != :code && num_leading_spaces > len
402
+ lines << line
403
+ src.shift_line
404
+ next
405
+ end
406
+
407
+ # after a white line
408
+ if saw_empty
409
+ # we expect things to be properly aligned
410
+ break if num_leading_spaces < len
411
+ saw_anything_after = true
412
+ else
413
+ break if break_list.include?(md_type)
414
+ end
415
+
416
+ lines << line
417
+ src.shift_line
418
+
419
+ # You are only required to indent the first line of
420
+ # a child paragraph.
421
+ if md_type == :text
422
+ while src.cur_line && src.cur_line.md_type == :text
423
+ lines << strip_indent(src.shift_line, indentation)
424
+ end
425
+ end
426
+ end
427
+
428
+ # TODO fix this
429
+ want_my_paragraph = saw_anything_after ||
430
+ (saw_empty && src.cur_line && src.cur_line.md_type == item_type)
431
+
432
+ # create a new context
433
+
434
+ while lines.last && lines.last.md_type == :empty
435
+ lines.pop
436
+ end
437
+
438
+ return lines, want_my_paragraph
439
+ end
440
+
441
+
442
+ def read_quote(src)
443
+ parent_offset = src.cur_index
444
+
445
+ lines = []
446
+ # collect all indented lines
447
+ while src.cur_line && src.cur_line.md_type == :quote
448
+ lines << unquote(src.shift_line)
449
+ end
450
+
451
+ src2 = LineSource.new(lines, src, parent_offset)
452
+ children = parse_blocks(src2)
453
+ md_quote(children)
454
+ end
455
+
456
+ def read_code(src)
457
+ # collect all indented lines
458
+ lines = []
459
+ while src.cur_line && [:code, :empty].include?(src.cur_line.md_type)
460
+ lines << strip_indent(src.shift_line, 4)
461
+ end
462
+
463
+ #while lines.last && (lines.last.md_type == :empty )
464
+ while lines.last && lines.last.strip.size == 0
465
+ lines.pop
466
+ end
467
+
468
+ while lines.first && lines.first.strip.size == 0
469
+ lines.shift
470
+ end
471
+
472
+ return nil if lines.empty?
473
+
474
+ source = lines.join("\n")
475
+
476
+ md_codeblock(source)
477
+ end
478
+
479
+ def read_ref_definition(src, out)
480
+ line = src.shift_line
481
+
482
+ # if link is incomplete, shift next line
483
+ if src.cur_line &&
484
+ ![:footnote_text, :ref_definition, :definition, :abbreviation].include?(src.cur_line.md_type) &&
485
+ (1..3).include?(src.cur_line.number_of_leading_spaces)
486
+ line << " " << src.shift_line
487
+ end
488
+
489
+ match = LinkRegex.match(line)
490
+ unless match
491
+ maruku_error "Link does not respect format: '#{line}'" and return
492
+ end
493
+
494
+ id = match[1]
495
+ url = match[2]
496
+ title = match[3] || match[4] || match[5]
497
+ id = sanitize_ref_id(id)
498
+
499
+ hash = self.refs[id] = {
500
+ :url => url,
501
+ :title => title
502
+ }
503
+
504
+ stuff = (match[6] || '')
505
+ stuff.split.each do |couple|
506
+ k, v = couple.split('=')
507
+ v ||= ""
508
+ v = v[1..-2] if v.start_with?('"') # strip quotes
509
+ hash[k.to_sym] = v
510
+ end
511
+
512
+ out << md_ref_def(id, url, :title => title)
513
+ end
514
+
515
+ def split_cells(s)
516
+ s.split('|').reject(&:empty?).map(&:strip)
517
+ end
518
+
519
+ def read_table(src)
520
+ head = split_cells(src.shift_line).map do |s|
521
+ md_el(:head_cell, parse_span(s))
522
+ end
523
+
524
+ separator = split_cells(src.shift_line)
525
+
526
+ align = separator.map do |s|
527
+ # ex: :-------------------:
528
+ # If the separator starts and ends with a colon,
529
+ # center the cell. If it's on the right, right-align,
530
+ # otherwise left-align.
531
+ starts = s.start_with? ':'
532
+ ends = s.end_with? ':'
533
+ if starts && ends
534
+ :center
535
+ elsif ends
536
+ :right
537
+ else
538
+ :left
539
+ end
540
+ end
541
+
542
+ num_columns = align.size
543
+
544
+ if head.size != num_columns
545
+ maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
546
+ tell_user "I will ignore this table."
547
+ # XXX try to recover
548
+ return md_br
549
+ end
550
+
551
+ rows = []
552
+
553
+ while src.cur_line && src.cur_line =~ /\|/
554
+ row = split_cells(src.shift_line).map do |s|
555
+ md_el(:cell, parse_span(s))
556
+ end
557
+
558
+ if head.size != num_columns
559
+ maruku_error "Row does not have #{num_columns} columns: \n#{row.inspect}"
560
+ tell_user "I will ignore this table."
561
+ # XXX try to recover
562
+ return md_br
563
+ end
564
+ rows << row
565
+ end
566
+
567
+ children = (head + rows).flatten
568
+ md_el(:table, children, { :align => align })
569
+ end
570
+
571
+ # If current line is text, a definition list is coming
572
+ # if 1) text,empty,[text,empty]*,definition
573
+ def eventually_comes_a_def_list(src)
574
+ src.tell_me_the_future =~ %r{^t+e?d}x
575
+ end
576
+
577
+ def read_definition(src)
578
+ # Read one or more terms
579
+ terms = []
580
+ while src.cur_line && src.cur_line.md_type == :text
581
+ terms << md_el(:definition_term, parse_span(src.shift_line))
582
+ end
583
+
584
+ want_my_paragraph = false
585
+
586
+ raise "Chunky Bacon!" unless src.cur_line
587
+
588
+ # one optional empty
589
+ if src.cur_line.md_type == :empty
590
+ want_my_paragraph = true
591
+ src.shift_line
592
+ end
593
+
594
+ raise "Chunky Bacon!" unless src.cur_line.md_type == :definition
595
+
596
+ # Read one or more definitions
597
+ definitions = []
598
+ while src.cur_line && src.cur_line.md_type == :definition
599
+ parent_offset = src.cur_index
600
+
601
+ first = src.shift_line
602
+ first =~ Definition
603
+ first = $1
604
+
605
+ lines, w_m_p = read_indented_content(src, 4, :definition, :definition)
606
+ want_my_paragraph ||= w_m_p
607
+
608
+ lines.unshift first
609
+
610
+ src2 = LineSource.new(lines, src, parent_offset)
611
+ children = parse_blocks(src2)
612
+ definitions << md_el(:definition_data, children)
613
+ end
614
+
615
+ md_el(:definition, terms + definitions, {
616
+ :terms => terms,
617
+ :definitions => definitions,
618
+ :want_my_paragraph => want_my_paragraph
619
+ })
620
+ end
621
+ end end end end