maruku 0.6.1 → 0.7.0.beta1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (263) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/MIT-LICENSE.txt +20 -0
  5. data/bin/maruku +153 -152
  6. data/bin/marutex +2 -29
  7. data/data/entities.xml +261 -0
  8. data/docs/math.md +14 -18
  9. data/lib/maruku.rb +65 -77
  10. data/lib/maruku/attributes.rb +109 -214
  11. data/lib/maruku/defaults.rb +45 -67
  12. data/lib/maruku/document.rb +43 -0
  13. data/lib/maruku/element.rb +112 -0
  14. data/lib/maruku/errors.rb +71 -0
  15. data/lib/maruku/ext/div.rb +105 -113
  16. data/lib/maruku/ext/fenced_code.rb +97 -0
  17. data/lib/maruku/ext/math.rb +22 -26
  18. data/lib/maruku/ext/math/elements.rb +20 -26
  19. data/lib/maruku/ext/math/mathml_engines/blahtex.rb +92 -104
  20. data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +33 -26
  21. data/lib/maruku/ext/math/mathml_engines/none.rb +11 -19
  22. data/lib/maruku/ext/math/mathml_engines/ritex.rb +2 -4
  23. data/lib/maruku/ext/math/parsing.rb +107 -113
  24. data/lib/maruku/ext/math/to_html.rb +184 -187
  25. data/lib/maruku/ext/math/to_latex.rb +30 -21
  26. data/lib/maruku/helpers.rb +158 -257
  27. data/lib/maruku/html.rb +254 -0
  28. data/lib/maruku/input/charsource.rb +272 -319
  29. data/lib/maruku/input/extensions.rb +62 -63
  30. data/lib/maruku/input/html_helper.rb +220 -189
  31. data/lib/maruku/input/linesource.rb +90 -110
  32. data/lib/maruku/input/mdline.rb +129 -0
  33. data/lib/maruku/input/parse_block.rb +618 -612
  34. data/lib/maruku/input/parse_doc.rb +145 -215
  35. data/lib/maruku/input/parse_span.rb +658 -0
  36. data/lib/maruku/input/rubypants.rb +200 -128
  37. data/lib/maruku/inspect_element.rb +60 -0
  38. data/lib/maruku/maruku.rb +10 -31
  39. data/lib/maruku/output/entity_table.rb +33 -0
  40. data/lib/maruku/output/s5/fancy.rb +462 -462
  41. data/lib/maruku/output/s5/to_s5.rb +115 -135
  42. data/lib/maruku/output/to_html.rb +898 -983
  43. data/lib/maruku/output/to_latex.rb +561 -560
  44. data/lib/maruku/output/to_markdown.rb +207 -162
  45. data/lib/maruku/output/to_s.rb +11 -52
  46. data/lib/maruku/string_utils.rb +129 -179
  47. data/lib/maruku/toc.rb +185 -196
  48. data/lib/maruku/version.rb +33 -38
  49. data/spec/block_docs/abbrev.md +776 -0
  50. data/{tests/unittest → spec/block_docs}/abbreviations.md +11 -20
  51. data/{tests/unittest → spec/block_docs}/alt.md +2 -14
  52. data/{tests/unittest/pending → spec/block_docs}/amps.md +1 -13
  53. data/{tests/unittest → spec/block_docs}/attributes/att2.md +0 -12
  54. data/{tests/unittest → spec/block_docs}/attributes/att3.md +2 -14
  55. data/{tests/unittest → spec/block_docs}/attributes/attributes.md +12 -16
  56. data/{tests/unittest → spec/block_docs}/attributes/circular.md +0 -12
  57. data/{tests/unittest → spec/block_docs}/attributes/default.md +1 -13
  58. data/{tests/unittest → spec/block_docs}/blank.md +0 -12
  59. data/{tests/unittest → spec/block_docs}/blanks_in_code.md +16 -15
  60. data/{tests/unittest/loss.md → spec/block_docs/bug_def.md} +6 -18
  61. data/{tests/unittest → spec/block_docs}/bug_table.md +3 -15
  62. data/{tests/unittest → spec/block_docs}/code.md +7 -14
  63. data/{tests/unittest → spec/block_docs}/code2.md +4 -14
  64. data/{tests/unittest → spec/block_docs}/code3.md +12 -16
  65. data/{tests/unittest → spec/block_docs}/data_loss.md +2 -14
  66. data/{tests/unittest → spec/block_docs}/divs/div1.md +0 -12
  67. data/{tests/unittest → spec/block_docs}/divs/div2.md +0 -12
  68. data/{tests/unittest → spec/block_docs}/divs/div3_nest.md +3 -15
  69. data/{tests/unittest → spec/block_docs}/easy.md +1 -13
  70. data/spec/block_docs/email.md +29 -0
  71. data/{tests/unittest/pending → spec/block_docs}/empty_cells.md +3 -15
  72. data/{tests/unittest → spec/block_docs}/encoding/iso-8859-1.md +1 -14
  73. data/{tests/unittest → spec/block_docs}/encoding/utf-8.md +0 -12
  74. data/{tests/unittest → spec/block_docs}/entities.md +27 -29
  75. data/{tests/unittest/notyet → spec/block_docs}/escape.md +2 -14
  76. data/{tests/unittest → spec/block_docs}/escaping.md +11 -22
  77. data/{tests/unittest → spec/block_docs}/extra_dl.md +2 -13
  78. data/{tests/unittest → spec/block_docs}/extra_header_id.md +14 -20
  79. data/{tests/unittest → spec/block_docs}/extra_table1.md +3 -15
  80. data/spec/block_docs/fenced_code_blocks.md +66 -0
  81. data/spec/block_docs/fenced_code_blocks_highlighted.md +18 -0
  82. data/{tests/unittest → spec/block_docs}/footnotes.md +12 -24
  83. data/spec/block_docs/footnotes2.md +78 -0
  84. data/spec/block_docs/hard.md +25 -0
  85. data/spec/block_docs/header_after_par.md +62 -0
  86. data/{tests/unittest → spec/block_docs}/headers.md +10 -18
  87. data/{tests/unittest → spec/block_docs}/hex_entities.md +7 -18
  88. data/{tests/unittest → spec/block_docs}/hrule.md +5 -12
  89. data/{tests/unittest → spec/block_docs}/html3.md +1 -13
  90. data/{tests/unittest → spec/block_docs}/html4.md +2 -14
  91. data/{tests/unittest → spec/block_docs}/html5.md +2 -14
  92. data/spec/block_docs/html_block_in_para.md +22 -0
  93. data/spec/block_docs/html_inline.md +25 -0
  94. data/spec/block_docs/html_trailing.md +31 -0
  95. data/spec/block_docs/ie.md +62 -0
  96. data/spec/block_docs/iframe.md +29 -0
  97. data/{tests/unittest → spec/block_docs}/images.md +22 -28
  98. data/{tests/unittest → spec/block_docs}/images2.md +7 -17
  99. data/{tests/unittest → spec/block_docs}/inline_html.md +37 -67
  100. data/{tests/unittest → spec/block_docs}/inline_html2.md +1 -13
  101. data/spec/block_docs/inline_html_beginning.md +10 -0
  102. data/spec/block_docs/issue20.md +9 -0
  103. data/spec/block_docs/issue26.md +22 -0
  104. data/spec/block_docs/issue29.md +9 -0
  105. data/spec/block_docs/issue30.md +30 -0
  106. data/spec/block_docs/issue31.md +25 -0
  107. data/spec/block_docs/issue40.md +40 -0
  108. data/spec/block_docs/issue64.md +55 -0
  109. data/spec/block_docs/issue67.md +19 -0
  110. data/spec/block_docs/issue70.md +11 -0
  111. data/spec/block_docs/issue72.md +17 -0
  112. data/spec/block_docs/issue74.md +38 -0
  113. data/spec/block_docs/issue79.md +15 -0
  114. data/spec/block_docs/issue83.md +13 -0
  115. data/spec/block_docs/issue85.md +25 -0
  116. data/spec/block_docs/issue88.md +19 -0
  117. data/spec/block_docs/issue89.md +12 -0
  118. data/spec/block_docs/issue90.md +38 -0
  119. data/{tests/unittest/pending → spec/block_docs}/link.md +21 -18
  120. data/{tests/unittest → spec/block_docs}/links.md +33 -32
  121. data/spec/block_docs/links2.md +21 -0
  122. data/{tests/unittest → spec/block_docs}/list1.md +0 -12
  123. data/{tests/unittest → spec/block_docs}/list12.md +2 -14
  124. data/{tests/unittest → spec/block_docs}/list2.md +2 -14
  125. data/spec/block_docs/list_multipara.md +42 -0
  126. data/{tests/unittest → spec/block_docs}/lists.md +28 -29
  127. data/{tests/unittest → spec/block_docs}/lists10.md +2 -14
  128. data/spec/block_docs/lists11.md +23 -0
  129. data/spec/block_docs/lists12.md +43 -0
  130. data/spec/block_docs/lists13.md +55 -0
  131. data/spec/block_docs/lists14.md +61 -0
  132. data/spec/block_docs/lists15.md +36 -0
  133. data/spec/block_docs/lists6.md +88 -0
  134. data/spec/block_docs/lists7b.md +58 -0
  135. data/spec/block_docs/lists9.md +53 -0
  136. data/{tests/unittest → spec/block_docs}/lists_after_paragraph.md +19 -25
  137. data/spec/block_docs/lists_blank.md +35 -0
  138. data/{tests/unittest/list3.md → spec/block_docs/lists_blockquote_code.md} +2 -14
  139. data/{tests/unittest/list4.md → spec/block_docs/lists_need_blank_line.md} +50 -21
  140. data/spec/block_docs/lists_nested.md +44 -0
  141. data/spec/block_docs/lists_nested_blankline.md +28 -0
  142. data/spec/block_docs/lists_nested_deep.md +43 -0
  143. data/{tests/unittest → spec/block_docs}/lists_ol.md +37 -54
  144. data/spec/block_docs/lists_paraindent.md +47 -0
  145. data/spec/block_docs/lists_tab.md +54 -0
  146. data/spec/block_docs/loss.md +17 -0
  147. data/spec/block_docs/math-blahtex/equations.md +30 -0
  148. data/spec/block_docs/math-blahtex/inline.md +48 -0
  149. data/spec/block_docs/math-blahtex/math2.md +45 -0
  150. data/spec/block_docs/math-blahtex/table.md +25 -0
  151. data/spec/block_docs/math/embedded_invalid_svg.md +79 -0
  152. data/spec/block_docs/math/embedded_svg.md +97 -0
  153. data/spec/block_docs/math/equations.md +44 -0
  154. data/{tests/unittest → spec/block_docs}/math/inline.md +7 -19
  155. data/spec/block_docs/math/math2.md +45 -0
  156. data/{tests/unittest → spec/block_docs}/math/notmath.md +0 -12
  157. data/spec/block_docs/math/raw_mathml.md +87 -0
  158. data/spec/block_docs/math/table.md +25 -0
  159. data/{tests/unittest → spec/block_docs}/math/table2.md +5 -17
  160. data/{tests/unittest → spec/block_docs}/misc_sw.md +181 -118
  161. data/{tests/unittest → spec/block_docs}/olist.md +6 -18
  162. data/{tests/unittest → spec/block_docs}/one.md +0 -12
  163. data/{tests/unittest → spec/block_docs}/paragraph.md +0 -12
  164. data/{tests/unittest → spec/block_docs}/paragraph_rules/dont_merge_ref.md +4 -12
  165. data/{tests/unittest → spec/block_docs}/paragraph_rules/tab_is_blank.md +0 -12
  166. data/{tests/unittest → spec/block_docs}/paragraphs.md +1 -13
  167. data/{tests/unittest → spec/block_docs}/recover/recover_links.md +4 -16
  168. data/{tests/unittest/pending/ref.md → spec/block_docs/ref_with_period.md} +7 -16
  169. data/spec/block_docs/ref_with_title.md +22 -0
  170. data/{tests/unittest → spec/block_docs}/references/long_example.md +16 -23
  171. data/{tests/unittest → spec/block_docs}/references/spaces_and_numbers.md +0 -12
  172. data/{tests/unittest → spec/block_docs}/smartypants.md +24 -31
  173. data/{tests/unittest → spec/block_docs}/syntax_hl.md +13 -17
  174. data/{tests/unittest → spec/block_docs}/table_attributes.md +2 -14
  175. data/spec/block_docs/tables.md +58 -0
  176. data/{tests/unittest → spec/block_docs}/test.md +1 -13
  177. data/{tests/unittest/notyet → spec/block_docs}/ticks.md +1 -13
  178. data/spec/block_docs/toc.md +87 -0
  179. data/{tests/unittest/notyet → spec/block_docs}/triggering.md +14 -25
  180. data/{tests/unittest → spec/block_docs}/underscore_in_words.md +0 -12
  181. data/{tests/unittest → spec/block_docs}/wrapping.md +4 -16
  182. data/spec/block_docs/xml.md +33 -0
  183. data/{tests/unittest → spec/block_docs}/xml2.md +0 -12
  184. data/spec/block_docs/xml3.md +24 -0
  185. data/{tests/unittest → spec/block_docs}/xml_instruction.md +9 -20
  186. data/spec/block_spec.rb +110 -0
  187. data/spec/cli_spec.rb +8 -0
  188. data/spec/span_spec.rb +256 -0
  189. data/spec/spec_helper.rb +2 -0
  190. data/spec/to_html_utf8_spec.rb +13 -0
  191. metadata +205 -243
  192. metadata.gz.sig +3 -0
  193. data/Rakefile +0 -48
  194. data/bin/marudown +0 -29
  195. data/bin/marutest +0 -345
  196. data/docs/changelog.md +0 -334
  197. data/lib/maruku/errors_management.rb +0 -92
  198. data/lib/maruku/ext/math/latex_fix.rb +0 -12
  199. data/lib/maruku/input/parse_span_better.rb +0 -746
  200. data/lib/maruku/input/type_detection.rb +0 -147
  201. data/lib/maruku/output/to_latex_entities.rb +0 -367
  202. data/lib/maruku/output/to_latex_strings.rb +0 -64
  203. data/lib/maruku/structures.rb +0 -167
  204. data/lib/maruku/structures_inspect.rb +0 -87
  205. data/lib/maruku/structures_iterators.rb +0 -61
  206. data/lib/maruku/tests/benchmark.rb +0 -82
  207. data/lib/maruku/tests/new_parser.rb +0 -373
  208. data/lib/maruku/tests/tests.rb +0 -136
  209. data/lib/maruku/usage/example1.rb +0 -33
  210. data/tests/bugs/code_in_links.md +0 -101
  211. data/tests/bugs/complex_escaping.md +0 -38
  212. data/tests/math/syntax.md +0 -46
  213. data/tests/math_usage/document.md +0 -13
  214. data/tests/others/abbreviations.md +0 -11
  215. data/tests/others/blank.md +0 -4
  216. data/tests/others/code.md +0 -5
  217. data/tests/others/code2.md +0 -8
  218. data/tests/others/code3.md +0 -16
  219. data/tests/others/email.md +0 -4
  220. data/tests/others/entities.md +0 -19
  221. data/tests/others/escaping.md +0 -16
  222. data/tests/others/extra_dl.md +0 -101
  223. data/tests/others/extra_header_id.md +0 -13
  224. data/tests/others/extra_table1.md +0 -40
  225. data/tests/others/footnotes.md +0 -17
  226. data/tests/others/headers.md +0 -10
  227. data/tests/others/hrule.md +0 -10
  228. data/tests/others/images.md +0 -20
  229. data/tests/others/inline_html.md +0 -42
  230. data/tests/others/links.md +0 -38
  231. data/tests/others/list1.md +0 -4
  232. data/tests/others/list2.md +0 -5
  233. data/tests/others/list3.md +0 -8
  234. data/tests/others/lists.md +0 -32
  235. data/tests/others/lists_after_paragraph.md +0 -44
  236. data/tests/others/lists_ol.md +0 -39
  237. data/tests/others/misc_sw.md +0 -105
  238. data/tests/others/one.md +0 -1
  239. data/tests/others/paragraphs.md +0 -13
  240. data/tests/others/sss06.md +0 -352
  241. data/tests/others/test.md +0 -4
  242. data/tests/s5/s5profiling.md +0 -48
  243. data/tests/unittest/bug_def.md +0 -28
  244. data/tests/unittest/email.md +0 -32
  245. data/tests/unittest/html2.md +0 -34
  246. data/tests/unittest/ie.md +0 -61
  247. data/tests/unittest/links2.md +0 -34
  248. data/tests/unittest/lists11.md +0 -28
  249. data/tests/unittest/lists6.md +0 -53
  250. data/tests/unittest/lists9.md +0 -76
  251. data/tests/unittest/math/equations.md +0 -86
  252. data/tests/unittest/math/math2.md +0 -57
  253. data/tests/unittest/math/table.md +0 -37
  254. data/tests/unittest/notyet/header_after_par.md +0 -70
  255. data/tests/unittest/red_tests/abbrev.md +0 -1388
  256. data/tests/unittest/red_tests/lists7.md +0 -68
  257. data/tests/unittest/red_tests/lists7b.md +0 -128
  258. data/tests/unittest/red_tests/lists8.md +0 -76
  259. data/tests/unittest/red_tests/xml.md +0 -70
  260. data/tests/unittest/xml3.md +0 -38
  261. data/tests/utf8-files/simple.md +0 -1
  262. data/unit_test_block.sh +0 -5
  263. data/unit_test_span.sh +0 -3
@@ -1,111 +1,91 @@
1
- #--
2
- # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
- #
4
- # This file is part of Maruku.
5
- #
6
- # Maruku is free software; you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation; either version 2 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # Maruku is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with Maruku; if not, write to the Free Software
18
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
- #++
20
-
21
-
22
- module MaRuKu; module In; module Markdown; module BlockLevelParser
23
-
24
- # This represents a source of lines that can be consumed.
25
- #
26
- # It is the twin of CharSource.
27
- #
28
-
29
- class LineSource
30
- include MaRuKu::Strings
31
- attr_reader :parent
32
-
33
- def initialize(lines, parent=nil, parent_offset=nil)
34
- raise "NIL lines? " if not lines
35
- @lines = lines
36
- @lines_index = 0
37
- @parent = parent
38
- @parent_offset = parent_offset
39
- end
40
-
41
- def cur_line() @lines[@lines_index] end
42
- def next_line() @lines[@lines_index+1] end
43
-
44
- def shift_line()
45
- raise "Over the rainbow" if @lines_index >= @lines.size
46
- l = @lines[@lines_index]
47
- @lines_index += 1
48
- return l
49
- end
50
-
51
- def ignore_line
52
- raise "Over the rainbow" if @lines_index >= @lines.size
53
- @lines_index += 1
54
- end
55
-
56
- def describe
57
- s = "At line #{original_line_number(@lines_index)}\n"
58
-
59
- context = 3 # lines
60
- from = [@lines_index-context, 0].max
61
- to = [@lines_index+context, @lines.size-1].min
62
-
63
- for i in from..to
64
- prefix = (i == @lines_index) ? '--> ' : ' ';
65
- l = @lines[i]
66
- s += "%10s %4s|%s" %
67
- [@lines[i].md_type.to_s, prefix, l]
68
-
69
- s += "|\n"
70
- end
71
-
72
- # if @parent
73
- # s << "Parent context is: \n"
74
- # s << add_tabs(@parent.describe,1,'|')
75
- # end
76
- s
77
- end
78
-
79
- def original_line_number(index)
80
- if @parent
81
- return index + @parent.original_line_number(@parent_offset)
82
- else
83
- 1 + index
84
- end
85
- end
86
-
87
- def cur_index
88
- @lines_index
89
- end
90
-
91
- # Returns the type of next line as a string
92
- # breaks at first :definition
93
- def tell_me_the_future
94
- s = ""; num_e = 0;
95
- for i in @lines_index..@lines.size-1
96
- c = case @lines[i].md_type
97
- when :text; "t"
98
- when :empty; num_e+=1; "e"
99
- when :definition; "d"
100
- else "o"
101
- end
102
- s += c
103
- break if c == "d" or num_e>1
104
- end
105
- s
106
- end
107
-
108
- end # linesource
109
-
110
- end end end end # block
1
+ module MaRuKu::In::Markdown::BlockLevelParser
2
+
3
+ # This represents a source of lines that can be consumed.
4
+ #
5
+ # It is the twin of CharSource.
6
+ #
7
+
8
+ class LineSource
9
+ attr_reader :parent
10
+
11
+ def initialize(lines, parent=nil, parent_offset=nil)
12
+ raise "NIL lines? " unless lines
13
+ @lines = lines.map {|l| l.kind_of?(MaRuKu::MDLine) ? l : MaRuKu::MDLine.new(l) }
14
+ @lines_index = 0
15
+ @parent = parent
16
+ @parent_offset = parent_offset
17
+ end
18
+
19
+ def cur_line
20
+ @lines[@lines_index]
21
+ end
22
+
23
+ def next_line
24
+ @lines[@lines_index + 1]
25
+ end
26
+
27
+ def shift_line
28
+ raise "Over the rainbow" if @lines_index >= @lines.size
29
+ l = @lines[@lines_index]
30
+ @lines_index += 1
31
+ l
32
+ end
33
+
34
+ def ignore_line
35
+ raise "Over the rainbow" if @lines_index >= @lines.size
36
+ @lines_index += 1
37
+ end
38
+
39
+ def describe
40
+ s = "At line #{original_line_number(@lines_index)}\n"
41
+
42
+ context = 3 # lines
43
+ from = [@lines_index - context, 0].max
44
+ to = [@lines_index + context, @lines.size - 1].min
45
+
46
+ from.upto(to) do |i|
47
+ prefix = (i == @lines_index) ? '--> ' : ' ';
48
+ l = @lines[i]
49
+ s += "%10s %4s|%s" %
50
+ [@lines[i].md_type.to_s, prefix, l]
51
+
52
+ s += "|\n"
53
+ end
54
+
55
+ s
56
+ end
57
+
58
+ def original_line_number(index)
59
+ if @parent
60
+ index + @parent.original_line_number(@parent_offset)
61
+ else
62
+ 1 + index
63
+ end
64
+ end
65
+
66
+ def cur_index
67
+ @lines_index
68
+ end
69
+
70
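+ # Returns the types of the upcoming lines (starting at the current one) as a string, one character per line: "t" text, "e" empty, "d" definition, "o" other.
71
+ # Stops after the first :definition or after the second :empty line.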
72
+ def tell_me_the_future
73
+ s = ""
74
+ num_e = 0
75
+
76
+ @lines_index.upto(@lines.size - 1) do |i|
77
+ c = case @lines[i].md_type
78
+ when :text; "t"
79
+ when :empty; num_e += 1; "e"
80
+ when :definition; "d"
81
+ else "o"
82
+ end
83
+ s << c
84
+ break if c == "d" or num_e > 1
85
+ end
86
+ s
87
+ end
88
+
89
+ end # linesource
90
+ end
111
91
 
@@ -0,0 +1,129 @@
1
+ # This code does the classification of lines for block-level parsing.
2
+ module MaRuKu
3
+
4
+ # Represents a single line in a Markdown source file, as produced by
5
+ # LineSource.
6
+ class MDLine < String
7
+ def md_type
8
+ @md_type ||= line_md_type
9
+ end
10
+
11
+ # Returns the number of leading spaces on this string,
12
+ # considering that a tab counts as {MaRuKu::Strings::TAB_SIZE} spaces.
13
+ #
14
+ # Takes no arguments: the leading whitespace of the receiver itself is measured.
15
+ # @return [Fixnum]
16
+ def number_of_leading_spaces
17
+ if self =~ /\A\s+/
18
+ spaces = $&
19
+ spaces.count(" ") + spaces.count("\t") * MaRuKu::Strings::TAB_SIZE
20
+ else
21
+ 0
22
+ end
23
+ end
24
+
25
+ def gsub!(*args)
26
+ # Any in-place-modification method should reset the md_type
27
+ @md_type = nil
28
+ super
29
+ end
30
+
31
+ private
32
+
33
+ def line_md_type
34
+ # The order of evaluation is important (:text is a catch-all)
35
+ return :text if self =~ /\A[a-zA-Z]/
36
+ return :empty if self =~ /\A\s*\z/
37
+ return :footnote_text if self =~ FootnoteText
38
+ return :ref_definition if self =~ LinkRegex || self =~ IncompleteLink
39
+ return :abbreviation if self =~ Abbreviation
40
+ return :definition if self =~ Definition
41
+ # I had a bug with emails and urls at the beginning of the
42
+ # line that were mistaken for raw_html
43
+ return :text if self =~ /\A[ ]{0,3}<([^:@>]+?@[^:@>]+?)>/
44
+ return :text if self =~ /\A[ ]{0,3}<http:/
45
+ # raw html is like PHP Markdown Extra: at most three spaces before
46
+ return :xml_instr if self =~ /\A\s*<\?/
47
+ return :raw_html if self =~ %r{^[ ]{0,3}</?\s*\w+}
48
+ return :raw_html if self =~ /\A[ ]{0,3}<\!\-\-/
49
+ return :header1 if self =~ /\A(=)+/
50
+ return :header2 if self =~ /\A([-\s])+\z/
51
+ return :header3 if self =~ /\A(#)+\s*\S+/
52
+ # at least three asterisks/hyphens/underscores on a line, and only whitespace
53
+ return :hrule if self =~ /\A(\s*[\*\-_]\s*){3,}\z/
54
+ return :ulist if self =~ /\A([ ]{0,3}|\t)([\*\-\+])\s+.*/
55
+ return :olist if self =~ /\A([ ]{0,3}|\t)\d+\.\s+.*/
56
+ return :code if number_of_leading_spaces >= 4
57
+ return :quote if self =~ /\A>/
58
+ return :ald if self =~ AttributeDefinitionList
59
+ return :ial if self =~ InlineAttributeList
60
+ return :text # else, it's just text
61
+ end
62
+ end
63
+
64
+ # MacRuby has trouble with commented regexes, so just put the expanded form
65
+ # in a comment.
66
+
67
+ # $1 = id $2 = attribute list
68
+ AttributeDefinitionList = /\A\s{0,3}\{([\w\s]+)\}:\s*(.*?)\s*\z/
69
+ #
70
+ InlineAttributeList = /\A\s{0,3}\{([:#\.].*?)\}\s*\z/
71
+ # Example:
72
+ # ^:blah blah
73
+ # ^: blah blah
74
+ # ^ : blah blah
75
+ Definition = /\A[ ]{0,3}:\s*(\S.*)\z/
76
+ # %r{
77
+ # ^ # begin of line
78
+ # [ ]{0,3} # up to 3 spaces
79
+ # : # colon
80
+ # \s* # whitespace
81
+ # (\S.*) # the text = $1
82
+ # $ # end of line
83
+ # }x
84
+
85
+ # Example:
86
+ # *[HTML]: Hyper Text Markup Language
87
+ Abbreviation = /\A[ ]{0,3}\*\[([^\]]+)\]:\s*(\S.*\S)*\s*\z/
88
+ # %r{
89
+ # ^ # begin of line
90
+ # [ ]{0,3} # up to 3 spaces
91
+ # \* # one asterisk
92
+ # \[ # opening bracket
93
+ # ([^\]]+) # any non-closing bracket: id = $1
94
+ # \] # closing bracket
95
+ # : # colon
96
+ # \s* # whitespace
97
+ # (\S.*\S)* # definition=$2
98
+ # \s* # strip this whitespace
99
+ # $ # end of line
100
+ # }x
101
+
102
+ FootnoteText = /\A[ ]{0,3}\[(\^.+)\]:\s*(\S.*)?\z/
103
+ # %r{
104
+ # ^ # begin of line
105
+ # [ ]{0,3} # up to 3 spaces
106
+ # \[(\^.+)\]: # id = $1 (including '^')
107
+ # \s*(\S.*)?$ # text = $2 (optional)
108
+ # }x
109
+
110
+ # This regex is taken from BlueCloth sources
111
+ # Link defs are in the form: ^[id]: \n? url "optional title"
112
+ LinkRegex = /\A[ ]{0,3}\[([^\[\]]+)\]:[ ]*<?([^>\s]+)>?[ ]*(?:(?:(?:"([^"]+)")|(?:'([^']+)')|(?:\(([^\(\)]+)\)))\s*(.+)?)?/
113
+ #%r{
114
+ # ^[ ]{0,3}\[([^\[\]]+)\]: # id = $1
115
+ # [ ]*
116
+ # <?([^>\s]+)>? # url = $2
117
+ # [ ]*
118
+ # (?: # Titles are delimited by "quotes" or (parens).
119
+ # (?:(?:"([^"]+)")|(?:'([^']+)')|(?:\(([^\(\)]+)\))) # title = $3, $4, or $5
120
+ # \s*(.+)? # stuff = $6
121
+ # )? # title is optional
122
+ #}x
123
+
124
+ IncompleteLink = /\A[ ]{0,3}\[([^\[\]]+?)\]:\s*\z/
125
+
126
+ # Table syntax: http://michelf.ca/projects/php-markdown/extra/#table
127
+ # | -------------:| ------------------------------ |
128
+ TableSeparator = /\A(?>\|?\s*\:?\-+\:?\s*\|?)+?\z/
129
+ end
@@ -1,615 +1,621 @@
1
- #--
2
- # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
- #
4
- # This file is part of Maruku.
5
- #
6
- # Maruku is free software; you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation; either version 2 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # Maruku is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with Maruku; if not, write to the Free Software
18
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
- #++
20
-
1
+ require 'set'
21
2
 
22
3
  module MaRuKu; module In; module Markdown; module BlockLevelParser
23
4
 
24
- include Helpers
25
- include MaRuKu::Strings
26
- include MaRuKu::In::Markdown::SpanLevelParser
27
-
28
- class BlockContext < Array
29
- def describe
30
- n = 5
31
- desc = size > n ? self[-n,n] : self
32
- "Last #{n} elements: "+
33
- desc.map{|x| "\n -" + x.inspect}.join
34
- end
35
- end
36
-
37
- # Splits the string and calls parse_lines_as_markdown
38
- def parse_text_as_markdown(text)
39
- lines = split_lines(text)
40
- src = LineSource.new(lines)
41
- return parse_blocks(src)
42
- end
43
-
44
- # Input is a LineSource
45
- def parse_blocks(src)
46
- output = BlockContext.new
47
-
48
- # run state machine
49
- while src.cur_line
50
-
51
- next if check_block_extensions(src, output, src.cur_line)
52
-
53
- # Prints detected type (useful for debugging)
54
- # puts "#{src.cur_line.md_type}|#{src.cur_line}"
55
- case src.cur_line.md_type
56
- when :empty;
57
- output.push :empty
58
- src.ignore_line
59
- when :ial
60
- m = InlineAttributeList.match src.shift_line
61
- content = m[1] || ""
62
- # puts "Content: #{content.inspect}"
63
- src2 = CharSource.new(content, src)
64
- interpret_extension(src2, output, [nil])
65
- when :ald
66
- output.push read_ald(src)
67
- when :text
68
- # paragraph, or table, or definition list
69
- read_text_material(src, output)
70
- when :header2, :hrule
71
- # hrule
72
- src.shift_line
73
- output.push md_hrule()
74
- when :header3
75
- output.push read_header3(src)
76
- when :ulist, :olist
77
- list_type = src.cur_line.md_type == :ulist ? :ul : :ol
78
- li = read_list_item(src)
79
- # append to current list if we have one
80
- if output.last.kind_of?(MDElement) &&
81
- output.last.node_type == list_type then
82
- output.last.children << li
83
- else
84
- output.push md_el(list_type, [li])
85
- end
86
- when :quote; output.push read_quote(src)
87
- when :code; e = read_code(src); output << e if e
88
- when :raw_html; e = read_raw_html(src); output << e if e
89
-
90
- when :footnote_text; output.push read_footnote_text(src)
91
- when :ref_definition;
92
- if src.parent && (src.cur_index == 0)
93
- read_text_material(src, output)
94
- else
95
- read_ref_definition(src, output)
96
- end
97
- when :abbreviation; output.push read_abbreviation(src)
98
- when :xml_instr; read_xml_instruction(src, output)
99
- when :metadata;
100
- maruku_error "Please use the new meta-data syntax: \n"+
101
- " http://maruku.rubyforge.org/proposal.html\n", src
102
- src.ignore_line
103
- else # warn if we forgot something
104
- md_type = src.cur_line.md_type
105
- line = src.cur_line
106
- maruku_error "Ignoring line '#{line}' type = #{md_type}", src
107
- src.shift_line
108
- end
109
- end
110
-
111
- merge_ial(output, src, output)
112
- output.delete_if {|x| x.kind_of?(MDElement) &&
113
- x.node_type == :ial}
114
-
115
- # get rid of empty line markers
116
- output.delete_if {|x| x == :empty}
117
- # See for each list if we can omit the paragraphs and use li_span
118
- # TODO: do this after
119
- output.each do |c|
120
- # Remove paragraphs that we can get rid of
121
- if [:ul,:ol].include? c.node_type
122
- if c.children.all? {|li| !li.want_my_paragraph} then
123
- c.children.each do |d|
124
- d.node_type = :li_span
125
- d.children = d.children[0].children
126
- end
127
- end
128
- end
129
- if c.node_type == :definition_list
130
- if c.children.all?{|defi| !defi.want_my_paragraph} then
131
- c.children.each do |definition|
132
- definition.definitions.each do |dd|
133
- dd.children = dd.children[0].children
134
- end
135
- end
136
- end
137
- end
138
- end
139
-
140
- output
141
- end
142
-
143
- def read_text_material(src, output)
144
- if src.cur_line =~ MightBeTableHeader and
145
- (src.next_line && src.next_line =~ TableSeparator)
146
- output.push read_table(src)
147
- elsif [:header1,:header2].include? src.next_line.md_type
148
- output.push read_header12(src)
149
- elsif eventually_comes_a_def_list(src)
150
- definition = read_definition(src)
151
- if output.last.kind_of?(MDElement) &&
152
- output.last.node_type == :definition_list then
153
- output.last.children << definition
154
- else
155
- output.push md_el(:definition_list, [definition])
156
- end
157
- else # Start of a paragraph
158
- output.push read_paragraph(src)
159
- end
160
- end
161
-
162
-
163
- def read_ald(src)
164
- if (l=src.shift_line) =~ AttributeDefinitionList
165
- id = $1; al=$2;
166
- al = read_attribute_list(CharSource.new(al,src), context=nil, break_on=[nil])
167
- self.ald[id] = al;
168
- return md_ald(id, al)
169
- else
170
- maruku_error "Bug Bug:\n#{l.inspect}"
171
- return nil
172
- end
173
- end
174
-
175
- # reads a header (with ----- or ========)
176
- def read_header12(src)
177
- line = src.shift_line.strip
178
- al = nil
179
- # Check if there is an IAL
180
- if new_meta_data? and line =~ /^(.*)\{(.*)\}\s*$/
181
- line = $1.strip
182
- ial = $2
183
- al = read_attribute_list(CharSource.new(ial,src), context=nil, break_on=[nil])
184
- end
185
- text = parse_lines_as_span [ line ]
186
- level = src.cur_line.md_type == :header2 ? 2 : 1;
187
- src.shift_line
188
- return md_header(level, text, al)
189
- end
190
-
191
- # reads a header like '#### header ####'
192
- def read_header3(src)
193
- line = src.shift_line.strip
194
- al = nil
195
- # Check if there is an IAL
196
- if new_meta_data? and line =~ /^(.*)\{(.*)\}\s*$/
197
- line = $1.strip
198
- ial = $2
199
- al = read_attribute_list(CharSource.new(ial,src), context=nil, break_on=[nil])
200
- end
201
- level = num_leading_hashes(line)
202
- text = parse_lines_as_span [strip_hashes(line)]
203
- return md_header(level, text, al)
204
- end
205
-
206
- def read_xml_instruction(src, output)
207
- m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
208
- raise "BugBug" if not m
209
- target = m[2] || ''
210
- code = m[3]
211
- until code =~ /\?>/
212
- code += "\n"+src.shift_line
213
- end
214
- if not code =~ (/\?>\s*$/)
215
- garbage = (/\?>(.*)$/.match(code))[1]
216
- maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n"+
217
- add_tabs(code, 1, '|'), src
218
- end
219
- code.gsub!(/\?>\s*$/, '')
220
-
221
- if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
222
- result = safe_execute_code(self, code)
223
- if result
224
- if result.kind_of? String
225
- raise "Not expected"
226
- else
227
- output.push(*result)
228
- end
229
- end
230
- else
231
- output.push md_xml_instr(target, code)
232
- end
233
- end
234
-
235
- def read_raw_html(src)
236
- h = HTMLHelper.new
237
- begin
238
- h.eat_this(l=src.shift_line)
239
- # puts "\nBLOCK:\nhtml -> #{l.inspect}"
240
- while src.cur_line and not h.is_finished?
241
- l=src.shift_line
242
- # puts "html -> #{l.inspect}"
243
- h.eat_this "\n"+l
244
- end
245
- rescue Exception => e
246
- ex = e.inspect + e.backtrace.join("\n")
247
- maruku_error "Bad block-level HTML:\n#{add_tabs(ex,1,'|')}\n", src
248
- end
249
- if not (h.rest =~ /^\s*$/)
250
- maruku_error "Could you please format this better?\n"+
251
- "I see that #{h.rest.inspect} is left after the raw HTML.", src
252
- end
253
- raw_html = h.stuff_you_read
254
-
255
- return md_html(raw_html)
256
- end
257
-
258
- def read_paragraph(src)
259
- lines = [src.shift_line]
260
- while src.cur_line
261
- # :olist does not break
262
- case t = src.cur_line.md_type
263
- when :quote,:header3,:empty,:ref_definition,:ial #,:xml_instr,:raw_html
264
- break
265
- when :olist,:ulist
266
- break if src.next_line.md_type == t
267
- end
268
- break if src.cur_line.strip.size == 0
269
- break if [:header1,:header2].include? src.next_line.md_type
270
- break if any_matching_block_extension?(src.cur_line)
271
-
272
- lines << src.shift_line
273
- end
274
- # dbg_describe_ary(lines, 'PAR')
275
- children = parse_lines_as_span(lines, src)
276
-
277
- return md_par(children)
278
- end
279
-
280
- # Reads one list item, either ordered or unordered.
281
- def read_list_item(src)
282
- parent_offset = src.cur_index
283
-
284
- item_type = src.cur_line.md_type
285
- first = src.shift_line
286
-
287
- indentation = spaces_before_first_char(first)
288
- break_list = [:ulist, :olist, :ial]
289
- # Ugly things going on inside `read_indented_content`
290
- lines, want_my_paragraph =
291
- read_indented_content(src,indentation, break_list, item_type)
292
-
293
- # add first line
294
- # Strip first '*', '-', '+' from first line
295
- stripped = first[indentation, first.size-1]
296
- lines.unshift stripped
297
-
298
- # dbg_describe_ary(lines, 'LIST ITEM ')
299
-
300
- src2 = LineSource.new(lines, src, parent_offset)
301
- children = parse_blocks(src2)
302
- with_par = want_my_paragraph || (children.size>1)
303
-
304
- return md_li(children, with_par)
305
- end
306
-
307
- def read_abbreviation(src)
308
- if not (l=src.shift_line) =~ Abbreviation
309
- maruku_error "Bug: it's Andrea's fault. Tell him.\n#{l.inspect}"
310
- end
311
-
312
- abbr = $1
313
- desc = $2
314
-
315
- if (not abbr) or (abbr.size==0)
316
- maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
317
- end
318
-
319
- self.abbreviations[abbr] = desc
320
-
321
- return md_abbr_def(abbr, desc)
322
- end
323
-
324
- def read_footnote_text(src)
325
- parent_offset = src.cur_index
326
-
327
- first = src.shift_line
328
-
329
- if not first =~ FootnoteText
330
- maruku_error "Bug (it's Andrea's fault)"
331
- end
332
-
333
- id = $1
334
- text = $2
335
-
336
- # Ugly things going on inside `read_indented_content`
337
- indentation = 4 #first.size-text.size
338
-
339
- # puts "id =_#{id}_; text=_#{text}_ indent=#{indentation}"
340
-
341
- break_list = [:footnote_text, :ref_definition, :definition, :abbreviation]
342
- item_type = :footnote_text
343
- lines, want_my_paragraph =
344
- read_indented_content(src,indentation, break_list, item_type)
345
-
346
- # add first line
347
- if text && text.strip != "" then lines.unshift text end
348
-
349
- # dbg_describe_ary(lines, 'FOOTNOTE')
350
- src2 = LineSource.new(lines, src, parent_offset)
351
- children = parse_blocks(src2)
352
-
353
- e = md_footnote(id, children)
354
- self.footnotes[id] = e
355
- return e
356
- end
357
-
358
-
359
- # This is the only ugly function in the code base.
360
- # It is used to read list items, descriptions, footnote text
361
- def read_indented_content(src, indentation, break_list, item_type)
362
- lines =[]
363
- # collect all indented lines
364
- saw_empty = false; saw_anything_after = false
365
- while src.cur_line
366
- # puts "Reading indent = #{indentation} #{src.cur_line.inspect}"
367
- #puts "#{src.cur_line.md_type} #{src.cur_line.inspect}"
368
- if src.cur_line.md_type == :empty
369
- saw_empty = true
370
- lines << src.shift_line
371
- next
372
- end
373
-
374
- # after a white line
375
- if saw_empty
376
- # we expect things to be properly aligned
377
- if (ns=number_of_leading_spaces(src.cur_line)) < indentation
378
- #puts "breaking for spaces, only #{ns}: #{src.cur_line}"
379
- break
380
- end
381
- saw_anything_after = true
382
- else
383
- # if src.cur_line[0] != ?\
384
- break if break_list.include? src.cur_line.md_type
385
- # end
386
- # break if src.cur_line.md_type != :text
387
- end
388
-
389
-
390
- stripped = strip_indent(src.shift_line, indentation)
391
- lines << stripped
392
-
393
- #puts "Accepted as #{stripped.inspect}"
394
-
395
- # You are only required to indent the first line of
396
- # a child paragraph.
397
- if stripped.md_type == :text
398
- while src.cur_line && (src.cur_line.md_type == :text)
399
- lines << strip_indent(src.shift_line, indentation)
400
- end
401
- end
402
- end
403
-
404
- want_my_paragraph = saw_anything_after ||
405
- (saw_empty && (src.cur_line && (src.cur_line.md_type == item_type)))
406
-
407
- # dbg_describe_ary(lines, 'LI')
408
- # create a new context
409
-
410
- while lines.last && (lines.last.md_type == :empty)
411
- lines.pop
412
- end
413
-
414
- return lines, want_my_paragraph
415
- end
416
-
417
-
418
- def read_quote(src)
419
- parent_offset = src.cur_index
420
-
421
- lines = []
422
- # collect all indented lines
423
- while src.cur_line && src.cur_line.md_type == :quote
424
- lines << unquote(src.shift_line)
425
- end
426
- # dbg_describe_ary(lines, 'QUOTE')
427
-
428
- src2 = LineSource.new(lines, src, parent_offset)
429
- children = parse_blocks(src2)
430
- return md_quote(children)
431
- end
432
-
433
- def read_code(src)
434
- # collect all indented lines
435
- lines = []
436
- while src.cur_line && ([:code, :empty].include? src.cur_line.md_type)
437
- lines << strip_indent(src.shift_line, 4)
438
- end
439
-
440
- #while lines.last && (lines.last.md_type == :empty )
441
- while lines.last && lines.last.strip.size == 0
442
- lines.pop
443
- end
444
-
445
- while lines.first && lines.first.strip.size == 0
446
- lines.shift
447
- end
448
-
449
- return nil if lines.empty?
450
-
451
- source = lines.join("\n")
452
-
453
- # dbg_describe_ary(lines, 'CODE')
454
-
455
- return md_codeblock(source)
456
- end
457
-
458
- # Reads a series of metadata lines with empty lines in between
459
- def read_metadata(src)
460
- hash = {}
461
- while src.cur_line
462
- case src.cur_line.md_type
463
- when :empty; src.shift_line
464
- when :metadata; hash.merge! parse_metadata(src.shift_line)
465
- else break
466
- end
467
- end
468
- hash
469
- end
470
-
471
-
472
- def read_ref_definition(src, out)
473
- line = src.shift_line
474
-
475
-
476
- # if link is incomplete, shift next line
477
- if src.cur_line && !([:footnote_text, :ref_definition, :definition, :abbreviation].include? src.cur_line.md_type) &&
478
- ([1,2,3].include? number_of_leading_spaces(src.cur_line) )
479
- line += " "+ src.shift_line
480
- end
481
-
482
- # puts "total= #{line}"
483
-
484
- match = LinkRegex.match(line)
485
- if not match
486
- maruku_error "Link does not respect format: '#{line}'"
487
- return
488
- end
489
-
490
- id = match[1]; url = match[2]; title = match[3];
491
- id = sanitize_ref_id(id)
492
-
493
- hash = self.refs[id] = {:url=>url,:title=>title}
494
-
495
- stuff=match[4]
496
-
497
- if stuff
498
- stuff.split.each do |couple|
499
- # puts "found #{couple}"
500
- k, v = couple.split('=')
501
- v ||= ""
502
- if v[0,1]=='"' then v = v[1, v.size-2] end
503
- # puts "key:_#{k}_ value=_#{v}_"
504
- hash[k.to_sym] = v
505
- end
506
- end
507
- # puts hash.inspect
508
-
509
- out.push md_ref_def(id, url, meta={:title=>title})
510
- end
511
-
512
- def split_cells(s)
513
- # s.strip.split('|').select{|x|x.strip.size>0}.map{|x|x.strip}
514
- # changed to allow empty cells
515
- s.strip.split('|').select{|x|x.size>0}.map{|x|x.strip}
516
- end
517
-
518
- def read_table(src)
519
- head = split_cells(src.shift_line).map{|s| md_el(:head_cell, parse_lines_as_span([s])) }
520
-
521
- separator=split_cells(src.shift_line)
522
-
523
- align = separator.map { |s| s =~ Sep
524
- if $1 and $2 then :center elsif $2 then :right else :left end }
525
-
526
- num_columns = align.size
527
-
528
- if head.size != num_columns
529
- maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
530
- tell_user "I will ignore this table."
531
- # XXX try to recover
532
- return md_br()
533
- end
534
-
535
- rows = []
536
-
537
- while src.cur_line && src.cur_line =~ /\|/
538
- row = split_cells(src.shift_line).map{|s|
539
- md_el(:cell, parse_lines_as_span([s]))}
540
- if head.size != num_columns
541
- maruku_error "Row does not have #{num_columns} columns: \n#{row.inspect}"
542
- tell_user "I will ignore this table."
543
- # XXX try to recover
544
- return md_br()
545
- end
546
- rows << row
547
- end
548
-
549
- children = (head+rows).flatten
550
- return md_el(:table, children, {:align => align})
551
- end
552
-
553
- # If current line is text, a definition list is coming
554
- # if 1) text,empty,[text,empty]*,definition
555
-
556
- def eventually_comes_a_def_list(src)
557
- future = src.tell_me_the_future
558
- ok = future =~ %r{^t+e?d}x
559
- # puts "future: #{future} - #{ok}"
560
- ok
561
- end
562
-
563
-
564
- def read_definition(src)
565
- # Read one or more terms
566
- terms = []
567
- while src.cur_line && src.cur_line.md_type == :text
568
- terms << md_el(:definition_term, parse_lines_as_span([src.shift_line]))
569
- end
570
- # dbg_describe_ary(terms, 'DT')
571
-
572
- want_my_paragraph = false
573
-
574
- raise "Chunky Bacon!" if not src.cur_line
575
-
576
- # one optional empty
577
- if src.cur_line.md_type == :empty
578
- want_my_paragraph = true
579
- src.shift_line
580
- end
581
-
582
- raise "Chunky Bacon!" if src.cur_line.md_type != :definition
583
-
584
- # Read one or more definitions
585
- definitions = []
586
- while src.cur_line && src.cur_line.md_type == :definition
587
- parent_offset = src.cur_index
588
-
589
- first = src.shift_line
590
- first =~ Definition
591
- first = $1
592
-
593
- # I know, it's ugly!!!
594
-
595
- lines, w_m_p =
596
- read_indented_content(src,4, [:definition], :definition)
597
- want_my_paragraph ||= w_m_p
598
-
599
- lines.unshift first
600
-
601
- # dbg_describe_ary(lines, 'DD')
602
- src2 = LineSource.new(lines, src, parent_offset)
603
- children = parse_blocks(src2)
604
- definitions << md_el(:definition_data, children)
605
- end
606
-
607
- return md_el(:definition, terms+definitions, {
608
- :terms => terms,
609
- :definitions => definitions,
610
- :want_my_paragraph => want_my_paragraph})
611
- end
612
- end # BlockLevelParser
613
- end # MaRuKu
614
- end
615
- end
5
+ include Helpers
6
+ include MaRuKu::Strings
7
+ include MaRuKu::In::Markdown::SpanLevelParser
8
+
9
+ class BlockContext < Array
10
+ def describe
11
+ n = 5
12
+ desc = size > n ? self[-n, n] : self
13
+ "Last #{n} elements: " +
14
+ desc.map {|x| "\n -" + x.inspect }.join
15
+ end
16
+ end
17
+
18
+ # Splits the string and calls parse_lines_as_markdown
19
+ def parse_text_as_markdown(text)
20
+ lines = split_lines(text)
21
+ src = LineSource.new(lines)
22
+ parse_blocks(src)
23
+ end
24
+
25
+ # Input is a LineSource
26
+ def parse_blocks(src)
27
+ output = BlockContext.new
28
+
29
+ # run state machine
30
+ while src.cur_line
31
+ next if check_block_extensions(src, output, src.cur_line)
32
+
33
+ md_type = src.cur_line.md_type
34
+
35
+ # Prints detected type (useful for debugging)
36
+ #puts "parse_blocks #{md_type}|#{src.cur_line}"
37
+ case md_type
38
+ when :empty
39
+ output << :empty
40
+ src.ignore_line
41
+ when :ial
42
+ m = InlineAttributeList.match src.shift_line
43
+ content = m[1] || ""
44
+ src2 = CharSource.new(content, src)
45
+ interpret_extension(src2, output)
46
+ when :ald
47
+ output << read_ald(src)
48
+ when :text
49
+ # paragraph, or table, or definition list
50
+ read_text_material(src, output)
51
+ when :header2, :hrule
52
+ # hrule
53
+ src.shift_line
54
+ output << md_hrule
55
+ when :header3
56
+ output << read_header3(src)
57
+ when :ulist, :olist
58
+ list_type = (md_type == :ulist) ? :ul : :ol
59
+ li = read_list_item(src)
60
+ # append to current list if we have one
61
+ if output.last.kind_of?(MDElement) &&
62
+ output.last.node_type == list_type then
63
+ output.last.children << li
64
+ else
65
+ output << md_el(list_type, li)
66
+ end
67
+ when :quote
68
+ output << read_quote(src)
69
+ when :code
70
+ e = read_code(src)
71
+ output << e if e
72
+ when :raw_html
73
+ # More extra hacky stuff - if there's more than just HTML, we either wrap it
74
+ # in a paragraph or break it up depending on whether it's an inline element or not
75
+ e = read_raw_html(src)
76
+ unless e.empty?
77
+ if e.first.parsed_html &&
78
+ (first_node_name = e.first.parsed_html.first_node_name) &&
79
+ HTML_INLINE_ELEMS.include?(first_node_name) &&
80
+ !%w(svg math).include?(first_node_name)
81
+ content = [e.first]
82
+ if e.size > 1
83
+ content.concat(e[1].children)
84
+ end
85
+ output << md_par(content)
86
+ else
87
+ output.concat(e)
88
+ end
89
+ end
90
+ when :footnote_text
91
+ output << read_footnote_text(src)
92
+ when :ref_definition
93
+ if src.parent && src.cur_index == 0
94
+ read_text_material(src, output)
95
+ else
96
+ read_ref_definition(src, output)
97
+ end
98
+ when :abbreviation
99
+ output << read_abbreviation(src)
100
+ when :xml_instr
101
+ read_xml_instruction(src, output)
102
+ else # warn if we forgot something
103
+ line = src.cur_line
104
+ maruku_error "Ignoring line '#{line}' type = #{md_type}", src
105
+ src.shift_line
106
+ end
107
+ end
108
+
109
+ merge_ial(output, src, output)
110
+ output.delete_if {|x| x.kind_of?(MDElement) && x.node_type == :ial }
111
+
112
+ # get rid of empty line markers
113
+ output.delete_if {|x| x == :empty }
114
+
115
+ # See for each list if we can omit the paragraphs
116
+ # TODO: do this after
117
+ output.each do |c|
118
+ # Remove paragraphs that we can get rid of
119
+ if [:ul, :ol].include?(c.node_type) && c.children.none?(&:want_my_paragraph)
120
+ c.children.each do |d|
121
+ if d.children.first && d.children.first.node_type == :paragraph
122
+ d.children = d.children.first.children + d.children[1..-1]
123
+ end
124
+ end
125
+ elsif c.node_type == :definition_list && c.children.none?(&:want_my_paragraph)
126
+ c.children.each do |definition|
127
+ definition.definitions.each do |dd|
128
+ if dd.children.first.node_type == :paragraph
129
+ dd.children = dd.children.first.children + dd.children[1..-1]
130
+ end
131
+ end
132
+ end
133
+ end
134
+ end
135
+
136
+ output
137
+ end
138
+
139
+ def read_text_material(src, output)
140
+ if src.cur_line.include?('|') && # if contains a pipe, it could be a table header
141
+ src.next_line &&
142
+ src.next_line.rstrip =~ TableSeparator
143
+ output << read_table(src)
144
+ elsif src.next_line && [:header1, :header2].include?(src.next_line.md_type)
145
+ output << read_header12(src)
146
+ elsif eventually_comes_a_def_list(src)
147
+ definition = read_definition(src)
148
+ if output.last.kind_of?(MDElement) &&
149
+ output.last.node_type == :definition_list then
150
+ output.last.children << definition
151
+ else
152
+ output << md_el(:definition_list, definition)
153
+ end
154
+ else # Start of a paragraph
155
+ output << read_paragraph(src)
156
+ end
157
+ end
158
+
159
+ def read_ald(src)
160
+ if (l = src.shift_line) =~ AttributeDefinitionList
161
+ id = $1
162
+ al = read_attribute_list(CharSource.new($2, src))
163
+ self.ald[id] = al;
164
+ md_ald(id, al)
165
+ else
166
+ maruku_error "Bug Bug:\n#{l.inspect}"
167
+ nil
168
+ end
169
+ end
170
+
171
+ # reads a header (with ----- or ========)
172
+ def read_header12(src)
173
+ line = src.shift_line.strip
174
+ al = nil
175
+ # Check if there is an IAL
176
+ if new_meta_data? and line =~ /^(.*?)\{(.*?)\}\s*$/
177
+ line = $1.strip
178
+ ial = $2
179
+ al = read_attribute_list(CharSource.new(ial, src))
180
+ end
181
+ text = parse_span line
182
+ if text.empty?
183
+ text = "{#{ial}}"
184
+ al = nil
185
+ end
186
+ level = src.cur_line.md_type == :header2 ? 2 : 1;
187
+ src.shift_line
188
+ md_header(level, text, al)
189
+ end
190
+
191
+ # reads a header like '#### header ####'
192
+ def read_header3(src)
193
+ line = src.shift_line.strip
194
+ al = nil
195
+ # Check if there is an IAL
196
+ if new_meta_data? and line =~ /^(.*?)\{(.*?)\}\s*$/
197
+ line = $1.strip
198
+ ial = $2
199
+ al = read_attribute_list(CharSource.new(ial, src))
200
+ end
201
+ level = line[/^#+/].size
202
+ text = parse_span line.gsub(/\A#+|#+\z/, '')
203
+ if text.empty?
204
+ text = "{#{ial}}"
205
+ al = nil
206
+ end
207
+ md_header(level, text, al)
208
+ end
209
+
210
+ def read_xml_instruction(src, output)
211
+ m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
212
+ raise "BugBug" unless m
213
+ target = m[2] || ''
214
+ code = m[3]
215
+ until code.include?('?>')
216
+ code << "\n" << src.shift_line
217
+ end
218
+ unless code =~ /\?>\s*$/
219
+ garbage = (/\?>(.*)$/.match(code))[1]
220
+ maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n" +
221
+ code.gsub(/^/, '|'), src
222
+ end
223
+ code.gsub!(/\?>\s*$/, '')
224
+
225
+ if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
226
+ result = safe_execute_code(self, code)
227
+ if result
228
+ if result.kind_of? String
229
+ raise "Not expected"
230
+ else
231
+ output.push(*result)
232
+ end
233
+ end
234
+ else
235
+ output << md_xml_instr(target, code)
236
+ end
237
+ end
238
+
239
+ def read_raw_html(src)
240
+ extra_line = nil
241
+ h = HTMLHelper.new
242
+ begin
243
+ l = src.shift_line
244
+ h.eat_this(l)
245
+ # puts "\nBLOCK:\nhtml -> #{l.inspect}"
246
+ while src.cur_line && !h.is_finished?
247
+ l = src.shift_line
248
+ # puts "html -> #{l.inspect}"
249
+ h.eat_this "\n" + l
250
+ end
251
+ rescue => e
252
+ maruku_error "Bad block-level HTML:\n#{e.inspect.gsub(/^/, '|')}\n", src
253
+ end
254
+ unless h.rest =~ /^\s*$/
255
+ extra_line = h.rest
256
+ end
257
+ raw_html = h.stuff_you_read
258
+
259
+ is_inline = HTML_INLINE_ELEMS.include?(h.first_tag)
260
+
261
+ if extra_line
262
+ remainder = is_inline ? parse_span(extra_line) : parse_text_as_markdown(extra_line)
263
+ if extra_line.start_with?(' ')
264
+ remainder[0] = ' ' + remainder[0] if remainder[0].is_a?(String)
265
+ end
266
+ is_inline ? [md_html(raw_html), md_par(remainder)] : [md_html(raw_html)] + remainder
267
+ else
268
+ [md_html(raw_html)]
269
+ end
270
+ end
271
+
272
+ def read_paragraph(src)
273
+ lines = [src.shift_line]
274
+ while src.cur_line
275
+ # :olist does not break
276
+ case t = src.cur_line.md_type
277
+ when :quote, :header3, :empty, :ref_definition, :ial, :xml_instr
278
+ break
279
+ when :olist, :ulist
280
+ break if !src.next_line || src.next_line.md_type == t
281
+ when :raw_html
282
+ # This is a pretty awful hack to handle inline HTML
283
+ # but it means double-parsing HMTL.
284
+ html = parse_span([src.cur_line], src)
285
+ unless html.empty? || html.first.is_a?(String)
286
+ if html.first.parsed_html
287
+ first_node_name = html.first.parsed_html.first_node_name
288
+ end
289
+ end
290
+ break if first_node_name && !HTML_INLINE_ELEMS.include?(first_node_name)
291
+ end
292
+ break if src.cur_line.strip.empty?
293
+ break if src.next_line && [:header1, :header2].include?(src.next_line.md_type)
294
+ break if any_matching_block_extension?(src.cur_line)
295
+
296
+ lines << src.shift_line
297
+ end
298
+ children = parse_span(lines, src)
299
+
300
+ md_par(children)
301
+ end
302
+
303
+ # Reads one list item, either ordered or unordered.
304
+ def read_list_item(src)
305
+ parent_offset = src.cur_index
306
+
307
+ item_type = src.cur_line.md_type
308
+ first = src.shift_line
309
+
310
+ indentation, ial = spaces_before_first_char(first)
311
+ al = read_attribute_list(CharSource.new(ial, src)) if ial
312
+ ial_offset = ial ? ial.length + 3 : 0
313
+ lines, want_my_paragraph =
314
+ read_indented_content(src, indentation, [], item_type, ial_offset)
315
+
316
+ # add first line
317
+ # Strip first '*', '-', '+' from first line
318
+ stripped = first[indentation, first.size - 1]
319
+ lines.unshift stripped
320
+
321
+ src2 = LineSource.new(lines, src, parent_offset)
322
+ children = parse_blocks(src2)
323
+
324
+ md_li(children, want_my_paragraph, al)
325
+ end
326
+
327
+ def read_abbreviation(src)
328
+ unless (l = src.shift_line) =~ Abbreviation
329
+ maruku_error "Bug: it's Andrea's fault. Tell him.\n#{l.inspect}"
330
+ end
331
+
332
+ abbr = $1
333
+ desc = $2
334
+
335
+ if !abbr || abbr.empty?
336
+ maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
337
+ end
338
+
339
+ self.abbreviations[abbr] = desc
340
+
341
+ md_abbr_def(abbr, desc)
342
+ end
343
+
344
+ def read_footnote_text(src)
345
+ parent_offset = src.cur_index
346
+
347
+ first = src.shift_line
348
+
349
+ unless first =~ FootnoteText
350
+ maruku_error "Bug (it's Andrea's fault)"
351
+ end
352
+
353
+ id = $1
354
+ text = $2 || ''
355
+
356
+ indentation = 4 #first.size-text.size
357
+
358
+ # puts "id =_#{id}_; text=_#{text}_ indent=#{indentation}"
359
+
360
+ break_list = [:footnote_text, :ref_definition, :definition, :abbreviation]
361
+ item_type = :footnote_text
362
+ lines, _ = read_indented_content(src, indentation, break_list, item_type)
363
+
364
+ # add first line
365
+ lines.unshift text unless text.strip.empty?
366
+
367
+ src2 = LineSource.new(lines, src, parent_offset)
368
+ children = parse_blocks(src2)
369
+
370
+ e = md_footnote(id, children)
371
+ self.footnotes[id] = e
372
+ e
373
+ end
374
+
375
+
376
+ # This is the only ugly function in the code base.
377
+ # It is used to read list items, descriptions, footnote text
378
+ def read_indented_content(src, indentation, break_list, item_type, ial_offset=0)
379
+ lines = []
380
+ # collect all indented lines
381
+ saw_empty = false
382
+ saw_anything_after = false
383
+ break_list = Array(break_list)
384
+ len = indentation - ial_offset
385
+
386
+ while src.cur_line
387
+ num_leading_spaces = src.cur_line.number_of_leading_spaces
388
+ break if num_leading_spaces < len && ![:text, :empty].include?(src.cur_line.md_type)
389
+
390
+ line = strip_indent(src.cur_line, indentation)
391
+ md_type = line.md_type
392
+
393
+ if md_type == :empty
394
+ saw_empty = true
395
+ lines << line
396
+ src.shift_line
397
+ next
398
+ end
399
+
400
+ # Unquestioningly grab anything that's deeper-indented
401
+ if md_type != :code && num_leading_spaces > len
402
+ lines << line
403
+ src.shift_line
404
+ next
405
+ end
406
+
407
+ # after a white line
408
+ if saw_empty
409
+ # we expect things to be properly aligned
410
+ break if num_leading_spaces < len
411
+ saw_anything_after = true
412
+ else
413
+ break if break_list.include?(md_type)
414
+ end
415
+
416
+ lines << line
417
+ src.shift_line
418
+
419
+ # You are only required to indent the first line of
420
+ # a child paragraph.
421
+ if md_type == :text
422
+ while src.cur_line && src.cur_line.md_type == :text
423
+ lines << strip_indent(src.shift_line, indentation)
424
+ end
425
+ end
426
+ end
427
+
428
+ # TODO fix this
429
+ want_my_paragraph = saw_anything_after ||
430
+ (saw_empty && src.cur_line && src.cur_line.md_type == item_type)
431
+
432
+ # create a new context
433
+
434
+ while lines.last && lines.last.md_type == :empty
435
+ lines.pop
436
+ end
437
+
438
+ return lines, want_my_paragraph
439
+ end
440
+
441
+
442
+ def read_quote(src)
443
+ parent_offset = src.cur_index
444
+
445
+ lines = []
446
+ # collect all indented lines
447
+ while src.cur_line && src.cur_line.md_type == :quote
448
+ lines << unquote(src.shift_line)
449
+ end
450
+
451
+ src2 = LineSource.new(lines, src, parent_offset)
452
+ children = parse_blocks(src2)
453
+ md_quote(children)
454
+ end
455
+
456
+ def read_code(src)
457
+ # collect all indented lines
458
+ lines = []
459
+ while src.cur_line && [:code, :empty].include?(src.cur_line.md_type)
460
+ lines << strip_indent(src.shift_line, 4)
461
+ end
462
+
463
+ #while lines.last && (lines.last.md_type == :empty )
464
+ while lines.last && lines.last.strip.size == 0
465
+ lines.pop
466
+ end
467
+
468
+ while lines.first && lines.first.strip.size == 0
469
+ lines.shift
470
+ end
471
+
472
+ return nil if lines.empty?
473
+
474
+ source = lines.join("\n")
475
+
476
+ md_codeblock(source)
477
+ end
478
+
479
+ def read_ref_definition(src, out)
480
+ line = src.shift_line
481
+
482
+ # if link is incomplete, shift next line
483
+ if src.cur_line &&
484
+ ![:footnote_text, :ref_definition, :definition, :abbreviation].include?(src.cur_line.md_type) &&
485
+ (1..3).include?(src.cur_line.number_of_leading_spaces)
486
+ line << " " << src.shift_line
487
+ end
488
+
489
+ match = LinkRegex.match(line)
490
+ unless match
491
+ maruku_error "Link does not respect format: '#{line}'" and return
492
+ end
493
+
494
+ id = match[1]
495
+ url = match[2]
496
+ title = match[3] || match[4] || match[5]
497
+ id = sanitize_ref_id(id)
498
+
499
+ hash = self.refs[id] = {
500
+ :url => url,
501
+ :title => title
502
+ }
503
+
504
+ stuff = (match[6] || '')
505
+ stuff.split.each do |couple|
506
+ k, v = couple.split('=')
507
+ v ||= ""
508
+ v = v[1..-2] if v.start_with?('"') # strip quotes
509
+ hash[k.to_sym] = v
510
+ end
511
+
512
+ out << md_ref_def(id, url, :title => title)
513
+ end
514
+
515
+ def split_cells(s)
516
+ s.split('|').reject(&:empty?).map(&:strip)
517
+ end
518
+
519
+ def read_table(src)
520
+ head = split_cells(src.shift_line).map do |s|
521
+ md_el(:head_cell, parse_span(s))
522
+ end
523
+
524
+ separator = split_cells(src.shift_line)
525
+
526
+ align = separator.map do |s|
527
+ # ex: :-------------------:
528
+ # If the separator starts and ends with a colon,
529
+ # center the cell. If it's on the right, right-align,
530
+ # otherwise left-align.
531
+ starts = s.start_with? ':'
532
+ ends = s.end_with? ':'
533
+ if starts && ends
534
+ :center
535
+ elsif ends
536
+ :right
537
+ else
538
+ :left
539
+ end
540
+ end
541
+
542
+ num_columns = align.size
543
+
544
+ if head.size != num_columns
545
+ maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
546
+ tell_user "I will ignore this table."
547
+ # XXX try to recover
548
+ return md_br
549
+ end
550
+
551
+ rows = []
552
+
553
+ while src.cur_line && src.cur_line =~ /\|/
554
+ row = split_cells(src.shift_line).map do |s|
555
+ md_el(:cell, parse_span(s))
556
+ end
557
+
558
+ if head.size != num_columns
559
+ maruku_error "Row does not have #{num_columns} columns: \n#{row.inspect}"
560
+ tell_user "I will ignore this table."
561
+ # XXX try to recover
562
+ return md_br
563
+ end
564
+ rows << row
565
+ end
566
+
567
+ children = (head + rows).flatten
568
+ md_el(:table, children, { :align => align })
569
+ end
570
+
571
+ # If current line is text, a definition list is coming
572
+ # if 1) text,empty,[text,empty]*,definition
573
+ def eventually_comes_a_def_list(src)
574
+ src.tell_me_the_future =~ %r{^t+e?d}x
575
+ end
576
+
577
+ def read_definition(src)
578
+ # Read one or more terms
579
+ terms = []
580
+ while src.cur_line && src.cur_line.md_type == :text
581
+ terms << md_el(:definition_term, parse_span(src.shift_line))
582
+ end
583
+
584
+ want_my_paragraph = false
585
+
586
+ raise "Chunky Bacon!" unless src.cur_line
587
+
588
+ # one optional empty
589
+ if src.cur_line.md_type == :empty
590
+ want_my_paragraph = true
591
+ src.shift_line
592
+ end
593
+
594
+ raise "Chunky Bacon!" unless src.cur_line.md_type == :definition
595
+
596
+ # Read one or more definitions
597
+ definitions = []
598
+ while src.cur_line && src.cur_line.md_type == :definition
599
+ parent_offset = src.cur_index
600
+
601
+ first = src.shift_line
602
+ first =~ Definition
603
+ first = $1
604
+
605
+ lines, w_m_p = read_indented_content(src, 4, :definition, :definition)
606
+ want_my_paragraph ||= w_m_p
607
+
608
+ lines.unshift first
609
+
610
+ src2 = LineSource.new(lines, src, parent_offset)
611
+ children = parse_blocks(src2)
612
+ definitions << md_el(:definition_data, children)
613
+ end
614
+
615
+ md_el(:definition, terms + definitions, {
616
+ :terms => terms,
617
+ :definitions => definitions,
618
+ :want_my_paragraph => want_my_paragraph
619
+ })
620
+ end
621
+ end end end end