maruku 0.6.0 → 0.7.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (290) hide show
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE.txt +20 -0
  3. data/bin/maruku +153 -152
  4. data/bin/marutex +2 -29
  5. data/data/entities.xml +261 -0
  6. data/docs/markdown_syntax.md +9 -21
  7. data/docs/math.md +14 -18
  8. data/lib/maruku.rb +65 -78
  9. data/lib/maruku/attributes.rb +109 -214
  10. data/lib/maruku/defaults.rb +45 -67
  11. data/lib/maruku/document.rb +44 -0
  12. data/lib/maruku/element.rb +138 -0
  13. data/lib/maruku/errors.rb +80 -0
  14. data/lib/maruku/ext/div.rb +105 -113
  15. data/lib/maruku/ext/fenced_code.rb +97 -0
  16. data/lib/maruku/ext/math.rb +22 -26
  17. data/lib/maruku/ext/math/elements.rb +20 -26
  18. data/lib/maruku/ext/math/mathml_engines/blahtex.rb +92 -104
  19. data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +33 -26
  20. data/lib/maruku/ext/math/mathml_engines/none.rb +11 -19
  21. data/lib/maruku/ext/math/mathml_engines/ritex.rb +2 -4
  22. data/lib/maruku/ext/math/parsing.rb +121 -115
  23. data/lib/maruku/ext/math/to_html.rb +202 -187
  24. data/lib/maruku/ext/math/to_latex.rb +34 -21
  25. data/lib/maruku/helpers.rb +158 -257
  26. data/lib/maruku/html.rb +251 -0
  27. data/lib/maruku/input/charsource.rb +272 -319
  28. data/lib/maruku/input/extensions.rb +62 -63
  29. data/lib/maruku/input/html_helper.rb +233 -189
  30. data/lib/maruku/input/linesource.rb +90 -110
  31. data/lib/maruku/input/mdline.rb +131 -0
  32. data/lib/maruku/input/parse_block.rb +736 -613
  33. data/lib/maruku/input/parse_doc.rb +145 -217
  34. data/lib/maruku/input/parse_span.rb +740 -0
  35. data/lib/maruku/inspect_element.rb +60 -0
  36. data/lib/maruku/maruku.rb +14 -30
  37. data/lib/maruku/output/entity_table.rb +37 -0
  38. data/lib/maruku/output/s5/fancy.rb +462 -462
  39. data/lib/maruku/output/s5/to_s5.rb +115 -135
  40. data/lib/maruku/output/to_html.rb +907 -983
  41. data/lib/maruku/output/to_latex.rb +571 -563
  42. data/lib/maruku/output/to_markdown.rb +207 -162
  43. data/lib/maruku/output/to_s.rb +10 -52
  44. data/lib/maruku/string_utils.rb +129 -179
  45. data/lib/maruku/toc.rb +185 -196
  46. data/lib/maruku/version.rb +33 -38
  47. data/spec/block_docs/abbrev.md +776 -0
  48. data/{tests/unittest → spec/block_docs}/abbreviations.md +11 -20
  49. data/spec/block_docs/abbreviations2.md +27 -0
  50. data/{tests/unittest → spec/block_docs}/alt.md +2 -14
  51. data/{tests/unittest/pending → spec/block_docs}/amps.md +1 -13
  52. data/spec/block_docs/attribute_sanitize.md +22 -0
  53. data/{tests/unittest → spec/block_docs}/attributes/att2.md +0 -12
  54. data/{tests/unittest → spec/block_docs}/attributes/att3.md +2 -14
  55. data/{tests/unittest → spec/block_docs}/attributes/attributes.md +12 -16
  56. data/{tests/unittest → spec/block_docs}/attributes/circular.md +0 -12
  57. data/{tests/unittest → spec/block_docs}/attributes/default.md +1 -13
  58. data/spec/block_docs/atx_headers.md +22 -0
  59. data/spec/block_docs/auto_cdata.md +48 -0
  60. data/spec/block_docs/bad_cites.md +30 -0
  61. data/spec/block_docs/bad_divrefs.md +30 -0
  62. data/{tests/unittest → spec/block_docs}/blank.md +0 -12
  63. data/{tests/unittest → spec/block_docs}/blanks_in_code.md +16 -15
  64. data/spec/block_docs/block_quotes.md +66 -0
  65. data/{tests/unittest/loss.md → spec/block_docs/bug_def.md} +6 -18
  66. data/{tests/unittest → spec/block_docs}/bug_table.md +7 -19
  67. data/spec/block_docs/cites.md +37 -0
  68. data/{tests/unittest → spec/block_docs}/code.md +7 -14
  69. data/{tests/unittest → spec/block_docs}/code2.md +4 -14
  70. data/{tests/unittest → spec/block_docs}/code3.md +12 -16
  71. data/spec/block_docs/code4.md +79 -0
  72. data/{tests/unittest → spec/block_docs}/data_loss.md +2 -14
  73. data/spec/block_docs/div_without_newline.md +16 -0
  74. data/{tests/unittest → spec/block_docs}/divs/div1.md +0 -12
  75. data/{tests/unittest → spec/block_docs}/divs/div2.md +0 -12
  76. data/{tests/unittest → spec/block_docs}/divs/div3_nest.md +3 -15
  77. data/{tests/unittest → spec/block_docs}/easy.md +1 -13
  78. data/spec/block_docs/email.md +29 -0
  79. data/spec/block_docs/empty_cells.md +31 -0
  80. data/{tests/unittest → spec/block_docs}/encoding/iso-8859-1.md +1 -14
  81. data/{tests/unittest → spec/block_docs}/encoding/utf-8.md +0 -12
  82. data/{tests/unittest → spec/block_docs}/entities.md +33 -41
  83. data/{tests/unittest/notyet → spec/block_docs}/escape.md +2 -14
  84. data/{tests/unittest → spec/block_docs}/escaping.md +11 -22
  85. data/{tests/unittest → spec/block_docs}/extra_dl.md +2 -13
  86. data/{tests/unittest → spec/block_docs}/extra_header_id.md +14 -20
  87. data/{tests/unittest → spec/block_docs}/extra_table1.md +9 -21
  88. data/spec/block_docs/fenced_code_blocks.md +58 -0
  89. data/spec/block_docs/fenced_code_blocks_highlighted.md +17 -0
  90. data/{tests/unittest → spec/block_docs}/footnotes.md +12 -24
  91. data/spec/block_docs/footnotes2.md +82 -0
  92. data/spec/block_docs/hard.md +25 -0
  93. data/spec/block_docs/header_after_par.md +62 -0
  94. data/{tests/unittest → spec/block_docs}/headers.md +10 -18
  95. data/{tests/unittest → spec/block_docs}/hex_entities.md +7 -18
  96. data/{tests/unittest → spec/block_docs}/hrule.md +5 -12
  97. data/{tests/unittest → spec/block_docs}/html3.md +1 -13
  98. data/{tests/unittest → spec/block_docs}/html4.md +2 -14
  99. data/{tests/unittest → spec/block_docs}/html5.md +2 -14
  100. data/spec/block_docs/html_block_in_para.md +22 -0
  101. data/spec/block_docs/html_inline.md +25 -0
  102. data/spec/block_docs/html_trailing.md +31 -0
  103. data/spec/block_docs/ie.md +62 -0
  104. data/spec/block_docs/iframe.md +29 -0
  105. data/spec/block_docs/ignore_bad_header.md +9 -0
  106. data/{tests/unittest → spec/block_docs}/images.md +22 -28
  107. data/{tests/unittest → spec/block_docs}/images2.md +7 -17
  108. data/{tests/unittest → spec/block_docs}/inline_html.md +37 -67
  109. data/{tests/unittest → spec/block_docs}/inline_html2.md +1 -13
  110. data/spec/block_docs/inline_html_beginning.md +10 -0
  111. data/spec/block_docs/issue106.md +78 -0
  112. data/spec/block_docs/issue115.md +20 -0
  113. data/spec/block_docs/issue117.md +13 -0
  114. data/spec/block_docs/issue120.md +48 -0
  115. data/spec/block_docs/issue123.md +11 -0
  116. data/spec/block_docs/issue124.md +16 -0
  117. data/spec/block_docs/issue126.md +9 -0
  118. data/spec/block_docs/issue130.md +11 -0
  119. data/spec/block_docs/issue20.md +9 -0
  120. data/spec/block_docs/issue26.md +22 -0
  121. data/spec/block_docs/issue29.md +9 -0
  122. data/spec/block_docs/issue30.md +30 -0
  123. data/spec/block_docs/issue31.md +25 -0
  124. data/spec/block_docs/issue40.md +52 -0
  125. data/spec/block_docs/issue64.md +55 -0
  126. data/spec/block_docs/issue67.md +19 -0
  127. data/spec/block_docs/issue70.md +11 -0
  128. data/spec/block_docs/issue72.md +17 -0
  129. data/spec/block_docs/issue74.md +38 -0
  130. data/spec/block_docs/issue79.md +15 -0
  131. data/spec/block_docs/issue83.md +13 -0
  132. data/spec/block_docs/issue85.md +25 -0
  133. data/spec/block_docs/issue88.md +19 -0
  134. data/spec/block_docs/issue89.md +12 -0
  135. data/spec/block_docs/issue90.md +38 -0
  136. data/{tests/unittest/pending → spec/block_docs}/link.md +21 -18
  137. data/{tests/unittest → spec/block_docs}/links.md +33 -32
  138. data/spec/block_docs/links2.md +21 -0
  139. data/{tests/unittest → spec/block_docs}/list1.md +0 -12
  140. data/{tests/unittest → spec/block_docs}/list12.md +2 -14
  141. data/{tests/unittest → spec/block_docs}/list2.md +2 -14
  142. data/spec/block_docs/list_multipara.md +42 -0
  143. data/{tests/unittest → spec/block_docs}/lists.md +28 -29
  144. data/{tests/unittest → spec/block_docs}/lists10.md +2 -14
  145. data/spec/block_docs/lists11.md +23 -0
  146. data/spec/block_docs/lists12.md +43 -0
  147. data/spec/block_docs/lists13.md +55 -0
  148. data/spec/block_docs/lists14.md +61 -0
  149. data/spec/block_docs/lists15.md +36 -0
  150. data/spec/block_docs/lists6.md +88 -0
  151. data/spec/block_docs/lists7b.md +58 -0
  152. data/spec/block_docs/lists9.md +53 -0
  153. data/{tests/unittest → spec/block_docs}/lists_after_paragraph.md +19 -25
  154. data/spec/block_docs/lists_blank.md +35 -0
  155. data/{tests/unittest/list3.md → spec/block_docs/lists_blockquote_code.md} +2 -14
  156. data/{tests/unittest/list4.md → spec/block_docs/lists_need_blank_line.md} +44 -29
  157. data/spec/block_docs/lists_nested.md +44 -0
  158. data/spec/block_docs/lists_nested_blankline.md +34 -0
  159. data/spec/block_docs/lists_nested_deep.md +43 -0
  160. data/spec/block_docs/lists_ol.md +129 -0
  161. data/spec/block_docs/lists_ol2.md +147 -0
  162. data/spec/block_docs/lists_paraindent.md +42 -0
  163. data/spec/block_docs/lists_tab.md +54 -0
  164. data/spec/block_docs/loss.md +17 -0
  165. data/spec/block_docs/math-blahtex/equations.md +29 -0
  166. data/spec/block_docs/math-blahtex/inline.md +48 -0
  167. data/spec/block_docs/math-blahtex/math2.md +52 -0
  168. data/spec/block_docs/math-blahtex/table.md +25 -0
  169. data/spec/block_docs/math/embedded_invalid_svg.md +108 -0
  170. data/spec/block_docs/math/embedded_svg.md +136 -0
  171. data/spec/block_docs/math/equations.md +49 -0
  172. data/spec/block_docs/math/inline.md +46 -0
  173. data/spec/block_docs/math/math2.md +53 -0
  174. data/{tests/unittest → spec/block_docs}/math/notmath.md +0 -12
  175. data/spec/block_docs/math/raw_mathml.md +87 -0
  176. data/spec/block_docs/math/spaces_after_inline_math.md +17 -0
  177. data/spec/block_docs/math/table.md +25 -0
  178. data/{tests/unittest → spec/block_docs}/math/table2.md +11 -23
  179. data/{tests/unittest → spec/block_docs}/misc_sw.md +184 -121
  180. data/{tests/unittest → spec/block_docs}/olist.md +6 -18
  181. data/{tests/unittest → spec/block_docs}/one.md +0 -12
  182. data/{tests/unittest → spec/block_docs}/paragraph.md +0 -12
  183. data/{tests/unittest → spec/block_docs}/paragraph_rules/dont_merge_ref.md +4 -12
  184. data/{tests/unittest → spec/block_docs}/paragraph_rules/tab_is_blank.md +0 -12
  185. data/{tests/unittest → spec/block_docs}/paragraphs.md +1 -13
  186. data/{tests/unittest → spec/block_docs}/recover/recover_links.md +4 -16
  187. data/{tests/unittest/pending/ref.md → spec/block_docs/ref_with_period.md} +7 -16
  188. data/spec/block_docs/ref_with_title.md +22 -0
  189. data/{tests/unittest → spec/block_docs}/references/long_example.md +16 -23
  190. data/{tests/unittest → spec/block_docs}/references/spaces_and_numbers.md +0 -12
  191. data/{tests/unittest → spec/block_docs}/smartypants.md +24 -31
  192. data/{tests/unittest → spec/block_docs}/syntax_hl.md +13 -17
  193. data/{tests/unittest → spec/block_docs}/table_attributes.md +6 -20
  194. data/spec/block_docs/table_colspan.md +41 -0
  195. data/spec/block_docs/tables.md +47 -0
  196. data/spec/block_docs/tables2.md +74 -0
  197. data/{tests/unittest → spec/block_docs}/test.md +1 -13
  198. data/{tests/unittest/notyet → spec/block_docs}/ticks.md +1 -13
  199. data/spec/block_docs/toc.md +87 -0
  200. data/{tests/unittest/notyet → spec/block_docs}/triggering.md +14 -25
  201. data/{tests/unittest → spec/block_docs}/underscore_in_words.md +0 -12
  202. data/{tests/unittest → spec/block_docs}/wrapping.md +4 -16
  203. data/spec/block_docs/xml.md +33 -0
  204. data/spec/block_docs/xml3.md +24 -0
  205. data/spec/block_docs/xml_comments.md +32 -0
  206. data/{tests/unittest → spec/block_docs}/xml_instruction.md +9 -20
  207. data/spec/block_spec.rb +110 -0
  208. data/spec/cli_spec.rb +8 -0
  209. data/spec/span_spec.rb +263 -0
  210. data/spec/spec_helper.rb +3 -0
  211. data/spec/to_html_utf8_spec.rb +13 -0
  212. metadata +218 -202
  213. data/Rakefile +0 -73
  214. data/bin/marudown +0 -29
  215. data/bin/marutest +0 -345
  216. data/docs/changelog.md +0 -334
  217. data/lib/maruku/errors_management.rb +0 -92
  218. data/lib/maruku/ext/math/latex_fix.rb +0 -12
  219. data/lib/maruku/input/parse_span_better.rb +0 -746
  220. data/lib/maruku/input/rubypants.rb +0 -225
  221. data/lib/maruku/input/type_detection.rb +0 -147
  222. data/lib/maruku/output/to_latex_entities.rb +0 -367
  223. data/lib/maruku/output/to_latex_strings.rb +0 -64
  224. data/lib/maruku/structures.rb +0 -167
  225. data/lib/maruku/structures_inspect.rb +0 -87
  226. data/lib/maruku/structures_iterators.rb +0 -61
  227. data/lib/maruku/tests/benchmark.rb +0 -82
  228. data/lib/maruku/tests/new_parser.rb +0 -373
  229. data/lib/maruku/tests/tests.rb +0 -136
  230. data/lib/maruku/usage/example1.rb +0 -33
  231. data/maruku_gem.rb +0 -33
  232. data/tests/bugs/code_in_links.md +0 -101
  233. data/tests/bugs/complex_escaping.md +0 -38
  234. data/tests/math/syntax.md +0 -46
  235. data/tests/math_usage/document.md +0 -13
  236. data/tests/others/abbreviations.md +0 -11
  237. data/tests/others/blank.md +0 -4
  238. data/tests/others/code.md +0 -5
  239. data/tests/others/code2.md +0 -8
  240. data/tests/others/code3.md +0 -16
  241. data/tests/others/email.md +0 -4
  242. data/tests/others/entities.md +0 -19
  243. data/tests/others/escaping.md +0 -16
  244. data/tests/others/extra_dl.md +0 -101
  245. data/tests/others/extra_header_id.md +0 -13
  246. data/tests/others/extra_table1.md +0 -40
  247. data/tests/others/footnotes.md +0 -17
  248. data/tests/others/headers.md +0 -10
  249. data/tests/others/hrule.md +0 -10
  250. data/tests/others/images.md +0 -20
  251. data/tests/others/inline_html.md +0 -42
  252. data/tests/others/links.md +0 -38
  253. data/tests/others/list1.md +0 -4
  254. data/tests/others/list2.md +0 -5
  255. data/tests/others/list3.md +0 -8
  256. data/tests/others/lists.md +0 -32
  257. data/tests/others/lists_after_paragraph.md +0 -44
  258. data/tests/others/lists_ol.md +0 -39
  259. data/tests/others/misc_sw.md +0 -105
  260. data/tests/others/one.md +0 -1
  261. data/tests/others/paragraphs.md +0 -13
  262. data/tests/others/sss06.md +0 -352
  263. data/tests/others/test.md +0 -4
  264. data/tests/s5/s5profiling.md +0 -48
  265. data/tests/unittest/bug_def.md +0 -28
  266. data/tests/unittest/email.md +0 -32
  267. data/tests/unittest/hang.md +0 -29
  268. data/tests/unittest/html2.md +0 -34
  269. data/tests/unittest/ie.md +0 -61
  270. data/tests/unittest/links2.md +0 -34
  271. data/tests/unittest/lists11.md +0 -28
  272. data/tests/unittest/lists6.md +0 -53
  273. data/tests/unittest/lists9.md +0 -76
  274. data/tests/unittest/lists_ol.md +0 -274
  275. data/tests/unittest/math/equations.md +0 -86
  276. data/tests/unittest/math/inline.md +0 -58
  277. data/tests/unittest/math/math2.md +0 -57
  278. data/tests/unittest/math/table.md +0 -37
  279. data/tests/unittest/notyet/header_after_par.md +0 -70
  280. data/tests/unittest/pending/empty_cells.md +0 -49
  281. data/tests/unittest/red_tests/abbrev.md +0 -1388
  282. data/tests/unittest/red_tests/lists7.md +0 -68
  283. data/tests/unittest/red_tests/lists7b.md +0 -128
  284. data/tests/unittest/red_tests/lists8.md +0 -76
  285. data/tests/unittest/red_tests/xml.md +0 -70
  286. data/tests/unittest/xml2.md +0 -31
  287. data/tests/unittest/xml3.md +0 -38
  288. data/tests/utf8-files/simple.md +0 -1
  289. data/unit_test_block.sh +0 -5
  290. data/unit_test_span.sh +0 -3
@@ -1,37 +1,16 @@
1
- #--
2
- # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
- #
4
- # This file is part of Maruku.
5
- #
6
- # Maruku is free software; you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation; either version 2 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # Maruku is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with Maruku; if not, write to the Free Software
18
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
- #++
20
-
21
-
22
- require 'iconv'
23
-
24
-
25
- module MaRuKu; module In; module Markdown; module BlockLevelParser
26
-
27
- def parse_doc(s)
28
- # FIXME \r\n => \n
29
- meta2 = parse_email_headers(s)
30
- data = meta2[:data]
31
- meta2.delete :data
32
-
33
- self.attributes.merge! meta2
34
-
1
+ require 'strscan'
2
+ require 'cgi'
3
+
4
+ module MaRuKu::In::Markdown::BlockLevelParser
5
+
6
+ def parse_doc(s)
7
+ # Remove BOM if it is present
8
+ s = s.sub(/^\xEF\xBB\xBF/u, '')
9
+ meta2 = parse_email_headers(s)
10
+ data = meta2.delete :data
11
+
12
+ self.attributes.merge! meta2
13
+
35
14
  =begin maruku_doc
36
15
  Attribute: encoding
37
16
  Scope: document
@@ -39,50 +18,42 @@ Summary: Encoding for the document.
39
18
 
40
19
  If the `encoding` attribute is specified, then the content
41
20
  will be converted from the specified encoding to UTF-8.
42
-
43
- Conversion happens using the `iconv` library.
44
21
  =end
45
22
 
46
- enc = self.attributes[:encoding]
47
- self.attributes.delete :encoding
48
- if enc && enc.downcase != 'utf-8'
49
- converted = Iconv.new('utf-8', enc).iconv(data)
50
-
51
- # puts "Data: #{data.inspect}: #{data}"
52
- # puts "Conv: #{converted.inspect}: #{converted}"
53
-
54
- data = converted
55
- end
56
-
57
- @children = parse_text_as_markdown(data)
58
-
59
- if true #markdown_extra?
60
- self.search_abbreviations
61
- self.substitute_markdown_inside_raw_html
62
- end
63
-
64
- toc = create_toc
65
-
66
- # use title if not set
67
- if not self.attributes[:title] and toc.header_element
68
- title = toc.header_element.to_s
69
- self.attributes[:title] = title
70
- # puts "Set document title to #{title}"
71
- end
72
-
73
- # save for later use
74
- self.toc = toc
75
-
76
- # Now do the attributes magic
77
- each_element do |e|
78
- # default attribute list
79
- if default = self.ald[e.node_type.to_s]
80
- expand_attribute_list(default, e.attributes)
81
- end
82
- expand_attribute_list(e.al, e.attributes)
83
- # puts "#{e.node_type}: #{e.attributes.inspect}"
84
- end
85
-
23
+ enc = self.attributes.delete(:encoding) || 'utf-8'
24
+ if enc.downcase != 'utf-8'
25
+ # Switch to ruby 1.9 String#encode
26
+ # with backward 1.8 compatibility
27
+ if data.respond_to?(:encode!)
28
+ data.encode!('UTF-8', enc)
29
+ else
30
+ require 'iconv'
31
+ data = Iconv.new('utf-8', enc).iconv(data)
32
+ end
33
+ end
34
+
35
+ @children = parse_text_as_markdown(data)
36
+
37
+ if markdown_extra?
38
+ self.search_abbreviations
39
+ self.substitute_markdown_inside_raw_html
40
+ end
41
+
42
+ self.toc = create_toc
43
+
44
+ # use title if not set
45
+ self.attributes[:title] ||= toc.header_element.children.join if toc.header_element
46
+
47
+ # Now do the attributes magic
48
+ each_element do |e|
49
+ # default attribute list
50
+ if default = self.ald[e.node_type.to_s]
51
+ expand_attribute_list(default, e.attributes)
52
+ end
53
+ expand_attribute_list(e.al, e.attributes)
54
+ # puts "#{e.node_type}: #{e.attributes.inspect}"
55
+ end
56
+
86
57
  =begin maruku_doc
87
58
  Attribute: unsafe_features
88
59
  Scope: global
@@ -91,144 +62,101 @@ Summary: Enables execution of XML instructions.
91
62
  Disabled by default because of security concerns.
92
63
  =end
93
64
 
94
- if Maruku::Globals[:unsafe_features]
95
- self.execute_code_blocks
96
- # TODO: remove executed code blocks
97
- end
98
- end
99
-
100
- # Expands an attribute list in an Hash
101
- def expand_attribute_list(al, result)
102
- al.each do |k, v|
103
- case k
104
- when :class
105
- if not result[:class]
106
- result[:class] = v
107
- else
108
- result[:class] += " " + v
109
- end
110
- when :id; result[:id] = v
111
- when :ref;
112
- if self.ald[v]
113
- already = (result[:expanded_references] ||= [])
114
- if not already.include?(v)
115
- already.push v
116
- expand_attribute_list(self.ald[v], result)
117
- else
118
- already.push v
119
- maruku_error "Circular reference between labels.\n\n"+
120
- "Label #{v.inspect} calls itself via recursion.\nThe recursion is "+
121
- (already.map{|x| x.inspect}.join(' => '))
122
- end
123
- else
124
- if not result[:unresolved_references]
125
- result[:unresolved_references] = v
126
- else
127
- result[:unresolved_references] << " #{v}"
128
- end
129
-
130
- # $stderr.puts "Unresolved reference #{v.inspect} (avail: #{self.ald.keys.inspect})"
131
- result[v.to_sym] = true
132
- end
133
- else
134
- result[k.to_sym]=v
135
- end
136
- end
137
- end
138
-
139
- def safe_execute_code(object, code)
140
- begin
141
- return object.instance_eval(code)
142
- rescue Exception => e
143
- maruku_error "Exception while executing this:\n"+
144
- add_tabs(code, 1, ">")+
145
- "\nThe error was:\n"+
146
- add_tabs(e.inspect+"\n"+e.caller.join("\n"), 1, "|")
147
- rescue RuntimeError => e
148
- maruku_error "2: Exception while executing this:\n"+
149
- add_tabs(code, 1, ">")+
150
- "\nThe error was:\n"+
151
- add_tabs(e.inspect, 1, "|")
152
- rescue SyntaxError => e
153
- maruku_error "2: Exception while executing this:\n"+
154
- add_tabs(code, 1, ">")+
155
- "\nThe error was:\n"+
156
- add_tabs(e.inspect, 1, "|")
157
- end
158
- nil
159
- end
160
-
161
- def execute_code_blocks
162
- self.each_element(:xml_instr) do |e|
163
- if e.target == 'maruku'
164
- result = safe_execute_code(e, e.code)
165
- if result.kind_of?(String)
166
- puts "Result is : #{result.inspect}"
167
- end
168
- end
169
- end
170
- end
171
-
172
- def search_abbreviations
173
- self.abbreviations.each do |abbrev, title|
174
- reg = Regexp.new(Regexp.escape(abbrev))
175
- self.replace_each_string do |s|
176
- # bug if many abbreviations are present (agorf)
177
- if m = reg.match(s)
178
- e = md_abbr(abbrev.dup, title ? title.dup : nil)
179
- [m.pre_match, e, m.post_match]
180
- else
181
- s
182
- end
183
- end
184
- end
185
- end
186
-
187
- include REXML
188
- # (PHP Markdown extra) Search for elements that have
189
- # markdown=1 or markdown=block defined
190
- def substitute_markdown_inside_raw_html
191
- self.each_element(:raw_html) do |e|
192
- doc = e.instance_variable_get :@parsed_html
193
- if doc # valid html
194
- # parse block-level markdown elements in these HTML tags
195
- block_tags = ['div']
196
-
197
- # use xpath to find elements with 'markdown' attribute
198
- XPath.match(doc, "//*[attribute::markdown]" ).each do |e|
199
- # puts "Found #{e}"
200
- # should we parse block-level or span-level?
201
-
202
- how = e.attributes['markdown']
203
- parse_blocks = (how == 'block') || block_tags.include?(e.name)
204
-
205
- # Select all text elements of e
206
- XPath.match(e, "//text()" ).each { |original_text|
207
- s = original_text.value.strip
208
- if s.size > 0
209
-
210
- # puts "Parsing #{s.inspect} as blocks: #{parse_blocks} (#{e.name}, #{e.attributes['markdown']}) "
211
-
212
- el = md_el(:dummy,
213
- parse_blocks ? parse_text_as_markdown(s) :
214
- parse_lines_as_span([s]) )
215
- p = original_text.parent
216
- el.children_to_html.each do |x|
217
- p.insert_before(original_text, x)
218
- end
219
- p.delete(original_text)
220
-
221
- end
222
- }
223
-
224
-
225
- # remove 'markdown' attribute
226
- e.delete_attribute 'markdown'
227
-
228
- end
229
-
230
- end
231
- end
232
- end
233
-
234
- end end end end
65
+ if Maruku::Globals[:unsafe_features]
66
+ self.execute_code_blocks
67
+ # TODO: remove executed code blocks
68
+ end
69
+ end
70
+
71
+ # Expands an attribute list in an Hash
72
+ def expand_attribute_list(al, result)
73
+ al.each do |k, v|
74
+ case k
75
+ when :class
76
+ if result[:class]
77
+ result[:class] << " " << v
78
+ else
79
+ result[:class] = v
80
+ end
81
+ when :id
82
+ result[:id] = v
83
+ when :ref
84
+ if self.ald[v]
85
+ already = (result[:expanded_references] ||= [])
86
+ if !already.include?(v)
87
+ already << v
88
+ expand_attribute_list(self.ald[v], result)
89
+ else
90
+ already << v
91
+ maruku_error "Circular reference between labels.\n\n" +
92
+ "Label #{v.inspect} calls itself via recursion.\nThe recursion is " +
93
+ already.map(&:inspect).join(' => ')
94
+ end
95
+ else
96
+ if result[:unresolved_references]
97
+ result[:unresolved_references] << " " << v
98
+ else
99
+ result[:unresolved_references] = v
100
+ end
101
+
102
+ # $stderr.puts "Unresolved reference #{v.inspect} (avail: #{self.ald.keys.inspect})"
103
+ result[v.to_sym] = true
104
+ end
105
+ else
106
+ result[k.to_sym] = v
107
+ end
108
+ end
109
+ end
110
+
111
+ def safe_execute_code(object, code)
112
+ begin
113
+ object.instance_eval(code)
114
+ rescue StandardError, ScriptError => e
115
+ maruku_error "Exception while executing this:\n" +
116
+ code.gsub(/^/, ">") +
117
+ "\nThe error was:\n" +
118
+ (e.inspect + "\n" + e.send(:caller).join("\n")).gsub(/^/, "|")
119
+ nil
120
+ end
121
+ end
122
+
123
+ def execute_code_blocks
124
+ each_element(:xml_instr) do |e|
125
+ if e.target == 'maruku'
126
+ result = safe_execute_code(e, e.code)
127
+ if result.kind_of?(String)
128
+ puts "Result is : #{result.inspect}"
129
+ end
130
+ end
131
+ end
132
+ end
133
+
134
+ def search_abbreviations
135
+ abbreviations.each do |abbrev, title|
136
+ reg = Regexp.new(Regexp.escape(abbrev))
137
+ replace_each_string do |s|
138
+ # bug if many abbreviations are present (agorf)
139
+ p = StringScanner.new(s)
140
+ a = []
141
+ until p.eos?
142
+ o = ''
143
+ o << p.getch until p.scan(reg) or p.eos?
144
+ a << o unless o.empty?
145
+ a << md_abbr(abbrev.dup, title ? title.dup : nil) if p.matched == abbrev
146
+ end
147
+ a
148
+ end
149
+ end
150
+ end
151
+
152
+ # (PHP Markdown extra) Search for elements that have
153
+ # markdown=1 or markdown=block defined
154
+ def substitute_markdown_inside_raw_html
155
+ each_element(:raw_html) do |e|
156
+ html = e.parsed_html
157
+ next unless html
158
+
159
+ html.process_markdown_inside_elements(self)
160
+ end
161
+ end
162
+ end
@@ -0,0 +1,740 @@
1
+ module MaRuKu::In::Markdown::SpanLevelParser
2
+ include MaRuKu::Helpers
3
+
4
+ EscapedCharInText = '\\`*_{}[]()#.!|:+->'.split(//)
5
+ EscapedCharInQuotes = EscapedCharInText + ["'", '"']
6
+
7
+ EscapedCharInInlineCode = ['\\', '`']
8
+
9
+ IgnoreWikiLinks = MaRuKu::Globals[:ignore_wikilinks]
10
+
11
+ def parse_span(string, parent=nil)
12
+ string = Array(string).join("\n") unless string.kind_of? String
13
+ src = MaRuKu::In::Markdown::SpanLevelParser::CharSource.new(string, parent)
14
+ read_span(src, EscapedCharInText, [nil])
15
+ end
16
+
17
+ # This is the main loop for reading span elements
18
+ #
19
+ # It's long, but not *complex* or difficult to understand.
20
+ #
21
+ #
22
+ def read_span(src, escaped, exit_on_chars=nil, exit_on_strings=nil)
23
+ escaped = Array(escaped)
24
+ con = SpanContext.new
25
+ dquote_state = squote_state = :closed
26
+ c = d = prev_char = nil
27
+ while true
28
+ c = src.cur_char
29
+
30
+ # This is only an optimization which cuts 50% of the time used.
31
+ # (but you can't use a-zA-z in exit_on_chars)
32
+ if c && c =~ /[[:alnum:]]/
33
+ con.push_char src.shift_char
34
+ prev_char = c
35
+ next
36
+ end
37
+
38
+ break if Array(exit_on_chars).include?(c)
39
+ if Array(exit_on_strings).any? {|x| src.cur_chars_are x }
40
+ # Special case: bold nested in italic
41
+ break unless !(['*', '_'] & Array(exit_on_strings)).empty? &&
42
+ ['**', '__'].include?(src.cur_chars(2)) &&
43
+ !['***', '___'].include?(src.cur_chars(3))
44
+ end
45
+
46
+ # check if there are extensions
47
+ next if check_span_extensions(src, con)
48
+
49
+ case c = src.cur_char
50
+ when ' '
51
+ if src.cur_chars_are " \n"
52
+ src.ignore_chars(3)
53
+ con.push_element md_br
54
+ prev_char = ' '
55
+ next
56
+ elsif src.cur_chars_are ' >>' # closing guillemettes
57
+ src.ignore_chars(3)
58
+ con.push_element md_entity('nbsp')
59
+ con.push_element md_entity('raquo')
60
+ elsif src.cur_chars(5) =~ / '\d\ds/ # special case: '80s
61
+ src.ignore_chars(2)
62
+ con.push_space
63
+ con.push_element md_entity('rsquo')
64
+ elsif src.cur_chars_are " '" # opening single-quote
65
+ src.ignore_chars(2)
66
+ con.push_space
67
+ con.push_element md_entity('lsquo')
68
+ squote_state = :open
69
+ else
70
+ src.ignore_char
71
+ con.push_space
72
+ end
73
+ when "\n", "\t"
74
+ src.ignore_char
75
+ con.push_space
76
+ when '`'
77
+ read_inline_code(src, con)
78
+ when '<'
79
+ # It could be:
80
+ # 1) HTML "<div ..."
81
+ # 2) HTML "<!-- ..."
82
+ # 3) url "<http:// ", "<ftp:// ..."
83
+ # 4) email "<andrea@... ", "<mailto:andrea@..."
84
+ # 5) on itself! "a < b "
85
+ # 6) Start of <<guillemettes>>
86
+
87
+ case d = src.next_char
88
+ when '<' # guillemettes
89
+ if src.cur_chars_are '<< '
90
+ src.ignore_chars(3)
91
+ con.push_element md_entity('laquo')
92
+ con.push_element md_entity('nbsp')
93
+ else
94
+ src.ignore_chars(2)
95
+ con.push_element md_entity('laquo')
96
+ end
97
+ when '!'
98
+ if src.cur_chars_are '<!--'
99
+ read_inline_html(src, con)
100
+ else
101
+ con.push_char src.shift_char
102
+ end
103
+ when '?'
104
+ read_xml_instr_span(src, con)
105
+ when ' ', "\t"
106
+ con.push_char src.shift_char
107
+ else
108
+ if src.next_matches(/<mailto:/) ||
109
+ src.next_matches(/<[\w\.]+\@/)
110
+ read_email_el(src, con)
111
+ elsif src.next_matches(/<\w+:/)
112
+ read_url_el(src, con)
113
+ elsif src.next_matches(/<\w/)
114
+ #puts "This is HTML: #{src.cur_chars(20)}"
115
+ read_inline_html(src, con)
116
+ else
117
+ #puts "This is NOT HTML: #{src.cur_chars(20)}"
118
+ con.push_char src.shift_char
119
+ end
120
+ end
121
+ when '>'
122
+ if src.next_char == '>'
123
+ src.ignore_chars(2)
124
+ con.push_element md_entity('raquo')
125
+ else
126
+ con.push_char src.shift_char
127
+ end
128
+ when "\\"
129
+ d = src.next_char
130
+ if d == "'"
131
+ src.ignore_chars(2)
132
+ con.push_element md_entity('apos')
133
+ elsif d == '"'
134
+ src.ignore_chars(2)
135
+ con.push_element md_entity('quot')
136
+ elsif escaped.include? d
137
+ src.ignore_chars(2)
138
+ con.push_char d
139
+ else
140
+ con.push_char src.shift_char
141
+ end
142
+ when '['
143
+ if markdown_extra? && src.next_char == '^'
144
+ read_footnote_ref(src,con)
145
+ elsif IgnoreWikiLinks && src.next_char == '['
146
+ con.push_char src.shift_char
147
+ con.push_char src.shift_char
148
+ else
149
+ read_link(src, con)
150
+ end
151
+ when '!'
152
+ if src.next_char == '['
153
+ read_image(src, con)
154
+ else
155
+ con.push_char src.shift_char
156
+ end
157
+ when '&'
158
+ # named references
159
+ if m = src.read_regexp(/\&(\w+);/)
160
+ con.push_element md_entity(m[1])
161
+ # numeric
162
+ elsif m = src.read_regexp(/\&\#(x)?(\w+);/)
163
+ num = m[1] ? m[2].hex : m[2].to_i
164
+ con.push_element md_entity(num)
165
+ else
166
+ con.push_char src.shift_char
167
+ end
168
+ when '*'
169
+ if !src.next_char
170
+ maruku_error "Opening * as last char.", src, con, 'Treating as literal'
171
+ con.push_char src.shift_char
172
+ else
173
+ follows = src.cur_chars(4)
174
+ if follows =~ /^\*\*\*[^\s\*]/
175
+ con.push_element read_emstrong(src, '***')
176
+ elsif follows =~ /^\*\*[^\s\*]/
177
+ con.push_element read_strong(src, '**')
178
+ elsif follows =~ /^\*[^\s\*]/
179
+ con.push_element read_em(src, '*')
180
+ else # * is just a normal char
181
+ con.push_char src.shift_char
182
+ end
183
+ end
184
+ when '_'
185
+ if !src.next_char
186
+ maruku_error "Opening _ as last char", src, con, 'Treating as literal'
187
+ con.push_char src.shift_char
188
+ else
189
+ # we don't want "mod_ruby" to start an emphasis
190
+ # so we start one only if
191
+ # 1) there's nothing else in the span (first char)
192
+ # or 2) the last char was a space
193
+ # or 3) the current string is empty
194
+ #if con.elements.empty? ||
195
+ if con.is_end?
196
+ # also, we check the next characters
197
+ follows = src.cur_chars(4)
198
+ if follows =~ /^\_\_\_[^\s\_]/
199
+ con.push_element read_emstrong(src, '___')
200
+ elsif follows =~ /^\_\_[^\s\_]/
201
+ con.push_element read_strong(src, '__')
202
+ elsif follows =~ /^\_[^\s\_]/
203
+ con.push_element read_em(src, '_')
204
+ else # _ is just a normal char
205
+ con.push_char src.shift_char
206
+ end
207
+ else
208
+ # _ is just a normal char
209
+ con.push_char src.shift_char
210
+ end
211
+ end
212
+ when '{' # extension
213
+ if ['#', '.', ':'].include? src.next_char
214
+ src.ignore_char # {
215
+ interpret_extension(src, con, '}')
216
+ src.ignore_char # }
217
+ else
218
+ con.push_char src.shift_char
219
+ end
220
+ when nil
221
+ maruku_error( ("Unclosed span (waiting for %s" +
222
+ "#{exit_on_strings.inspect})") %
223
+ [ exit_on_chars ? "#{exit_on_chars.inspect} or" : "" ],
224
+ src, con)
225
+ break
226
+ when '-' # dashes
227
+ if src.next_char == '-'
228
+ if src.cur_chars_are '---'
229
+ src.ignore_chars(3)
230
+ con.push_element md_entity('mdash')
231
+ else
232
+ src.ignore_chars(2)
233
+ con.push_element md_entity('ndash')
234
+ end
235
+ else
236
+ con.push_char src.shift_char
237
+ end
238
+ when '.' # ellipses
239
+ if src.cur_chars_are '...'
240
+ src.ignore_chars(3)
241
+ con.push_element md_entity('hellip')
242
+ elsif src.cur_chars_are '. . .'
243
+ src.ignore_chars(5)
244
+ con.push_element md_entity('hellip')
245
+ else
246
+ con.push_char src.shift_char
247
+ end
248
+ when '"'
249
+ if dquote_state == :closed
250
+ dquote_state = :open
251
+ src.ignore_char
252
+ con.push_element md_entity('ldquo')
253
+ else
254
+ dquote_state = :closed
255
+ src.ignore_char
256
+ con.push_element md_entity('rdquo')
257
+ end
258
+ when "'"
259
+ if src.cur_chars(4) =~ /'\d\ds/ # special case: '80s
260
+ src.ignore_char
261
+ con.push_element md_entity('rsquo')
262
+ elsif squote_state == :open
263
+ squote_state = :closed unless src.next_char =~ /[[:alpha:]]/
264
+ src.ignore_char
265
+ con.push_element md_entity('rsquo')
266
+ else
267
+ if prev_char =~ /[[:alpha:]]/
268
+ src.ignore_char
269
+ con.push_element md_entity('rsquo')
270
+ else
271
+ src.ignore_char
272
+ con.push_element md_entity('lsquo')
273
+ squote_state = :open
274
+ end
275
+ end
276
+ else # normal text
277
+ con.push_char src.shift_char
278
+ end # end case
279
+ prev_char = c
280
+ end # end while true
281
+
282
+ con.push_string_if_present
283
+
284
+ # Assign IAL to elements
285
+ merge_ial(con.elements, src, con)
286
+
287
+ # Remove leading space
288
+ if (s = con.elements.first).kind_of? String
289
+ if s[0, 1] == ' '
290
+ con.elements[0] = s[1..-1]
291
+ end
292
+ con.elements.shift if s.empty?
293
+ end
294
+
295
+ con.elements.shift if (con.elements.first.kind_of?(String) && con.elements.first.empty?)
296
+
297
+ # Remove final spaces
298
+ if (s = con.elements.last).kind_of? String
299
+ s.chop! if s[-1, 1] == ' '
300
+ con.elements.pop if s.empty?
301
+ end
302
+
303
+ con.elements
304
+ end
305
+
306
+
307
# Reads an XML processing instruction of the form `<?target code ?>`.
#
# Start: cursor on the leading "<?".
# The optional target is a run of word characters directly after "<?";
# everything up to the closing "?>" is the code, which is stripped of
# surrounding whitespace. The element built by md_xml_instr is pushed
# onto con.
def read_xml_instr_span(src, con)
  closing = "?>"

  src.ignore_chars(2) # starting <?

  # XML instructions are invalid without a target; an empty target is
  # used when none can be read.
  found = src.read_regexp(/^(\w+)/)
  target = found ? found[1] : ''

  body = read_simple(src, nil, nil, closing)
  src.ignore_chars closing.size

  con.push_element md_xml_instr(target, (body || "").strip)
end
327
+
328
# Interprets the contents of a `{...}` extension block.
#
# Start: cursor on the character **after** '{'
# End: cursor on '}' or EOF
#
# A leading ':' is skipped; '#' and '.' are left in place. Anything else
# is read up to '}' (or one of break_on_chars), reported through
# maruku_recover, and then handled as meta-data like the other cases.
def interpret_extension(src, con, break_on_chars=nil)
  marker = src.cur_char

  if marker == ':'
    src.ignore_char # :
  elsif marker != '#' && marker != '.'
    stuff = read_simple(src, '}', break_on_chars)
    fallback = "I will treat this:\n\t{#{stuff}} \n as meta-data.\n"
    if stuff =~ /^(\w+\s|[^\w])/
      extension_id = Regexp.last_match(1).strip
      maruku_recover "I don't know what to do with extension '#{extension_id}'\n" +
        fallback, src, con
    else
      maruku_recover fallback, src, con
    end
  end

  extension_meta(src, con, break_on_chars)
end
350
+
351
# Reads the meta-data inside an extension block and pushes it onto con.
#
# When the text at the cursor matches `name:`, the attribute list that
# follows is stored in the document's `ald` table under that name and an
# md_ald element is pushed. Otherwise the attribute list is pushed as an
# md_ial element.
def extension_meta(src, con, break_on_chars=nil)
  named = src.read_regexp(/([^\s\:\"\'}]+?):/)
  attributes = read_attribute_list(src, con, break_on_chars)

  return con.push(md_ial(attributes)) unless named

  name = named[1]
  self.doc.ald[name] = attributes
  con.push md_ald(name, attributes)
end
362
+
363
# Reads an autolink of the form `<url>` and pushes the element built by
# md_url onto con.
#
# Start: cursor on the leading '<'. The text up to (not including) the
# next '>' is the url; the closing '>' is consumed as well.
def read_url_el(src,con)
  src.ignore_char # leading <
  target = read_simple(src, nil, '>')
  src.ignore_char # closing >

  con.push_element(md_url(target))
end
370
+
371
# Reads an e-mail autolink of the form `<address>` or `<mailto:address>`
# and pushes the element built by md_email onto con.
#
# Start: cursor on the leading '<'. A "mailto:" prefix is removed from
# the address before the element is built.
def read_email_el(src,con)
  src.ignore_char # leading <
  raw = read_simple(src, nil, '>')
  src.ignore_char # closing >

  con.push_element md_email(raw.gsub(/^mailto:/, ''))
end
379
+
380
# Reads a url up to (not including) one of the characters in break_on.
#
# A url starting with a quote character is reported through
# maruku_error, but reading continues. A url wrapped in angle brackets
# (`<...>`) is returned without the brackets.
# Returns nil when the resulting url is empty.
def read_url(src, break_on)
  maruku_error 'Invalid char for url', src if ["'", '"'].include?(src.cur_char)

  raw = read_simple(src, nil, break_on) || ''

  bracketed = raw[0, 1] == '<' && raw[-1, 1] == '>'
  raw = raw[1, raw.size-2] if bracketed

  raw.empty? ? nil : raw
end
394
+
395
+
396
# Reads a value that may or may not be quoted.
#
# If the cursor is on a single or double quote, the value is read with
# read_quoted. Otherwise it is read with read_simple up to one of
# exit_on_chars, with the end-of-input warning turned off.
def read_quoted_or_unquoted(src, con, escaped, exit_on_chars)
  if ["'", '"'].include?(src.cur_char)
    read_quoted(src, con)
  else
    read_simple(src, escaped, exit_on_chars, nil, false)
  end
end
404
+
405
# Tries to read a quoted value.
#
# If the stream does not start with ' or ", nothing is consumed and nil
# is returned. Otherwise the opening quote, the text up to the matching
# quote character, and the closing quote are consumed, and the text is
# returned (as produced by read_simple).
def read_quoted(src, con)
  return nil unless ["'", '"'].include?(src.cur_char)

  quote = src.shift_char # opening quote
  contents = read_simple(src, EscapedCharInQuotes, quote)
  src.ignore_char # closing quote
  contents
end
418
+
419
# Reads a simple string (no formatting) until the cursor is on one of
# exit_on_chars or at the start of one of exit_on_strings. The
# terminator itself is not consumed.
#
# A backslash followed by one of the characters in `escaped` yields just
# that character; any other backslash is kept literally.
# If the string is empty, it returns nil.
# By default, an error is reported through maruku_error if the input
# ends before a terminator is found. This can be disabled by setting the
# last argument to false.
def read_simple(src, escaped, exit_on_chars=nil, exit_on_strings=nil, warn=true)
  escaped = Array(escaped)
  exit_on_chars = Array(exit_on_chars)
  exit_on_strings = Array(exit_on_strings)
  text = ""

  loop do
    c = src.cur_char

    break if exit_on_chars.include?(c)
    break if exit_on_strings.any? { |s| src.cur_chars_are s }

    if c.nil?
      # ran out of input before finding a terminator
      if warn
        maruku_error "String finished while reading (break on " +
          "#{(exit_on_chars + exit_on_strings).inspect})" +
          " already read: #{text.inspect}", src
      end
      break
    end

    following = c == "\\" ? src.next_char : nil
    if c == "\\" && escaped.include?(following)
      # escape sequence: drop the backslash, keep the escaped character
      src.ignore_chars(2)
      text << following
    else
      text << src.shift_char
    end
  end

  text.empty? ? nil : text
end
458
+
459
# Reads an emphasis span delimited by a one-character delimiter.
#
# Start: cursor on the opening delimiter. One character is skipped, the
# contents are read with read_span up to `delim`, one more character is
# skipped, and the element built by md_em is returned.
def read_em(src, delim)
  src.ignore_char # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, delim)
  src.ignore_char # closing delimiter
  md_em(inner)
end
465
+
466
# Reads a strong span delimited by a two-character delimiter.
#
# Start: cursor on the opening delimiter. Two characters are skipped,
# the contents are read with read_span up to `delim`, two more
# characters are skipped, and the element built by md_strong is
# returned.
def read_strong(src, delim)
  src.ignore_chars(2) # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, delim)
  src.ignore_chars(2) # closing delimiter
  md_strong(inner)
end
472
+
473
# Reads a combined emphasis+strong span delimited by a three-character
# delimiter.
#
# Start: cursor on the opening delimiter. Three characters are skipped,
# the contents are read with read_span up to `delim`, three more
# characters are skipped, and the element built by md_emstrong is
# returned.
def read_emstrong(src, delim)
  src.ignore_chars(3) # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, delim)
  src.ignore_chars(3) # closing delimiter
  md_emstrong(inner)
end
479
+
480
# Reads a bracketed id "[refid]". Consumes also both brackets.
#
# Start: cursor on the opening '['.
# Returns the text between the brackets, or nil when no closing ']' can
# be matched (the opening bracket has been consumed in either case).
def read_ref_id(src, con)
  src.ignore_char # [
  found = src.read_regexp(/([^\]]*?)\]/)
  found ? found[1] : nil
end
489
+
490
# Reads a footnote reference: the bracketed id at the cursor is read
# with read_ref_id and the element built by md_foot_ref is pushed onto
# con.
def read_footnote_ref(src,con)
  con.push_element(md_foot_ref(read_ref_id(src, con)))
end
494
+
495
# Reads an inline HTML fragment and pushes the element built by md_html
# onto con.
#
# The remaining buffer is fed one character at a time into an HTMLHelper
# until the helper reports that it is finished. If the buffer runs out
# first, an error is reported through maruku_error and whatever the
# helper has read so far is still pushed. If anything raises while
# reading, the error is reported and the current character is pushed as
# plain text instead.
def read_inline_html(src, con)
  helper = HTMLHelper.new
  begin
    # This is our current buffer in the context
    remaining = src.current_remaining_buffer

    eaten = 0
    finished = false
    until finished
      if eaten >= remaining.size
        maruku_error "Malformed HTML starting at #{remaining.inspect}", src, con
        break
      end

      helper.eat_this remaining[eaten].chr
      eaten += 1
      finished = helper.is_finished?
    end

    src.ignore_chars(eaten)
    con.push_element md_html(helper.stuff_you_read)
  rescue => e
    maruku_error "Bad html: \n" +
      e.inspect.gsub(/^/, '>'), src, con, "I will try to continue after bad HTML."
    con.push_char src.shift_char
  end
end
520
+
521
# Reads an inline code span delimited by one or more backticks and
# pushes the result onto con.
#
# Start: cursor on the first opening backtick. The closing delimiter is
# a run of as many backticks as the opening one. For delimiters longer
# than one tick, one leading and one trailing space of the code are
# dropped. When no closing delimiter is found, the ticks and the text
# read so far are pushed as a plain string.
def read_inline_code(src, con)
  # Count (and consume) the opening ticks
  tick_count = 0
  while src.cur_char == '`'
    tick_count += 1
    src.ignore_char
  end

  # We will read until this string
  fence = "`" * tick_count

  # Try to handle empty single-ticks
  if tick_count > 1 && !src.next_matches(/.*#{Regexp.escape(fence)}/)
    con.push_element md_entity('ldquo')
    # NOTE(review): the opening ticks were already consumed by the loop
    # above, so this skips two further characters - confirm intended.
    src.ignore_chars(2)
    return
  end

  body = read_simple(src, nil, nil, fence)

  # We didn't find a closing batch!
  if !body || src.cur_char != '`'
    con.push_element(fence + (body || ''))
    return
  end

  src.ignore_chars tick_count

  if tick_count > 1
    # Ignore at most one leading space...
    body = body[1..-1] if body[0, 1] == ' '
    # ...and at most one trailing space
    body = body[0..-2] if body[-1, 1] == ' '
  end

  con.push_element md_code(body)
end
561
+
562
# Reads a link and pushes the result onto con.
#
# Start: cursor on the opening '['. After the bracketed link text, three
# forms are recognized:
#   [text](url "title")  -> md_im_link(children, url, title)
#   [text][ref_id]       -> md_link(children, ref_id)
#   [text]               -> md_link(children, nil)
# A single space between ']' and a following '(' or '[' is skipped.
#
# When the inline form is not closed by ')' or the reference id cannot
# be read, an error is reported through maruku_error and only the link
# text (children) is pushed.
def read_link(src, con)
  # we read the string and see what happens
  src.ignore_char # opening bracket
  children = read_span(src, EscapedCharInText, ']')
  src.ignore_char # closing bracket

  # ignore space
  if src.cur_char == ' ' && ['[', '('].include?(src.next_char)
    src.shift_char
  end

  case src.cur_char
  when '('
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [' ', "\t", ")"]) || ''

    src.consume_whitespace
    title = nil
    if src.cur_char != ')' # we have a title
      quote_char = src.cur_char
      title = read_quoted(src, con)

      if !title
        maruku_error 'Must quote title', src, con
      elsif !src.next_matches(/\s*\)/)
        # Tries to read a title with quotes: ![a](url "ti"tle")
        # this is the most ugly thing in Markdown.
        # If there is not a closing par ), then read
        # the rest and guess it's title with quotes
        rest = read_simple(src, nil, ')', nil)
        # read_simple returns nil when nothing is left to read (the input
        # ended right after the quoted part). There is nothing to append
        # then; the missing ')' is reported below.
        if rest
          # chop the closing char
          rest.chop!
          title << quote_char << rest
        end
      end
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ')'
      maruku_error 'Unclosed link', src, con, "No closing ): I will not create" +
        " the link for #{children.inspect}"
      con.push_elements children
      return
    end
    con.push_element md_im_link(children, url, title)
  when '[' # link ref
    ref_id = read_ref_id(src, con)
    if ref_id
      con.push_element md_link(children, ref_id)
    else
      maruku_error "Could not read ref_id", src, con, "I will not create the link for " +
        "#{children.inspect}"
      con.push_elements children
      return
    end
  else # empty [link]
    con.push_element md_link(children, nil)
  end
end # read link
623
+
624
# Reads an image and pushes the result onto con.
#
# Start: cursor on the opening "![". After the bracketed alt text, three
# forms are recognized:
#   ![alt](url "title")  -> md_im_image(alt_text, url, title)
#   ![alt][ref_id]       -> md_image(alt_text, ref_id)
#   ![alt]               -> md_image(alt_text, alt_text.join)
# A single space between ']' and a following '(' or '[' is skipped.
#
# Problems (missing url, unquoted title, missing ')', unclosed
# reference) are reported through maruku_error; an element is pushed in
# every case.
def read_image(src, con)
  src.ignore_chars(2) # opening "!["
  alt_text = read_span(src, EscapedCharInText, ']')
  src.ignore_char # closing bracket
  # ignore space
  if src.cur_char == ' ' && ['[', '('].include?(src.next_char)
    src.ignore_char
  end
  case src.cur_char
  when '('
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [' ', "\t", ')'])
    unless url
      maruku_error "Could not read url from #{src.cur_chars(10).inspect}", src, con
    end
    src.consume_whitespace
    title = nil
    if src.cur_char != ')' # we have a title
      quote_char = src.cur_char
      title = read_quoted(src, con)
      if !title
        maruku_error 'Must quote title', src, con
      elsif !src.next_matches(/\s*\)/)
        # Tries to read a title with quotes: ![a](url "ti"tle")
        # this is the most ugly thing in Markdown.
        # If there is not a closing par ), then read
        # the rest and guess it's title with quotes
        rest = read_simple(src, nil, ')', nil)
        # read_simple returns nil when nothing is left to read (the input
        # ended right after the quoted part). There is nothing to append
        # then; the missing ')' is reported below.
        if rest
          # chop the closing char
          rest.chop!
          title << quote_char << rest
        end
      end
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ')'
      maruku_error "Unclosed link: '#{closing}'" +
        " Read url=#{url.inspect} title=#{title.inspect}", src, con
    end
    con.push_element md_im_image(alt_text, url, title)
  when '[' # link ref
    ref_id = read_ref_id(src, con)
    if !ref_id # TODO: check around
      maruku_error 'Reference not closed.', src, con
      ref_id = ""
    end

    con.push_element md_image(alt_text, ref_id)
  else # no stuff
    ref_id = alt_text.join
    con.push_element md_image(alt_text, ref_id)
  end
end # read image
680
+
681
+ class SpanContext
682
+ # Read elements
683
+ attr_accessor :elements
684
+
685
+ def initialize
686
+ @elements = []
687
+ @cur_string = ''
688
+ end
689
+
690
+ def push_element(e)
691
+ raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} " unless
692
+ e.kind_of?(String) || e.kind_of?(MaRuKu::MDElement)
693
+
694
+ push_string_if_present
695
+
696
+ @elements << e
697
+ end
698
+ alias push push_element
699
+
700
+ def push_elements(a)
701
+ a.each do |e|
702
+ if e.kind_of? String
703
+ @cur_string << e
704
+ else
705
+ push_element e
706
+ end
707
+ end
708
+ end
709
+
710
+ def is_end?
711
+ @cur_string.empty? || @cur_string =~ /\s\z/
712
+ end
713
+
714
+ def push_string_if_present
715
+ unless @cur_string.empty?
716
+ @elements << @cur_string
717
+ @cur_string = ''
718
+ end
719
+ end
720
+
721
+ def push_char(c)
722
+ @cur_string << c
723
+ end
724
+
725
+ # push space into current string if
726
+ # there isn't one
727
+ def push_space
728
+ @cur_string << ' ' unless @cur_string[-1, 1] == ' '
729
+ end
730
+
731
+ def describe
732
+ lines = @elements.map{|x| x.inspect }.join("\n")
733
+ s = "Elements read in span: \n" +
734
+ lines.gsub(/^/, ' -') + "\n"
735
+
736
+ s += "Current string: \n #{@cur_string.inspect}\n" unless @cur_string.empty?
737
+ s
738
+ end
739
+ end
740
+ end