maruku 0.6.1 → 0.7.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (263) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/MIT-LICENSE.txt +20 -0
  5. data/bin/maruku +153 -152
  6. data/bin/marutex +2 -29
  7. data/data/entities.xml +261 -0
  8. data/docs/math.md +14 -18
  9. data/lib/maruku.rb +65 -77
  10. data/lib/maruku/attributes.rb +109 -214
  11. data/lib/maruku/defaults.rb +45 -67
  12. data/lib/maruku/document.rb +43 -0
  13. data/lib/maruku/element.rb +112 -0
  14. data/lib/maruku/errors.rb +71 -0
  15. data/lib/maruku/ext/div.rb +105 -113
  16. data/lib/maruku/ext/fenced_code.rb +97 -0
  17. data/lib/maruku/ext/math.rb +22 -26
  18. data/lib/maruku/ext/math/elements.rb +20 -26
  19. data/lib/maruku/ext/math/mathml_engines/blahtex.rb +92 -104
  20. data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +33 -26
  21. data/lib/maruku/ext/math/mathml_engines/none.rb +11 -19
  22. data/lib/maruku/ext/math/mathml_engines/ritex.rb +2 -4
  23. data/lib/maruku/ext/math/parsing.rb +107 -113
  24. data/lib/maruku/ext/math/to_html.rb +184 -187
  25. data/lib/maruku/ext/math/to_latex.rb +30 -21
  26. data/lib/maruku/helpers.rb +158 -257
  27. data/lib/maruku/html.rb +254 -0
  28. data/lib/maruku/input/charsource.rb +272 -319
  29. data/lib/maruku/input/extensions.rb +62 -63
  30. data/lib/maruku/input/html_helper.rb +220 -189
  31. data/lib/maruku/input/linesource.rb +90 -110
  32. data/lib/maruku/input/mdline.rb +129 -0
  33. data/lib/maruku/input/parse_block.rb +618 -612
  34. data/lib/maruku/input/parse_doc.rb +145 -215
  35. data/lib/maruku/input/parse_span.rb +658 -0
  36. data/lib/maruku/input/rubypants.rb +200 -128
  37. data/lib/maruku/inspect_element.rb +60 -0
  38. data/lib/maruku/maruku.rb +10 -31
  39. data/lib/maruku/output/entity_table.rb +33 -0
  40. data/lib/maruku/output/s5/fancy.rb +462 -462
  41. data/lib/maruku/output/s5/to_s5.rb +115 -135
  42. data/lib/maruku/output/to_html.rb +898 -983
  43. data/lib/maruku/output/to_latex.rb +561 -560
  44. data/lib/maruku/output/to_markdown.rb +207 -162
  45. data/lib/maruku/output/to_s.rb +11 -52
  46. data/lib/maruku/string_utils.rb +129 -179
  47. data/lib/maruku/toc.rb +185 -196
  48. data/lib/maruku/version.rb +33 -38
  49. data/spec/block_docs/abbrev.md +776 -0
  50. data/{tests/unittest → spec/block_docs}/abbreviations.md +11 -20
  51. data/{tests/unittest → spec/block_docs}/alt.md +2 -14
  52. data/{tests/unittest/pending → spec/block_docs}/amps.md +1 -13
  53. data/{tests/unittest → spec/block_docs}/attributes/att2.md +0 -12
  54. data/{tests/unittest → spec/block_docs}/attributes/att3.md +2 -14
  55. data/{tests/unittest → spec/block_docs}/attributes/attributes.md +12 -16
  56. data/{tests/unittest → spec/block_docs}/attributes/circular.md +0 -12
  57. data/{tests/unittest → spec/block_docs}/attributes/default.md +1 -13
  58. data/{tests/unittest → spec/block_docs}/blank.md +0 -12
  59. data/{tests/unittest → spec/block_docs}/blanks_in_code.md +16 -15
  60. data/{tests/unittest/loss.md → spec/block_docs/bug_def.md} +6 -18
  61. data/{tests/unittest → spec/block_docs}/bug_table.md +3 -15
  62. data/{tests/unittest → spec/block_docs}/code.md +7 -14
  63. data/{tests/unittest → spec/block_docs}/code2.md +4 -14
  64. data/{tests/unittest → spec/block_docs}/code3.md +12 -16
  65. data/{tests/unittest → spec/block_docs}/data_loss.md +2 -14
  66. data/{tests/unittest → spec/block_docs}/divs/div1.md +0 -12
  67. data/{tests/unittest → spec/block_docs}/divs/div2.md +0 -12
  68. data/{tests/unittest → spec/block_docs}/divs/div3_nest.md +3 -15
  69. data/{tests/unittest → spec/block_docs}/easy.md +1 -13
  70. data/spec/block_docs/email.md +29 -0
  71. data/{tests/unittest/pending → spec/block_docs}/empty_cells.md +3 -15
  72. data/{tests/unittest → spec/block_docs}/encoding/iso-8859-1.md +1 -14
  73. data/{tests/unittest → spec/block_docs}/encoding/utf-8.md +0 -12
  74. data/{tests/unittest → spec/block_docs}/entities.md +27 -29
  75. data/{tests/unittest/notyet → spec/block_docs}/escape.md +2 -14
  76. data/{tests/unittest → spec/block_docs}/escaping.md +11 -22
  77. data/{tests/unittest → spec/block_docs}/extra_dl.md +2 -13
  78. data/{tests/unittest → spec/block_docs}/extra_header_id.md +14 -20
  79. data/{tests/unittest → spec/block_docs}/extra_table1.md +3 -15
  80. data/spec/block_docs/fenced_code_blocks.md +66 -0
  81. data/spec/block_docs/fenced_code_blocks_highlighted.md +18 -0
  82. data/{tests/unittest → spec/block_docs}/footnotes.md +12 -24
  83. data/spec/block_docs/footnotes2.md +78 -0
  84. data/spec/block_docs/hard.md +25 -0
  85. data/spec/block_docs/header_after_par.md +62 -0
  86. data/{tests/unittest → spec/block_docs}/headers.md +10 -18
  87. data/{tests/unittest → spec/block_docs}/hex_entities.md +7 -18
  88. data/{tests/unittest → spec/block_docs}/hrule.md +5 -12
  89. data/{tests/unittest → spec/block_docs}/html3.md +1 -13
  90. data/{tests/unittest → spec/block_docs}/html4.md +2 -14
  91. data/{tests/unittest → spec/block_docs}/html5.md +2 -14
  92. data/spec/block_docs/html_block_in_para.md +22 -0
  93. data/spec/block_docs/html_inline.md +25 -0
  94. data/spec/block_docs/html_trailing.md +31 -0
  95. data/spec/block_docs/ie.md +62 -0
  96. data/spec/block_docs/iframe.md +29 -0
  97. data/{tests/unittest → spec/block_docs}/images.md +22 -28
  98. data/{tests/unittest → spec/block_docs}/images2.md +7 -17
  99. data/{tests/unittest → spec/block_docs}/inline_html.md +37 -67
  100. data/{tests/unittest → spec/block_docs}/inline_html2.md +1 -13
  101. data/spec/block_docs/inline_html_beginning.md +10 -0
  102. data/spec/block_docs/issue20.md +9 -0
  103. data/spec/block_docs/issue26.md +22 -0
  104. data/spec/block_docs/issue29.md +9 -0
  105. data/spec/block_docs/issue30.md +30 -0
  106. data/spec/block_docs/issue31.md +25 -0
  107. data/spec/block_docs/issue40.md +40 -0
  108. data/spec/block_docs/issue64.md +55 -0
  109. data/spec/block_docs/issue67.md +19 -0
  110. data/spec/block_docs/issue70.md +11 -0
  111. data/spec/block_docs/issue72.md +17 -0
  112. data/spec/block_docs/issue74.md +38 -0
  113. data/spec/block_docs/issue79.md +15 -0
  114. data/spec/block_docs/issue83.md +13 -0
  115. data/spec/block_docs/issue85.md +25 -0
  116. data/spec/block_docs/issue88.md +19 -0
  117. data/spec/block_docs/issue89.md +12 -0
  118. data/spec/block_docs/issue90.md +38 -0
  119. data/{tests/unittest/pending → spec/block_docs}/link.md +21 -18
  120. data/{tests/unittest → spec/block_docs}/links.md +33 -32
  121. data/spec/block_docs/links2.md +21 -0
  122. data/{tests/unittest → spec/block_docs}/list1.md +0 -12
  123. data/{tests/unittest → spec/block_docs}/list12.md +2 -14
  124. data/{tests/unittest → spec/block_docs}/list2.md +2 -14
  125. data/spec/block_docs/list_multipara.md +42 -0
  126. data/{tests/unittest → spec/block_docs}/lists.md +28 -29
  127. data/{tests/unittest → spec/block_docs}/lists10.md +2 -14
  128. data/spec/block_docs/lists11.md +23 -0
  129. data/spec/block_docs/lists12.md +43 -0
  130. data/spec/block_docs/lists13.md +55 -0
  131. data/spec/block_docs/lists14.md +61 -0
  132. data/spec/block_docs/lists15.md +36 -0
  133. data/spec/block_docs/lists6.md +88 -0
  134. data/spec/block_docs/lists7b.md +58 -0
  135. data/spec/block_docs/lists9.md +53 -0
  136. data/{tests/unittest → spec/block_docs}/lists_after_paragraph.md +19 -25
  137. data/spec/block_docs/lists_blank.md +35 -0
  138. data/{tests/unittest/list3.md → spec/block_docs/lists_blockquote_code.md} +2 -14
  139. data/{tests/unittest/list4.md → spec/block_docs/lists_need_blank_line.md} +50 -21
  140. data/spec/block_docs/lists_nested.md +44 -0
  141. data/spec/block_docs/lists_nested_blankline.md +28 -0
  142. data/spec/block_docs/lists_nested_deep.md +43 -0
  143. data/{tests/unittest → spec/block_docs}/lists_ol.md +37 -54
  144. data/spec/block_docs/lists_paraindent.md +47 -0
  145. data/spec/block_docs/lists_tab.md +54 -0
  146. data/spec/block_docs/loss.md +17 -0
  147. data/spec/block_docs/math-blahtex/equations.md +30 -0
  148. data/spec/block_docs/math-blahtex/inline.md +48 -0
  149. data/spec/block_docs/math-blahtex/math2.md +45 -0
  150. data/spec/block_docs/math-blahtex/table.md +25 -0
  151. data/spec/block_docs/math/embedded_invalid_svg.md +79 -0
  152. data/spec/block_docs/math/embedded_svg.md +97 -0
  153. data/spec/block_docs/math/equations.md +44 -0
  154. data/{tests/unittest → spec/block_docs}/math/inline.md +7 -19
  155. data/spec/block_docs/math/math2.md +45 -0
  156. data/{tests/unittest → spec/block_docs}/math/notmath.md +0 -12
  157. data/spec/block_docs/math/raw_mathml.md +87 -0
  158. data/spec/block_docs/math/table.md +25 -0
  159. data/{tests/unittest → spec/block_docs}/math/table2.md +5 -17
  160. data/{tests/unittest → spec/block_docs}/misc_sw.md +181 -118
  161. data/{tests/unittest → spec/block_docs}/olist.md +6 -18
  162. data/{tests/unittest → spec/block_docs}/one.md +0 -12
  163. data/{tests/unittest → spec/block_docs}/paragraph.md +0 -12
  164. data/{tests/unittest → spec/block_docs}/paragraph_rules/dont_merge_ref.md +4 -12
  165. data/{tests/unittest → spec/block_docs}/paragraph_rules/tab_is_blank.md +0 -12
  166. data/{tests/unittest → spec/block_docs}/paragraphs.md +1 -13
  167. data/{tests/unittest → spec/block_docs}/recover/recover_links.md +4 -16
  168. data/{tests/unittest/pending/ref.md → spec/block_docs/ref_with_period.md} +7 -16
  169. data/spec/block_docs/ref_with_title.md +22 -0
  170. data/{tests/unittest → spec/block_docs}/references/long_example.md +16 -23
  171. data/{tests/unittest → spec/block_docs}/references/spaces_and_numbers.md +0 -12
  172. data/{tests/unittest → spec/block_docs}/smartypants.md +24 -31
  173. data/{tests/unittest → spec/block_docs}/syntax_hl.md +13 -17
  174. data/{tests/unittest → spec/block_docs}/table_attributes.md +2 -14
  175. data/spec/block_docs/tables.md +58 -0
  176. data/{tests/unittest → spec/block_docs}/test.md +1 -13
  177. data/{tests/unittest/notyet → spec/block_docs}/ticks.md +1 -13
  178. data/spec/block_docs/toc.md +87 -0
  179. data/{tests/unittest/notyet → spec/block_docs}/triggering.md +14 -25
  180. data/{tests/unittest → spec/block_docs}/underscore_in_words.md +0 -12
  181. data/{tests/unittest → spec/block_docs}/wrapping.md +4 -16
  182. data/spec/block_docs/xml.md +33 -0
  183. data/{tests/unittest → spec/block_docs}/xml2.md +0 -12
  184. data/spec/block_docs/xml3.md +24 -0
  185. data/{tests/unittest → spec/block_docs}/xml_instruction.md +9 -20
  186. data/spec/block_spec.rb +110 -0
  187. data/spec/cli_spec.rb +8 -0
  188. data/spec/span_spec.rb +256 -0
  189. data/spec/spec_helper.rb +2 -0
  190. data/spec/to_html_utf8_spec.rb +13 -0
  191. metadata +205 -243
  192. metadata.gz.sig +3 -0
  193. data/Rakefile +0 -48
  194. data/bin/marudown +0 -29
  195. data/bin/marutest +0 -345
  196. data/docs/changelog.md +0 -334
  197. data/lib/maruku/errors_management.rb +0 -92
  198. data/lib/maruku/ext/math/latex_fix.rb +0 -12
  199. data/lib/maruku/input/parse_span_better.rb +0 -746
  200. data/lib/maruku/input/type_detection.rb +0 -147
  201. data/lib/maruku/output/to_latex_entities.rb +0 -367
  202. data/lib/maruku/output/to_latex_strings.rb +0 -64
  203. data/lib/maruku/structures.rb +0 -167
  204. data/lib/maruku/structures_inspect.rb +0 -87
  205. data/lib/maruku/structures_iterators.rb +0 -61
  206. data/lib/maruku/tests/benchmark.rb +0 -82
  207. data/lib/maruku/tests/new_parser.rb +0 -373
  208. data/lib/maruku/tests/tests.rb +0 -136
  209. data/lib/maruku/usage/example1.rb +0 -33
  210. data/tests/bugs/code_in_links.md +0 -101
  211. data/tests/bugs/complex_escaping.md +0 -38
  212. data/tests/math/syntax.md +0 -46
  213. data/tests/math_usage/document.md +0 -13
  214. data/tests/others/abbreviations.md +0 -11
  215. data/tests/others/blank.md +0 -4
  216. data/tests/others/code.md +0 -5
  217. data/tests/others/code2.md +0 -8
  218. data/tests/others/code3.md +0 -16
  219. data/tests/others/email.md +0 -4
  220. data/tests/others/entities.md +0 -19
  221. data/tests/others/escaping.md +0 -16
  222. data/tests/others/extra_dl.md +0 -101
  223. data/tests/others/extra_header_id.md +0 -13
  224. data/tests/others/extra_table1.md +0 -40
  225. data/tests/others/footnotes.md +0 -17
  226. data/tests/others/headers.md +0 -10
  227. data/tests/others/hrule.md +0 -10
  228. data/tests/others/images.md +0 -20
  229. data/tests/others/inline_html.md +0 -42
  230. data/tests/others/links.md +0 -38
  231. data/tests/others/list1.md +0 -4
  232. data/tests/others/list2.md +0 -5
  233. data/tests/others/list3.md +0 -8
  234. data/tests/others/lists.md +0 -32
  235. data/tests/others/lists_after_paragraph.md +0 -44
  236. data/tests/others/lists_ol.md +0 -39
  237. data/tests/others/misc_sw.md +0 -105
  238. data/tests/others/one.md +0 -1
  239. data/tests/others/paragraphs.md +0 -13
  240. data/tests/others/sss06.md +0 -352
  241. data/tests/others/test.md +0 -4
  242. data/tests/s5/s5profiling.md +0 -48
  243. data/tests/unittest/bug_def.md +0 -28
  244. data/tests/unittest/email.md +0 -32
  245. data/tests/unittest/html2.md +0 -34
  246. data/tests/unittest/ie.md +0 -61
  247. data/tests/unittest/links2.md +0 -34
  248. data/tests/unittest/lists11.md +0 -28
  249. data/tests/unittest/lists6.md +0 -53
  250. data/tests/unittest/lists9.md +0 -76
  251. data/tests/unittest/math/equations.md +0 -86
  252. data/tests/unittest/math/math2.md +0 -57
  253. data/tests/unittest/math/table.md +0 -37
  254. data/tests/unittest/notyet/header_after_par.md +0 -70
  255. data/tests/unittest/red_tests/abbrev.md +0 -1388
  256. data/tests/unittest/red_tests/lists7.md +0 -68
  257. data/tests/unittest/red_tests/lists7b.md +0 -128
  258. data/tests/unittest/red_tests/lists8.md +0 -76
  259. data/tests/unittest/red_tests/xml.md +0 -70
  260. data/tests/unittest/xml3.md +0 -38
  261. data/tests/utf8-files/simple.md +0 -1
  262. data/unit_test_block.sh +0 -5
  263. data/unit_test_span.sh +0 -3
@@ -1,92 +0,0 @@
1
- #--
2
- # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
- #
4
- # This file is part of Maruku.
5
- #
6
- # Maruku is free software; you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation; either version 2 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # Maruku is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with Maruku; if not, write to the Free Software
18
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
- #++
20
-
21
-
22
-
23
- #m Any method that detects formatting error calls the
24
- #m maruku_error() method.
25
- #m if @meta[:on_error] ==
26
- #m
27
- #m - :warning write on the standard err (or @error_stream if defined),
28
- #m then do your best.
29
- #m - :ignore be shy and try to continue
30
- #m - :raise raises a MarukuException
31
- #m
32
- #m default is :raise
33
-
34
- module MaRuKu
35
-
36
- class Exception < RuntimeError
37
- end
38
-
39
- module Errors
40
-
41
- def maruku_error(s,src=nil,con=nil)
42
- policy = get_setting(:on_error)
43
-
44
- case policy
45
- when :ignore
46
- when :raise
47
- raise_error create_frame(describe_error(s,src,con))
48
- when :warning
49
- tell_user create_frame(describe_error(s,src,con))
50
- else
51
- raise "BugBug: policy = #{policy.inspect}"
52
- end
53
- end
54
-
55
- def maruku_recover(s,src=nil,con=nil)
56
- tell_user create_frame(describe_error(s,src,con))
57
- end
58
-
59
- alias error maruku_error
60
-
61
- def raise_error(s)
62
- raise MaRuKu::Exception, s, caller
63
- end
64
-
65
- def tell_user(s)
66
- error_stream = self.attributes[:error_stream] || $stderr
67
- error_stream << s
68
- end
69
-
70
- def create_frame(s)
71
- n = 75
72
- "\n" +
73
- " "+"_"*n + "\n"+
74
- "| Maruku tells you:\n" +
75
- "+" + ("-"*n) +"\n"+
76
- add_tabs(s,1,'| ') + "\n" +
77
- "+" + ("-"*n) + "\n" +
78
- add_tabs(caller[0, 5].join("\n"),1,'!') + "\n" +
79
- "\\" + ("_"*n) + "\n"
80
- end
81
-
82
- def describe_error(s,src,con)
83
- t = s
84
- src && (t += "\n#{src.describe}\n")
85
- con && (t += "\n#{con.describe}\n")
86
- t
87
- end
88
-
89
- end # Errors
90
- end # MaRuKu
91
-
92
-
@@ -1,12 +0,0 @@
1
- class String
2
- # fix some LaTeX command-name clashes
3
- def fix_latex
4
- if #{html_math_engine} == 'itex2mml'
5
- s = self.gsub("\\mathop{", "\\operatorname{")
6
- s.gsub!(/\\begin\{svg\}.*?\\end\{svg\}/m, " ")
7
- s.gsub("\\space{", "\\itexspace{")
8
- else
9
- self
10
- end
11
- end
12
- end
@@ -1,746 +0,0 @@
1
- #--
2
- # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
- #
4
- # This file is part of Maruku.
5
- #
6
- # Maruku is free software; you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation; either version 2 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # Maruku is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with Maruku; if not, write to the Free Software
18
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
- #++
20
-
21
-
22
- require 'set'
23
-
24
- module MaRuKu; module In; module Markdown; module SpanLevelParser
25
- include MaRuKu::Helpers
26
-
27
- EscapedCharInText =
28
- Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>]
29
-
30
- EscapedCharInQuotes =
31
- Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>,?',?"]
32
-
33
- EscapedCharInInlineCode = [?\\,?`]
34
-
35
- def parse_lines_as_span(lines, parent=nil)
36
- parse_span_better lines.join("\n"), parent
37
- end
38
-
39
- def parse_span_better(string, parent=nil)
40
- if not string.kind_of? String then
41
- error "Passed #{string.class}." end
42
-
43
- st = (string + "")
44
- st.freeze
45
- src = CharSource.new(st, parent)
46
- read_span(src, EscapedCharInText, [nil])
47
- end
48
-
49
- # This is the main loop for reading span elements
50
- #
51
- # It's long, but not *complex* or difficult to understand.
52
- #
53
- #
54
- def read_span(src, escaped, exit_on_chars, exit_on_strings=nil)
55
- con = SpanContext.new
56
- c = d = nil
57
- while true
58
- c = src.cur_char
59
-
60
- # This is only an optimization which cuts 50% of the time used.
61
- # (but you can't use a-zA-z in exit_on_chars)
62
- if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z)))
63
- con.cur_string << src.shift_char
64
- next
65
- end
66
-
67
- break if exit_on_chars && exit_on_chars.include?(c)
68
- break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x}
69
-
70
- # check if there are extensions
71
- if check_span_extensions(src, con)
72
- next
73
- end
74
-
75
- case c = src.cur_char
76
- when ?\ # it's space (32)
77
- if src.cur_chars_are " \n"
78
- src.ignore_chars(3)
79
- con.push_element md_br()
80
- next
81
- else
82
- src.ignore_char
83
- con.push_space
84
- end
85
- when ?\n, ?\t
86
- src.ignore_char
87
- con.push_space
88
- when ?`
89
- read_inline_code(src,con)
90
- when ?<
91
- # It could be:
92
- # 1) HTML "<div ..."
93
- # 2) HTML "<!-- ..."
94
- # 3) url "<http:// ", "<ftp:// ..."
95
- # 4) email "<andrea@... ", "<mailto:andrea@..."
96
- # 5) on itself! "a < b "
97
- # 6) Start of <<guillemettes>>
98
-
99
- case d = src.next_char
100
- when ?<; # guillemettes
101
- src.ignore_chars(2)
102
- con.push_char ?<
103
- con.push_char ?<
104
- when ?!;
105
- if src.cur_chars_are '<!--'
106
- read_inline_html(src, con)
107
- else
108
- con.push_char src.shift_char
109
- end
110
- when ??
111
- read_xml_instr_span(src, con)
112
- when ?\ , ?\t
113
- con.push_char src.shift_char
114
- else
115
- if src.next_matches(/<mailto:/) or
116
- src.next_matches(/<[\w\.]+\@/)
117
- read_email_el(src, con)
118
- elsif src.next_matches(/<\w+:/)
119
- read_url_el(src, con)
120
- elsif src.next_matches(/<\w/)
121
- #puts "This is HTML: #{src.cur_chars(20)}"
122
- read_inline_html(src, con)
123
- else
124
- #puts "This is NOT HTML: #{src.cur_chars(20)}"
125
- con.push_char src.shift_char
126
- end
127
- end
128
- when ?\\
129
- d = src.next_char
130
- if d == ?'
131
- src.ignore_chars(2)
132
- con.push_element md_entity('apos')
133
- elsif d == ?"
134
- src.ignore_chars(2)
135
- con.push_element md_entity('quot')
136
- elsif escaped.include? d
137
- src.ignore_chars(2)
138
- con.push_char d
139
- else
140
- con.push_char src.shift_char
141
- end
142
- when ?[
143
- if markdown_extra? && src.next_char == ?^
144
- read_footnote_ref(src,con)
145
- else
146
- read_link(src, con)
147
- end
148
- when ?!
149
- if src.next_char == ?[
150
- read_image(src, con)
151
- else
152
- con.push_char src.shift_char
153
- end
154
- when ?&
155
- # named references
156
- if m = src.read_regexp(/\&([\w\d]+);/)
157
- con.push_element md_entity(m[1])
158
- # numeric
159
- elsif m = src.read_regexp(/\&\#(x)?([\w\d]+);/)
160
- num = m[1] ? m[2].hex : m[2].to_i
161
- con.push_element md_entity(num)
162
- else
163
- con.push_char src.shift_char
164
- end
165
- when ?*
166
- if not src.next_char
167
- maruku_error "Opening * as last char.", src, con
168
- maruku_recover "Threating as literal"
169
- con.push_char src.shift_char
170
- else
171
- follows = src.cur_chars(4)
172
- if follows =~ /^\*\*\*[^\s\*]/
173
- con.push_element read_emstrong(src,'***')
174
- elsif follows =~ /^\*\*[^\s\*]/
175
- con.push_element read_strong(src,'**')
176
- elsif follows =~ /^\*[^\s\*]/
177
- con.push_element read_em(src,'*')
178
- else # * is just a normal char
179
- con.push_char src.shift_char
180
- end
181
- end
182
- when ?_
183
- if not src.next_char
184
- maruku_error "Opening _ as last char", src, con
185
- maruku_recover "Threating as literal", src, con
186
- con.push_char src.shift_char
187
- else
188
- # we don't want "mod_ruby" to start an emphasis
189
- # so we start one only if
190
- # 1) there's nothing else in the span (first char)
191
- # or 2) the last char was a space
192
- # or 3) the current string is empty
193
- #if con.elements.empty? ||
194
- if (con.cur_string =~ /\s\Z/) || (con.cur_string.size == 0)
195
- # also, we check the next characters
196
- follows = src.cur_chars(4)
197
- if follows =~ /^\_\_\_[^\s\_]/
198
- con.push_element read_emstrong(src,'___')
199
- elsif follows =~ /^\_\_[^\s\_]/
200
- con.push_element read_strong(src,'__')
201
- elsif follows =~ /^\_[^\s\_]/
202
- con.push_element read_em(src,'_')
203
- else # _ is just a normal char
204
- con.push_char src.shift_char
205
- end
206
- else
207
- # _ is just a normal char
208
- con.push_char src.shift_char
209
- end
210
- end
211
- when ?{ # extension
212
- if [?#, ?., ?:].include? src.next_char
213
- src.ignore_char # {
214
- interpret_extension(src, con, [?}])
215
- src.ignore_char # }
216
- else
217
- con.push_char src.shift_char
218
- end
219
- when nil
220
- maruku_error( ("Unclosed span (waiting for %s"+
221
- "#{exit_on_strings.inspect})") % [
222
- exit_on_chars ? "#{exit_on_chars.inspect} or" : ""],
223
- src,con)
224
- break
225
- else # normal text
226
- con.push_char src.shift_char
227
- end # end case
228
- end # end while true
229
- con.push_string_if_present
230
-
231
- # Assign IAL to elements
232
- merge_ial(con.elements, src, con)
233
-
234
-
235
- # Remove leading space
236
- if (s = con.elements.first).kind_of? String
237
- if s[0] == ?\ then con.elements[0] = s[1, s.size-1] end
238
- con.elements.shift if s.size == 0
239
- end
240
-
241
- # Remove final spaces
242
- if (s = con.elements.last).kind_of? String
243
- s.chop! if s[-1] == ?\
244
- con.elements.pop if s.size == 0
245
- end
246
-
247
- educated = educate(con.elements)
248
-
249
- educated
250
- end
251
-
252
-
253
- def read_xml_instr_span(src, con)
254
- src.ignore_chars(2) # starting <?
255
-
256
- # read target <?target code... ?>
257
- target = if m = src.read_regexp(/(\w+)/)
258
- m[1]
259
- else
260
- ''
261
- end
262
-
263
- delim = "?>"
264
-
265
- code =
266
- read_simple(src, escaped=[], break_on_chars=[],
267
- break_on_strings=[delim])
268
-
269
- src.ignore_chars delim.size
270
-
271
- code = (code || "").strip
272
- con.push_element md_xml_instr(target, code)
273
- end
274
-
275
- # Start: cursor on character **after** '{'
276
- # End: curson on '}' or EOF
277
- def interpret_extension(src, con, break_on_chars)
278
- case src.cur_char
279
- when ?:
280
- src.ignore_char # :
281
- extension_meta(src, con, break_on_chars)
282
- when ?#, ?.
283
- extension_meta(src, con, break_on_chars)
284
- else
285
- stuff = read_simple(src, escaped=[?}], break_on_chars, [])
286
- if stuff =~ /^(\w+\s|[^\w])/
287
- extension_id = $1.strip
288
- if false
289
- else
290
- maruku_recover "I don't know what to do with extension '#{extension_id}'\n"+
291
- "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
292
- extension_meta(src, con, break_on_chars)
293
- end
294
- else
295
- maruku_recover "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
296
- extension_meta(src, con, break_on_chars)
297
- end
298
- end
299
- end
300
-
301
- def extension_meta(src, con, break_on_chars)
302
- if m = src.read_regexp(/([^\s\:\"\']+):/)
303
- name = m[1]
304
- al = read_attribute_list(src, con, break_on_chars)
305
- # puts "#{name}=#{al.inspect}"
306
- self.doc.ald[name] = al
307
- con.push md_ald(name, al)
308
- else
309
- al = read_attribute_list(src, con, break_on_chars)
310
- self.doc.ald[name] = al
311
- con.push md_ial(al)
312
- end
313
- end
314
-
315
- def read_url_el(src,con)
316
- src.ignore_char # leading <
317
- url = read_simple(src, [], [?>])
318
- src.ignore_char # closing >
319
-
320
- con.push_element md_url(url)
321
- end
322
-
323
- def read_email_el(src,con)
324
- src.ignore_char # leading <
325
- mail = read_simple(src, [], [?>])
326
- src.ignore_char # closing >
327
-
328
- address = mail.gsub(/^mailto:/,'')
329
- con.push_element md_email(address)
330
- end
331
-
332
- def read_url(src, break_on)
333
- if [?',?"].include? src.cur_char
334
- error 'Invalid char for url', src
335
- end
336
-
337
- url = read_simple(src, [], break_on)
338
- if not url # empty url
339
- url = ""
340
- end
341
-
342
- if url[0] == ?< && url[-1] == ?>
343
- url = url[1, url.size-2]
344
- end
345
-
346
- if url.size == 0
347
- return nil
348
- end
349
-
350
- url
351
- end
352
-
353
-
354
- def read_quoted_or_unquoted(src, con, escaped, exit_on_chars)
355
- case src.cur_char
356
- when ?', ?"
357
- read_quoted(src, con)
358
- else
359
- read_simple(src, escaped, exit_on_chars)
360
- end
361
- end
362
-
363
- # Tries to read a quoted value. If stream does not
364
- # start with ' or ", returns nil.
365
- def read_quoted(src, con)
366
- case src.cur_char
367
- when ?', ?"
368
- quote_char = src.shift_char # opening quote
369
- string = read_simple(src, EscapedCharInQuotes, [quote_char])
370
- src.ignore_char # closing quote
371
- return string
372
- else
373
- # puts "Asked to read quote from: #{src.cur_chars(10).inspect}"
374
- return nil
375
- end
376
- end
377
-
378
- # Reads a simple string (no formatting) until one of break_on_chars,
379
- # while escaping the escaped.
380
- # If the string is empty, it returns nil.
381
- # Raises on error if the string terminates unexpectedly.
382
- # # If eat_delim is true, and if the delim is not the EOF, then the delim
383
- # # gets eaten from the stream.
384
- def read_simple(src, escaped, exit_on_chars, exit_on_strings=nil)
385
- text = ""
386
- while true
387
- # puts "Reading simple #{text.inspect}"
388
- c = src.cur_char
389
- if exit_on_chars && exit_on_chars.include?(c)
390
- # src.ignore_char if eat_delim
391
- break
392
- end
393
-
394
- break if exit_on_strings &&
395
- exit_on_strings.any? {|x| src.cur_chars_are x}
396
-
397
- case c
398
- when nil
399
- s= "String finished while reading (break on "+
400
- "#{exit_on_chars.map{|x|""<<x}.inspect})"+
401
- " already read: #{text.inspect}"
402
- maruku_error s, src
403
- maruku_recover "I boldly continue", src
404
- break
405
- when ?\\
406
- d = src.next_char
407
- if escaped.include? d
408
- src.ignore_chars(2)
409
- text << d
410
- else
411
- text << src.shift_char
412
- end
413
- else
414
- text << src.shift_char
415
- end
416
- end
417
- # puts "Read simple #{text.inspect}"
418
- text.empty? ? nil : text
419
- end
420
-
421
- def read_em(src, delim)
422
- src.ignore_char
423
- children = read_span(src, EscapedCharInText, nil, [delim])
424
- src.ignore_char
425
- md_em(children)
426
- end
427
-
428
- def read_strong(src, delim)
429
- src.ignore_chars(2)
430
- children = read_span(src, EscapedCharInText, nil, [delim])
431
- src.ignore_chars(2)
432
- md_strong(children)
433
- end
434
-
435
- def read_emstrong(src, delim)
436
- src.ignore_chars(3)
437
- children = read_span(src, EscapedCharInText, nil, [delim])
438
- src.ignore_chars(3)
439
- md_emstrong(children)
440
- end
441
-
442
- SPACE = ?\ # = 32
443
-
444
- # R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
445
- # R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
446
- R_REF_ID = Regexp.compile(/([^\]]*)\]/)
447
-
448
- # Reads a bracketed id "[refid]". Consumes also both brackets.
449
- def read_ref_id(src, con)
450
- src.ignore_char # [
451
- src.consume_whitespace
452
- # puts "Next: #{src.cur_chars(10).inspect}"
453
- if m = src.read_regexp(R_REF_ID)
454
- # puts "Got: #{m[1].inspect} Ignored: #{m[2].inspect}"
455
- # puts "Then: #{src.cur_chars(10).inspect}"
456
- m[1]
457
- else
458
- nil
459
- end
460
- end
461
-
462
- def read_footnote_ref(src,con)
463
- ref = read_ref_id(src,con)
464
- con.push_element md_foot_ref(ref)
465
- end
466
-
467
- def read_inline_html(src, con)
468
- h = HTMLHelper.new
469
- begin
470
- # This is our current buffer in the context
471
- next_stuff = src.current_remaining_buffer
472
-
473
- consumed = 0
474
- while true
475
- if consumed >= next_stuff.size
476
- maruku_error "Malformed HTML starting at #{next_stuff.inspect}", src, con
477
- break
478
- end
479
-
480
- h.eat_this next_stuff[consumed].chr; consumed += 1
481
- break if h.is_finished?
482
- end
483
- src.ignore_chars(consumed)
484
- con.push_element md_html(h.stuff_you_read)
485
-
486
- #start = src.current_remaining_buffer
487
- # h.eat_this start
488
- # if not h.is_finished?
489
- # error "inline_html: Malformed:\n "+
490
- # "#{start.inspect}\n #{h.inspect}",src,con
491
- # end
492
- #
493
- # consumed = start.size - h.rest.size
494
- # if consumed > 0
495
- # con.push_element md_html(h.stuff_you_read)
496
- # src.ignore_chars(consumed)
497
- # else
498
- # puts "HTML helper did not work on #{start.inspect}"
499
- # con.push_char src.shift_char
500
- # end
501
- rescue Exception => e
502
- maruku_error "Bad html: \n" +
503
- add_tabs(e.inspect+e.backtrace.join("\n"),1,'>'),
504
- src,con
505
- maruku_recover "I will try to continue after bad HTML.", src, con
506
- con.push_char src.shift_char
507
- end
508
- end
509
-
510
# Reads an inline code span delimited by one or more backticks and pushes
# an md_code element onto +con+.
#
# The run of opening backticks is counted and skipped; the text up to the
# same number of backticks is then read with read_simple and the closing
# run is skipped. When the delimiter is longer than a single backtick, at
# most one leading and one trailing space are removed from the code.
def read_inline_code(src, con)
  # Count (and skip) the opening backticks.
  tick_count = 0
  until src.cur_char != ?`
    tick_count += 1
    src.ignore_char
  end

  # The span ends at the first run of as many backticks.
  closing_run = "`" * tick_count
  code = read_simple(src, [], [], [closing_run])
  src.ignore_chars tick_count

  if tick_count > 1
    # Ignore at most one space at the start ...
    code = code[1, code.size - 1] if code[0] == SPACE
    # ... and at most one at the end.
    code = code[0, code.size - 1] if code[-1] == SPACE
  end

  con.push_element md_code(code)
end
540
-
541
# Parses a link whose opening "[" is the current character of +src+ and
# pushes the outcome onto +con+.
#
# After the bracketed text three forms are recognised:
# * "(url "title")" -- inline link, pushed as md_im_link;
# * "[id]"          -- reference link, pushed as md_link; an empty id
#                      falls back to the link text;
# * anything else   -- the link text itself is used as the reference id.
#
# When the inline form is not closed by ")" or the reference id cannot be
# read, the problem is reported and the already parsed children are pushed
# back unchanged instead of a link.
def read_link(src, con)
  # we read the string and see what happens
  src.ignore_char # opening bracket
  children = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket

  # A single space may separate the text from "[id]" or "(url)".
  if src.cur_char == SPACE && (src.next_char == ?[ || src.next_char == ?( )
    src.shift_char
  end

  case src.cur_char
  when ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE, ?\t, ?)]) || '' # no url is ok
    src.consume_whitespace
    title = nil
    if src.cur_char != ?) # we have a title
      quote_char = src.cur_char
      title = read_quoted(src, con)

      if !title
        maruku_error 'Must quote title', src, con
      elsif !src.next_matches(/\s*\)/)
        # Tries to read a title with quotes: ![a](url "ti"tle")
        # If no closing ")" follows, read the rest up to it and guess
        # it is a title that itself contains quotes.
        rest = read_simple(src, [], [?)], [])
        # chop the closing char
        rest.chop!
        title << quote_char << rest
      end
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      maruku_error 'Unclosed link', src, con
      maruku_recover "No closing ): I will not create"+
        " the link for #{children.inspect}", src, con
      con.push_elements children
      return
    end
    con.push_element md_im_link(children, url, title)
  when ?[ # link ref
    ref_id = read_ref_id(src, con)
    if ref_id
      # An empty "[]" means the link text doubles as the id. Array#to_s is
      # the inspect form on Ruby >= 1.9, so the children are joined instead.
      ref_id = Array(children).join if ref_id.size == 0
      con.push_element md_link(children, sanitize_ref_id(ref_id))
    else
      maruku_error "Could not read ref_id", src, con
      maruku_recover "I will not create the link for "+
        "#{children.inspect}", src, con
      con.push_elements children
      return
    end
  else # empty [link]
    # Same reasoning as above: join the children rather than calling to_s.
    id = sanitize_ref_id(Array(children).join)
    con.push_element md_link(children, id)
  end
end # read link
614
-
615
# Parses an image whose opening "![" starts at the current position of
# +src+ and pushes the resulting element onto +con+.
#
# After the bracketed alternative text three forms are recognised:
# * "(url "title")" -- inline image, pushed as md_im_image;
# * "[id]"          -- reference image, pushed as md_image; an empty id
#                      falls back to the alternative text;
# * anything else   -- the alternative text is used as the reference id.
#
# Problems (missing url, unquoted title, missing ")" or unreadable id) are
# reported through error / maruku_error; the element is pushed regardless.
def read_image(src, con)
  src.ignore_chars(2) # opening "!["
  alt_text = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket

  # A single space may separate the text from "[id]" or "(url)".
  if src.cur_char == SPACE && (src.next_char == ?[ || src.next_char == ?( )
    src.ignore_char
  end

  case src.cur_char
  when ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE, ?\t, ?)])
    if !url
      error "Could not read url from #{src.cur_chars(10).inspect}",
            src, con
    end
    src.consume_whitespace
    title = nil
    if src.cur_char != ?) # we have a title
      quote_char = src.cur_char
      title = read_quoted(src, con)
      if !title
        maruku_error 'Must quote title', src, con
      elsif !src.next_matches(/\s*\)/)
        # Tries to read a title with quotes: ![a](url "ti"tle")
        # If no closing ")" follows, read the rest up to it and guess
        # it is a title that itself contains quotes.
        rest = read_simple(src, [], [?)], [])
        # chop the closing char
        rest.chop!
        title << quote_char << rest
      end
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      # Interpolate instead of using String#<<: closing may be nil when the
      # input ends early, and appending nil raises TypeError.
      error("Unclosed link: '#{closing}'" +
            " Read url=#{url.inspect} title=#{title.inspect}", src, con)
    end
    con.push_element md_im_image(alt_text, url, title)
  when ?[ # link ref
    ref_id = read_ref_id(src, con)
    if !ref_id # TODO: check around
      error('Reference not closed.', src, con)
      ref_id = ""
    end
    # An empty id means the alternative text doubles as the id. Array#to_s
    # is the inspect form on Ruby >= 1.9, so the parts are joined instead.
    ref_id = Array(alt_text).join if ref_id.size == 0

    con.push_element md_image(alt_text, sanitize_ref_id(ref_id))
  else # no stuff
    # Same reasoning as above: join the parts rather than calling to_s.
    ref_id = sanitize_ref_id(Array(alt_text).join)
    con.push_element md_image(alt_text, ref_id)
  end
end # read image
679
-
680
-
681
# Accumulates the outcome of span-level parsing: a list of finished
# elements plus a buffer holding the text currently being read.
class SpanContext
  include MaRuKu::Strings

  # Elements read so far (Strings and MDElements).
  attr_accessor :elements
  # Text collected since the last element was pushed.
  attr_accessor :cur_string

  def initialize
    @elements = []
    @cur_string = ""
  end

  # Appends +e+ to the element list, flushing any pending text first so
  # that ordering is preserved. Raises a RuntimeError unless +e+ is a
  # String or an MDElement. Always returns nil.
  def push_element(e)
    unless e.kind_of?(String) || e.kind_of?(MDElement)
      raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} "
    end

    push_string_if_present
    @elements << e
    nil
  end
  alias push push_element

  # Pushes every item of +a+: strings are merged into the pending text,
  # anything else goes through push_element.
  def push_elements(a)
    a.each do |e|
      if e.kind_of? String
        # Append the string as a whole. Feeding it byte by byte to
        # String#<< makes each Integer a codepoint of its own, which
        # garbles multibyte characters.
        @cur_string << e
      else
        push_element e
      end
    end
  end

  # Moves the pending text, if any, to the element list and starts a new
  # empty buffer. Always returns nil.
  def push_string_if_present
    if @cur_string.size > 0
      @elements << @cur_string
      @cur_string = ""
    end
    nil
  end

  # Appends a single character to the pending text. Always returns nil.
  def push_char(c)
    @cur_string << c
    nil
  end

  # push space into current string if
  # there isn't one
  def push_space
    @cur_string << ?\s if @cur_string[-1] != ?\s
  end

  # Returns a multi-line description of the elements read so far and of
  # the pending text, if there is any.
  def describe
    lines = @elements.map { |x| x.inspect }.join("\n")
    s = "Elements read in span: \n" +
        add_tabs(lines, 1, ' -') + "\n"

    if @cur_string.size > 0
      s += "Current string: \n #{@cur_string.inspect}\n"
    end
    s
  end
end # SpanContext
744
-
745
- end end end end # module MaRuKu; module In; module Markdown; module SpanLevelParser
746
-