maruku 0.6.1 → 0.7.0.beta1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (263) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/MIT-LICENSE.txt +20 -0
  5. data/bin/maruku +153 -152
  6. data/bin/marutex +2 -29
  7. data/data/entities.xml +261 -0
  8. data/docs/math.md +14 -18
  9. data/lib/maruku.rb +65 -77
  10. data/lib/maruku/attributes.rb +109 -214
  11. data/lib/maruku/defaults.rb +45 -67
  12. data/lib/maruku/document.rb +43 -0
  13. data/lib/maruku/element.rb +112 -0
  14. data/lib/maruku/errors.rb +71 -0
  15. data/lib/maruku/ext/div.rb +105 -113
  16. data/lib/maruku/ext/fenced_code.rb +97 -0
  17. data/lib/maruku/ext/math.rb +22 -26
  18. data/lib/maruku/ext/math/elements.rb +20 -26
  19. data/lib/maruku/ext/math/mathml_engines/blahtex.rb +92 -104
  20. data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +33 -26
  21. data/lib/maruku/ext/math/mathml_engines/none.rb +11 -19
  22. data/lib/maruku/ext/math/mathml_engines/ritex.rb +2 -4
  23. data/lib/maruku/ext/math/parsing.rb +107 -113
  24. data/lib/maruku/ext/math/to_html.rb +184 -187
  25. data/lib/maruku/ext/math/to_latex.rb +30 -21
  26. data/lib/maruku/helpers.rb +158 -257
  27. data/lib/maruku/html.rb +254 -0
  28. data/lib/maruku/input/charsource.rb +272 -319
  29. data/lib/maruku/input/extensions.rb +62 -63
  30. data/lib/maruku/input/html_helper.rb +220 -189
  31. data/lib/maruku/input/linesource.rb +90 -110
  32. data/lib/maruku/input/mdline.rb +129 -0
  33. data/lib/maruku/input/parse_block.rb +618 -612
  34. data/lib/maruku/input/parse_doc.rb +145 -215
  35. data/lib/maruku/input/parse_span.rb +658 -0
  36. data/lib/maruku/input/rubypants.rb +200 -128
  37. data/lib/maruku/inspect_element.rb +60 -0
  38. data/lib/maruku/maruku.rb +10 -31
  39. data/lib/maruku/output/entity_table.rb +33 -0
  40. data/lib/maruku/output/s5/fancy.rb +462 -462
  41. data/lib/maruku/output/s5/to_s5.rb +115 -135
  42. data/lib/maruku/output/to_html.rb +898 -983
  43. data/lib/maruku/output/to_latex.rb +561 -560
  44. data/lib/maruku/output/to_markdown.rb +207 -162
  45. data/lib/maruku/output/to_s.rb +11 -52
  46. data/lib/maruku/string_utils.rb +129 -179
  47. data/lib/maruku/toc.rb +185 -196
  48. data/lib/maruku/version.rb +33 -38
  49. data/spec/block_docs/abbrev.md +776 -0
  50. data/{tests/unittest → spec/block_docs}/abbreviations.md +11 -20
  51. data/{tests/unittest → spec/block_docs}/alt.md +2 -14
  52. data/{tests/unittest/pending → spec/block_docs}/amps.md +1 -13
  53. data/{tests/unittest → spec/block_docs}/attributes/att2.md +0 -12
  54. data/{tests/unittest → spec/block_docs}/attributes/att3.md +2 -14
  55. data/{tests/unittest → spec/block_docs}/attributes/attributes.md +12 -16
  56. data/{tests/unittest → spec/block_docs}/attributes/circular.md +0 -12
  57. data/{tests/unittest → spec/block_docs}/attributes/default.md +1 -13
  58. data/{tests/unittest → spec/block_docs}/blank.md +0 -12
  59. data/{tests/unittest → spec/block_docs}/blanks_in_code.md +16 -15
  60. data/{tests/unittest/loss.md → spec/block_docs/bug_def.md} +6 -18
  61. data/{tests/unittest → spec/block_docs}/bug_table.md +3 -15
  62. data/{tests/unittest → spec/block_docs}/code.md +7 -14
  63. data/{tests/unittest → spec/block_docs}/code2.md +4 -14
  64. data/{tests/unittest → spec/block_docs}/code3.md +12 -16
  65. data/{tests/unittest → spec/block_docs}/data_loss.md +2 -14
  66. data/{tests/unittest → spec/block_docs}/divs/div1.md +0 -12
  67. data/{tests/unittest → spec/block_docs}/divs/div2.md +0 -12
  68. data/{tests/unittest → spec/block_docs}/divs/div3_nest.md +3 -15
  69. data/{tests/unittest → spec/block_docs}/easy.md +1 -13
  70. data/spec/block_docs/email.md +29 -0
  71. data/{tests/unittest/pending → spec/block_docs}/empty_cells.md +3 -15
  72. data/{tests/unittest → spec/block_docs}/encoding/iso-8859-1.md +1 -14
  73. data/{tests/unittest → spec/block_docs}/encoding/utf-8.md +0 -12
  74. data/{tests/unittest → spec/block_docs}/entities.md +27 -29
  75. data/{tests/unittest/notyet → spec/block_docs}/escape.md +2 -14
  76. data/{tests/unittest → spec/block_docs}/escaping.md +11 -22
  77. data/{tests/unittest → spec/block_docs}/extra_dl.md +2 -13
  78. data/{tests/unittest → spec/block_docs}/extra_header_id.md +14 -20
  79. data/{tests/unittest → spec/block_docs}/extra_table1.md +3 -15
  80. data/spec/block_docs/fenced_code_blocks.md +66 -0
  81. data/spec/block_docs/fenced_code_blocks_highlighted.md +18 -0
  82. data/{tests/unittest → spec/block_docs}/footnotes.md +12 -24
  83. data/spec/block_docs/footnotes2.md +78 -0
  84. data/spec/block_docs/hard.md +25 -0
  85. data/spec/block_docs/header_after_par.md +62 -0
  86. data/{tests/unittest → spec/block_docs}/headers.md +10 -18
  87. data/{tests/unittest → spec/block_docs}/hex_entities.md +7 -18
  88. data/{tests/unittest → spec/block_docs}/hrule.md +5 -12
  89. data/{tests/unittest → spec/block_docs}/html3.md +1 -13
  90. data/{tests/unittest → spec/block_docs}/html4.md +2 -14
  91. data/{tests/unittest → spec/block_docs}/html5.md +2 -14
  92. data/spec/block_docs/html_block_in_para.md +22 -0
  93. data/spec/block_docs/html_inline.md +25 -0
  94. data/spec/block_docs/html_trailing.md +31 -0
  95. data/spec/block_docs/ie.md +62 -0
  96. data/spec/block_docs/iframe.md +29 -0
  97. data/{tests/unittest → spec/block_docs}/images.md +22 -28
  98. data/{tests/unittest → spec/block_docs}/images2.md +7 -17
  99. data/{tests/unittest → spec/block_docs}/inline_html.md +37 -67
  100. data/{tests/unittest → spec/block_docs}/inline_html2.md +1 -13
  101. data/spec/block_docs/inline_html_beginning.md +10 -0
  102. data/spec/block_docs/issue20.md +9 -0
  103. data/spec/block_docs/issue26.md +22 -0
  104. data/spec/block_docs/issue29.md +9 -0
  105. data/spec/block_docs/issue30.md +30 -0
  106. data/spec/block_docs/issue31.md +25 -0
  107. data/spec/block_docs/issue40.md +40 -0
  108. data/spec/block_docs/issue64.md +55 -0
  109. data/spec/block_docs/issue67.md +19 -0
  110. data/spec/block_docs/issue70.md +11 -0
  111. data/spec/block_docs/issue72.md +17 -0
  112. data/spec/block_docs/issue74.md +38 -0
  113. data/spec/block_docs/issue79.md +15 -0
  114. data/spec/block_docs/issue83.md +13 -0
  115. data/spec/block_docs/issue85.md +25 -0
  116. data/spec/block_docs/issue88.md +19 -0
  117. data/spec/block_docs/issue89.md +12 -0
  118. data/spec/block_docs/issue90.md +38 -0
  119. data/{tests/unittest/pending → spec/block_docs}/link.md +21 -18
  120. data/{tests/unittest → spec/block_docs}/links.md +33 -32
  121. data/spec/block_docs/links2.md +21 -0
  122. data/{tests/unittest → spec/block_docs}/list1.md +0 -12
  123. data/{tests/unittest → spec/block_docs}/list12.md +2 -14
  124. data/{tests/unittest → spec/block_docs}/list2.md +2 -14
  125. data/spec/block_docs/list_multipara.md +42 -0
  126. data/{tests/unittest → spec/block_docs}/lists.md +28 -29
  127. data/{tests/unittest → spec/block_docs}/lists10.md +2 -14
  128. data/spec/block_docs/lists11.md +23 -0
  129. data/spec/block_docs/lists12.md +43 -0
  130. data/spec/block_docs/lists13.md +55 -0
  131. data/spec/block_docs/lists14.md +61 -0
  132. data/spec/block_docs/lists15.md +36 -0
  133. data/spec/block_docs/lists6.md +88 -0
  134. data/spec/block_docs/lists7b.md +58 -0
  135. data/spec/block_docs/lists9.md +53 -0
  136. data/{tests/unittest → spec/block_docs}/lists_after_paragraph.md +19 -25
  137. data/spec/block_docs/lists_blank.md +35 -0
  138. data/{tests/unittest/list3.md → spec/block_docs/lists_blockquote_code.md} +2 -14
  139. data/{tests/unittest/list4.md → spec/block_docs/lists_need_blank_line.md} +50 -21
  140. data/spec/block_docs/lists_nested.md +44 -0
  141. data/spec/block_docs/lists_nested_blankline.md +28 -0
  142. data/spec/block_docs/lists_nested_deep.md +43 -0
  143. data/{tests/unittest → spec/block_docs}/lists_ol.md +37 -54
  144. data/spec/block_docs/lists_paraindent.md +47 -0
  145. data/spec/block_docs/lists_tab.md +54 -0
  146. data/spec/block_docs/loss.md +17 -0
  147. data/spec/block_docs/math-blahtex/equations.md +30 -0
  148. data/spec/block_docs/math-blahtex/inline.md +48 -0
  149. data/spec/block_docs/math-blahtex/math2.md +45 -0
  150. data/spec/block_docs/math-blahtex/table.md +25 -0
  151. data/spec/block_docs/math/embedded_invalid_svg.md +79 -0
  152. data/spec/block_docs/math/embedded_svg.md +97 -0
  153. data/spec/block_docs/math/equations.md +44 -0
  154. data/{tests/unittest → spec/block_docs}/math/inline.md +7 -19
  155. data/spec/block_docs/math/math2.md +45 -0
  156. data/{tests/unittest → spec/block_docs}/math/notmath.md +0 -12
  157. data/spec/block_docs/math/raw_mathml.md +87 -0
  158. data/spec/block_docs/math/table.md +25 -0
  159. data/{tests/unittest → spec/block_docs}/math/table2.md +5 -17
  160. data/{tests/unittest → spec/block_docs}/misc_sw.md +181 -118
  161. data/{tests/unittest → spec/block_docs}/olist.md +6 -18
  162. data/{tests/unittest → spec/block_docs}/one.md +0 -12
  163. data/{tests/unittest → spec/block_docs}/paragraph.md +0 -12
  164. data/{tests/unittest → spec/block_docs}/paragraph_rules/dont_merge_ref.md +4 -12
  165. data/{tests/unittest → spec/block_docs}/paragraph_rules/tab_is_blank.md +0 -12
  166. data/{tests/unittest → spec/block_docs}/paragraphs.md +1 -13
  167. data/{tests/unittest → spec/block_docs}/recover/recover_links.md +4 -16
  168. data/{tests/unittest/pending/ref.md → spec/block_docs/ref_with_period.md} +7 -16
  169. data/spec/block_docs/ref_with_title.md +22 -0
  170. data/{tests/unittest → spec/block_docs}/references/long_example.md +16 -23
  171. data/{tests/unittest → spec/block_docs}/references/spaces_and_numbers.md +0 -12
  172. data/{tests/unittest → spec/block_docs}/smartypants.md +24 -31
  173. data/{tests/unittest → spec/block_docs}/syntax_hl.md +13 -17
  174. data/{tests/unittest → spec/block_docs}/table_attributes.md +2 -14
  175. data/spec/block_docs/tables.md +58 -0
  176. data/{tests/unittest → spec/block_docs}/test.md +1 -13
  177. data/{tests/unittest/notyet → spec/block_docs}/ticks.md +1 -13
  178. data/spec/block_docs/toc.md +87 -0
  179. data/{tests/unittest/notyet → spec/block_docs}/triggering.md +14 -25
  180. data/{tests/unittest → spec/block_docs}/underscore_in_words.md +0 -12
  181. data/{tests/unittest → spec/block_docs}/wrapping.md +4 -16
  182. data/spec/block_docs/xml.md +33 -0
  183. data/{tests/unittest → spec/block_docs}/xml2.md +0 -12
  184. data/spec/block_docs/xml3.md +24 -0
  185. data/{tests/unittest → spec/block_docs}/xml_instruction.md +9 -20
  186. data/spec/block_spec.rb +110 -0
  187. data/spec/cli_spec.rb +8 -0
  188. data/spec/span_spec.rb +256 -0
  189. data/spec/spec_helper.rb +2 -0
  190. data/spec/to_html_utf8_spec.rb +13 -0
  191. metadata +205 -243
  192. metadata.gz.sig +3 -0
  193. data/Rakefile +0 -48
  194. data/bin/marudown +0 -29
  195. data/bin/marutest +0 -345
  196. data/docs/changelog.md +0 -334
  197. data/lib/maruku/errors_management.rb +0 -92
  198. data/lib/maruku/ext/math/latex_fix.rb +0 -12
  199. data/lib/maruku/input/parse_span_better.rb +0 -746
  200. data/lib/maruku/input/type_detection.rb +0 -147
  201. data/lib/maruku/output/to_latex_entities.rb +0 -367
  202. data/lib/maruku/output/to_latex_strings.rb +0 -64
  203. data/lib/maruku/structures.rb +0 -167
  204. data/lib/maruku/structures_inspect.rb +0 -87
  205. data/lib/maruku/structures_iterators.rb +0 -61
  206. data/lib/maruku/tests/benchmark.rb +0 -82
  207. data/lib/maruku/tests/new_parser.rb +0 -373
  208. data/lib/maruku/tests/tests.rb +0 -136
  209. data/lib/maruku/usage/example1.rb +0 -33
  210. data/tests/bugs/code_in_links.md +0 -101
  211. data/tests/bugs/complex_escaping.md +0 -38
  212. data/tests/math/syntax.md +0 -46
  213. data/tests/math_usage/document.md +0 -13
  214. data/tests/others/abbreviations.md +0 -11
  215. data/tests/others/blank.md +0 -4
  216. data/tests/others/code.md +0 -5
  217. data/tests/others/code2.md +0 -8
  218. data/tests/others/code3.md +0 -16
  219. data/tests/others/email.md +0 -4
  220. data/tests/others/entities.md +0 -19
  221. data/tests/others/escaping.md +0 -16
  222. data/tests/others/extra_dl.md +0 -101
  223. data/tests/others/extra_header_id.md +0 -13
  224. data/tests/others/extra_table1.md +0 -40
  225. data/tests/others/footnotes.md +0 -17
  226. data/tests/others/headers.md +0 -10
  227. data/tests/others/hrule.md +0 -10
  228. data/tests/others/images.md +0 -20
  229. data/tests/others/inline_html.md +0 -42
  230. data/tests/others/links.md +0 -38
  231. data/tests/others/list1.md +0 -4
  232. data/tests/others/list2.md +0 -5
  233. data/tests/others/list3.md +0 -8
  234. data/tests/others/lists.md +0 -32
  235. data/tests/others/lists_after_paragraph.md +0 -44
  236. data/tests/others/lists_ol.md +0 -39
  237. data/tests/others/misc_sw.md +0 -105
  238. data/tests/others/one.md +0 -1
  239. data/tests/others/paragraphs.md +0 -13
  240. data/tests/others/sss06.md +0 -352
  241. data/tests/others/test.md +0 -4
  242. data/tests/s5/s5profiling.md +0 -48
  243. data/tests/unittest/bug_def.md +0 -28
  244. data/tests/unittest/email.md +0 -32
  245. data/tests/unittest/html2.md +0 -34
  246. data/tests/unittest/ie.md +0 -61
  247. data/tests/unittest/links2.md +0 -34
  248. data/tests/unittest/lists11.md +0 -28
  249. data/tests/unittest/lists6.md +0 -53
  250. data/tests/unittest/lists9.md +0 -76
  251. data/tests/unittest/math/equations.md +0 -86
  252. data/tests/unittest/math/math2.md +0 -57
  253. data/tests/unittest/math/table.md +0 -37
  254. data/tests/unittest/notyet/header_after_par.md +0 -70
  255. data/tests/unittest/red_tests/abbrev.md +0 -1388
  256. data/tests/unittest/red_tests/lists7.md +0 -68
  257. data/tests/unittest/red_tests/lists7b.md +0 -128
  258. data/tests/unittest/red_tests/lists8.md +0 -76
  259. data/tests/unittest/red_tests/xml.md +0 -70
  260. data/tests/unittest/xml3.md +0 -38
  261. data/tests/utf8-files/simple.md +0 -1
  262. data/unit_test_block.sh +0 -5
  263. data/unit_test_span.sh +0 -3
@@ -1,92 +0,0 @@
1
- #--
2
- # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
- #
4
- # This file is part of Maruku.
5
- #
6
- # Maruku is free software; you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation; either version 2 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # Maruku is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with Maruku; if not, write to the Free Software
18
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
- #++
20
-
21
-
22
-
23
- #m Any method that detects formatting error calls the
24
- #m maruku_error() method.
25
- #m if @meta[:on_error] ==
26
- #m
27
- #m - :warning write on the standard err (or @error_stream if defined),
28
- #m then do your best.
29
- #m - :ignore be shy and try to continue
30
- #m - :raise raises a MarukuException
31
- #m
32
- #m default is :raise
33
-
34
- module MaRuKu
35
-
36
- class Exception < RuntimeError
37
- end
38
-
39
- module Errors
40
-
41
- def maruku_error(s,src=nil,con=nil)
42
- policy = get_setting(:on_error)
43
-
44
- case policy
45
- when :ignore
46
- when :raise
47
- raise_error create_frame(describe_error(s,src,con))
48
- when :warning
49
- tell_user create_frame(describe_error(s,src,con))
50
- else
51
- raise "BugBug: policy = #{policy.inspect}"
52
- end
53
- end
54
-
55
- def maruku_recover(s,src=nil,con=nil)
56
- tell_user create_frame(describe_error(s,src,con))
57
- end
58
-
59
- alias error maruku_error
60
-
61
- def raise_error(s)
62
- raise MaRuKu::Exception, s, caller
63
- end
64
-
65
- def tell_user(s)
66
- error_stream = self.attributes[:error_stream] || $stderr
67
- error_stream << s
68
- end
69
-
70
- def create_frame(s)
71
- n = 75
72
- "\n" +
73
- " "+"_"*n + "\n"+
74
- "| Maruku tells you:\n" +
75
- "+" + ("-"*n) +"\n"+
76
- add_tabs(s,1,'| ') + "\n" +
77
- "+" + ("-"*n) + "\n" +
78
- add_tabs(caller[0, 5].join("\n"),1,'!') + "\n" +
79
- "\\" + ("_"*n) + "\n"
80
- end
81
-
82
- def describe_error(s,src,con)
83
- t = s
84
- src && (t += "\n#{src.describe}\n")
85
- con && (t += "\n#{con.describe}\n")
86
- t
87
- end
88
-
89
- end # Errors
90
- end # MaRuKu
91
-
92
-
@@ -1,12 +0,0 @@
1
- class String
2
- # fix some LaTeX command-name clashes
3
- def fix_latex
4
- if #{html_math_engine} == 'itex2mml'
5
- s = self.gsub("\\mathop{", "\\operatorname{")
6
- s.gsub!(/\\begin\{svg\}.*?\\end\{svg\}/m, " ")
7
- s.gsub("\\space{", "\\itexspace{")
8
- else
9
- self
10
- end
11
- end
12
- end
@@ -1,746 +0,0 @@
1
- #--
2
- # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
- #
4
- # This file is part of Maruku.
5
- #
6
- # Maruku is free software; you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation; either version 2 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # Maruku is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with Maruku; if not, write to the Free Software
18
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
- #++
20
-
21
-
22
- require 'set'
23
-
24
- module MaRuKu; module In; module Markdown; module SpanLevelParser
25
- include MaRuKu::Helpers
26
-
27
- EscapedCharInText =
28
- Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>]
29
-
30
- EscapedCharInQuotes =
31
- Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>,?',?"]
32
-
33
- EscapedCharInInlineCode = [?\\,?`]
34
-
35
- def parse_lines_as_span(lines, parent=nil)
36
- parse_span_better lines.join("\n"), parent
37
- end
38
-
39
- def parse_span_better(string, parent=nil)
40
- if not string.kind_of? String then
41
- error "Passed #{string.class}." end
42
-
43
- st = (string + "")
44
- st.freeze
45
- src = CharSource.new(st, parent)
46
- read_span(src, EscapedCharInText, [nil])
47
- end
48
-
49
- # This is the main loop for reading span elements
50
- #
51
- # It's long, but not *complex* or difficult to understand.
52
- #
53
- #
54
- def read_span(src, escaped, exit_on_chars, exit_on_strings=nil)
55
- con = SpanContext.new
56
- c = d = nil
57
- while true
58
- c = src.cur_char
59
-
60
- # This is only an optimization which cuts 50% of the time used.
61
- # (but you can't use a-zA-z in exit_on_chars)
62
- if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z)))
63
- con.cur_string << src.shift_char
64
- next
65
- end
66
-
67
- break if exit_on_chars && exit_on_chars.include?(c)
68
- break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x}
69
-
70
- # check if there are extensions
71
- if check_span_extensions(src, con)
72
- next
73
- end
74
-
75
- case c = src.cur_char
76
- when ?\ # it's space (32)
77
- if src.cur_chars_are " \n"
78
- src.ignore_chars(3)
79
- con.push_element md_br()
80
- next
81
- else
82
- src.ignore_char
83
- con.push_space
84
- end
85
- when ?\n, ?\t
86
- src.ignore_char
87
- con.push_space
88
- when ?`
89
- read_inline_code(src,con)
90
- when ?<
91
- # It could be:
92
- # 1) HTML "<div ..."
93
- # 2) HTML "<!-- ..."
94
- # 3) url "<http:// ", "<ftp:// ..."
95
- # 4) email "<andrea@... ", "<mailto:andrea@..."
96
- # 5) on itself! "a < b "
97
- # 6) Start of <<guillemettes>>
98
-
99
- case d = src.next_char
100
- when ?<; # guillemettes
101
- src.ignore_chars(2)
102
- con.push_char ?<
103
- con.push_char ?<
104
- when ?!;
105
- if src.cur_chars_are '<!--'
106
- read_inline_html(src, con)
107
- else
108
- con.push_char src.shift_char
109
- end
110
- when ??
111
- read_xml_instr_span(src, con)
112
- when ?\ , ?\t
113
- con.push_char src.shift_char
114
- else
115
- if src.next_matches(/<mailto:/) or
116
- src.next_matches(/<[\w\.]+\@/)
117
- read_email_el(src, con)
118
- elsif src.next_matches(/<\w+:/)
119
- read_url_el(src, con)
120
- elsif src.next_matches(/<\w/)
121
- #puts "This is HTML: #{src.cur_chars(20)}"
122
- read_inline_html(src, con)
123
- else
124
- #puts "This is NOT HTML: #{src.cur_chars(20)}"
125
- con.push_char src.shift_char
126
- end
127
- end
128
- when ?\\
129
- d = src.next_char
130
- if d == ?'
131
- src.ignore_chars(2)
132
- con.push_element md_entity('apos')
133
- elsif d == ?"
134
- src.ignore_chars(2)
135
- con.push_element md_entity('quot')
136
- elsif escaped.include? d
137
- src.ignore_chars(2)
138
- con.push_char d
139
- else
140
- con.push_char src.shift_char
141
- end
142
- when ?[
143
- if markdown_extra? && src.next_char == ?^
144
- read_footnote_ref(src,con)
145
- else
146
- read_link(src, con)
147
- end
148
- when ?!
149
- if src.next_char == ?[
150
- read_image(src, con)
151
- else
152
- con.push_char src.shift_char
153
- end
154
- when ?&
155
- # named references
156
- if m = src.read_regexp(/\&([\w\d]+);/)
157
- con.push_element md_entity(m[1])
158
- # numeric
159
- elsif m = src.read_regexp(/\&\#(x)?([\w\d]+);/)
160
- num = m[1] ? m[2].hex : m[2].to_i
161
- con.push_element md_entity(num)
162
- else
163
- con.push_char src.shift_char
164
- end
165
- when ?*
166
- if not src.next_char
167
- maruku_error "Opening * as last char.", src, con
168
- maruku_recover "Threating as literal"
169
- con.push_char src.shift_char
170
- else
171
- follows = src.cur_chars(4)
172
- if follows =~ /^\*\*\*[^\s\*]/
173
- con.push_element read_emstrong(src,'***')
174
- elsif follows =~ /^\*\*[^\s\*]/
175
- con.push_element read_strong(src,'**')
176
- elsif follows =~ /^\*[^\s\*]/
177
- con.push_element read_em(src,'*')
178
- else # * is just a normal char
179
- con.push_char src.shift_char
180
- end
181
- end
182
- when ?_
183
- if not src.next_char
184
- maruku_error "Opening _ as last char", src, con
185
- maruku_recover "Threating as literal", src, con
186
- con.push_char src.shift_char
187
- else
188
- # we don't want "mod_ruby" to start an emphasis
189
- # so we start one only if
190
- # 1) there's nothing else in the span (first char)
191
- # or 2) the last char was a space
192
- # or 3) the current string is empty
193
- #if con.elements.empty? ||
194
- if (con.cur_string =~ /\s\Z/) || (con.cur_string.size == 0)
195
- # also, we check the next characters
196
- follows = src.cur_chars(4)
197
- if follows =~ /^\_\_\_[^\s\_]/
198
- con.push_element read_emstrong(src,'___')
199
- elsif follows =~ /^\_\_[^\s\_]/
200
- con.push_element read_strong(src,'__')
201
- elsif follows =~ /^\_[^\s\_]/
202
- con.push_element read_em(src,'_')
203
- else # _ is just a normal char
204
- con.push_char src.shift_char
205
- end
206
- else
207
- # _ is just a normal char
208
- con.push_char src.shift_char
209
- end
210
- end
211
- when ?{ # extension
212
- if [?#, ?., ?:].include? src.next_char
213
- src.ignore_char # {
214
- interpret_extension(src, con, [?}])
215
- src.ignore_char # }
216
- else
217
- con.push_char src.shift_char
218
- end
219
- when nil
220
- maruku_error( ("Unclosed span (waiting for %s"+
221
- "#{exit_on_strings.inspect})") % [
222
- exit_on_chars ? "#{exit_on_chars.inspect} or" : ""],
223
- src,con)
224
- break
225
- else # normal text
226
- con.push_char src.shift_char
227
- end # end case
228
- end # end while true
229
- con.push_string_if_present
230
-
231
- # Assign IAL to elements
232
- merge_ial(con.elements, src, con)
233
-
234
-
235
- # Remove leading space
236
- if (s = con.elements.first).kind_of? String
237
- if s[0] == ?\ then con.elements[0] = s[1, s.size-1] end
238
- con.elements.shift if s.size == 0
239
- end
240
-
241
- # Remove final spaces
242
- if (s = con.elements.last).kind_of? String
243
- s.chop! if s[-1] == ?\
244
- con.elements.pop if s.size == 0
245
- end
246
-
247
- educated = educate(con.elements)
248
-
249
- educated
250
- end
251
-
252
-
253
- def read_xml_instr_span(src, con)
254
- src.ignore_chars(2) # starting <?
255
-
256
- # read target <?target code... ?>
257
- target = if m = src.read_regexp(/(\w+)/)
258
- m[1]
259
- else
260
- ''
261
- end
262
-
263
- delim = "?>"
264
-
265
- code =
266
- read_simple(src, escaped=[], break_on_chars=[],
267
- break_on_strings=[delim])
268
-
269
- src.ignore_chars delim.size
270
-
271
- code = (code || "").strip
272
- con.push_element md_xml_instr(target, code)
273
- end
274
-
275
- # Start: cursor on character **after** '{'
276
- # End: curson on '}' or EOF
277
- def interpret_extension(src, con, break_on_chars)
278
- case src.cur_char
279
- when ?:
280
- src.ignore_char # :
281
- extension_meta(src, con, break_on_chars)
282
- when ?#, ?.
283
- extension_meta(src, con, break_on_chars)
284
- else
285
- stuff = read_simple(src, escaped=[?}], break_on_chars, [])
286
- if stuff =~ /^(\w+\s|[^\w])/
287
- extension_id = $1.strip
288
- if false
289
- else
290
- maruku_recover "I don't know what to do with extension '#{extension_id}'\n"+
291
- "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
292
- extension_meta(src, con, break_on_chars)
293
- end
294
- else
295
- maruku_recover "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
296
- extension_meta(src, con, break_on_chars)
297
- end
298
- end
299
- end
300
-
301
- def extension_meta(src, con, break_on_chars)
302
- if m = src.read_regexp(/([^\s\:\"\']+):/)
303
- name = m[1]
304
- al = read_attribute_list(src, con, break_on_chars)
305
- # puts "#{name}=#{al.inspect}"
306
- self.doc.ald[name] = al
307
- con.push md_ald(name, al)
308
- else
309
- al = read_attribute_list(src, con, break_on_chars)
310
- self.doc.ald[name] = al
311
- con.push md_ial(al)
312
- end
313
- end
314
-
315
- def read_url_el(src,con)
316
- src.ignore_char # leading <
317
- url = read_simple(src, [], [?>])
318
- src.ignore_char # closing >
319
-
320
- con.push_element md_url(url)
321
- end
322
-
323
- def read_email_el(src,con)
324
- src.ignore_char # leading <
325
- mail = read_simple(src, [], [?>])
326
- src.ignore_char # closing >
327
-
328
- address = mail.gsub(/^mailto:/,'')
329
- con.push_element md_email(address)
330
- end
331
-
332
- def read_url(src, break_on)
333
- if [?',?"].include? src.cur_char
334
- error 'Invalid char for url', src
335
- end
336
-
337
- url = read_simple(src, [], break_on)
338
- if not url # empty url
339
- url = ""
340
- end
341
-
342
- if url[0] == ?< && url[-1] == ?>
343
- url = url[1, url.size-2]
344
- end
345
-
346
- if url.size == 0
347
- return nil
348
- end
349
-
350
- url
351
- end
352
-
353
-
354
- def read_quoted_or_unquoted(src, con, escaped, exit_on_chars)
355
- case src.cur_char
356
- when ?', ?"
357
- read_quoted(src, con)
358
- else
359
- read_simple(src, escaped, exit_on_chars)
360
- end
361
- end
362
-
363
- # Tries to read a quoted value. If stream does not
364
- # start with ' or ", returns nil.
365
- def read_quoted(src, con)
366
- case src.cur_char
367
- when ?', ?"
368
- quote_char = src.shift_char # opening quote
369
- string = read_simple(src, EscapedCharInQuotes, [quote_char])
370
- src.ignore_char # closing quote
371
- return string
372
- else
373
- # puts "Asked to read quote from: #{src.cur_chars(10).inspect}"
374
- return nil
375
- end
376
- end
377
-
378
- # Reads a simple string (no formatting) until one of break_on_chars,
379
- # while escaping the escaped.
380
- # If the string is empty, it returns nil.
381
- # Raises on error if the string terminates unexpectedly.
382
- # # If eat_delim is true, and if the delim is not the EOF, then the delim
383
- # # gets eaten from the stream.
384
- def read_simple(src, escaped, exit_on_chars, exit_on_strings=nil)
385
- text = ""
386
- while true
387
- # puts "Reading simple #{text.inspect}"
388
- c = src.cur_char
389
- if exit_on_chars && exit_on_chars.include?(c)
390
- # src.ignore_char if eat_delim
391
- break
392
- end
393
-
394
- break if exit_on_strings &&
395
- exit_on_strings.any? {|x| src.cur_chars_are x}
396
-
397
- case c
398
- when nil
399
- s= "String finished while reading (break on "+
400
- "#{exit_on_chars.map{|x|""<<x}.inspect})"+
401
- " already read: #{text.inspect}"
402
- maruku_error s, src
403
- maruku_recover "I boldly continue", src
404
- break
405
- when ?\\
406
- d = src.next_char
407
- if escaped.include? d
408
- src.ignore_chars(2)
409
- text << d
410
- else
411
- text << src.shift_char
412
- end
413
- else
414
- text << src.shift_char
415
- end
416
- end
417
- # puts "Read simple #{text.inspect}"
418
- text.empty? ? nil : text
419
- end
420
-
421
- def read_em(src, delim)
422
- src.ignore_char
423
- children = read_span(src, EscapedCharInText, nil, [delim])
424
- src.ignore_char
425
- md_em(children)
426
- end
427
-
428
- def read_strong(src, delim)
429
- src.ignore_chars(2)
430
- children = read_span(src, EscapedCharInText, nil, [delim])
431
- src.ignore_chars(2)
432
- md_strong(children)
433
- end
434
-
435
- def read_emstrong(src, delim)
436
- src.ignore_chars(3)
437
- children = read_span(src, EscapedCharInText, nil, [delim])
438
- src.ignore_chars(3)
439
- md_emstrong(children)
440
- end
441
-
442
- SPACE = ?\ # = 32
443
-
444
- # R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
445
- # R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
446
- R_REF_ID = Regexp.compile(/([^\]]*)\]/)
447
-
448
- # Reads a bracketed id "[refid]". Consumes also both brackets.
449
- def read_ref_id(src, con)
450
- src.ignore_char # [
451
- src.consume_whitespace
452
- # puts "Next: #{src.cur_chars(10).inspect}"
453
- if m = src.read_regexp(R_REF_ID)
454
- # puts "Got: #{m[1].inspect} Ignored: #{m[2].inspect}"
455
- # puts "Then: #{src.cur_chars(10).inspect}"
456
- m[1]
457
- else
458
- nil
459
- end
460
- end
461
-
462
- def read_footnote_ref(src,con)
463
- ref = read_ref_id(src,con)
464
- con.push_element md_foot_ref(ref)
465
- end
466
-
467
- def read_inline_html(src, con)
468
- h = HTMLHelper.new
469
- begin
470
- # This is our current buffer in the context
471
- next_stuff = src.current_remaining_buffer
472
-
473
- consumed = 0
474
- while true
475
- if consumed >= next_stuff.size
476
- maruku_error "Malformed HTML starting at #{next_stuff.inspect}", src, con
477
- break
478
- end
479
-
480
- h.eat_this next_stuff[consumed].chr; consumed += 1
481
- break if h.is_finished?
482
- end
483
- src.ignore_chars(consumed)
484
- con.push_element md_html(h.stuff_you_read)
485
-
486
- #start = src.current_remaining_buffer
487
- # h.eat_this start
488
- # if not h.is_finished?
489
- # error "inline_html: Malformed:\n "+
490
- # "#{start.inspect}\n #{h.inspect}",src,con
491
- # end
492
- #
493
- # consumed = start.size - h.rest.size
494
- # if consumed > 0
495
- # con.push_element md_html(h.stuff_you_read)
496
- # src.ignore_chars(consumed)
497
- # else
498
- # puts "HTML helper did not work on #{start.inspect}"
499
- # con.push_char src.shift_char
500
- # end
501
- rescue Exception => e
502
- maruku_error "Bad html: \n" +
503
- add_tabs(e.inspect+e.backtrace.join("\n"),1,'>'),
504
- src,con
505
- maruku_recover "I will try to continue after bad HTML.", src, con
506
- con.push_char src.shift_char
507
- end
508
- end
509
-
510
# Reads an inline code span delimited by a run of backticks and pushes an
# md_code element onto +con+.
#
# The opening run of backticks is counted and skipped; the code text is then
# read with read_simple up to a string of the same number of backticks, and
# that many characters are skipped as the closing delimiter. When more than
# one backtick was used, one leading and one trailing space are removed
# from the code text.
#
# src:: character source positioned at the first backtick
# con:: span context that receives the code element
def read_inline_code(src, con)
  # Skip the opening delimiter, remembering how long it was.
  num_ticks = 0
  until src.cur_char != ?`
    src.ignore_char
    num_ticks += 1
  end

  # The span ends at the same number of backticks.
  closing_fence = "`" * num_ticks
  code = read_simple(src, [], [], [closing_fence])

  src.ignore_chars num_ticks

  if num_ticks > 1
    # Ignore at most one space on each side.
    code = code[1, code.size - 1] if code[0] == SPACE
    code = code[0, code.size - 1] if code[-1] == SPACE
  end

  con.push_element md_code(code)
end
540
-
541
# Parses a link starting at "[" and pushes the result onto +con+.
#
# Three shapes are handled after the bracketed link text:
# * "(" -- inline link: an optional url and an optional quoted title, closed
#   by ")". Without the closing ")" an error is reported and the link text
#   children are pushed back as plain elements.
# * "[" -- reference link: the id is read with read_ref_id; an empty id falls
#   back to the link text. If the id cannot be read an error is reported and
#   the children are pushed back as plain elements.
# * anything else -- the link text itself is used as the reference id.
#
# Character literals (?x) are kept for every comparison, as in the sibling
# parsers.
#
# src:: character source positioned at the opening "["
# con:: span context that receives the link (or the recovered children)
def read_link(src, con)
  src.ignore_char # opening bracket
  children = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket

  # A single space between "]" and a following "[" or "(" is skipped.
  if src.cur_char == SPACE
    upcoming = src.next_char
    src.shift_char if upcoming == ?[ || upcoming == ?(
  end

  case src.cur_char
  when ?( # inline link
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE, ?\t, ?)]) || '' # no url is ok
    src.consume_whitespace

    title = nil
    unless src.cur_char == ?)
      # Anything before the closing paren is taken to be a title.
      quote_char = src.cur_char
      title = read_quoted(src, con)
      if title
        # Tries to read a title with quotes: ![a](url "ti"tle")
        # If no closing paren follows, the rest up to ")" is assumed to be
        # more of the title, containing quotes of its own.
        unless src.next_matches(/\s*\)/)
          remainder = read_simple(src, [], [?)], [])
          remainder.chop! # chop the closing char
          title << quote_char << remainder
        end
      else
        maruku_error 'Must quote title', src, con
      end
    end

    src.consume_whitespace
    if src.shift_char == ?)
      con.push_element md_im_link(children, url, title)
    else
      maruku_error 'Unclosed link', src, con
      maruku_recover "No closing ): I will not create the link for #{children.inspect}", src, con
      con.push_elements children
      nil
    end
  when ?[ # link ref
    ref_id = read_ref_id(src, con)
    if ref_id
      # An empty id means "use the link text as the id".
      key = ref_id.size == 0 ? children.to_s : ref_id
      con.push_element md_link(children, sanitize_ref_id(key))
    else
      maruku_error "Could not read ref_id", src, con
      maruku_recover "I will not create the link for #{children.inspect}", src, con
      con.push_elements children
      nil
    end
  else # empty [link]
    con.push_element md_link(children, sanitize_ref_id(children.to_s))
  end
end # read link
614
-
615
# Parses an image starting at "![" and pushes the result onto +con+.
#
# Three shapes are handled after the bracketed alt text:
# * "(" -- inline image: a url and an optional quoted title, closed by ")".
#   A missing url, an unquoted title or a missing ")" is reported through
#   maruku_error, and the image element is still pushed with whatever was
#   read.
# * "[" -- reference image: the id is read with read_ref_id; an unreadable id
#   is reported and treated as empty, and an empty id falls back to the alt
#   text.
# * anything else -- the alt text itself is used as the reference id.
#
# Errors are reported with maruku_error, the same reporter the sibling
# parsers use.
# NOTE(review): the original called +error+, assumed to be an alias of
# maruku_error -- confirm against the error-handling module.
#
# src:: character source positioned at the "!" of "!["
# con:: span context that receives the image element
def read_image(src, con)
  src.ignore_chars(2) # opening "!["
  alt_text = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket

  # A single space between "]" and a following "[" or "(" is skipped.
  if src.cur_char == SPACE && (src.next_char == ?[ || src.next_char == ?( )
    src.ignore_char
  end

  case src.cur_char
  when ?( # inline image
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE, ?\t, ?)])
    unless url
      maruku_error "Could not read url from #{src.cur_chars(10).inspect}", src, con
    end
    src.consume_whitespace

    title = nil
    if src.cur_char != ?) # we have a title
      quote_char = src.cur_char
      title = read_quoted(src, con)
      if !title
        maruku_error 'Must quote title', src, con
      elsif !src.next_matches(/\s*\)/)
        # Tries to read a title with quotes: ![a](url "ti"tle")
        # If no closing paren follows, the rest up to ")" is assumed to be
        # more of the title, containing quotes of its own.
        rest = read_simple(src, [], [?)], [])
        rest.chop! # chop the closing char
        title << quote_char << rest
      end
    end

    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      # closing is nil when the input ends here; appending nil to a String
      # raises TypeError, so it is only appended when present.
      message = String.new("Unclosed link: '")
      message << closing if closing
      message << "' Read url=#{url.inspect} title=#{title.inspect}"
      maruku_error message, src, con
    end
    con.push_element md_im_image(alt_text, url, title)
  when ?[ # link ref
    ref_id = read_ref_id(src, con)
    unless ref_id # TODO: check around
      maruku_error 'Reference not closed.', src, con
      ref_id = ""
    end
    # An empty id means "use the alt text as the id".
    ref_id = alt_text.to_s if ref_id.size == 0

    con.push_element md_image(alt_text, sanitize_ref_id(ref_id))
  else # no stuff
    con.push_element md_image(alt_text, sanitize_ref_id(alt_text.to_s))
  end
end # read image
679
-
680
-
681
# Accumulates the result of parsing one span: a list of finished elements
# plus a buffer of plain characters that have not yet been turned into an
# element.
class SpanContext
  include MaRuKu::Strings

  # Elements read so far (Strings and MDElements).
  attr_accessor :elements
  # Plain text collected since the last element was pushed.
  attr_accessor :cur_string

  def initialize
    @elements = []
    @cur_string = ""
  end

  # Appends +e+ to the element list, first flushing any buffered text so
  # that ordering is preserved. Returns nil.
  #
  # Raises RuntimeError unless +e+ is a String or an MDElement.
  def push_element(e)
    unless e.kind_of?(String) || e.kind_of?(MDElement)
      raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} "
    end

    push_string_if_present
    @elements << e
    nil
  end
  alias push push_element

  # Pushes every item of +a+: Strings are appended to the text buffer
  # character by character, anything else goes through push_element.
  def push_elements(a)
    a.each do |e|
      if e.kind_of?(String)
        # Iterate characters, not bytes: appending an Integer byte to a
        # String adds it as a codepoint, which corrupts multibyte characters.
        e.each_char { |c| push_char c }
      else
        push_element e
      end
    end
  end

  # Moves the buffered text, if any, to the element list and starts a new
  # empty buffer. Returns nil.
  def push_string_if_present
    if @cur_string.size > 0
      @elements << @cur_string
      @cur_string = ""
    end
    nil
  end

  # Appends +c+ to the text buffer. Returns nil.
  def push_char(c)
    @cur_string << c
    nil
  end

  # Pushes a space into the current string if it does not already end
  # with one.
  def push_space
    @cur_string << ' ' unless @cur_string.end_with?(' ')
  end

  # Returns a multi-line, human-readable dump of the elements read so far
  # and of the text buffer when it is not empty.
  def describe
    lines = @elements.map { |x| x.inspect }.join("\n")
    s = "Elements read in span: \n" +
        add_tabs(lines, 1, ' -') + "\n"

    if @cur_string.size > 0
      s += "Current string: \n #{@cur_string.inspect}\n"
    end
    s
  end
end # SpanContext
744
-
745
- end end end end # module MaRuKu; module In; module Markdown; module SpanLevelParser
746
-