maruku 0.6.0 → 0.7.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (290) hide show
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE.txt +20 -0
  3. data/bin/maruku +153 -152
  4. data/bin/marutex +2 -29
  5. data/data/entities.xml +261 -0
  6. data/docs/markdown_syntax.md +9 -21
  7. data/docs/math.md +14 -18
  8. data/lib/maruku.rb +65 -78
  9. data/lib/maruku/attributes.rb +109 -214
  10. data/lib/maruku/defaults.rb +45 -67
  11. data/lib/maruku/document.rb +44 -0
  12. data/lib/maruku/element.rb +138 -0
  13. data/lib/maruku/errors.rb +80 -0
  14. data/lib/maruku/ext/div.rb +105 -113
  15. data/lib/maruku/ext/fenced_code.rb +97 -0
  16. data/lib/maruku/ext/math.rb +22 -26
  17. data/lib/maruku/ext/math/elements.rb +20 -26
  18. data/lib/maruku/ext/math/mathml_engines/blahtex.rb +92 -104
  19. data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +33 -26
  20. data/lib/maruku/ext/math/mathml_engines/none.rb +11 -19
  21. data/lib/maruku/ext/math/mathml_engines/ritex.rb +2 -4
  22. data/lib/maruku/ext/math/parsing.rb +121 -115
  23. data/lib/maruku/ext/math/to_html.rb +202 -187
  24. data/lib/maruku/ext/math/to_latex.rb +34 -21
  25. data/lib/maruku/helpers.rb +158 -257
  26. data/lib/maruku/html.rb +251 -0
  27. data/lib/maruku/input/charsource.rb +272 -319
  28. data/lib/maruku/input/extensions.rb +62 -63
  29. data/lib/maruku/input/html_helper.rb +233 -189
  30. data/lib/maruku/input/linesource.rb +90 -110
  31. data/lib/maruku/input/mdline.rb +131 -0
  32. data/lib/maruku/input/parse_block.rb +736 -613
  33. data/lib/maruku/input/parse_doc.rb +145 -217
  34. data/lib/maruku/input/parse_span.rb +740 -0
  35. data/lib/maruku/inspect_element.rb +60 -0
  36. data/lib/maruku/maruku.rb +14 -30
  37. data/lib/maruku/output/entity_table.rb +37 -0
  38. data/lib/maruku/output/s5/fancy.rb +462 -462
  39. data/lib/maruku/output/s5/to_s5.rb +115 -135
  40. data/lib/maruku/output/to_html.rb +907 -983
  41. data/lib/maruku/output/to_latex.rb +571 -563
  42. data/lib/maruku/output/to_markdown.rb +207 -162
  43. data/lib/maruku/output/to_s.rb +10 -52
  44. data/lib/maruku/string_utils.rb +129 -179
  45. data/lib/maruku/toc.rb +185 -196
  46. data/lib/maruku/version.rb +33 -38
  47. data/spec/block_docs/abbrev.md +776 -0
  48. data/{tests/unittest → spec/block_docs}/abbreviations.md +11 -20
  49. data/spec/block_docs/abbreviations2.md +27 -0
  50. data/{tests/unittest → spec/block_docs}/alt.md +2 -14
  51. data/{tests/unittest/pending → spec/block_docs}/amps.md +1 -13
  52. data/spec/block_docs/attribute_sanitize.md +22 -0
  53. data/{tests/unittest → spec/block_docs}/attributes/att2.md +0 -12
  54. data/{tests/unittest → spec/block_docs}/attributes/att3.md +2 -14
  55. data/{tests/unittest → spec/block_docs}/attributes/attributes.md +12 -16
  56. data/{tests/unittest → spec/block_docs}/attributes/circular.md +0 -12
  57. data/{tests/unittest → spec/block_docs}/attributes/default.md +1 -13
  58. data/spec/block_docs/atx_headers.md +22 -0
  59. data/spec/block_docs/auto_cdata.md +48 -0
  60. data/spec/block_docs/bad_cites.md +30 -0
  61. data/spec/block_docs/bad_divrefs.md +30 -0
  62. data/{tests/unittest → spec/block_docs}/blank.md +0 -12
  63. data/{tests/unittest → spec/block_docs}/blanks_in_code.md +16 -15
  64. data/spec/block_docs/block_quotes.md +66 -0
  65. data/{tests/unittest/loss.md → spec/block_docs/bug_def.md} +6 -18
  66. data/{tests/unittest → spec/block_docs}/bug_table.md +7 -19
  67. data/spec/block_docs/cites.md +37 -0
  68. data/{tests/unittest → spec/block_docs}/code.md +7 -14
  69. data/{tests/unittest → spec/block_docs}/code2.md +4 -14
  70. data/{tests/unittest → spec/block_docs}/code3.md +12 -16
  71. data/spec/block_docs/code4.md +79 -0
  72. data/{tests/unittest → spec/block_docs}/data_loss.md +2 -14
  73. data/spec/block_docs/div_without_newline.md +16 -0
  74. data/{tests/unittest → spec/block_docs}/divs/div1.md +0 -12
  75. data/{tests/unittest → spec/block_docs}/divs/div2.md +0 -12
  76. data/{tests/unittest → spec/block_docs}/divs/div3_nest.md +3 -15
  77. data/{tests/unittest → spec/block_docs}/easy.md +1 -13
  78. data/spec/block_docs/email.md +29 -0
  79. data/spec/block_docs/empty_cells.md +31 -0
  80. data/{tests/unittest → spec/block_docs}/encoding/iso-8859-1.md +1 -14
  81. data/{tests/unittest → spec/block_docs}/encoding/utf-8.md +0 -12
  82. data/{tests/unittest → spec/block_docs}/entities.md +33 -41
  83. data/{tests/unittest/notyet → spec/block_docs}/escape.md +2 -14
  84. data/{tests/unittest → spec/block_docs}/escaping.md +11 -22
  85. data/{tests/unittest → spec/block_docs}/extra_dl.md +2 -13
  86. data/{tests/unittest → spec/block_docs}/extra_header_id.md +14 -20
  87. data/{tests/unittest → spec/block_docs}/extra_table1.md +9 -21
  88. data/spec/block_docs/fenced_code_blocks.md +58 -0
  89. data/spec/block_docs/fenced_code_blocks_highlighted.md +17 -0
  90. data/{tests/unittest → spec/block_docs}/footnotes.md +12 -24
  91. data/spec/block_docs/footnotes2.md +82 -0
  92. data/spec/block_docs/hard.md +25 -0
  93. data/spec/block_docs/header_after_par.md +62 -0
  94. data/{tests/unittest → spec/block_docs}/headers.md +10 -18
  95. data/{tests/unittest → spec/block_docs}/hex_entities.md +7 -18
  96. data/{tests/unittest → spec/block_docs}/hrule.md +5 -12
  97. data/{tests/unittest → spec/block_docs}/html3.md +1 -13
  98. data/{tests/unittest → spec/block_docs}/html4.md +2 -14
  99. data/{tests/unittest → spec/block_docs}/html5.md +2 -14
  100. data/spec/block_docs/html_block_in_para.md +22 -0
  101. data/spec/block_docs/html_inline.md +25 -0
  102. data/spec/block_docs/html_trailing.md +31 -0
  103. data/spec/block_docs/ie.md +62 -0
  104. data/spec/block_docs/iframe.md +29 -0
  105. data/spec/block_docs/ignore_bad_header.md +9 -0
  106. data/{tests/unittest → spec/block_docs}/images.md +22 -28
  107. data/{tests/unittest → spec/block_docs}/images2.md +7 -17
  108. data/{tests/unittest → spec/block_docs}/inline_html.md +37 -67
  109. data/{tests/unittest → spec/block_docs}/inline_html2.md +1 -13
  110. data/spec/block_docs/inline_html_beginning.md +10 -0
  111. data/spec/block_docs/issue106.md +78 -0
  112. data/spec/block_docs/issue115.md +20 -0
  113. data/spec/block_docs/issue117.md +13 -0
  114. data/spec/block_docs/issue120.md +48 -0
  115. data/spec/block_docs/issue123.md +11 -0
  116. data/spec/block_docs/issue124.md +16 -0
  117. data/spec/block_docs/issue126.md +9 -0
  118. data/spec/block_docs/issue130.md +11 -0
  119. data/spec/block_docs/issue20.md +9 -0
  120. data/spec/block_docs/issue26.md +22 -0
  121. data/spec/block_docs/issue29.md +9 -0
  122. data/spec/block_docs/issue30.md +30 -0
  123. data/spec/block_docs/issue31.md +25 -0
  124. data/spec/block_docs/issue40.md +52 -0
  125. data/spec/block_docs/issue64.md +55 -0
  126. data/spec/block_docs/issue67.md +19 -0
  127. data/spec/block_docs/issue70.md +11 -0
  128. data/spec/block_docs/issue72.md +17 -0
  129. data/spec/block_docs/issue74.md +38 -0
  130. data/spec/block_docs/issue79.md +15 -0
  131. data/spec/block_docs/issue83.md +13 -0
  132. data/spec/block_docs/issue85.md +25 -0
  133. data/spec/block_docs/issue88.md +19 -0
  134. data/spec/block_docs/issue89.md +12 -0
  135. data/spec/block_docs/issue90.md +38 -0
  136. data/{tests/unittest/pending → spec/block_docs}/link.md +21 -18
  137. data/{tests/unittest → spec/block_docs}/links.md +33 -32
  138. data/spec/block_docs/links2.md +21 -0
  139. data/{tests/unittest → spec/block_docs}/list1.md +0 -12
  140. data/{tests/unittest → spec/block_docs}/list12.md +2 -14
  141. data/{tests/unittest → spec/block_docs}/list2.md +2 -14
  142. data/spec/block_docs/list_multipara.md +42 -0
  143. data/{tests/unittest → spec/block_docs}/lists.md +28 -29
  144. data/{tests/unittest → spec/block_docs}/lists10.md +2 -14
  145. data/spec/block_docs/lists11.md +23 -0
  146. data/spec/block_docs/lists12.md +43 -0
  147. data/spec/block_docs/lists13.md +55 -0
  148. data/spec/block_docs/lists14.md +61 -0
  149. data/spec/block_docs/lists15.md +36 -0
  150. data/spec/block_docs/lists6.md +88 -0
  151. data/spec/block_docs/lists7b.md +58 -0
  152. data/spec/block_docs/lists9.md +53 -0
  153. data/{tests/unittest → spec/block_docs}/lists_after_paragraph.md +19 -25
  154. data/spec/block_docs/lists_blank.md +35 -0
  155. data/{tests/unittest/list3.md → spec/block_docs/lists_blockquote_code.md} +2 -14
  156. data/{tests/unittest/list4.md → spec/block_docs/lists_need_blank_line.md} +44 -29
  157. data/spec/block_docs/lists_nested.md +44 -0
  158. data/spec/block_docs/lists_nested_blankline.md +34 -0
  159. data/spec/block_docs/lists_nested_deep.md +43 -0
  160. data/spec/block_docs/lists_ol.md +129 -0
  161. data/spec/block_docs/lists_ol2.md +147 -0
  162. data/spec/block_docs/lists_paraindent.md +42 -0
  163. data/spec/block_docs/lists_tab.md +54 -0
  164. data/spec/block_docs/loss.md +17 -0
  165. data/spec/block_docs/math-blahtex/equations.md +29 -0
  166. data/spec/block_docs/math-blahtex/inline.md +48 -0
  167. data/spec/block_docs/math-blahtex/math2.md +52 -0
  168. data/spec/block_docs/math-blahtex/table.md +25 -0
  169. data/spec/block_docs/math/embedded_invalid_svg.md +108 -0
  170. data/spec/block_docs/math/embedded_svg.md +136 -0
  171. data/spec/block_docs/math/equations.md +49 -0
  172. data/spec/block_docs/math/inline.md +46 -0
  173. data/spec/block_docs/math/math2.md +53 -0
  174. data/{tests/unittest → spec/block_docs}/math/notmath.md +0 -12
  175. data/spec/block_docs/math/raw_mathml.md +87 -0
  176. data/spec/block_docs/math/spaces_after_inline_math.md +17 -0
  177. data/spec/block_docs/math/table.md +25 -0
  178. data/{tests/unittest → spec/block_docs}/math/table2.md +11 -23
  179. data/{tests/unittest → spec/block_docs}/misc_sw.md +184 -121
  180. data/{tests/unittest → spec/block_docs}/olist.md +6 -18
  181. data/{tests/unittest → spec/block_docs}/one.md +0 -12
  182. data/{tests/unittest → spec/block_docs}/paragraph.md +0 -12
  183. data/{tests/unittest → spec/block_docs}/paragraph_rules/dont_merge_ref.md +4 -12
  184. data/{tests/unittest → spec/block_docs}/paragraph_rules/tab_is_blank.md +0 -12
  185. data/{tests/unittest → spec/block_docs}/paragraphs.md +1 -13
  186. data/{tests/unittest → spec/block_docs}/recover/recover_links.md +4 -16
  187. data/{tests/unittest/pending/ref.md → spec/block_docs/ref_with_period.md} +7 -16
  188. data/spec/block_docs/ref_with_title.md +22 -0
  189. data/{tests/unittest → spec/block_docs}/references/long_example.md +16 -23
  190. data/{tests/unittest → spec/block_docs}/references/spaces_and_numbers.md +0 -12
  191. data/{tests/unittest → spec/block_docs}/smartypants.md +24 -31
  192. data/{tests/unittest → spec/block_docs}/syntax_hl.md +13 -17
  193. data/{tests/unittest → spec/block_docs}/table_attributes.md +6 -20
  194. data/spec/block_docs/table_colspan.md +41 -0
  195. data/spec/block_docs/tables.md +47 -0
  196. data/spec/block_docs/tables2.md +74 -0
  197. data/{tests/unittest → spec/block_docs}/test.md +1 -13
  198. data/{tests/unittest/notyet → spec/block_docs}/ticks.md +1 -13
  199. data/spec/block_docs/toc.md +87 -0
  200. data/{tests/unittest/notyet → spec/block_docs}/triggering.md +14 -25
  201. data/{tests/unittest → spec/block_docs}/underscore_in_words.md +0 -12
  202. data/{tests/unittest → spec/block_docs}/wrapping.md +4 -16
  203. data/spec/block_docs/xml.md +33 -0
  204. data/spec/block_docs/xml3.md +24 -0
  205. data/spec/block_docs/xml_comments.md +32 -0
  206. data/{tests/unittest → spec/block_docs}/xml_instruction.md +9 -20
  207. data/spec/block_spec.rb +110 -0
  208. data/spec/cli_spec.rb +8 -0
  209. data/spec/span_spec.rb +263 -0
  210. data/spec/spec_helper.rb +3 -0
  211. data/spec/to_html_utf8_spec.rb +13 -0
  212. metadata +218 -202
  213. data/Rakefile +0 -73
  214. data/bin/marudown +0 -29
  215. data/bin/marutest +0 -345
  216. data/docs/changelog.md +0 -334
  217. data/lib/maruku/errors_management.rb +0 -92
  218. data/lib/maruku/ext/math/latex_fix.rb +0 -12
  219. data/lib/maruku/input/parse_span_better.rb +0 -746
  220. data/lib/maruku/input/rubypants.rb +0 -225
  221. data/lib/maruku/input/type_detection.rb +0 -147
  222. data/lib/maruku/output/to_latex_entities.rb +0 -367
  223. data/lib/maruku/output/to_latex_strings.rb +0 -64
  224. data/lib/maruku/structures.rb +0 -167
  225. data/lib/maruku/structures_inspect.rb +0 -87
  226. data/lib/maruku/structures_iterators.rb +0 -61
  227. data/lib/maruku/tests/benchmark.rb +0 -82
  228. data/lib/maruku/tests/new_parser.rb +0 -373
  229. data/lib/maruku/tests/tests.rb +0 -136
  230. data/lib/maruku/usage/example1.rb +0 -33
  231. data/maruku_gem.rb +0 -33
  232. data/tests/bugs/code_in_links.md +0 -101
  233. data/tests/bugs/complex_escaping.md +0 -38
  234. data/tests/math/syntax.md +0 -46
  235. data/tests/math_usage/document.md +0 -13
  236. data/tests/others/abbreviations.md +0 -11
  237. data/tests/others/blank.md +0 -4
  238. data/tests/others/code.md +0 -5
  239. data/tests/others/code2.md +0 -8
  240. data/tests/others/code3.md +0 -16
  241. data/tests/others/email.md +0 -4
  242. data/tests/others/entities.md +0 -19
  243. data/tests/others/escaping.md +0 -16
  244. data/tests/others/extra_dl.md +0 -101
  245. data/tests/others/extra_header_id.md +0 -13
  246. data/tests/others/extra_table1.md +0 -40
  247. data/tests/others/footnotes.md +0 -17
  248. data/tests/others/headers.md +0 -10
  249. data/tests/others/hrule.md +0 -10
  250. data/tests/others/images.md +0 -20
  251. data/tests/others/inline_html.md +0 -42
  252. data/tests/others/links.md +0 -38
  253. data/tests/others/list1.md +0 -4
  254. data/tests/others/list2.md +0 -5
  255. data/tests/others/list3.md +0 -8
  256. data/tests/others/lists.md +0 -32
  257. data/tests/others/lists_after_paragraph.md +0 -44
  258. data/tests/others/lists_ol.md +0 -39
  259. data/tests/others/misc_sw.md +0 -105
  260. data/tests/others/one.md +0 -1
  261. data/tests/others/paragraphs.md +0 -13
  262. data/tests/others/sss06.md +0 -352
  263. data/tests/others/test.md +0 -4
  264. data/tests/s5/s5profiling.md +0 -48
  265. data/tests/unittest/bug_def.md +0 -28
  266. data/tests/unittest/email.md +0 -32
  267. data/tests/unittest/hang.md +0 -29
  268. data/tests/unittest/html2.md +0 -34
  269. data/tests/unittest/ie.md +0 -61
  270. data/tests/unittest/links2.md +0 -34
  271. data/tests/unittest/lists11.md +0 -28
  272. data/tests/unittest/lists6.md +0 -53
  273. data/tests/unittest/lists9.md +0 -76
  274. data/tests/unittest/lists_ol.md +0 -274
  275. data/tests/unittest/math/equations.md +0 -86
  276. data/tests/unittest/math/inline.md +0 -58
  277. data/tests/unittest/math/math2.md +0 -57
  278. data/tests/unittest/math/table.md +0 -37
  279. data/tests/unittest/notyet/header_after_par.md +0 -70
  280. data/tests/unittest/pending/empty_cells.md +0 -49
  281. data/tests/unittest/red_tests/abbrev.md +0 -1388
  282. data/tests/unittest/red_tests/lists7.md +0 -68
  283. data/tests/unittest/red_tests/lists7b.md +0 -128
  284. data/tests/unittest/red_tests/lists8.md +0 -76
  285. data/tests/unittest/red_tests/xml.md +0 -70
  286. data/tests/unittest/xml2.md +0 -31
  287. data/tests/unittest/xml3.md +0 -38
  288. data/tests/utf8-files/simple.md +0 -1
  289. data/unit_test_block.sh +0 -5
  290. data/unit_test_span.sh +0 -3
@@ -1,69 +1,68 @@
1
- module MaRuKu; module In; module Markdown
1
+ module MaRuKu::In::Markdown
2
+ # Hash Fixnum -> name
3
+ SpanExtensionsTrigger = {}
2
4
 
3
5
 
4
- # Hash Fixnum -> name
5
- SpanExtensionsTrigger = {}
6
-
7
-
8
- class SpanExtension
9
- # trigging chars
10
- attr_accessor :chars
11
- # trigging regexp
12
- attr_accessor :regexp
13
- # lambda
14
- attr_accessor :block
15
- end
16
-
17
- # Hash String -> Extension
18
- SpanExtensions = {}
6
+ class SpanExtension
7
+ # trigging chars
8
+ attr_accessor :chars
9
+ # trigging regexp
10
+ attr_accessor :regexp
11
+ # lambda
12
+ attr_accessor :block
13
+ end
19
14
 
20
- def check_span_extensions(src, con)
21
- c = src.cur_char
22
- if extensions = SpanExtensionsTrigger[c]
23
- extensions.each do |e|
24
- if e.regexp && (match = src.next_matches(e.regexp))
25
- return true if e.block.call(doc, src, con)
26
- end
27
- end
28
- end
29
- return false # not special
30
- end
31
-
32
- def self.register_span_extension(args)
33
- e = SpanExtension.new
34
- e.chars = [*args[:chars]]
35
- e.regexp = args[:regexp]
36
- e.block = args[:handler] || raise("No blocks passed")
37
- e.chars.each do |c|
38
- (SpanExtensionsTrigger[c] ||= []).push e
39
- end
40
- end
15
+ # Hash String -> Extension
16
+ SpanExtensions = {}
41
17
 
42
- def self.register_block_extension(args)
43
- regexp = args[:regexp]
44
- BlockExtensions[regexp] = (args[:handler] || raise("No blocks passed"))
45
- end
18
+ def check_span_extensions(src, con)
19
+ c = src.cur_char
20
+ if extensions = SpanExtensionsTrigger[c]
21
+ extensions.each do |e|
22
+ if e.regexp && src.next_matches(e.regexp)
23
+ return true if e.block.call(doc, src, con)
24
+ end
25
+ end
26
+ end
46
27
 
47
- # Hash Regexp -> Block
48
- BlockExtensions = {}
28
+ false # not special
29
+ end
49
30
 
50
- def check_block_extensions(src, con, line)
51
- BlockExtensions.each do |reg, block|
52
- if m = reg.match(line)
53
- block = BlockExtensions[reg]
54
- accepted = block.call(doc, src, con)
55
- return true if accepted
56
- end
57
- end
58
- return false # not special
59
- end
60
-
61
- def any_matching_block_extension?(line)
62
- BlockExtensions.each_key do |reg|
63
- m = reg.match(line)
64
- return m if m
65
- end
66
- return false
67
- end
68
-
69
- end end end
31
+ def self.register_span_extension(args)
32
+ e = SpanExtension.new
33
+ e.chars = [*args[:chars]]
34
+ e.regexp = args[:regexp]
35
+ e.block = args[:handler] || raise("No blocks passed")
36
+ e.chars.each do |c|
37
+ (SpanExtensionsTrigger[c] ||= []).push e
38
+ end
39
+ end
40
+
41
+ def self.register_block_extension(args)
42
+ regexp = args[:regexp]
43
+ BlockExtensions[regexp] = (args[:handler] || raise("No blocks passed"))
44
+ end
45
+
46
+ # Hash Regexp -> Block
47
+ BlockExtensions = {}
48
+
49
+ def check_block_extensions(src, con, line)
50
+ BlockExtensions.each do |reg, block|
51
+ if reg.match(line)
52
+ block = BlockExtensions[reg]
53
+ accepted = block.call(doc, src, con)
54
+ return true if accepted
55
+ end
56
+ end
57
+ false # not special
58
+ end
59
+
60
+ def any_matching_block_extension?(line)
61
+ BlockExtensions.each_key do |reg|
62
+ m = reg.match(line)
63
+ return m if m
64
+ end
65
+ false
66
+ end
67
+
68
+ end
@@ -1,189 +1,233 @@
1
- #--
2
- # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
- #
4
- # This file is part of Maruku.
5
- #
6
- # Maruku is free software; you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation; either version 2 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # Maruku is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with Maruku; if not, write to the Free Software
18
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
- #++
20
-
21
-
22
- module MaRuKu; module In; module Markdown; module SpanLevelParser
23
-
24
- # This class helps me read and sanitize HTML blocks
25
-
26
- # I tried to do this with REXML, but wasn't able to. (suggestions?)
27
-
28
- class HTMLHelper
29
- include MaRuKu::Strings
30
-
31
- Tag = %r{^<(/)?(\w+)\s*([^>]*)>}m
32
- PartialTag = %r{^<.*}m
33
-
34
- EverythingElse = %r{^[^<]+}m
35
- CommentStart = %r{^<!--}x
36
- CommentEnd = %r{^.*-->}
37
- TO_SANITIZE = ['img','hr','br']
38
-
39
- attr_reader :rest
40
-
41
- def my_debug(s)
42
- # puts "---"*10+"\n"+inspect+"\t>>>\t"s
43
- end
44
-
45
- def initialize
46
- @rest = ""
47
- @tag_stack = []
48
- @m = nil
49
- @already = ""
50
- self.state = :inside_element
51
- end
52
-
53
- attr_accessor :state # = :inside_element, :inside_tag, :inside_comment,
54
-
55
- def eat_this(line)
56
- @rest = line + @rest
57
- things_read = 0
58
- until @rest.empty?
59
- case self.state
60
- when :inside_comment
61
- if @m = CommentEnd.match(@rest)
62
- @already += @m.pre_match + @m.to_s
63
- @rest = @m.post_match
64
- self.state = :inside_element
65
- else
66
- @already += @rest
67
- @rest = ""
68
- self.state = :inside_comment
69
- end
70
- when :inside_element
71
- if @m = CommentStart.match(@rest)
72
- things_read += 1
73
- @already += @m.pre_match + @m.to_s
74
- @rest = @m.post_match
75
- self.state = :inside_comment
76
- elsif @m = Tag.match(@rest) then
77
- my_debug "#{@state}: Tag: #{@m.to_s.inspect}"
78
- things_read += 1
79
- handle_tag
80
- self.state = :inside_element
81
- elsif @m = PartialTag.match(@rest) then
82
- my_debug "#{@state}: PartialTag: #{@m.to_s.inspect}"
83
- @already += @m.pre_match
84
- @rest = @m.post_match
85
- @partial_tag = @m.to_s
86
- self.state = :inside_tag
87
- elsif @m = EverythingElse.match(@rest)
88
- my_debug "#{@state}: Everything: #{@m.to_s.inspect}"
89
- @already += @m.pre_match + @m.to_s
90
- @rest = @m.post_match
91
- self.state = :inside_element
92
- else
93
- error "Malformed HTML: not complete: #{@rest.inspect}"
94
- end
95
- when :inside_tag
96
- if @m = /^[^>]*>/.match(@rest) then
97
- my_debug "#{@state}: inside_tag: matched #{@m.to_s.inspect}"
98
- @partial_tag += @m.to_s
99
- my_debug "#{@state}: inside_tag: matched TOTAL: #{@partial_tag.to_s.inspect}"
100
- @rest = @partial_tag + @m.post_match
101
- @partial_tag = nil
102
- self.state = :inside_element
103
- else
104
- @partial_tag += @rest
105
- @rest = ""
106
- self.state = :inside_tag
107
- end
108
- else
109
- raise "Bug bug: state = #{self.state.inspect}"
110
- end # not inside comment
111
-
112
- # puts inspect
113
- # puts "Read: #{@tag_stack.inspect}"
114
- break if is_finished? and things_read>0
115
- end
116
- end
117
-
118
- def handle_tag()
119
- @already += @m.pre_match
120
- @rest = @m.post_match
121
-
122
- is_closing = !!@m[1]
123
- tag = @m[2]
124
- attributes = @m[3].to_s
125
-
126
- is_single = false
127
- if attributes[-1] == ?/ # =~ /\A(.*)\/\Z/
128
- attributes = attributes[0, attributes.size-1]
129
- is_single = true
130
- end
131
-
132
- my_debug "Attributes: #{attributes.inspect}"
133
- my_debug "READ TAG #{@m.to_s.inspect} tag = #{tag} closing? #{is_closing} single = #{is_single}"
134
-
135
- if TO_SANITIZE.include? tag
136
- attributes.strip!
137
- # puts "Attributes: #{attributes.inspect}"
138
- if attributes.size > 0
139
- @already += '<%s %s />' % [tag, attributes]
140
- else
141
- @already += '<%s />' % [tag]
142
- end
143
- elsif is_closing
144
- @already += @m.to_s
145
- if @tag_stack.empty?
146
- error "Malformed: closing tag #{tag.inspect} "+
147
- "in empty list"
148
- end
149
- if @tag_stack.last != tag
150
- error "Malformed: tag <#{tag}> "+
151
- "closes <#{@tag_stack.last}>"
152
- end
153
- @tag_stack.pop
154
- else
155
- @already += @m.to_s
156
-
157
- if not is_single
158
- @tag_stack.push(tag)
159
- my_debug "Pushing #{tag.inspect} when read #{@m.to_s.inspect}"
160
- end
161
- end
162
- end
163
- def error(s)
164
- raise Exception, "Error: #{s} \n"+ inspect, caller
165
- end
166
-
167
- def inspect; "HTML READER\n state=#{self.state} "+
168
- "match=#{@m.to_s.inspect}\n"+
169
- "Tag stack = #{@tag_stack.inspect} \n"+
170
- "Before:\n"+
171
- add_tabs(@already,1,'|')+"\n"+
172
- "After:\n"+
173
- add_tabs(@rest,1,'|')+"\n"
174
-
175
- end
176
-
177
-
178
- def stuff_you_read
179
- @already
180
- end
181
-
182
- def rest() @rest end
183
-
184
- def is_finished?
185
- (self.state == :inside_element) and @tag_stack.empty?
186
- end
187
- end # html helper
188
-
189
- end end end end
1
+ module MaRuKu::In::Markdown::SpanLevelParser
2
+
3
+ # This class helps me read and sanitize HTML blocks
4
+ class HTMLHelper
5
+ Tag = %r{^<(/)?(\w+)\s*([^>]*?)>}m
6
+ PartialTag = %r{^<.*}m
7
+ CData = %r{^\s*<!\[CDATA\[}m
8
+ CDataEnd = %r{\]\]>}m
9
+
10
+ EverythingElse = %r{^[^<]+}m
11
+ CommentStart = %r{^<!--}x
12
+ CommentEnd = %r{-->}
13
+ TO_SANITIZE = ['img', 'hr', 'br']
14
+
15
+ attr_reader :rest, :first_tag
16
+
17
+ def initialize
18
+ @rest = ""
19
+ @tag_stack = []
20
+ @m = nil
21
+ @already = ""
22
+ self.state = :inside_element
23
+ end
24
+
25
+ attr_accessor :state # = :inside_element, :inside_tag, :inside_comment, :inside_cdata
26
+
27
+ def eat_this(line)
28
+ @rest = line + @rest
29
+ things_read = 0
30
+ until @rest.empty?
31
+ case self.state
32
+ when :inside_comment
33
+ if @m = CommentEnd.match(@rest)
34
+ debug_state 'Comment End'
35
+ # Workaround for https://bugs.ruby-lang.org/issues/9277 and another bug in 1.9.2 where even a
36
+ # single dash in a comment will cause REXML to error.
37
+ @already << @m.pre_match.gsub(/-(?![^\-])/, '- ') << @m.to_s
38
+ @rest = @m.post_match
39
+ self.state = :inside_element
40
+ else
41
+ @already << @rest.gsub(/-(?![^\-])/, '- ') # Workaround for https://bugs.ruby-lang.org/issues/9277
42
+ @rest = ""
43
+ self.state = :inside_comment
44
+ end
45
+ when :inside_element
46
+ if @m = CommentStart.match(@rest)
47
+ debug_state 'Comment'
48
+ things_read += 1
49
+ @already << @m.pre_match << @m.to_s
50
+ @rest = @m.post_match
51
+ self.state = :inside_comment
52
+ elsif @m = Tag.match(@rest)
53
+ debug_state 'Tag'
54
+ things_read += 1
55
+ self.state = :inside_element
56
+ handle_tag
57
+ elsif @m = CData.match(@rest)
58
+ debug_state 'CDATA'
59
+ @already << @m.pre_match
60
+ close_script_style if script_style?
61
+ @already << @m.to_s
62
+ @rest = @m.post_match
63
+ self.state = :inside_cdata
64
+ elsif @m = PartialTag.match(@rest)
65
+ debug_state 'PartialTag'
66
+ @already << @m.pre_match
67
+ @rest = @m.post_match
68
+ @partial_tag = @m.to_s
69
+ self.state = :inside_tag
70
+ elsif @m = EverythingElse.match(@rest)
71
+ debug_state 'EverythingElse'
72
+ @already << @m.pre_match << @m.to_s
73
+ @rest = @m.post_match
74
+ self.state = :inside_element
75
+ else
76
+ error "Malformed HTML: not complete: #{@rest.inspect}"
77
+ end
78
+ when :inside_tag
79
+ if @m = /^[^>]*>/.match(@rest)
80
+ @partial_tag << @m.to_s
81
+ @rest = @partial_tag + @m.post_match
82
+ @partial_tag = nil
83
+ self.state = :inside_element
84
+ if @m = Tag.match(@rest)
85
+ things_read += 1
86
+ handle_tag
87
+ end
88
+ else
89
+ @partial_tag << @rest
90
+ @rest = ""
91
+ self.state = :inside_tag
92
+ end
93
+ when :inside_cdata
94
+ if @m = CDataEnd.match(@rest)
95
+ self.state = :inside_element
96
+ @already << @m.pre_match << @m.to_s
97
+ @rest = @m.post_match
98
+ start_script_style if script_style?
99
+ else
100
+ @already << @rest
101
+ @rest = ""
102
+ self.state = :inside_cdata
103
+ end
104
+ else
105
+ raise "Bug bug: state = #{self.state.inspect}"
106
+ end
107
+
108
+ break if is_finished? && things_read > 0
109
+ end
110
+ end
111
+
112
+ def handle_tag
113
+ @already << @m.pre_match
114
+ @rest = @m.post_match
115
+
116
+ is_closing = !!@m[1]
117
+ tag = @m[2]
118
+ @first_tag ||= tag
119
+ attributes = @m[3].to_s
120
+
121
+ is_single = false
122
+ if attributes[-1, 1] == '/'
123
+ attributes = attributes[0, attributes.size - 1]
124
+ is_single = true
125
+ end
126
+
127
+ if TO_SANITIZE.include? tag
128
+ attributes.strip!
129
+ if attributes.size > 0
130
+ @already << '<%s %s />' % [tag, attributes]
131
+ else
132
+ @already << '<%s />' % [tag]
133
+ end
134
+ elsif is_closing
135
+ if @tag_stack.empty?
136
+ error "Malformed: closing tag #{tag.inspect} in empty list"
137
+ elsif @tag_stack.last != tag
138
+ error "Malformed: tag <#{tag}> closes <#{@tag_stack.last}>"
139
+ end
140
+
141
+ close_script_style if script_style?
142
+
143
+ @already << @m.to_s
144
+ @tag_stack.pop
145
+ else
146
+ @already << @m.to_s
147
+ @tag_stack.push(tag) unless is_single
148
+
149
+ start_script_style if script_style?
150
+ end
151
+ end
152
+
153
+ def stuff_you_read
154
+ @already
155
+ end
156
+
157
+ def is_finished?
158
+ self.state == :inside_element && @tag_stack.empty?
159
+ end
160
+
161
+ private
162
+
163
+ def debug_state(note)
164
+ my_debug "#{@state}: #{note}: #{@m.to_s.inspect}"
165
+ end
166
+
167
+ def my_debug(s)
168
+ # puts "---" * 10 + "\n" + inspect + "\t>>>\t" + s
169
+ end
170
+
171
+ def error(s)
172
+ raise "Error: #{s} \n" + inspect, caller
173
+ end
174
+
175
+ def inspect
176
+ "HTML READER\n state=#{self.state} " +
177
+ "match=#{@m.to_s.inspect}\n" +
178
+ "Tag stack = #{@tag_stack.inspect} \n" +
179
+ "Before:\n" +
180
+ @already.gsub(/^/, '|') + "\n" +
181
+ "After:\n" +
182
+ @rest.gsub(/^/, '|') + "\n"
183
+ end
184
+
185
+ # Script and style tag handling
186
+ # -----------------------------
187
+ #
188
+ # XHTML, and XML parsers like REXML, require that certain characters be
189
+ # escaped within script or style tags. However, there are conflicts between
190
+ # documents served as XHTML vs HTML. So we need to be extra careful about
191
+ # how we escape these tags so they will even parse correctly. However, we
192
+ # also try to avoid adding that escaping unnecessarily.
193
+ #
194
+ # See http://dorward.me.uk/www/comments-cdata/ for a good explanation.
195
+
196
+ # Are we within a script or style tag?
197
+ def script_style?
198
+ %w(script style).include?(@tag_stack.last)
199
+ end
200
+
201
+ # Save our @already buffer elsewhere, and switch to using @already for the
202
+ # contents of this script or style tag.
203
+ def start_script_style
204
+ @before_already, @already = @already, ""
205
+ end
206
+
207
+ # Finish script or style tag content, wrapping it in CDATA if necessary,
208
+ # and add it to our original @already buffer.
209
+ def close_script_style
210
+ tag = @tag_stack.last
211
+
212
+ # See http://www.w3.org/TR/xhtml1/#C_4 for character sequences not allowed within an element body.
213
+ if @already =~ /<|&|\]\]>|--/
214
+ new_already = script_style_cdata_start(tag)
215
+ new_already << "\n" unless @already.start_with?("\n")
216
+ new_already << @already
217
+ new_already << "\n" unless @already.end_with?("\n")
218
+ new_already << script_style_cdata_end(tag)
219
+ @already = new_already
220
+ end
221
+ @before_already << @already
222
+ @already = @before_already
223
+ end
224
+
225
+ def script_style_cdata_start(tag)
226
+ (tag == 'script') ? "//<![CDATA[" : "/*<![CDATA[*/"
227
+ end
228
+
229
+ def script_style_cdata_end(tag)
230
+ (tag == 'script') ? "//]]>" : "/*]]>*/"
231
+ end
232
+ end
233
+ end