maruku 0.6.1 → 0.7.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (263)
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/MIT-LICENSE.txt +20 -0
  5. data/bin/maruku +153 -152
  6. data/bin/marutex +2 -29
  7. data/data/entities.xml +261 -0
  8. data/docs/math.md +14 -18
  9. data/lib/maruku.rb +65 -77
  10. data/lib/maruku/attributes.rb +109 -214
  11. data/lib/maruku/defaults.rb +45 -67
  12. data/lib/maruku/document.rb +43 -0
  13. data/lib/maruku/element.rb +112 -0
  14. data/lib/maruku/errors.rb +71 -0
  15. data/lib/maruku/ext/div.rb +105 -113
  16. data/lib/maruku/ext/fenced_code.rb +97 -0
  17. data/lib/maruku/ext/math.rb +22 -26
  18. data/lib/maruku/ext/math/elements.rb +20 -26
  19. data/lib/maruku/ext/math/mathml_engines/blahtex.rb +92 -104
  20. data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +33 -26
  21. data/lib/maruku/ext/math/mathml_engines/none.rb +11 -19
  22. data/lib/maruku/ext/math/mathml_engines/ritex.rb +2 -4
  23. data/lib/maruku/ext/math/parsing.rb +107 -113
  24. data/lib/maruku/ext/math/to_html.rb +184 -187
  25. data/lib/maruku/ext/math/to_latex.rb +30 -21
  26. data/lib/maruku/helpers.rb +158 -257
  27. data/lib/maruku/html.rb +254 -0
  28. data/lib/maruku/input/charsource.rb +272 -319
  29. data/lib/maruku/input/extensions.rb +62 -63
  30. data/lib/maruku/input/html_helper.rb +220 -189
  31. data/lib/maruku/input/linesource.rb +90 -110
  32. data/lib/maruku/input/mdline.rb +129 -0
  33. data/lib/maruku/input/parse_block.rb +618 -612
  34. data/lib/maruku/input/parse_doc.rb +145 -215
  35. data/lib/maruku/input/parse_span.rb +658 -0
  36. data/lib/maruku/input/rubypants.rb +200 -128
  37. data/lib/maruku/inspect_element.rb +60 -0
  38. data/lib/maruku/maruku.rb +10 -31
  39. data/lib/maruku/output/entity_table.rb +33 -0
  40. data/lib/maruku/output/s5/fancy.rb +462 -462
  41. data/lib/maruku/output/s5/to_s5.rb +115 -135
  42. data/lib/maruku/output/to_html.rb +898 -983
  43. data/lib/maruku/output/to_latex.rb +561 -560
  44. data/lib/maruku/output/to_markdown.rb +207 -162
  45. data/lib/maruku/output/to_s.rb +11 -52
  46. data/lib/maruku/string_utils.rb +129 -179
  47. data/lib/maruku/toc.rb +185 -196
  48. data/lib/maruku/version.rb +33 -38
  49. data/spec/block_docs/abbrev.md +776 -0
  50. data/{tests/unittest → spec/block_docs}/abbreviations.md +11 -20
  51. data/{tests/unittest → spec/block_docs}/alt.md +2 -14
  52. data/{tests/unittest/pending → spec/block_docs}/amps.md +1 -13
  53. data/{tests/unittest → spec/block_docs}/attributes/att2.md +0 -12
  54. data/{tests/unittest → spec/block_docs}/attributes/att3.md +2 -14
  55. data/{tests/unittest → spec/block_docs}/attributes/attributes.md +12 -16
  56. data/{tests/unittest → spec/block_docs}/attributes/circular.md +0 -12
  57. data/{tests/unittest → spec/block_docs}/attributes/default.md +1 -13
  58. data/{tests/unittest → spec/block_docs}/blank.md +0 -12
  59. data/{tests/unittest → spec/block_docs}/blanks_in_code.md +16 -15
  60. data/{tests/unittest/loss.md → spec/block_docs/bug_def.md} +6 -18
  61. data/{tests/unittest → spec/block_docs}/bug_table.md +3 -15
  62. data/{tests/unittest → spec/block_docs}/code.md +7 -14
  63. data/{tests/unittest → spec/block_docs}/code2.md +4 -14
  64. data/{tests/unittest → spec/block_docs}/code3.md +12 -16
  65. data/{tests/unittest → spec/block_docs}/data_loss.md +2 -14
  66. data/{tests/unittest → spec/block_docs}/divs/div1.md +0 -12
  67. data/{tests/unittest → spec/block_docs}/divs/div2.md +0 -12
  68. data/{tests/unittest → spec/block_docs}/divs/div3_nest.md +3 -15
  69. data/{tests/unittest → spec/block_docs}/easy.md +1 -13
  70. data/spec/block_docs/email.md +29 -0
  71. data/{tests/unittest/pending → spec/block_docs}/empty_cells.md +3 -15
  72. data/{tests/unittest → spec/block_docs}/encoding/iso-8859-1.md +1 -14
  73. data/{tests/unittest → spec/block_docs}/encoding/utf-8.md +0 -12
  74. data/{tests/unittest → spec/block_docs}/entities.md +27 -29
  75. data/{tests/unittest/notyet → spec/block_docs}/escape.md +2 -14
  76. data/{tests/unittest → spec/block_docs}/escaping.md +11 -22
  77. data/{tests/unittest → spec/block_docs}/extra_dl.md +2 -13
  78. data/{tests/unittest → spec/block_docs}/extra_header_id.md +14 -20
  79. data/{tests/unittest → spec/block_docs}/extra_table1.md +3 -15
  80. data/spec/block_docs/fenced_code_blocks.md +66 -0
  81. data/spec/block_docs/fenced_code_blocks_highlighted.md +18 -0
  82. data/{tests/unittest → spec/block_docs}/footnotes.md +12 -24
  83. data/spec/block_docs/footnotes2.md +78 -0
  84. data/spec/block_docs/hard.md +25 -0
  85. data/spec/block_docs/header_after_par.md +62 -0
  86. data/{tests/unittest → spec/block_docs}/headers.md +10 -18
  87. data/{tests/unittest → spec/block_docs}/hex_entities.md +7 -18
  88. data/{tests/unittest → spec/block_docs}/hrule.md +5 -12
  89. data/{tests/unittest → spec/block_docs}/html3.md +1 -13
  90. data/{tests/unittest → spec/block_docs}/html4.md +2 -14
  91. data/{tests/unittest → spec/block_docs}/html5.md +2 -14
  92. data/spec/block_docs/html_block_in_para.md +22 -0
  93. data/spec/block_docs/html_inline.md +25 -0
  94. data/spec/block_docs/html_trailing.md +31 -0
  95. data/spec/block_docs/ie.md +62 -0
  96. data/spec/block_docs/iframe.md +29 -0
  97. data/{tests/unittest → spec/block_docs}/images.md +22 -28
  98. data/{tests/unittest → spec/block_docs}/images2.md +7 -17
  99. data/{tests/unittest → spec/block_docs}/inline_html.md +37 -67
  100. data/{tests/unittest → spec/block_docs}/inline_html2.md +1 -13
  101. data/spec/block_docs/inline_html_beginning.md +10 -0
  102. data/spec/block_docs/issue20.md +9 -0
  103. data/spec/block_docs/issue26.md +22 -0
  104. data/spec/block_docs/issue29.md +9 -0
  105. data/spec/block_docs/issue30.md +30 -0
  106. data/spec/block_docs/issue31.md +25 -0
  107. data/spec/block_docs/issue40.md +40 -0
  108. data/spec/block_docs/issue64.md +55 -0
  109. data/spec/block_docs/issue67.md +19 -0
  110. data/spec/block_docs/issue70.md +11 -0
  111. data/spec/block_docs/issue72.md +17 -0
  112. data/spec/block_docs/issue74.md +38 -0
  113. data/spec/block_docs/issue79.md +15 -0
  114. data/spec/block_docs/issue83.md +13 -0
  115. data/spec/block_docs/issue85.md +25 -0
  116. data/spec/block_docs/issue88.md +19 -0
  117. data/spec/block_docs/issue89.md +12 -0
  118. data/spec/block_docs/issue90.md +38 -0
  119. data/{tests/unittest/pending → spec/block_docs}/link.md +21 -18
  120. data/{tests/unittest → spec/block_docs}/links.md +33 -32
  121. data/spec/block_docs/links2.md +21 -0
  122. data/{tests/unittest → spec/block_docs}/list1.md +0 -12
  123. data/{tests/unittest → spec/block_docs}/list12.md +2 -14
  124. data/{tests/unittest → spec/block_docs}/list2.md +2 -14
  125. data/spec/block_docs/list_multipara.md +42 -0
  126. data/{tests/unittest → spec/block_docs}/lists.md +28 -29
  127. data/{tests/unittest → spec/block_docs}/lists10.md +2 -14
  128. data/spec/block_docs/lists11.md +23 -0
  129. data/spec/block_docs/lists12.md +43 -0
  130. data/spec/block_docs/lists13.md +55 -0
  131. data/spec/block_docs/lists14.md +61 -0
  132. data/spec/block_docs/lists15.md +36 -0
  133. data/spec/block_docs/lists6.md +88 -0
  134. data/spec/block_docs/lists7b.md +58 -0
  135. data/spec/block_docs/lists9.md +53 -0
  136. data/{tests/unittest → spec/block_docs}/lists_after_paragraph.md +19 -25
  137. data/spec/block_docs/lists_blank.md +35 -0
  138. data/{tests/unittest/list3.md → spec/block_docs/lists_blockquote_code.md} +2 -14
  139. data/{tests/unittest/list4.md → spec/block_docs/lists_need_blank_line.md} +50 -21
  140. data/spec/block_docs/lists_nested.md +44 -0
  141. data/spec/block_docs/lists_nested_blankline.md +28 -0
  142. data/spec/block_docs/lists_nested_deep.md +43 -0
  143. data/{tests/unittest → spec/block_docs}/lists_ol.md +37 -54
  144. data/spec/block_docs/lists_paraindent.md +47 -0
  145. data/spec/block_docs/lists_tab.md +54 -0
  146. data/spec/block_docs/loss.md +17 -0
  147. data/spec/block_docs/math-blahtex/equations.md +30 -0
  148. data/spec/block_docs/math-blahtex/inline.md +48 -0
  149. data/spec/block_docs/math-blahtex/math2.md +45 -0
  150. data/spec/block_docs/math-blahtex/table.md +25 -0
  151. data/spec/block_docs/math/embedded_invalid_svg.md +79 -0
  152. data/spec/block_docs/math/embedded_svg.md +97 -0
  153. data/spec/block_docs/math/equations.md +44 -0
  154. data/{tests/unittest → spec/block_docs}/math/inline.md +7 -19
  155. data/spec/block_docs/math/math2.md +45 -0
  156. data/{tests/unittest → spec/block_docs}/math/notmath.md +0 -12
  157. data/spec/block_docs/math/raw_mathml.md +87 -0
  158. data/spec/block_docs/math/table.md +25 -0
  159. data/{tests/unittest → spec/block_docs}/math/table2.md +5 -17
  160. data/{tests/unittest → spec/block_docs}/misc_sw.md +181 -118
  161. data/{tests/unittest → spec/block_docs}/olist.md +6 -18
  162. data/{tests/unittest → spec/block_docs}/one.md +0 -12
  163. data/{tests/unittest → spec/block_docs}/paragraph.md +0 -12
  164. data/{tests/unittest → spec/block_docs}/paragraph_rules/dont_merge_ref.md +4 -12
  165. data/{tests/unittest → spec/block_docs}/paragraph_rules/tab_is_blank.md +0 -12
  166. data/{tests/unittest → spec/block_docs}/paragraphs.md +1 -13
  167. data/{tests/unittest → spec/block_docs}/recover/recover_links.md +4 -16
  168. data/{tests/unittest/pending/ref.md → spec/block_docs/ref_with_period.md} +7 -16
  169. data/spec/block_docs/ref_with_title.md +22 -0
  170. data/{tests/unittest → spec/block_docs}/references/long_example.md +16 -23
  171. data/{tests/unittest → spec/block_docs}/references/spaces_and_numbers.md +0 -12
  172. data/{tests/unittest → spec/block_docs}/smartypants.md +24 -31
  173. data/{tests/unittest → spec/block_docs}/syntax_hl.md +13 -17
  174. data/{tests/unittest → spec/block_docs}/table_attributes.md +2 -14
  175. data/spec/block_docs/tables.md +58 -0
  176. data/{tests/unittest → spec/block_docs}/test.md +1 -13
  177. data/{tests/unittest/notyet → spec/block_docs}/ticks.md +1 -13
  178. data/spec/block_docs/toc.md +87 -0
  179. data/{tests/unittest/notyet → spec/block_docs}/triggering.md +14 -25
  180. data/{tests/unittest → spec/block_docs}/underscore_in_words.md +0 -12
  181. data/{tests/unittest → spec/block_docs}/wrapping.md +4 -16
  182. data/spec/block_docs/xml.md +33 -0
  183. data/{tests/unittest → spec/block_docs}/xml2.md +0 -12
  184. data/spec/block_docs/xml3.md +24 -0
  185. data/{tests/unittest → spec/block_docs}/xml_instruction.md +9 -20
  186. data/spec/block_spec.rb +110 -0
  187. data/spec/cli_spec.rb +8 -0
  188. data/spec/span_spec.rb +256 -0
  189. data/spec/spec_helper.rb +2 -0
  190. data/spec/to_html_utf8_spec.rb +13 -0
  191. metadata +205 -243
  192. metadata.gz.sig +3 -0
  193. data/Rakefile +0 -48
  194. data/bin/marudown +0 -29
  195. data/bin/marutest +0 -345
  196. data/docs/changelog.md +0 -334
  197. data/lib/maruku/errors_management.rb +0 -92
  198. data/lib/maruku/ext/math/latex_fix.rb +0 -12
  199. data/lib/maruku/input/parse_span_better.rb +0 -746
  200. data/lib/maruku/input/type_detection.rb +0 -147
  201. data/lib/maruku/output/to_latex_entities.rb +0 -367
  202. data/lib/maruku/output/to_latex_strings.rb +0 -64
  203. data/lib/maruku/structures.rb +0 -167
  204. data/lib/maruku/structures_inspect.rb +0 -87
  205. data/lib/maruku/structures_iterators.rb +0 -61
  206. data/lib/maruku/tests/benchmark.rb +0 -82
  207. data/lib/maruku/tests/new_parser.rb +0 -373
  208. data/lib/maruku/tests/tests.rb +0 -136
  209. data/lib/maruku/usage/example1.rb +0 -33
  210. data/tests/bugs/code_in_links.md +0 -101
  211. data/tests/bugs/complex_escaping.md +0 -38
  212. data/tests/math/syntax.md +0 -46
  213. data/tests/math_usage/document.md +0 -13
  214. data/tests/others/abbreviations.md +0 -11
  215. data/tests/others/blank.md +0 -4
  216. data/tests/others/code.md +0 -5
  217. data/tests/others/code2.md +0 -8
  218. data/tests/others/code3.md +0 -16
  219. data/tests/others/email.md +0 -4
  220. data/tests/others/entities.md +0 -19
  221. data/tests/others/escaping.md +0 -16
  222. data/tests/others/extra_dl.md +0 -101
  223. data/tests/others/extra_header_id.md +0 -13
  224. data/tests/others/extra_table1.md +0 -40
  225. data/tests/others/footnotes.md +0 -17
  226. data/tests/others/headers.md +0 -10
  227. data/tests/others/hrule.md +0 -10
  228. data/tests/others/images.md +0 -20
  229. data/tests/others/inline_html.md +0 -42
  230. data/tests/others/links.md +0 -38
  231. data/tests/others/list1.md +0 -4
  232. data/tests/others/list2.md +0 -5
  233. data/tests/others/list3.md +0 -8
  234. data/tests/others/lists.md +0 -32
  235. data/tests/others/lists_after_paragraph.md +0 -44
  236. data/tests/others/lists_ol.md +0 -39
  237. data/tests/others/misc_sw.md +0 -105
  238. data/tests/others/one.md +0 -1
  239. data/tests/others/paragraphs.md +0 -13
  240. data/tests/others/sss06.md +0 -352
  241. data/tests/others/test.md +0 -4
  242. data/tests/s5/s5profiling.md +0 -48
  243. data/tests/unittest/bug_def.md +0 -28
  244. data/tests/unittest/email.md +0 -32
  245. data/tests/unittest/html2.md +0 -34
  246. data/tests/unittest/ie.md +0 -61
  247. data/tests/unittest/links2.md +0 -34
  248. data/tests/unittest/lists11.md +0 -28
  249. data/tests/unittest/lists6.md +0 -53
  250. data/tests/unittest/lists9.md +0 -76
  251. data/tests/unittest/math/equations.md +0 -86
  252. data/tests/unittest/math/math2.md +0 -57
  253. data/tests/unittest/math/table.md +0 -37
  254. data/tests/unittest/notyet/header_after_par.md +0 -70
  255. data/tests/unittest/red_tests/abbrev.md +0 -1388
  256. data/tests/unittest/red_tests/lists7.md +0 -68
  257. data/tests/unittest/red_tests/lists7b.md +0 -128
  258. data/tests/unittest/red_tests/lists8.md +0 -76
  259. data/tests/unittest/red_tests/xml.md +0 -70
  260. data/tests/unittest/xml3.md +0 -38
  261. data/tests/utf8-files/simple.md +0 -1
  262. data/unit_test_block.sh +0 -5
  263. data/unit_test_span.sh +0 -3
@@ -1,69 +1,68 @@
1
- module MaRuKu; module In; module Markdown
1
+ module MaRuKu::In::Markdown
2
+ # Hash Fixnum -> name
3
+ SpanExtensionsTrigger = {}
2
4
 
3
5
 
4
- # Hash Fixnum -> name
5
- SpanExtensionsTrigger = {}
6
-
7
-
8
- class SpanExtension
9
- # trigging chars
10
- attr_accessor :chars
11
- # trigging regexp
12
- attr_accessor :regexp
13
- # lambda
14
- attr_accessor :block
15
- end
16
-
17
- # Hash String -> Extension
18
- SpanExtensions = {}
6
+ class SpanExtension
7
+ # trigging chars
8
+ attr_accessor :chars
9
+ # trigging regexp
10
+ attr_accessor :regexp
11
+ # lambda
12
+ attr_accessor :block
13
+ end
19
14
 
20
- def check_span_extensions(src, con)
21
- c = src.cur_char
22
- if extensions = SpanExtensionsTrigger[c]
23
- extensions.each do |e|
24
- if e.regexp && (match = src.next_matches(e.regexp))
25
- return true if e.block.call(doc, src, con)
26
- end
27
- end
28
- end
29
- return false # not special
30
- end
31
-
32
- def self.register_span_extension(args)
33
- e = SpanExtension.new
34
- e.chars = [*args[:chars]]
35
- e.regexp = args[:regexp]
36
- e.block = args[:handler] || raise("No blocks passed")
37
- e.chars.each do |c|
38
- (SpanExtensionsTrigger[c] ||= []).push e
39
- end
40
- end
15
+ # Hash String -> Extension
16
+ SpanExtensions = {}
41
17
 
42
- def self.register_block_extension(args)
43
- regexp = args[:regexp]
44
- BlockExtensions[regexp] = (args[:handler] || raise("No blocks passed"))
45
- end
18
+ def check_span_extensions(src, con)
19
+ c = src.cur_char
20
+ if extensions = SpanExtensionsTrigger[c]
21
+ extensions.each do |e|
22
+ if e.regexp && src.next_matches(e.regexp)
23
+ return true if e.block.call(doc, src, con)
24
+ end
25
+ end
26
+ end
46
27
 
47
- # Hash Regexp -> Block
48
- BlockExtensions = {}
28
+ false # not special
29
+ end
49
30
 
50
- def check_block_extensions(src, con, line)
51
- BlockExtensions.each do |reg, block|
52
- if m = reg.match(line)
53
- block = BlockExtensions[reg]
54
- accepted = block.call(doc, src, con)
55
- return true if accepted
56
- end
57
- end
58
- return false # not special
59
- end
60
-
61
- def any_matching_block_extension?(line)
62
- BlockExtensions.each_key do |reg|
63
- m = reg.match(line)
64
- return m if m
65
- end
66
- return false
67
- end
68
-
69
- end end end
31
+ def self.register_span_extension(args)
32
+ e = SpanExtension.new
33
+ e.chars = [*args[:chars]]
34
+ e.regexp = args[:regexp]
35
+ e.block = args[:handler] || raise("No blocks passed")
36
+ e.chars.each do |c|
37
+ (SpanExtensionsTrigger[c] ||= []).push e
38
+ end
39
+ end
40
+
41
+ def self.register_block_extension(args)
42
+ regexp = args[:regexp]
43
+ BlockExtensions[regexp] = (args[:handler] || raise("No blocks passed"))
44
+ end
45
+
46
+ # Hash Regexp -> Block
47
+ BlockExtensions = {}
48
+
49
+ def check_block_extensions(src, con, line)
50
+ BlockExtensions.each do |reg, block|
51
+ if reg.match(line)
52
+ block = BlockExtensions[reg]
53
+ accepted = block.call(doc, src, con)
54
+ return true if accepted
55
+ end
56
+ end
57
+ false # not special
58
+ end
59
+
60
+ def any_matching_block_extension?(line)
61
+ BlockExtensions.each_key do |reg|
62
+ m = reg.match(line)
63
+ return m if m
64
+ end
65
+ false
66
+ end
67
+
68
+ end
@@ -1,189 +1,220 @@
1
- #--
2
- # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
- #
4
- # This file is part of Maruku.
5
- #
6
- # Maruku is free software; you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation; either version 2 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # Maruku is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with Maruku; if not, write to the Free Software
18
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
- #++
20
-
21
-
22
- module MaRuKu; module In; module Markdown; module SpanLevelParser
23
-
24
- # This class helps me read and sanitize HTML blocks
25
-
26
- # I tried to do this with REXML, but wasn't able to. (suggestions?)
27
-
28
- class HTMLHelper
29
- include MaRuKu::Strings
30
-
31
- Tag = %r{^<(/)?(\w+)\s*([^>]*)>}m
32
- PartialTag = %r{^<.*}m
33
-
34
- EverythingElse = %r{^[^<]+}m
35
- CommentStart = %r{^<!--}x
36
- CommentEnd = %r{^.*-->}
37
- TO_SANITIZE = ['img','hr','br']
38
-
39
- attr_reader :rest
40
-
41
- def my_debug(s)
42
- # puts "---"*10+"\n"+inspect+"\t>>>\t"s
43
- end
44
-
45
- def initialize
46
- @rest = ""
47
- @tag_stack = []
48
- @m = nil
49
- @already = ""
50
- self.state = :inside_element
51
- end
52
-
53
- attr_accessor :state # = :inside_element, :inside_tag, :inside_comment,
54
-
55
- def eat_this(line)
56
- @rest = line + @rest
57
- things_read = 0
58
- until @rest.empty?
59
- case self.state
60
- when :inside_comment
61
- if @m = CommentEnd.match(@rest)
62
- @already += @m.pre_match + @m.to_s
63
- @rest = @m.post_match
64
- self.state = :inside_element
65
- else
66
- @already += @rest
67
- @rest = ""
68
- self.state = :inside_comment
69
- end
70
- when :inside_element
71
- if @m = CommentStart.match(@rest)
72
- things_read += 1
73
- @already += @m.pre_match + @m.to_s
74
- @rest = @m.post_match
75
- self.state = :inside_comment
76
- elsif @m = Tag.match(@rest) then
77
- my_debug "#{@state}: Tag: #{@m.to_s.inspect}"
78
- things_read += 1
79
- handle_tag
80
- self.state = :inside_element
81
- elsif @m = PartialTag.match(@rest) then
82
- my_debug "#{@state}: PartialTag: #{@m.to_s.inspect}"
83
- @already += @m.pre_match
84
- @rest = @m.post_match
85
- @partial_tag = @m.to_s
86
- self.state = :inside_tag
87
- elsif @m = EverythingElse.match(@rest)
88
- my_debug "#{@state}: Everything: #{@m.to_s.inspect}"
89
- @already += @m.pre_match + @m.to_s
90
- @rest = @m.post_match
91
- self.state = :inside_element
92
- else
93
- error "Malformed HTML: not complete: #{@rest.inspect}"
94
- end
95
- when :inside_tag
96
- if @m = /^[^>]*>/.match(@rest) then
97
- my_debug "#{@state}: inside_tag: matched #{@m.to_s.inspect}"
98
- @partial_tag += @m.to_s
99
- my_debug "#{@state}: inside_tag: matched TOTAL: #{@partial_tag.to_s.inspect}"
100
- @rest = @partial_tag + @m.post_match
101
- @partial_tag = nil
102
- self.state = :inside_element
103
- else
104
- @partial_tag += @rest
105
- @rest = ""
106
- self.state = :inside_tag
107
- end
108
- else
109
- raise "Bug bug: state = #{self.state.inspect}"
110
- end # not inside comment
111
-
112
- # puts inspect
113
- # puts "Read: #{@tag_stack.inspect}"
114
- break if is_finished? and things_read>0
115
- end
116
- end
117
-
118
- def handle_tag()
119
- @already += @m.pre_match
120
- @rest = @m.post_match
121
-
122
- is_closing = !!@m[1]
123
- tag = @m[2]
124
- attributes = @m[3].to_s
125
-
126
- is_single = false
127
- if attributes[-1] == ?/ # =~ /\A(.*)\/\Z/
128
- attributes = attributes[0, attributes.size-1]
129
- is_single = true
130
- end
131
-
132
- my_debug "Attributes: #{attributes.inspect}"
133
- my_debug "READ TAG #{@m.to_s.inspect} tag = #{tag} closing? #{is_closing} single = #{is_single}"
134
-
135
- if TO_SANITIZE.include? tag
136
- attributes.strip!
137
- # puts "Attributes: #{attributes.inspect}"
138
- if attributes.size > 0
139
- @already += '<%s %s />' % [tag, attributes]
140
- else
141
- @already += '<%s />' % [tag]
142
- end
143
- elsif is_closing
144
- @already += @m.to_s
145
- if @tag_stack.empty?
146
- error "Malformed: closing tag #{tag.inspect} "+
147
- "in empty list"
148
- end
149
- if @tag_stack.last != tag
150
- error "Malformed: tag <#{tag}> "+
151
- "closes <#{@tag_stack.last}>"
152
- end
153
- @tag_stack.pop
154
- else
155
- @already += @m.to_s
156
-
157
- if not is_single
158
- @tag_stack.push(tag)
159
- my_debug "Pushing #{tag.inspect} when read #{@m.to_s.inspect}"
160
- end
161
- end
162
- end
163
- def error(s)
164
- raise Exception, "Error: #{s} \n"+ inspect, caller
165
- end
166
-
167
- def inspect; "HTML READER\n state=#{self.state} "+
168
- "match=#{@m.to_s.inspect}\n"+
169
- "Tag stack = #{@tag_stack.inspect} \n"+
170
- "Before:\n"+
171
- add_tabs(@already,1,'|')+"\n"+
172
- "After:\n"+
173
- add_tabs(@rest,1,'|')+"\n"
174
-
175
- end
176
-
177
-
178
- def stuff_you_read
179
- @already
180
- end
181
-
182
- def rest() @rest end
183
-
184
- def is_finished?
185
- (self.state == :inside_element) and @tag_stack.empty?
186
- end
187
- end # html helper
188
-
189
- end end end end
1
+ module MaRuKu::In::Markdown::SpanLevelParser
2
+
3
+ # This class helps me read and sanitize HTML blocks
4
+ class HTMLHelper
5
+ Tag = %r{^<(/)?(\w+)\s*([^>]*?)>}m
6
+ PartialTag = %r{^<.*}m
7
+ CData = %r{^\s*<!\[CDATA\[}m
8
+ CDataEnd = %r{\]\]>}m
9
+
10
+ EverythingElse = %r{^[^<]+}m
11
+ CommentStart = %r{^<!--}x
12
+ CommentEnd = %r{-->}
13
+ TO_SANITIZE = ['img','hr','br']
14
+
15
+ attr_reader :rest, :first_tag
16
+
17
+ def my_debug(s)
18
+ # puts "---" * 10 + "\n" + inspect + "\t>>>\t" + s
19
+ end
20
+
21
+ def initialize
22
+ @rest = ""
23
+ @tag_stack = []
24
+ @m = nil
25
+ @already = ""
26
+ self.state = :inside_element
27
+ end
28
+
29
+ attr_accessor :state # = :inside_element, :inside_tag, :inside_comment, :inside_cdata, :inside_script_style
30
+
31
+ def eat_this(line)
32
+ @rest = line + @rest
33
+ things_read = 0
34
+ until @rest.empty?
35
+ case self.state
36
+ when :inside_comment
37
+ if @m = CommentEnd.match(@rest)
38
+ my_debug "#{@state}: Comment End: #{@m.to_s.inspect}"
39
+ @already << @m.pre_match << @m.to_s
40
+ @rest = @m.post_match
41
+ self.state = :inside_element
42
+ else
43
+ @already << @rest
44
+ @rest = ""
45
+ self.state = :inside_comment
46
+ end
47
+ when :inside_element
48
+ if @m = CommentStart.match(@rest)
49
+ my_debug "#{@state}: Comment: #{@m.to_s.inspect}"
50
+ things_read += 1
51
+ @already << @m.pre_match << @m.to_s
52
+ @rest = @m.post_match
53
+ self.state = :inside_comment
54
+ elsif @m = Tag.match(@rest)
55
+ my_debug "#{@state}: Tag: #{@m.to_s.inspect}"
56
+ things_read += 1
57
+ self.state = :inside_element
58
+ handle_tag
59
+ elsif @m = CData.match(@rest)
60
+ my_debug "#{@state}: CDATA: #{@m.to_s.inspect}"
61
+ @already << @m.pre_match << @m.to_s
62
+ @rest = @m.post_match
63
+ self.state = :inside_cdata
64
+ elsif @m = PartialTag.match(@rest)
65
+ my_debug "#{@state}: PartialTag: #{@m.to_s.inspect}"
66
+ @already << @m.pre_match
67
+ @rest = @m.post_match
68
+ @partial_tag = @m.to_s
69
+ self.state = :inside_tag
70
+ elsif @m = EverythingElse.match(@rest)
71
+ my_debug "#{@state}: Everything: #{@m.to_s.inspect}"
72
+ @already << @m.pre_match << @m.to_s
73
+ @rest = @m.post_match
74
+ self.state = :inside_element
75
+ else
76
+ error "Malformed HTML: not complete: #{@rest.inspect}"
77
+ end
78
+ when :inside_tag
79
+ if @m = /^[^>]*>/.match(@rest)
80
+ my_debug "#{@state}: matched #{@m.to_s.inspect}"
81
+ @partial_tag << @m.to_s
82
+ my_debug "#{@state}: matched TOTAL: #{@partial_tag.to_s.inspect}"
83
+ @rest = @partial_tag + @m.post_match
84
+ @partial_tag = nil
85
+ self.state = :inside_element
86
+ else
87
+ @partial_tag << @rest
88
+ @rest = ""
89
+ self.state = :inside_tag
90
+ end
91
+ when :inside_cdata
92
+ if @m = CDataEnd.match(@rest)
93
+ my_debug "#{@state}: matched #{@m.to_s.inspect}"
94
+ @already << @m.pre_match << @m.to_s
95
+ @rest = @m.post_match
96
+ self.state = %(script style).include?(@tag_stack.last) ? :inside_script_style : :inside_element
97
+ else
98
+ @already << @rest
99
+ @rest = ""
100
+ self.state = :inside_cdata
101
+ end
102
+ when :inside_script_style
103
+ if @m = CData.match(@rest)
104
+ if @already.rstrip.end_with?('<![CDATA[')
105
+ @already << @m.pre_match
106
+ @rest = @m.post_match
107
+ else
108
+ my_debug "#{@state}: CDATA: #{@m.to_s.inspect}"
109
+ @already << @m.pre_match << @m.to_s
110
+ @rest = @m.post_match
111
+ self.state = :inside_cdata
112
+ end
113
+ elsif @m = Tag.match(@rest)
114
+ is_closing = !!@m[1]
115
+ tag = @m[2]
116
+ if is_closing && tag == @tag_stack.last
117
+ my_debug "#{@state}: matched #{@m.to_s.inspect}"
118
+ @already << @m.pre_match
119
+ @rest = @m.post_match
120
+ # This is necessary to properly parse
121
+ # script tags
122
+ @already << "]]>" unless @already.rstrip.end_with?("]]>")
123
+ self.state = :inside_element
124
+ handle_tag false # don't double-add pre_match
125
+ else
126
+ @already << @rest
127
+ @rest = ""
128
+ end
129
+ elsif @m = EverythingElse.match(@rest)
130
+ my_debug "#{@state}: Everything: #{@m.to_s.inspect}"
131
+ @already << @m.pre_match << @m.to_s
132
+ @rest = @m.post_match
133
+ else
134
+ @already << @rest
135
+ @rest = ""
136
+ end
137
+ else
138
+ raise "Bug bug: state = #{self.state.inspect}"
139
+ end # not inside comment
140
+
141
+ break if is_finished? && things_read > 0
142
+ end
143
+ end
144
+
145
+ def handle_tag(add_pre_match = true)
146
+ @already << @m.pre_match if add_pre_match
147
+ @rest = @m.post_match
148
+
149
+ is_closing = !!@m[1]
150
+ tag = @m[2]
151
+ @first_tag ||= tag
152
+ attributes = @m[3].to_s
153
+
154
+ is_single = false
155
+ if attributes[-1, 1] == '/'
156
+ attributes = attributes[0, attributes.size - 1]
157
+ is_single = true
158
+ end
159
+
160
+ my_debug "Attributes: #{attributes.inspect}"
161
+ my_debug "READ TAG #{@m.to_s.inspect} tag = #{tag} closing? #{is_closing} single = #{is_single}"
162
+
163
+ if TO_SANITIZE.include? tag
164
+ attributes.strip!
165
+ # puts "Attributes: #{attributes.inspect}"
166
+ if attributes.size > 0
167
+ @already << '<%s %s />' % [tag, attributes]
168
+ else
169
+ @already << '<%s />' % [tag]
170
+ end
171
+ elsif is_closing
172
+ if @tag_stack.empty?
173
+ error "Malformed: closing tag #{tag.inspect} in empty list"
174
+ end
175
+ if @tag_stack.last != tag
176
+ error "Malformed: tag <#{tag}> closes <#{@tag_stack.last}>"
177
+ end
178
+
179
+ @already << @m.to_s
180
+ @tag_stack.pop
181
+ else
182
+ @already << @m.to_s
183
+
184
+ if not is_single
185
+ @tag_stack.push(tag)
186
+ my_debug "Pushing #{tag.inspect} when read #{@m.to_s.inspect}"
187
+ end
188
+
189
+ if %w(script style).include?(@tag_stack.last)
190
+ # This is necessary to properly parse
191
+ # script tags
192
+ @already << "<![CDATA["
193
+ self.state = :inside_script_style
194
+ end
195
+ end
196
+ end
197
+
198
+ def error(s)
199
+ raise "Error: #{s} \n" + inspect, caller
200
+ end
201
+
202
+ def inspect
203
+ "HTML READER\n state=#{self.state} " +
204
+ "match=#{@m.to_s.inspect}\n" +
205
+ "Tag stack = #{@tag_stack.inspect} \n" +
206
+ "Before:\n" +
207
+ @already.gsub(/^/, '|') + "\n" +
208
+ "After:\n" +
209
+ @rest.gsub(/^/, '|') + "\n"
210
+ end
211
+
212
+ def stuff_you_read
213
+ @already
214
+ end
215
+
216
+ def is_finished?
217
+ (self.state == :inside_element) and @tag_stack.empty?
218
+ end
219
+ end # html helper
220
+ end