maruku 0.6.1 → 0.7.0.beta1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (263) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/MIT-LICENSE.txt +20 -0
  5. data/bin/maruku +153 -152
  6. data/bin/marutex +2 -29
  7. data/data/entities.xml +261 -0
  8. data/docs/math.md +14 -18
  9. data/lib/maruku.rb +65 -77
  10. data/lib/maruku/attributes.rb +109 -214
  11. data/lib/maruku/defaults.rb +45 -67
  12. data/lib/maruku/document.rb +43 -0
  13. data/lib/maruku/element.rb +112 -0
  14. data/lib/maruku/errors.rb +71 -0
  15. data/lib/maruku/ext/div.rb +105 -113
  16. data/lib/maruku/ext/fenced_code.rb +97 -0
  17. data/lib/maruku/ext/math.rb +22 -26
  18. data/lib/maruku/ext/math/elements.rb +20 -26
  19. data/lib/maruku/ext/math/mathml_engines/blahtex.rb +92 -104
  20. data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +33 -26
  21. data/lib/maruku/ext/math/mathml_engines/none.rb +11 -19
  22. data/lib/maruku/ext/math/mathml_engines/ritex.rb +2 -4
  23. data/lib/maruku/ext/math/parsing.rb +107 -113
  24. data/lib/maruku/ext/math/to_html.rb +184 -187
  25. data/lib/maruku/ext/math/to_latex.rb +30 -21
  26. data/lib/maruku/helpers.rb +158 -257
  27. data/lib/maruku/html.rb +254 -0
  28. data/lib/maruku/input/charsource.rb +272 -319
  29. data/lib/maruku/input/extensions.rb +62 -63
  30. data/lib/maruku/input/html_helper.rb +220 -189
  31. data/lib/maruku/input/linesource.rb +90 -110
  32. data/lib/maruku/input/mdline.rb +129 -0
  33. data/lib/maruku/input/parse_block.rb +618 -612
  34. data/lib/maruku/input/parse_doc.rb +145 -215
  35. data/lib/maruku/input/parse_span.rb +658 -0
  36. data/lib/maruku/input/rubypants.rb +200 -128
  37. data/lib/maruku/inspect_element.rb +60 -0
  38. data/lib/maruku/maruku.rb +10 -31
  39. data/lib/maruku/output/entity_table.rb +33 -0
  40. data/lib/maruku/output/s5/fancy.rb +462 -462
  41. data/lib/maruku/output/s5/to_s5.rb +115 -135
  42. data/lib/maruku/output/to_html.rb +898 -983
  43. data/lib/maruku/output/to_latex.rb +561 -560
  44. data/lib/maruku/output/to_markdown.rb +207 -162
  45. data/lib/maruku/output/to_s.rb +11 -52
  46. data/lib/maruku/string_utils.rb +129 -179
  47. data/lib/maruku/toc.rb +185 -196
  48. data/lib/maruku/version.rb +33 -38
  49. data/spec/block_docs/abbrev.md +776 -0
  50. data/{tests/unittest → spec/block_docs}/abbreviations.md +11 -20
  51. data/{tests/unittest → spec/block_docs}/alt.md +2 -14
  52. data/{tests/unittest/pending → spec/block_docs}/amps.md +1 -13
  53. data/{tests/unittest → spec/block_docs}/attributes/att2.md +0 -12
  54. data/{tests/unittest → spec/block_docs}/attributes/att3.md +2 -14
  55. data/{tests/unittest → spec/block_docs}/attributes/attributes.md +12 -16
  56. data/{tests/unittest → spec/block_docs}/attributes/circular.md +0 -12
  57. data/{tests/unittest → spec/block_docs}/attributes/default.md +1 -13
  58. data/{tests/unittest → spec/block_docs}/blank.md +0 -12
  59. data/{tests/unittest → spec/block_docs}/blanks_in_code.md +16 -15
  60. data/{tests/unittest/loss.md → spec/block_docs/bug_def.md} +6 -18
  61. data/{tests/unittest → spec/block_docs}/bug_table.md +3 -15
  62. data/{tests/unittest → spec/block_docs}/code.md +7 -14
  63. data/{tests/unittest → spec/block_docs}/code2.md +4 -14
  64. data/{tests/unittest → spec/block_docs}/code3.md +12 -16
  65. data/{tests/unittest → spec/block_docs}/data_loss.md +2 -14
  66. data/{tests/unittest → spec/block_docs}/divs/div1.md +0 -12
  67. data/{tests/unittest → spec/block_docs}/divs/div2.md +0 -12
  68. data/{tests/unittest → spec/block_docs}/divs/div3_nest.md +3 -15
  69. data/{tests/unittest → spec/block_docs}/easy.md +1 -13
  70. data/spec/block_docs/email.md +29 -0
  71. data/{tests/unittest/pending → spec/block_docs}/empty_cells.md +3 -15
  72. data/{tests/unittest → spec/block_docs}/encoding/iso-8859-1.md +1 -14
  73. data/{tests/unittest → spec/block_docs}/encoding/utf-8.md +0 -12
  74. data/{tests/unittest → spec/block_docs}/entities.md +27 -29
  75. data/{tests/unittest/notyet → spec/block_docs}/escape.md +2 -14
  76. data/{tests/unittest → spec/block_docs}/escaping.md +11 -22
  77. data/{tests/unittest → spec/block_docs}/extra_dl.md +2 -13
  78. data/{tests/unittest → spec/block_docs}/extra_header_id.md +14 -20
  79. data/{tests/unittest → spec/block_docs}/extra_table1.md +3 -15
  80. data/spec/block_docs/fenced_code_blocks.md +66 -0
  81. data/spec/block_docs/fenced_code_blocks_highlighted.md +18 -0
  82. data/{tests/unittest → spec/block_docs}/footnotes.md +12 -24
  83. data/spec/block_docs/footnotes2.md +78 -0
  84. data/spec/block_docs/hard.md +25 -0
  85. data/spec/block_docs/header_after_par.md +62 -0
  86. data/{tests/unittest → spec/block_docs}/headers.md +10 -18
  87. data/{tests/unittest → spec/block_docs}/hex_entities.md +7 -18
  88. data/{tests/unittest → spec/block_docs}/hrule.md +5 -12
  89. data/{tests/unittest → spec/block_docs}/html3.md +1 -13
  90. data/{tests/unittest → spec/block_docs}/html4.md +2 -14
  91. data/{tests/unittest → spec/block_docs}/html5.md +2 -14
  92. data/spec/block_docs/html_block_in_para.md +22 -0
  93. data/spec/block_docs/html_inline.md +25 -0
  94. data/spec/block_docs/html_trailing.md +31 -0
  95. data/spec/block_docs/ie.md +62 -0
  96. data/spec/block_docs/iframe.md +29 -0
  97. data/{tests/unittest → spec/block_docs}/images.md +22 -28
  98. data/{tests/unittest → spec/block_docs}/images2.md +7 -17
  99. data/{tests/unittest → spec/block_docs}/inline_html.md +37 -67
  100. data/{tests/unittest → spec/block_docs}/inline_html2.md +1 -13
  101. data/spec/block_docs/inline_html_beginning.md +10 -0
  102. data/spec/block_docs/issue20.md +9 -0
  103. data/spec/block_docs/issue26.md +22 -0
  104. data/spec/block_docs/issue29.md +9 -0
  105. data/spec/block_docs/issue30.md +30 -0
  106. data/spec/block_docs/issue31.md +25 -0
  107. data/spec/block_docs/issue40.md +40 -0
  108. data/spec/block_docs/issue64.md +55 -0
  109. data/spec/block_docs/issue67.md +19 -0
  110. data/spec/block_docs/issue70.md +11 -0
  111. data/spec/block_docs/issue72.md +17 -0
  112. data/spec/block_docs/issue74.md +38 -0
  113. data/spec/block_docs/issue79.md +15 -0
  114. data/spec/block_docs/issue83.md +13 -0
  115. data/spec/block_docs/issue85.md +25 -0
  116. data/spec/block_docs/issue88.md +19 -0
  117. data/spec/block_docs/issue89.md +12 -0
  118. data/spec/block_docs/issue90.md +38 -0
  119. data/{tests/unittest/pending → spec/block_docs}/link.md +21 -18
  120. data/{tests/unittest → spec/block_docs}/links.md +33 -32
  121. data/spec/block_docs/links2.md +21 -0
  122. data/{tests/unittest → spec/block_docs}/list1.md +0 -12
  123. data/{tests/unittest → spec/block_docs}/list12.md +2 -14
  124. data/{tests/unittest → spec/block_docs}/list2.md +2 -14
  125. data/spec/block_docs/list_multipara.md +42 -0
  126. data/{tests/unittest → spec/block_docs}/lists.md +28 -29
  127. data/{tests/unittest → spec/block_docs}/lists10.md +2 -14
  128. data/spec/block_docs/lists11.md +23 -0
  129. data/spec/block_docs/lists12.md +43 -0
  130. data/spec/block_docs/lists13.md +55 -0
  131. data/spec/block_docs/lists14.md +61 -0
  132. data/spec/block_docs/lists15.md +36 -0
  133. data/spec/block_docs/lists6.md +88 -0
  134. data/spec/block_docs/lists7b.md +58 -0
  135. data/spec/block_docs/lists9.md +53 -0
  136. data/{tests/unittest → spec/block_docs}/lists_after_paragraph.md +19 -25
  137. data/spec/block_docs/lists_blank.md +35 -0
  138. data/{tests/unittest/list3.md → spec/block_docs/lists_blockquote_code.md} +2 -14
  139. data/{tests/unittest/list4.md → spec/block_docs/lists_need_blank_line.md} +50 -21
  140. data/spec/block_docs/lists_nested.md +44 -0
  141. data/spec/block_docs/lists_nested_blankline.md +28 -0
  142. data/spec/block_docs/lists_nested_deep.md +43 -0
  143. data/{tests/unittest → spec/block_docs}/lists_ol.md +37 -54
  144. data/spec/block_docs/lists_paraindent.md +47 -0
  145. data/spec/block_docs/lists_tab.md +54 -0
  146. data/spec/block_docs/loss.md +17 -0
  147. data/spec/block_docs/math-blahtex/equations.md +30 -0
  148. data/spec/block_docs/math-blahtex/inline.md +48 -0
  149. data/spec/block_docs/math-blahtex/math2.md +45 -0
  150. data/spec/block_docs/math-blahtex/table.md +25 -0
  151. data/spec/block_docs/math/embedded_invalid_svg.md +79 -0
  152. data/spec/block_docs/math/embedded_svg.md +97 -0
  153. data/spec/block_docs/math/equations.md +44 -0
  154. data/{tests/unittest → spec/block_docs}/math/inline.md +7 -19
  155. data/spec/block_docs/math/math2.md +45 -0
  156. data/{tests/unittest → spec/block_docs}/math/notmath.md +0 -12
  157. data/spec/block_docs/math/raw_mathml.md +87 -0
  158. data/spec/block_docs/math/table.md +25 -0
  159. data/{tests/unittest → spec/block_docs}/math/table2.md +5 -17
  160. data/{tests/unittest → spec/block_docs}/misc_sw.md +181 -118
  161. data/{tests/unittest → spec/block_docs}/olist.md +6 -18
  162. data/{tests/unittest → spec/block_docs}/one.md +0 -12
  163. data/{tests/unittest → spec/block_docs}/paragraph.md +0 -12
  164. data/{tests/unittest → spec/block_docs}/paragraph_rules/dont_merge_ref.md +4 -12
  165. data/{tests/unittest → spec/block_docs}/paragraph_rules/tab_is_blank.md +0 -12
  166. data/{tests/unittest → spec/block_docs}/paragraphs.md +1 -13
  167. data/{tests/unittest → spec/block_docs}/recover/recover_links.md +4 -16
  168. data/{tests/unittest/pending/ref.md → spec/block_docs/ref_with_period.md} +7 -16
  169. data/spec/block_docs/ref_with_title.md +22 -0
  170. data/{tests/unittest → spec/block_docs}/references/long_example.md +16 -23
  171. data/{tests/unittest → spec/block_docs}/references/spaces_and_numbers.md +0 -12
  172. data/{tests/unittest → spec/block_docs}/smartypants.md +24 -31
  173. data/{tests/unittest → spec/block_docs}/syntax_hl.md +13 -17
  174. data/{tests/unittest → spec/block_docs}/table_attributes.md +2 -14
  175. data/spec/block_docs/tables.md +58 -0
  176. data/{tests/unittest → spec/block_docs}/test.md +1 -13
  177. data/{tests/unittest/notyet → spec/block_docs}/ticks.md +1 -13
  178. data/spec/block_docs/toc.md +87 -0
  179. data/{tests/unittest/notyet → spec/block_docs}/triggering.md +14 -25
  180. data/{tests/unittest → spec/block_docs}/underscore_in_words.md +0 -12
  181. data/{tests/unittest → spec/block_docs}/wrapping.md +4 -16
  182. data/spec/block_docs/xml.md +33 -0
  183. data/{tests/unittest → spec/block_docs}/xml2.md +0 -12
  184. data/spec/block_docs/xml3.md +24 -0
  185. data/{tests/unittest → spec/block_docs}/xml_instruction.md +9 -20
  186. data/spec/block_spec.rb +110 -0
  187. data/spec/cli_spec.rb +8 -0
  188. data/spec/span_spec.rb +256 -0
  189. data/spec/spec_helper.rb +2 -0
  190. data/spec/to_html_utf8_spec.rb +13 -0
  191. metadata +205 -243
  192. metadata.gz.sig +3 -0
  193. data/Rakefile +0 -48
  194. data/bin/marudown +0 -29
  195. data/bin/marutest +0 -345
  196. data/docs/changelog.md +0 -334
  197. data/lib/maruku/errors_management.rb +0 -92
  198. data/lib/maruku/ext/math/latex_fix.rb +0 -12
  199. data/lib/maruku/input/parse_span_better.rb +0 -746
  200. data/lib/maruku/input/type_detection.rb +0 -147
  201. data/lib/maruku/output/to_latex_entities.rb +0 -367
  202. data/lib/maruku/output/to_latex_strings.rb +0 -64
  203. data/lib/maruku/structures.rb +0 -167
  204. data/lib/maruku/structures_inspect.rb +0 -87
  205. data/lib/maruku/structures_iterators.rb +0 -61
  206. data/lib/maruku/tests/benchmark.rb +0 -82
  207. data/lib/maruku/tests/new_parser.rb +0 -373
  208. data/lib/maruku/tests/tests.rb +0 -136
  209. data/lib/maruku/usage/example1.rb +0 -33
  210. data/tests/bugs/code_in_links.md +0 -101
  211. data/tests/bugs/complex_escaping.md +0 -38
  212. data/tests/math/syntax.md +0 -46
  213. data/tests/math_usage/document.md +0 -13
  214. data/tests/others/abbreviations.md +0 -11
  215. data/tests/others/blank.md +0 -4
  216. data/tests/others/code.md +0 -5
  217. data/tests/others/code2.md +0 -8
  218. data/tests/others/code3.md +0 -16
  219. data/tests/others/email.md +0 -4
  220. data/tests/others/entities.md +0 -19
  221. data/tests/others/escaping.md +0 -16
  222. data/tests/others/extra_dl.md +0 -101
  223. data/tests/others/extra_header_id.md +0 -13
  224. data/tests/others/extra_table1.md +0 -40
  225. data/tests/others/footnotes.md +0 -17
  226. data/tests/others/headers.md +0 -10
  227. data/tests/others/hrule.md +0 -10
  228. data/tests/others/images.md +0 -20
  229. data/tests/others/inline_html.md +0 -42
  230. data/tests/others/links.md +0 -38
  231. data/tests/others/list1.md +0 -4
  232. data/tests/others/list2.md +0 -5
  233. data/tests/others/list3.md +0 -8
  234. data/tests/others/lists.md +0 -32
  235. data/tests/others/lists_after_paragraph.md +0 -44
  236. data/tests/others/lists_ol.md +0 -39
  237. data/tests/others/misc_sw.md +0 -105
  238. data/tests/others/one.md +0 -1
  239. data/tests/others/paragraphs.md +0 -13
  240. data/tests/others/sss06.md +0 -352
  241. data/tests/others/test.md +0 -4
  242. data/tests/s5/s5profiling.md +0 -48
  243. data/tests/unittest/bug_def.md +0 -28
  244. data/tests/unittest/email.md +0 -32
  245. data/tests/unittest/html2.md +0 -34
  246. data/tests/unittest/ie.md +0 -61
  247. data/tests/unittest/links2.md +0 -34
  248. data/tests/unittest/lists11.md +0 -28
  249. data/tests/unittest/lists6.md +0 -53
  250. data/tests/unittest/lists9.md +0 -76
  251. data/tests/unittest/math/equations.md +0 -86
  252. data/tests/unittest/math/math2.md +0 -57
  253. data/tests/unittest/math/table.md +0 -37
  254. data/tests/unittest/notyet/header_after_par.md +0 -70
  255. data/tests/unittest/red_tests/abbrev.md +0 -1388
  256. data/tests/unittest/red_tests/lists7.md +0 -68
  257. data/tests/unittest/red_tests/lists7b.md +0 -128
  258. data/tests/unittest/red_tests/lists8.md +0 -76
  259. data/tests/unittest/red_tests/xml.md +0 -70
  260. data/tests/unittest/xml3.md +0 -38
  261. data/tests/utf8-files/simple.md +0 -1
  262. data/unit_test_block.sh +0 -5
  263. data/unit_test_span.sh +0 -3
@@ -1,69 +1,68 @@
1
- module MaRuKu; module In; module Markdown
1
+ module MaRuKu::In::Markdown
2
+ # Hash Fixnum -> name
3
+ SpanExtensionsTrigger = {}
2
4
 
3
5
 
4
- # Hash Fixnum -> name
5
- SpanExtensionsTrigger = {}
6
-
7
-
8
- class SpanExtension
9
- # triggering chars
10
- attr_accessor :chars
11
- # triggering regexp
12
- attr_accessor :regexp
13
- # lambda
14
- attr_accessor :block
15
- end
16
-
17
- # Hash String -> Extension
18
- SpanExtensions = {}
6
+ class SpanExtension
7
+ # triggering chars
8
+ attr_accessor :chars
9
+ # triggering regexp
10
+ attr_accessor :regexp
11
+ # lambda
12
+ attr_accessor :block
13
+ end
19
14
 
20
- def check_span_extensions(src, con)
21
- c = src.cur_char
22
- if extensions = SpanExtensionsTrigger[c]
23
- extensions.each do |e|
24
- if e.regexp && (match = src.next_matches(e.regexp))
25
- return true if e.block.call(doc, src, con)
26
- end
27
- end
28
- end
29
- return false # not special
30
- end
31
-
32
- def self.register_span_extension(args)
33
- e = SpanExtension.new
34
- e.chars = [*args[:chars]]
35
- e.regexp = args[:regexp]
36
- e.block = args[:handler] || raise("No blocks passed")
37
- e.chars.each do |c|
38
- (SpanExtensionsTrigger[c] ||= []).push e
39
- end
40
- end
15
+ # Hash String -> Extension
16
+ SpanExtensions = {}
41
17
 
42
- def self.register_block_extension(args)
43
- regexp = args[:regexp]
44
- BlockExtensions[regexp] = (args[:handler] || raise("No blocks passed"))
45
- end
18
+ def check_span_extensions(src, con)
19
+ c = src.cur_char
20
+ if extensions = SpanExtensionsTrigger[c]
21
+ extensions.each do |e|
22
+ if e.regexp && src.next_matches(e.regexp)
23
+ return true if e.block.call(doc, src, con)
24
+ end
25
+ end
26
+ end
46
27
 
47
- # Hash Regexp -> Block
48
- BlockExtensions = {}
28
+ false # not special
29
+ end
49
30
 
50
- def check_block_extensions(src, con, line)
51
- BlockExtensions.each do |reg, block|
52
- if m = reg.match(line)
53
- block = BlockExtensions[reg]
54
- accepted = block.call(doc, src, con)
55
- return true if accepted
56
- end
57
- end
58
- return false # not special
59
- end
60
-
61
- def any_matching_block_extension?(line)
62
- BlockExtensions.each_key do |reg|
63
- m = reg.match(line)
64
- return m if m
65
- end
66
- return false
67
- end
68
-
69
- end end end
31
+ def self.register_span_extension(args)
32
+ e = SpanExtension.new
33
+ e.chars = [*args[:chars]]
34
+ e.regexp = args[:regexp]
35
+ e.block = args[:handler] || raise("No blocks passed")
36
+ e.chars.each do |c|
37
+ (SpanExtensionsTrigger[c] ||= []).push e
38
+ end
39
+ end
40
+
41
+ def self.register_block_extension(args)
42
+ regexp = args[:regexp]
43
+ BlockExtensions[regexp] = (args[:handler] || raise("No blocks passed"))
44
+ end
45
+
46
+ # Hash Regexp -> Block
47
+ BlockExtensions = {}
48
+
49
+ def check_block_extensions(src, con, line)
50
+ BlockExtensions.each do |reg, block|
51
+ if reg.match(line)
52
+ block = BlockExtensions[reg]
53
+ accepted = block.call(doc, src, con)
54
+ return true if accepted
55
+ end
56
+ end
57
+ false # not special
58
+ end
59
+
60
+ def any_matching_block_extension?(line)
61
+ BlockExtensions.each_key do |reg|
62
+ m = reg.match(line)
63
+ return m if m
64
+ end
65
+ false
66
+ end
67
+
68
+ end
@@ -1,189 +1,220 @@
1
- #--
2
- # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
- #
4
- # This file is part of Maruku.
5
- #
6
- # Maruku is free software; you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation; either version 2 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # Maruku is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with Maruku; if not, write to the Free Software
18
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
- #++
20
-
21
-
22
- module MaRuKu; module In; module Markdown; module SpanLevelParser
23
-
24
- # This class helps me read and sanitize HTML blocks
25
-
26
- # I tried to do this with REXML, but wasn't able to. (suggestions?)
27
-
28
- class HTMLHelper
29
- include MaRuKu::Strings
30
-
31
- Tag = %r{^<(/)?(\w+)\s*([^>]*)>}m
32
- PartialTag = %r{^<.*}m
33
-
34
- EverythingElse = %r{^[^<]+}m
35
- CommentStart = %r{^<!--}x
36
- CommentEnd = %r{^.*-->}
37
- TO_SANITIZE = ['img','hr','br']
38
-
39
- attr_reader :rest
40
-
41
- def my_debug(s)
42
- # puts "---"*10+"\n"+inspect+"\t>>>\t"s
43
- end
44
-
45
- def initialize
46
- @rest = ""
47
- @tag_stack = []
48
- @m = nil
49
- @already = ""
50
- self.state = :inside_element
51
- end
52
-
53
- attr_accessor :state # = :inside_element, :inside_tag, :inside_comment,
54
-
55
- def eat_this(line)
56
- @rest = line + @rest
57
- things_read = 0
58
- until @rest.empty?
59
- case self.state
60
- when :inside_comment
61
- if @m = CommentEnd.match(@rest)
62
- @already += @m.pre_match + @m.to_s
63
- @rest = @m.post_match
64
- self.state = :inside_element
65
- else
66
- @already += @rest
67
- @rest = ""
68
- self.state = :inside_comment
69
- end
70
- when :inside_element
71
- if @m = CommentStart.match(@rest)
72
- things_read += 1
73
- @already += @m.pre_match + @m.to_s
74
- @rest = @m.post_match
75
- self.state = :inside_comment
76
- elsif @m = Tag.match(@rest) then
77
- my_debug "#{@state}: Tag: #{@m.to_s.inspect}"
78
- things_read += 1
79
- handle_tag
80
- self.state = :inside_element
81
- elsif @m = PartialTag.match(@rest) then
82
- my_debug "#{@state}: PartialTag: #{@m.to_s.inspect}"
83
- @already += @m.pre_match
84
- @rest = @m.post_match
85
- @partial_tag = @m.to_s
86
- self.state = :inside_tag
87
- elsif @m = EverythingElse.match(@rest)
88
- my_debug "#{@state}: Everything: #{@m.to_s.inspect}"
89
- @already += @m.pre_match + @m.to_s
90
- @rest = @m.post_match
91
- self.state = :inside_element
92
- else
93
- error "Malformed HTML: not complete: #{@rest.inspect}"
94
- end
95
- when :inside_tag
96
- if @m = /^[^>]*>/.match(@rest) then
97
- my_debug "#{@state}: inside_tag: matched #{@m.to_s.inspect}"
98
- @partial_tag += @m.to_s
99
- my_debug "#{@state}: inside_tag: matched TOTAL: #{@partial_tag.to_s.inspect}"
100
- @rest = @partial_tag + @m.post_match
101
- @partial_tag = nil
102
- self.state = :inside_element
103
- else
104
- @partial_tag += @rest
105
- @rest = ""
106
- self.state = :inside_tag
107
- end
108
- else
109
- raise "Bug bug: state = #{self.state.inspect}"
110
- end # not inside comment
111
-
112
- # puts inspect
113
- # puts "Read: #{@tag_stack.inspect}"
114
- break if is_finished? and things_read>0
115
- end
116
- end
117
-
118
- def handle_tag()
119
- @already += @m.pre_match
120
- @rest = @m.post_match
121
-
122
- is_closing = !!@m[1]
123
- tag = @m[2]
124
- attributes = @m[3].to_s
125
-
126
- is_single = false
127
- if attributes[-1] == ?/ # =~ /\A(.*)\/\Z/
128
- attributes = attributes[0, attributes.size-1]
129
- is_single = true
130
- end
131
-
132
- my_debug "Attributes: #{attributes.inspect}"
133
- my_debug "READ TAG #{@m.to_s.inspect} tag = #{tag} closing? #{is_closing} single = #{is_single}"
134
-
135
- if TO_SANITIZE.include? tag
136
- attributes.strip!
137
- # puts "Attributes: #{attributes.inspect}"
138
- if attributes.size > 0
139
- @already += '<%s %s />' % [tag, attributes]
140
- else
141
- @already += '<%s />' % [tag]
142
- end
143
- elsif is_closing
144
- @already += @m.to_s
145
- if @tag_stack.empty?
146
- error "Malformed: closing tag #{tag.inspect} "+
147
- "in empty list"
148
- end
149
- if @tag_stack.last != tag
150
- error "Malformed: tag <#{tag}> "+
151
- "closes <#{@tag_stack.last}>"
152
- end
153
- @tag_stack.pop
154
- else
155
- @already += @m.to_s
156
-
157
- if not is_single
158
- @tag_stack.push(tag)
159
- my_debug "Pushing #{tag.inspect} when read #{@m.to_s.inspect}"
160
- end
161
- end
162
- end
163
- def error(s)
164
- raise Exception, "Error: #{s} \n"+ inspect, caller
165
- end
166
-
167
- def inspect; "HTML READER\n state=#{self.state} "+
168
- "match=#{@m.to_s.inspect}\n"+
169
- "Tag stack = #{@tag_stack.inspect} \n"+
170
- "Before:\n"+
171
- add_tabs(@already,1,'|')+"\n"+
172
- "After:\n"+
173
- add_tabs(@rest,1,'|')+"\n"
174
-
175
- end
176
-
177
-
178
- def stuff_you_read
179
- @already
180
- end
181
-
182
- def rest() @rest end
183
-
184
- def is_finished?
185
- (self.state == :inside_element) and @tag_stack.empty?
186
- end
187
- end # html helper
188
-
189
- end end end end
1
+ module MaRuKu::In::Markdown::SpanLevelParser
2
+
3
+ # This class helps me read and sanitize HTML blocks
4
+ class HTMLHelper
5
+ Tag = %r{^<(/)?(\w+)\s*([^>]*?)>}m
6
+ PartialTag = %r{^<.*}m
7
+ CData = %r{^\s*<!\[CDATA\[}m
8
+ CDataEnd = %r{\]\]>}m
9
+
10
+ EverythingElse = %r{^[^<]+}m
11
+ CommentStart = %r{^<!--}x
12
+ CommentEnd = %r{-->}
13
+ TO_SANITIZE = ['img','hr','br']
14
+
15
+ attr_reader :rest, :first_tag
16
+
17
+ def my_debug(s)
18
+ # puts "---" * 10 + "\n" + inspect + "\t>>>\t" + s
19
+ end
20
+
21
+ def initialize
22
+ @rest = ""
23
+ @tag_stack = []
24
+ @m = nil
25
+ @already = ""
26
+ self.state = :inside_element
27
+ end
28
+
29
+ attr_accessor :state # = :inside_element, :inside_tag, :inside_comment, :inside_cdata, :inside_script_style
30
+
31
+ def eat_this(line)
32
+ @rest = line + @rest
33
+ things_read = 0
34
+ until @rest.empty?
35
+ case self.state
36
+ when :inside_comment
37
+ if @m = CommentEnd.match(@rest)
38
+ my_debug "#{@state}: Comment End: #{@m.to_s.inspect}"
39
+ @already << @m.pre_match << @m.to_s
40
+ @rest = @m.post_match
41
+ self.state = :inside_element
42
+ else
43
+ @already << @rest
44
+ @rest = ""
45
+ self.state = :inside_comment
46
+ end
47
+ when :inside_element
48
+ if @m = CommentStart.match(@rest)
49
+ my_debug "#{@state}: Comment: #{@m.to_s.inspect}"
50
+ things_read += 1
51
+ @already << @m.pre_match << @m.to_s
52
+ @rest = @m.post_match
53
+ self.state = :inside_comment
54
+ elsif @m = Tag.match(@rest)
55
+ my_debug "#{@state}: Tag: #{@m.to_s.inspect}"
56
+ things_read += 1
57
+ self.state = :inside_element
58
+ handle_tag
59
+ elsif @m = CData.match(@rest)
60
+ my_debug "#{@state}: CDATA: #{@m.to_s.inspect}"
61
+ @already << @m.pre_match << @m.to_s
62
+ @rest = @m.post_match
63
+ self.state = :inside_cdata
64
+ elsif @m = PartialTag.match(@rest)
65
+ my_debug "#{@state}: PartialTag: #{@m.to_s.inspect}"
66
+ @already << @m.pre_match
67
+ @rest = @m.post_match
68
+ @partial_tag = @m.to_s
69
+ self.state = :inside_tag
70
+ elsif @m = EverythingElse.match(@rest)
71
+ my_debug "#{@state}: Everything: #{@m.to_s.inspect}"
72
+ @already << @m.pre_match << @m.to_s
73
+ @rest = @m.post_match
74
+ self.state = :inside_element
75
+ else
76
+ error "Malformed HTML: not complete: #{@rest.inspect}"
77
+ end
78
+ when :inside_tag
79
+ if @m = /^[^>]*>/.match(@rest)
80
+ my_debug "#{@state}: matched #{@m.to_s.inspect}"
81
+ @partial_tag << @m.to_s
82
+ my_debug "#{@state}: matched TOTAL: #{@partial_tag.to_s.inspect}"
83
+ @rest = @partial_tag + @m.post_match
84
+ @partial_tag = nil
85
+ self.state = :inside_element
86
+ else
87
+ @partial_tag << @rest
88
+ @rest = ""
89
+ self.state = :inside_tag
90
+ end
91
+ when :inside_cdata
92
+ if @m = CDataEnd.match(@rest)
93
+ my_debug "#{@state}: matched #{@m.to_s.inspect}"
94
+ @already << @m.pre_match << @m.to_s
95
+ @rest = @m.post_match
96
+ self.state = %(script style).include?(@tag_stack.last) ? :inside_script_style : :inside_element
97
+ else
98
+ @already << @rest
99
+ @rest = ""
100
+ self.state = :inside_cdata
101
+ end
102
+ when :inside_script_style
103
+ if @m = CData.match(@rest)
104
+ if @already.rstrip.end_with?('<![CDATA[')
105
+ @already << @m.pre_match
106
+ @rest = @m.post_match
107
+ else
108
+ my_debug "#{@state}: CDATA: #{@m.to_s.inspect}"
109
+ @already << @m.pre_match << @m.to_s
110
+ @rest = @m.post_match
111
+ self.state = :inside_cdata
112
+ end
113
+ elsif @m = Tag.match(@rest)
114
+ is_closing = !!@m[1]
115
+ tag = @m[2]
116
+ if is_closing && tag == @tag_stack.last
117
+ my_debug "#{@state}: matched #{@m.to_s.inspect}"
118
+ @already << @m.pre_match
119
+ @rest = @m.post_match
120
+ # This is necessary to properly parse
121
+ # script tags
122
+ @already << "]]>" unless @already.rstrip.end_with?("]]>")
123
+ self.state = :inside_element
124
+ handle_tag false # don't double-add pre_match
125
+ else
126
+ @already << @rest
127
+ @rest = ""
128
+ end
129
+ elsif @m = EverythingElse.match(@rest)
130
+ my_debug "#{@state}: Everything: #{@m.to_s.inspect}"
131
+ @already << @m.pre_match << @m.to_s
132
+ @rest = @m.post_match
133
+ else
134
+ @already << @rest
135
+ @rest = ""
136
+ end
137
+ else
138
+ raise "Bug bug: state = #{self.state.inspect}"
139
+ end # not inside comment
140
+
141
+ break if is_finished? && things_read > 0
142
+ end
143
+ end
144
+
145
+ def handle_tag(add_pre_match = true)
146
+ @already << @m.pre_match if add_pre_match
147
+ @rest = @m.post_match
148
+
149
+ is_closing = !!@m[1]
150
+ tag = @m[2]
151
+ @first_tag ||= tag
152
+ attributes = @m[3].to_s
153
+
154
+ is_single = false
155
+ if attributes[-1, 1] == '/'
156
+ attributes = attributes[0, attributes.size - 1]
157
+ is_single = true
158
+ end
159
+
160
+ my_debug "Attributes: #{attributes.inspect}"
161
+ my_debug "READ TAG #{@m.to_s.inspect} tag = #{tag} closing? #{is_closing} single = #{is_single}"
162
+
163
+ if TO_SANITIZE.include? tag
164
+ attributes.strip!
165
+ # puts "Attributes: #{attributes.inspect}"
166
+ if attributes.size > 0
167
+ @already << '<%s %s />' % [tag, attributes]
168
+ else
169
+ @already << '<%s />' % [tag]
170
+ end
171
+ elsif is_closing
172
+ if @tag_stack.empty?
173
+ error "Malformed: closing tag #{tag.inspect} in empty list"
174
+ end
175
+ if @tag_stack.last != tag
176
+ error "Malformed: tag <#{tag}> closes <#{@tag_stack.last}>"
177
+ end
178
+
179
+ @already << @m.to_s
180
+ @tag_stack.pop
181
+ else
182
+ @already << @m.to_s
183
+
184
+ if not is_single
185
+ @tag_stack.push(tag)
186
+ my_debug "Pushing #{tag.inspect} when read #{@m.to_s.inspect}"
187
+ end
188
+
189
+ if %w(script style).include?(@tag_stack.last)
190
+ # This is necessary to properly parse
191
+ # script tags
192
+ @already << "<![CDATA["
193
+ self.state = :inside_script_style
194
+ end
195
+ end
196
+ end
197
+
198
+ def error(s)
199
+ raise "Error: #{s} \n" + inspect, caller
200
+ end
201
+
202
+ def inspect
203
+ "HTML READER\n state=#{self.state} " +
204
+ "match=#{@m.to_s.inspect}\n" +
205
+ "Tag stack = #{@tag_stack.inspect} \n" +
206
+ "Before:\n" +
207
+ @already.gsub(/^/, '|') + "\n" +
208
+ "After:\n" +
209
+ @rest.gsub(/^/, '|') + "\n"
210
+ end
211
+
212
+ def stuff_you_read
213
+ @already
214
+ end
215
+
216
+ def is_finished?
217
+ (self.state == :inside_element) and @tag_stack.empty?
218
+ end
219
+ end # html helper
220
+ end