maruku 0.6.0 → 0.7.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (290) hide show
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE.txt +20 -0
  3. data/bin/maruku +153 -152
  4. data/bin/marutex +2 -29
  5. data/data/entities.xml +261 -0
  6. data/docs/markdown_syntax.md +9 -21
  7. data/docs/math.md +14 -18
  8. data/lib/maruku.rb +65 -78
  9. data/lib/maruku/attributes.rb +109 -214
  10. data/lib/maruku/defaults.rb +45 -67
  11. data/lib/maruku/document.rb +44 -0
  12. data/lib/maruku/element.rb +138 -0
  13. data/lib/maruku/errors.rb +80 -0
  14. data/lib/maruku/ext/div.rb +105 -113
  15. data/lib/maruku/ext/fenced_code.rb +97 -0
  16. data/lib/maruku/ext/math.rb +22 -26
  17. data/lib/maruku/ext/math/elements.rb +20 -26
  18. data/lib/maruku/ext/math/mathml_engines/blahtex.rb +92 -104
  19. data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +33 -26
  20. data/lib/maruku/ext/math/mathml_engines/none.rb +11 -19
  21. data/lib/maruku/ext/math/mathml_engines/ritex.rb +2 -4
  22. data/lib/maruku/ext/math/parsing.rb +121 -115
  23. data/lib/maruku/ext/math/to_html.rb +202 -187
  24. data/lib/maruku/ext/math/to_latex.rb +34 -21
  25. data/lib/maruku/helpers.rb +158 -257
  26. data/lib/maruku/html.rb +251 -0
  27. data/lib/maruku/input/charsource.rb +272 -319
  28. data/lib/maruku/input/extensions.rb +62 -63
  29. data/lib/maruku/input/html_helper.rb +233 -189
  30. data/lib/maruku/input/linesource.rb +90 -110
  31. data/lib/maruku/input/mdline.rb +131 -0
  32. data/lib/maruku/input/parse_block.rb +736 -613
  33. data/lib/maruku/input/parse_doc.rb +145 -217
  34. data/lib/maruku/input/parse_span.rb +740 -0
  35. data/lib/maruku/inspect_element.rb +60 -0
  36. data/lib/maruku/maruku.rb +14 -30
  37. data/lib/maruku/output/entity_table.rb +37 -0
  38. data/lib/maruku/output/s5/fancy.rb +462 -462
  39. data/lib/maruku/output/s5/to_s5.rb +115 -135
  40. data/lib/maruku/output/to_html.rb +907 -983
  41. data/lib/maruku/output/to_latex.rb +571 -563
  42. data/lib/maruku/output/to_markdown.rb +207 -162
  43. data/lib/maruku/output/to_s.rb +10 -52
  44. data/lib/maruku/string_utils.rb +129 -179
  45. data/lib/maruku/toc.rb +185 -196
  46. data/lib/maruku/version.rb +33 -38
  47. data/spec/block_docs/abbrev.md +776 -0
  48. data/{tests/unittest → spec/block_docs}/abbreviations.md +11 -20
  49. data/spec/block_docs/abbreviations2.md +27 -0
  50. data/{tests/unittest → spec/block_docs}/alt.md +2 -14
  51. data/{tests/unittest/pending → spec/block_docs}/amps.md +1 -13
  52. data/spec/block_docs/attribute_sanitize.md +22 -0
  53. data/{tests/unittest → spec/block_docs}/attributes/att2.md +0 -12
  54. data/{tests/unittest → spec/block_docs}/attributes/att3.md +2 -14
  55. data/{tests/unittest → spec/block_docs}/attributes/attributes.md +12 -16
  56. data/{tests/unittest → spec/block_docs}/attributes/circular.md +0 -12
  57. data/{tests/unittest → spec/block_docs}/attributes/default.md +1 -13
  58. data/spec/block_docs/atx_headers.md +22 -0
  59. data/spec/block_docs/auto_cdata.md +48 -0
  60. data/spec/block_docs/bad_cites.md +30 -0
  61. data/spec/block_docs/bad_divrefs.md +30 -0
  62. data/{tests/unittest → spec/block_docs}/blank.md +0 -12
  63. data/{tests/unittest → spec/block_docs}/blanks_in_code.md +16 -15
  64. data/spec/block_docs/block_quotes.md +66 -0
  65. data/{tests/unittest/loss.md → spec/block_docs/bug_def.md} +6 -18
  66. data/{tests/unittest → spec/block_docs}/bug_table.md +7 -19
  67. data/spec/block_docs/cites.md +37 -0
  68. data/{tests/unittest → spec/block_docs}/code.md +7 -14
  69. data/{tests/unittest → spec/block_docs}/code2.md +4 -14
  70. data/{tests/unittest → spec/block_docs}/code3.md +12 -16
  71. data/spec/block_docs/code4.md +79 -0
  72. data/{tests/unittest → spec/block_docs}/data_loss.md +2 -14
  73. data/spec/block_docs/div_without_newline.md +16 -0
  74. data/{tests/unittest → spec/block_docs}/divs/div1.md +0 -12
  75. data/{tests/unittest → spec/block_docs}/divs/div2.md +0 -12
  76. data/{tests/unittest → spec/block_docs}/divs/div3_nest.md +3 -15
  77. data/{tests/unittest → spec/block_docs}/easy.md +1 -13
  78. data/spec/block_docs/email.md +29 -0
  79. data/spec/block_docs/empty_cells.md +31 -0
  80. data/{tests/unittest → spec/block_docs}/encoding/iso-8859-1.md +1 -14
  81. data/{tests/unittest → spec/block_docs}/encoding/utf-8.md +0 -12
  82. data/{tests/unittest → spec/block_docs}/entities.md +33 -41
  83. data/{tests/unittest/notyet → spec/block_docs}/escape.md +2 -14
  84. data/{tests/unittest → spec/block_docs}/escaping.md +11 -22
  85. data/{tests/unittest → spec/block_docs}/extra_dl.md +2 -13
  86. data/{tests/unittest → spec/block_docs}/extra_header_id.md +14 -20
  87. data/{tests/unittest → spec/block_docs}/extra_table1.md +9 -21
  88. data/spec/block_docs/fenced_code_blocks.md +58 -0
  89. data/spec/block_docs/fenced_code_blocks_highlighted.md +17 -0
  90. data/{tests/unittest → spec/block_docs}/footnotes.md +12 -24
  91. data/spec/block_docs/footnotes2.md +82 -0
  92. data/spec/block_docs/hard.md +25 -0
  93. data/spec/block_docs/header_after_par.md +62 -0
  94. data/{tests/unittest → spec/block_docs}/headers.md +10 -18
  95. data/{tests/unittest → spec/block_docs}/hex_entities.md +7 -18
  96. data/{tests/unittest → spec/block_docs}/hrule.md +5 -12
  97. data/{tests/unittest → spec/block_docs}/html3.md +1 -13
  98. data/{tests/unittest → spec/block_docs}/html4.md +2 -14
  99. data/{tests/unittest → spec/block_docs}/html5.md +2 -14
  100. data/spec/block_docs/html_block_in_para.md +22 -0
  101. data/spec/block_docs/html_inline.md +25 -0
  102. data/spec/block_docs/html_trailing.md +31 -0
  103. data/spec/block_docs/ie.md +62 -0
  104. data/spec/block_docs/iframe.md +29 -0
  105. data/spec/block_docs/ignore_bad_header.md +9 -0
  106. data/{tests/unittest → spec/block_docs}/images.md +22 -28
  107. data/{tests/unittest → spec/block_docs}/images2.md +7 -17
  108. data/{tests/unittest → spec/block_docs}/inline_html.md +37 -67
  109. data/{tests/unittest → spec/block_docs}/inline_html2.md +1 -13
  110. data/spec/block_docs/inline_html_beginning.md +10 -0
  111. data/spec/block_docs/issue106.md +78 -0
  112. data/spec/block_docs/issue115.md +20 -0
  113. data/spec/block_docs/issue117.md +13 -0
  114. data/spec/block_docs/issue120.md +48 -0
  115. data/spec/block_docs/issue123.md +11 -0
  116. data/spec/block_docs/issue124.md +16 -0
  117. data/spec/block_docs/issue126.md +9 -0
  118. data/spec/block_docs/issue130.md +11 -0
  119. data/spec/block_docs/issue20.md +9 -0
  120. data/spec/block_docs/issue26.md +22 -0
  121. data/spec/block_docs/issue29.md +9 -0
  122. data/spec/block_docs/issue30.md +30 -0
  123. data/spec/block_docs/issue31.md +25 -0
  124. data/spec/block_docs/issue40.md +52 -0
  125. data/spec/block_docs/issue64.md +55 -0
  126. data/spec/block_docs/issue67.md +19 -0
  127. data/spec/block_docs/issue70.md +11 -0
  128. data/spec/block_docs/issue72.md +17 -0
  129. data/spec/block_docs/issue74.md +38 -0
  130. data/spec/block_docs/issue79.md +15 -0
  131. data/spec/block_docs/issue83.md +13 -0
  132. data/spec/block_docs/issue85.md +25 -0
  133. data/spec/block_docs/issue88.md +19 -0
  134. data/spec/block_docs/issue89.md +12 -0
  135. data/spec/block_docs/issue90.md +38 -0
  136. data/{tests/unittest/pending → spec/block_docs}/link.md +21 -18
  137. data/{tests/unittest → spec/block_docs}/links.md +33 -32
  138. data/spec/block_docs/links2.md +21 -0
  139. data/{tests/unittest → spec/block_docs}/list1.md +0 -12
  140. data/{tests/unittest → spec/block_docs}/list12.md +2 -14
  141. data/{tests/unittest → spec/block_docs}/list2.md +2 -14
  142. data/spec/block_docs/list_multipara.md +42 -0
  143. data/{tests/unittest → spec/block_docs}/lists.md +28 -29
  144. data/{tests/unittest → spec/block_docs}/lists10.md +2 -14
  145. data/spec/block_docs/lists11.md +23 -0
  146. data/spec/block_docs/lists12.md +43 -0
  147. data/spec/block_docs/lists13.md +55 -0
  148. data/spec/block_docs/lists14.md +61 -0
  149. data/spec/block_docs/lists15.md +36 -0
  150. data/spec/block_docs/lists6.md +88 -0
  151. data/spec/block_docs/lists7b.md +58 -0
  152. data/spec/block_docs/lists9.md +53 -0
  153. data/{tests/unittest → spec/block_docs}/lists_after_paragraph.md +19 -25
  154. data/spec/block_docs/lists_blank.md +35 -0
  155. data/{tests/unittest/list3.md → spec/block_docs/lists_blockquote_code.md} +2 -14
  156. data/{tests/unittest/list4.md → spec/block_docs/lists_need_blank_line.md} +44 -29
  157. data/spec/block_docs/lists_nested.md +44 -0
  158. data/spec/block_docs/lists_nested_blankline.md +34 -0
  159. data/spec/block_docs/lists_nested_deep.md +43 -0
  160. data/spec/block_docs/lists_ol.md +129 -0
  161. data/spec/block_docs/lists_ol2.md +147 -0
  162. data/spec/block_docs/lists_paraindent.md +42 -0
  163. data/spec/block_docs/lists_tab.md +54 -0
  164. data/spec/block_docs/loss.md +17 -0
  165. data/spec/block_docs/math-blahtex/equations.md +29 -0
  166. data/spec/block_docs/math-blahtex/inline.md +48 -0
  167. data/spec/block_docs/math-blahtex/math2.md +52 -0
  168. data/spec/block_docs/math-blahtex/table.md +25 -0
  169. data/spec/block_docs/math/embedded_invalid_svg.md +108 -0
  170. data/spec/block_docs/math/embedded_svg.md +136 -0
  171. data/spec/block_docs/math/equations.md +49 -0
  172. data/spec/block_docs/math/inline.md +46 -0
  173. data/spec/block_docs/math/math2.md +53 -0
  174. data/{tests/unittest → spec/block_docs}/math/notmath.md +0 -12
  175. data/spec/block_docs/math/raw_mathml.md +87 -0
  176. data/spec/block_docs/math/spaces_after_inline_math.md +17 -0
  177. data/spec/block_docs/math/table.md +25 -0
  178. data/{tests/unittest → spec/block_docs}/math/table2.md +11 -23
  179. data/{tests/unittest → spec/block_docs}/misc_sw.md +184 -121
  180. data/{tests/unittest → spec/block_docs}/olist.md +6 -18
  181. data/{tests/unittest → spec/block_docs}/one.md +0 -12
  182. data/{tests/unittest → spec/block_docs}/paragraph.md +0 -12
  183. data/{tests/unittest → spec/block_docs}/paragraph_rules/dont_merge_ref.md +4 -12
  184. data/{tests/unittest → spec/block_docs}/paragraph_rules/tab_is_blank.md +0 -12
  185. data/{tests/unittest → spec/block_docs}/paragraphs.md +1 -13
  186. data/{tests/unittest → spec/block_docs}/recover/recover_links.md +4 -16
  187. data/{tests/unittest/pending/ref.md → spec/block_docs/ref_with_period.md} +7 -16
  188. data/spec/block_docs/ref_with_title.md +22 -0
  189. data/{tests/unittest → spec/block_docs}/references/long_example.md +16 -23
  190. data/{tests/unittest → spec/block_docs}/references/spaces_and_numbers.md +0 -12
  191. data/{tests/unittest → spec/block_docs}/smartypants.md +24 -31
  192. data/{tests/unittest → spec/block_docs}/syntax_hl.md +13 -17
  193. data/{tests/unittest → spec/block_docs}/table_attributes.md +6 -20
  194. data/spec/block_docs/table_colspan.md +41 -0
  195. data/spec/block_docs/tables.md +47 -0
  196. data/spec/block_docs/tables2.md +74 -0
  197. data/{tests/unittest → spec/block_docs}/test.md +1 -13
  198. data/{tests/unittest/notyet → spec/block_docs}/ticks.md +1 -13
  199. data/spec/block_docs/toc.md +87 -0
  200. data/{tests/unittest/notyet → spec/block_docs}/triggering.md +14 -25
  201. data/{tests/unittest → spec/block_docs}/underscore_in_words.md +0 -12
  202. data/{tests/unittest → spec/block_docs}/wrapping.md +4 -16
  203. data/spec/block_docs/xml.md +33 -0
  204. data/spec/block_docs/xml3.md +24 -0
  205. data/spec/block_docs/xml_comments.md +32 -0
  206. data/{tests/unittest → spec/block_docs}/xml_instruction.md +9 -20
  207. data/spec/block_spec.rb +110 -0
  208. data/spec/cli_spec.rb +8 -0
  209. data/spec/span_spec.rb +263 -0
  210. data/spec/spec_helper.rb +3 -0
  211. data/spec/to_html_utf8_spec.rb +13 -0
  212. metadata +218 -202
  213. data/Rakefile +0 -73
  214. data/bin/marudown +0 -29
  215. data/bin/marutest +0 -345
  216. data/docs/changelog.md +0 -334
  217. data/lib/maruku/errors_management.rb +0 -92
  218. data/lib/maruku/ext/math/latex_fix.rb +0 -12
  219. data/lib/maruku/input/parse_span_better.rb +0 -746
  220. data/lib/maruku/input/rubypants.rb +0 -225
  221. data/lib/maruku/input/type_detection.rb +0 -147
  222. data/lib/maruku/output/to_latex_entities.rb +0 -367
  223. data/lib/maruku/output/to_latex_strings.rb +0 -64
  224. data/lib/maruku/structures.rb +0 -167
  225. data/lib/maruku/structures_inspect.rb +0 -87
  226. data/lib/maruku/structures_iterators.rb +0 -61
  227. data/lib/maruku/tests/benchmark.rb +0 -82
  228. data/lib/maruku/tests/new_parser.rb +0 -373
  229. data/lib/maruku/tests/tests.rb +0 -136
  230. data/lib/maruku/usage/example1.rb +0 -33
  231. data/maruku_gem.rb +0 -33
  232. data/tests/bugs/code_in_links.md +0 -101
  233. data/tests/bugs/complex_escaping.md +0 -38
  234. data/tests/math/syntax.md +0 -46
  235. data/tests/math_usage/document.md +0 -13
  236. data/tests/others/abbreviations.md +0 -11
  237. data/tests/others/blank.md +0 -4
  238. data/tests/others/code.md +0 -5
  239. data/tests/others/code2.md +0 -8
  240. data/tests/others/code3.md +0 -16
  241. data/tests/others/email.md +0 -4
  242. data/tests/others/entities.md +0 -19
  243. data/tests/others/escaping.md +0 -16
  244. data/tests/others/extra_dl.md +0 -101
  245. data/tests/others/extra_header_id.md +0 -13
  246. data/tests/others/extra_table1.md +0 -40
  247. data/tests/others/footnotes.md +0 -17
  248. data/tests/others/headers.md +0 -10
  249. data/tests/others/hrule.md +0 -10
  250. data/tests/others/images.md +0 -20
  251. data/tests/others/inline_html.md +0 -42
  252. data/tests/others/links.md +0 -38
  253. data/tests/others/list1.md +0 -4
  254. data/tests/others/list2.md +0 -5
  255. data/tests/others/list3.md +0 -8
  256. data/tests/others/lists.md +0 -32
  257. data/tests/others/lists_after_paragraph.md +0 -44
  258. data/tests/others/lists_ol.md +0 -39
  259. data/tests/others/misc_sw.md +0 -105
  260. data/tests/others/one.md +0 -1
  261. data/tests/others/paragraphs.md +0 -13
  262. data/tests/others/sss06.md +0 -352
  263. data/tests/others/test.md +0 -4
  264. data/tests/s5/s5profiling.md +0 -48
  265. data/tests/unittest/bug_def.md +0 -28
  266. data/tests/unittest/email.md +0 -32
  267. data/tests/unittest/hang.md +0 -29
  268. data/tests/unittest/html2.md +0 -34
  269. data/tests/unittest/ie.md +0 -61
  270. data/tests/unittest/links2.md +0 -34
  271. data/tests/unittest/lists11.md +0 -28
  272. data/tests/unittest/lists6.md +0 -53
  273. data/tests/unittest/lists9.md +0 -76
  274. data/tests/unittest/lists_ol.md +0 -274
  275. data/tests/unittest/math/equations.md +0 -86
  276. data/tests/unittest/math/inline.md +0 -58
  277. data/tests/unittest/math/math2.md +0 -57
  278. data/tests/unittest/math/table.md +0 -37
  279. data/tests/unittest/notyet/header_after_par.md +0 -70
  280. data/tests/unittest/pending/empty_cells.md +0 -49
  281. data/tests/unittest/red_tests/abbrev.md +0 -1388
  282. data/tests/unittest/red_tests/lists7.md +0 -68
  283. data/tests/unittest/red_tests/lists7b.md +0 -128
  284. data/tests/unittest/red_tests/lists8.md +0 -76
  285. data/tests/unittest/red_tests/xml.md +0 -70
  286. data/tests/unittest/xml2.md +0 -31
  287. data/tests/unittest/xml3.md +0 -38
  288. data/tests/utf8-files/simple.md +0 -1
  289. data/unit_test_block.sh +0 -5
  290. data/unit_test_span.sh +0 -3
@@ -0,0 +1,251 @@
1
+ require 'set'
2
+
3
+ $warned_nokogiri = false
4
+
5
+ module MaRuKu
6
+ HTML_INLINE_ELEMS = Set.new %w[a abbr acronym audio b bdi bdo big br button canvas caption cite code
7
+ col colgroup command datalist del details dfn dir em fieldset font form i img input ins
8
+ kbd label legend mark meter optgroup option progress q rp rt ruby s samp select small
9
+ source span strike strong sub summary sup tbody td tfoot th thead time tr track tt u var video wbr
10
+ animate animateColor animateMotion animateTransform circle clipPath defs desc ellipse
11
+ feGaussianBlur filter font-face font-face-name font-face-src foreignObject g glyph hkern
12
+ linearGradient line marker mask metadata missing-glyph mpath path pattern polygon polyline
13
+ radialGradient rect set stop svg switch text textPath title tspan use
14
+ annotation annotation-xml maction math menclose merror mfrac mfenced mi mmultiscripts mn mo
15
+ mover mpadded mphantom mprescripts mroot mrow mspace msqrt mstyle msub msubsup msup mtable
16
+ mtd mtext mtr munder munderover none semantics]
17
+
18
+ # Parse block-level markdown elements in these HTML tags
19
+ BLOCK_TAGS = Set.new %w[div section]
20
+
21
+ # This gets mixed into HTML MDElement nodes to hold the parsed document fragment
22
+ module HTMLElement
23
+ attr_accessor :parsed_html
24
+ end
25
+
26
+ # This is just a factory, not an actual class
27
+ module HTMLFragment
28
+
29
+ # HTMLFragment.new produces a concrete HTMLFragment implementation
30
+ # that is either a NokogiriHTMLFragment or a REXMLHTMLFragment.
31
+ def self.new(raw_html)
32
+ if !$warned_nokogiri && MaRuKu::Globals[:html_parser] == 'nokogiri'
33
+ begin
34
+ require 'nokogiri'
35
+ return NokogiriHTMLFragment.new(raw_html)
36
+ rescue LoadError
37
+ warn "Nokogiri could not be loaded. Falling back to REXML."
38
+ $warned_nokogiri = true
39
+ end
40
+ end
41
+
42
+ require 'rexml/document'
43
+ REXMLHTMLFragment.new(raw_html)
44
+ end
45
+ end
46
+
47
+ # Nokogiri backend for HTML handling
48
+ class NokogiriHTMLFragment
49
+ def initialize(raw_html)
50
+ # Wrap our HTML in a dummy document with a doctype (just
51
+ # for the entity references)
52
+ wrapped = '<!DOCTYPE html PUBLIC
53
+ "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
54
+ "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
55
+ <html>' + raw_html.strip + '</html>'
56
+
57
+ d = Nokogiri::XML::Document.parse(wrapped) {|c| c.nonet }
58
+ @fragment = d.root
59
+ end
60
+
61
+ # @return The name of the first child element in the fragment.
62
+ def first_node_name
63
+ first_child = @fragment.children.first
64
+ first_child ? first_child.name : nil
65
+ end
66
+
67
+ # Add a class to the children of this fragment
68
+ def add_class(class_name)
69
+ @fragment.children.each do |c|
70
+ c['class'] = ((c['class']||'').split(' ') + [class_name]).join(' ')
71
+ end
72
+ end
73
+
74
+ # Process markdown within the contents of some elements and
75
+ # replace their contents with the processed version.
76
+ #
77
+ # @param doc [MaRuKu::MDDocument] A document to process.
78
+ def process_markdown_inside_elements(doc)
79
+ # find span elements or elements with 'markdown' attribute
80
+ elts = @fragment.css("[markdown]")
81
+
82
+ d = @fragment.children.first
83
+ if d && HTML_INLINE_ELEMS.include?(d.name)
84
+ elts << d unless d.attribute('markdown')
85
+ elts += span_descendents(d)
86
+ end
87
+
88
+ elts.each do |e|
89
+ how = e['markdown']
90
+ e.remove_attribute('markdown')
91
+
92
+ next if "0" == how # user requests no markdown parsing inside
93
+ parse_blocks = (how == 'block') || BLOCK_TAGS.include?(e.name)
94
+
95
+ # Select all text children of e
96
+ e.xpath("./text()").each do |original_text|
97
+ s = MaRuKu::Out::HTML.escapeHTML(original_text.text)
98
+ unless s.strip.empty?
99
+ parsed = parse_blocks ? doc.parse_text_as_markdown(s) : doc.parse_span(s)
100
+
101
+ # restore leading and trailing spaces
102
+ padding = /\A(\s*).*?(\s*)\z/.match(s)
103
+ parsed = [padding[1]] + parsed + [padding[2]] if padding
104
+
105
+ el = doc.md_el(:dummy, parsed)
106
+
107
+ # Nokogiri collapses consecutive Text nodes, so replace it by a dummy element
108
+ guard = Nokogiri::XML::Element.new('guard', @fragment)
109
+ original_text.replace(guard)
110
+ el.children_to_html.each do |x|
111
+ guard.before(x.to_s)
112
+ end
113
+ guard.remove
114
+ end
115
+ end
116
+ end
117
+ end
118
+
119
+ # Convert this fragment to an HTML or XHTML string.
120
+ # @return [String]
121
+ def to_html
122
+ output_options = Nokogiri::XML::Node::SaveOptions::DEFAULT_XHTML ^
123
+ Nokogiri::XML::Node::SaveOptions::FORMAT
124
+ @fragment.children.inject("") do |out, child|
125
+ out << child.serialize(:save_with => output_options, :encoding => 'UTF-8')
126
+ end
127
+ end
128
+
129
+ private
130
+
131
+ # Get all span-level descendents of the given element, recursively,
132
+ # as a flat NodeSet.
133
+ #
134
+ # @param e [Nokogiri::XML::Node] An element.
135
+ # @return [Nokogiri::XML::NodeSet]
136
+ def span_descendents(e)
137
+ ns = Nokogiri::XML::NodeSet.new(Nokogiri::XML::Document.new)
138
+ e.element_children.inject(ns) do |descendents, c|
139
+ if HTML_INLINE_ELEMS.include?(c.name)
140
+ descendents << c
141
+ descendents += span_descendents(c)
142
+ end
143
+ descendents
144
+ end
145
+ end
146
+ end
147
+
148
+ # An HTMLFragment implementation using REXML
149
+ class REXMLHTMLFragment
150
+ def initialize(raw_html)
151
+ wrapped = '<!DOCTYPE html PUBLIC
152
+ "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
153
+ "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
154
+ <html>' + raw_html.strip + '</html>'
155
+
156
+ @fragment = REXML::Document.new(wrapped).root
157
+ end
158
+
159
+ # The name of the first element in the fragment
160
+ def first_node_name
161
+ first_child = @fragment.children.first
162
+ (first_child && first_child.respond_to?(:name)) ? first_child.name : nil
163
+ end
164
+
165
+ # Add a class to the children of this fragment
166
+ def add_class(class_name)
167
+ @fragment.each_element do |c|
168
+ c.attributes['class'] = ((c.attributes['class']||'').split(' ') + [class_name]).join(' ')
169
+ end
170
+ end
171
+
172
+ # Process markdown within the contents of some elements and
173
+ # replace their contents with the processed version.
174
+ def process_markdown_inside_elements(doc)
175
+ elts = []
176
+ @fragment.each_element('//*[@markdown]') do |e|
177
+ elts << e
178
+ end
179
+
180
+ d = @fragment.children.first
181
+ if d && HTML_INLINE_ELEMS.include?(first_node_name)
182
+ elts << d unless d.attributes['markdown']
183
+ elts += span_descendents(d)
184
+ end
185
+
186
+ # find span elements or elements with 'markdown' attribute
187
+ elts.each do |e|
188
+ # should we parse block-level or span-level?
189
+ how = e.attributes['markdown']
190
+ e.attributes.delete('markdown')
191
+
192
+ next if "0" == how # user requests no markdown parsing inside
193
+ parse_blocks = (how == 'block') || BLOCK_TAGS.include?(e.name)
194
+
195
+ # Select all text children of e
196
+ e.texts.each do |original_text|
197
+ s = MaRuKu::Out::HTML.escapeHTML(original_text.value)
198
+ unless s.strip.empty?
199
+ # TODO extract common functionality
200
+ parsed = parse_blocks ? doc.parse_text_as_markdown(s) : doc.parse_span(s)
201
+ # restore leading and trailing spaces
202
+ padding = /\A(\s*).*?(\s*)\z/.match(s)
203
+ parsed = [padding[1]] + parsed + [padding[2]] if padding
204
+
205
+ el = doc.md_el(:dummy, parsed)
206
+
207
+ new_html = "<dummy>"
208
+ el.children_to_html.each do |x|
209
+ new_html << x.to_s
210
+ end
211
+ new_html << "</dummy>"
212
+
213
+ newdoc = REXML::Document.new(new_html).root
214
+
215
+ p = original_text.parent
216
+ newdoc.children.each do |c|
217
+ p.insert_before(original_text, c)
218
+ end
219
+
220
+ p.delete(original_text)
221
+ end
222
+ end
223
+ end
224
+ end
225
+
226
+ def to_html
227
+ formatter = REXML::Formatters::Default.new(true)
228
+ @fragment.children.inject("") do |out, child|
229
+ out << formatter.write(child, '')
230
+ end
231
+ end
232
+
233
+ private
234
+
235
+ # Get all span-level descendents of the given element, recursively,
236
+ # as an Array.
237
+ #
238
+ # @param e [REXML::Element] An element.
239
+ # @return [Array]
240
+ def span_descendents(e)
241
+ descendents = []
242
+ e.each_element do |c|
243
+ name = c.respond_to?(:name) ? c.name : nil
244
+ if name && HTML_INLINE_ELEMS.include?(c.name)
245
+ descendents << c
246
+ descendents += span_descendents(c)
247
+ end
248
+ end
249
+ end
250
+ end
251
+ end
@@ -1,326 +1,279 @@
1
- #--
2
- # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
- #
4
- # This file is part of Maruku.
5
- #
6
- # Maruku is free software; you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation; either version 2 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # Maruku is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with Maruku; if not, write to the Free Software
18
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
- #++
20
-
21
-
22
- module MaRuKu; module In; module Markdown; module SpanLevelParser
23
-
24
- # a string scanner coded by me
25
- class CharSourceManual; end
26
-
27
- # a wrapper around StringScanner
28
- class CharSourceStrscan; end
29
-
30
- # A debug scanner that checks the correctness of both
31
- # by comparing their output
32
- class CharSourceDebug; end
33
-
34
- # Choose!
35
-
36
- CharSource = CharSourceManual # faster! 58ms vs. 65ms
37
- #CharSource = CharSourceStrscan
38
- #CharSource = CharSourceDebug
39
-
40
-
41
- class CharSourceManual
42
- include MaRuKu::Strings
43
-
44
- def initialize(s, parent=nil)
45
- raise "Passed #{s.class}" if not s.kind_of? String
46
- @buffer = s
47
- @buffer_index = 0
48
- @parent = parent
49
- end
50
-
51
- # Return current char as a FixNum (or nil).
52
- def cur_char; @buffer[@buffer_index] end
53
-
54
- # Return the next n chars as a String.
55
- def cur_chars(n); @buffer[@buffer_index,n] end
56
-
57
- # Return the char after current char as a FixNum (or nil).
58
- def next_char; @buffer[@buffer_index+1] end
59
-
60
- def shift_char
61
- c = @buffer[@buffer_index]
62
- @buffer_index+=1
63
- c
64
- end
65
-
66
- def ignore_char
67
- @buffer_index+=1
68
- nil
69
- end
70
-
71
- def ignore_chars(n)
72
- @buffer_index+=n
73
- nil
74
- end
75
-
76
- def current_remaining_buffer
77
- @buffer[@buffer_index, @buffer.size-@buffer_index]
78
- end
79
-
80
- def cur_chars_are(string)
81
- # There is a bug here
82
- if false
83
- r2 = /^.{#{@buffer_index}}#{Regexp.escape string}/m
84
- @buffer =~ r2
85
- else
86
- cur_chars(string.size) == string
87
- end
88
- end
89
-
90
- def next_matches(r)
91
- r2 = /^.{#{@buffer_index}}#{r}/m
92
- md = r2.match @buffer
93
- return !!md
94
- end
95
-
96
- def read_regexp3(r)
97
- r2 = /^.{#{@buffer_index}}#{r}/m
98
- m = r2.match @buffer
99
- if m
100
- consumed = m.to_s.size - @buffer_index
101
- # puts "Consumed #{consumed} chars (entire is #{m.to_s.inspect})"
102
- ignore_chars consumed
103
- else
104
- # puts "Could not read regexp #{r2.inspect} from buffer "+
105
- # " index=#{@buffer_index}"
106
- # puts "Cur chars = #{cur_chars(20).inspect}"
107
- # puts "Matches? = #{cur_chars(20) =~ r}"
108
- end
109
- m
110
- end
111
-
112
- def read_regexp(r)
113
- r2 = /^#{r}/
114
- rest = current_remaining_buffer
115
- m = r2.match(rest)
116
- if m
117
- @buffer_index += m.to_s.size
118
- # puts "#{r} matched #{rest.inspect}: #{m.to_s.inspect}"
119
- end
120
- return m
121
- end
122
-
123
- def consume_whitespace
124
- while c = cur_char
125
- if (c == ?\s || c == ?\t)
126
- # puts "ignoring #{c}"
127
- ignore_char
128
- else
129
- # puts "#{c} is not ws: "<<c
130
- break
131
- end
132
- end
133
- end
134
-
135
- def read_text_chars(out)
136
- s = @buffer.size; c=nil
137
- while @buffer_index < s && (c=@buffer[@buffer_index]) &&
138
- ((c>=?a && c<=?z) || (c>=?A && c<=?Z))
139
- out << c
140
- @buffer_index += 1
141
- end
142
- end
143
-
144
- def describe
145
- s = describe_pos(@buffer, @buffer_index)
146
- if @parent
147
- s += "\n\n" + @parent.describe
148
- end
149
- s
150
- end
151
- include SpanLevelParser
152
- end
1
+ require 'strscan'
153
2
 
154
- def describe_pos(buffer, buffer_index)
155
- len = 75
156
- num_before = [len/2, buffer_index].min
157
- num_after = [len/2, buffer.size-buffer_index].min
158
- num_before_max = buffer_index
159
- num_after_max = buffer.size-buffer_index
160
-
161
- # puts "num #{num_before} #{num_after}"
162
- num_before = [num_before_max, len-num_after].min
163
- num_after = [num_after_max, len-num_before].min
164
- # puts "num #{num_before} #{num_after}"
165
-
166
- index_start = [buffer_index - num_before, 0].max
167
- index_end = [buffer_index + num_after, buffer.size].min
168
-
169
- size = index_end- index_start
170
-
171
- # puts "- #{index_start} #{size}"
172
-
173
- str = buffer[index_start, size]
174
- str.gsub!("\n",'N')
175
- str.gsub!("\t",'T')
176
-
177
- if index_end == buffer.size
178
- str += "EOF"
179
- end
180
-
181
- pre_s = buffer_index-index_start
182
- pre_s = [pre_s, 0].max
183
- pre_s2 = [len-pre_s,0].max
184
- # puts "pre_S = #{pre_s}"
185
- pre =" "*(pre_s)
186
-
187
- "-"*len+"\n"+
188
- str + "\n" +
189
- "-"*pre_s + "|" + "-"*(pre_s2)+"\n"+
190
- # pre + "|\n"+
191
- pre + "+--- Byte #{buffer_index}\n"+
192
-
193
- "Shown bytes [#{index_start} to #{size}] of #{buffer.size}:\n"+
194
- add_tabs(buffer,1,">")
195
-
196
- # "CharSource: At character #{@buffer_index} of block "+
197
- # " beginning with:\n #{@buffer[0,50].inspect} ...\n"+
198
- # " before: \n ... #{cur_chars(50).inspect} ... "
199
- end
3
+ module MaRuKu::In::Markdown::SpanLevelParser
200
4
 
5
+ # a string scanner coded by me
6
+ class CharSourceManual; end
201
7
 
202
- require 'strscan'
8
+ # a wrapper around StringScanner
9
+ class CharSourceStrscan; end
203
10
 
204
- class CharSourceStrscan
205
- include SpanLevelParser
206
- include MaRuKu::Strings
207
-
208
- def initialize(s, parent=nil)
209
- @s = StringScanner.new(s)
210
- @parent = parent
211
- end
212
-
213
- # Return current char as a FixNum (or nil).
214
- def cur_char
215
- @s.peek(1)[0]
216
- end
217
-
218
- # Return the next n chars as a String.
219
- def cur_chars(n);
220
- @s.peek(n)
221
- end
222
-
223
- # Return the char after current char as a FixNum (or nil).
224
- def next_char;
225
- @s.peek(2)[1]
226
- end
227
-
228
- def shift_char
229
- (@s.get_byte)[0]
230
- end
231
-
232
- def ignore_char
233
- @s.get_byte
234
- nil
235
- end
236
-
237
- def ignore_chars(n)
238
- n.times do @s.get_byte end
239
- nil
240
- end
241
-
242
- def current_remaining_buffer
243
- @s.rest #nil #@buffer[@buffer_index, @buffer.size-@buffer_index]
244
- end
245
-
246
- def cur_chars_are(string)
247
- cur_chars(string.size) == string
248
- end
249
-
250
- def next_matches(r)
251
- len = @s.match?(r)
252
- return !!len
253
- end
254
-
255
- def read_regexp(r)
256
- string = @s.scan(r)
257
- if string
258
- return r.match(string)
259
- else
260
- return nil
261
- end
262
- end
263
-
264
- def consume_whitespace
265
- @s.scan(/\s+/)
266
- nil
267
- end
268
-
269
- def describe
270
- describe_pos(@s.string, @s.pos)
271
- end
272
-
273
- end
11
+ # A debug scanner that checks the correctness of both
12
+ # by comparing their output
13
+ class CharSourceDebug; end
274
14
 
15
+ # Choose!
275
16
 
276
- class CharSourceDebug
277
- def initialize(s, parent)
278
- @a = CharSourceManual.new(s, parent)
279
- @b = CharSourceStrscan.new(s, parent)
280
- end
281
-
282
- def method_missing(methodname, *args)
283
- a_bef = @a.describe
284
- b_bef = @b.describe
285
-
286
- a = @a.send(methodname, *args)
287
- b = @b.send(methodname, *args)
288
-
289
- # if methodname == :describe
290
- # return a
291
- # end
292
-
293
- if a.kind_of? MatchData
294
- if a.to_a != b.to_a
295
- puts "called: #{methodname}(#{args})"
296
- puts "Matchdata:\na = #{a.to_a.inspect}\nb = #{b.to_a.inspect}"
297
- puts "AFTER: "+@a.describe
298
- puts "AFTER: "+@b.describe
299
- puts "BEFORE: "+a_bef
300
- puts "BEFORE: "+b_bef
301
- puts caller.join("\n")
302
- exit
303
- end
304
- else
305
- if a!=b
306
- puts "called: #{methodname}(#{args})"
307
- puts "Attenzione!\na = #{a.inspect}\nb = #{b.inspect}"
308
- puts ""+@a.describe
309
- puts ""+@b.describe
310
- puts caller.join("\n")
311
- exit
312
- end
313
- end
314
-
315
- if @a.cur_char != @b.cur_char
316
- puts "Fuori sincronia dopo #{methodname}(#{args})"
317
- puts ""+@a.describe
318
- puts ""+@b.describe
319
- exit
320
- end
321
-
322
- return a
323
- end
324
- end
17
+ CharSource = CharSourceManual # faster! 58ms vs. 65ms
18
+ #CharSource = CharSourceStrscan # Faster on LONG documents. But StringScanner is buggy in Rubinius
19
+ #CharSource = CharSourceDebug
20
+
21
+
22
+ class CharSourceManual
23
+ def initialize(s, parent=nil)
24
+ raise "Passed #{s.class}" if not s.kind_of? String
25
+ @buffer = s
26
+ @buffer_index = 0
27
+ @parent = parent
28
+ end
29
+
30
+ # Return current char as a String (or nil).
31
+ def cur_char
32
+ cur_chars(1)
33
+ end
34
+
35
+ # Return the next n chars as a String.
36
+ def cur_chars(n)
37
+ return nil if @buffer_index >= @buffer.size
38
+ @buffer[@buffer_index, n]
39
+ end
40
+
41
+ # Return the char after current char as a String (or nil).
42
+ def next_char
43
+ return nil if @buffer_index + 1 >= @buffer.size
44
+ @buffer[@buffer_index + 1, 1]
45
+ end
46
+
47
+ def shift_char
48
+ c = cur_char
49
+ @buffer_index += 1
50
+ c
51
+ end
52
+
53
+ def ignore_char
54
+ @buffer_index += 1
55
+ end
56
+
57
+ def ignore_chars(n)
58
+ @buffer_index += n
59
+ end
60
+
61
+ def current_remaining_buffer
62
+ @buffer[@buffer_index, @buffer.size - @buffer_index]
63
+ end
64
+
65
+ def cur_chars_are(string)
66
+ cur_chars(string.size) == string
67
+ end
68
+
69
+ def next_matches(r)
70
+ r2 = /^.{#{@buffer_index}}#{r}/m
71
+ r2.match @buffer
72
+ end
73
+
74
+ def read_regexp(r)
75
+ r2 = /^#{r}/
76
+ rest = current_remaining_buffer
77
+ m = r2.match(rest)
78
+ if m
79
+ @buffer_index += m.to_s.size
80
+ end
81
+ m
82
+ end
83
+
84
+ def consume_whitespace
85
+ while c = cur_char
86
+ break unless (c == ' ' || c == "\t")
87
+ ignore_char
88
+ end
89
+ end
90
+
91
+ def describe
92
+ s = describe_pos(@buffer, @buffer_index)
93
+ if @parent
94
+ s += "\n\n" + @parent.describe
95
+ end
96
+ s
97
+ end
98
+
99
+ def describe_pos(buffer, buffer_index)
100
+ len = 75
101
+ num_before = [len/2, buffer_index].min
102
+ num_after = [len/2, buffer.size - buffer_index].min
103
+ num_before_max = buffer_index
104
+ num_after_max = buffer.size - buffer_index
105
+
106
+ num_before = [num_before_max, len - num_after].min
107
+ num_after = [num_after_max, len - num_before].min
108
+
109
+ index_start = [buffer_index - num_before, 0].max
110
+ index_end = [buffer_index + num_after, buffer.size].min
111
+
112
+ size = index_end - index_start
113
+
114
+ str = buffer[index_start, size]
115
+ str.gsub!("\n", 'N')
116
+ str.gsub!("\t", 'T')
117
+
118
+ if index_end == buffer.size
119
+ str += "EOF"
120
+ end
121
+
122
+ pre_s = buffer_index - index_start
123
+ pre_s = [pre_s, 0].max
124
+ pre_s2 = [len - pre_s, 0].max
125
+ pre = " " * pre_s
126
+
127
+ "-" * len + "\n" +
128
+ str + "\n" +
129
+ "-" * pre_s + "|" + "-" * pre_s2 + "\n" +
130
+ pre + "+--- Byte #{buffer_index}\n"+
325
131
 
326
- end end end end
132
+ "Shown bytes [#{index_start} to #{size}] of #{buffer.size}:\n"+
133
+ buffer.gsub(/^/, ">")
134
+ end
135
+ end
136
+
137
+ class CharSourceStrscan
138
+
139
+ def initialize(s, parent=nil)
140
+ @scanner = StringScanner.new(s)
141
+ @size = s.size
142
+ end
143
+
144
+ # Return current char as a String (or nil).
145
+ def cur_char
146
+ @scanner.peek(1)[0]
147
+ end
148
+
149
+ # Return the next n chars as a String.
150
+ def cur_chars(n)
151
+ @scanner.peek(n)
152
+ end
153
+
154
+ # Return the char after current char as a String (or nil).
155
+ def next_char
156
+ @scanner.peek(2)[1]
157
+ end
158
+
159
+ # Return a character as a String, advancing the pointer.
160
+ def shift_char
161
+ @scanner.getch[0]
162
+ end
163
+
164
+ # Advance the pointer
165
+ def ignore_char
166
+ @scanner.getch
167
+ end
168
+
169
+ # Advance the pointer by n
170
+ def ignore_chars(n)
171
+ n.times { @scanner.getch }
172
+ end
173
+
174
+ # Return the rest of the string
175
+ def current_remaining_buffer
176
+ @scanner.rest
177
+ end
178
+
179
+ # Returns true if string matches what we're pointing to
180
+ def cur_chars_are(string)
181
+ @scanner.peek(string.size) == string
182
+ end
183
+
184
+ # Returns true if Regexp r matches what we're pointing to
185
+ def next_matches(r)
186
+ @scanner.check(r)
187
+ end
188
+
189
+ def read_regexp(r)
190
+ r.match(@scanner.scan(r))
191
+ end
192
+
193
+ def consume_whitespace
194
+ @scanner.skip(/\s+/)
195
+ end
196
+
197
+ def describe
198
+ len = 75
199
+ num_before = [len/2, @scanner.pos].min
200
+ num_after = [len/2, @scanner.rest_size].min
201
+ num_before_max = @scanner.pos
202
+ num_after_max = @scanner.rest_size
203
+
204
+ num_before = [num_before_max, len - num_after].min
205
+ num_after = [num_after_max, len - num_before].min
206
+
207
+ index_start = [@scanner.pos - num_before, 0].max
208
+ index_end = [@scanner.pos + num_after, @size].min
209
+
210
+ size = index_end - index_start
211
+
212
+ str = @scanner.string[index_start, size]
213
+ str.gsub!("\n", 'N')
214
+ str.gsub!("\t", 'T')
215
+
216
+ if index_end == @size
217
+ str += "EOF"
218
+ end
219
+
220
+ pre_s = @scanner.pos - index_start
221
+ pre_s = [pre_s, 0].max
222
+ pre_s2 = [len-pre_s, 0].max
223
+ pre = " " * pre_s
224
+
225
+ "-" * len + "\n" +
226
+ str + "\n" +
227
+ "-" * pre_s + "|" + "-" * pre_s2 + "\n" +
228
+ pre + "+--- Byte #{@scanner.pos}\n" +
229
+ "Shown bytes [#{index_start} to #{size}] of #{@size}:\n" +
230
+ @scanner.string.gsub(/^/, ">")
231
+ end
232
+ end
233
+
234
+ class CharSourceDebug
235
+ def initialize(s, parent)
236
+ @a = CharSourceManual.new(s, parent)
237
+ @b = CharSourceStrscan.new(s, parent)
238
+ end
239
+
240
+ def method_missing(methodname, *args)
241
+ a_bef = @a.describe
242
+ b_bef = @b.describe
243
+
244
+ a = @a.send(methodname, *args)
245
+ b = @b.send(methodname, *args)
246
+
247
+ if a.kind_of? MatchData
248
+ if a.to_a != b.to_a
249
+ puts "called: #{methodname}(#{args})"
250
+ puts "Matchdata:\na = #{a.to_a.inspect}\nb = #{b.to_a.inspect}"
251
+ puts "AFTER: " + @a.describe
252
+ puts "AFTER: " + @b.describe
253
+ puts "BEFORE: " + a_bef
254
+ puts "BEFORE: " + b_bef
255
+ puts caller.join("\n")
256
+ exit
257
+ end
258
+ else
259
+ if a != b
260
+ puts "called: #{methodname}(#{args})"
261
+ puts "Attenzione!\na = #{a.inspect}\nb = #{b.inspect}"
262
+ puts "" + @a.describe
263
+ puts "" + @b.describe
264
+ puts caller.join("\n")
265
+ exit
266
+ end
267
+ end
268
+
269
+ if @a.cur_char != @b.cur_char
270
+ puts "Fuori sincronia dopo #{methodname}(#{args})"
271
+ puts "" + @a.describe
272
+ puts "" + @b.describe
273
+ exit
274
+ end
275
+
276
+ return a
277
+ end
278
+ end
279
+ end