maruku 0.6.1 → 0.7.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (263) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/MIT-LICENSE.txt +20 -0
  5. data/bin/maruku +153 -152
  6. data/bin/marutex +2 -29
  7. data/data/entities.xml +261 -0
  8. data/docs/math.md +14 -18
  9. data/lib/maruku.rb +65 -77
  10. data/lib/maruku/attributes.rb +109 -214
  11. data/lib/maruku/defaults.rb +45 -67
  12. data/lib/maruku/document.rb +43 -0
  13. data/lib/maruku/element.rb +112 -0
  14. data/lib/maruku/errors.rb +71 -0
  15. data/lib/maruku/ext/div.rb +105 -113
  16. data/lib/maruku/ext/fenced_code.rb +97 -0
  17. data/lib/maruku/ext/math.rb +22 -26
  18. data/lib/maruku/ext/math/elements.rb +20 -26
  19. data/lib/maruku/ext/math/mathml_engines/blahtex.rb +92 -104
  20. data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +33 -26
  21. data/lib/maruku/ext/math/mathml_engines/none.rb +11 -19
  22. data/lib/maruku/ext/math/mathml_engines/ritex.rb +2 -4
  23. data/lib/maruku/ext/math/parsing.rb +107 -113
  24. data/lib/maruku/ext/math/to_html.rb +184 -187
  25. data/lib/maruku/ext/math/to_latex.rb +30 -21
  26. data/lib/maruku/helpers.rb +158 -257
  27. data/lib/maruku/html.rb +254 -0
  28. data/lib/maruku/input/charsource.rb +272 -319
  29. data/lib/maruku/input/extensions.rb +62 -63
  30. data/lib/maruku/input/html_helper.rb +220 -189
  31. data/lib/maruku/input/linesource.rb +90 -110
  32. data/lib/maruku/input/mdline.rb +129 -0
  33. data/lib/maruku/input/parse_block.rb +618 -612
  34. data/lib/maruku/input/parse_doc.rb +145 -215
  35. data/lib/maruku/input/parse_span.rb +658 -0
  36. data/lib/maruku/input/rubypants.rb +200 -128
  37. data/lib/maruku/inspect_element.rb +60 -0
  38. data/lib/maruku/maruku.rb +10 -31
  39. data/lib/maruku/output/entity_table.rb +33 -0
  40. data/lib/maruku/output/s5/fancy.rb +462 -462
  41. data/lib/maruku/output/s5/to_s5.rb +115 -135
  42. data/lib/maruku/output/to_html.rb +898 -983
  43. data/lib/maruku/output/to_latex.rb +561 -560
  44. data/lib/maruku/output/to_markdown.rb +207 -162
  45. data/lib/maruku/output/to_s.rb +11 -52
  46. data/lib/maruku/string_utils.rb +129 -179
  47. data/lib/maruku/toc.rb +185 -196
  48. data/lib/maruku/version.rb +33 -38
  49. data/spec/block_docs/abbrev.md +776 -0
  50. data/{tests/unittest → spec/block_docs}/abbreviations.md +11 -20
  51. data/{tests/unittest → spec/block_docs}/alt.md +2 -14
  52. data/{tests/unittest/pending → spec/block_docs}/amps.md +1 -13
  53. data/{tests/unittest → spec/block_docs}/attributes/att2.md +0 -12
  54. data/{tests/unittest → spec/block_docs}/attributes/att3.md +2 -14
  55. data/{tests/unittest → spec/block_docs}/attributes/attributes.md +12 -16
  56. data/{tests/unittest → spec/block_docs}/attributes/circular.md +0 -12
  57. data/{tests/unittest → spec/block_docs}/attributes/default.md +1 -13
  58. data/{tests/unittest → spec/block_docs}/blank.md +0 -12
  59. data/{tests/unittest → spec/block_docs}/blanks_in_code.md +16 -15
  60. data/{tests/unittest/loss.md → spec/block_docs/bug_def.md} +6 -18
  61. data/{tests/unittest → spec/block_docs}/bug_table.md +3 -15
  62. data/{tests/unittest → spec/block_docs}/code.md +7 -14
  63. data/{tests/unittest → spec/block_docs}/code2.md +4 -14
  64. data/{tests/unittest → spec/block_docs}/code3.md +12 -16
  65. data/{tests/unittest → spec/block_docs}/data_loss.md +2 -14
  66. data/{tests/unittest → spec/block_docs}/divs/div1.md +0 -12
  67. data/{tests/unittest → spec/block_docs}/divs/div2.md +0 -12
  68. data/{tests/unittest → spec/block_docs}/divs/div3_nest.md +3 -15
  69. data/{tests/unittest → spec/block_docs}/easy.md +1 -13
  70. data/spec/block_docs/email.md +29 -0
  71. data/{tests/unittest/pending → spec/block_docs}/empty_cells.md +3 -15
  72. data/{tests/unittest → spec/block_docs}/encoding/iso-8859-1.md +1 -14
  73. data/{tests/unittest → spec/block_docs}/encoding/utf-8.md +0 -12
  74. data/{tests/unittest → spec/block_docs}/entities.md +27 -29
  75. data/{tests/unittest/notyet → spec/block_docs}/escape.md +2 -14
  76. data/{tests/unittest → spec/block_docs}/escaping.md +11 -22
  77. data/{tests/unittest → spec/block_docs}/extra_dl.md +2 -13
  78. data/{tests/unittest → spec/block_docs}/extra_header_id.md +14 -20
  79. data/{tests/unittest → spec/block_docs}/extra_table1.md +3 -15
  80. data/spec/block_docs/fenced_code_blocks.md +66 -0
  81. data/spec/block_docs/fenced_code_blocks_highlighted.md +18 -0
  82. data/{tests/unittest → spec/block_docs}/footnotes.md +12 -24
  83. data/spec/block_docs/footnotes2.md +78 -0
  84. data/spec/block_docs/hard.md +25 -0
  85. data/spec/block_docs/header_after_par.md +62 -0
  86. data/{tests/unittest → spec/block_docs}/headers.md +10 -18
  87. data/{tests/unittest → spec/block_docs}/hex_entities.md +7 -18
  88. data/{tests/unittest → spec/block_docs}/hrule.md +5 -12
  89. data/{tests/unittest → spec/block_docs}/html3.md +1 -13
  90. data/{tests/unittest → spec/block_docs}/html4.md +2 -14
  91. data/{tests/unittest → spec/block_docs}/html5.md +2 -14
  92. data/spec/block_docs/html_block_in_para.md +22 -0
  93. data/spec/block_docs/html_inline.md +25 -0
  94. data/spec/block_docs/html_trailing.md +31 -0
  95. data/spec/block_docs/ie.md +62 -0
  96. data/spec/block_docs/iframe.md +29 -0
  97. data/{tests/unittest → spec/block_docs}/images.md +22 -28
  98. data/{tests/unittest → spec/block_docs}/images2.md +7 -17
  99. data/{tests/unittest → spec/block_docs}/inline_html.md +37 -67
  100. data/{tests/unittest → spec/block_docs}/inline_html2.md +1 -13
  101. data/spec/block_docs/inline_html_beginning.md +10 -0
  102. data/spec/block_docs/issue20.md +9 -0
  103. data/spec/block_docs/issue26.md +22 -0
  104. data/spec/block_docs/issue29.md +9 -0
  105. data/spec/block_docs/issue30.md +30 -0
  106. data/spec/block_docs/issue31.md +25 -0
  107. data/spec/block_docs/issue40.md +40 -0
  108. data/spec/block_docs/issue64.md +55 -0
  109. data/spec/block_docs/issue67.md +19 -0
  110. data/spec/block_docs/issue70.md +11 -0
  111. data/spec/block_docs/issue72.md +17 -0
  112. data/spec/block_docs/issue74.md +38 -0
  113. data/spec/block_docs/issue79.md +15 -0
  114. data/spec/block_docs/issue83.md +13 -0
  115. data/spec/block_docs/issue85.md +25 -0
  116. data/spec/block_docs/issue88.md +19 -0
  117. data/spec/block_docs/issue89.md +12 -0
  118. data/spec/block_docs/issue90.md +38 -0
  119. data/{tests/unittest/pending → spec/block_docs}/link.md +21 -18
  120. data/{tests/unittest → spec/block_docs}/links.md +33 -32
  121. data/spec/block_docs/links2.md +21 -0
  122. data/{tests/unittest → spec/block_docs}/list1.md +0 -12
  123. data/{tests/unittest → spec/block_docs}/list12.md +2 -14
  124. data/{tests/unittest → spec/block_docs}/list2.md +2 -14
  125. data/spec/block_docs/list_multipara.md +42 -0
  126. data/{tests/unittest → spec/block_docs}/lists.md +28 -29
  127. data/{tests/unittest → spec/block_docs}/lists10.md +2 -14
  128. data/spec/block_docs/lists11.md +23 -0
  129. data/spec/block_docs/lists12.md +43 -0
  130. data/spec/block_docs/lists13.md +55 -0
  131. data/spec/block_docs/lists14.md +61 -0
  132. data/spec/block_docs/lists15.md +36 -0
  133. data/spec/block_docs/lists6.md +88 -0
  134. data/spec/block_docs/lists7b.md +58 -0
  135. data/spec/block_docs/lists9.md +53 -0
  136. data/{tests/unittest → spec/block_docs}/lists_after_paragraph.md +19 -25
  137. data/spec/block_docs/lists_blank.md +35 -0
  138. data/{tests/unittest/list3.md → spec/block_docs/lists_blockquote_code.md} +2 -14
  139. data/{tests/unittest/list4.md → spec/block_docs/lists_need_blank_line.md} +50 -21
  140. data/spec/block_docs/lists_nested.md +44 -0
  141. data/spec/block_docs/lists_nested_blankline.md +28 -0
  142. data/spec/block_docs/lists_nested_deep.md +43 -0
  143. data/{tests/unittest → spec/block_docs}/lists_ol.md +37 -54
  144. data/spec/block_docs/lists_paraindent.md +47 -0
  145. data/spec/block_docs/lists_tab.md +54 -0
  146. data/spec/block_docs/loss.md +17 -0
  147. data/spec/block_docs/math-blahtex/equations.md +30 -0
  148. data/spec/block_docs/math-blahtex/inline.md +48 -0
  149. data/spec/block_docs/math-blahtex/math2.md +45 -0
  150. data/spec/block_docs/math-blahtex/table.md +25 -0
  151. data/spec/block_docs/math/embedded_invalid_svg.md +79 -0
  152. data/spec/block_docs/math/embedded_svg.md +97 -0
  153. data/spec/block_docs/math/equations.md +44 -0
  154. data/{tests/unittest → spec/block_docs}/math/inline.md +7 -19
  155. data/spec/block_docs/math/math2.md +45 -0
  156. data/{tests/unittest → spec/block_docs}/math/notmath.md +0 -12
  157. data/spec/block_docs/math/raw_mathml.md +87 -0
  158. data/spec/block_docs/math/table.md +25 -0
  159. data/{tests/unittest → spec/block_docs}/math/table2.md +5 -17
  160. data/{tests/unittest → spec/block_docs}/misc_sw.md +181 -118
  161. data/{tests/unittest → spec/block_docs}/olist.md +6 -18
  162. data/{tests/unittest → spec/block_docs}/one.md +0 -12
  163. data/{tests/unittest → spec/block_docs}/paragraph.md +0 -12
  164. data/{tests/unittest → spec/block_docs}/paragraph_rules/dont_merge_ref.md +4 -12
  165. data/{tests/unittest → spec/block_docs}/paragraph_rules/tab_is_blank.md +0 -12
  166. data/{tests/unittest → spec/block_docs}/paragraphs.md +1 -13
  167. data/{tests/unittest → spec/block_docs}/recover/recover_links.md +4 -16
  168. data/{tests/unittest/pending/ref.md → spec/block_docs/ref_with_period.md} +7 -16
  169. data/spec/block_docs/ref_with_title.md +22 -0
  170. data/{tests/unittest → spec/block_docs}/references/long_example.md +16 -23
  171. data/{tests/unittest → spec/block_docs}/references/spaces_and_numbers.md +0 -12
  172. data/{tests/unittest → spec/block_docs}/smartypants.md +24 -31
  173. data/{tests/unittest → spec/block_docs}/syntax_hl.md +13 -17
  174. data/{tests/unittest → spec/block_docs}/table_attributes.md +2 -14
  175. data/spec/block_docs/tables.md +58 -0
  176. data/{tests/unittest → spec/block_docs}/test.md +1 -13
  177. data/{tests/unittest/notyet → spec/block_docs}/ticks.md +1 -13
  178. data/spec/block_docs/toc.md +87 -0
  179. data/{tests/unittest/notyet → spec/block_docs}/triggering.md +14 -25
  180. data/{tests/unittest → spec/block_docs}/underscore_in_words.md +0 -12
  181. data/{tests/unittest → spec/block_docs}/wrapping.md +4 -16
  182. data/spec/block_docs/xml.md +33 -0
  183. data/{tests/unittest → spec/block_docs}/xml2.md +0 -12
  184. data/spec/block_docs/xml3.md +24 -0
  185. data/{tests/unittest → spec/block_docs}/xml_instruction.md +9 -20
  186. data/spec/block_spec.rb +110 -0
  187. data/spec/cli_spec.rb +8 -0
  188. data/spec/span_spec.rb +256 -0
  189. data/spec/spec_helper.rb +2 -0
  190. data/spec/to_html_utf8_spec.rb +13 -0
  191. metadata +205 -243
  192. metadata.gz.sig +3 -0
  193. data/Rakefile +0 -48
  194. data/bin/marudown +0 -29
  195. data/bin/marutest +0 -345
  196. data/docs/changelog.md +0 -334
  197. data/lib/maruku/errors_management.rb +0 -92
  198. data/lib/maruku/ext/math/latex_fix.rb +0 -12
  199. data/lib/maruku/input/parse_span_better.rb +0 -746
  200. data/lib/maruku/input/type_detection.rb +0 -147
  201. data/lib/maruku/output/to_latex_entities.rb +0 -367
  202. data/lib/maruku/output/to_latex_strings.rb +0 -64
  203. data/lib/maruku/structures.rb +0 -167
  204. data/lib/maruku/structures_inspect.rb +0 -87
  205. data/lib/maruku/structures_iterators.rb +0 -61
  206. data/lib/maruku/tests/benchmark.rb +0 -82
  207. data/lib/maruku/tests/new_parser.rb +0 -373
  208. data/lib/maruku/tests/tests.rb +0 -136
  209. data/lib/maruku/usage/example1.rb +0 -33
  210. data/tests/bugs/code_in_links.md +0 -101
  211. data/tests/bugs/complex_escaping.md +0 -38
  212. data/tests/math/syntax.md +0 -46
  213. data/tests/math_usage/document.md +0 -13
  214. data/tests/others/abbreviations.md +0 -11
  215. data/tests/others/blank.md +0 -4
  216. data/tests/others/code.md +0 -5
  217. data/tests/others/code2.md +0 -8
  218. data/tests/others/code3.md +0 -16
  219. data/tests/others/email.md +0 -4
  220. data/tests/others/entities.md +0 -19
  221. data/tests/others/escaping.md +0 -16
  222. data/tests/others/extra_dl.md +0 -101
  223. data/tests/others/extra_header_id.md +0 -13
  224. data/tests/others/extra_table1.md +0 -40
  225. data/tests/others/footnotes.md +0 -17
  226. data/tests/others/headers.md +0 -10
  227. data/tests/others/hrule.md +0 -10
  228. data/tests/others/images.md +0 -20
  229. data/tests/others/inline_html.md +0 -42
  230. data/tests/others/links.md +0 -38
  231. data/tests/others/list1.md +0 -4
  232. data/tests/others/list2.md +0 -5
  233. data/tests/others/list3.md +0 -8
  234. data/tests/others/lists.md +0 -32
  235. data/tests/others/lists_after_paragraph.md +0 -44
  236. data/tests/others/lists_ol.md +0 -39
  237. data/tests/others/misc_sw.md +0 -105
  238. data/tests/others/one.md +0 -1
  239. data/tests/others/paragraphs.md +0 -13
  240. data/tests/others/sss06.md +0 -352
  241. data/tests/others/test.md +0 -4
  242. data/tests/s5/s5profiling.md +0 -48
  243. data/tests/unittest/bug_def.md +0 -28
  244. data/tests/unittest/email.md +0 -32
  245. data/tests/unittest/html2.md +0 -34
  246. data/tests/unittest/ie.md +0 -61
  247. data/tests/unittest/links2.md +0 -34
  248. data/tests/unittest/lists11.md +0 -28
  249. data/tests/unittest/lists6.md +0 -53
  250. data/tests/unittest/lists9.md +0 -76
  251. data/tests/unittest/math/equations.md +0 -86
  252. data/tests/unittest/math/math2.md +0 -57
  253. data/tests/unittest/math/table.md +0 -37
  254. data/tests/unittest/notyet/header_after_par.md +0 -70
  255. data/tests/unittest/red_tests/abbrev.md +0 -1388
  256. data/tests/unittest/red_tests/lists7.md +0 -68
  257. data/tests/unittest/red_tests/lists7b.md +0 -128
  258. data/tests/unittest/red_tests/lists8.md +0 -76
  259. data/tests/unittest/red_tests/xml.md +0 -70
  260. data/tests/unittest/xml3.md +0 -38
  261. data/tests/utf8-files/simple.md +0 -1
  262. data/unit_test_block.sh +0 -5
  263. data/unit_test_span.sh +0 -3
@@ -0,0 +1,254 @@
1
+ require 'set'
2
+
3
+ $warned_nokogiri = false
4
+
5
+ module MaRuKu
6
# Element names that are handled as span-level (inline) markup when they
# show up in raw HTML. The names are listed in three groups which look
# like HTML, SVG and MathML vocabulary respectively.
html_inline_names = %w[
  a abbr acronym audio b bdi bdo big br button canvas caption cite code
  col colgroup command datalist del details dfn dir em fieldset font form i img input ins
  kbd label legend mark meter optgroup option progress q rp rt ruby s samp section select small
  source span strike strong sub summary sup tbody td tfoot th thead time tr track tt u var video wbr
]
svg_inline_names = %w[
  animate animateColor animateMotion animateTransform circle clipPath defs desc ellipse
  feGaussianBlur filter font-face font-face-name font-face-src foreignObject g glyph hkern
  linearGradient line marker mask metadata missing-glyph mpath path pattern polygon polyline
  radialGradient rect set stop svg switch text textPath title tspan use
]
mathml_inline_names = %w[
  annotation annotation-xml maction math menclose merror mfrac mfenced mi mmultiscripts mn mo
  mover mpadded mphantom mprescripts mroot mrow mspace msqrt mstyle msub msubsup msup mtable
  mtd mtext mtr munder munderover none semantics
]
HTML_INLINE_ELEMS = Set.new(html_inline_names + svg_inline_names + mathml_inline_names)

# Parse block-level markdown elements in these HTML tags
BLOCK_TAGS = ['div']
20
+
21
+ # This gets mixed into HTML MDElement nodes to hold the parsed document fragment
22
module HTMLElement
  # Reader and writer for the parsed document fragment stored on the
  # object this module is mixed into.
  attr_reader :parsed_html
  attr_writer :parsed_html
end
25
+
26
+ # This is just a factory, not an actual class
27
module HTMLFragment
  class << self
    # Build a concrete fragment for +raw_html+.
    #
    # A NokogiriHTMLFragment is returned when the :html_parser global is
    # 'nokogiri' and no earlier load failure has been recorded in
    # $warned_nokogiri; in every other case a REXMLHTMLFragment is
    # returned.
    #
    # @param raw_html [String] markup handed to the backend constructor.
    def new(raw_html)
      unless $warned_nokogiri || MaRuKu::Globals[:html_parser] != 'nokogiri'
        begin
          require 'nokogiri'
          return NokogiriHTMLFragment.new(raw_html)
        rescue LoadError
          # Warn once, then remember the failure so later calls go
          # straight to the REXML backend.
          warn "Nokogiri could not be loaded. Falling back to REXML."
          $warned_nokogiri = true
        end
      end

      require 'rexml/document'
      REXMLHTMLFragment.new(raw_html)
    end
  end
end
46
+
47
+ # Nokogiri backend for HTML handling
48
class NokogiriHTMLFragment
  # Parse +raw_html+ and keep the wrapper element that holds its nodes.
  #
  # @param raw_html [String] fragment markup; it is stripped before parsing.
  def initialize(raw_html)
    # The DOCTYPE is there just for the entity references; the <html>
    # wrapper gives the fragment a single root element.
    prolog = "<!DOCTYPE html PUBLIC\n" \
             "\"-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN\"\n" \
             "\"http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd\">\n"
    document = Nokogiri::XML::Document.parse(prolog + '<html>' + raw_html.strip + '</html>') do |config|
      config.nonet
    end
    @fragment = document.root
  end

  # @return [String, nil] name of the first child node of the fragment,
  #   or nil when the fragment has no children.
  def first_node_name
    node = @fragment.children.first
    return nil unless node
    node.name
  end

  # Append +class_name+ to the class attribute of every child of the
  # fragment.
  def add_class(class_name)
    @fragment.children.each do |child|
      classes = (child['class'] || '').split(' ')
      classes << class_name
      child['class'] = classes.join(' ')
    end
  end

  # Run markdown over the text inside selected elements and replace that
  # text with the rendered result.
  #
  # Selected elements are those carrying a 'markdown' attribute and, when
  # the first child of the fragment is listed in HTML_INLINE_ELEMS, that
  # child together with its span-level descendants.
  #
  # @param doc [MaRuKu::MDDocument] A document to process.
  def process_markdown_inside_elements(doc)
    targets = @fragment.css("[markdown]")

    first = @fragment.children.first
    if first && HTML_INLINE_ELEMS.include?(first.name)
      targets << first unless first.attribute('markdown')
      targets += span_descendents(first)
    end

    targets.each do |element|
      mode = element['markdown']
      element.remove_attribute('markdown')

      # markdown="0" means the user wants the contents left alone
      next if mode == "0"

      as_blocks = mode == 'block' || BLOCK_TAGS.include?(element.name)
      element.xpath("./text()").each do |text_node|
        replace_text_with_markdown(doc, text_node, as_blocks)
      end
    end
  end

  # Convert this fragment to an HTML or XHTML string.
  # @return [String]
  def to_html
    save_flags = Nokogiri::XML::Node::SaveOptions::DEFAULT_XHTML ^
                 Nokogiri::XML::Node::SaveOptions::FORMAT
    @fragment.children.each_with_object("") do |child, out|
      out << child.serialize(:save_with => save_flags, :encoding => 'UTF-8')
    end
  end

  private

  # Replace one text node by the rendering of its markdown content.
  # Text that is blank after escaping is left untouched.
  def replace_text_with_markdown(doc, text_node, as_blocks)
    escaped = CGI.escapeHTML(text_node.text)
    return if escaped.strip.empty?

    parsed = as_blocks ? doc.parse_text_as_markdown(escaped) : doc.parse_span(escaped)

    # put back the leading and trailing whitespace of the source text
    padding = /\A(\s*).*?(\s*)\z/.match(escaped)
    parsed = [padding[1]] + parsed + [padding[2]] if padding

    rendered = doc.md_el(:dummy, parsed)

    # Nokogiri collapses consecutive Text nodes, so a placeholder element
    # marks the insertion point and is dropped afterwards.
    guard = Nokogiri::XML::Element.new('guard', @fragment)
    text_node.replace(guard)
    rendered.children_to_html.each { |piece| guard.before(piece.to_s) }
    guard.remove
  end

  # Collect all span-level descendants of the given element, recursively,
  # as a flat NodeSet. Children that are not in HTML_INLINE_ELEMS are
  # skipped together with their subtrees.
  #
  # @param e [Nokogiri::XML::Node] An element.
  # @return [Nokogiri::XML::NodeSet]
  def span_descendents(e)
    found = Nokogiri::XML::NodeSet.new(Nokogiri::XML::Document.new)
    e.element_children.each do |child|
      next unless HTML_INLINE_ELEMS.include?(child.name)
      found << child
      found += span_descendents(child)
    end
    found
  end
end
147
+
148
+ # An HTMLFragment implementation using REXML
149
class REXMLHTMLFragment
  # Parse +raw_html+ and keep the wrapper element that holds its nodes.
  #
  # @param raw_html [String] fragment markup; it is stripped before parsing.
  def initialize(raw_html)
    # The <html> wrapper gives the fragment a single root element.
    wrapped = "<!DOCTYPE html PUBLIC\n" \
              "\"-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN\"\n" \
              "\"http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd\">\n" \
              "<html>" + raw_html.strip + '</html>'

    @fragment = REXML::Document.new(wrapped).root
  end

  # The name of the first node in the fragment.
  #
  # @return [String, nil] nil when the fragment is empty or its first
  #   child does not respond to +name+.
  def first_node_name
    first_child = @fragment.children.first
    (first_child && first_child.respond_to?(:name)) ? first_child.name : nil
  end

  # Add a class to the child elements of this fragment
  def add_class(class_name)
    @fragment.each_element do |c|
      c.attributes['class'] = ((c.attributes['class'] || '').split(' ') + [class_name]).join(' ')
    end
  end

  # Process markdown within the contents of some elements and
  # replace their contents with the processed version.
  #
  # Elements are selected when they carry a 'markdown' attribute, or when
  # they are the first child of the fragment (if listed in
  # HTML_INLINE_ELEMS) or one of its span-level descendants.
  #
  # @param doc the document object used to parse and render the text; it
  #   must provide parse_text_as_markdown, parse_span and md_el.
  def process_markdown_inside_elements(doc)
    elts = []
    @fragment.each_element('//*[@markdown]') do |e|
      elts << e
    end

    d = @fragment.children.first
    if d && HTML_INLINE_ELEMS.include?(first_node_name)
      elts << d unless d.attributes['markdown']
      # Add every descendant only once. An element already selected through
      # its 'markdown' attribute would otherwise be visited a second time
      # with the attribute gone, which defeats markdown="0".
      span_descendents(d).each do |c|
        elts << c unless elts.any? { |seen| seen.equal?(c) }
      end
    end

    elts.each do |e|
      # should we parse block-level or span-level?
      how = e.attributes['markdown']
      e.attributes.delete('markdown')

      next if "0" == how # user requests no markdown parsing inside
      parse_blocks = (how == 'block') || BLOCK_TAGS.include?(e.name)

      # Select all text children of e
      e.texts.each do |original_text|
        s = CGI.escapeHTML(original_text.value)
        next if s.strip.empty?

        parsed = parse_blocks ? doc.parse_text_as_markdown(s) : doc.parse_span(s)

        # restore leading and trailing spaces
        padding = /\A(\s*).*?(\s*)\z/.match(s)
        parsed = [padding[1]] + parsed + [padding[2]] if padding

        el = doc.md_el(:dummy, parsed)

        # Re-parse the rendered output under a throwaway root so its nodes
        # can be moved into the place of the original text node.
        new_html = "<dummy>" + el.children_to_html.map { |x| x.to_s }.join + "</dummy>"
        newdoc = REXML::Document.new(new_html).root

        p = original_text.parent
        newdoc.children.each do |c|
          p.insert_before(original_text, c)
        end

        p.delete(original_text)
      end
    end
  end

  # Convert this fragment to a string by writing each child node with
  # REXML's default formatter.
  # @return [String]
  def to_html
    formatter = REXML::Formatters::Default.new(true)
    @fragment.children.inject("") do |out, child|
      out << formatter.write(child, '')
    end
  end

  private

  # Get all span-level descendants of the given element, recursively,
  # as an Array. Children that are not in HTML_INLINE_ELEMS are skipped
  # together with their subtrees.
  #
  # @param e [REXML::Element] An element.
  # @return [Array]
  def span_descendents(e)
    descendents = []
    e.each_element do |c|
      name = c.respond_to?(:name) ? c.name : nil
      if name && HTML_INLINE_ELEMS.include?(c.name)
        descendents << c
        descendents += span_descendents(c)
      end
    end
    # Return the accumulator explicitly; the value of each_element is not
    # the filtered, recursive list.
    descendents
  end
end
254
+ end
@@ -1,326 +1,279 @@
1
- #--
2
- # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
- #
4
- # This file is part of Maruku.
5
- #
6
- # Maruku is free software; you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation; either version 2 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # Maruku is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with Maruku; if not, write to the Free Software
18
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
- #++
20
-
21
-
22
- module MaRuKu; module In; module Markdown; module SpanLevelParser
23
-
24
- # a string scanner coded by me
25
- class CharSourceManual; end
26
-
27
- # a wrapper around StringScanner
28
- class CharSourceStrscan; end
29
-
30
- # A debug scanner that checks the correctness of both
31
- # by comparing their output
32
- class CharSourceDebug; end
33
-
34
- # Choose!
35
-
36
- CharSource = CharSourceManual # faster! 58ms vs. 65ms
37
- #CharSource = CharSourceStrscan
38
- #CharSource = CharSourceDebug
39
-
40
-
41
- class CharSourceManual
42
- include MaRuKu::Strings
43
-
44
- def initialize(s, parent=nil)
45
- raise "Passed #{s.class}" if not s.kind_of? String
46
- @buffer = s
47
- @buffer_index = 0
48
- @parent = parent
49
- end
50
-
51
- # Return current char as a FixNum (or nil).
52
- def cur_char; @buffer[@buffer_index] end
53
-
54
- # Return the next n chars as a String.
55
- def cur_chars(n); @buffer[@buffer_index,n] end
56
-
57
- # Return the char after current char as a FixNum (or nil).
58
- def next_char; @buffer[@buffer_index+1] end
59
-
60
- def shift_char
61
- c = @buffer[@buffer_index]
62
- @buffer_index+=1
63
- c
64
- end
65
-
66
- def ignore_char
67
- @buffer_index+=1
68
- nil
69
- end
70
-
71
- def ignore_chars(n)
72
- @buffer_index+=n
73
- nil
74
- end
75
-
76
- def current_remaining_buffer
77
- @buffer[@buffer_index, @buffer.size-@buffer_index]
78
- end
79
-
80
- def cur_chars_are(string)
81
- # There is a bug here
82
- if false
83
- r2 = /^.{#{@buffer_index}}#{Regexp.escape string}/m
84
- @buffer =~ r2
85
- else
86
- cur_chars(string.size) == string
87
- end
88
- end
89
-
90
- def next_matches(r)
91
- r2 = /^.{#{@buffer_index}}#{r}/m
92
- md = r2.match @buffer
93
- return !!md
94
- end
95
-
96
- def read_regexp3(r)
97
- r2 = /^.{#{@buffer_index}}#{r}/m
98
- m = r2.match @buffer
99
- if m
100
- consumed = m.to_s.size - @buffer_index
101
- # puts "Consumed #{consumed} chars (entire is #{m.to_s.inspect})"
102
- ignore_chars consumed
103
- else
104
- # puts "Could not read regexp #{r2.inspect} from buffer "+
105
- # " index=#{@buffer_index}"
106
- # puts "Cur chars = #{cur_chars(20).inspect}"
107
- # puts "Matches? = #{cur_chars(20) =~ r}"
108
- end
109
- m
110
- end
111
-
112
- def read_regexp(r)
113
- r2 = /^#{r}/
114
- rest = current_remaining_buffer
115
- m = r2.match(rest)
116
- if m
117
- @buffer_index += m.to_s.size
118
- # puts "#{r} matched #{rest.inspect}: #{m.to_s.inspect}"
119
- end
120
- return m
121
- end
122
-
123
- def consume_whitespace
124
- while c = cur_char
125
- if (c == ?\s || c == ?\t)
126
- # puts "ignoring #{c}"
127
- ignore_char
128
- else
129
- # puts "#{c} is not ws: "<<c
130
- break
131
- end
132
- end
133
- end
134
-
135
- def read_text_chars(out)
136
- s = @buffer.size; c=nil
137
- while @buffer_index < s && (c=@buffer[@buffer_index]) &&
138
- ((c>=?a && c<=?z) || (c>=?A && c<=?Z))
139
- out << c
140
- @buffer_index += 1
141
- end
142
- end
143
-
144
- def describe
145
- s = describe_pos(@buffer, @buffer_index)
146
- if @parent
147
- s += "\n\n" + @parent.describe
148
- end
149
- s
150
- end
151
- include SpanLevelParser
152
- end
1
+ require 'strscan'
153
2
 
154
- def describe_pos(buffer, buffer_index)
155
- len = 75
156
- num_before = [len/2, buffer_index].min
157
- num_after = [len/2, buffer.size-buffer_index].min
158
- num_before_max = buffer_index
159
- num_after_max = buffer.size-buffer_index
160
-
161
- # puts "num #{num_before} #{num_after}"
162
- num_before = [num_before_max, len-num_after].min
163
- num_after = [num_after_max, len-num_before].min
164
- # puts "num #{num_before} #{num_after}"
165
-
166
- index_start = [buffer_index - num_before, 0].max
167
- index_end = [buffer_index + num_after, buffer.size].min
168
-
169
- size = index_end- index_start
170
-
171
- # puts "- #{index_start} #{size}"
172
-
173
- str = buffer[index_start, size]
174
- str.gsub!("\n",'N')
175
- str.gsub!("\t",'T')
176
-
177
- if index_end == buffer.size
178
- str += "EOF"
179
- end
180
-
181
- pre_s = buffer_index-index_start
182
- pre_s = [pre_s, 0].max
183
- pre_s2 = [len-pre_s,0].max
184
- # puts "pre_S = #{pre_s}"
185
- pre =" "*(pre_s)
186
-
187
- "-"*len+"\n"+
188
- str + "\n" +
189
- "-"*pre_s + "|" + "-"*(pre_s2)+"\n"+
190
- # pre + "|\n"+
191
- pre + "+--- Byte #{buffer_index}\n"+
192
-
193
- "Shown bytes [#{index_start} to #{size}] of #{buffer.size}:\n"+
194
- add_tabs(buffer,1,">")
195
-
196
- # "CharSource: At character #{@buffer_index} of block "+
197
- # " beginning with:\n #{@buffer[0,50].inspect} ...\n"+
198
- # " before: \n ... #{cur_chars(50).inspect} ... "
199
- end
3
+ module MaRuKu::In::Markdown::SpanLevelParser
200
4
 
5
+ # a string scanner coded by me
6
+ class CharSourceManual; end
201
7
 
202
- require 'strscan'
8
+ # a wrapper around StringScanner
9
+ class CharSourceStrscan; end
203
10
 
204
- class CharSourceStrscan
205
- include SpanLevelParser
206
- include MaRuKu::Strings
207
-
208
- def initialize(s, parent=nil)
209
- @s = StringScanner.new(s)
210
- @parent = parent
211
- end
212
-
213
- # Return current char as a FixNum (or nil).
214
- def cur_char
215
- @s.peek(1)[0]
216
- end
217
-
218
- # Return the next n chars as a String.
219
- def cur_chars(n);
220
- @s.peek(n)
221
- end
222
-
223
- # Return the char after current char as a FixNum (or nil).
224
- def next_char;
225
- @s.peek(2)[1]
226
- end
227
-
228
- def shift_char
229
- (@s.get_byte)[0]
230
- end
231
-
232
- def ignore_char
233
- @s.get_byte
234
- nil
235
- end
236
-
237
- def ignore_chars(n)
238
- n.times do @s.get_byte end
239
- nil
240
- end
241
-
242
- def current_remaining_buffer
243
- @s.rest #nil #@buffer[@buffer_index, @buffer.size-@buffer_index]
244
- end
245
-
246
- def cur_chars_are(string)
247
- cur_chars(string.size) == string
248
- end
249
-
250
- def next_matches(r)
251
- len = @s.match?(r)
252
- return !!len
253
- end
254
-
255
- def read_regexp(r)
256
- string = @s.scan(r)
257
- if string
258
- return r.match(string)
259
- else
260
- return nil
261
- end
262
- end
263
-
264
- def consume_whitespace
265
- @s.scan(/\s+/)
266
- nil
267
- end
268
-
269
- def describe
270
- describe_pos(@s.string, @s.pos)
271
- end
272
-
273
- end
11
+ # A debug scanner that checks the correctness of both
12
+ # by comparing their output
13
+ class CharSourceDebug; end
274
14
 
15
+ # Choose!
275
16
 
276
- class CharSourceDebug
277
- def initialize(s, parent)
278
- @a = CharSourceManual.new(s, parent)
279
- @b = CharSourceStrscan.new(s, parent)
280
- end
281
-
282
- def method_missing(methodname, *args)
283
- a_bef = @a.describe
284
- b_bef = @b.describe
285
-
286
- a = @a.send(methodname, *args)
287
- b = @b.send(methodname, *args)
288
-
289
- # if methodname == :describe
290
- # return a
291
- # end
292
-
293
- if a.kind_of? MatchData
294
- if a.to_a != b.to_a
295
- puts "called: #{methodname}(#{args})"
296
- puts "Matchdata:\na = #{a.to_a.inspect}\nb = #{b.to_a.inspect}"
297
- puts "AFTER: "+@a.describe
298
- puts "AFTER: "+@b.describe
299
- puts "BEFORE: "+a_bef
300
- puts "BEFORE: "+b_bef
301
- puts caller.join("\n")
302
- exit
303
- end
304
- else
305
- if a!=b
306
- puts "called: #{methodname}(#{args})"
307
- puts "Attenzione!\na = #{a.inspect}\nb = #{b.inspect}"
308
- puts ""+@a.describe
309
- puts ""+@b.describe
310
- puts caller.join("\n")
311
- exit
312
- end
313
- end
314
-
315
- if @a.cur_char != @b.cur_char
316
- puts "Fuori sincronia dopo #{methodname}(#{args})"
317
- puts ""+@a.describe
318
- puts ""+@b.describe
319
- exit
320
- end
321
-
322
- return a
323
- end
324
- end
17
+ CharSource = CharSourceManual # faster! 58ms vs. 65ms
18
+ #CharSource = CharSourceStrscan # Faster on LONG documents. But StringScanner is buggy in Rubinius
19
+ #CharSource = CharSourceDebug
20
+
21
+
22
+ class CharSourceManual
23
# Set up a scanner positioned at the start of +s+.
#
# @param s [String] text to scan; anything else raises a RuntimeError.
# @param parent optional object stored in @parent (its +describe+ is
#   used when describing the position).
def initialize(s, parent=nil)
  unless s.kind_of?(String)
    raise "Passed #{s.class}"
  end

  @parent = parent
  @buffer_index = 0
  @buffer = s
end
29
+
30
+ # Return current char as a String (or nil).
31
# @return [String, nil] the one-character string at the current
#   position, or nil once the position is at or past the end of the buffer.
def cur_char
  return nil if @buffer_index >= @buffer.size
  @buffer[@buffer_index, 1]
end
34
+
35
+ # Return the next n chars as a String.
36
+ def cur_chars(n)
37
+ return nil if @buffer_index >= @buffer.size
38
+ @buffer[@buffer_index, n]
39
+ end
40
+
41
+ # Return the char after current char as a String (or nil).
42
+ def next_char
43
+ return nil if @buffer_index + 1 >= @buffer.size
44
+ @buffer[@buffer_index + 1, 1]
45
+ end
46
+
47
+ def shift_char
48
+ c = cur_char
49
+ @buffer_index += 1
50
+ c
51
+ end
52
+
53
+ def ignore_char
54
+ @buffer_index += 1
55
+ end
56
+
57
+ def ignore_chars(n)
58
+ @buffer_index += n
59
+ end
60
+
61
+ def current_remaining_buffer
62
+ @buffer[@buffer_index, @buffer.size - @buffer_index]
63
+ end
64
+
65
+ def cur_chars_are(string)
66
+ cur_chars(string.size) == string
67
+ end
68
+
69
+ def next_matches(r)
70
+ r2 = /^.{#{@buffer_index}}#{r}/m
71
+ r2.match @buffer
72
+ end
73
+
74
+ def read_regexp(r)
75
+ r2 = /^#{r}/
76
+ rest = current_remaining_buffer
77
+ m = r2.match(rest)
78
+ if m
79
+ @buffer_index += m.to_s.size
80
+ end
81
+ m
82
+ end
83
+
84
+ def consume_whitespace
85
+ while c = cur_char
86
+ break unless (c == ' ' || c == "\t")
87
+ ignore_char
88
+ end
89
+ end
90
+
91
+ def describe
92
+ s = describe_pos(@buffer, @buffer_index)
93
+ if @parent
94
+ s += "\n\n" + @parent.describe
95
+ end
96
+ s
97
+ end
98
+
99
+ def describe_pos(buffer, buffer_index)
100
+ len = 75
101
+ num_before = [len/2, buffer_index].min
102
+ num_after = [len/2, buffer.size - buffer_index].min
103
+ num_before_max = buffer_index
104
+ num_after_max = buffer.size - buffer_index
105
+
106
+ num_before = [num_before_max, len - num_after].min
107
+ num_after = [num_after_max, len - num_before].min
108
+
109
+ index_start = [buffer_index - num_before, 0].max
110
+ index_end = [buffer_index + num_after, buffer.size].min
111
+
112
+ size = index_end - index_start
113
+
114
+ str = buffer[index_start, size]
115
+ str.gsub!("\n", 'N')
116
+ str.gsub!("\t", 'T')
117
+
118
+ if index_end == buffer.size
119
+ str += "EOF"
120
+ end
121
+
122
+ pre_s = buffer_index - index_start
123
+ pre_s = [pre_s, 0].max
124
+ pre_s2 = [len - pre_s, 0].max
125
+ pre = " " * pre_s
126
+
127
+ "-" * len + "\n" +
128
+ str + "\n" +
129
+ "-" * pre_s + "|" + "-" * pre_s2 + "\n" +
130
+ pre + "+--- Byte #{buffer_index}\n"+
325
131
 
326
- end end end end
132
+ "Shown bytes [#{index_start} to #{size}] of #{buffer.size}:\n"+
133
+ buffer.gsub(/^/, ">")
134
+ end
135
+ end
136
+
137
# A character source backed by StringScanner. It offers the same methods
# as CharSourceManual and is meant to behave the same way.
class CharSourceStrscan
  # s      - the String to scan.
  # parent - optional object responding to #describe; when given, its
  #          description is appended to the output of #describe.
  def initialize(s, parent = nil)
    @scanner = StringScanner.new(s)
    @size = s.size
    @parent = parent
  end

  # Return current char as a String (or nil at the end of the input).
  def cur_char
    # StringScanner#peek counts bytes; matching one char keeps multibyte
    # characters intact.
    @scanner.check(/./m)
  end

  # Return the next n chars as a String (fewer if the input ends first),
  # or nil at the end of the input.
  def cur_chars(n)
    return nil if n < 0 || @scanner.eos?
    @scanner.check(/.{0,#{n}}/m)
  end

  # Return the char after current char as a String (or nil).
  def next_char
    two = @scanner.check(/../m)
    two && two[1, 1]
  end

  # Return a character as a String, advancing the pointer
  # (nil at the end of the input).
  def shift_char
    # getch already returns a one-char String, or nil at the end; indexing
    # its result would raise NoMethodError on nil.
    @scanner.getch
  end

  # Advance the pointer
  def ignore_char
    @scanner.getch
  end

  # Advance the pointer by n
  def ignore_chars(n)
    n.times { @scanner.getch }
  end

  # Return the rest of the string
  def current_remaining_buffer
    @scanner.rest
  end

  # Returns true if string matches what we're pointing to
  def cur_chars_are(string)
    # peek takes a byte count, so ask for the byte length of string.
    @scanner.peek(string.bytesize) == string
  end

  # Returns a true value (the matched String) if Regexp r matches what
  # we're pointing to, nil otherwise. The pointer is not moved.
  def next_matches(r)
    @scanner.check(r)
  end

  # Match Regexp r at the pointer. On success the pointer is moved past
  # the matched text and a MatchData for that text is returned; otherwise
  # nil is returned and the pointer does not move.
  def read_regexp(r)
    matched = @scanner.scan(r)
    matched ? r.match(matched) : nil
  end

  # Skip any run of spaces and tabs at the pointer. Newlines are kept,
  # as in CharSourceManual#consume_whitespace.
  def consume_whitespace
    @scanner.skip(/[ \t]+/)
  end

  # Return a human-readable picture of the pointer position: a window of
  # at most 75 chars around it, a marker line, and the complete input with
  # each line prefixed by ">". The parent's description follows when a
  # parent was given.
  def describe
    len = 75
    # Work in characters so the window lines up with String indexing even
    # when the input contains multibyte characters.
    num_after_max = @scanner.rest.size
    num_before_max = @size - num_after_max

    num_after = [len / 2, num_after_max].min
    num_before = [num_before_max, len - num_after].min
    num_after = [num_after_max, len - num_before].min

    index_start = [num_before_max - num_before, 0].max
    index_end = [num_before_max + num_after, @size].min

    size = index_end - index_start

    # Keep the excerpt on one line by making newlines and tabs visible.
    str = @scanner.string[index_start, size]
    str.gsub!("\n", 'N')
    str.gsub!("\t", 'T')

    str += "EOF" if index_end == @size

    pre_s = [num_before_max - index_start, 0].max
    pre_s2 = [len - pre_s, 0].max
    pre = " " * pre_s

    s = "-" * len + "\n" +
        str + "\n" +
        "-" * pre_s + "|" + "-" * pre_s2 + "\n" +
        pre + "+--- Byte #{@scanner.pos}\n" +
        # Report the end index of the window, not its length.
        "Shown bytes [#{index_start} to #{index_end}] of #{@size}:\n" +
        @scanner.string.gsub(/^/, ">")
    s += "\n\n" + @parent.describe if @parent
    s
  end
end
233
+
234
# A debugging character source: every call is forwarded to both a
# CharSourceManual and a CharSourceStrscan built from the same input. If
# the two disagree, a report is printed and the process exits.
class CharSourceDebug
  # Methods called for their effect on the cursor only. The two
  # implementations return unrelated values for them, so only the cursor
  # synchronisation check applies.
  CURSOR_ONLY_METHODS = [:ignore_char, :ignore_chars, :consume_whitespace].freeze

  # s      - the String to scan.
  # parent - optional parent source, handed to both implementations.
  #          Optional here as well so this class is a drop-in CharSource.
  def initialize(s, parent = nil)
    @a = CharSourceManual.new(s, parent)
    @b = CharSourceStrscan.new(s, parent)
  end

  # Advertise the forwarded methods so respond_to? agrees with
  # method_missing.
  def respond_to_missing?(methodname, include_private = false)
    (@a.respond_to?(methodname) && @b.respond_to?(methodname)) || super
  end

  # Forward the call to both implementations, compare the results and the
  # resulting cursor position, and return the CharSourceManual result.
  def method_missing(methodname, *args, &block)
    return super unless @a.respond_to?(methodname) && @b.respond_to?(methodname)

    # Captured up front so a mismatch report can show the state before
    # the call as well as after it.
    a_bef = @a.describe
    b_bef = @b.describe

    a = @a.send(methodname, *args, &block)
    b = @b.send(methodname, *args, &block)

    unless results_agree?(methodname, a, b)
      puts "called: #{methodname}(#{args})"
      if a.kind_of? MatchData
        puts "Matchdata:\na = #{a.to_a.inspect}\nb = #{b.to_a.inspect}"
        puts "AFTER: " + @a.describe
        puts "AFTER: " + @b.describe
        puts "BEFORE: " + a_bef
        puts "BEFORE: " + b_bef
      else
        puts "Attenzione!\na = #{a.inspect}\nb = #{b.inspect}"
        puts "" + @a.describe
        puts "" + @b.describe
      end
      puts caller.join("\n")
      exit
    end

    if @a.cur_char != @b.cur_char
      puts "Fuori sincronia dopo #{methodname}(#{args})"
      puts "" + @a.describe
      puts "" + @b.describe
      exit
    end

    a
  end

  private

  # Decide whether the two implementations returned equivalent results.
  def results_agree?(methodname, a, b)
    return true if CURSOR_ONLY_METHODS.include?(methodname)

    a_is_match = a.kind_of?(MatchData)
    b_is_match = b.kind_of?(MatchData)
    if a_is_match && b_is_match
      a.to_a == b.to_a
    elsif a_is_match || b_is_match
      # One side returned a MatchData and the other a plain value (as
      # next_matches does): agree when both signal success.
      a ? true & b : false
    else
      a == b
    end
  end
end
279
+ end