maruku 0.6.1 → 0.7.0.beta1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (263) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/MIT-LICENSE.txt +20 -0
  5. data/bin/maruku +153 -152
  6. data/bin/marutex +2 -29
  7. data/data/entities.xml +261 -0
  8. data/docs/math.md +14 -18
  9. data/lib/maruku.rb +65 -77
  10. data/lib/maruku/attributes.rb +109 -214
  11. data/lib/maruku/defaults.rb +45 -67
  12. data/lib/maruku/document.rb +43 -0
  13. data/lib/maruku/element.rb +112 -0
  14. data/lib/maruku/errors.rb +71 -0
  15. data/lib/maruku/ext/div.rb +105 -113
  16. data/lib/maruku/ext/fenced_code.rb +97 -0
  17. data/lib/maruku/ext/math.rb +22 -26
  18. data/lib/maruku/ext/math/elements.rb +20 -26
  19. data/lib/maruku/ext/math/mathml_engines/blahtex.rb +92 -104
  20. data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +33 -26
  21. data/lib/maruku/ext/math/mathml_engines/none.rb +11 -19
  22. data/lib/maruku/ext/math/mathml_engines/ritex.rb +2 -4
  23. data/lib/maruku/ext/math/parsing.rb +107 -113
  24. data/lib/maruku/ext/math/to_html.rb +184 -187
  25. data/lib/maruku/ext/math/to_latex.rb +30 -21
  26. data/lib/maruku/helpers.rb +158 -257
  27. data/lib/maruku/html.rb +254 -0
  28. data/lib/maruku/input/charsource.rb +272 -319
  29. data/lib/maruku/input/extensions.rb +62 -63
  30. data/lib/maruku/input/html_helper.rb +220 -189
  31. data/lib/maruku/input/linesource.rb +90 -110
  32. data/lib/maruku/input/mdline.rb +129 -0
  33. data/lib/maruku/input/parse_block.rb +618 -612
  34. data/lib/maruku/input/parse_doc.rb +145 -215
  35. data/lib/maruku/input/parse_span.rb +658 -0
  36. data/lib/maruku/input/rubypants.rb +200 -128
  37. data/lib/maruku/inspect_element.rb +60 -0
  38. data/lib/maruku/maruku.rb +10 -31
  39. data/lib/maruku/output/entity_table.rb +33 -0
  40. data/lib/maruku/output/s5/fancy.rb +462 -462
  41. data/lib/maruku/output/s5/to_s5.rb +115 -135
  42. data/lib/maruku/output/to_html.rb +898 -983
  43. data/lib/maruku/output/to_latex.rb +561 -560
  44. data/lib/maruku/output/to_markdown.rb +207 -162
  45. data/lib/maruku/output/to_s.rb +11 -52
  46. data/lib/maruku/string_utils.rb +129 -179
  47. data/lib/maruku/toc.rb +185 -196
  48. data/lib/maruku/version.rb +33 -38
  49. data/spec/block_docs/abbrev.md +776 -0
  50. data/{tests/unittest → spec/block_docs}/abbreviations.md +11 -20
  51. data/{tests/unittest → spec/block_docs}/alt.md +2 -14
  52. data/{tests/unittest/pending → spec/block_docs}/amps.md +1 -13
  53. data/{tests/unittest → spec/block_docs}/attributes/att2.md +0 -12
  54. data/{tests/unittest → spec/block_docs}/attributes/att3.md +2 -14
  55. data/{tests/unittest → spec/block_docs}/attributes/attributes.md +12 -16
  56. data/{tests/unittest → spec/block_docs}/attributes/circular.md +0 -12
  57. data/{tests/unittest → spec/block_docs}/attributes/default.md +1 -13
  58. data/{tests/unittest → spec/block_docs}/blank.md +0 -12
  59. data/{tests/unittest → spec/block_docs}/blanks_in_code.md +16 -15
  60. data/{tests/unittest/loss.md → spec/block_docs/bug_def.md} +6 -18
  61. data/{tests/unittest → spec/block_docs}/bug_table.md +3 -15
  62. data/{tests/unittest → spec/block_docs}/code.md +7 -14
  63. data/{tests/unittest → spec/block_docs}/code2.md +4 -14
  64. data/{tests/unittest → spec/block_docs}/code3.md +12 -16
  65. data/{tests/unittest → spec/block_docs}/data_loss.md +2 -14
  66. data/{tests/unittest → spec/block_docs}/divs/div1.md +0 -12
  67. data/{tests/unittest → spec/block_docs}/divs/div2.md +0 -12
  68. data/{tests/unittest → spec/block_docs}/divs/div3_nest.md +3 -15
  69. data/{tests/unittest → spec/block_docs}/easy.md +1 -13
  70. data/spec/block_docs/email.md +29 -0
  71. data/{tests/unittest/pending → spec/block_docs}/empty_cells.md +3 -15
  72. data/{tests/unittest → spec/block_docs}/encoding/iso-8859-1.md +1 -14
  73. data/{tests/unittest → spec/block_docs}/encoding/utf-8.md +0 -12
  74. data/{tests/unittest → spec/block_docs}/entities.md +27 -29
  75. data/{tests/unittest/notyet → spec/block_docs}/escape.md +2 -14
  76. data/{tests/unittest → spec/block_docs}/escaping.md +11 -22
  77. data/{tests/unittest → spec/block_docs}/extra_dl.md +2 -13
  78. data/{tests/unittest → spec/block_docs}/extra_header_id.md +14 -20
  79. data/{tests/unittest → spec/block_docs}/extra_table1.md +3 -15
  80. data/spec/block_docs/fenced_code_blocks.md +66 -0
  81. data/spec/block_docs/fenced_code_blocks_highlighted.md +18 -0
  82. data/{tests/unittest → spec/block_docs}/footnotes.md +12 -24
  83. data/spec/block_docs/footnotes2.md +78 -0
  84. data/spec/block_docs/hard.md +25 -0
  85. data/spec/block_docs/header_after_par.md +62 -0
  86. data/{tests/unittest → spec/block_docs}/headers.md +10 -18
  87. data/{tests/unittest → spec/block_docs}/hex_entities.md +7 -18
  88. data/{tests/unittest → spec/block_docs}/hrule.md +5 -12
  89. data/{tests/unittest → spec/block_docs}/html3.md +1 -13
  90. data/{tests/unittest → spec/block_docs}/html4.md +2 -14
  91. data/{tests/unittest → spec/block_docs}/html5.md +2 -14
  92. data/spec/block_docs/html_block_in_para.md +22 -0
  93. data/spec/block_docs/html_inline.md +25 -0
  94. data/spec/block_docs/html_trailing.md +31 -0
  95. data/spec/block_docs/ie.md +62 -0
  96. data/spec/block_docs/iframe.md +29 -0
  97. data/{tests/unittest → spec/block_docs}/images.md +22 -28
  98. data/{tests/unittest → spec/block_docs}/images2.md +7 -17
  99. data/{tests/unittest → spec/block_docs}/inline_html.md +37 -67
  100. data/{tests/unittest → spec/block_docs}/inline_html2.md +1 -13
  101. data/spec/block_docs/inline_html_beginning.md +10 -0
  102. data/spec/block_docs/issue20.md +9 -0
  103. data/spec/block_docs/issue26.md +22 -0
  104. data/spec/block_docs/issue29.md +9 -0
  105. data/spec/block_docs/issue30.md +30 -0
  106. data/spec/block_docs/issue31.md +25 -0
  107. data/spec/block_docs/issue40.md +40 -0
  108. data/spec/block_docs/issue64.md +55 -0
  109. data/spec/block_docs/issue67.md +19 -0
  110. data/spec/block_docs/issue70.md +11 -0
  111. data/spec/block_docs/issue72.md +17 -0
  112. data/spec/block_docs/issue74.md +38 -0
  113. data/spec/block_docs/issue79.md +15 -0
  114. data/spec/block_docs/issue83.md +13 -0
  115. data/spec/block_docs/issue85.md +25 -0
  116. data/spec/block_docs/issue88.md +19 -0
  117. data/spec/block_docs/issue89.md +12 -0
  118. data/spec/block_docs/issue90.md +38 -0
  119. data/{tests/unittest/pending → spec/block_docs}/link.md +21 -18
  120. data/{tests/unittest → spec/block_docs}/links.md +33 -32
  121. data/spec/block_docs/links2.md +21 -0
  122. data/{tests/unittest → spec/block_docs}/list1.md +0 -12
  123. data/{tests/unittest → spec/block_docs}/list12.md +2 -14
  124. data/{tests/unittest → spec/block_docs}/list2.md +2 -14
  125. data/spec/block_docs/list_multipara.md +42 -0
  126. data/{tests/unittest → spec/block_docs}/lists.md +28 -29
  127. data/{tests/unittest → spec/block_docs}/lists10.md +2 -14
  128. data/spec/block_docs/lists11.md +23 -0
  129. data/spec/block_docs/lists12.md +43 -0
  130. data/spec/block_docs/lists13.md +55 -0
  131. data/spec/block_docs/lists14.md +61 -0
  132. data/spec/block_docs/lists15.md +36 -0
  133. data/spec/block_docs/lists6.md +88 -0
  134. data/spec/block_docs/lists7b.md +58 -0
  135. data/spec/block_docs/lists9.md +53 -0
  136. data/{tests/unittest → spec/block_docs}/lists_after_paragraph.md +19 -25
  137. data/spec/block_docs/lists_blank.md +35 -0
  138. data/{tests/unittest/list3.md → spec/block_docs/lists_blockquote_code.md} +2 -14
  139. data/{tests/unittest/list4.md → spec/block_docs/lists_need_blank_line.md} +50 -21
  140. data/spec/block_docs/lists_nested.md +44 -0
  141. data/spec/block_docs/lists_nested_blankline.md +28 -0
  142. data/spec/block_docs/lists_nested_deep.md +43 -0
  143. data/{tests/unittest → spec/block_docs}/lists_ol.md +37 -54
  144. data/spec/block_docs/lists_paraindent.md +47 -0
  145. data/spec/block_docs/lists_tab.md +54 -0
  146. data/spec/block_docs/loss.md +17 -0
  147. data/spec/block_docs/math-blahtex/equations.md +30 -0
  148. data/spec/block_docs/math-blahtex/inline.md +48 -0
  149. data/spec/block_docs/math-blahtex/math2.md +45 -0
  150. data/spec/block_docs/math-blahtex/table.md +25 -0
  151. data/spec/block_docs/math/embedded_invalid_svg.md +79 -0
  152. data/spec/block_docs/math/embedded_svg.md +97 -0
  153. data/spec/block_docs/math/equations.md +44 -0
  154. data/{tests/unittest → spec/block_docs}/math/inline.md +7 -19
  155. data/spec/block_docs/math/math2.md +45 -0
  156. data/{tests/unittest → spec/block_docs}/math/notmath.md +0 -12
  157. data/spec/block_docs/math/raw_mathml.md +87 -0
  158. data/spec/block_docs/math/table.md +25 -0
  159. data/{tests/unittest → spec/block_docs}/math/table2.md +5 -17
  160. data/{tests/unittest → spec/block_docs}/misc_sw.md +181 -118
  161. data/{tests/unittest → spec/block_docs}/olist.md +6 -18
  162. data/{tests/unittest → spec/block_docs}/one.md +0 -12
  163. data/{tests/unittest → spec/block_docs}/paragraph.md +0 -12
  164. data/{tests/unittest → spec/block_docs}/paragraph_rules/dont_merge_ref.md +4 -12
  165. data/{tests/unittest → spec/block_docs}/paragraph_rules/tab_is_blank.md +0 -12
  166. data/{tests/unittest → spec/block_docs}/paragraphs.md +1 -13
  167. data/{tests/unittest → spec/block_docs}/recover/recover_links.md +4 -16
  168. data/{tests/unittest/pending/ref.md → spec/block_docs/ref_with_period.md} +7 -16
  169. data/spec/block_docs/ref_with_title.md +22 -0
  170. data/{tests/unittest → spec/block_docs}/references/long_example.md +16 -23
  171. data/{tests/unittest → spec/block_docs}/references/spaces_and_numbers.md +0 -12
  172. data/{tests/unittest → spec/block_docs}/smartypants.md +24 -31
  173. data/{tests/unittest → spec/block_docs}/syntax_hl.md +13 -17
  174. data/{tests/unittest → spec/block_docs}/table_attributes.md +2 -14
  175. data/spec/block_docs/tables.md +58 -0
  176. data/{tests/unittest → spec/block_docs}/test.md +1 -13
  177. data/{tests/unittest/notyet → spec/block_docs}/ticks.md +1 -13
  178. data/spec/block_docs/toc.md +87 -0
  179. data/{tests/unittest/notyet → spec/block_docs}/triggering.md +14 -25
  180. data/{tests/unittest → spec/block_docs}/underscore_in_words.md +0 -12
  181. data/{tests/unittest → spec/block_docs}/wrapping.md +4 -16
  182. data/spec/block_docs/xml.md +33 -0
  183. data/{tests/unittest → spec/block_docs}/xml2.md +0 -12
  184. data/spec/block_docs/xml3.md +24 -0
  185. data/{tests/unittest → spec/block_docs}/xml_instruction.md +9 -20
  186. data/spec/block_spec.rb +110 -0
  187. data/spec/cli_spec.rb +8 -0
  188. data/spec/span_spec.rb +256 -0
  189. data/spec/spec_helper.rb +2 -0
  190. data/spec/to_html_utf8_spec.rb +13 -0
  191. metadata +205 -243
  192. metadata.gz.sig +3 -0
  193. data/Rakefile +0 -48
  194. data/bin/marudown +0 -29
  195. data/bin/marutest +0 -345
  196. data/docs/changelog.md +0 -334
  197. data/lib/maruku/errors_management.rb +0 -92
  198. data/lib/maruku/ext/math/latex_fix.rb +0 -12
  199. data/lib/maruku/input/parse_span_better.rb +0 -746
  200. data/lib/maruku/input/type_detection.rb +0 -147
  201. data/lib/maruku/output/to_latex_entities.rb +0 -367
  202. data/lib/maruku/output/to_latex_strings.rb +0 -64
  203. data/lib/maruku/structures.rb +0 -167
  204. data/lib/maruku/structures_inspect.rb +0 -87
  205. data/lib/maruku/structures_iterators.rb +0 -61
  206. data/lib/maruku/tests/benchmark.rb +0 -82
  207. data/lib/maruku/tests/new_parser.rb +0 -373
  208. data/lib/maruku/tests/tests.rb +0 -136
  209. data/lib/maruku/usage/example1.rb +0 -33
  210. data/tests/bugs/code_in_links.md +0 -101
  211. data/tests/bugs/complex_escaping.md +0 -38
  212. data/tests/math/syntax.md +0 -46
  213. data/tests/math_usage/document.md +0 -13
  214. data/tests/others/abbreviations.md +0 -11
  215. data/tests/others/blank.md +0 -4
  216. data/tests/others/code.md +0 -5
  217. data/tests/others/code2.md +0 -8
  218. data/tests/others/code3.md +0 -16
  219. data/tests/others/email.md +0 -4
  220. data/tests/others/entities.md +0 -19
  221. data/tests/others/escaping.md +0 -16
  222. data/tests/others/extra_dl.md +0 -101
  223. data/tests/others/extra_header_id.md +0 -13
  224. data/tests/others/extra_table1.md +0 -40
  225. data/tests/others/footnotes.md +0 -17
  226. data/tests/others/headers.md +0 -10
  227. data/tests/others/hrule.md +0 -10
  228. data/tests/others/images.md +0 -20
  229. data/tests/others/inline_html.md +0 -42
  230. data/tests/others/links.md +0 -38
  231. data/tests/others/list1.md +0 -4
  232. data/tests/others/list2.md +0 -5
  233. data/tests/others/list3.md +0 -8
  234. data/tests/others/lists.md +0 -32
  235. data/tests/others/lists_after_paragraph.md +0 -44
  236. data/tests/others/lists_ol.md +0 -39
  237. data/tests/others/misc_sw.md +0 -105
  238. data/tests/others/one.md +0 -1
  239. data/tests/others/paragraphs.md +0 -13
  240. data/tests/others/sss06.md +0 -352
  241. data/tests/others/test.md +0 -4
  242. data/tests/s5/s5profiling.md +0 -48
  243. data/tests/unittest/bug_def.md +0 -28
  244. data/tests/unittest/email.md +0 -32
  245. data/tests/unittest/html2.md +0 -34
  246. data/tests/unittest/ie.md +0 -61
  247. data/tests/unittest/links2.md +0 -34
  248. data/tests/unittest/lists11.md +0 -28
  249. data/tests/unittest/lists6.md +0 -53
  250. data/tests/unittest/lists9.md +0 -76
  251. data/tests/unittest/math/equations.md +0 -86
  252. data/tests/unittest/math/math2.md +0 -57
  253. data/tests/unittest/math/table.md +0 -37
  254. data/tests/unittest/notyet/header_after_par.md +0 -70
  255. data/tests/unittest/red_tests/abbrev.md +0 -1388
  256. data/tests/unittest/red_tests/lists7.md +0 -68
  257. data/tests/unittest/red_tests/lists7b.md +0 -128
  258. data/tests/unittest/red_tests/lists8.md +0 -76
  259. data/tests/unittest/red_tests/xml.md +0 -70
  260. data/tests/unittest/xml3.md +0 -38
  261. data/tests/utf8-files/simple.md +0 -1
  262. data/unit_test_block.sh +0 -5
  263. data/unit_test_span.sh +0 -3
@@ -0,0 +1,254 @@
1
+ require 'set'
2
+
3
+ $warned_nokogiri = false
4
+
5
+ module MaRuKu
6
+ HTML_INLINE_ELEMS = Set.new %w[a abbr acronym audio b bdi bdo big br button canvas caption cite code
7
+ col colgroup command datalist del details dfn dir em fieldset font form i img input ins
8
+ kbd label legend mark meter optgroup option progress q rp rt ruby s samp section select small
9
+ source span strike strong sub summary sup tbody td tfoot th thead time tr track tt u var video wbr
10
+ animate animateColor animateMotion animateTransform circle clipPath defs desc ellipse
11
+ feGaussianBlur filter font-face font-face-name font-face-src foreignObject g glyph hkern
12
+ linearGradient line marker mask metadata missing-glyph mpath path pattern polygon polyline
13
+ radialGradient rect set stop svg switch text textPath title tspan use
14
+ annotation annotation-xml maction math menclose merror mfrac mfenced mi mmultiscripts mn mo
15
+ mover mpadded mphantom mprescripts mroot mrow mspace msqrt mstyle msub msubsup msup mtable
16
+ mtd mtext mtr munder munderover none semantics]
17
+
18
+ # Parse block-level markdown elements in these HTML tags
19
+ BLOCK_TAGS = %w(div)
20
+
21
# Mixin for HTML MDElement nodes: gives the node a readable and writable
# +parsed_html+ slot that holds the parsed document fragment.
module HTMLElement
  attr_reader :parsed_html
  attr_writer :parsed_html
end
25
+
26
# Factory module (not a real class): HTMLFragment.new hands back one of the
# concrete fragment implementations.
module HTMLFragment
  class << self
    # Build a fragment for +raw_html+.
    #
    # Nokogiri is tried only when it is the configured :html_parser and a
    # previous attempt has not already failed to load it; in every other
    # case the REXML implementation is used.
    #
    # @param raw_html [String] the HTML source to wrap
    # @return [NokogiriHTMLFragment, REXMLHTMLFragment]
    def new(raw_html)
      unless $warned_nokogiri || MaRuKu::Globals[:html_parser] != 'nokogiri'
        begin
          require 'nokogiri'
          return NokogiriHTMLFragment.new(raw_html)
        rescue LoadError
          # Warn once, then remember the failure so later calls go straight to REXML.
          warn "Nokogiri could not be loaded. Falling back to REXML."
          $warned_nokogiri = true
        end
      end

      require 'rexml/document'
      REXMLHTMLFragment.new(raw_html)
    end
  end
end
46
+
47
# HTMLFragment implementation backed by Nokogiri.
class NokogiriHTMLFragment
  # @param raw_html [String] the HTML source; it is stripped and wrapped in a
  #   dummy <html> root before parsing.
  def initialize(raw_html)
    # The doctype is there only so that entity references are known.
    wrapped = '<!DOCTYPE html PUBLIC
    "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
    "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
<html>' + raw_html.strip + '</html>'

    document = Nokogiri::XML::Document.parse(wrapped) { |config| config.nonet }
    @fragment = document.root
  end

  # @return [String, nil] the name of the fragment's first child node, or nil
  #   when the fragment has no children.
  def first_node_name
    head = @fragment.children.first
    return nil unless head
    head.name
  end

  # Append +class_name+ to the class attribute of each child of this fragment.
  def add_class(class_name)
    @fragment.children.each do |child|
      classes = (child['class'] || '').split(' ')
      classes.push(class_name)
      child['class'] = classes.join(' ')
    end
  end

  # Process markdown within the contents of some elements and
  # replace their contents with the processed version.
  #
  # Targets are every element carrying a 'markdown' attribute, plus - when
  # the first child is an inline element - that child and its inline
  # descendants.
  #
  # @param doc [MaRuKu::MDDocument] A document to process.
  def process_markdown_inside_elements(doc)
    targets = @fragment.css("[markdown]")

    head = @fragment.children.first
    if head && HTML_INLINE_ELEMS.include?(head.name)
      targets << head unless head.attribute('markdown')
      targets += span_descendents(head)
    end

    targets.each do |element|
      mode = element['markdown']
      element.remove_attribute('markdown')

      # markdown="0": the user asked for no markdown parsing inside
      next if mode == "0"

      block_level = (mode == 'block') || BLOCK_TAGS.include?(element.name)

      # Handle each direct text child of the element
      element.xpath("./text()").each do |text_node|
        render_markdown_text(doc, text_node, block_level)
      end
    end
  end

  # Convert this fragment to an HTML or XHTML string.
  # @return [String]
  def to_html
    save_options = Nokogiri::XML::Node::SaveOptions::DEFAULT_XHTML ^
                   Nokogiri::XML::Node::SaveOptions::FORMAT
    @fragment.children.each_with_object("") do |child, out|
      out << child.serialize(:save_with => save_options, :encoding => 'UTF-8')
    end
  end

  private

  # Replace one text node by the HTML produced from parsing it as markdown.
  # Blank text is left alone.
  #
  # @param doc [MaRuKu::MDDocument] provides the parser and md_el
  # @param text_node [Nokogiri::XML::Node] the text node to replace
  # @param block_level [Boolean] parse as blocks (true) or as a span (false)
  def render_markdown_text(doc, text_node, block_level)
    source = CGI.escapeHTML(text_node.text)
    return if source.strip.empty?

    parsed = block_level ? doc.parse_text_as_markdown(source) : doc.parse_span(source)

    # restore leading and trailing spaces
    padding = /\A(\s*).*?(\s*)\z/.match(source)
    parsed = [padding[1]] + parsed + [padding[2]] if padding

    wrapper = doc.md_el(:dummy, parsed)

    # Nokogiri collapses consecutive Text nodes, so a dummy element stands in
    # for the text while the new pieces are inserted in front of it.
    guard = Nokogiri::XML::Element.new('guard', @fragment)
    text_node.replace(guard)
    wrapper.children_to_html.each { |piece| guard.before(piece.to_s) }
    guard.remove
  end

  # Get all span-level descendents of the given element, recursively,
  # as a flat NodeSet.
  #
  # @param e [Nokogiri::XML::Node] An element.
  # @return [Nokogiri::XML::NodeSet]
  def span_descendents(e)
    collected = Nokogiri::XML::NodeSet.new(Nokogiri::XML::Document.new)
    e.element_children.each do |child|
      next unless HTML_INLINE_ELEMS.include?(child.name)
      collected << child
      collected += span_descendents(child)
    end
    collected
  end
end
147
+
148
# An HTMLFragment implementation using REXML.
#
# The raw HTML is wrapped in a dummy <html> root element; @fragment is that
# root, and the methods below operate on its children.
class REXMLHTMLFragment
  # @param raw_html [String] the HTML source; it is stripped before wrapping.
  def initialize(raw_html)
    wrapped = '<!DOCTYPE html PUBLIC
    "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
    "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
<html>' + raw_html.strip + '</html>'

    @fragment = REXML::Document.new(wrapped).root
  end

  # The name of the first node in the fragment.
  #
  # @return [String, nil] nil when there is no first child or when it has no
  #   name (e.g. it is a text node).
  def first_node_name
    first_child = @fragment.children.first
    (first_child && first_child.respond_to?(:name)) ? first_child.name : nil
  end

  # Add a class to the child elements of this fragment.
  def add_class(class_name)
    @fragment.each_element do |c|
      c.attributes['class'] = ((c.attributes['class'] || '').split(' ') + [class_name]).join(' ')
    end
  end

  # Process markdown within the contents of some elements and
  # replace their contents with the processed version.
  #
  # Targets are every element carrying a 'markdown' attribute, plus - when
  # the first child is an inline element - that child and all of its inline
  # descendants. Each target is processed at most once.
  #
  # @param doc an object providing parse_span, parse_text_as_markdown and md_el
  def process_markdown_inside_elements(doc)
    elts = []
    @fragment.each_element('//*[@markdown]') do |e|
      elts << e
    end

    d = @fragment.children.first
    if d && HTML_INLINE_ELEMS.include?(first_node_name)
      elts << d unless d.attributes['markdown']
      elts += span_descendents(d)
    end

    # An inline descendant that also carries a 'markdown' attribute is in the
    # list twice. Visiting it a second time would find the attribute already
    # removed and would therefore ignore markdown="0" (or "block"), so each
    # element is handled only on its first occurrence.
    visited = {}
    elts.each do |e|
      next if visited[e.object_id]
      visited[e.object_id] = true

      # should we parse block-level or span-level?
      how = e.attributes['markdown']
      e.attributes.delete('markdown')

      next if "0" == how # user requests no markdown parsing inside
      parse_blocks = (how == 'block') || BLOCK_TAGS.include?(e.name)

      # Handle each direct text child of e
      e.texts.each do |original_text|
        replace_text_with_markdown(doc, original_text, parse_blocks)
      end
    end
  end

  # Convert this fragment to a string by writing each child of the root.
  # @return [String]
  def to_html
    formatter = REXML::Formatters::Default.new(true)
    @fragment.children.inject("") do |out, child|
      out << formatter.write(child, '')
    end
  end

  private

  # Replace one text node by the nodes produced from parsing it as markdown.
  # Blank text is left untouched.
  #
  # @param doc see #process_markdown_inside_elements
  # @param original_text [REXML::Text] the text node to replace
  # @param parse_blocks [Boolean] parse as blocks (true) or as a span (false)
  def replace_text_with_markdown(doc, original_text, parse_blocks)
    s = CGI.escapeHTML(original_text.value)
    return if s.strip.empty?

    parsed = parse_blocks ? doc.parse_text_as_markdown(s) : doc.parse_span(s)

    # restore leading and trailing spaces
    padding = /\A(\s*).*?(\s*)\z/.match(s)
    parsed = [padding[1]] + parsed + [padding[2]] if padding

    el = doc.md_el(:dummy, parsed)

    # Re-parse the generated HTML inside a throwaway <dummy> root so that its
    # nodes can be moved into the real tree.
    new_html = "<dummy>"
    el.children_to_html.each do |x|
      new_html << x.to_s
    end
    new_html << "</dummy>"

    newdoc = REXML::Document.new(new_html).root

    parent = original_text.parent
    newdoc.children.each do |c|
      parent.insert_before(original_text, c)
    end

    parent.delete(original_text)
  end

  # Get all span-level descendents of the given element, recursively,
  # as an Array.
  #
  # @param e [REXML::Element] An element.
  # @return [Array]
  def span_descendents(e)
    descendents = []
    e.each_element do |c|
      if HTML_INLINE_ELEMS.include?(c.name)
        descendents << c
        descendents += span_descendents(c)
      end
    end
    # Return the accumulator explicitly: each_element's own return value is
    # not the filtered, recursive list built above.
    descendents
  end
end
254
+ end
@@ -1,326 +1,279 @@
1
- #--
2
- # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
- #
4
- # This file is part of Maruku.
5
- #
6
- # Maruku is free software; you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation; either version 2 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # Maruku is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public License
17
- # along with Maruku; if not, write to the Free Software
18
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
- #++
20
-
21
-
22
- module MaRuKu; module In; module Markdown; module SpanLevelParser
23
-
24
- # a string scanner coded by me
25
- class CharSourceManual; end
26
-
27
- # a wrapper around StringScanner
28
- class CharSourceStrscan; end
29
-
30
- # A debug scanner that checks the correctness of both
31
- # by comparing their output
32
- class CharSourceDebug; end
33
-
34
- # Choose!
35
-
36
- CharSource = CharSourceManual # faster! 58ms vs. 65ms
37
- #CharSource = CharSourceStrscan
38
- #CharSource = CharSourceDebug
39
-
40
-
41
- class CharSourceManual
42
- include MaRuKu::Strings
43
-
44
- def initialize(s, parent=nil)
45
- raise "Passed #{s.class}" if not s.kind_of? String
46
- @buffer = s
47
- @buffer_index = 0
48
- @parent = parent
49
- end
50
-
51
- # Return current char as a FixNum (or nil).
52
- def cur_char; @buffer[@buffer_index] end
53
-
54
- # Return the next n chars as a String.
55
- def cur_chars(n); @buffer[@buffer_index,n] end
56
-
57
- # Return the char after current char as a FixNum (or nil).
58
- def next_char; @buffer[@buffer_index+1] end
59
-
60
- def shift_char
61
- c = @buffer[@buffer_index]
62
- @buffer_index+=1
63
- c
64
- end
65
-
66
- def ignore_char
67
- @buffer_index+=1
68
- nil
69
- end
70
-
71
- def ignore_chars(n)
72
- @buffer_index+=n
73
- nil
74
- end
75
-
76
- def current_remaining_buffer
77
- @buffer[@buffer_index, @buffer.size-@buffer_index]
78
- end
79
-
80
- def cur_chars_are(string)
81
- # There is a bug here
82
- if false
83
- r2 = /^.{#{@buffer_index}}#{Regexp.escape string}/m
84
- @buffer =~ r2
85
- else
86
- cur_chars(string.size) == string
87
- end
88
- end
89
-
90
- def next_matches(r)
91
- r2 = /^.{#{@buffer_index}}#{r}/m
92
- md = r2.match @buffer
93
- return !!md
94
- end
95
-
96
- def read_regexp3(r)
97
- r2 = /^.{#{@buffer_index}}#{r}/m
98
- m = r2.match @buffer
99
- if m
100
- consumed = m.to_s.size - @buffer_index
101
- # puts "Consumed #{consumed} chars (entire is #{m.to_s.inspect})"
102
- ignore_chars consumed
103
- else
104
- # puts "Could not read regexp #{r2.inspect} from buffer "+
105
- # " index=#{@buffer_index}"
106
- # puts "Cur chars = #{cur_chars(20).inspect}"
107
- # puts "Matches? = #{cur_chars(20) =~ r}"
108
- end
109
- m
110
- end
111
-
112
- def read_regexp(r)
113
- r2 = /^#{r}/
114
- rest = current_remaining_buffer
115
- m = r2.match(rest)
116
- if m
117
- @buffer_index += m.to_s.size
118
- # puts "#{r} matched #{rest.inspect}: #{m.to_s.inspect}"
119
- end
120
- return m
121
- end
122
-
123
- def consume_whitespace
124
- while c = cur_char
125
- if (c == ?\s || c == ?\t)
126
- # puts "ignoring #{c}"
127
- ignore_char
128
- else
129
- # puts "#{c} is not ws: "<<c
130
- break
131
- end
132
- end
133
- end
134
-
135
- def read_text_chars(out)
136
- s = @buffer.size; c=nil
137
- while @buffer_index < s && (c=@buffer[@buffer_index]) &&
138
- ((c>=?a && c<=?z) || (c>=?A && c<=?Z))
139
- out << c
140
- @buffer_index += 1
141
- end
142
- end
143
-
144
- def describe
145
- s = describe_pos(@buffer, @buffer_index)
146
- if @parent
147
- s += "\n\n" + @parent.describe
148
- end
149
- s
150
- end
151
- include SpanLevelParser
152
- end
1
+ require 'strscan'
153
2
 
154
- def describe_pos(buffer, buffer_index)
155
- len = 75
156
- num_before = [len/2, buffer_index].min
157
- num_after = [len/2, buffer.size-buffer_index].min
158
- num_before_max = buffer_index
159
- num_after_max = buffer.size-buffer_index
160
-
161
- # puts "num #{num_before} #{num_after}"
162
- num_before = [num_before_max, len-num_after].min
163
- num_after = [num_after_max, len-num_before].min
164
- # puts "num #{num_before} #{num_after}"
165
-
166
- index_start = [buffer_index - num_before, 0].max
167
- index_end = [buffer_index + num_after, buffer.size].min
168
-
169
- size = index_end- index_start
170
-
171
- # puts "- #{index_start} #{size}"
172
-
173
- str = buffer[index_start, size]
174
- str.gsub!("\n",'N')
175
- str.gsub!("\t",'T')
176
-
177
- if index_end == buffer.size
178
- str += "EOF"
179
- end
180
-
181
- pre_s = buffer_index-index_start
182
- pre_s = [pre_s, 0].max
183
- pre_s2 = [len-pre_s,0].max
184
- # puts "pre_S = #{pre_s}"
185
- pre =" "*(pre_s)
186
-
187
- "-"*len+"\n"+
188
- str + "\n" +
189
- "-"*pre_s + "|" + "-"*(pre_s2)+"\n"+
190
- # pre + "|\n"+
191
- pre + "+--- Byte #{buffer_index}\n"+
192
-
193
- "Shown bytes [#{index_start} to #{size}] of #{buffer.size}:\n"+
194
- add_tabs(buffer,1,">")
195
-
196
- # "CharSource: At character #{@buffer_index} of block "+
197
- # " beginning with:\n #{@buffer[0,50].inspect} ...\n"+
198
- # " before: \n ... #{cur_chars(50).inspect} ... "
199
- end
3
+ module MaRuKu::In::Markdown::SpanLevelParser
200
4
 
5
+ # a string scanner coded by me
6
+ class CharSourceManual; end
201
7
 
202
- require 'strscan'
8
+ # a wrapper around StringScanner
9
+ class CharSourceStrscan; end
203
10
 
204
- class CharSourceStrscan
205
- include SpanLevelParser
206
- include MaRuKu::Strings
207
-
208
- def initialize(s, parent=nil)
209
- @s = StringScanner.new(s)
210
- @parent = parent
211
- end
212
-
213
- # Return current char as a FixNum (or nil).
214
- def cur_char
215
- @s.peek(1)[0]
216
- end
217
-
218
- # Return the next n chars as a String.
219
- def cur_chars(n);
220
- @s.peek(n)
221
- end
222
-
223
- # Return the char after current char as a FixNum (or nil).
224
- def next_char;
225
- @s.peek(2)[1]
226
- end
227
-
228
- def shift_char
229
- (@s.get_byte)[0]
230
- end
231
-
232
- def ignore_char
233
- @s.get_byte
234
- nil
235
- end
236
-
237
- def ignore_chars(n)
238
- n.times do @s.get_byte end
239
- nil
240
- end
241
-
242
- def current_remaining_buffer
243
- @s.rest #nil #@buffer[@buffer_index, @buffer.size-@buffer_index]
244
- end
245
-
246
- def cur_chars_are(string)
247
- cur_chars(string.size) == string
248
- end
249
-
250
- def next_matches(r)
251
- len = @s.match?(r)
252
- return !!len
253
- end
254
-
255
- def read_regexp(r)
256
- string = @s.scan(r)
257
- if string
258
- return r.match(string)
259
- else
260
- return nil
261
- end
262
- end
263
-
264
- def consume_whitespace
265
- @s.scan(/\s+/)
266
- nil
267
- end
268
-
269
- def describe
270
- describe_pos(@s.string, @s.pos)
271
- end
272
-
273
- end
11
+ # A debug scanner that checks the correctness of both
12
+ # by comparing their output
13
+ class CharSourceDebug; end
274
14
 
15
+ # Choose!
275
16
 
276
- class CharSourceDebug
277
- def initialize(s, parent)
278
- @a = CharSourceManual.new(s, parent)
279
- @b = CharSourceStrscan.new(s, parent)
280
- end
281
-
282
- def method_missing(methodname, *args)
283
- a_bef = @a.describe
284
- b_bef = @b.describe
285
-
286
- a = @a.send(methodname, *args)
287
- b = @b.send(methodname, *args)
288
-
289
- # if methodname == :describe
290
- # return a
291
- # end
292
-
293
- if a.kind_of? MatchData
294
- if a.to_a != b.to_a
295
- puts "called: #{methodname}(#{args})"
296
- puts "Matchdata:\na = #{a.to_a.inspect}\nb = #{b.to_a.inspect}"
297
- puts "AFTER: "+@a.describe
298
- puts "AFTER: "+@b.describe
299
- puts "BEFORE: "+a_bef
300
- puts "BEFORE: "+b_bef
301
- puts caller.join("\n")
302
- exit
303
- end
304
- else
305
- if a!=b
306
- puts "called: #{methodname}(#{args})"
307
- puts "Attenzione!\na = #{a.inspect}\nb = #{b.inspect}"
308
- puts ""+@a.describe
309
- puts ""+@b.describe
310
- puts caller.join("\n")
311
- exit
312
- end
313
- end
314
-
315
- if @a.cur_char != @b.cur_char
316
- puts "Fuori sincronia dopo #{methodname}(#{args})"
317
- puts ""+@a.describe
318
- puts ""+@b.describe
319
- exit
320
- end
321
-
322
- return a
323
- end
324
- end
17
+ CharSource = CharSourceManual # faster! 58ms vs. 65ms
18
+ #CharSource = CharSourceStrscan # Faster on LONG documents. But StringScanner is buggy in Rubinius
19
+ #CharSource = CharSourceDebug
20
+
21
+
22
+ class CharSourceManual
23
# Set up a scanner positioned at the start of +s+.
#
# @param s [String] the text to scan; anything else raises
# @param parent an optional enclosing source, kept for #describe
def initialize(s, parent=nil)
  raise "Passed #{s.class}" unless s.is_a?(String)
  @parent = parent
  @buffer_index = 0
  @buffer = s
end
29
+
30
# The character at the current position, as a one-character String;
# nil once the position is at or past the end of the buffer.
def cur_char
  return nil if @buffer_index >= @buffer.size
  @buffer[@buffer_index, 1]
end
34
+
35
# Up to +n+ characters starting at the current position, as a String;
# nil once the position is at or past the end of the buffer.
def cur_chars(n)
  @buffer_index < @buffer.size ? @buffer.slice(@buffer_index, n) : nil
end
40
+
41
# The character right after the current one, as a one-character String;
# nil when there is no such character.
def next_char
  following = @buffer_index + 1
  following < @buffer.size ? @buffer.slice(following, 1) : nil
end
46
+
47
+ def shift_char
48
+ c = cur_char
49
+ @buffer_index += 1
50
+ c
51
+ end
52
+
53
+ def ignore_char
54
+ @buffer_index += 1
55
+ end
56
+
57
+ def ignore_chars(n)
58
+ @buffer_index += n
59
+ end
60
+
61
+ def current_remaining_buffer
62
+ @buffer[@buffer_index, @buffer.size - @buffer_index]
63
+ end
64
+
65
+ def cur_chars_are(string)
66
+ cur_chars(string.size) == string
67
+ end
68
+
69
+ def next_matches(r)
70
+ r2 = /^.{#{@buffer_index}}#{r}/m
71
+ r2.match @buffer
72
+ end
73
+
74
+ def read_regexp(r)
75
+ r2 = /^#{r}/
76
+ rest = current_remaining_buffer
77
+ m = r2.match(rest)
78
+ if m
79
+ @buffer_index += m.to_s.size
80
+ end
81
+ m
82
+ end
83
+
84
+ def consume_whitespace
85
+ while c = cur_char
86
+ break unless (c == ' ' || c == "\t")
87
+ ignore_char
88
+ end
89
+ end
90
+
91
+ def describe
92
+ s = describe_pos(@buffer, @buffer_index)
93
+ if @parent
94
+ s += "\n\n" + @parent.describe
95
+ end
96
+ s
97
+ end
98
+
99
+ def describe_pos(buffer, buffer_index)
100
+ len = 75
101
+ num_before = [len/2, buffer_index].min
102
+ num_after = [len/2, buffer.size - buffer_index].min
103
+ num_before_max = buffer_index
104
+ num_after_max = buffer.size - buffer_index
105
+
106
+ num_before = [num_before_max, len - num_after].min
107
+ num_after = [num_after_max, len - num_before].min
108
+
109
+ index_start = [buffer_index - num_before, 0].max
110
+ index_end = [buffer_index + num_after, buffer.size].min
111
+
112
+ size = index_end - index_start
113
+
114
+ str = buffer[index_start, size]
115
+ str.gsub!("\n", 'N')
116
+ str.gsub!("\t", 'T')
117
+
118
+ if index_end == buffer.size
119
+ str += "EOF"
120
+ end
121
+
122
+ pre_s = buffer_index - index_start
123
+ pre_s = [pre_s, 0].max
124
+ pre_s2 = [len - pre_s, 0].max
125
+ pre = " " * pre_s
126
+
127
+ "-" * len + "\n" +
128
+ str + "\n" +
129
+ "-" * pre_s + "|" + "-" * pre_s2 + "\n" +
130
+ pre + "+--- Byte #{buffer_index}\n"+
325
131
 
326
- end end end end
132
+ "Shown bytes [#{index_start} to #{size}] of #{buffer.size}:\n"+
133
+ buffer.gsub(/^/, ">")
134
+ end
135
+ end
136
+
137
# A character scanner backed by StringScanner, offering the same methods as
# CharSourceManual.
#
# StringScanner#peek and #pos work in bytes, so the character-oriented methods
# below use regexp-based calls (#check, #getch) to stay correct on multi-byte
# text.
class CharSourceStrscan
  # s      - the String to scan.
  # parent - optional object whose #describe output is appended to #describe.
  def initialize(s, parent = nil)
    @scanner = StringScanner.new(s)
    @parent = parent
  end

  # Return current char as a String (or nil).
  def cur_char
    @scanner.check(/./m)
  end

  # Return the next n chars as a String (fewer if the input ends first),
  # or nil at end of input.
  def cur_chars(n)
    return nil if @scanner.eos?
    @scanner.check(/.{0,#{n}}/m)
  end

  # Return the char after current char as a String (or nil).
  def next_char
    pair = @scanner.check(/../m)
    pair && pair[1, 1]
  end

  # Return a character as a String, advancing the pointer (nil at end of input).
  def shift_char
    @scanner.getch
  end

  # Advance the pointer
  def ignore_char
    @scanner.getch
  end

  # Advance the pointer by n
  def ignore_chars(n)
    n.times { @scanner.getch }
  end

  # Return the rest of the string
  def current_remaining_buffer
    @scanner.rest
  end

  # Returns true if string matches what we're pointing to
  def cur_chars_are(string)
    # peek counts bytes, so ask for the byte length of the expected text.
    @scanner.peek(string.bytesize) == string
  end

  # Returns a truthy value (the matched String) if Regexp r matches what
  # we're pointing to, nil otherwise. The pointer does not move.
  def next_matches(r)
    @scanner.check(r)
  end

  # Match +r+ at the pointer and, on success, advance past the matched text.
  # Returns the MatchData (taken against the remaining input) or nil.
  def read_regexp(r)
    # Match against the remaining input rather than re-matching the text
    # that was just scanned: re-matching drops the following context, so
    # patterns with lookahead returned nil after the pointer had moved.
    m = /\A#{r}/.match(@scanner.rest)
    @scanner.pos += m.to_s.bytesize if m # pos is a byte offset
    m
  end

  # Skip spaces and tabs (not newlines) at the pointer. Returns nil.
  def consume_whitespace
    @scanner.skip(/[ \t]+/)
    nil
  end

  # Return a multi-line, human-readable picture of the pointer position:
  # a window of at most 75 chars around it (newlines and tabs shown as "N"
  # and "T", "EOF" appended when the window reaches the end), a marker line,
  # and the whole input with every line prefixed by ">". The parent's
  # description is appended when a parent was given.
  def describe
    len = 75
    text = @scanner.string
    total = text.size
    # Character offset of the pointer; @scanner.pos is a byte offset and
    # cannot be used to slice a multi-byte String.
    cursor = total - @scanner.rest.size

    num_after  = [len / 2, total - cursor].min
    num_before = [cursor, len - num_after].min
    num_after  = [total - cursor, len - num_before].min

    index_start = cursor - num_before
    index_end   = cursor + num_after

    str = text[index_start, index_end - index_start].tr("\n\t", 'NT')
    str += "EOF" if index_end == total

    pre_s  = cursor - index_start
    pre_s2 = [len - pre_s, 0].max
    pre    = " " * pre_s

    s = "-" * len + "\n" +
        str + "\n" +
        "-" * pre_s + "|" + "-" * pre_s2 + "\n" +
        pre + "+--- Byte #{@scanner.pos}\n" +
        "Shown bytes [#{index_start} to #{index_end}] of #{total}:\n" +
        text.gsub(/^/, ">")
    s += "\n\n" + @parent.describe if @parent
    s
  end
end
233
+
234
# A checking scanner: it drives a CharSourceManual (@a) and a
# CharSourceStrscan (@b) in lock-step over the same input and aborts the
# process with a report as soon as the two disagree.
class CharSourceDebug
  # s      - the String to scan.
  # parent - passed through to both wrapped scanners. It is optional, like in
  #          the wrapped classes, so this class can stand in for either.
  def initialize(s, parent = nil)
    @a = CharSourceManual.new(s, parent)
    @b = CharSourceStrscan.new(s, parent)
  end

  # Forward +methodname+ to both scanners and compare what they return
  # (MatchData by #to_a, anything else by !=) and then their current chars.
  # On a mismatch a report is printed and the process exits; otherwise the
  # result from the manual scanner is returned.
  def method_missing(methodname, *args)
    a_bef = @a.describe
    b_bef = @b.describe

    a = @a.send(methodname, *args)
    b = @b.send(methodname, *args)

    if a.kind_of? MatchData
      if a.to_a != b.to_a
        report_mismatch(
          "called: #{methodname}(#{args})",
          "Matchdata:\na = #{a.to_a.inspect}\nb = #{b.to_a.inspect}",
          "AFTER: " + @a.describe,
          "AFTER: " + @b.describe,
          "BEFORE: " + a_bef,
          "BEFORE: " + b_bef,
          caller.join("\n")
        )
      end
    elsif a != b
      report_mismatch(
        "called: #{methodname}(#{args})",
        "Attenzione!\na = #{a.inspect}\nb = #{b.inspect}",
        "" + @a.describe,
        "" + @b.describe,
        caller.join("\n")
      )
    end

    # Even when the return values agree, the two cursors must still point at
    # the same character.
    if @a.cur_char != @b.cur_char
      report_mismatch(
        "Fuori sincronia dopo #{methodname}(#{args})",
        "" + @a.describe,
        "" + @b.describe
      )
    end

    a
  end

  # Report a method as available when both wrapped scanners respond to it,
  # so respond_to? agrees with what method_missing can actually forward.
  def respond_to_missing?(methodname, include_private = false)
    (@a.respond_to?(methodname, include_private) &&
      @b.respond_to?(methodname, include_private)) || super
  end

  private

  # Print each line of a mismatch report, then terminate the process.
  def report_mismatch(*lines)
    lines.each { |line| puts line }
    exit
  end
end
279
+ end