kramdown 0.1.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of kramdown might be problematic. Click here for more details.

Files changed (201) hide show
  1. data/AUTHORS +1 -0
  2. data/COPYING +24 -0
  3. data/ChangeLog +1416 -0
  4. data/GPL +674 -0
  5. data/README +20 -0
  6. data/Rakefile +300 -0
  7. data/VERSION +1 -0
  8. data/benchmark/benchmark.rb +33 -0
  9. data/benchmark/mdbasics.text +306 -0
  10. data/benchmark/mdsyntax.text +888 -0
  11. data/benchmark/testing.sh +9 -0
  12. data/benchmark/timing.sh +10 -0
  13. data/bin/kramdown +26 -0
  14. data/doc/default.css +293 -0
  15. data/doc/default.template +78 -0
  16. data/doc/index.page +89 -0
  17. data/doc/installation.page +90 -0
  18. data/doc/news.feed +10 -0
  19. data/doc/news.page +27 -0
  20. data/doc/quickref.page +474 -0
  21. data/doc/syntax.page +1089 -0
  22. data/doc/tests.page +44 -0
  23. data/doc/virtual +2 -0
  24. data/lib/kramdown.rb +23 -0
  25. data/lib/kramdown/converter.rb +215 -0
  26. data/lib/kramdown/document.rb +150 -0
  27. data/lib/kramdown/error.rb +27 -0
  28. data/lib/kramdown/extension.rb +73 -0
  29. data/lib/kramdown/parser.rb +1056 -0
  30. data/lib/kramdown/parser/registry.rb +62 -0
  31. data/setup.rb +1585 -0
  32. data/test/run_tests.rb +58 -0
  33. data/test/test_files.rb +39 -0
  34. data/test/testcases/block/01_blank_line/spaces.html +1 -0
  35. data/test/testcases/block/01_blank_line/spaces.text +3 -0
  36. data/test/testcases/block/01_blank_line/tabs.html +1 -0
  37. data/test/testcases/block/01_blank_line/tabs.text +6 -0
  38. data/test/testcases/block/02_eob/beginning.html +1 -0
  39. data/test/testcases/block/02_eob/beginning.text +3 -0
  40. data/test/testcases/block/02_eob/end.html +1 -0
  41. data/test/testcases/block/02_eob/end.text +3 -0
  42. data/test/testcases/block/02_eob/middle.html +1 -0
  43. data/test/testcases/block/02_eob/middle.text +5 -0
  44. data/test/testcases/block/03_paragraph/indented.html +18 -0
  45. data/test/testcases/block/03_paragraph/indented.text +19 -0
  46. data/test/testcases/block/03_paragraph/no_newline_at_end.html +5 -0
  47. data/test/testcases/block/03_paragraph/no_newline_at_end.text +5 -0
  48. data/test/testcases/block/03_paragraph/one_para.html +1 -0
  49. data/test/testcases/block/03_paragraph/one_para.text +1 -0
  50. data/test/testcases/block/03_paragraph/two_para.html +4 -0
  51. data/test/testcases/block/03_paragraph/two_para.text +4 -0
  52. data/test/testcases/block/04_header/atx_header.html +26 -0
  53. data/test/testcases/block/04_header/atx_header.text +24 -0
  54. data/test/testcases/block/04_header/atx_header_no_newline_at_end.html +1 -0
  55. data/test/testcases/block/04_header/atx_header_no_newline_at_end.text +1 -0
  56. data/test/testcases/block/04_header/setext_header.html +25 -0
  57. data/test/testcases/block/04_header/setext_header.text +27 -0
  58. data/test/testcases/block/04_header/setext_header_no_newline_at_end.html +1 -0
  59. data/test/testcases/block/04_header/setext_header_no_newline_at_end.text +2 -0
  60. data/test/testcases/block/04_header/with_auto_ids.html +17 -0
  61. data/test/testcases/block/04_header/with_auto_ids.options +1 -0
  62. data/test/testcases/block/04_header/with_auto_ids.text +19 -0
  63. data/test/testcases/block/05_blockquote/indented.html +25 -0
  64. data/test/testcases/block/05_blockquote/indented.text +14 -0
  65. data/test/testcases/block/05_blockquote/nested.html +9 -0
  66. data/test/testcases/block/05_blockquote/nested.text +5 -0
  67. data/test/testcases/block/05_blockquote/no_newline_at_end.html +4 -0
  68. data/test/testcases/block/05_blockquote/no_newline_at_end.text +2 -0
  69. data/test/testcases/block/05_blockquote/only_first_quoted.html +8 -0
  70. data/test/testcases/block/05_blockquote/only_first_quoted.text +4 -0
  71. data/test/testcases/block/05_blockquote/with_code_blocks.html +15 -0
  72. data/test/testcases/block/05_blockquote/with_code_blocks.text +11 -0
  73. data/test/testcases/block/06_codeblock/error.html +4 -0
  74. data/test/testcases/block/06_codeblock/error.text +4 -0
  75. data/test/testcases/block/06_codeblock/no_newline_at_end.html +2 -0
  76. data/test/testcases/block/06_codeblock/no_newline_at_end.text +1 -0
  77. data/test/testcases/block/06_codeblock/normal.html +13 -0
  78. data/test/testcases/block/06_codeblock/normal.text +10 -0
  79. data/test/testcases/block/06_codeblock/tilde_syntax.html +7 -0
  80. data/test/testcases/block/06_codeblock/tilde_syntax.text +9 -0
  81. data/test/testcases/block/06_codeblock/whitespace.html +3 -0
  82. data/test/testcases/block/06_codeblock/whitespace.text +3 -0
  83. data/test/testcases/block/06_codeblock/with_blank_line.html +13 -0
  84. data/test/testcases/block/06_codeblock/with_blank_line.text +11 -0
  85. data/test/testcases/block/06_codeblock/with_eob_marker.html +6 -0
  86. data/test/testcases/block/06_codeblock/with_eob_marker.text +5 -0
  87. data/test/testcases/block/07_horizontal_rule/error.html +7 -0
  88. data/test/testcases/block/07_horizontal_rule/error.text +7 -0
  89. data/test/testcases/block/07_horizontal_rule/normal.html +19 -0
  90. data/test/testcases/block/07_horizontal_rule/normal.text +19 -0
  91. data/test/testcases/block/08_list/escaping.html +17 -0
  92. data/test/testcases/block/08_list/escaping.text +17 -0
  93. data/test/testcases/block/08_list/list_and_hr.html +9 -0
  94. data/test/testcases/block/08_list/list_and_hr.text +5 -0
  95. data/test/testcases/block/08_list/list_and_others.html +38 -0
  96. data/test/testcases/block/08_list/list_and_others.text +25 -0
  97. data/test/testcases/block/08_list/mixed.html +111 -0
  98. data/test/testcases/block/08_list/mixed.text +66 -0
  99. data/test/testcases/block/08_list/nested.html +17 -0
  100. data/test/testcases/block/08_list/nested.text +7 -0
  101. data/test/testcases/block/08_list/other_first_element.html +39 -0
  102. data/test/testcases/block/08_list/other_first_element.text +18 -0
  103. data/test/testcases/block/08_list/simple_ol.html +19 -0
  104. data/test/testcases/block/08_list/simple_ol.text +13 -0
  105. data/test/testcases/block/08_list/simple_ul.html +61 -0
  106. data/test/testcases/block/08_list/simple_ul.text +43 -0
  107. data/test/testcases/block/08_list/single_item.html +3 -0
  108. data/test/testcases/block/08_list/single_item.text +1 -0
  109. data/test/testcases/block/08_list/special_cases.html +29 -0
  110. data/test/testcases/block/08_list/special_cases.text +19 -0
  111. data/test/testcases/block/09_html/auto_parse_block_html.html +17 -0
  112. data/test/testcases/block/09_html/auto_parse_block_html.options +1 -0
  113. data/test/testcases/block/09_html/auto_parse_block_html.text +14 -0
  114. data/test/testcases/block/09_html/comment.html +12 -0
  115. data/test/testcases/block/09_html/comment.text +12 -0
  116. data/test/testcases/block/09_html/filtered_html.html +1 -0
  117. data/test/testcases/block/09_html/filtered_html.options +1 -0
  118. data/test/testcases/block/09_html/filtered_html.text +1 -0
  119. data/test/testcases/block/09_html/html_and_codeblocks.html +15 -0
  120. data/test/testcases/block/09_html/html_and_codeblocks.options +1 -0
  121. data/test/testcases/block/09_html/html_and_codeblocks.text +13 -0
  122. data/test/testcases/block/09_html/invalid_html_1.html +5 -0
  123. data/test/testcases/block/09_html/invalid_html_1.text +5 -0
  124. data/test/testcases/block/09_html/invalid_html_2.html +6 -0
  125. data/test/testcases/block/09_html/invalid_html_2.text +5 -0
  126. data/test/testcases/block/09_html/parse_as_raw.html +26 -0
  127. data/test/testcases/block/09_html/parse_as_raw.text +16 -0
  128. data/test/testcases/block/09_html/parse_as_span.html +12 -0
  129. data/test/testcases/block/09_html/parse_as_span.text +7 -0
  130. data/test/testcases/block/09_html/processing_instruction.html +12 -0
  131. data/test/testcases/block/09_html/processing_instruction.text +12 -0
  132. data/test/testcases/block/09_html/simple.html +78 -0
  133. data/test/testcases/block/09_html/simple.text +56 -0
  134. data/test/testcases/block/10_ald/simple.html +2 -0
  135. data/test/testcases/block/10_ald/simple.text +8 -0
  136. data/test/testcases/block/11_ial/simple.html +17 -0
  137. data/test/testcases/block/11_ial/simple.text +25 -0
  138. data/test/testcases/block/12_extension/comment.html +5 -0
  139. data/test/testcases/block/12_extension/comment.text +11 -0
  140. data/test/testcases/block/12_extension/ignored.html +6 -0
  141. data/test/testcases/block/12_extension/ignored.text +11 -0
  142. data/test/testcases/block/12_extension/kdoptions.html +15 -0
  143. data/test/testcases/block/12_extension/kdoptions.text +18 -0
  144. data/test/testcases/block/12_extension/kdoptions2.html +10 -0
  145. data/test/testcases/block/12_extension/kdoptions2.text +5 -0
  146. data/test/testcases/block/12_extension/nokramdown.html +6 -0
  147. data/test/testcases/block/12_extension/nokramdown.text +11 -0
  148. data/test/testcases/span/01_link/empty.html +3 -0
  149. data/test/testcases/span/01_link/empty.text +3 -0
  150. data/test/testcases/span/01_link/image_in_a.html +5 -0
  151. data/test/testcases/span/01_link/image_in_a.text +5 -0
  152. data/test/testcases/span/01_link/imagelinks.html +12 -0
  153. data/test/testcases/span/01_link/imagelinks.text +14 -0
  154. data/test/testcases/span/01_link/inline.html +40 -0
  155. data/test/testcases/span/01_link/inline.text +42 -0
  156. data/test/testcases/span/01_link/link_defs.html +8 -0
  157. data/test/testcases/span/01_link/link_defs.text +22 -0
  158. data/test/testcases/span/01_link/links_with_angle_brackets.html +3 -0
  159. data/test/testcases/span/01_link/links_with_angle_brackets.text +3 -0
  160. data/test/testcases/span/01_link/reference.html +32 -0
  161. data/test/testcases/span/01_link/reference.text +42 -0
  162. data/test/testcases/span/02_emphasis/empty.html +3 -0
  163. data/test/testcases/span/02_emphasis/empty.text +3 -0
  164. data/test/testcases/span/02_emphasis/errors.html +9 -0
  165. data/test/testcases/span/02_emphasis/errors.text +9 -0
  166. data/test/testcases/span/02_emphasis/nesting.html +34 -0
  167. data/test/testcases/span/02_emphasis/nesting.text +30 -0
  168. data/test/testcases/span/02_emphasis/normal.html +42 -0
  169. data/test/testcases/span/02_emphasis/normal.text +42 -0
  170. data/test/testcases/span/03_codespan/empty.html +5 -0
  171. data/test/testcases/span/03_codespan/empty.text +5 -0
  172. data/test/testcases/span/03_codespan/errors.html +1 -0
  173. data/test/testcases/span/03_codespan/errors.text +1 -0
  174. data/test/testcases/span/03_codespan/normal.html +16 -0
  175. data/test/testcases/span/03_codespan/normal.text +16 -0
  176. data/test/testcases/span/04_footnote/definitions.html +14 -0
  177. data/test/testcases/span/04_footnote/definitions.text +18 -0
  178. data/test/testcases/span/04_footnote/footnote_nr.html +12 -0
  179. data/test/testcases/span/04_footnote/footnote_nr.options +1 -0
  180. data/test/testcases/span/04_footnote/footnote_nr.text +4 -0
  181. data/test/testcases/span/04_footnote/markers.html +46 -0
  182. data/test/testcases/span/04_footnote/markers.text +26 -0
  183. data/test/testcases/span/05_html/normal.html +17 -0
  184. data/test/testcases/span/05_html/normal.text +17 -0
  185. data/test/testcases/span/autolinks/url_links.html +9 -0
  186. data/test/testcases/span/autolinks/url_links.text +9 -0
  187. data/test/testcases/span/escaped_chars/normal.html +33 -0
  188. data/test/testcases/span/escaped_chars/normal.text +33 -0
  189. data/test/testcases/span/ial/simple.html +5 -0
  190. data/test/testcases/span/ial/simple.text +5 -0
  191. data/test/testcases/span/line_breaks/normal.html +11 -0
  192. data/test/testcases/span/line_breaks/normal.text +11 -0
  193. data/test/testcases/span/text_substitutions/entities.html +4 -0
  194. data/test/testcases/span/text_substitutions/entities.text +4 -0
  195. data/test/testcases/span/text_substitutions/greaterthan.html +1 -0
  196. data/test/testcases/span/text_substitutions/greaterthan.text +1 -0
  197. data/test/testcases/span/text_substitutions/lowerthan.html +1 -0
  198. data/test/testcases/span/text_substitutions/lowerthan.text +1 -0
  199. data/test/testcases/span/text_substitutions/typography.html +3 -0
  200. data/test/testcases/span/text_substitutions/typography.text +3 -0
  201. metadata +259 -0
@@ -0,0 +1,73 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ #--
4
+ # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
5
+ #
6
+ # This file is part of kramdown.
7
+ #
8
+ # kramdown is free software: you can redistribute it and/or modify
9
+ # it under the terms of the GNU General Public License as published by
10
+ # the Free Software Foundation, either version 3 of the License, or
11
+ # (at your option) any later version.
12
+ #
13
+ # This program is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ # GNU General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU General Public License
19
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
20
+ #++
21
+ #
22
+
23
+ module Kramdown
24
+
25
# Base class for kramdown extensions.
#
# It implements the default extensions defined in the kramdown specification.
#
# An extension is a method named <tt>parse_EXTNAME</tt>, where +EXTNAME+ is the name of the
# extension. Every such method receives three arguments:
#
# [+parser+]
#   The parser instance that invokes the extension method.
# [+opts+]
#   A hash with the options set in the extension.
# [+body+]
#   A string with the body of the extension, or +nil+ if there is no body.
class Extension

  # The +comment+ extension: ignores all of its arguments and returns +nil+.
  def parse_comment(parser, opts, body)
    nil
  end

  # The +nokramdown+ extension: appends +body+ as a <tt>:raw</tt> Element to the children of
  # +parser.tree+. Nothing is appended when +body+ is not a String.
  def parse_nokramdown(parser, opts, body)
    return unless body.kind_of?(String)
    parser.tree.children << Element.new(:raw, body)
  end

  # The +kdoptions+ extension: copies the recognised options from +opts+ into
  # +parser.doc.options+ and emits a parser warning for every key that is left over.
  #
  # Recognised keys (each one is removed from +opts+):
  #
  # [+auto_ids+]
  #   'false' (case-insensitive, surrounding whitespace ignored) disables the option, any other
  #   non-empty value enables it, an empty value leaves it unchanged.
  # [+filter_html+]
  #   A whitespace separated list that is stored as an array of strings.
  # [+footnote_nr+]
  #   Stored as an Integer; a value that is not a valid integer keeps the current setting.
  def parse_kdoptions(parser, opts, body)
    flag = opts.delete('auto_ids')
    if flag
      turned_off = (flag.downcase.strip == 'false')
      parser.doc.options[:auto_ids] = !turned_off if turned_off || !flag.empty?
    end

    tags = opts.delete('filter_html')
    parser.doc.options[:filter_html] = tags.split(/\s+/) if tags

    number = opts.delete('footnote_nr')
    if number
      options = parser.doc.options
      options[:footnote_nr] = begin
        Integer(number)
      rescue StandardError
        options[:footnote_nr]
      end
    end

    opts.each_key { |name| parser.warning("Unknown kramdown options '#{name}'") }
  end

end
70
+
71
+ end
72
+
73
+
@@ -0,0 +1,1056 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ #--
4
+ # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
5
+ #
6
+ # This file is part of kramdown.
7
+ #
8
+ # kramdown is free software: you can redistribute it and/or modify
9
+ # it under the terms of the GNU General Public License as published by
10
+ # the Free Software Foundation, either version 3 of the License, or
11
+ # (at your option) any later version.
12
+ #
13
+ # This program is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ # GNU General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU General Public License
19
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
20
+ #++
21
+ #
22
+
23
+ require 'strscan'
24
+ require 'stringio'
25
+ require 'kramdown/parser/registry'
26
+
27
+ #TODO: use [[:alpha:]] in all regexp to allow parsing of international values in 1.9.1
28
+ #NOTE: use @src.pre_match only before other check/match?/... operations, otherwise the content is changed
29
+
30
+ module Kramdown
31
+
32
+ # This module contains all available parsers. Currently, there is only one parser for parsing
33
+ # documents in kramdown format.
34
+ module Parser
35
+
36
+ # Used for parsing a document in kramdown format.
37
+ class Kramdown
38
+
39
+ include ::Kramdown
40
+
41
+ attr_reader :tree
42
+ attr_reader :doc
43
+
44
# Create a new Kramdown parser object for the Kramdown::Document +doc+.
#
# All parser state is reset and the <tt>:ald</tt>, <tt>:link_defs</tt> and <tt>:footnotes</tt>
# entries of <tt>doc.parse_infos</tt> are initialized with empty hashes.
def initialize(doc)
  @doc = doc
  @src = @tree = nil
  @unclosed_html_tags, @stack = [], []
  @used_ids = {}
  [:ald, :link_defs, :footnotes].each { |key| @doc.parse_infos[key] = {} }
end
56
+ private_class_method(:new, :allocate)
57
+
58
+
59
# Parse the string +source+ using the Kramdown::Document +doc+ and return the parse tree.
#
# This creates a parser instance for +doc+ and delegates to its #parse method.
def self.parse(source, doc)
  parser = new(doc)
  parser.parse(source)
end
63
+
64
# Parse the string +source+ and return the created tree.
#
# The parser is configured first, then the block level elements of the adapted source are
# parsed into a new <tt>:root</tt> element. Afterwards the root element and the content of every
# footnote stored in <tt>@doc.parse_infos[:footnotes]</tt> are post-processed with #update_tree.
def parse(source)
  configure_parser
  root = Element.new(:root)
  parse_blocks(root, adapt_source(source))
  update_tree(root)
  @doc.parse_infos[:footnotes].each do |_name, footnote|
    update_tree(footnote[:content])
  end
  root
end
75
+
76
# Append the given warning +text+ to the warnings of the Kramdown document.
def warning(text)
  #TODO: add position information
  @doc.warnings.push(text)
end
81
+
82
+ #######
83
+ private
84
+ #######
85
+
86
# Names of the block level parsers, listed in the order in which they are tried.
BLOCK_PARSERS = [
  :blank_line, :codeblock, :codeblock_fenced, :blockquote,
  :atx_header, :setext_header, :horizontal_rule, :list,
  :link_definition, :block_html, :footnote_definition, :ald,
  :block_ial, :extension_block, :eob_marker, :paragraph
]

# Names of the span level parsers, listed in the order in which they are tried.
SPAN_PARSERS = [
  :emphasis, :codespan, :autolink, :span_html,
  :footnote_marker, :link, :span_ial, :html_entity,
  :typographic_syms, :special_html_chars, :line_break, :escaped_chars
]
91
+
92
# Prepare this object for parsing.
#
# Every parser named in BLOCK_PARSERS and SPAN_PARSERS is looked up in the Registry, its module
# is mixed into this object and its registry entry is stored in +@parsers+. A Kramdown::Error
# is raised for a name that is not registered for the expected level. Finally the start
# expressions of all span parsers are combined into +@span_start+ and the look-ahead
# expression +@span_start_re+.
def configure_parser
  @parsers = {}
  [[:block, BLOCK_PARSERS], [:span, SPAN_PARSERS]].each do |level, names|
    names.each do |name|
      unless Registry.has_parser?(name, level)
        raise Kramdown::Error, "Unknown #{level} parser: #{name}"
      end
      entry = Registry.parser(name)
      extend(entry.module)
      @parsers[name] = entry
    end
  end
  start_expressions = SPAN_PARSERS.map { |name| @parsers[name].start_re }
  @span_start = Regexp.union(*start_expressions)
  @span_start_re = /(?=#{@span_start})/
end
114
+
115
# Parse all block level elements in the string +text+ into the element +el+.
#
# The current values of +@tree+, +@src+ and +@unclosed_html_tags+ are pushed onto +@stack+ for
# the duration of the call and restored before returning, so the method can be called
# recursively by block parsers.
#
# At each position the parsers named in BLOCK_PARSERS are tried in order; the first one whose
# start expression matches and whose method returns a true value handles the position. If no
# parser handles it, a warning is recorded and the rest of the current line is added as text.
def parse_blocks(el, text)
  @stack.push([@tree, @src, @unclosed_html_tags])
  @tree, @src, @unclosed_html_tags = el, StringScanner.new(text), []

  while !@src.eos?
    handled = BLOCK_PARSERS.any? do |name|
      parser = @parsers[name]
      @src.check(parser.start_re) ? send(parser.method) : false
    end
    next if handled

    warning('Warning: this should not occur - no block parser handled the line')
    # Also accept a final line without a trailing newline. Requiring "\n" here would make the
    # scan return nil for such a line, which add_text cannot handle.
    add_text(@src.scan(/.*(?:\n|\z)/))
  end

  @unclosed_html_tags.reverse.each do |tag|
    warning("Automatically closing unclosed html tag '#{tag.value}'")
  end

  @tree, @src, @unclosed_html_tags = *@stack.pop
end
140
+
141
# Post-process the children of +element+ in place.
#
# Every <tt>:text</tt> child is run through the span level parser and replaced by the elements
# that the parser produced (this resets +@stack+, +@tree+ and +@src+). Every other child is
# processed recursively and, if it carries an IAL in <tt>options[:ial]</tt>, its
# <tt>options[:attr]</tt> hash is updated from that IAL.
def update_tree(element)
  element.children.map! do |child|
    unless child.type == :text
      update_tree(child)
      ial = child.options[:ial]
      update_attr_with_ial(child.options[:attr] ||= {}, ial) if ial
      next child
    end

    @stack, @tree = [], nil
    @src = StringScanner.new(child.value)
    parse_spans(child)
    child.children
  end.flatten!
end
157
+
158
# Parse all span level elements in the source string (the current +@src+ scanner) and add the
# results to the element +el+.
#
# If +stop_re+ is given, parsing also pauses wherever +stop_re+ matches: the optional block is
# then called and its result decides whether parsing stops there (without a block it always
# stops). The previous +@tree+ is saved on +@stack+ and restored before returning.
#
# Returns the stop flag: +false+ if +stop_re+ was not given or never caused a stop, otherwise
# the value that ended the loop (+true+ or the truthy block result).
def parse_spans(el, stop_re = nil)
  @stack.push(@tree)
  @tree = el

  # Look-ahead expression that finds the next position at which a span parser (or stop_re) may start.
  used_re = (stop_re.nil? ? @span_start_re : /(?=#{Regexp.union(stop_re, @span_start)})/)
  stop_re_found = false
  while !@src.eos? && !stop_re_found
    if result = @src.scan_until(used_re)
      # Everything up to the found position is plain text.
      add_text(result)
      if stop_re && (stop_re_matched = @src.check(stop_re))
        stop_re_found = (block_given? ? yield : true)
      end
      # Try the span parsers in order; the first one whose start expression matches is run.
      processed = SPAN_PARSERS.any? do |name|
        if @src.check(@parsers[name].start_re)
          send(@parsers[name].method)
          true
        else
          false
        end
      end unless stop_re_found
      if !processed && !stop_re_found
        if stop_re_matched
          # stop_re matched but the block declined to stop and no span parser applies:
          # consume a single character as text so that the scanner advances.
          add_text(@src.scan(/./))
        else
          raise Kramdown::Error, 'Bug: please report!'
        end
      end
    else
      # No further start position: the rest is text, but only when no stop_re was requested.
      add_text(@src.scan_until(/.*/m)) unless stop_re
      break
    end
  end

  @tree = @stack.pop

  stop_re_found
end
196
+
197
# Return a copy of the string +source+ that is usable by the parser: all line endings are
# converted to "\n" and the result ends with exactly one added "\n" after one trailing line
# break (if any) has been removed.
def adapt_source(source)
  normalized = source.gsub(/\r\n?/, "\n")
  normalized.chomp << "\n"
end
201
+
202
# Helper method for adding +text+ to +tree+ (by default the current +@tree+).
#
# If the last child of +tree+ is a <tt>:text</tt> element, +text+ is appended to its value.
# Otherwise a new <tt>:text</tt> element is added, unless +text+ is empty.
def add_text(text, tree = @tree)
  last_child = tree.children.last
  if last_child && last_child.type == :text
    last_child.value << text
  elsif !text.empty?
    tree.children << Element.new(:text, text)
  end
end
211
+
212
+ end
213
+
214
+
215
+ module ParserMethods
216
+
217
+ INDENT = /^(?:\t| {4})/
218
+ OPT_SPACE = / {0,3}/
219
+
220
+
221
# Parse the string +str+ and add all attributes found in it to the hash +opts+.
#
# References to other attribute lists are collected in <tt>opts[:refs]</tt>, class names are
# appended to <tt>opts['class']</tt> (separated by spaces), an ID name is stored in
# <tt>opts['id']</tt> and a key-value pair is stored under its key after unescaping
# <tt>\}</tt> and the escaped quote character.
def parse_attribute_list(str, opts)
  str.scan(ALD_TYPE_ANY).each do |key, quote, value, id_name, class_name, ref_name|
    if ref_name
      (opts[:refs] ||= []).push(ref_name)
    elsif class_name
      opts['class'] = "#{opts['class']} #{class_name}".lstrip
    elsif id_name
      opts['id'] = id_name
    else
      opts[key] = value.gsub(/\\(\}|#{quote})/, "\\1")
    end
  end
end
236
+
237
# Update the +ial+ with the information from the inline attribute list +opts+.
#
# The references of +opts+ are appended to <tt>ial[:refs]</tt> (which is created if missing),
# the class names of +opts+ are appended to <tt>ial['class']</tt> and all other entries of
# +opts+ overwrite the entries with the same key in +ial+.
def update_ial_with_ial(ial, opts)
  refs = (ial[:refs] ||= [])
  # Append the individual reference names. Pushing the array itself (or nil) would create
  # entries that can never be resolved to an attribute list definition.
  refs.concat(opts[:refs]) if opts[:refs]
  ial['class'] = ((ial['class'] || '') + " #{opts['class']}").lstrip if opts['class']
  opts.each {|k,v| ial[k] = v if k != :refs && k != 'class' }
end
243
+
244
# Update the attributes +attr+ with the information from the inline attribute list +ial+ and
# all referenced ALDs.
#
# Referenced ALDs (looked up in <tt>@doc.parse_infos[:ald]</tt>) are applied first, in the
# order of <tt>ial[:refs]</tt>, so that the entries of +ial+ itself take precedence. Class
# names are accumulated in <tt>attr['class']</tt>, all other String keys are overwritten.
#
# +active+ is used internally for the recursion: it holds the attribute lists that are
# currently being applied, so that ALDs which reference themselves or each other do not lead
# to endless recursion.
def update_attr_with_ial(attr, ial, active = [])
  active.push(ial)
  (ial[:refs] || []).each do |ref|
    ald = @doc.parse_infos[:ald][ref]
    next unless ald
    # Skip a definition that is already being applied further up the reference chain.
    next if active.any? { |seen| seen.equal?(ald) }
    update_attr_with_ial(attr, ald, active)
  end
  active.pop
  attr['class'] = ((attr['class'] || '') + " #{ial['class']}").lstrip if ial['class']
  ial.each {|k,v| attr[k] = v if k.kind_of?(String) && k != 'class' }
end
252
+
253
# Generate an alpha-numeric ID from the string +str+.
#
# All characters except ASCII letters, digits, spaces and hyphens are removed, leading
# non-letters are stripped, spaces become hyphens and the result is downcased. An empty result
# is replaced by 'section'. If the ID has been generated before (tracked in +@used_ids+), a
# hyphen and a running number are appended.
def generate_id(str)
  base = str.gsub(/[^a-zA-Z0-9 -]/, '').gsub(/^[^a-zA-Z]*/, '').tr(' ', '-').downcase
  base = 'section' if base.empty?
  unless @used_ids.has_key?(base)
    @used_ids[base] = 0
    return base
  end
  @used_ids[base] += 1
  "#{base}-#{@used_ids[base]}"
end
264
+
265
# Helper method for obfuscating the +email+ address by using HTML entities.
#
# Every ASCII byte is replaced by its decimal numeric character reference (e.g. "a" becomes
# "&#097;"). Bytes outside the ASCII range are copied unchanged so that multi-byte characters
# stay intact. The result has the same encoding as +email+ where encodings are supported.
def obfuscate_email(email)
  result = String.new
  email.each_byte do |b|
    # 127 is the last ASCII byte; 128 (0x80) is already part of a multi-byte character.
    result << (b > 127 ? b.chr : "&#%03d;" % b)
  end
  result.force_encoding(email.encoding) if result.respond_to?(:force_encoding)
  result
end
273
+
274
+
275
+ BLANK_LINE = /(?:^\s*\n)+/
276
+
277
# Parse the blank line at the current position.
#
# The scanner is moved past the last match. The matched text is appended to a directly
# preceding <tt>:blank</tt> element or, if there is none, added as a new <tt>:blank</tt> element.
def parse_blank_line
  @src.pos += @src.matched_size
  previous = @tree.children.last
  if previous && previous.type == :blank
    previous.value += @src.matched
  else
    @tree.children << Element.new(:blank, @src.matched)
  end
  true
end
287
+ Registry.define_parser(:block, :blank_line, BLANK_LINE, self)
288
+
289
+
290
+ EOB_MARKER = /^\^\s*?\n/
291
+
292
# Parse the EOB marker at the current location: the scanner is moved past the last match and
# nothing is added to the tree.
def parse_eob_marker
  @src.pos = @src.pos + @src.matched_size
  true
end
297
+ Registry.define_parser(:block, :eob_marker, EOB_MARKER, self)
298
+
299
+
300
+ PARAGRAPH_START = /^#{OPT_SPACE}[^ \t].*?\n/
301
+
302
# Parse the paragraph at the current location.
#
# The matched line is consumed. If the last element of the tree is a paragraph, the line
# (without its line break) is appended to the value of that paragraph's first child, separated
# by "\n". Otherwise a new <tt>:p</tt> element is added that contains the line without leading
# whitespace and without the line break.
def parse_paragraph
  @src.pos += @src.matched_size
  line = @src.matched
  previous = @tree.children.last
  if previous && previous.type == :p
    previous.children.first.value << "\n" << line.chomp
  else
    paragraph = Element.new(:p)
    @tree.children << paragraph
    add_text(line.lstrip.chomp, paragraph)
  end
  true
end
313
+ Registry.define_parser(:block, :paragraph, PARAGRAPH_START, self)
314
+
315
+
316
+ SETEXT_HEADER_START = /^(#{OPT_SPACE}[^ \t].*?)\n(-|=)+\s*?\n/
317
+
318
# Parse the Setext header at the current location.
#
# Returns +false+ without consuming anything if the preceding element is not a blank line.
# Otherwise a <tt>:header</tt> element is added: level 2 if the last underline character is
# '-', level 1 otherwise. With the +auto_ids+ document option an ID is generated from the
# header text.
def parse_setext_header
  previous = @tree.children.last
  return false if previous && previous.type != :blank

  @src.pos += @src.matched_size
  title = @src[1].strip
  underline = @src[2]
  header = Element.new(:header, nil, :level => (underline == '-' ? 2 : 1))
  add_text(title, header)
  header.options[:attr] = {:id => generate_id(title)} if @doc.options[:auto_ids]
  @tree.children << header
  true
end
331
+ Registry.define_parser(:block, :setext_header, SETEXT_HEADER_START, self)
332
+
333
+
334
+ ATX_HEADER_START = /^\#{1,6}/
335
+ ATX_HEADER_MATCH = /^(\#{1,6})(.+?)\s*?#*\s*?\n/
336
+
337
# Parse the Atx header at the current location.
#
# Returns +false+ without consuming anything if the preceding element is not a blank line or
# if the line is not a complete Atx header (for example a line that consists only of '#'
# characters), so that another block parser can handle the line. Otherwise a <tt>:header</tt>
# element with the level given by the number of leading '#' characters is added. With the
# +auto_ids+ document option an ID is generated from the header text.
def parse_atx_header
  if @tree.children.last && @tree.children.last.type != :blank
    return false
  end
  # ATX_HEADER_START only looks at the leading '#' characters. The full expression also needs
  # header text, so the scan can fail and must be checked before the groups are used.
  return false unless @src.scan(ATX_HEADER_MATCH)
  level, text = @src[1], @src[2].strip
  el = Element.new(:header, nil, :level => level.length)
  add_text(text, el)
  el.options[:attr] = {:id => generate_id(text)} if @doc.options[:auto_ids]
  @tree.children << el
  true
end
350
+ Registry.define_parser(:block, :atx_header, ATX_HEADER_START, self)
351
+
352
+
353
+ BLOCKQUOTE_START = /^#{OPT_SPACE}> ?/
354
+ BLOCKQUOTE_MATCH = /(^#{OPT_SPACE}>.*?\n)+/
355
+
356
# Parse the blockquote at the current location.
#
# All consecutive quoted lines are consumed, their quote markers are removed and the remaining
# text is parsed with the block level parser into a new <tt>:blockquote</tt> element.
def parse_blockquote
  quoted = @src.scan(BLOCKQUOTE_MATCH)
  inner = quoted.gsub(BLOCKQUOTE_START, '')
  quote = Element.new(:blockquote)
  @tree.children << quote
  parse_blocks(quote, inner)
  true
end
364
+ Registry.define_parser(:block, :blockquote, BLOCKQUOTE_START, self)
365
+
366
+
367
+ CODEBLOCK_START = INDENT
368
+ CODEBLOCK_MATCH = /(?:#{INDENT}.*?\S.*?\n)+/
369
+
370
# Parse the indented codeblock at the current location.
#
# All consecutive indented lines are consumed and one level of indentation is removed. If the
# two preceding elements are a codeblock followed by a blank line, the blank lines and the new
# code are appended to that codeblock and the blank element is removed; otherwise a new
# <tt>:codeblock</tt> element is added.
def parse_codeblock
  code = @src.scan(CODEBLOCK_MATCH).gsub(INDENT, '')
  siblings = @tree.children
  continues = siblings.length >= 2 && siblings[-1].type == :blank && siblings[-2].type == :codeblock
  unless continues
    siblings << Element.new(:codeblock, code)
    return true
  end

  gap = siblings.pop
  siblings.last.value << gap.value.gsub(INDENT, '') << code
  true
end
382
+ Registry.define_parser(:block, :codeblock, CODEBLOCK_START, self)
383
+
384
+
385
+ FENCED_CODEBLOCK_START = /^~{3,}/
386
+ FENCED_CODEBLOCK_MATCH = /^(~{3,})\s*?\n(.*?)^\1~*\s*?\n/m
387
+
388
# Parse the fenced codeblock at the current location.
#
# Returns +false+ without consuming anything if no complete fenced codeblock (opening and
# closing fence) starts here. Otherwise the block is consumed and its content is added as a
# <tt>:codeblock</tt> element.
def parse_codeblock_fenced
  return false unless @src.check(FENCED_CODEBLOCK_MATCH)

  @src.pos += @src.matched_size
  @tree.children << Element.new(:codeblock, @src[2])
  true
end
398
+ Registry.define_parser(:block, :codeblock_fenced, FENCED_CODEBLOCK_START, self)
399
+
400
+
401
+ HR_START = /^#{OPT_SPACE}(\*|-|_) *\1 *\1 *(\1| )*\n/
402
+
403
# Parse the horizontal rule at the current location: the matched line is consumed and an
# <tt>:hr</tt> element is added to the tree.
def parse_horizontal_rule
  @src.pos += @src.matched_size
  @tree.children.push(Element.new(:hr))
  true
end
409
+ Registry.define_parser(:block, :horizontal_rule, HR_START, self)
410
+
411
+
412
+ LIST_START_UL = /^(#{OPT_SPACE}[+*-])([\t| ].*?\n)/
413
+ LIST_START_OL = /^(#{OPT_SPACE}\d+\.)([\t| ].*?\n)/
414
+ LIST_START = /#{LIST_START_UL}|#{LIST_START_OL}/
415
+
416
# Parse the ordered or unordered list at the current location.
#
# Returns +false+ without consuming anything if the preceding element is a paragraph.
#
# The method works in two phases. First the raw text of every list item is collected into the
# +value+ of a new <tt>:li</tt> element, with the indentation of the item content removed.
# Then the collected text of every item is parsed with the block level parser and the first
# paragraph of an item is replaced by its plain text where appropriate.
def parse_list
  if @tree.children.last && @tree.children.last.type == :p # last element must not be a paragraph
    return false
  end

  # The kind of the first marker decides the list type and which markers start further items.
  type, list_start_re = (@src.check(LIST_START_UL) ? [:ul, LIST_START_UL] : [:ol, LIST_START_OL])
  list = Element.new(type)

  item = nil
  indent_re = nil
  content_re = nil
  eob_found = false
  nested_list_found = false
  # Phase 1: collect the raw text of all items.
  while !@src.eos?
    if @src.check(HR_START)
      # A horizontal rule ends the list.
      break
    elsif @src.scan(list_start_re)
      # A new item: group 1 is the marker including leading spaces, group 2 the rest of the line.
      indentation, content = @src[1].length, @src[2]
      item = Element.new(:li)
      list.children << item
      if content =~ /^\s*\n/
        # Nothing but whitespace after the marker: use a fixed content indentation of 4.
        indentation = 4
      else
        # Expand tabs after the marker to spaces (tab stops of 4), taking the marker width into account.
        while content =~ /^ *\t/
          temp = content.scan(/^ */).first.length + indentation
          content.sub!(/^( *)(\t+)/) {$1 + " "*(4 - (temp % 4)) + " "*($2.length - 1)*4}
        end
        indentation += content.scan(/^ */).first.length
      end
      content.sub!(/^\s*/, '')
      item.value = content

      # Expressions for the following lines of this item: how much indentation to strip, what
      # counts as a content line and what starts the next item of this list.
      # NOTE(review): the character class [\t| ] also accepts a literal '|' after the marker -
      # confirm whether that is intended.
      indent_re = /^ {#{indentation}}/
      content_re = /^(?:(?:\t| {4}){#{indentation / 4}} {#{indentation % 4}}|(?:\t| {4}){#{indentation / 4 + 1}}).*?\n/
      list_start_re = (type == :ul ? /^( {0,#{[3, indentation - 1].min}}[+*-])([\t| ].*?\n)/ :
                       /^( {0,#{[3, indentation - 1].min}}\d+\.)([\t| ].*?\n)/)
      nested_list_found = false
    elsif result = @src.scan(content_re)
      # An indented content line of the current item: convert leading tabs and strip the indentation.
      result.sub!(/^(\t+)/) { " "*4*($1 ? $1.length : 0) }
      result.sub!(indent_re, '')
      if !nested_list_found && result =~ LIST_START
        # First nested list marker: parse the text collected so far. If it is exactly one
        # paragraph, keep the parsed paragraph and restart the raw text; otherwise discard
        # the parse result and keep collecting raw text.
        parse_blocks(item, item.value)
        if item.children.length == 1 && item.children.first.type == :p
          item.value = ''
        else
          item.children.clear
        end
        nested_list_found = true
      end
      item.value << result
    elsif result = @src.scan(BLANK_LINE)
      nested_list_found = true
      item.value << result
    elsif @src.scan(EOB_MARKER)
      # An end-of-block marker ends the list and is consumed.
      eob_found = true
      break
    else
      break
    end
  end

  @tree.children << list

  # Phase 2: parse the collected text of every item.
  last = nil
  list.children.each do |item|
    temp = Element.new(:temp)
    parse_blocks(temp, item.value)
    item.children += temp.children
    item.value = nil
    next if item.children.size == 0

    # Replace the first paragraph by its text element when it is not followed by a blank line
    # (or when this is the last item, it only consists of the paragraph plus a blank and no EOB
    # marker was found); otherwise mark the item so that the first element is kept as a block.
    if item.children.first.type == :p && (item.children.length < 2 || item.children[1].type != :blank ||
                                          (item == list.children.last && item.children.length == 2 && !eob_found))
      text = item.children.shift.children.first
      text.value += "\n" if !item.children.empty? && item.children[0].type != :blank
      item.children.unshift(text)
    else
      item.options[:first_as_block] = true
    end

    # Remove a trailing blank element from the item and remember the one of the last item.
    if item.children.last.type == :blank
      last = item.children.pop
    else
      last = nil
    end
  end

  # The trailing blank of the last item belongs after the list, unless an EOB marker ended it.
  @tree.children << last if !last.nil? && !eob_found

  true
end
508
+ Registry.define_parser(:block, :list, LIST_START, self)
509
+
510
+
511
+ PUNCTUATION_CHARS = "_.:,;!?-"
512
+ LINK_ID_CHARS = /[a-zA-Z0-9 #{PUNCTUATION_CHARS}]/
513
+ LINK_ID_NON_CHARS = /[^a-zA-Z0-9 #{PUNCTUATION_CHARS}]/
514
+ LINK_DEFINITION_START = /^#{OPT_SPACE}\[(#{LINK_ID_CHARS}+)\]:[ \t]*(?:<(.*?)>|([^\s]+))[ \t]*?(?:\n?[ \t]*?(["'])(.+?)\4[ \t]*?)?\n/
515
+
516
# Parse the link definition at the current location.
#
# Expects the scanner's last match to provide the link ID in group 1, the URL in group 2
# (angle bracket form) or group 3 (bare form) and an optional title in group 5. The definition
# is stored under the lower-cased ID in <tt>@doc.parse_infos[:link_defs]</tt> as
# <tt>[url, title]</tt>; an already existing entry is overwritten after a warning.
# Always returns +true+.
def parse_link_definition
  @src.pos += @src.matched_size

  id = @src[1].downcase
  url = @src[2] || @src[3]
  title = @src[5]

  definitions = @doc.parse_infos[:link_defs]
  if definitions[id]
    warning("Duplicate link ID '#{id}' - overwriting")
  end
  definitions[id] = [url, title]
  true
end
524
+ Registry.define_parser(:block, :link_definition, LINK_DEFINITION_START, self)
525
+
526
+
527
# A character that may follow the first character of an attribute list name.
ALD_ID_CHARS = /[\w\d-]/

# Any character allowed inside the braces of an attribute list: an escaped closing brace or
# anything that is not a closing brace.
ALD_ANY_CHARS = /\\\}|[^\}]/

# A complete attribute list name: one word character or digit followed by ALD_ID_CHARS.
ALD_ID_NAME = /(?:\w|\d)#{ALD_ID_CHARS}*/

# key="value" or key='value' (group 1: key, group 2: quote character, group 3: value).
ALD_TYPE_KEY_VALUE_PAIR = /(#{ALD_ID_NAME})=("|')((?:\\\}|\\\2|[^\}\2])+?)\2/

# .class-name (group 1: the name without the dot).
ALD_TYPE_CLASS_NAME = /\.(#{ALD_ID_NAME})/

# #id-name (group 1: the name without the hash sign).
ALD_TYPE_ID_NAME = /#(#{ALD_ID_NAME})/

# A bare name (group 1).
ALD_TYPE_REF = /(#{ALD_ID_NAME})/

# One whitespace separated item of an attribute list. The alternatives are tried in the order
# key-value pair, ID name, class name, bare name.
ALD_TYPE_ANY = /(?:\A|\s)(?:#{ALD_TYPE_KEY_VALUE_PAIR}|#{ALD_TYPE_ID_NAME}|#{ALD_TYPE_CLASS_NAME}|#{ALD_TYPE_REF})(?=\s|\Z)/
535
+ ALD_START = /^#{OPT_SPACE}\{:(#{ALD_ID_NAME}):(#{ALD_ANY_CHARS}+)\}\s*?\n/
536
+
537
# Parse the attribute list definition at the current location.
#
# Group 1 of the scanner's last match is the name of the definition, group 2 its raw attribute
# text. The attributes are parsed into the hash stored under that name in
# <tt>@doc.parse_infos[:ald]</tt> (created on first use). Always returns +true+.
def parse_ald
  @src.pos += @src.matched_size
  name, attributes = @src[1], @src[2]
  target = (@doc.parse_infos[:ald][name] ||= {})
  parse_attribute_list(attributes, target)
  true
end
543
+ Registry.define_parser(:block, :ald, ALD_START, self)
544
+
545
+
546
+ IAL_BLOCK_START = /^#{OPT_SPACE}\{:(?!:)(#{ALD_ANY_CHARS}+)\}\s*?\n/
547
+
548
# Parse the inline attribute list at the current location.
#
# The attributes (group 1 of the scanner's last match) are parsed into the <tt>:ial</tt> option
# of the last child of the current tree. Nothing is attached when there is no such child or
# when it is a blank element. Always returns +true+.
def parse_block_ial
  @src.pos += @src.matched_size
  previous = @tree.children.last
  unless previous.nil? || previous.type == :blank
    parse_attribute_list(@src[1], previous.options[:ial] ||= {})
  end
  true
end
556
+ Registry.define_parser(:block, :block_ial, IAL_BLOCK_START, self)
557
+
558
+
559
# Template for the regexp source matching an extension block line of the form
# <tt>{::name:attributes}</tt> (or <tt>{::name::attributes}</tt> for a block without body).
# The <tt>%s</tt> placeholder is replaced by the pattern for the extension name.
#
# The trailing whitespace class and the newline are written with escaped backslashes: inside a
# double-quoted string "\s" is a literal space character, so an unescaped "\s*?" would only
# tolerate spaces (not tabs) before the newline, unlike ALD_START and IAL_BLOCK_START.
EXT_BLOCK_START_STR = "^#{OPT_SPACE}\\{::(%s):(:)?(#{ALD_ANY_CHARS}*)\\}\\s*?\\n"

# Matches the start line of any extension block (group 1: extension name, group 2: the marker
# for "no body", group 3: raw attribute text).
EXT_BLOCK_START = /#{EXT_BLOCK_START_STR % ALD_ID_NAME}/
561
+
562
# Parse the extension block at the current location.
#
# The scanner's last match provides the extension name (group 1), the optional "no body" marker
# (group 2) and the raw attribute text (group 3). Unless the marker is present, everything up
# to the next line matching the same extension name is taken as the body; attributes given on
# that closing line are parsed into the same options hash.
#
# The extension object is only invoked when it responds to <tt>parse_NAME</tt> and, if a body
# is required, a closing line was found; otherwise a warning is issued and the block is
# ignored. Always returns +true+.
def parse_extension_block
  @src.pos += @src.matched_size

  # Take the groups of the start line now so that they cannot be affected by later scans.
  ext, no_body_marker, attributes = @src[1], @src[2], @src[3]
  handler = "parse_#{ext}"
  opts = {}
  body = nil
  parse_attribute_list(attributes, opts)

  unless @doc.extension.respond_to?(handler)
    warning("No extension named '#{ext}' found - ignoring extension block")
    body = :invalid
  end

  unless no_body_marker
    stop_re = /#{EXT_BLOCK_START_STR % ext}/
    if (result = @src.scan_until(stop_re))
      # Group 3 now belongs to the closing line.
      parse_attribute_list(@src[3], opts)
      body = result.sub(stop_re, '') if body != :invalid
    else
      body = :invalid
      warning("No ending line for extension block '#{ext}' found - ignoring extension block")
    end
  end

  @doc.extension.send(handler, self, opts, body) if body != :invalid

  true
end
591
+ Registry.define_parser(:block, :extension_block, EXT_BLOCK_START, self)
592
+
593
+
594
+ FOOTNOTE_DEFINITION_START = /^#{OPT_SPACE}\[\^(#{ALD_ID_NAME})\]:\s*?(.*?\n(?:#{BLANK_LINE}?#{CODEBLOCK_MATCH})*)/
595
+
596
# Parse the footnote definition at the current location.
#
# Group 1 of the scanner's last match is the footnote name, group 2 its content. The content is
# stripped of INDENT at the start of each line, parsed as block level text into a new
# <tt>:footnote_def</tt> element and stored as <tt>:content</tt> under the footnote name in
# <tt>@doc.parse_infos[:footnotes]</tt>. An existing entry with the same name is replaced after
# a warning.
#
# Returns +true+ like the other block level parser methods.
def parse_footnote_definition
  @src.pos += @src.matched_size

  # Read the match groups before parsing the content so that the result does not rely on the
  # scanner's match data being unchanged after the nested parse_blocks call.
  name, content = @src[1], @src[2]

  el = Element.new(:footnote_def)
  parse_blocks(el, content.gsub(INDENT, ''))

  footnotes = @doc.parse_infos[:footnotes]
  warning("Duplicate footnote name '#{name}' - overwriting") if footnotes[name]
  footnotes[name] = {:content => el}
  true
end
605
+ Registry.define_parser(:block, :footnote_definition, FOOTNOTE_DEFINITION_START, self)
606
+
607
+
608
+ require 'rexml/parsers/baseparser'
609
+
610
+ #:stopdoc:
611
+ # The following regexps are based on the ones used by REXML, with some slight modifications.
612
+ #:startdoc:
613
+ HTML_COMMENT_RE = /<!--(.*?)-->/m
614
+ HTML_INSTRUCTION_RE = /<\?(.*?)\?>/m
615
+ HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})\s*=\s*(["'])(.*?)\2/
616
+ HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/
617
+ HTML_TAG_CLOSE_RE = /<\/(#{REXML::Parsers::BaseParser::NAME_STR})\s*>/
618
+
619
+
620
# HTML elements whose content is parsed as block level text.
HTML_PARSE_AS_BLOCK = %w[div blockquote table dl ol ul form fieldset]
# HTML elements whose content is parsed as span level text.
HTML_PARSE_AS_SPAN = %w[a address b dd dt em h1 h2 h3 h4 h5 h6 legend li p pre span td th]
# HTML elements whose content is left as is.
HTML_PARSE_AS_RAW = %w[script math]

# Maps an HTML element name to the way its content is parsed (:block, :span or :raw). Looking
# up an unknown name stores and returns :span.
HTML_PARSE_AS = Hash.new {|hash, tag| hash[tag] = :span}
[[HTML_PARSE_AS_BLOCK, :block], [HTML_PARSE_AS_SPAN, :span], [HTML_PARSE_AS_RAW, :raw]].each do |tags, mode|
  tags.each {|tag| HTML_PARSE_AS[tag] = mode}
end

# HTML elements that are recognized as the start of an HTML block.
HTML_BLOCK_ELEMENTS = %w[div p pre h1 h2 h3 h4 h5 h6 hr form fieldset iframe legend script dl ul ol table ins del blockquote address]
629
+
630
+ HTML_BLOCK_START = /^#{OPT_SPACE}<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/
631
+
632
# Parse the HTML at the current position as block level HTML.
#
# HTML comments and processing instructions are added verbatim as <tt>:html_raw</tt> elements
# and the remainder of their line is skipped. Otherwise the current line must start with a
# block level HTML tag (one listed in HTML_BLOCK_ELEMENTS or containing a colon) or with a
# closing tag; if it does not, +false+ is returned without consuming anything.
#
# The line is then processed tag by tag: start tags create <tt>:html_element</tt> elements,
# closing tags that match the innermost unclosed tag close it. When an element is still open
# at the end of the line it becomes the current tree so that the following text is added to it.
# Returns +true+ when the line was handled.
def parse_block_html
  if (raw = @src.scan(HTML_COMMENT_RE) || @src.scan(HTML_INSTRUCTION_RE))
    @tree.children << Element.new(:html_raw, raw, :type => :block)
    @src.scan(/.*?\n/)
    return true
  end

  opens_block_tag = @src.check(/^#{OPT_SPACE}#{HTML_TAG_RE}/) &&
                    (HTML_BLOCK_ELEMENTS.include?(@src[1]) || @src[1] =~ /:/)
  return false unless opens_block_tag || @src.check(/^#{OPT_SPACE}#{HTML_TAG_CLOSE_RE}/)

  @src.scan(/^(.*?)\n/)
  line = @src[1]
  current = nil   # innermost element opened on this line that is still open
  parents = []    # enclosing values of +current+, innermost last

  until line.empty?
    open_at = line.index(HTML_TAG_RE)
    close_at = line.index(HTML_TAG_CLOSE_RE)
    open_first = open_at && (close_at.nil? || open_at < close_at)

    if open_first && (current.nil? || current.options[:parse_type] == :block)
      tag = line.match(HTML_TAG_RE)
      tag_name = tag[1]
      # A start tag that is not block level ends the tag processing for this line.
      break unless HTML_BLOCK_ELEMENTS.include?(tag_name) || tag_name =~ /:/

      add_text(tag.pre_match + "\n", current) if current
      line = tag.post_match

      attrs = {}
      tag[2].scan(HTML_ATTRIBUTE_RE).each {|attr_name, _quote, attr_value| attrs[attr_name] = attr_value}
      el = Element.new(:html_element, tag_name, :attr => attrs, :type => :block,
                       :parse_type => HTML_PARSE_AS[tag_name])

      (current || @tree).children << el
      unless tag[4]
        # Not self-closing: descend into the new element.
        @unclosed_html_tags.push(el)
        parents << current
        current = el
      end
    elsif close_at
      tag = line.match(HTML_TAG_CLOSE_RE)
      add_text(tag.pre_match, current) if current

      line = tag.post_match
      if @unclosed_html_tags.size > 0 && tag[1] == @unclosed_html_tags.last.value
        el = @unclosed_html_tags.pop
        # The closed element was opened on an earlier line, so leave its tree.
        @tree = @stack.pop unless current
        current = parents.pop
        if el.options[:parse_type] == :raw
          raise Kramdown::Error, "Bug: please report!" if el.children.size > 1
          el.children.first.type = :raw if el.children.first
        end
      else
        if HTML_BLOCK_ELEMENTS.include?(tag[1]) && (current || @tree).options[:parse_type] == :block
          warning("Found invalidly nested HTML closing tag for '#{tag[1]}'")
        end
        # The unmatched closing tag is kept as plain text.
        if current
          add_text(tag.to_s, current)
        else
          add_text(tag.to_s + "\n")
        end
      end
    else
      if current
        add_text(line, current)
      else
        warning("Ignoring characters at the end of an HTML block line")
      end
      line = ''
    end
  end

  last_child = current && current.children.last
  last_child.value << "\n" if last_child && last_child.type == :text

  if current
    if [:span, :raw].include?(current.options[:parse_type])
      # Everything up to the next potential HTML block start belongs to this element.
      rest = @src.scan_until(/(?=#{HTML_BLOCK_START})|\Z/)
      add_text(rest, current)
    end
    @stack.push(@tree)
    @tree = current
  end
  true
end
719
+ Registry.define_parser(:block, :block_html, HTML_BLOCK_START, self)
720
+
721
+
722
+
723
+
724
# Matches a backslash followed by one of the escapable characters
# <tt>\ . * _ + - ` ( ) [ ] { } # !</tt> (group 1: the escaped character).
#
# The hyphen is escaped inside the character class. Unescaped, <tt>+-`</tt> is a range from
# "+" to "`" which also covers digits, uppercase letters and characters like ",", "/", ":",
# "<", "=", ">", "?", "@" and "^".
ESCAPED_CHARS = /\\([\\.*_+\-`()\[\]{}#!])/

# Parse the backslash-escaped character at the current location.
#
# Consumes the match and adds the escaped character itself (without the backslash) as text.
def parse_escaped_chars
  escaped = @src[1]
  @src.pos += @src.matched_size
  add_text(escaped)
end
731
+ Registry.define_parser(:span, :escaped_chars, ESCAPED_CHARS, self)
732
+
733
+
734
# Parse the HTML entity at the current location.
#
# Consumes the scanner's last match and adds the matched entity unchanged as text.
def parse_html_entity
  entity = @src.matched
  @src.pos += @src.matched_size
  add_text(entity)
end
739
+ Registry.define_parser(:span, :html_entity, REXML::Parsers::BaseParser::REFERENCE_RE, self)
740
+
741
+
742
# Matches one of the characters that have a special meaning in HTML: "&", ">" or "<".
SPECIAL_HTML_CHARS = /&|>|</

# Parse the special HTML characters at the current location.
#
# Consumes the scanner's last match and adds the matched character unchanged as text.
def parse_special_html_chars
  char = @src.matched
  @src.pos += @src.matched_size
  add_text(char)
end
749
+ Registry.define_parser(:span, :special_html_chars, SPECIAL_HTML_CHARS, self)
750
+
751
+
752
# Matches a hard line break: two spaces or two backslashes directly before a newline.
#
# The two spaces are written as <tt> {2}</tt> so that the pattern cannot silently turn into a
# single space, which would make every line with one trailing space a hard line break.
LINE_BREAK = /( {2}|\\\\)(?=\n)/

# Parse the line break at the current location.
#
# Consumes the line break marker (not the newline itself) and adds a <tt>:br</tt> element to
# the current tree.
def parse_line_break
  @src.pos += @src.matched_size
  @tree.children << Element.new(:br)
end
759
+ Registry.define_parser(:span, :line_break, LINE_BREAK, self)
760
+
761
+
762
# Pairs of a typographic symbol as written in the source text and the HTML entities it is
# replaced with. The order matters because the regexp below tries the symbols from first to
# last, e.g. '---' before '--' and the backslash-escaped forms before the plain ones.
TYPOGRAPHIC_SYMS = [
  ['---', '&mdash;'], ['--', '&ndash;'], ['...', '&hellip;'],
  ['\\<<', '&lt;&lt;'], ['\\>>', '&gt;&gt;'],
  ['<< ', '&laquo;&nbsp;'], [' >>', '&nbsp;&raquo;'],
  ['<<', '&laquo;'], ['>>', '&raquo;']
]

# Lookup table from symbol to replacement.
TYPOGRAPHIC_SYMS_SUBST = TYPOGRAPHIC_SYMS.inject({}) do |subst, (sym, entity)|
  subst[sym] = entity
  subst
end

# Matches any of the symbols above.
TYPOGRAPHIC_SYMS_RE = Regexp.new(TYPOGRAPHIC_SYMS.map {|sym, _entity| Regexp.escape(sym)}.join('|'))
768
+
769
# Parse the typographic symbols at the current location.
#
# Consumes the scanner's last match and adds a copy of its replacement from
# TYPOGRAPHIC_SYMS_SUBST as text. A copy is passed on so that the string stored in the lookup
# table is never handed out itself.
def parse_typographic_syms
  replacement = TYPOGRAPHIC_SYMS_SUBST[@src.matched]
  @src.pos += @src.matched_size
  add_text(replacement.dup)
end
774
+ Registry.define_parser(:span, :typographic_syms, TYPOGRAPHIC_SYMS_RE, self)
775
+
776
+
777
# Matches an autolink in angle brackets: either a URL with one of the schemes mailto, http,
# https, ftp or ftps (group 2: the scheme) or any text containing an "@" (group 2 is nil).
# Group 1 is everything between the angle brackets.
AUTOLINK_START = /<((mailto|https?|ftps?):.*?|.*?@.*?)>/

# Parse the autolink at the current location.
#
# For URLs the link text and the href are both the matched URL. For e-mail addresses (no scheme
# or the mailto scheme) the address without a leading "mailto:" is obfuscated and used as link
# text, and the href is built from the obfuscated string "mailto" and that text.
def parse_autolink
  @src.pos += @src.matched_size
  target, scheme = @src[1], @src[2]

  if scheme.nil? || scheme == 'mailto'
    address = scheme ? target.sub(/^mailto:/, '') : target
    text = obfuscate_email(address)
    href = "#{obfuscate_email('mailto')}:#{text}"
  else
    text = href = target
  end

  link = Element.new(:a, nil, {:attr => {'href' => href}})
  add_text(text, link)
  @tree.children << link
end
793
+ Registry.define_parser(:span, :autolink, AUTOLINK_START, self)
794
+
795
+
796
# Matches the delimiter of a code span: one or more backticks.
CODESPAN_DELIMITER = /`+/

# Parse the codespan at the current scanner location.
#
# A single backtick that has whitespace on both sides is added as plain text. Otherwise the
# text up to the next occurrence of the same delimiter becomes a <tt>:codespan</tt> element;
# for delimiters longer than one backtick a single leading and a single trailing space are
# removed from the code. When no closing delimiter exists, the opening delimiter is added as
# plain text and scanning continues right after it.
def parse_codespan
  delimiter = @src.scan(CODESPAN_DELIMITER)
  single = (delimiter.length == 1)
  start_pos = @src.pos

  if single && @src.pre_match =~ /\s\Z/ && @src.match?(/\s/)
    add_text(delimiter)
    return
  end

  code = @src.scan_until(/#{delimiter}/)
  if code.nil?
    @src.pos = start_pos
    add_text(delimiter)
  else
    # scan_until returns the closing delimiter as well, so cut it off again.
    code.sub!(/#{delimiter}\Z/, '')
    unless single
      code = code[1..-1] if code[0..0] == ' '
      code = code[0..-2] if code[-1..-1] == ' '
    end
    @tree.children << Element.new(:codespan, code)
  end
end
822
+ Registry.define_parser(:span, :codespan, CODESPAN_DELIMITER, self)
823
+
824
+
825
+ IAL_SPAN_START = /\{:(#{ALD_ANY_CHARS}+)\}/
826
+
827
# Parse the inline attribute list at the current location.
#
# When the last child of the current tree is something other than a text element, the
# attributes (group 1 of the scanner's last match) are parsed and applied to both the
# <tt>:ial</tt> and the <tt>:attr</tt> option of that child. Otherwise a warning is issued and
# the matched text is added as plain text.
def parse_span_ial
  @src.pos += @src.matched_size
  target = @tree.children.last

  if target.nil? || target.type == :text
    warning("Ignoring span IAL because preceding element is just text")
    add_text(@src.matched)
  else
    attr = {}
    parse_attribute_list(@src[1], attr)
    update_ial_with_ial(target.options[:ial] ||= {}, attr)
    update_attr_with_ial(target.options[:attr] ||= {}, attr)
  end
end
840
+ Registry.define_parser(:span, :span_ial, IAL_SPAN_START, self)
841
+
842
+
843
+ FOOTNOTE_MARKER_START = /\[\^(#{ALD_ID_NAME})\]/
844
+
845
# Parse the footnote marker at the current location.
#
# Group 1 of the scanner's last match is the footnote name. When no definition with that name
# exists, a warning is issued and the marker is added as plain text. The same happens when the
# definition already has a marker that is still part of the tree, i.e. every element on the
# marker's recorded stack still contains the next one as a child. Otherwise a new
# <tt>:footnote</tt> element is created, remembered in the definition together with the chain
# of elements leading to it, and added to the current tree.
def parse_footnote_marker
  @src.pos += @src.matched_size
  name = @src[1]
  fn_def = @doc.parse_infos[:footnotes][name]

  unless fn_def
    warning("Footnote definition for '#{name}' not found")
    return add_text(@src.matched)
  end

  marker = fn_def[:marker]
  still_in_tree = marker && begin
    chain = marker.options[:stack]
    chain[0..-2].zip(chain[1..-1]).all? {|parent, child| parent.children.include?(child)}
  end

  if marker && still_in_tree
    warning("Footnote marker '#{name}' already appeared in document, ignoring newly found marker")
    add_text(@src.matched)
  else
    marker = Element.new(:footnote, nil, :name => name)
    fn_def[:marker] = marker
    marker.options[:stack] = [@stack, @tree, marker].flatten.compact
    @tree.children << marker
  end
end
866
+ Registry.define_parser(:span, :footnote_marker, FOOTNOTE_MARKER_START, self)
867
+
868
+
869
# Matches an emphasis delimiter: one or two asterisks or one or two underscores.
EMPHASIS_START = /(?:\*\*?|__?)/

# Parse the emphasis at the current location.
#
# A delimiter of length two starts a <tt>:strong</tt> element, a delimiter of length one an
# <tt>:em</tt> element. The delimiter is added as plain text when it is followed by whitespace
# or, for underscores, when it sits between two alphabetic characters. When no end for a
# strong element can be found, the text is re-parsed as <tt>:em</tt> starting at the second
# delimiter character. When nothing is found at all, the delimiter is added as plain text.
def parse_emphasis
  delimiter = @src.scan(EMPHASIS_START)
  element = (delimiter.length == 2 ? :strong : :em)
  type = (delimiter =~ /_/ ? '_' : '*')
  reset_pos = @src.pos

  intraword_underscore = type == '_' && @src.pre_match =~ /[[:alpha:]]\Z/ && @src.check(/[[:alpha:]]/)
  if intraword_underscore || @src.check(/\s/)
    add_text(delimiter)
    return
  end

  # Parses the spans up to +delim+ into a new element of type +kind+. The block decides whether
  # an occurrence of +delim+ really ends the element: it must not be preceded by whitespace,
  # for :em it must not be the start of a doubled delimiter, for underscores it must not be
  # followed by an alphabetic character, and the element must not be empty.
  try_parse = lambda do |delim, kind|
    el = Element.new(kind)
    stop_re = /#{Regexp.escape(delim)}/
    found = parse_spans(el, stop_re) do
      (@src.string[@src.pos-1, 1] !~ /\s/) &&
        (kind != :em || !@src.match?(/#{Regexp.escape(delim*2)}(?!#{Regexp.escape(delim)})/)) &&
        (type != '_' || !@src.match?(/#{Regexp.escape(delim)}[[:alpha:]]/)) && el.children.size > 0
    end
    [found, el, stop_re]
  end

  found, el, stop_re = try_parse.call(delimiter, element)
  if !found && element == :strong
    # Step back onto the second delimiter character and retry as simple emphasis.
    @src.pos = reset_pos - 1
    found, el, stop_re = try_parse.call(type, :em)
  end

  if found
    @src.scan(stop_re)
    @tree.children << el
  else
    @src.pos = reset_pos
    add_text(delimiter)
  end
end
907
+ Registry.define_parser(:span, :emphasis, EMPHASIS_START, self)
908
+
909
+
910
+ HTML_SPAN_START = /<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--)/
911
+
912
# Parse the HTML at the current position as span level HTML.
#
# HTML comments and processing instructions are added verbatim as <tt>:html_raw</tt> elements.
# A start tag becomes an <tt>:html_element</tt>: a self-closing tag is added directly, for any
# other tag the spans up to the matching closing tag are parsed into it. When no closing tag is
# found, the start tag is added as plain text and scanning continues right after it. When the
# text at the current position is none of the above, a single character is added as text.
def parse_span_html
  if (raw = @src.scan(HTML_COMMENT_RE) || @src.scan(HTML_INSTRUCTION_RE))
    @tree.children << Element.new(:html_raw, raw, :type => :span)
  elsif (tag_text = @src.scan(HTML_TAG_RE))
    reset_pos = @src.pos
    tag_name, attr_source, self_closing = @src[1], @src[2], @src[4]

    attrs = {}
    attr_source.scan(HTML_ATTRIBUTE_RE).each {|name, _quote, value| attrs[name] = value}
    el = Element.new(:html_element, tag_name, :attr => attrs, :type => :span)

    if self_closing
      @tree.children << el
    else
      stop_re = /<\/#{Regexp.escape(tag_name)}\s*>/
      if parse_spans(el, stop_re)
        @src.scan(stop_re)
        @tree.children << el
      else
        @src.pos = reset_pos
        add_text(tag_text)
      end
    end
  else
    add_text(@src.scan(/./))
  end
end
939
# Register the span level HTML parser with the span level start pattern. HTML_BLOCK_START is
# anchored to the beginning of a line (plus optional indentation) and additionally matches
# closing tags, so it would not trigger on HTML that appears in the middle of a line.
Registry.define_parser(:span, :span_html, HTML_SPAN_START, self)
940
+
941
+
942
+ LINK_TEXT_BRACKET_RE = /\\\[|\\\]|\[|\]/
943
+ LINK_INLINE_ID_RE = /\s*?\[(#{LINK_ID_CHARS}+)?\]/
944
+ LINK_INLINE_TITLE_RE = /\s*?(["'])(.+?)\1\s*?\)/
945
+
946
+ LINK_START = /!?\[(?=[^^])/
947
+
948
# Parse the link at the current scanner position. This method is used to parse normal links as
# well as image links.
#
# The link text is parsed up to the closing bracket that balances the opening one. After that
# one of three forms is accepted: a reference (an explicit <tt>[id]</tt>, or the link text
# itself used as ID when no parenthesis follows), a URL in parentheses, or a URL followed by a
# quoted title in parentheses. Whenever the construct turns out not to be a valid link, the
# scanner is reset to just after the opening bracket and that bracket is added as plain text.
def parse_link
  opener = @src.scan(LINK_START)
  reset_pos = @src.pos

  link_type = (opener =~ /^!/ ? :img : :a)

  # no nested links allowed
  if link_type == :a && ([:img, :a].include?(@tree.type) ||
                         @stack.any? {|t, _s| t && [:img, :a].include?(t.type)})
    add_text(opener)
    return
  end
  el = Element.new(link_type)

  # Parse the link text; +depth+ tracks the bracket nesting. Images that were parsed into the
  # element are subtracted because their opening bracket was counted as well.
  stop_re = /\]|!?\[/
  depth = 1
  found = parse_spans(el, stop_re) do
    case @src.matched
    when "[", "!["
      depth += 1
    when "]"
      depth -= 1
    end
    depth == el.children.select {|c| c.type == :img}.size
  end
  if !found || el.children.empty?
    @src.pos = reset_pos
    add_text(opener)
    return
  end
  alt_text = @src.string[reset_pos...@src.pos]
  conv_link_id = alt_text.gsub(/(\s|\n)+/m, ' ').gsub(LINK_ID_NON_CHARS, '').downcase
  @src.scan(stop_re)

  # reference style link or no link url
  if @src.scan(LINK_INLINE_ID_RE) || !@src.check(/\(/)
    link_id = (@src[1] || conv_link_id).downcase
    link_defs = @doc.parse_infos[:link_defs]
    if link_defs.has_key?(link_id)
      definition = link_defs[link_id]
      add_link(el, definition.first, definition.last, alt_text)
    else
      warning("No link definition for link ID '#{link_id}' found")
      @src.pos = reset_pos
      add_text(opener)
    end
    return
  end

  # link url in parentheses
  if @src.scan(/\(<(.*?)>/)
    link_url = @src[1]
    if @src.scan(/\)/)
      add_link(el, link_url, nil, alt_text)
      return
    end
  else
    # Collect the URL up to the first whitespace or up to the parenthesis that balances the
    # opening one.
    link_url = ''
    delimiter_re = /\(|\)|\s/
    open_parens = 0
    while (chunk = @src.scan_until(delimiter_re))
      link_url += chunk
      case @src.matched
      when /\s/
        break
      when '('
        open_parens += 1
      when ')'
        open_parens -= 1
        break if open_parens == 0
      end
    end
    # Drop the opening parenthesis and the delimiter that ended the loop.
    link_url = link_url[1..-2]

    if open_parens == 0
      add_link(el, link_url, nil, alt_text)
      return
    end
  end

  # Only a quoted title followed by the closing parenthesis may follow now.
  if @src.scan(LINK_INLINE_TITLE_RE)
    add_link(el, link_url, @src[2], alt_text)
  else
    @src.pos = reset_pos
    add_text(opener)
  end
end
1034
+ Registry.define_parser(:span, :link, LINK_START, self)
1035
+
1036
+
1037
# This helper method adds the appropriate attributes to the element +el+ of type +a+ or +img+
# and the element itself to the <tt>@tree</tt>.
#
# The +title+ attribute is only set when +title+ is given. Links get +href+ as their href
# attribute. For every other element type +href+ becomes the src attribute, +alt_text+ the alt
# attribute and all children of the element are removed.
def add_link(el, href, title, alt_text = nil)
  attributes = (el.options[:attr] ||= {})
  attributes['title'] = title if title

  case el.type
  when :a
    attributes['href'] = href
  else
    attributes['src'] = href
    attributes['alt'] = alt_text
    el.children.clear
  end

  @tree.children << el
end
1051
+
1052
+ end
1053
+
1054
+ end
1055
+
1056
+ end