kramdown 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of kramdown might be problematic. Click here for more details.

Files changed (67) hide show
  1. data/ChangeLog +267 -0
  2. data/VERSION +1 -1
  3. data/benchmark/benchmark.rb +2 -1
  4. data/benchmark/generate_data.rb +110 -0
  5. data/benchmark/historic-jruby-1.4.0.dat +7 -0
  6. data/benchmark/historic-ruby-1.8.6.dat +7 -0
  7. data/benchmark/historic-ruby-1.8.7.dat +7 -0
  8. data/benchmark/historic-ruby-1.9.1p243.dat +7 -0
  9. data/benchmark/historic-ruby-1.9.2dev.dat +7 -0
  10. data/benchmark/static-jruby-1.4.0.dat +7 -0
  11. data/benchmark/static-ruby-1.8.6.dat +7 -0
  12. data/benchmark/static-ruby-1.8.7.dat +7 -0
  13. data/benchmark/static-ruby-1.9.1p243.dat +7 -0
  14. data/benchmark/static-ruby-1.9.2dev.dat +7 -0
  15. data/benchmark/testing.sh +1 -1
  16. data/doc/index.page +5 -5
  17. data/doc/installation.page +3 -3
  18. data/doc/quickref.page +3 -3
  19. data/doc/syntax.page +133 -101
  20. data/doc/tests.page +9 -1
  21. data/lib/kramdown/compatibility.rb +34 -0
  22. data/lib/kramdown/converter.rb +26 -8
  23. data/lib/kramdown/document.rb +2 -1
  24. data/lib/kramdown/parser.rb +1 -1192
  25. data/lib/kramdown/parser/kramdown.rb +272 -0
  26. data/lib/kramdown/parser/kramdown/attribute_list.rb +102 -0
  27. data/lib/kramdown/parser/kramdown/autolink.rb +42 -0
  28. data/lib/kramdown/parser/kramdown/blank_line.rb +43 -0
  29. data/lib/kramdown/parser/kramdown/blockquote.rb +42 -0
  30. data/lib/kramdown/parser/kramdown/codeblock.rb +62 -0
  31. data/lib/kramdown/parser/kramdown/codespan.rb +57 -0
  32. data/lib/kramdown/parser/kramdown/emphasis.rb +69 -0
  33. data/lib/kramdown/parser/kramdown/eob.rb +39 -0
  34. data/lib/kramdown/parser/kramdown/escaped_chars.rb +38 -0
  35. data/lib/kramdown/parser/kramdown/extension.rb +65 -0
  36. data/lib/kramdown/parser/kramdown/footnote.rb +72 -0
  37. data/lib/kramdown/parser/kramdown/header.rb +81 -0
  38. data/lib/kramdown/parser/kramdown/horizontal_rule.rb +39 -0
  39. data/lib/kramdown/parser/kramdown/html.rb +253 -0
  40. data/lib/kramdown/{deprecated.rb → parser/kramdown/html_entity.rb} +10 -12
  41. data/lib/kramdown/parser/kramdown/line_break.rb +38 -0
  42. data/lib/kramdown/parser/kramdown/link.rb +153 -0
  43. data/lib/kramdown/parser/kramdown/list.rb +225 -0
  44. data/lib/kramdown/parser/kramdown/paragraph.rb +44 -0
  45. data/lib/kramdown/parser/kramdown/typographic_symbol.rb +48 -0
  46. data/lib/kramdown/version.rb +1 -1
  47. data/test/testcases/block/09_html/comment.html +1 -0
  48. data/test/testcases/block/09_html/comment.text +1 -1
  49. data/test/testcases/block/09_html/content_model/tables.text +2 -2
  50. data/test/testcases/block/09_html/not_parsed.html +10 -0
  51. data/test/testcases/block/09_html/not_parsed.text +9 -0
  52. data/test/testcases/block/09_html/parse_as_raw.html +4 -0
  53. data/test/testcases/block/09_html/parse_as_raw.text +2 -0
  54. data/test/testcases/block/09_html/parse_block_html.html +4 -0
  55. data/test/testcases/block/09_html/parse_block_html.text +3 -0
  56. data/test/testcases/block/09_html/processing_instruction.html +1 -0
  57. data/test/testcases/block/09_html/processing_instruction.text +1 -1
  58. data/test/testcases/block/09_html/simple.html +8 -15
  59. data/test/testcases/block/09_html/simple.text +2 -12
  60. data/test/testcases/span/02_emphasis/normal.html +8 -4
  61. data/test/testcases/span/02_emphasis/normal.text +6 -2
  62. data/test/testcases/span/05_html/markdown_attr.html +2 -1
  63. data/test/testcases/span/05_html/markdown_attr.text +2 -1
  64. data/test/testcases/span/05_html/normal.html +6 -2
  65. data/test/testcases/span/05_html/normal.text +4 -0
  66. metadata +35 -4
  67. data/lib/kramdown/parser/registry.rb +0 -62
@@ -0,0 +1,272 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ #--
4
+ # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
5
+ #
6
+ # This file is part of kramdown.
7
+ #
8
+ # kramdown is free software: you can redistribute it and/or modify
9
+ # it under the terms of the GNU General Public License as published by
10
+ # the Free Software Foundation, either version 3 of the License, or
11
+ # (at your option) any later version.
12
+ #
13
+ # This program is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ # GNU General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU General Public License
19
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
20
+ #++
21
+ #
22
+
23
+ require 'strscan'
24
+ require 'stringio'
25
+
26
+ #TODO: use [[:alpha:]] in all regexps to allow parsing of international values in 1.9.1
27
+ #NOTE: use @src.pre_match only before other check/match?/... operations, otherwise the content is changed
28
+
29
module Kramdown

  module Parser

    # Used for parsing a document in kramdown format.
    #
    # This class is the driver: it holds the registry of block and span level parsers (see
    # ::define_parser), runs the block level parsers over the source text and afterwards runs the
    # span level parsers over all collected <tt>:text</tt> elements. The individual parser methods
    # are loaded from the files required at the bottom of the class body.
    class Kramdown

      # NOTE(review): presumably mixed in so that constants of the top-level module resolve
      # through this class, which shares the module's name -- verify before removing.
      include ::Kramdown

      attr_reader :tree
      attr_reader :doc

      # Create a new Kramdown parser object for the Kramdown::Document +doc+.
      #
      # The +:ald+, +:link_defs+ and +:footnotes+ entries of <tt>doc.parse_infos</tt> are reset to
      # empty hashes.
      def initialize(doc)
        @doc = doc

        @src = nil
        @tree = nil
        @stack = []
        @text_type = :text

        @doc.parse_infos[:ald] = {}
        @doc.parse_infos[:link_defs] = {}
        @doc.parse_infos[:footnotes] = {}
      end
      # Instances are only created through ::parse.
      private_class_method(:new, :allocate)


      # Parse the string +source+ using the Kramdown::Document +doc+ and return the parse tree.
      def self.parse(source, doc)
        new(doc).parse(source)
      end

      # Parse the string +source+ and return the created root element.
      #
      # First all block level elements are parsed, then the span level parsers are run over the
      # resulting tree and over the content of every collected footnote.
      def parse(source)
        configure_parser
        tree = Element.new(:root)
        parse_blocks(tree, adapt_source(source))
        update_tree(tree)
        @doc.parse_infos[:footnotes].each do |_name, data|
          update_tree(data[:content])
        end
        tree
      end

      # Add the given warning +text+ to the warning array of the Kramdown document.
      def warning(text)
        @doc.warnings << text
        #TODO: add position information
      end

      #######
      private
      #######

      # Names of the block level parsers, in the order in which they are tried.
      BLOCK_PARSERS = [:blank_line, :codeblock, :codeblock_fenced, :blockquote, :atx_header,
                       :setext_header, :horizontal_rule, :list, :definition_list, :link_definition, :block_html,
                       :footnote_definition, :ald, :block_ial, :extension_block, :eob_marker, :paragraph]
      # Names of the span level parsers, in the order in which they are tried.
      SPAN_PARSERS = [:emphasis, :codespan, :autolink, :span_html, :footnote_marker, :link,
                      :span_ial, :html_entity, :typographic_syms, :line_break, :escaped_chars]

      # Look up all block and span level parsers in the registry and pre-compute the regexp that
      # matches the start of any span level element.
      #
      # Raises Kramdown::Error if one of the listed parsers has not been registered.
      def configure_parser
        @parsers = {}
        (BLOCK_PARSERS + SPAN_PARSERS).each do |name|
          if self.class.has_parser?(name)
            @parsers[name] = self.class.parser(name)
          else
            raise Kramdown::Error, "Unknown parser: #{name}"
          end
        end
        @span_start = Regexp.union(*SPAN_PARSERS.map {|name| @parsers[name].start_re})
        # Lookahead form: lets scan_until stop *before* the start of a span element.
        @span_start_re = /(?=#{@span_start})/
      end

      # Parse all block level elements in +text+ into the element +el+.
      #
      # If +text+ is +nil+, parsing continues on the current scanner. The previous tree/scanner
      # pair is saved on the stack and restored afterwards so that the method can be called
      # recursively by block parsers. Returns the value thrown with +:stop_block_parsing+, or the
      # result of the parsing loop if nothing was thrown.
      def parse_blocks(el, text = nil)
        @stack.push([@tree, @src])
        @tree, @src = el, (text.nil? ? @src : StringScanner.new(text))

        status = catch(:stop_block_parsing) do
          while !@src.eos?
            # The first parser whose start regexp matches and whose method returns a true value
            # handles the current position.
            BLOCK_PARSERS.any? do |name|
              if @src.check(@parsers[name].start_re)
                send(@parsers[name].method)
              else
                false
              end
            end || begin
              warning('Warning: this should not occur - no block parser handled the line')
              add_text(@src.scan(/.*\n/))
            end
          end
        end

        @tree, @src = *@stack.pop
        status
      end

      # Update the tree by parsing all <tt>:text</tt> elements with the span level parser (resets
      # +@tree+, +@src+ and the +@stack+) and by updating the attributes from the IALs.
      #
      # Each <tt>:text</tt> child is replaced in place by the elements produced by the span
      # parsers; all other children are processed recursively.
      def update_tree(element)
        element.children.map! do |child|
          if child.type == :text
            @stack, @tree = [], nil
            @src = StringScanner.new(child.value)
            parse_spans(child)
            child.children
          else
            update_tree(child)
            update_attr_with_ial(child.options[:attr] ||= {}, child.options[:ial]) if child.options[:ial]
            child
          end
        end.flatten!
      end

      # Parse all span level elements in the source string.
      #
      # * +el+ is the element to which the parsed elements are added.
      # * +stop_re+, if given, is a regexp at which parsing may stop. If a block is given, it is
      #   called when +stop_re+ matches and its result decides whether parsing really stops.
      # * +parsers+ restricts the used span parsers to the given names (default: SPAN_PARSERS).
      # * +text_type+ is the element type used for plain text added via #add_text.
      #
      # Returns the stop decision (+false+ if parsing ended without stopping at +stop_re+).
      def parse_spans(el, stop_re = nil, parsers = nil, text_type = :text)
        @stack.push([@tree, @text_type])
        @tree, @text_type = el, text_type

        span_start = @span_start
        span_start_re = @span_start_re
        if parsers
          span_start = Regexp.union(*parsers.map {|name| @parsers[name].start_re})
          span_start_re = /(?=#{span_start})/
        end
        parsers = parsers || SPAN_PARSERS

        used_re = (stop_re.nil? ? span_start_re : /(?=#{Regexp.union(stop_re, span_start)})/)
        stop_re_found = false
        while !@src.eos? && !stop_re_found
          if result = @src.scan_until(used_re)
            add_text(result)
            if stop_re && (stop_re_matched = @src.check(stop_re))
              stop_re_found = (block_given? ? yield : true)
            end
            processed = parsers.any? do |name|
              if @src.check(@parsers[name].start_re)
                send(@parsers[name].method)
                true
              else
                false
              end
            end unless stop_re_found
            if !processed && !stop_re_found
              if stop_re_matched
                # The stop regexp matched but was rejected and no span parser took over: consume
                # exactly one character as text so that the loop makes progress. getch is used
                # instead of scan(/./) because "." does not match a newline, in which case scan
                # would return nil and adding the text would fail.
                char = @src.getch
                add_text(char) if char
              else
                raise Kramdown::Error, 'Bug: please report!'
              end
            end
          else
            # No further span start (or stop) in the rest of the source.
            add_text(@src.scan(/.*/m)) unless stop_re
            break
          end
        end

        @tree, @text_type = @stack.pop

        stop_re_found
      end

      # Modify the string +source+ to be usable by the parser: line endings are normalized to
      # "\n" and the result ends with "\n".
      def adapt_source(source)
        source.gsub(/\r\n?/, "\n").chomp + "\n"
      end

      # This helper method adds the given +text+ either to the last element in the +tree+ if it is a
      # +type+ element or creates a new text element with the given +type+.
      #
      # Empty text never creates a new element.
      def add_text(text, tree = @tree, type = @text_type)
        if tree.children.last && tree.children.last.type == type
          tree.children.last.value << text
        elsif !text.empty?
          tree.children << Element.new(type, text)
        end
      end

      # Update the attributes with the information from the inline attribute list and all referenced ALDs.
      #
      # Referenced ALDs are applied first, so values given directly in +ial+ take precedence;
      # class names are accumulated instead of overwritten.
      #
      # NOTE(review): ALDs that reference each other cyclically would recurse without bound here.
      def update_attr_with_ial(attr, ial)
        ial[:refs].each do |ref|
          update_attr_with_ial(attr, ref) if ref = @doc.parse_infos[:ald][ref]
        end if ial[:refs]
        attr['class'] = ((attr['class'] || '') + " #{ial['class']}").lstrip if ial['class']
        # Only String keys are real attributes; Symbol keys (like :refs) are bookkeeping entries.
        ial.each {|k,v| attr[k] = v if k.kind_of?(String) && k != 'class' }
      end


      # Registry of all defined parsers, keyed by parser name.
      @@parsers = {}

      # Holds all the needed data for one block/span level parser.
      Data = Struct.new(:name, :start_re, :method)

      # Add a parser method
      #
      # * with the given +name+,
      # * using +start_re+ as start regexp
      #
      # to the registry. The method name is automatically derived from the +name+ or can explicitly
      # be set by using the +meth_name+ parameter.
      #
      # Raises an error if a parser with the same name has already been defined.
      def self.define_parser(name, start_re, meth_name = "parse_#{name}")
        raise "A parser with the name #{name} already exists!" if @@parsers.has_key?(name)
        @@parsers[name] = Data.new(name, start_re, meth_name)
      end

      # Return the Data structure for the parser +name+ (+nil+ if there is no such parser).
      def self.parser(name = nil)
        @@parsers[name]
      end

      # Return +true+ if there is a parser called +name+.
      def self.has_parser?(name)
        @@parsers.has_key?(name)
      end

      # Regexp matching one level of indentation: a tab or four spaces at the start of a line.
      INDENT = /^(?:\t| {4})/
      # Regexp matching up to three optional spaces.
      OPT_SPACE = / {0,3}/

      # The parser files are loaded here, inside the class body and after the registry and the
      # shared constants above have been defined.
      require 'kramdown/parser/kramdown/blank_line'
      require 'kramdown/parser/kramdown/eob'
      require 'kramdown/parser/kramdown/paragraph'
      require 'kramdown/parser/kramdown/header'
      require 'kramdown/parser/kramdown/blockquote'
      require 'kramdown/parser/kramdown/codeblock'
      require 'kramdown/parser/kramdown/horizontal_rule'
      require 'kramdown/parser/kramdown/list'
      require 'kramdown/parser/kramdown/link'
      require 'kramdown/parser/kramdown/attribute_list'
      require 'kramdown/parser/kramdown/extension'
      require 'kramdown/parser/kramdown/footnote'
      require 'kramdown/parser/kramdown/html'
      require 'kramdown/parser/kramdown/escaped_chars'
      require 'kramdown/parser/kramdown/html_entity'
      require 'kramdown/parser/kramdown/line_break'
      require 'kramdown/parser/kramdown/typographic_symbol'
      require 'kramdown/parser/kramdown/autolink'
      require 'kramdown/parser/kramdown/codespan'
      require 'kramdown/parser/kramdown/emphasis'

    end

  end

end
@@ -0,0 +1,102 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ #--
4
+ # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
5
+ #
6
+ # This file is part of kramdown.
7
+ #
8
+ # kramdown is free software: you can redistribute it and/or modify
9
+ # it under the terms of the GNU General Public License as published by
10
+ # the Free Software Foundation, either version 3 of the License, or
11
+ # (at your option) any later version.
12
+ #
13
+ # This program is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ # GNU General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU General Public License
19
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
20
+ #++
21
+ #
22
+
23
module Kramdown
  module Parser
    class Kramdown

      # Parse the string +str+ and extract all attributes and add all found attributes to the hash
      # +opts+.
      #
      # * References to ALDs are collected in the array <tt>opts[:refs]</tt>.
      # * Class names are appended to <tt>opts['class']</tt> (separated by a space).
      # * An ID name overwrites <tt>opts['id']</tt>.
      # * Key-value pairs are stored under the key, with an escaped closing brace or an escaped
      #   quote character unescaped in the value.
      def parse_attribute_list(str, opts)
        str.scan(ALD_TYPE_ANY).each do |key, sep, val, id_attr, class_attr, ref|
          if ref
            (opts[:refs] ||= []) << ref
          elsif class_attr
            opts['class'] = ((opts['class'] || '') + " #{class_attr}").lstrip
          elsif id_attr
            opts['id'] = id_attr
          else
            opts[key] = val.gsub(/\\(\}|#{sep})/, "\\1")
          end
        end
      end

      # Update the +ial+ with the information from the inline attribute list +opts+.
      #
      # References are appended to <tt>ial[:refs]</tt>, class names are accumulated and all other
      # entries of +opts+ overwrite the corresponding entries of +ial+.
      def update_ial_with_ial(ial, opts)
        # The references have to be appended one by one (concat): pushing the whole array, or nil
        # when there are none, as a single element would store entries that can never be used as
        # ALD names. ial[:refs] is always initialized to an array, as before.
        refs = (ial[:refs] ||= [])
        refs.concat(opts[:refs]) if opts[:refs]
        ial['class'] = ((ial['class'] || '') + " #{opts['class']}").lstrip if opts['class']
        opts.each {|k,v| ial[k] = v if k != :refs && k != 'class' }
      end


      # Building blocks for the attribute list syntax.
      ALD_ID_CHARS = /[\w\d-]/
      ALD_ANY_CHARS = /\\\}|[^\}]/
      ALD_ID_NAME = /(?:\w|\d)#{ALD_ID_CHARS}*/
      # key="value" or key='value'
      ALD_TYPE_KEY_VALUE_PAIR = /(#{ALD_ID_NAME})=("|')((?:\\\}|\\\2|[^\}\2])+?)\2/
      # .classname
      ALD_TYPE_CLASS_NAME = /\.(#{ALD_ID_NAME})/
      # #idname
      ALD_TYPE_ID_NAME = /#(#{ALD_ID_NAME})/
      # bare name, i.e. a reference
      ALD_TYPE_REF = /(#{ALD_ID_NAME})/
      # Any one of the above; the capture group order is relied upon by #parse_attribute_list.
      ALD_TYPE_ANY = /(?:\A|\s)(?:#{ALD_TYPE_KEY_VALUE_PAIR}|#{ALD_TYPE_ID_NAME}|#{ALD_TYPE_CLASS_NAME}|#{ALD_TYPE_REF})(?=\s|\Z)/
      ALD_START = /^#{OPT_SPACE}\{:(#{ALD_ID_NAME}):(#{ALD_ANY_CHARS}+)\}\s*?\n/

      # Parse the attribute list definition at the current location.
      #
      # The parsed attributes are merged into the entry for the ALD name in
      # <tt>@doc.parse_infos[:ald]</tt>.
      def parse_ald
        @src.pos += @src.matched_size
        parse_attribute_list(@src[2], @doc.parse_infos[:ald][@src[1]] ||= {})
        true
      end
      define_parser(:ald, ALD_START)


      IAL_BLOCK_START = /^#{OPT_SPACE}\{:(?!:)(#{ALD_ANY_CHARS}+)\}\s*?\n/

      # Parse the inline attribute list at the current location.
      #
      # The attributes are stored in the <tt>:ial</tt> option of the preceding element; the IAL is
      # consumed without effect if there is no preceding element or if it is a blank element.
      def parse_block_ial
        @src.pos += @src.matched_size
        if @tree.children.last && @tree.children.last.type != :blank
          parse_attribute_list(@src[1], @tree.children.last.options[:ial] ||= {})
        end
        true
      end
      define_parser(:block_ial, IAL_BLOCK_START)


      IAL_SPAN_START = /\{:(#{ALD_ANY_CHARS}+)\}/

      # Parse the inline attribute list at the current location.
      #
      # The attributes are applied to the preceding span element. If the preceding element is
      # plain text (or there is none), a warning is issued and the IAL is kept as text.
      def parse_span_ial
        @src.pos += @src.matched_size
        if @tree.children.last && @tree.children.last.type != :text
          attr = {}
          parse_attribute_list(@src[1], attr)
          update_ial_with_ial(@tree.children.last.options[:ial] ||= {}, attr)
          update_attr_with_ial(@tree.children.last.options[:attr] ||= {}, attr)
        else
          warning("Ignoring span IAL because preceding element is just text")
          add_text(@src.matched)
        end
      end
      define_parser(:span_ial, IAL_SPAN_START)

    end
  end
end
@@ -0,0 +1,42 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ #--
4
+ # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
5
+ #
6
+ # This file is part of kramdown.
7
+ #
8
+ # kramdown is free software: you can redistribute it and/or modify
9
+ # it under the terms of the GNU General Public License as published by
10
+ # the Free Software Foundation, either version 3 of the License, or
11
+ # (at your option) any later version.
12
+ #
13
+ # This program is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ # GNU General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU General Public License
19
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
20
+ #++
21
+ #
22
+
23
module Kramdown
  module Parser
    class Kramdown

      # Matches <scheme:...> for the mailto, http(s) and ftp(s) schemes as well as <...@...>.
      AUTOLINK_START = /<((mailto|https?|ftps?):.*?|\S*?@\S*?)>/

      # Parse the autolink at the current location.
      #
      # A link without a scheme is treated as mail address and gets a "mailto:" prefix in its
      # +href+; the link text never contains the "mailto:" prefix. The +:obfuscate_text+ option is
      # set for all mail links.
      def parse_autolink
        @src.pos += @src.matched_size
        target, scheme = @src[1], @src[2]
        bare_address = scheme.nil?
        href = (bare_address ? "mailto:#{target}" : target)
        link = Element.new(:a, nil, {:attr => {'href' => href}, :obfuscate_text => (bare_address || scheme == 'mailto')})
        add_text(target.sub(/^mailto:/, ''), link)
        @tree.children << link
      end
      define_parser(:autolink, AUTOLINK_START)

    end
  end
end
@@ -0,0 +1,43 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ #--
4
+ # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
5
+ #
6
+ # This file is part of kramdown.
7
+ #
8
+ # kramdown is free software: you can redistribute it and/or modify
9
+ # it under the terms of the GNU General Public License as published by
10
+ # the Free Software Foundation, either version 3 of the License, or
11
+ # (at your option) any later version.
12
+ #
13
+ # This program is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ # GNU General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU General Public License
19
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
20
+ #++
21
+ #
22
+
23
module Kramdown
  module Parser
    class Kramdown

      # Matches one or more consecutive lines that contain only whitespace.
      BLANK_LINE = /(?:^\s*\n)+/

      # Parse the blank line at the current position.
      #
      # The matched text is appended to a directly preceding <tt>:blank</tt> element; otherwise a
      # new <tt>:blank</tt> element is added to the tree. Always returns +true+.
      def parse_blank_line
        @src.pos += @src.matched_size
        previous = @tree.children.last
        if previous.nil? || previous.type != :blank
          @tree.children << Element.new(:blank, @src.matched)
        else
          previous.value += @src.matched
        end
        true
      end
      define_parser(:blank_line, BLANK_LINE)

    end
  end
end
+ end