kramdown 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of kramdown might be problematic. Click here for more details.

Files changed (87) hide show
  1. data/COPYING +1 -1
  2. data/ChangeLog +423 -1
  3. data/Rakefile +5 -5
  4. data/VERSION +1 -1
  5. data/doc/default.template +4 -4
  6. data/doc/index.page +5 -4
  7. data/doc/news.feed +1 -1
  8. data/doc/quickref.page +72 -25
  9. data/doc/syntax.page +238 -66
  10. data/doc/tests.page +2 -3
  11. data/lib/kramdown/converter.rb +41 -10
  12. data/lib/kramdown/deprecated.rb +41 -0
  13. data/lib/kramdown/document.rb +17 -8
  14. data/lib/kramdown/extension.rb +13 -7
  15. data/lib/kramdown/parser.rb +263 -95
  16. data/lib/kramdown/version.rb +28 -0
  17. data/test/run_tests.rb +1 -1
  18. data/test/test_files.rb +1 -1
  19. data/test/testcases/block/02_eob/middle.html +1 -0
  20. data/test/testcases/block/04_header/atx_header.html +8 -0
  21. data/test/testcases/block/04_header/atx_header.text +8 -0
  22. data/test/testcases/block/04_header/setext_header.html +6 -0
  23. data/test/testcases/block/04_header/setext_header.text +9 -0
  24. data/test/testcases/block/07_horizontal_rule/sepspaces.html +3 -0
  25. data/test/testcases/block/07_horizontal_rule/sepspaces.text +3 -0
  26. data/test/testcases/block/07_horizontal_rule/septabs.html +3 -0
  27. data/test/testcases/block/07_horizontal_rule/septabs.text +3 -0
  28. data/test/testcases/block/09_html/content_model/deflists.html +6 -0
  29. data/test/testcases/block/09_html/content_model/deflists.options +1 -0
  30. data/test/testcases/block/09_html/content_model/deflists.text +6 -0
  31. data/test/testcases/block/09_html/content_model/tables.html +14 -0
  32. data/test/testcases/block/09_html/content_model/tables.options +1 -0
  33. data/test/testcases/block/09_html/content_model/tables.text +14 -0
  34. data/test/testcases/block/09_html/html_and_codeblocks.options +1 -1
  35. data/test/testcases/block/09_html/invalid_html_1.html +0 -2
  36. data/test/testcases/block/09_html/invalid_html_2.html +2 -3
  37. data/test/testcases/block/09_html/markdown_attr.html +38 -0
  38. data/test/testcases/block/09_html/markdown_attr.text +38 -0
  39. data/test/testcases/block/09_html/not_parsed.html +14 -0
  40. data/test/testcases/block/09_html/not_parsed.text +15 -0
  41. data/test/testcases/block/09_html/parse_as_raw.html +16 -14
  42. data/test/testcases/block/09_html/parse_as_raw.options +1 -0
  43. data/test/testcases/block/09_html/parse_as_raw.text +13 -1
  44. data/test/testcases/block/09_html/parse_as_span.html +2 -5
  45. data/test/testcases/block/09_html/parse_as_span.options +1 -0
  46. data/test/testcases/block/09_html/{auto_parse_block_html.html → parse_block_html.html} +0 -0
  47. data/test/testcases/block/09_html/parse_block_html.options +1 -0
  48. data/test/testcases/block/09_html/{auto_parse_block_html.text → parse_block_html.text} +0 -0
  49. data/test/testcases/block/09_html/simple.html +21 -30
  50. data/test/testcases/block/09_html/simple.options +1 -0
  51. data/test/testcases/block/09_html/simple.text +13 -2
  52. data/test/testcases/block/11_ial/auto_id_and_ial.html +1 -0
  53. data/test/testcases/block/11_ial/auto_id_and_ial.options +1 -0
  54. data/test/testcases/block/11_ial/auto_id_and_ial.text +2 -0
  55. data/test/testcases/block/12_extension/{nokramdown.html → nomarkdown.html} +0 -0
  56. data/test/testcases/block/12_extension/{nokramdown.text → nomarkdown.text} +4 -4
  57. data/test/testcases/block/12_extension/{kdoptions.html → options.html} +9 -0
  58. data/test/testcases/block/12_extension/options.text +28 -0
  59. data/test/testcases/block/12_extension/{kdoptions2.html → options2.html} +0 -0
  60. data/test/testcases/block/12_extension/{kdoptions2.text → options2.text} +1 -1
  61. data/test/testcases/block/13_definition_list/definition_at_beginning.html +1 -0
  62. data/test/testcases/block/13_definition_list/definition_at_beginning.text +1 -0
  63. data/test/testcases/block/13_definition_list/multiple_terms.html +13 -0
  64. data/test/testcases/block/13_definition_list/multiple_terms.text +10 -0
  65. data/test/testcases/block/13_definition_list/para_wrapping.html +10 -0
  66. data/test/testcases/block/13_definition_list/para_wrapping.text +6 -0
  67. data/test/testcases/block/13_definition_list/separated_by_eob.html +8 -0
  68. data/test/testcases/block/13_definition_list/separated_by_eob.text +5 -0
  69. data/test/testcases/block/13_definition_list/simple.html +8 -0
  70. data/test/testcases/block/13_definition_list/simple.text +7 -0
  71. data/test/testcases/block/13_definition_list/styled_terms.html +4 -0
  72. data/test/testcases/block/13_definition_list/styled_terms.text +2 -0
  73. data/test/testcases/block/13_definition_list/too_much_space.html +3 -0
  74. data/test/testcases/block/13_definition_list/too_much_space.text +4 -0
  75. data/test/testcases/block/13_definition_list/with_blocks.html +38 -0
  76. data/test/testcases/block/13_definition_list/with_blocks.text +24 -0
  77. data/test/testcases/span/05_html/across_lines.html +1 -0
  78. data/test/testcases/span/05_html/across_lines.text +2 -0
  79. data/test/testcases/span/05_html/link_with_mailto.html +1 -0
  80. data/test/testcases/span/05_html/link_with_mailto.text +1 -0
  81. data/test/testcases/span/05_html/markdown_attr.html +5 -0
  82. data/test/testcases/span/05_html/markdown_attr.text +5 -0
  83. data/test/testcases/span/05_html/normal.html +7 -0
  84. data/test/testcases/span/05_html/normal.text +7 -0
  85. metadata +56 -12
  86. data/test/testcases/block/09_html/auto_parse_block_html.options +0 -1
  87. data/test/testcases/block/12_extension/kdoptions.text +0 -18
data/doc/tests.page CHANGED
@@ -34,11 +34,10 @@ fast but they do not provide additional syntax elements). As one can see below,
34
34
  currently (November 2009) ~5x faster than Maruku, ~10x faster than BlueFeather but ~30x slower than
35
35
  BlueCloth and rdiscount:
36
36
 
37
- {::nokramdown:}
38
37
  <pre><code>
39
38
  {execute_cmd: {command: "ruby -Ilib -rubygems benchmark/benchmark.rb", process_output: false, escape_html: true}}
40
- </code></pre>
41
- {::nokramdown:}
39
+ </code>
40
+ </pre>
42
41
 
43
42
  [Markdown Test Suite]: http://daringfireball.net/projects/downloads/MarkdownTest_1.0.zip
44
43
  [MDTest]: http://www.michelf.com/docs/projets/mdtest-1.0.zip
@@ -61,6 +61,10 @@ module Kramdown
61
61
  escape_html(el.value, false)
62
62
  end
63
63
 
64
+ def convert_eob(el, inner, indent)
65
+ ''
66
+ end
67
+
64
68
  def convert_p(el, inner, indent)
65
69
  "#{' '*indent}<p#{options_for_element(el)}>#{inner}</p>\n"
66
70
  end
@@ -97,22 +101,28 @@ module Kramdown
97
101
  "#{' '*indent}<#{el.type}#{options_for_element(el)}>\n#{inner}#{' '*indent}</#{el.type}>\n"
98
102
  end
99
103
  alias :convert_ol :convert_ul
104
+ alias :convert_dl :convert_ul
100
105
 
101
106
  def convert_li(el, inner, indent)
102
- output = ' '*indent << "<li" << options_for_element(el) << ">"
103
- if el.options[:first_as_block]
107
+ output = ' '*indent << "<#{el.type}" << options_for_element(el) << ">"
108
+ if el.options[:first_is_block]
104
109
  output << "\n" << inner << ' '*indent
105
110
  else
106
111
  output << inner << (inner =~ /\n\Z/ ? ' '*indent : '')
107
112
  end
108
- output << "</li>\n"
113
+ output << "</#{el.type}>\n"
114
+ end
115
+ alias :convert_dd :convert_li
116
+
117
+ def convert_dt(el, inner, indent)
118
+ "#{' '*indent}<dt#{options_for_element(el)}>#{inner}</dt>\n"
109
119
  end
110
120
 
111
121
  def convert_html_raw(el, inner, indent)
112
122
  el.value + (el.options[:type] == :block ? "\n" : '')
113
123
  end
114
124
 
115
- HTML_TAGS_WITH_BODY=['div']
125
+ HTML_TAGS_WITH_BODY=['div', 'script']
116
126
 
117
127
  def convert_html_element(el, inner, indent)
118
128
  if @doc.options[:filter_html].include?(el.value)
@@ -120,15 +130,22 @@ module Kramdown
120
130
  elsif el.options[:type] == :span
121
131
  "<#{el.value}#{options_for_element(el)}" << (!inner.empty? ? ">#{inner}</#{el.value}>" : " />")
122
132
  else
123
- output = ' '*indent << "<#{el.value}#{options_for_element(el)}"
124
- if !inner.empty?
125
- output << ">\n#{inner.chomp}\n" << ' '*indent << "</#{el.value}>"
133
+ output = ''
134
+ output << ' '*indent if !el.options[:no_start_indent] && el.options[:parse_type] != :raw && !el.options[:parent_is_raw]
135
+ output << "<#{el.value}#{options_for_element(el)}"
136
+ if !inner.empty? && (el.options[:compact] || el.options[:parse_type] != :block)
137
+ output << ">#{inner}</#{el.value}>"
138
+ elsif !inner.empty? && (el.children.first.type == :text || el.children.first.options[:no_start_indent])
139
+ output << ">#{inner}" << ' '*indent << "</#{el.value}>"
140
+ elsif !inner.empty?
141
+ output << ">\n#{inner}" << ' '*indent << "</#{el.value}>"
126
142
  elsif HTML_TAGS_WITH_BODY.include?(el.value)
127
143
  output << "></#{el.value}>"
128
144
  else
129
145
  output << " />"
130
146
  end
131
- output << "\n"
147
+ output << "\n" if el.options[:outer_element] || (el.options[:parse_type] != :raw && !el.options[:parent_is_raw])
148
+ output
132
149
  end
133
150
  end
134
151
 
@@ -164,6 +181,19 @@ module Kramdown
164
181
  end
165
182
  alias :convert_strong :convert_em
166
183
 
184
+ def convert_entity(el, inner, indent)
185
+ el.value
186
+ end
187
+
188
+ TYPOGRAPHIC_SYMS = {
189
+ :mdash => '&mdash;', :ndash => '&ndash;', :ellipsis => '&hellip;',
190
+ :laquo_space => '&laquo;&nbsp;', :raquo_space => '&nbsp;&raquo;',
191
+ :laquo => '&laquo;', :raquo => '&raquo;'
192
+ }
193
+ def convert_typographic_sym(el, inner, indent)
194
+ TYPOGRAPHIC_SYMS[el.value]
195
+ end
196
+
167
197
  def convert_root(el, inner, indent)
168
198
  inner << footnote_content
169
199
  end
@@ -174,7 +204,7 @@ module Kramdown
174
204
  ol = Element.new(:ol)
175
205
  ol.options[:attr] = {'start' => @footnote_start} if @footnote_start != 1
176
206
  @footnotes.each do |name, data|
177
- li = Element.new(:li, nil, {:attr => {:id => "fn:#{name}"}, :first_as_block => true})
207
+ li = Element.new(:li, nil, {:attr => {:id => "fn:#{name}"}, :first_is_block => true})
178
208
  li.children = Marshal.load(Marshal.dump(data[:content].children)) #TODO: probably remove this!!!!
179
209
  ol.children << li
180
210
 
@@ -204,7 +234,8 @@ module Kramdown
204
234
  ESCAPE_ALL_NOT_ENTITIES_RE = Regexp.union(REXML::Parsers::BaseParser::REFERENCE_RE, ESCAPE_ALL_RE)
205
235
 
206
236
  # Escape the special HTML characters in the string +str+. If +all+ is +true+ then all
207
- # characters are escaped, if +all+ is +false+
237
+ # characters are escaped, if +all+ is +false+ then only those characters are escaped that are
238
+ # not part of an HTML entity.
208
239
  def escape_html(str, all = true)
209
240
  str.gsub(all ? ESCAPE_ALL_RE : ESCAPE_ALL_NOT_ENTITIES_RE) {|m| ESCAPE_MAP[m] || m}
210
241
  end
@@ -0,0 +1,41 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ #--
4
+ # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
5
+ #
6
+ # This file is part of kramdown.
7
+ #
8
+ # kramdown is free software: you can redistribute it and/or modify
9
+ # it under the terms of the GNU General Public License as published by
10
+ # the Free Software Foundation, either version 3 of the License, or
11
+ # (at your option) any later version.
12
+ #
13
+ # This program is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ # GNU General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU General Public License
19
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
20
+ #++
21
+ #
22
+
23
+ module Kramdown
24
+
25
+ class Extension
26
+
27
+ def parse_nokramdown(parser, opts, body)
28
+ warn("The extension 'nokramdown' is deprecated and has been renamed to 'nomarkdown'")
29
+ parse_nomarkdown(parser, opts, body)
30
+ end
31
+
32
+ def parse_kdoptions(parser, opts, body)
33
+ warn("The extension 'kdoptions' is deprecated and has been renamed to 'options'")
34
+ parse_options(parser, opts, body)
35
+ end
36
+
37
+ end
38
+
39
+ end
40
+
41
+
@@ -20,16 +20,15 @@
20
20
  #++
21
21
  #
22
22
 
23
+ require 'kramdown/version'
23
24
  require 'kramdown/error'
24
25
  require 'kramdown/parser'
25
26
  require 'kramdown/converter'
26
27
  require 'kramdown/extension'
28
+ require 'kramdown/deprecated'
27
29
 
28
30
  module Kramdown
29
31
 
30
- # The kramdown version.
31
- VERSION = '0.1.0'
32
-
33
32
  # The main interface to kramdown.
34
33
  #
35
34
  # This class provides a one-stop-shop for using kramdown to convert text into various output
@@ -50,20 +49,30 @@ module Kramdown
50
49
  #
51
50
  # [:auto_ids (used by the parser)]
52
51
  # A boolean value deciding whether automatic header ID generation is used. Default: +false+.
53
- # When using the +kdoptions+ extension, the string 'false' will be the value +false+, every
54
- # other non-empty string will be +true+.
55
52
  # [:filter_html (used by the HTML converter)]
56
53
  # An array of HTML tag names that defines which tags should be filtered from the output. For
57
54
  # example, if the value contains +iframe+, then all HTML +iframe+ tags are filtered out and
58
- # only the body is displayed. Default: empty array. When using the +kdoptions+ extension, the
55
+ # only the body is displayed. Default: empty array. When using the +options+ extension, the
59
56
  # string value needs to hold the HTML tag names separated by one or more spaces.
60
57
  # [:footnote_nr (used by the HTML converter)]
61
58
  # The initial number used for creating the link to the first footnote. Default: +1+. When
62
- # using the +kdoptions+ extension, the string value needs to be a valid number.
59
+ # using the +options+ extension, the string value needs to be a valid number.
60
+ # [:parse_block_html (used by the parser)]
61
+ # A boolean value deciding whether kramdown syntax is processed in block HTML tags. Default:
62
+ # +false+.
63
+ # [:parse_span_html (used by the parser)]
64
+ # A boolean value deciding whether kramdown syntax is processed in span HTML tags. Default:
65
+ # +true+.
66
+ #
67
+ # When using the +options+ extension, all boolean values can be set to false by using the
68
+ # string 'false' or an empty string, any other non-empty string will be converted to the value
69
+ # +true+.
63
70
  DEFAULT_OPTIONS={
64
71
  :footnote_nr => 1,
65
72
  :filter_html => [],
66
- :auto_ids => false
73
+ :auto_ids => true,
74
+ :parse_block_html => false,
75
+ :parse_span_html => true
67
76
  }
68
77
 
69
78
 
@@ -44,18 +44,14 @@ module Kramdown
44
44
  end
45
45
 
46
46
  # Add the body (if available) as <tt>:raw</tt> Element to the +parser.tree+.
47
- def parse_nokramdown(parser, opts, body)
47
+ def parse_nomarkdown(parser, opts, body)
48
48
  parser.tree.children << Element.new(:raw, body) if body.kind_of?(String)
49
49
  end
50
50
 
51
51
  # Update the document options with the options set in +opts+.
52
- def parse_kdoptions(parser, opts, body)
52
+ def parse_options(parser, opts, body)
53
53
  if val = opts.delete('auto_ids')
54
- if val.downcase.strip == 'false'
55
- parser.doc.options[:auto_ids] = false
56
- elsif !val.empty?
57
- parser.doc.options[:auto_ids] = true
58
- end
54
+ parser.doc.options[:auto_ids] = boolean_value(val)
59
55
  end
60
56
  if val = opts.delete('filter_html')
61
57
  parser.doc.options[:filter_html] = val.split(/\s+/)
@@ -63,9 +59,19 @@ module Kramdown
63
59
  if val = opts.delete('footnote_nr')
64
60
  parser.doc.options[:footnote_nr] = Integer(val) rescue parser.doc.options[:footnote_nr]
65
61
  end
62
+ if val = opts.delete('parse_block_html')
63
+ parser.doc.options[:parse_block_html] = boolean_value(val)
64
+ end
65
+ if val = opts.delete('parse_span_html')
66
+ parser.doc.options[:parse_span_html] = boolean_value(val)
67
+ end
66
68
  opts.each {|k,v| parser.warning("Unknown kramdown options '#{k}'")}
67
69
  end
68
70
 
71
+ def boolean_value(val)
72
+ val.downcase.strip != 'false' && !val.empty?
73
+ end
74
+
69
75
  end
70
76
 
71
77
  end
@@ -84,10 +84,10 @@ module Kramdown
84
84
  #######
85
85
 
86
86
  BLOCK_PARSERS = [:blank_line, :codeblock, :codeblock_fenced, :blockquote, :atx_header,
87
- :setext_header, :horizontal_rule, :list, :link_definition, :block_html,
87
+ :setext_header, :horizontal_rule, :list, :definition_list, :link_definition, :block_html,
88
88
  :footnote_definition, :ald, :block_ial, :extension_block, :eob_marker, :paragraph]
89
89
  SPAN_PARSERS = [:emphasis, :codespan, :autolink, :span_html, :footnote_marker, :link,
90
- :span_ial, :html_entity, :typographic_syms, :special_html_chars, :line_break, :escaped_chars,]
90
+ :span_ial, :html_entity, :typographic_syms, :line_break, :escaped_chars]
91
91
 
92
92
  # Adapt the object to allow parsing like specified in the options.
93
93
  def configure_parser
@@ -292,6 +292,7 @@ module Kramdown
292
292
  # Parse the EOB marker at the current location.
293
293
  def parse_eob_marker
294
294
  @src.pos += @src.matched_size
295
+ @tree.children << Element.new(:eob)
295
296
  true
296
297
  end
297
298
  Registry.define_parser(:block, :eob_marker, EOB_MARKER, self)
@@ -312,8 +313,8 @@ module Kramdown
312
313
  end
313
314
  Registry.define_parser(:block, :paragraph, PARAGRAPH_START, self)
314
315
 
315
-
316
- SETEXT_HEADER_START = /^(#{OPT_SPACE}[^ \t].*?)\n(-|=)+\s*?\n/
316
+ HEADER_ID=/(?:[ \t]\{#((?:\w|\d)[\w\d-]*)\})?/
317
+ SETEXT_HEADER_START = /^(#{OPT_SPACE}[^ \t].*?)#{HEADER_ID}[ \t]*?\n(-|=)+\s*?\n/
317
318
 
318
319
  # Parse the Setext header at the current location.
319
320
  def parse_setext_header
@@ -321,10 +322,11 @@ module Kramdown
321
322
  return false
322
323
  end
323
324
  @src.pos += @src.matched_size
324
- text, level = @src[1].strip, @src[2]
325
+ text, id, level = @src[1].strip, @src[2], @src[3]
325
326
  el = Element.new(:header, nil, :level => (level == '-' ? 2 : 1))
326
327
  add_text(text, el)
327
- el.options[:attr] = {:id => generate_id(text)} if @doc.options[:auto_ids]
328
+ el.options[:attr] = {'id' => id} if id
329
+ el.options[:attr] = {'id' => generate_id(text)} if @doc.options[:auto_ids] && !id
328
330
  @tree.children << el
329
331
  true
330
332
  end
@@ -332,7 +334,7 @@ module Kramdown
332
334
 
333
335
 
334
336
  ATX_HEADER_START = /^\#{1,6}/
335
- ATX_HEADER_MATCH = /^(\#{1,6})(.+?)\s*?#*\s*?\n/
337
+ ATX_HEADER_MATCH = /^(\#{1,6})(.+?)\s*?#*#{HEADER_ID}\s*?\n/
336
338
 
337
339
  # Parse the Atx header at the current location.
338
340
  def parse_atx_header
@@ -340,10 +342,11 @@ module Kramdown
340
342
  return false
341
343
  end
342
344
  result = @src.scan(ATX_HEADER_MATCH)
343
- level, text = @src[1], @src[2].strip
345
+ level, text, id = @src[1], @src[2].strip, @src[3]
344
346
  el = Element.new(:header, nil, :level => level.length)
345
347
  add_text(text, el)
346
- el.options[:attr] = {:id => generate_id(text)} if @doc.options[:auto_ids]
348
+ el.options[:attr] = {'id' => id} if id
349
+ el.options[:attr] = {'id' => generate_id(text)} if @doc.options[:auto_ids] && !id
347
350
  @tree.children << el
348
351
  true
349
352
  end
@@ -398,7 +401,7 @@ module Kramdown
398
401
  Registry.define_parser(:block, :codeblock_fenced, FENCED_CODEBLOCK_START, self)
399
402
 
400
403
 
401
- HR_START = /^#{OPT_SPACE}(\*|-|_) *\1 *\1 *(\1| )*\n/
404
+ HR_START = /^#{OPT_SPACE}(\*|-|_)[ \t]*\1[ \t]*\1[ \t]*(\1|[ \t])*\n/
402
405
 
403
406
  # Parse the horizontal rule at the current location.
404
407
  def parse_horizontal_rule
@@ -431,23 +434,10 @@ module Kramdown
431
434
  if @src.check(HR_START)
432
435
  break
433
436
  elsif @src.scan(list_start_re)
434
- indentation, content = @src[1].length, @src[2]
435
437
  item = Element.new(:li)
438
+ item.value, indentation, content_re, indent_re = parse_first_list_line(@src[1].length, @src[2])
436
439
  list.children << item
437
- if content =~ /^\s*\n/
438
- indentation = 4
439
- else
440
- while content =~ /^ *\t/
441
- temp = content.scan(/^ */).first.length + indentation
442
- content.sub!(/^( *)(\t+)/) {$1 + " "*(4 - (temp % 4)) + " "*($2.length - 1)*4}
443
- end
444
- indentation += content.scan(/^ */).first.length
445
- end
446
- content.sub!(/^\s*/, '')
447
- item.value = content
448
440
 
449
- indent_re = /^ {#{indentation}}/
450
- content_re = /^(?:(?:\t| {4}){#{indentation / 4}} {#{indentation % 4}}|(?:\t| {4}){#{indentation / 4 + 1}}).*?\n/
451
441
  list_start_re = (type == :ul ? /^( {0,#{[3, indentation - 1].min}}[+*-])([\t| ].*?\n)/ :
452
442
  /^( {0,#{[3, indentation - 1].min}}\d+\.)([\t| ].*?\n)/)
453
443
  nested_list_found = false
@@ -491,7 +481,7 @@ module Kramdown
491
481
  text.value += "\n" if !item.children.empty? && item.children[0].type != :blank
492
482
  item.children.unshift(text)
493
483
  else
494
- item.options[:first_as_block] = true
484
+ item.options[:first_is_block] = true
495
485
  end
496
486
 
497
487
  if item.children.last.type == :blank
@@ -507,6 +497,110 @@ module Kramdown
507
497
  end
508
498
  Registry.define_parser(:block, :list, LIST_START, self)
509
499
 
500
+ def parse_first_list_line(indentation, content)
501
+ if content =~ /^\s*\n/
502
+ indentation = 4
503
+ else
504
+ while content =~ /^ *\t/
505
+ temp = content.scan(/^ */).first.length + indentation
506
+ content.sub!(/^( *)(\t+)/) {$1 + " "*(4 - (temp % 4)) + " "*($2.length - 1)*4}
507
+ end
508
+ indentation += content.scan(/^ */).first.length
509
+ end
510
+ content.sub!(/^\s*/, '')
511
+
512
+ indent_re = /^ {#{indentation}}/
513
+ content_re = /^(?:(?:\t| {4}){#{indentation / 4}} {#{indentation % 4}}|(?:\t| {4}){#{indentation / 4 + 1}}).*?\n/
514
+ [content, indentation, content_re, indent_re]
515
+ end
516
+
517
+
518
+ DEFINITION_LIST_START = /^(#{OPT_SPACE}:)([\t| ].*?\n)/
519
+
520
+ # Parse the definition list at the current location.
521
+ def parse_definition_list
522
+ children = @tree.children
523
+ if !children.last || (children.length == 1 && children.last.type != :p ) ||
524
+ (children.length >= 2 && children[-1].type != :p && (children[-1].type != :blank || children[-1].value != "\n" || children[-2].type != :p))
525
+ return false
526
+ end
527
+
528
+ first_as_para = false
529
+ deflist = Element.new(:dl)
530
+ para = @tree.children.pop
531
+ if para.type == :blank
532
+ para = @tree.children.pop
533
+ first_as_para = true
534
+ end
535
+ para.children.first.value.split("\n").each do |term|
536
+ el = Element.new(:dt)
537
+ el.children << Element.new(:text, term)
538
+ deflist.children << el
539
+ end
540
+
541
+ item = nil
542
+ indent_re = nil
543
+ content_re = nil
544
+ def_start_re = DEFINITION_LIST_START
545
+ while !@src.eos?
546
+ if @src.scan(def_start_re)
547
+ item = Element.new(:dd)
548
+ item.options[:first_as_para] = first_as_para
549
+ item.value, indentation, content_re, indent_re = parse_first_list_line(@src[1].length, @src[2])
550
+ deflist.children << item
551
+
552
+ def_start_re = /^( {0,#{[3, indentation - 1].min}}:)([\t| ].*?\n)/
553
+ first_as_para = false
554
+ elsif result = @src.scan(content_re)
555
+ result.sub!(/^(\t+)/) { " "*4*($1 ? $1.length : 0) }
556
+ result.sub!(indent_re, '')
557
+ item.value << result
558
+ first_as_para = false
559
+ elsif result = @src.scan(BLANK_LINE)
560
+ first_as_para = true
561
+ item.value << result
562
+ else
563
+ break
564
+ end
565
+ end
566
+
567
+ last = nil
568
+ deflist.children.each do |item|
569
+ next if item.type == :dt
570
+
571
+ parse_blocks(item, item.value)
572
+ item.value = nil
573
+ next if item.children.size == 0
574
+
575
+ if item.children.last.type == :blank
576
+ last = item.children.pop
577
+ else
578
+ last = nil
579
+ end
580
+ if item.children.first.type == :p && !item.options.delete(:first_as_para)
581
+ text = item.children.shift.children.first
582
+ text.value += "\n" if !item.children.empty?
583
+ item.children.unshift(text)
584
+ else
585
+ item.options[:first_is_block] = true
586
+ end
587
+ end
588
+
589
+ if @tree.children.length >= 1 && @tree.children.last.type == :dl
590
+ @tree.children[-1].children += deflist.children
591
+ elsif @tree.children.length >= 2 && @tree.children[-1].type == :blank && @tree.children[-2].type == :dl
592
+ @tree.children.pop
593
+ @tree.children[-1].children += deflist.children
594
+ else
595
+ @tree.children << deflist
596
+ end
597
+
598
+ @tree.children << last if !last.nil?
599
+
600
+ true
601
+ end
602
+ Registry.define_parser(:block, :definition_list, DEFINITION_LIST_START, self)
603
+
510
604
 
511
605
  PUNCTUATION_CHARS = "_.:,;!?-"
512
606
  LINK_ID_CHARS = /[a-zA-Z0-9 #{PUNCTUATION_CHARS}]/
@@ -612,20 +706,34 @@ module Kramdown
612
706
  #:startdoc:
613
707
  HTML_COMMENT_RE = /<!--(.*?)-->/m
614
708
  HTML_INSTRUCTION_RE = /<\?(.*?)\?>/m
615
- HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})\s*=\s*(["'])(.*?)\2/
616
- HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/
709
+ HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})\s*=\s*(["'])(.*?)\2/m
710
+ HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/m
617
711
  HTML_TAG_CLOSE_RE = /<\/(#{REXML::Parsers::BaseParser::NAME_STR})\s*>/
618
712
 
619
713
 
620
- HTML_PARSE_AS_BLOCK = %w{div blockquote table dl ol ul form fieldset}
621
- HTML_PARSE_AS_SPAN = %w{a address b dd dt em h1 h2 h3 h4 h5 h6 legend li p pre span td th}
622
- HTML_PARSE_AS_RAW = %w{script math}
623
- HTML_PARSE_AS = Hash.new {|h,k| h[k] = :span}
714
+ HTML_PARSE_AS_BLOCK = %w{applet button blockquote colgroup dd div dl fieldset form iframe li
715
+ map noscript object ol table tbody td th thead tfoot tr ul}
716
+ HTML_PARSE_AS_SPAN = %w{a abbr acronym address b bdo big cite caption code del dfn dt em
717
+ h1 h2 h3 h4 h5 h6 i ins kbd label legend optgroup p pre q rb rbc
718
+ rp rt rtc ruby samp select small span strong sub sup tt var}
719
+ HTML_PARSE_AS_RAW = %w{script math option textarea}
720
+
721
+ HTML_PARSE_AS = Hash.new {|h,k| h[k] = :raw}
624
722
  HTML_PARSE_AS_BLOCK.each {|i| HTML_PARSE_AS[i] = :block}
625
723
  HTML_PARSE_AS_SPAN.each {|i| HTML_PARSE_AS[i] = :span}
626
724
  HTML_PARSE_AS_RAW.each {|i| HTML_PARSE_AS[i] = :raw}
627
725
 
628
- HTML_BLOCK_ELEMENTS = %w[div p pre h1 h2 h3 h4 h5 h6 hr form fieldset iframe legend script dl ul ol table ins del blockquote address]
726
+ #:stopdoc:
727
+ # Some HTML elements like script belong to both categories (i.e. are valid in block and
728
+ # span HTML) and therefore don't appear!
729
+ #:startdoc:
730
+ HTML_SPAN_ELEMENTS = %w{a abbr acronym b big bdo br button cite code del dfn em i img input
731
+ ins kbd label option q rb rbc rp rt rtc ruby samp select small span
732
+ strong sub sup textarea tt var}
733
+ HTML_BLOCK_ELEMENTS = %w{address applet button blockquote caption col colgroup dd div dl dt fieldset
734
+ form h1 h2 h3 h4 h5 h6 hr iframe legend li map ol optgroup p pre table tbody
735
+ td th thead tfoot tr ul}
736
+ HTML_ELEMENTS_WITHOUT_BODY = %w{area br col hr img input}
629
737
 
630
738
  HTML_BLOCK_START = /^#{OPT_SPACE}<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/
631
739
 
@@ -640,85 +748,125 @@ module Kramdown
640
748
  @src.scan(/.*?\n/)
641
749
  true
642
750
  else
643
- if !((@src.check(/^#{OPT_SPACE}#{HTML_TAG_RE}/) && (HTML_BLOCK_ELEMENTS.include?(@src[1]) || @src[1] =~ /:/)) ||
644
- @src.check(/^#{OPT_SPACE}#{HTML_TAG_CLOSE_RE}/))
645
- return false
751
+ if (!@src.check(/^#{OPT_SPACE}#{HTML_TAG_RE}/) && !@src.check(/^#{OPT_SPACE}#{HTML_TAG_CLOSE_RE}/)) ||
752
+ HTML_SPAN_ELEMENTS.include?(@src[1])
753
+ if @tree.type == :html_element && @tree.options[:parse_type] != :block
754
+ add_html_text(@src.scan(/.*?\n/), @tree)
755
+ add_html_text(@src.scan_until(/(?=#{HTML_BLOCK_START})|\Z/), @tree)
756
+ return true
757
+ else
758
+ return false
759
+ end
646
760
  end
647
761
 
648
- @src.scan(/^(.*?)\n/)
649
- line = @src[1]
650
- temp = nil
762
+ current_el = (@tree.type == :html_element ? @tree : nil)
763
+ @src.scan(/^(#{OPT_SPACE})(.*?)\n/)
764
+ if current_el && current_el.options[:parse_type] == :raw
765
+ add_html_text(@src[1], current_el)
766
+ end
767
+ line = @src[2]
651
768
  stack = []
652
769
 
653
770
  while line.size > 0
654
771
  index_start_tag, index_close_tag = line.index(HTML_TAG_RE), line.index(HTML_TAG_CLOSE_RE)
655
- if index_start_tag && (!index_close_tag || index_start_tag < index_close_tag) && (!temp || temp.options[:parse_type] == :block)
772
+ if index_start_tag && (!index_close_tag || index_start_tag < index_close_tag)
656
773
  md = line.match(HTML_TAG_RE)
657
- break if !(HTML_BLOCK_ELEMENTS.include?(md[1]) || md[1] =~ /:/)
658
-
659
- add_text(md.pre_match + "\n", temp) if temp
660
774
  line = md.post_match
775
+ add_html_text(md.pre_match, current_el) if current_el
776
+ if HTML_SPAN_ELEMENTS.include?(md[1]) || (current_el && current_el.options[:parse_type] == :span)
777
+ add_html_text(md.to_s, current_el) if current_el
778
+ next
779
+ end
661
780
 
662
781
  attrs = {}
663
782
  md[2].scan(HTML_ATTRIBUTE_RE).each {|name,sep,val| attrs[name] = val}
664
- el = Element.new(:html_element, md[1], :attr => attrs, :type => :block,
665
- :parse_type => HTML_PARSE_AS[md[1]])
666
783
 
667
- (temp || @tree).children << el
668
- if !md[4]
784
+ parse_type = if !current_el || current_el.options[:parse_type] != :raw
785
+ (@doc.options[:parse_block_html] ? HTML_PARSE_AS[md[1]] : :raw)
786
+ else
787
+ :raw
788
+ end
789
+ if val = get_parse_type(attrs.delete('markdown'))
790
+ parse_type = (val == :default ? HTML_PARSE_AS[md[1]] : val)
791
+ end
792
+ el = Element.new(:html_element, md[1], :attr => attrs, :type => :block, :parse_type => parse_type)
793
+ el.options[:no_start_indent] = true if !stack.empty?
794
+ el.options[:outer_element] = true if !current_el
795
+ el.options[:parent_is_raw] = true if current_el && current_el.options[:parse_type] == :raw
796
+
797
+ @tree.children << el
798
+ if !md[4] && HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
799
+ warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it")
800
+ elsif !md[4]
669
801
  @unclosed_html_tags.push(el)
670
- stack << temp
671
- temp = el
802
+ @stack.push(@tree)
803
+ stack.push(current_el)
804
+ @tree = current_el = el
672
805
  end
673
806
  elsif index_close_tag
674
807
  md = line.match(HTML_TAG_CLOSE_RE)
675
- add_text(md.pre_match, temp) if temp
676
-
677
808
  line = md.post_match
809
+ add_html_text(md.pre_match, current_el) if current_el
810
+
678
811
  if @unclosed_html_tags.size > 0 && md[1] == @unclosed_html_tags.last.value
679
812
  el = @unclosed_html_tags.pop
680
- @tree = @stack.pop unless temp
681
- temp = stack.pop
682
- if el.options[:parse_type] == :raw
683
- raise Kramdown::Error, "Bug: please report!" if el.children.size > 1
684
- el.children.first.type = :raw if el.children.first
685
- end
813
+ @tree = @stack.pop
814
+ current_el.options[:compact] = true if stack.size > 0
815
+ current_el = stack.pop || (@tree.type == :html_element ? @tree : nil)
686
816
  else
687
- if HTML_BLOCK_ELEMENTS.include?(md[1]) && (temp || @tree).options[:parse_type] == :block
688
- warning("Found invalidly nested HTML closing tag for '#{md[1]}'")
689
- end
690
- if temp
691
- add_text(md.to_s, temp)
692
- else
693
- add_text(md.to_s + "\n")
817
+ if !HTML_SPAN_ELEMENTS.include?(md[1]) && @tree.options[:parse_type] != :span
818
+ warning("Found invalidly used HTML closing tag for '#{md[1]}'")
819
+ elsif current_el
820
+ add_html_text(md.to_s, current_el)
694
821
  end
695
822
  end
696
823
  else
697
- if temp
698
- add_text(line, temp)
824
+ if current_el
825
+ line.rstrip! if current_el.options[:parse_type] == :block
826
+ add_html_text(line + "\n", current_el)
699
827
  else
700
- warning("Ignoring characters at the end of an HTML block line")
828
+ add_text(line + "\n")
701
829
  end
702
830
  line = ''
703
831
  end
704
832
  end
705
- if temp && temp.children.last && temp.children.last.type == :text
706
- temp.children.last.value << "\n"
707
- end
708
- if temp
709
- if temp.options[:parse_type] == :span || temp.options[:parse_type] == :raw
710
- result = @src.scan_until(/(?=#{HTML_BLOCK_START})|\Z/)
711
- add_text(result, temp)
712
- end
713
- @stack.push(@tree)
714
- @tree = temp
833
+ if current_el && (current_el.options[:parse_type] == :span || current_el.options[:parse_type] == :raw)
834
+ result = @src.scan_until(/(?=#{HTML_BLOCK_START})|\Z/)
835
+ last = current_el.children.last
836
+ result = "\n" + result if last.nil? || (last.type != :text && last.type != :raw) || last.value !~ /\n\Z/
837
+ add_html_text(result, current_el)
715
838
  end
716
839
  true
717
840
  end
718
841
  end
719
842
  Registry.define_parser(:block, :block_html, HTML_BLOCK_START, self)
720
843
 
844
+ # Return the HTML parse type defined by the string +val+, i.e. raw when "0", default parsing
845
+ # (return value +:default+) when "1", span parsing when "span" and block parsing when "block". If
846
+ # +val+ is nil, then the default parsing mode is used.
847
+ def get_parse_type(val)
848
+ case val
849
+ when "0" then :raw
850
+ when "1" then :default
851
+ when "span" then :span
852
+ when "block" then :block
853
+ when NilClass then nil
854
+ else
855
+ warning("Invalid markdown attribute val '#{val}', using default")
856
+ nil
857
+ end
858
+ end
721
859
 
860
+ # Special version of #add_text which either creates a :text element or a :raw element,
861
+ # depending on the HTML element type.
862
+ def add_html_text(text, tree)
863
+ type = (tree.options[:parse_type] == :raw ? :raw : :text)
864
+ if tree.children.last && tree.children.last.type == type
865
+ tree.children.last.value << text
866
+ elsif !text.empty?
867
+ tree.children << Element.new(type, text)
868
+ end
869
+ end
722
870
 
723
871
 
724
872
  ESCAPED_CHARS = /\\([\\.*_+-`()\[\]{}#!])/
@@ -734,21 +882,11 @@ module Kramdown
734
882
  # Parse the HTML entity at the current location.
735
883
  def parse_html_entity
736
884
  @src.pos += @src.matched_size
737
- add_text(@src.matched)
885
+ @tree.children << Element.new(:entity, @src.matched)
738
886
  end
739
887
  Registry.define_parser(:span, :html_entity, REXML::Parsers::BaseParser::REFERENCE_RE, self)
740
888
 
741
889
 
742
- SPECIAL_HTML_CHARS = /&|>|</
743
-
744
- # Parse the special HTML characters at the current location.
745
- def parse_special_html_chars
746
- @src.pos += @src.matched_size
747
- add_text(@src.matched)
748
- end
749
- Registry.define_parser(:span, :special_html_chars, SPECIAL_HTML_CHARS, self)
750
-
751
-
752
890
  LINE_BREAK = /( |\\\\)(?=\n)/
753
891
 
754
892
  # Parse the line break at the current location.
@@ -759,22 +897,27 @@ module Kramdown
759
897
  Registry.define_parser(:span, :line_break, LINE_BREAK, self)
760
898
 
761
899
 
762
- TYPOGRAPHIC_SYMS = [['---', '&mdash;'], ['--', '&ndash;'], ['...', '&hellip;'],
900
+ TYPOGRAPHIC_SYMS = [['---', :mdash], ['--', :ndash], ['...', :ellipsis],
763
901
  ['\\<<', '&lt;&lt;'], ['\\>>', '&gt;&gt;'],
764
- ['<< ', '&laquo;&nbsp;'], [' >>', '&nbsp;&raquo;'],
765
- ['<<', '&laquo;'], ['>>', '&raquo;']]
902
+ ['<< ', :laquo_space], [' >>', :raquo_space],
903
+ ['<<', :laquo], ['>>', :raquo]]
766
904
  TYPOGRAPHIC_SYMS_SUBST = Hash[*TYPOGRAPHIC_SYMS.flatten]
767
905
  TYPOGRAPHIC_SYMS_RE = /#{TYPOGRAPHIC_SYMS.map {|k,v| Regexp.escape(k)}.join('|')}/
768
906
 
769
907
  # Parse the typographic symbols at the current location.
770
908
  def parse_typographic_syms
771
909
  @src.pos += @src.matched_size
772
- add_text(TYPOGRAPHIC_SYMS_SUBST[@src.matched].dup)
910
+ val = TYPOGRAPHIC_SYMS_SUBST[@src.matched]
911
+ if val.kind_of?(Symbol)
912
+ @tree.children << Element.new(:typographic_sym, val)
913
+ else
914
+ add_text(val.dup)
915
+ end
773
916
  end
774
917
  Registry.define_parser(:span, :typographic_syms, TYPOGRAPHIC_SYMS_RE, self)
775
918
 
776
919
 
777
- AUTOLINK_START = /<((mailto|https?|ftps?):.*?|.*?@.*?)>/
920
+ AUTOLINK_START = /<((mailto|https?|ftps?):.*?|\S*?@\S*?)>/
778
921
 
779
922
  # Parse the autolink at the current location.
780
923
  def parse_autolink
@@ -916,17 +1059,42 @@ module Kramdown
916
1059
  elsif result = @src.scan(HTML_INSTRUCTION_RE)
917
1060
  @tree.children << Element.new(:html_raw, result, :type => :span)
918
1061
  elsif result = @src.scan(HTML_TAG_RE)
1062
+ if HTML_BLOCK_ELEMENTS.include?(@src[1])
1063
+ add_text(result)
1064
+ return
1065
+ end
919
1066
  reset_pos = @src.pos
920
1067
  attrs = {}
921
- @src[2].scan(HTML_ATTRIBUTE_RE).each {|name,sep,val| attrs[name] = val}
1068
+ @src[2].scan(HTML_ATTRIBUTE_RE).each {|name,sep,val| attrs[name] = val.gsub(/\n+/, ' ')}
1069
+
1070
+ do_parsing = @doc.options[:parse_span_html]
1071
+ if val = get_parse_type(attrs.delete('markdown'))
1072
+ if val == :block
1073
+ warning("Cannot use block level parsing in span level HTML tag - using default mode")
1074
+ elsif val == :span || val == :default
1075
+ do_parsing = true
1076
+ elsif val == :raw
1077
+ do_parsing = false
1078
+ end
1079
+ end
1080
+ do_parsing = false if HTML_PARSE_AS_RAW.include?(@src[1])
1081
+
922
1082
  el = Element.new(:html_element, @src[1], :attr => attrs, :type => :span)
1083
+ stop_re = /<\/#{Regexp.escape(@src[1])}\s*>/
923
1084
  if @src[4]
924
1085
  @tree.children << el
1086
+ elsif HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
1087
+ warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it")
1088
+ @tree.children << el
925
1089
  else
926
- stop_re = /<\/#{Regexp.escape(@src[1])}\s*>/
927
1090
  if parse_spans(el, stop_re)
1091
+ end_pos = @src.pos
928
1092
  @src.scan(stop_re)
929
1093
  @tree.children << el
1094
+ if !do_parsing
1095
+ el.children.clear
1096
+ el.children << Element.new(:raw, @src.string[reset_pos...end_pos])
1097
+ end
930
1098
  else
931
1099
  @src.pos = reset_pos
932
1100
  add_text(result)
@@ -936,7 +1104,7 @@ module Kramdown
936
1104
  add_text(@src.scan(/./))
937
1105
  end
938
1106
  end
939
- Registry.define_parser(:span, :span_html, HTML_BLOCK_START, self)
1107
+ Registry.define_parser(:span, :span_html, HTML_SPAN_START, self)
940
1108
 
941
1109
 
942
1110
  LINK_TEXT_BRACKET_RE = /\\\[|\\\]|\[|\]/