kramdown 0.7.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of kramdown might be problematic. Click here for more details.

Files changed (93) hide show
  1. data/CONTRIBUTERS +4 -0
  2. data/ChangeLog +671 -0
  3. data/README +10 -0
  4. data/Rakefile +40 -23
  5. data/VERSION +1 -1
  6. data/data/kramdown/document.html +1 -1
  7. data/data/kramdown/document.latex +10 -5
  8. data/doc/default.less.css +52 -10
  9. data/doc/default.template +4 -0
  10. data/doc/documentation.page +72 -0
  11. data/doc/index.page +8 -41
  12. data/doc/installation.page +6 -6
  13. data/doc/links.markdown +2 -0
  14. data/doc/quickref.page +6 -2
  15. data/doc/syntax.page +8 -7
  16. data/doc/tests.page +1 -2
  17. data/lib/kramdown.rb +1 -1
  18. data/lib/kramdown/compatibility.rb +1 -1
  19. data/lib/kramdown/converter.rb +8 -3
  20. data/lib/kramdown/converter/base.rb +27 -5
  21. data/lib/kramdown/converter/html.rb +26 -28
  22. data/lib/kramdown/converter/latex.rb +29 -15
  23. data/lib/kramdown/document.rb +15 -8
  24. data/lib/kramdown/error.rb +1 -1
  25. data/lib/kramdown/options.rb +21 -13
  26. data/lib/kramdown/parser.rb +9 -3
  27. data/lib/kramdown/parser/base.rb +95 -0
  28. data/lib/kramdown/parser/html.rb +387 -0
  29. data/lib/kramdown/parser/kramdown.rb +11 -56
  30. data/lib/kramdown/parser/kramdown/attribute_list.rb +1 -1
  31. data/lib/kramdown/parser/kramdown/autolink.rb +1 -1
  32. data/lib/kramdown/parser/kramdown/blank_line.rb +1 -1
  33. data/lib/kramdown/parser/kramdown/blockquote.rb +1 -1
  34. data/lib/kramdown/parser/kramdown/codeblock.rb +1 -1
  35. data/lib/kramdown/parser/kramdown/codespan.rb +1 -1
  36. data/lib/kramdown/parser/kramdown/emphasis.rb +1 -1
  37. data/lib/kramdown/parser/kramdown/eob.rb +1 -1
  38. data/lib/kramdown/parser/kramdown/escaped_chars.rb +1 -1
  39. data/lib/kramdown/parser/kramdown/extension.rb +2 -90
  40. data/lib/kramdown/parser/kramdown/footnote.rb +1 -1
  41. data/lib/kramdown/parser/kramdown/header.rb +1 -1
  42. data/lib/kramdown/parser/kramdown/horizontal_rule.rb +1 -1
  43. data/lib/kramdown/parser/kramdown/html.rb +69 -149
  44. data/lib/kramdown/parser/kramdown/html_entity.rb +4 -4
  45. data/lib/kramdown/parser/kramdown/line_break.rb +1 -1
  46. data/lib/kramdown/parser/kramdown/link.rb +2 -2
  47. data/lib/kramdown/parser/kramdown/list.rb +2 -6
  48. data/lib/kramdown/parser/kramdown/math.rb +3 -3
  49. data/lib/kramdown/parser/kramdown/paragraph.rb +1 -1
  50. data/lib/kramdown/parser/kramdown/smart_quotes.rb +3 -2
  51. data/lib/kramdown/parser/kramdown/table.rb +3 -2
  52. data/lib/kramdown/parser/kramdown/typographic_symbol.rb +7 -3
  53. data/lib/kramdown/version.rb +2 -2
  54. data/man/man1/kramdown.1 +19 -0
  55. data/test/run_tests.rb +1 -0
  56. data/test/test_files.rb +68 -7
  57. data/test/testcases/block/09_html/comment.html +5 -0
  58. data/test/testcases/block/09_html/comment.text +3 -0
  59. data/test/testcases/block/09_html/content_model/tables.html +2 -2
  60. data/test/testcases/block/09_html/html_to_native/code.html +10 -0
  61. data/test/testcases/block/09_html/html_to_native/code.text +9 -0
  62. data/test/testcases/block/09_html/html_to_native/comment.html +7 -0
  63. data/test/testcases/block/09_html/html_to_native/comment.text +8 -0
  64. data/test/testcases/block/09_html/html_to_native/emphasis.html +1 -0
  65. data/test/testcases/block/09_html/html_to_native/emphasis.text +1 -0
  66. data/test/testcases/block/09_html/html_to_native/entity.html +1 -0
  67. data/test/testcases/block/09_html/html_to_native/entity.text +1 -0
  68. data/test/testcases/block/09_html/html_to_native/header.html +6 -0
  69. data/test/testcases/block/09_html/html_to_native/header.options +2 -0
  70. data/test/testcases/block/09_html/html_to_native/header.text +6 -0
  71. data/test/testcases/block/09_html/html_to_native/list_dl.html +8 -0
  72. data/test/testcases/block/09_html/html_to_native/list_dl.text +8 -0
  73. data/test/testcases/block/09_html/html_to_native/list_ol.html +15 -0
  74. data/test/testcases/block/09_html/html_to_native/list_ol.text +17 -0
  75. data/test/testcases/block/09_html/html_to_native/list_ul.html +19 -0
  76. data/test/testcases/block/09_html/html_to_native/list_ul.text +22 -0
  77. data/test/testcases/block/09_html/html_to_native/options +1 -0
  78. data/test/testcases/block/09_html/html_to_native/paragraph.html +3 -0
  79. data/test/testcases/block/09_html/html_to_native/paragraph.text +4 -0
  80. data/test/testcases/block/09_html/html_to_native/table_normal.html +13 -0
  81. data/test/testcases/block/09_html/html_to_native/table_normal.text +12 -0
  82. data/test/testcases/block/09_html/html_to_native/table_simple.html +10 -0
  83. data/test/testcases/block/09_html/html_to_native/table_simple.text +14 -0
  84. data/test/testcases/block/09_html/html_to_native/typography.html +1 -0
  85. data/test/testcases/block/09_html/html_to_native/typography.text +1 -0
  86. data/test/testcases/block/09_html/parse_as_raw.html +3 -5
  87. data/test/testcases/block/09_html/parse_as_raw.text +0 -1
  88. data/test/testcases/span/04_footnote/definitions.latex +18 -0
  89. data/test/testcases/span/04_footnote/footnote_nr.latex +6 -0
  90. data/test/testcases/span/04_footnote/markers.latex +32 -0
  91. data/test/testcases/span/05_html/invalid.html +1 -0
  92. data/test/testcases/span/05_html/invalid.text +1 -0
  93. metadata +52 -5
@@ -1,7 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
3
  #--
4
- # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
4
+ # Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
5
5
  #
6
6
  # This file is part of kramdown.
7
7
  #
@@ -69,7 +69,7 @@ module Kramdown
69
69
  #
70
70
  # Kramdown::Document.new(input_text, :input => 'ERBKramdown').to_html
71
71
  #
72
- class Kramdown
72
+ class Kramdown < Base
73
73
 
74
74
  include ::Kramdown
75
75
 
@@ -79,13 +79,12 @@ module Kramdown
79
79
 
80
80
  # Create a new Kramdown parser object for the Kramdown::Document +doc+.
81
81
  def initialize(doc)
82
- @doc = doc
83
- @extension = @doc.options[:extension] || Kramdown::Parser::Kramdown::Extension.new
82
+ super(doc)
84
83
 
85
84
  @src = nil
86
85
  @tree = nil
87
86
  @stack = []
88
- @text_type = :text
87
+ @text_type = :raw_text
89
88
  @block_ial = nil
90
89
 
91
90
  @doc.parse_infos[:ald] = {}
@@ -95,7 +94,7 @@ module Kramdown
95
94
 
96
95
  @block_parsers = [:blank_line, :codeblock, :codeblock_fenced, :blockquote, :table, :atx_header,
97
96
  :setext_header, :horizontal_rule, :list, :definition_list, :link_definition, :block_html,
98
- :footnote_definition, :abbrev_definition, :ald, :block_math, :extension_block_depr,
97
+ :footnote_definition, :abbrev_definition, :ald, :block_math,
99
98
  :block_extension, :block_ial, :eob_marker, :paragraph]
100
99
  @span_parsers = [:emphasis, :codespan, :autolink, :span_html, :footnote_marker, :link, :smart_quotes, :inline_math,
101
100
  :span_extension, :span_ial, :html_entity, :typographic_syms, :line_break, :escaped_chars]
@@ -104,11 +103,6 @@ module Kramdown
104
103
  private_class_method(:new, :allocate)
105
104
 
106
105
 
107
- # Parse the string +source+ using the Kramdown::Document +doc+ and return the parse tree.
108
- def self.parse(source, doc)
109
- new(doc).parse(source)
110
- end
111
-
112
106
  # The source string provided on initialization is parsed and the created +tree+ is returned.
113
107
  def parse(source)
114
108
  configure_parser
@@ -122,12 +116,6 @@ module Kramdown
122
116
  tree
123
117
  end
124
118
 
125
- # Add the given warning +text+ to the warning array of the Kramdown document.
126
- def warning(text)
127
- @doc.warnings << text
128
- #TODO: add position information
129
- end
130
-
131
119
  #######
132
120
  protected
133
121
  #######
@@ -177,12 +165,12 @@ module Kramdown
177
165
  status
178
166
  end
179
167
 
180
- # Update the tree by parsing all <tt>:text</tt> elements with the span level parser (resets
181
- # +@tree+, +@src+ and the +@stack+) and by updating the attributes from the IALs.
168
+ # Update the tree by parsing all <tt>:raw_text</tt> elements with the span level parser
169
+ # (resets +@tree+, +@src+ and the +@stack+) and by updating the attributes from the IALs.
182
170
  def update_tree(element)
183
171
  element.children.map! do |child|
184
- if child.type == :text
185
- @stack, @tree = [], nil
172
+ if child.type == :raw_text
173
+ @stack, @tree, @text_type = [], nil, :text
186
174
  @src = StringScanner.new(child.value)
187
175
  parse_spans(child)
188
176
  child.children
@@ -195,7 +183,7 @@ module Kramdown
195
183
  end
196
184
 
197
185
  # Parse all span level elements in the source string.
198
- def parse_spans(el, stop_re = nil, parsers = nil, text_type = :text)
186
+ def parse_spans(el, stop_re = nil, parsers = nil, text_type = @text_type)
199
187
  @stack.push([@tree, @text_type]) unless @tree.nil?
200
188
  @tree, @text_type = el, text_type
201
189
 
@@ -232,21 +220,6 @@ module Kramdown
232
220
  stop_re_found
233
221
  end
234
222
 
235
- # Modify the string +source+ to be usable by the parser.
236
- def adapt_source(source)
237
- source.gsub(/\r\n?/, "\n").chomp + "\n"
238
- end
239
-
240
- # This helper method adds the given +text+ either to the last element in the +tree+ if it is a
241
- # +type+ element or creates a new text element with the given +type+.
242
- def add_text(text, tree = @tree, type = @text_type)
243
- if tree.children.last && tree.children.last.type == type
244
- tree.children.last.value << text
245
- elsif !text.empty?
246
- tree.children << Element.new(type, text)
247
- end
248
- end
249
-
250
223
  # Update the attributes with the information from the inline attribute list and all referenced ALDs.
251
224
  def update_attr_with_ial(attr, ial)
252
225
  ial[:refs].each do |ref|
@@ -259,29 +232,11 @@ module Kramdown
259
232
  # Create a new block level element, taking care of applying a preceding block IAL if it exists.
260
233
  def new_block_el(*args)
261
234
  el = Element.new(*args)
235
+ el.options[:category] ||= :block
262
236
  el.options[:ial] = @block_ial if @block_ial && el.type != :blank && el.type != :eob
263
237
  el
264
238
  end
265
239
 
266
- # Extract the part of the StringScanner backed string specified by the +range+. This method
267
- # also works correctly under Ruby 1.9.
268
- def extract_string(range, strscan = @src)
269
- result = nil
270
- if RUBY_VERSION >= '1.9'
271
- begin
272
- enc = strscan.string.encoding
273
- strscan.string.force_encoding('ASCII-8BIT')
274
- result = strscan.string[range].force_encoding(enc)
275
- ensure
276
- strscan.string.force_encoding(enc)
277
- end
278
- else
279
- result = strscan.string[range]
280
- end
281
- result
282
- end
283
-
284
-
285
240
  @@parsers = {}
286
241
 
287
242
  # Holds all the needed data for one block/span level parser.
@@ -1,7 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
3
  #--
4
- # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
4
+ # Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
5
5
  #
6
6
  # This file is part of kramdown.
7
7
  #
@@ -1,7 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
3
  #--
4
- # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
4
+ # Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
5
5
  #
6
6
  # This file is part of kramdown.
7
7
  #
@@ -1,7 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
3
  #--
4
- # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
4
+ # Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
5
5
  #
6
6
  # This file is part of kramdown.
7
7
  #
@@ -1,7 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
3
  #--
4
- # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
4
+ # Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
5
5
  #
6
6
  # This file is part of kramdown.
7
7
  #
@@ -1,7 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
3
  #--
4
- # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
4
+ # Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
5
5
  #
6
6
  # This file is part of kramdown.
7
7
  #
@@ -1,7 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
3
  #--
4
- # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
4
+ # Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
5
5
  #
6
6
  # This file is part of kramdown.
7
7
  #
@@ -1,7 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
3
  #--
4
- # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
4
+ # Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
5
5
  #
6
6
  # This file is part of kramdown.
7
7
  #
@@ -1,7 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
3
  #--
4
- # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
4
+ # Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
5
5
  #
6
6
  # This file is part of kramdown.
7
7
  #
@@ -1,7 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
3
  #--
4
- # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
4
+ # Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
5
5
  #
6
6
  # This file is part of kramdown.
7
7
  #
@@ -1,7 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
3
  #--
4
- # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
4
+ # Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
5
5
  #
6
6
  # This file is part of kramdown.
7
7
  #
@@ -26,94 +26,6 @@ module Kramdown
26
26
  module Parser
27
27
  class Kramdown
28
28
 
29
- # The base extension class.
30
- #
31
- # This class provides implementations for the default extensions defined in the kramdown
32
- # specification.
33
- #
34
- # An extension is a method called <tt>parse_EXTNAME</tt> where +EXTNAME+ is the extension name.
35
- # These methods are called with three parameters:
36
- #
37
- # [+parser+]
38
- # The parser instance from which the extension method is called.
39
- # [+opts+]
40
- # A hash containing the options set in the extension.
41
- # [+body+]
42
- # A string containing the body of the extension. If no body is available, this is +nil+.
43
- class Extension
44
-
45
- # Just ignore everything and do nothing.
46
- def parse_comment(parser, opts, body)
47
- nil
48
- end
49
-
50
- # Add the body (if available) as <tt>:raw</tt> Element to the +parser.tree+.
51
- def parse_nomarkdown(parser, opts, body)
52
- parser.tree.children << Element.new(:raw, body) if body.kind_of?(String)
53
- end
54
-
55
- # Update the document and parser options with the options set in +opts+.
56
- def parse_options(parser, opts, body)
57
- opts.select do |k,v|
58
- k = k.to_sym
59
- if Kramdown::Options.defined?(k)
60
- parser.doc.options[k] = Kramdown::Options.parse(k, v) rescue parser.doc.options[k]
61
- false
62
- else
63
- true
64
- end
65
- end.each do |k,v|
66
- parser.warning("Unknown kramdown option '#{k}'")
67
- end
68
- end
69
-
70
- end
71
-
72
-
73
- EXT_BLOCK_START_STR_DEPR = "^#{OPT_SPACE}\\{::(%s):(:)?(#{ALD_ANY_CHARS}*)\\}\s*?\n"
74
- EXT_BLOCK_START_DEPR = /#{EXT_BLOCK_START_STR_DEPR % ALD_ID_NAME}/
75
-
76
- # Parse the block extension at the current location.
77
- def parse_extension_block_depr
78
- @src.pos += @src.matched_size
79
-
80
- ext = @src[1]
81
- opts = {}
82
- body = nil
83
- parse_attribute_list(@src[3], opts)
84
-
85
- warn('DEPRECATION warning: This syntax is deprecated, use the new extension syntax')
86
- if !%w{comment nomarkdown options}.include?(ext)
87
- warn('DEPRECATION warning: Custom extensions will be removed in a future version - use a template processor like ERB instead')
88
- end
89
-
90
- if !@extension.public_methods.map {|m| m.to_s}.include?("parse_#{ext}")
91
- warning("No extension named '#{ext}' found - ignoring extension block")
92
- body = :invalid
93
- end
94
-
95
- if !@src[2]
96
- stop_re = /#{EXT_BLOCK_START_STR_DEPR % ext}/
97
- if result = @src.scan_until(stop_re)
98
- parse_attribute_list(@src[3], opts)
99
- body = result.sub!(stop_re, '') if body != :invalid
100
- else
101
- body = :invalid
102
- warning("No ending line for extension block '#{ext}' found - ignoring extension block")
103
- end
104
- end
105
-
106
- @extension.send("parse_#{ext}", self, opts, body) if body != :invalid
107
-
108
- true
109
- end
110
- define_parser(:extension_block_depr, EXT_BLOCK_START_DEPR)
111
-
112
-
113
- ##########################################
114
- ### Code for handling new extension syntax
115
- ##########################################
116
-
117
29
  def parse_extension_start_tag(type)
118
30
  @src.pos += @src.matched_size
119
31
 
@@ -145,7 +57,7 @@ module Kramdown
145
57
  when 'comment'
146
58
  # nothing to do
147
59
  when 'nomarkdown'
148
- @tree.children << Element.new(:raw, body, :type => type) if body.kind_of?(String)
60
+ @tree.children << Element.new(:raw, body, :category => type) if body.kind_of?(String)
149
61
  when 'options'
150
62
  opts.select do |k,v|
151
63
  k = k.to_sym
@@ -1,7 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
3
  #--
4
- # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
4
+ # Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
5
5
  #
6
6
  # This file is part of kramdown.
7
7
  #
@@ -1,7 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
3
  #--
4
- # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
4
+ # Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
5
5
  #
6
6
  # This file is part of kramdown.
7
7
  #
@@ -1,7 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
3
  #--
4
- # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
4
+ # Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
5
5
  #
6
6
  # This file is part of kramdown.
7
7
  #
@@ -1,7 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
3
  #--
4
- # Copyright (C) 2009 Thomas Leitner <t_leitner@gmx.at>
4
+ # Copyright (C) 2009-2010 Thomas Leitner <t_leitner@gmx.at>
5
5
  #
6
6
  # This file is part of kramdown.
7
7
  #
@@ -20,179 +20,102 @@
20
20
  #++
21
21
  #
22
22
 
23
- require 'rexml/parsers/baseparser'
23
+ require 'kramdown/parser/html'
24
24
 
25
25
  module Kramdown
26
26
  module Parser
27
27
  class Kramdown
28
28
 
29
- #:stopdoc:
30
- # The following regexps are based on the ones used by REXML, with some slight modifications.
31
- #:startdoc:
32
- HTML_COMMENT_RE = /<!--(.*?)-->/m
33
- HTML_INSTRUCTION_RE = /<\?(.*?)\?>/m
34
- HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})\s*=\s*(["'])(.*?)\2/m
35
- HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/m
36
- HTML_TAG_CLOSE_RE = /<\/(#{REXML::Parsers::BaseParser::NAME_STR})\s*>/m
37
-
38
-
39
- HTML_PARSE_AS_BLOCK = %w{applet button blockquote colgroup dd div dl fieldset form iframe li
40
- map noscript object ol table tbody td th thead tfoot tr ul}
41
- HTML_PARSE_AS_SPAN = %w{a abbr acronym address b bdo big cite caption code del dfn dt em
42
- h1 h2 h3 h4 h5 h6 i ins kbd label legend optgroup p pre q rb rbc
43
- rp rt rtc ruby samp select small span strong sub sup tt var}
44
- HTML_PARSE_AS_RAW = %w{script math option textarea}
45
-
46
- HTML_PARSE_AS = Hash.new {|h,k| h[k] = :raw}
47
- HTML_PARSE_AS_BLOCK.each {|i| HTML_PARSE_AS[i] = :block}
48
- HTML_PARSE_AS_SPAN.each {|i| HTML_PARSE_AS[i] = :span}
49
- HTML_PARSE_AS_RAW.each {|i| HTML_PARSE_AS[i] = :raw}
50
-
51
- #:stopdoc:
52
- # Some HTML elements like script belong to both categories (i.e. are valid in block and
53
- # span HTML) and don't appear therefore!
54
- #:startdoc:
55
- HTML_SPAN_ELEMENTS = %w{a abbr acronym b big bdo br button cite code del dfn em i img input
56
- ins kbd label option q rb rbc rp rt rtc ruby samp select small span
57
- strong sub sup textarea tt var}
58
- HTML_BLOCK_ELEMENTS = %w{address applet button blockquote caption col colgroup dd div dl dt fieldset
59
- form h1 h2 h3 h4 h5 h6 hr iframe legend li map ol optgroup p pre table tbody
60
- td th thead tfoot tr ul}
61
- HTML_ELEMENTS_WITHOUT_BODY = %w{area br col hr img input}
62
-
63
- HTML_BLOCK_START = /^#{OPT_SPACE}<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/
64
- HTML_RAW_START = /(?=<(#{REXML::Parsers::BaseParser::UNAME_STR}|\/))/
65
-
66
- # Parse the HTML at the current position as block level HTML.
67
- def parse_block_html
68
- if result = @src.scan(HTML_COMMENT_RE)
69
- @tree.children << Element.new(:xml_comment, result, :type => :block)
70
- @src.scan(/[ \t]*\n/)
71
- true
72
- elsif result = @src.scan(HTML_INSTRUCTION_RE)
73
- @tree.children << Element.new(:xml_pi, result, :type => :block)
74
- @src.scan(/[ \t]*\n/)
75
- true
76
- else
77
- if result = @src.check(/^#{OPT_SPACE}#{HTML_TAG_RE}/) && !HTML_SPAN_ELEMENTS.include?(@src[1])
78
- @src.pos += @src.matched_size
79
- handle_html_start_tag
80
- true
81
- elsif result = @src.check(/^#{OPT_SPACE}#{HTML_TAG_CLOSE_RE}/) && !HTML_SPAN_ELEMENTS.include?(@src[1])
82
- @src.pos += @src.matched_size
83
- name = @src[1]
84
-
85
- if @tree.type ==:html_element && @tree.value == name
86
- throw :stop_block_parsing, :found
87
- else
88
- warning("Found invalidly used HTML closing tag for '#{name}' - ignoring it")
89
- true
90
- end
91
- else
92
- false
93
- end
94
- end
95
- end
96
- define_parser(:block_html, HTML_BLOCK_START)
97
-
98
-
99
- # Return the HTML parse type defined by the string +val+, i.e. raw when "0", default parsing
100
- # (return value +nil+) when "1", span parsing when "span" and block parsing when "block". If
101
- # +val+ is nil, then the default parsing mode is used.
102
- def get_parse_type(val)
103
- case val
104
- when "0" then :raw
105
- when "1" then :default
106
- when "span" then :span
107
- when "block" then :block
108
- when NilClass then nil
109
- else
110
- warning("Invalid markdown attribute val '#{val}', using default")
111
- nil
112
- end
113
- end
114
-
115
- # Process the HTML start tag that has already be scanned/checked.
116
- def handle_html_start_tag
117
- curpos = @src.pos
118
- name = @src[1]
119
- closed = !@src[4].nil?
120
- attrs = {}
121
- @src[2].scan(HTML_ATTRIBUTE_RE).each {|attr,sep,val| attrs[attr] = val}
29
+ include Kramdown::Parser::Html::Parser
122
30
 
31
+ def handle_kramdown_html_tag(el, closed)
123
32
  parse_type = if @tree.type != :html_element || @tree.options[:parse_type] != :raw
124
- (@doc.options[:parse_block_html] ? HTML_PARSE_AS[name] : :raw)
33
+ (@doc.options[:parse_block_html] ? HTML_PARSE_AS[el.value] : :raw)
125
34
  else
126
35
  :raw
127
36
  end
128
- if val = get_parse_type(attrs.delete('markdown'))
129
- parse_type = (val == :default ? HTML_PARSE_AS[name] : val)
37
+ if val = html_parse_type(el.options[:attr].delete('markdown'))
38
+ parse_type = (val == :default ? HTML_PARSE_AS[el.value] : val)
130
39
  end
131
40
 
132
41
  @src.scan(/[ \t]*\n/) if parse_type == :block
133
-
134
- el = Element.new(:html_element, name, :attr => attrs, :type => :block, :parse_type => parse_type)
135
42
  el.options[:outer_element] = true if @tree.type != :html_element
136
43
  el.options[:parent_is_raw] = true if @tree.type == :html_element && @tree.options[:parse_type] == :raw
137
- @tree.children << el
44
+ el.options[:parse_type] = parse_type
138
45
 
139
- if !closed && HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
140
- warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it")
141
- elsif !closed
46
+ if !closed
142
47
  if parse_type == :block
143
48
  end_tag_found = parse_blocks(el)
144
49
  if !end_tag_found
145
50
  warning("Found no end tag for '#{el.value}' - auto-closing it")
146
51
  end
147
52
  elsif parse_type == :span
53
+ curpos = @src.pos
148
54
  if result = @src.scan_until(/(?=<\/#{el.value}\s*>)/m)
149
- add_text(extract_string(curpos...@src.pos), el)
55
+ add_text(extract_string(curpos...@src.pos, @src), el)
150
56
  @src.scan(HTML_TAG_CLOSE_RE)
151
57
  else
152
58
  add_text(@src.scan(/.*/m), el)
153
59
  warning("Found no end tag for '#{el.value}' - auto-closing it")
154
60
  end
155
61
  else
156
- parse_raw_html(el)
62
+ parse_raw_html(el, &method(:handle_kramdown_html_tag))
157
63
  end
158
64
  @src.scan(/[ \t]*\n/) unless (@tree.type == :html_element && @tree.options[:parse_type] == :raw)
159
65
  end
160
66
  end
161
67
 
162
- # Parse raw HTML until the matching end tag for +el+ is found or until the end of the
163
- # document.
164
- def parse_raw_html(el)
165
- @stack.push(@tree)
166
- @tree = el
167
-
168
- done = false
169
- endpos = nil
170
- while !@src.eos? && !done
171
- if result = @src.scan_until(HTML_RAW_START)
172
- endpos = @src.pos
173
- add_text(result, @tree, :html_text)
174
- if @src.scan(HTML_TAG_RE)
175
- handle_html_start_tag
176
- elsif @src.scan(HTML_TAG_CLOSE_RE)
177
- if @tree.value == @src[1]
178
- done = true
179
- else
180
- warning("Found invalidly used HTML closing tag for '#{@src[1]}' - ignoring it")
181
- end
68
+ # Return the HTML parse type defined by the string +val+, i.e. raw when "0", default parsing
69
+ # (return value +nil+) when "1", span parsing when "span" and block parsing when "block". If
70
+ # +val+ is nil, then the default parsing mode is used.
71
+ def html_parse_type(val)
72
+ case val
73
+ when "0" then :raw
74
+ when "1" then :default
75
+ when "span" then :span
76
+ when "block" then :block
77
+ when NilClass then nil
78
+ else
79
+ warning("Invalid markdown attribute val '#{val}', using default")
80
+ nil
81
+ end
82
+ end
83
+
84
+
85
+ HTML_BLOCK_START = /^#{OPT_SPACE}<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/
86
+
87
+ # Parse the HTML at the current position as block level HTML.
88
+ def parse_block_html
89
+ if result = @src.scan(HTML_COMMENT_RE)
90
+ @tree.children << Element.new(:xml_comment, result, :category => :block)
91
+ @src.scan(/[ \t]*\n/)
92
+ true
93
+ elsif result = @src.scan(HTML_INSTRUCTION_RE)
94
+ @tree.children << Element.new(:xml_pi, result, :category => :block)
95
+ @src.scan(/[ \t]*\n/)
96
+ true
97
+ else
98
+ if result = @src.check(/^#{OPT_SPACE}#{HTML_TAG_RE}/) && !HTML_SPAN_ELEMENTS.include?(@src[1])
99
+ @src.pos += @src.matched_size
100
+ handle_html_start_tag(&method(:handle_kramdown_html_tag))
101
+ Kramdown::Parser::Html::ElementConverter.new.process(@tree.children.last) if @doc.options[:html_to_native]
102
+ true
103
+ elsif result = @src.check(/^#{OPT_SPACE}#{HTML_TAG_CLOSE_RE}/) && !HTML_SPAN_ELEMENTS.include?(@src[1])
104
+ @src.pos += @src.matched_size
105
+ name = @src[1]
106
+
107
+ if @tree.type == :html_element && @tree.value == name
108
+ throw :stop_block_parsing, :found
182
109
  else
183
- add_text(@src.scan(/./), @tree, :html_text)
110
+ warning("Found invalidly used HTML closing tag for '#{name}' - ignoring it")
111
+ true
184
112
  end
185
113
  else
186
- result = @src.scan(/.*/m)
187
- add_text(result, @tree, :html_text)
188
- warning("Found no end tag for '#{@tree.value}' - auto-closing it")
189
- done = true
114
+ false
190
115
  end
191
116
  end
192
-
193
- @tree = @stack.pop
194
- endpos
195
117
  end
118
+ define_parser(:block_html, HTML_BLOCK_START)
196
119
 
197
120
 
198
121
  HTML_SPAN_START = /<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/
@@ -200,9 +123,9 @@ module Kramdown
200
123
  # Parse the HTML at the current position as span level HTML.
201
124
  def parse_span_html
202
125
  if result = @src.scan(HTML_COMMENT_RE)
203
- @tree.children << Element.new(:xml_comment, result, :type => :span)
126
+ @tree.children << Element.new(:xml_comment, result, :category => :span)
204
127
  elsif result = @src.scan(HTML_INSTRUCTION_RE)
205
- @tree.children << Element.new(:xml_pi, result, :type => :span)
128
+ @tree.children << Element.new(:xml_pi, result, :category => :span)
206
129
  elsif result = @src.scan(HTML_TAG_CLOSE_RE)
207
130
  warning("Found invalidly used HTML closing tag for '#{@src[1]}' - ignoring it")
208
131
  elsif result = @src.scan(HTML_TAG_RE)
@@ -212,36 +135,33 @@ module Kramdown
212
135
  attrs = {}
213
136
  @src[2].scan(HTML_ATTRIBUTE_RE).each {|name,sep,val| attrs[name] = val.gsub(/\n+/, ' ')}
214
137
 
215
- do_parsing = (HTML_PARSE_AS_RAW.include?(@src[1]) ? false : @doc.options[:parse_span_html])
216
- if val = get_parse_type(attrs.delete('markdown'))
138
+ do_parsing = (HTML_PARSE_AS_RAW.include?(@src[1]) || @tree.options[:parse_type] == :raw ? false : @doc.options[:parse_span_html])
139
+ if val = html_parse_type(attrs.delete('markdown'))
217
140
  if val == :block
218
141
  warning("Cannot use block level parsing in span level HTML tag - using default mode")
219
142
  elsif val == :span
220
143
  do_parsing = true
221
144
  elsif val == :default
222
- (HTML_PARSE_AS_RAW.include?(@src[1]) ? false : true)
145
+ do_parsing = !HTML_PARSE_AS_RAW.include?(@src[1])
223
146
  elsif val == :raw
224
147
  do_parsing = false
225
148
  end
226
149
  end
227
150
 
228
- el = Element.new(:html_element, @src[1], :attr => attrs, :type => :span)
151
+ el = Element.new(:html_element, @src[1], :attr => attrs, :category => :span, :parse_type => (do_parsing ? :span : :raw))
152
+ @tree.children << el
229
153
  stop_re = /<\/#{Regexp.escape(@src[1])}\s*>/
230
- if @src[4]
231
- @tree.children << el
232
- elsif HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
154
+ if !@src[4] && HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
233
155
  warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it")
234
- @tree.children << el
235
- else
236
- if parse_spans(el, stop_re, (do_parsing ? nil : [:span_html]), (do_parsing ? :text : :html_text))
237
- end_pos = @src.pos
156
+ elsif !@src[4]
157
+ if parse_spans(el, stop_re, (do_parsing ? nil : [:span_html]))
238
158
  @src.scan(stop_re)
239
159
  else
240
160
  warning("Found no end tag for '#{el.value}' - auto-closing it")
241
- add_text(@src.scan(/.*/m))
161
+ add_text(@src.scan(/.*/m), el)
242
162
  end
243
- @tree.children << el
244
163
  end
164
+ Kramdown::Parser::Html::ElementConverter.new.process(el) if @doc.options[:html_to_native]
245
165
  else
246
166
  add_text(@src.scan(/./))
247
167
  end