kramdown 0.13.2 → 0.13.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of kramdown might be problematic. Click here for more details.

Files changed (39)
  1. data/CONTRIBUTERS +1 -1
  2. data/ChangeLog +196 -0
  3. data/Rakefile +5 -3
  4. data/VERSION +1 -1
  5. data/data/kramdown/document.latex +1 -1
  6. data/doc/default.scss.css +7 -1
  7. data/doc/index.page +1 -1
  8. data/doc/quickref.page +29 -8
  9. data/doc/tests.page +1 -1
  10. data/lib/kramdown/converter/base.rb +5 -5
  11. data/lib/kramdown/converter/html.rb +14 -15
  12. data/lib/kramdown/converter/kramdown.rb +5 -3
  13. data/lib/kramdown/converter/latex.rb +16 -16
  14. data/lib/kramdown/document.rb +47 -50
  15. data/lib/kramdown/parser/base.rb +8 -8
  16. data/lib/kramdown/parser/html.rb +56 -36
  17. data/lib/kramdown/parser/kramdown.rb +5 -5
  18. data/lib/kramdown/parser/kramdown/extensions.rb +2 -2
  19. data/lib/kramdown/parser/kramdown/link.rb +1 -1
  20. data/lib/kramdown/parser/kramdown/table.rb +3 -3
  21. data/lib/kramdown/utils/html.rb +6 -7
  22. data/lib/kramdown/utils/ordered_hash.rb +17 -0
  23. data/lib/kramdown/version.rb +1 -1
  24. data/test/test_files.rb +35 -0
  25. data/test/testcases/block/09_html/content_model/tables.html +8 -8
  26. data/test/testcases/block/09_html/content_model/tables.text +1 -1
  27. data/test/testcases/block/09_html/html_and_headers.html +5 -0
  28. data/test/testcases/block/09_html/html_and_headers.text +6 -0
  29. data/test/testcases/block/09_html/html_to_native/emphasis.html +2 -1
  30. data/test/testcases/block/09_html/html_to_native/table_normal.html +10 -12
  31. data/test/testcases/block/09_html/html_to_native/table_simple.html +11 -11
  32. data/test/testcases/block/14_table/header.html +9 -16
  33. data/test/testcases/block/14_table/simple.html +19 -0
  34. data/test/testcases/block/14_table/simple.html.19 +19 -0
  35. data/test/testcases/block/14_table/simple.text +6 -0
  36. data/test/testcases/block/14_table/table_with_footnote.html +25 -0
  37. data/test/testcases/block/14_table/table_with_footnote.latex +11 -0
  38. data/test/testcases/block/14_table/table_with_footnote.text +6 -0
  39. metadata +8 -3
@@ -43,14 +43,16 @@ module Kramdown
43
43
 
44
44
  def convert(el, opts = {:indent => 0})
45
45
  res = send("convert_#{el.type}", el, opts)
46
- if el.type != :html_element && el.type != :li && el.type != :dd && (ial = ial_for_element(el))
46
+ if ![:html_element, :li, :dd, :td].include?(el.type) && (ial = ial_for_element(el))
47
47
  res << ial
48
48
  res << "\n\n" if Element.category(el) == :block
49
49
  elsif [:ul, :dl, :ol, :codeblock].include?(el.type) && opts[:next] &&
50
50
  ([el.type, :codeblock].include?(opts[:next].type) ||
51
51
  (opts[:next].type == :blank && opts[:nnext] && [el.type, :codeblock].include?(opts[:nnext].type)))
52
52
  res << "^\n\n"
53
- elsif Element.category(el) == :block && ![:li, :dd, :dt, :td, :th, :tr, :thead, :tbody, :tfoot, :blank].include?(el.type) &&
53
+ elsif Element.category(el) == :block &&
54
+ ![:li, :dd, :dt, :td, :th, :tr, :thead, :tbody, :tfoot, :blank].include?(el.type) &&
55
+ (el.type != :html_element || @stack.last.type != :html_element) &&
54
56
  (el.type != :p || !el.options[:transparent])
55
57
  res << "\n"
56
58
  end
@@ -213,7 +215,7 @@ module Kramdown
213
215
  if el.options[:category] == :block && (@stack.last.type != :html_element || @stack.last.options[:content_model] != :raw)
214
216
  el.value + "\n"
215
217
  else
216
- el.value
218
+ el.value.dup
217
219
  end
218
220
  end
219
221
  alias :convert_xml_pi :convert_xml_comment
@@ -30,13 +30,13 @@ module Kramdown
30
30
  #
31
31
  # This converter uses ideas from other Markdown-to-LaTeX converters like Pandoc and Maruku.
32
32
  #
33
- # You can customize this converter by sub-classing it and overriding the <tt>convert_NAME</tt>
34
- # methods. Each such method takes the following parameters:
33
+ # You can customize this converter by sub-classing it and overriding the +convert_NAME+ methods.
34
+ # Each such method takes the following parameters:
35
35
  #
36
36
  # [+el+] The element of type +NAME+ to be converted.
37
37
  #
38
38
  # [+opts+] A hash containing processing options that are passed down from parent elements. The
39
- # key <tt>:parent</tt> is always set and contains the parent element as value.
39
+ # key :parent is always set and contains the parent element as value.
40
40
  #
41
41
  # The return value of such a method has to be a string containing the element +el+ formatted
42
42
  # correctly as LaTeX markup.
@@ -50,8 +50,8 @@ module Kramdown
50
50
  @data[:packages] = Set.new
51
51
  end
52
52
 
53
- # Dispatch the conversion of the element +el+ to a <tt>convert_TYPE</tt> method using the
54
- # +type+ of the element.
53
+ # Dispatch the conversion of the element +el+ to a +convert_TYPE+ method using the +type+ of
54
+ # the element.
55
55
  def convert(el, opts = {})
56
56
  send("convert_#{el.type}", el, opts)
57
57
  end
@@ -88,8 +88,8 @@ module Kramdown
88
88
  end
89
89
  end
90
90
 
91
- # Helper method used by +convert_p+ to convert a paragraph that only contains a single
92
- # <tt>:img</tt> element.
91
+ # Helper method used by +convert_p+ to convert a paragraph that only contains a single :img
92
+ # element.
93
93
  def convert_standalone_image(el, opts, img)
94
94
  attrs = attribute_list(el)
95
95
  "\\begin{figure}#{attrs}\n\\begin{center}\n#{img}\n\\end{center}\n\\caption{#{escape(el.children.first.attr['alt'])}}\n#{latex_link_target(el, true)}\n\\end{figure}#{attrs}\n"
@@ -180,9 +180,10 @@ module Kramdown
180
180
  TABLE_ALIGNMENT_CHAR = {:default => 'l', :left => 'l', :center => 'c', :right => 'r'} # :nodoc:
181
181
 
182
182
  def convert_table(el, opts)
183
+ @data[:packages] << 'longtable'
183
184
  align = el.options[:alignment].map {|a| TABLE_ALIGNMENT_CHAR[a]}.join('|')
184
185
  attrs = attribute_list(el)
185
- "#{latex_link_target(el)}\\begin{tabular}{|#{align}|}#{attrs}\n\\hline\n#{inner(el, opts)}\\hline\n\\end{tabular}#{attrs}\n\n"
186
+ "#{latex_link_target(el)}\\begin{longtable}{|#{align}|}#{attrs}\n\\hline\n#{inner(el, opts)}\\hline\n\\end{longtable}#{attrs}\n\n"
186
187
  end
187
188
 
188
189
  def convert_thead(el, opts)
@@ -228,7 +229,7 @@ module Kramdown
228
229
  if el.attr['src'] =~ /^(https?|ftps?):\/\//
229
230
  warning("Cannot include non-local image")
230
231
  ''
231
- elsif !el.options.attr['src'].empty?
232
+ elsif !el.attr['src'].empty?
232
233
  @data[:packages] << 'graphicx'
233
234
  "#{latex_link_target(el)}\\includegraphics{#{el.attr['src']}}"
234
235
  else
@@ -560,17 +561,16 @@ module Kramdown
560
561
  end
561
562
 
562
563
  # Wrap the +text+ inside a LaTeX environment of type +type+. The element +el+ is passed on to
563
- # the method #attribute_list -- the resulting string is appended to both the <tt>\\begin</tt>
564
- # and the <tt>\\end</tt> lines of the LaTeX environment for easier post-processing of LaTeX
565
- # environments.
564
+ # the method #attribute_list -- the resulting string is appended to both the \\begin and the
565
+ # \\end lines of the LaTeX environment for easier post-processing of LaTeX environments.
566
566
  def latex_environment(type, el, text)
567
567
  attrs = attribute_list(el)
568
568
  "\\begin{#{type}}#{latex_link_target(el)}#{attrs}\n#{text.rstrip}\n\\end{#{type}}#{attrs}\n"
569
569
  end
570
570
 
571
- # Return a string containing a valid <tt>\hypertarget</tt> command if the element has an ID
572
- # defined, or +nil+ otherwise. If the parameter +add_label+ is +true+, a <tt>\label</tt>
573
- # command will also be used additionally to the <tt>\hypertarget</tt> command.
571
+ # Return a string containing a valid \hypertarget command if the element has an ID defined, or
572
+ # +nil+ otherwise. If the parameter +add_label+ is +true+, a \label command will also be used
573
+ # additionally to the \hypertarget command.
574
574
  def latex_link_target(el, add_label = false)
575
575
  if (id = el.attr['id'])
576
576
  "\\hypertarget{#{id}}{}" << (add_label ? "\\label{#{id}}" : '')
@@ -579,7 +579,7 @@ module Kramdown
579
579
  end
580
580
  end
581
581
 
582
- # Return a LaTeX comment containing all attributes as <tt>key="value"</tt> pairs.
582
+ # Return a LaTeX comment containing all attributes as 'key="value"' pairs.
583
583
  def attribute_list(el)
584
584
  attrs = el.attr.map {|k,v| v.nil? ? '' : " #{k}=\"#{v.to_s}\""}.compact.sort.join('')
585
585
  attrs = " % #{attrs}" if !attrs.empty?
@@ -74,10 +74,10 @@ module Kramdown
74
74
  # Create a new Kramdown document from the string +source+ and use the provided +options+. The
75
75
  # options that can be used are defined in the Options module.
76
76
  #
77
- # The special options key <tt>:input</tt> can be used to select the parser that should parse the
77
+ # The special options key :input can be used to select the parser that should parse the
78
78
  # +source+. It has to be the name of a class in the Kramdown::Parser module. For example, to
79
- # select the kramdown parser, one would set the <tt>:input</tt> key to +Kramdown+. If this key
80
- # is not set, it defaults to +Kramdown+.
79
+ # select the kramdown parser, one would set the :input key to +Kramdown+. If this key is not
80
+ # set, it defaults to +Kramdown+.
81
81
  #
82
82
  # The +source+ is immediately parsed by the selected parser so that the root element is
83
83
  # immediately available and the output can be generated.
@@ -132,11 +132,11 @@ module Kramdown
132
132
  #
133
133
  # The root element contains the following option keys:
134
134
  #
135
- # <tt>:encoding</tt>:: When running on Ruby 1.9 this key has to be set to the encoding used for
136
- # the text parts of the kramdown document.
135
+ # :encoding:: When running on Ruby 1.9 this key has to be set to the encoding used for the text
136
+ # parts of the kramdown document.
137
137
  #
138
- # <tt>:abbrev_defs</tt>:: This key may be used to store the mapping of abbreviation to
139
- # abbreviation definition.
138
+ # :abbrev_defs:: This key may be used to store the mapping of abbreviation to abbreviation
139
+ # definition.
140
140
  #
141
141
  #
142
142
  # === :blank
@@ -159,8 +159,8 @@ module Kramdown
159
159
  #
160
160
  # Represents a paragraph.
161
161
  #
162
- # If the option <tt>:transparent</tt> is +true+, this element just represents a block of text.
163
- # I.e. this element just functions as a container for span-level elements.
162
+ # If the option :transparent is +true+, this element just represents a block of text. I.e. this
163
+ # element just functions as a container for span-level elements.
164
164
  #
165
165
  #
166
166
  # === :header
@@ -171,8 +171,8 @@ module Kramdown
171
171
  #
172
172
  # Represents a header.
173
173
  #
174
- # The option <tt>:level</tt> specifies the header level and has to contain a number between 1 and
175
- # \6. The option <tt>:raw_text</tt> has to contain the raw header text.
174
+ # The option :level specifies the header level and has to contain a number between 1 and \6. The
175
+ # option :raw_text has to contain the raw header text.
176
176
  #
177
177
  #
178
178
  # === :blockquote
@@ -269,9 +269,8 @@ module Kramdown
269
269
  # Represents a table. Each table row (i.e. :tr element) of the table has to contain the same
270
270
  # number of :td elements.
271
271
  #
272
- # The option <tt>:alignment</tt> has to be an array containing the alignment values, exactly one
273
- # for each column of the table. The possible alignment values are <tt>:left</tt>,
274
- # <tt>:center</tt>, <tt>:right</tt> and <tt>:default</tt>.
272
+ # The option :alignment has to be an array containing the alignment values, exactly one for each
273
+ # column of the table. The possible alignment values are :left, :center, :right and :default.
275
274
  #
276
275
  #
277
276
  # === :thead
@@ -330,8 +329,8 @@ module Kramdown
330
329
  #
331
330
  # The +value+ field has to contain the actual mathematical text.
332
331
  #
333
- # The option <tt>:category</tt> has to be set to either <tt>:span</tt> or <tt>:block</tt>
334
- # depending on the context where the element is used.
332
+ # The option :category has to be set to either :span or :block depending on the context where the
333
+ # element is used.
335
334
  #
336
335
  #
337
336
  # == Text Markup Elements
@@ -400,9 +399,9 @@ module Kramdown
400
399
  # Represents a footnote marker.
401
400
  #
402
401
  # The +value+ field has to contain an element whose children are the content of the footnote. The
403
- # option <tt>:name</tt> has to contain a valid and unique footnote name. A valid footnote name
404
- # consists of a word character or a digit and then optionally followed by other word characters,
405
- # digits or dashes.
402
+ # option :name has to contain a valid and unique footnote name. A valid footnote name consists of
403
+ # a word character or a digit and then optionally followed by other word characters, digits or
404
+ # dashes.
406
405
  #
407
406
  #
408
407
  # === :em
@@ -432,7 +431,7 @@ module Kramdown
432
431
  # Represents an HTML entity.
433
432
  #
434
433
  # The +value+ field has to contain an instance of Kramdown::Utils::Entities::Entity. The option
435
- # <tt>:original</tt> can be used to store the original representation of the entity.
434
+ # :original can be used to store the original representation of the entity.
436
435
  #
437
436
  #
438
437
  # === :typographic_sym
@@ -446,13 +445,13 @@ module Kramdown
446
445
  # The +value+ field needs to contain a Symbol representing the specific typographic symbol from
447
446
  # the following list:
448
447
  #
449
- # <tt>:mdash</tt>:: An mdash character (---)
450
- # <tt>:ndash</tt>:: An ndash character (--)
451
- # <tt>:hellip</tt>:: An ellipsis (...)
452
- # <tt>:laquo</tt>:: A left guillemet (<<)
453
- # <tt>:raquo</tt>:: A right guillemet (>>)
454
- # <tt>:laquo_space</tt>:: A left guillemet with a space (<< )
455
- # <tt>:raquo_space</tt>:: A right guillemet with a space ( >>)
448
+ # :mdash:: An mdash character (---)
449
+ # :ndash:: An ndash character (--)
450
+ # :hellip:: An ellipsis (...)
451
+ # :laquo:: A left guillemet (<<)
452
+ # :raquo:: A right guillemet (>>)
453
+ # :laquo_space:: A left guillemet with a space (<< )
454
+ # :raquo_space:: A right guillemet with a space ( >>)
456
455
  #
457
456
  #
458
457
  # === :smart_quote
@@ -465,10 +464,10 @@ module Kramdown
465
464
  #
466
465
  # The +value+ field needs to contain a Symbol representing the specific quotation character:
467
466
  #
468
- # <tt>:lsquo</tt>:: Left single quote
469
- # <tt>:rsquo</tt>:: Right single quote
470
- # <tt>:ldquo</tt>:: Left double quote
471
- # <tt>:rdquo</tt>:: Right double quote
467
+ # :lsquo:: Left single quote
468
+ # :rsquo:: Right single quote
469
+ # :ldquo:: Left double quote
470
+ # :rdquo:: Right double quote
472
471
  #
473
472
  #
474
473
  # === :abbreviation
@@ -480,7 +479,7 @@ module Kramdown
480
479
  # Represents a text part that is an abbreviation.
481
480
  #
482
481
  # The +value+ field has to contain the text part that is the abbreviation. The definition of the
483
- # abbreviation is stored in the <tt>:root</tt> element of the document.
482
+ # abbreviation is stored in the :root element of the document.
484
483
  #
485
484
  #
486
485
  # == Other Elements
@@ -495,11 +494,10 @@ module Kramdown
495
494
  #
496
495
  # The +value+ field has to contain the name of the HTML element the element is representing.
497
496
  #
498
- # The option <tt>:category</tt> has to be set to either <tt>:span</tt> or <tt>:block</tt>
499
- # depending on the whether the element is a block-level or a span-level element. The option
500
- # <tt>:content_model</tt> has to be set to the content model for the element (either
501
- # <tt>:block</tt> if it contains block-level elements, <tt>:span</tt> if it contains span-level
502
- # elements or <tt>:raw</tt> if it contains raw content).
497
+ # The option :category has to be set to either :span or :block depending on the whether the
498
+ # element is a block-level or a span-level element. The option :content_model has to be set to the
499
+ # content model for the element (either :block if it contains block-level elements, :span if it
500
+ # contains span-level elements or :raw if it contains raw content).
503
501
  #
504
502
  #
505
503
  # === :xml_comment
@@ -512,8 +510,8 @@ module Kramdown
512
510
  #
513
511
  # The +value+ field has to contain the whole XML/HTML comment including the delimiters.
514
512
  #
515
- # The option <tt>:category</tt> has to be set to either <tt>:span</tt> or <tt>:block</tt>
516
- # depending on the context where the element is used.
513
+ # The option :category has to be set to either :span or :block depending on the context where the
514
+ # element is used.
517
515
  #
518
516
  #
519
517
  # === :xml_pi
@@ -527,8 +525,8 @@ module Kramdown
527
525
  # The +value+ field has to contain the whole XML/HTML processing instruction including the
528
526
  # delimiters.
529
527
  #
530
- # The option <tt>:category</tt> has to be set to either <tt>:span</tt> or <tt>:block</tt>
531
- # depending on the context where the element is used.
528
+ # The option :category has to be set to either :span or :block depending on the context where the
529
+ # element is used.
532
530
  #
533
531
  #
534
532
  # === :comment
@@ -541,8 +539,8 @@ module Kramdown
541
539
  #
542
540
  # The +value+ field has to contain the comment.
543
541
  #
544
- # The option <tt>:category</tt> has to be set to either <tt>:span</tt> or <tt>:block</tt>
545
- # depending on the context where the element is used.
542
+ # The option :category has to be set to either :span or :block depending on the context where the
543
+ # element is used.
546
544
  #
547
545
  #
548
546
  # === :raw
@@ -556,12 +554,12 @@ module Kramdown
556
554
  #
557
555
  # The +value+ field has to contain the actual raw text.
558
556
  #
559
- # The option <tt>:category</tt> has to be set to either <tt>:span</tt> or <tt>:block</tt>
560
- # depending on the context where the element is used. The option <tt>:type</tt> can be set to an
561
- # array of strings to define for which converters the raw string is valid.
557
+ # The option :category has to be set to either :span or :block depending on the context where the
558
+ # element is used. The option :type can be set to an array of strings to define for which
559
+ # converters the raw string is valid.
562
560
  class Element
563
561
 
564
- # A symbol representing the element type. For example, <tt>:p</tt> or <tt>:blockquote</tt>.
562
+ # A symbol representing the element type. For example, :p or :blockquote.
565
563
  attr_accessor :type
566
564
 
567
565
  # The value of the element. The interpretation of this field depends on the type of the element.
@@ -598,11 +596,10 @@ module Kramdown
598
596
  [:text, :a, :br, :img, :codespan, :footnote, :em, :strong, :entity, :typographic_sym,
599
597
  :smart_quote, :abbreviation].each {|b| CATEGORY[b] = :span}
600
598
 
601
- # Return the category of +el+ which can be <tt>:block</tt>, <tt>:span</tt> or +nil+.
599
+ # Return the category of +el+ which can be :block, :span or +nil+.
602
600
  #
603
601
  # Most elements have a fixed category, however, some elements can either appear in a block-level
604
- # or a span-level context. These elements need to have the option <tt>:category</tt> correctly
605
- # set.
602
+ # or a span-level context. These elements need to have the option :category correctly set.
606
603
  def self.category(el)
607
604
  CATEGORY[el.type] || el.options[:category]
608
605
  end
@@ -37,8 +37,8 @@ module Kramdown
37
37
  #
38
38
  # Implementing a new parser is rather easy: just derive a new class from this class and put it
39
39
  # in the Kramdown::Parser module -- the latter is needed so that the auto-detection of the new
40
- # parser works correctly. Then you need to implement the <tt>#parse</tt> method which has to
41
- # contain the parsing code.
40
+ # parser works correctly. Then you need to implement the +#parse+ method which has to contain
41
+ # the parsing code.
42
42
  #
43
43
  # Have a look at the Base::parse, Base::new and Base#parse methods for additional information!
44
44
  class Base
@@ -57,8 +57,8 @@ module Kramdown
57
57
 
58
58
  # Initialize the parser object with the +source+ string and the parsing +options+.
59
59
  #
60
- # The <tt>@root</tt> element, the <tt>@warnings</tt> array and <tt>@text_type</tt> (specifies
61
- # the default type for newly created text nodes) are automatically initialized.
60
+ # The @root element, the @warnings array and @text_type (specifies the default type for newly
61
+ # created text nodes) are automatically initialized.
62
62
  def initialize(source, options)
63
63
  @source = source
64
64
  @options = Kramdown::Options.merge(options)
@@ -71,7 +71,7 @@ module Kramdown
71
71
  # Parse the +source+ string into an element tree, possibly using the parsing +options+, and
72
72
  # return the root element of the element tree and an array with warning messages.
73
73
  #
74
- # Initializes a new instance of the calling class and then calls the #parse method that must
74
+ # Initializes a new instance of the calling class and then calls the +#parse+ method that must
75
75
  # be implemented by each subclass.
76
76
  def self.parse(source, options = {})
77
77
  parser = new(source, options)
@@ -81,8 +81,8 @@ module Kramdown
81
81
 
82
82
  # Parse the source string into an element tree.
83
83
  #
84
- # The parsing code should parse the source provided in <tt>@source</tt> and build an element
85
- # tree the root of which should be <tt>@root</tt>.
84
+ # The parsing code should parse the source provided in @source and build an element tree the
85
+ # root of which should be @root.
86
86
  #
87
87
  # This is the only method that has to be implemented by sub-classes!
88
88
  def parse
@@ -96,7 +96,7 @@ module Kramdown
96
96
  end
97
97
 
98
98
  # Modify the string +source+ to be usable by the parser (unifies line ending characters to
99
- # <tt>\n</tt> and makes sure +source+ ends with a new line character).
99
+ # +\n+ and makes sure +source+ ends with a new line character).
100
100
  def adapt_source(source)
101
101
  source.gsub(/\r\n?/, "\n").chomp + "\n"
102
102
  end
@@ -45,13 +45,15 @@ module Kramdown
45
45
  HTML_TAG_CLOSE_RE = /<\/(#{REXML::Parsers::BaseParser::UNAME_STR})\s*>/m
46
46
  HTML_ENTITY_RE = /&([\w:][\-\w\.:]*);|&#(\d+);|&\#x([0-9a-fA-F]+);/
47
47
 
48
-
49
- HTML_CONTENT_MODEL_BLOCK = %w{applet button blockquote body colgroup dd div dl fieldset
50
- form iframe li map noscript object ol table tbody thead tfoot tr td ul}
51
- HTML_CONTENT_MODEL_SPAN = %w{a abbr acronym address b bdo big cite caption del dfn dt em
48
+ HTML_CONTENT_MODEL_BLOCK = %w{address applet article aside button blockquote body
49
+ dd div dl fieldset figure figcaption footer form header hgroup iframe li map menu nav
50
+ noscript object section td}
51
+ HTML_CONTENT_MODEL_SPAN = %w{a abbr acronym b bdo big button cite caption del dfn dt em
52
52
  h1 h2 h3 h4 h5 h6 i ins kbd label legend optgroup p q rb rbc
53
- rp rt rtc ruby samp select small span strong sub sup th tt var}
53
+ rp rt rtc ruby samp select small span strong sub sup summary th tt var}
54
54
  HTML_CONTENT_MODEL_RAW = %w{script math option textarea pre code}
55
+ # The following elements are also parsed as raw since they need child elements that cannot
56
+ # be expressed using kramdown syntax: colgroup table tbody thead tfoot tr ul ol
55
57
 
56
58
  HTML_CONTENT_MODEL = Hash.new {|h,k| h[k] = :raw}
57
59
  HTML_CONTENT_MODEL_BLOCK.each {|i| HTML_CONTENT_MODEL[i] = :block}
@@ -64,7 +66,7 @@ module Kramdown
64
66
  ins kbd label option q rb rbc rp rt rtc ruby samp select small span
65
67
  strong sub sup textarea tt var}
66
68
  HTML_BLOCK_ELEMENTS = %w{address article aside applet body button blockquote caption col colgroup dd div dl dt fieldset
67
- figcaption footer form h1 h2 h3 h4 h5 h6 header hgroup hr html head iframe legend listing menu
69
+ figcaption footer form h1 h2 h3 h4 h5 h6 header hgroup hr html head iframe legend menu
68
70
  li map nav ol optgroup p pre section summary table tbody td th thead tfoot tr ul}
69
71
  HTML_ELEMENTS_WITHOUT_BODY = %w{area base br col command embed hr img input keygen link meta param source track wbr}
70
72
  end
@@ -72,13 +74,13 @@ module Kramdown
72
74
 
73
75
  # Contains the parsing methods. This module can be mixed into any parser to get HTML parsing
74
76
  # functionality. The only thing that must be provided by the class are instance variable
75
- # <tt>@stack</tt> for storing the needed state and <tt>@src</tt> (instance of StringScanner)
76
- # for the actual parsing.
77
+ # @stack for storing the needed state and @src (instance of StringScanner) for the actual
78
+ # parsing.
77
79
  module Parser
78
80
 
79
81
  include Constants
80
82
 
81
- # Process the HTML start tag that has already be <tt>scan</tt>ned/<tt>check</tt>ed.
83
+ # Process the HTML start tag that has already be scanned/checked via @src.
82
84
  #
83
85
  # Does the common processing steps and then yields to the caller for further processing
84
86
  # (first parameter is the created element, the second parameter is +true+ if the HTML
@@ -215,28 +217,32 @@ module Kramdown
215
217
  else return
216
218
  end
217
219
 
218
- type = el.value
219
- remove_text_children(el) if REMOVE_TEXT_CHILDREN.include?(type)
220
-
221
220
  mname = "convert_#{el.value}"
222
221
  if do_conversion && self.class.method_defined?(mname)
223
222
  send(mname, el)
224
- elsif do_conversion && SIMPLE_ELEMENTS.include?(type)
225
- set_basics(el, type.intern)
226
- process_children(el, do_conversion, preserve_text)
227
223
  else
228
- process_html_element(el, do_conversion, preserve_text)
229
- end
224
+ type = el.value
225
+ remove_text_children(el) if do_conversion && REMOVE_TEXT_CHILDREN.include?(type)
226
+
227
+ if do_conversion && SIMPLE_ELEMENTS.include?(type)
228
+ set_basics(el, type.intern)
229
+ process_children(el, do_conversion, preserve_text)
230
+ else
231
+ process_html_element(el, do_conversion, preserve_text)
232
+ end
230
233
 
231
- strip_whitespace(el) if STRIP_WHITESPACE.include?(type)
232
- remove_whitespace_children(el) if REMOVE_WHITESPACE_CHILDREN.include?(type)
233
- wrap_text_children(el) if WRAP_TEXT_CHILDREN.include?(type)
234
+ if do_conversion
235
+ strip_whitespace(el) if STRIP_WHITESPACE.include?(type)
236
+ remove_whitespace_children(el) if REMOVE_WHITESPACE_CHILDREN.include?(type)
237
+ wrap_text_children(el) if WRAP_TEXT_CHILDREN.include?(type)
238
+ end
239
+ end
234
240
  end
235
241
 
236
242
  def process_children(el, do_conversion = true, preserve_text = false)
237
243
  el.children.map! do |c|
238
244
  if c.type == :text
239
- process_text(c.value, preserve_text)
245
+ process_text(c.value, preserve_text || !do_conversion)
240
246
  else
241
247
  process(c, do_conversion, preserve_text, el)
242
248
  c
@@ -277,7 +283,7 @@ module Kramdown
277
283
 
278
284
  def process_html_element(el, do_conversion = true, preserve_text = false)
279
285
  el.options.replace(:category => HTML_SPAN_ELEMENTS.include?(el.value) ? :span : :block,
280
- :content_model => HTML_CONTENT_MODEL[el.value])
286
+ :content_model => (do_conversion ? HTML_CONTENT_MODEL[el.value] : :raw))
281
287
  process_children(el, do_conversion, preserve_text)
282
288
  end
283
289
 
@@ -411,28 +417,26 @@ module Kramdown
411
417
  process_html_element(el, false)
412
418
  return
413
419
  end
420
+ remove_text_children(el)
414
421
  process_children(el)
415
422
  set_basics(el, :table)
416
- el.options[:alignment] = []
417
423
 
418
- nr_cols = 0
419
424
  calc_alignment = lambda do |c|
420
- align = c.attr['align']
421
- if c.type == :html_element && c.value == 'col' && (align.nil? || %w{left right center}.include?(align))
422
- el.options[:alignment] << (align.nil? ? :default : align.to_sym)
423
- elsif c.type == :tr
424
- nr_cols = c.children.length
425
- break
425
+ if c.type == :tr
426
+ el.options[:alignment] = c.children.map do |td|
427
+ if td.attr['style']
428
+ td.attr['style'].slice!(/(?:;\s*)?text-align:\s+(center|left|right)/)
429
+ td.attr.delete('style') if td.attr['style'].strip.empty?
430
+ $1.to_sym
431
+ else
432
+ :default
433
+ end
434
+ end
426
435
  else
427
436
  c.children.each {|cc| calc_alignment.call(cc)}
428
437
  end
429
438
  end
430
439
  calc_alignment.call(el)
431
- if el.options[:alignment].length > nr_cols
432
- el.options[:alignment][nr_cols..-1] = []
433
- else
434
- el.options[:alignment] += [:default] * (nr_cols - el.options[:alignment].length)
435
- end
436
440
  el.children.delete_if {|c| c.type == :html_element}
437
441
 
438
442
  change_th_type = lambda do |c|
@@ -485,12 +489,28 @@ module Kramdown
485
489
  check_nr_cells.call(el)
486
490
  return false if nr_cells == -1
487
491
 
492
+ alignment = nil
493
+ check_alignment = Proc.new do |t|
494
+ if t.value == 'tr'
495
+ cur_alignment = t.children.select {|cc| cc.value == 'th' || cc.value == 'td'}.map do |cell|
496
+ md = /text-align:\s+(center|left|right|justify|inherit)/.match(cell.attr['style'].to_s)
497
+ return false if md && (md[1] == 'justify' || md[1] == 'inherit')
498
+ md.nil? ? :default : md[1]
499
+ end
500
+ alignment = cur_alignment if alignment.nil?
501
+ return false if alignment != cur_alignment
502
+ else
503
+ t.children.each {|cc| check_alignment.call(cc)}
504
+ end
505
+ end
506
+ check_alignment.call(el)
507
+
488
508
  check_rows = lambda do |t, type|
489
509
  t.children.all? {|r| (r.value == 'tr' || r.type == :text) && r.children.all? {|c| c.value == type || c.type == :text}}
490
510
  end
491
511
  check_rows.call(el, 'td') ||
492
512
  (el.children.all? do |t|
493
- t.type == :text || t.value == 'col' || (t.value == 'thead' && check_rows.call(t, 'th')) ||
513
+ t.type == :text || (t.value == 'thead' && check_rows.call(t, 'th')) ||
494
514
  ((t.value == 'tfoot' || t.value == 'tbody') && check_rows.call(t, 'td'))
495
515
  end && el.children.any? {|t| t.value == 'tbody'})
496
516
  end