kramdown 2.1.0 → 2.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. checksums.yaml +4 -4
  2. data/CONTRIBUTERS +19 -3
  3. data/README.md +8 -2
  4. data/VERSION +1 -1
  5. data/lib/kramdown/converter/base.rb +2 -1
  6. data/lib/kramdown/converter/html.rb +37 -30
  7. data/lib/kramdown/converter/kramdown.rb +20 -10
  8. data/lib/kramdown/converter/latex.rb +2 -2
  9. data/lib/kramdown/converter/math_engine/mathjax.rb +7 -33
  10. data/lib/kramdown/converter/syntax_highlighter/rouge.rb +17 -9
  11. data/lib/kramdown/converter/syntax_highlighter.rb +1 -1
  12. data/lib/kramdown/element.rb +24 -0
  13. data/lib/kramdown/options.rb +62 -12
  14. data/lib/kramdown/parser/base.rb +3 -1
  15. data/lib/kramdown/parser/html.rb +16 -9
  16. data/lib/kramdown/parser/kramdown/abbreviation.rb +1 -1
  17. data/lib/kramdown/parser/kramdown/autolink.rb +2 -2
  18. data/lib/kramdown/parser/kramdown/codespan.rb +18 -4
  19. data/lib/kramdown/parser/kramdown/emphasis.rb +1 -1
  20. data/lib/kramdown/parser/kramdown/extensions.rb +6 -0
  21. data/lib/kramdown/parser/kramdown/header.rb +3 -2
  22. data/lib/kramdown/parser/kramdown/html.rb +4 -10
  23. data/lib/kramdown/parser/kramdown/list.rb +37 -9
  24. data/lib/kramdown/parser/kramdown/math.rb +1 -1
  25. data/lib/kramdown/parser/kramdown/table.rb +2 -2
  26. data/lib/kramdown/parser/kramdown.rb +8 -1
  27. data/lib/kramdown/utils/html.rb +9 -0
  28. data/lib/kramdown/version.rb +1 -1
  29. data/man/man1/kramdown.1 +23 -0
  30. data/test/test_files.rb +28 -18
  31. data/test/test_location.rb +2 -2
  32. data/test/test_string_scanner_kramdown.rb +1 -1
  33. data/test/testcases/block/03_paragraph/standalone_image.html +5 -0
  34. data/test/testcases/block/03_paragraph/standalone_image.text +3 -0
  35. data/test/testcases/block/03_paragraph/to_kramdown.kramdown +7 -0
  36. data/test/testcases/block/03_paragraph/to_kramdown.text +5 -0
  37. data/test/testcases/block/04_header/atx_header.html +6 -0
  38. data/test/testcases/block/04_header/atx_header.text +6 -0
  39. data/test/testcases/block/06_codeblock/rouge/multiple.html +1 -1
  40. data/test/testcases/block/06_codeblock/rouge/simple.html +1 -1
  41. data/test/testcases/block/09_html/processing_instruction.html +5 -6
  42. data/test/testcases/block/09_html/standalone_image_in_div.htmlinput +7 -0
  43. data/test/testcases/block/09_html/standalone_image_in_div.text +8 -0
  44. data/test/testcases/block/09_html/table.kramdown +8 -0
  45. data/test/testcases/block/09_html/table.text +7 -0
  46. data/test/testcases/block/12_extension/options.html +4 -4
  47. data/test/testcases/block/12_extension/options.text +2 -0
  48. data/test/testcases/block/12_extension/options2.html +4 -4
  49. data/test/testcases/block/14_table/table_with_footnote.html +4 -4
  50. data/test/testcases/block/15_math/gh_128.html +1 -2
  51. data/test/testcases/block/15_math/normal.html +16 -15
  52. data/test/testcases/block/16_toc/toc_with_footnotes.html +4 -4
  53. data/test/testcases/cjk-line-break.html +4 -0
  54. data/test/testcases/cjk-line-break.options +1 -0
  55. data/test/testcases/cjk-line-break.text +12 -0
  56. data/test/testcases/man/example.man +1 -1
  57. data/test/testcases/man/example.text +1 -1
  58. data/test/testcases/span/02_emphasis/normal.html +4 -0
  59. data/test/testcases/span/02_emphasis/normal.text +4 -0
  60. data/test/testcases/span/03_codespan/normal.html +4 -0
  61. data/test/testcases/span/03_codespan/normal.text +4 -0
  62. data/test/testcases/span/04_footnote/backlink_inline.html +21 -21
  63. data/test/testcases/span/04_footnote/backlink_text.html +4 -4
  64. data/test/testcases/span/04_footnote/footnote_nr.html +6 -6
  65. data/test/testcases/span/04_footnote/footnote_prefix.html +6 -6
  66. data/test/testcases/span/04_footnote/inside_footnote.html +9 -9
  67. data/test/testcases/span/04_footnote/markers.html +16 -16
  68. data/test/testcases/span/04_footnote/placement.html +4 -4
  69. data/test/testcases/span/04_footnote/regexp_problem.html +4 -4
  70. data/test/testcases/span/04_footnote/without_backlink.html +3 -3
  71. data/test/testcases/span/05_html/normal.html +1 -1
  72. data/test/testcases/span/abbreviations/abbrev_in_html.html +9 -0
  73. data/test/testcases/span/abbreviations/abbrev_in_html.text +10 -0
  74. data/test/testcases/span/abbreviations/in_footnote.html +4 -4
  75. data/test/testcases/span/math/normal.html +4 -4
  76. data/test/testcases/span/text_substitutions/entities.html +1 -1
  77. data/test/testcases/span/text_substitutions/entities.text +1 -1
  78. metadata +36 -15
  79. data/test/testcases/block/15_math/mathjax_preview.html +0 -4
  80. data/test/testcases/block/15_math/mathjax_preview.options +0 -2
  81. data/test/testcases/block/15_math/mathjax_preview.text +0 -5
  82. data/test/testcases/block/15_math/mathjax_preview_as_code.html +0 -4
  83. data/test/testcases/block/15_math/mathjax_preview_as_code.options +0 -3
  84. data/test/testcases/block/15_math/mathjax_preview_as_code.text +0 -5
  85. data/test/testcases/block/15_math/mathjax_preview_simple.html +0 -4
  86. data/test/testcases/block/15_math/mathjax_preview_simple.options +0 -2
  87. data/test/testcases/block/15_math/mathjax_preview_simple.text +0 -5
@@ -39,6 +39,7 @@ module Kramdown
39
39
  ALLOWED_TYPES = [String, Integer, Float, Symbol, Boolean, Object]
40
40
 
41
41
  @options = {}
42
+ @cached_defaults = nil
42
43
 
43
44
  # Define a new option called +name+ (a Symbol) with the given +type+ (String, Integer, Float,
44
45
  # Symbol, Boolean, Object), default value +default+ and the description +desc+. If a block is
@@ -54,6 +55,7 @@ module Kramdown
54
55
  raise ArgumentError, "Invalid type for default value" if !(type === default) && !default.nil?
55
56
  raise ArgumentError, "Missing validator block" if type == Object && block.nil?
56
57
  @options[name] = Definition.new(name, type, default, desc, block)
58
+ @cached_defaults = nil
57
59
  end
58
60
 
59
61
  # Return all option definitions.
@@ -68,15 +70,17 @@ module Kramdown
68
70
 
69
71
  # Return a Hash with the default values for all options.
70
72
  def self.defaults
71
- temp = {}
72
- @options.each {|_n, o| temp[o.name] = o.default }
73
- temp
73
+ @cached_defaults ||= begin
74
+ temp = {}
75
+ @options.each {|_n, o| temp[o.name] = o.default }
76
+ temp.freeze
77
+ end
74
78
  end
75
79
 
76
80
  # Merge the #defaults Hash with the *parsed* options from the given Hash, i.e. only valid option
77
81
  # names are considered and their value is run through the #parse method.
78
82
  def self.merge(hash)
79
- temp = defaults
83
+ temp = defaults.dup
80
84
  hash.each do |k, v|
81
85
  k = k.to_sym
82
86
  temp[k] = @options.key?(k) ? parse(k, v) : v
@@ -328,7 +332,11 @@ module Kramdown
328
332
  Used by: HTML converter, kramdown converter
329
333
  EOF
330
334
 
331
- define(:toc_levels, Object, (1..6).to_a, <<~EOF) do |val|
335
+ TOC_LEVELS_RANGE = (1..6).freeze
336
+ TOC_LEVELS_ARRAY = TOC_LEVELS_RANGE.to_a.freeze
337
+ private_constant :TOC_LEVELS_RANGE, :TOC_LEVELS_ARRAY
338
+
339
+ define(:toc_levels, Object, TOC_LEVELS_ARRAY, <<~EOF) do |val|
332
340
  Defines the levels that are used for the table of contents
333
341
 
334
342
  The individual levels can be specified by separating them with commas
@@ -347,12 +355,20 @@ module Kramdown
347
355
  else
348
356
  raise Kramdown::Error, "Invalid syntax for option toc_levels"
349
357
  end
350
- when Array, Range
351
- val = val.map(&:to_i).uniq
358
+ when Array
359
+ unless val.eql?(TOC_LEVELS_ARRAY)
360
+ val = val.map(&:to_i).uniq
361
+ end
362
+ when Range
363
+ if val.eql?(TOC_LEVELS_RANGE)
364
+ val = TOC_LEVELS_ARRAY
365
+ else
366
+ val = val.map(&:to_i).uniq
367
+ end
352
368
  else
353
369
  raise Kramdown::Error, "Invalid type #{val.class} for option toc_levels"
354
370
  end
355
- if val.any? {|i| !(1..6).cover?(i) }
371
+ if val.any? {|i| !TOC_LEVELS_RANGE.cover?(i) }
356
372
  raise Kramdown::Error, "Level numbers for option toc_levels have to be integers from 1 to 6"
357
373
  end
358
374
  val
@@ -377,7 +393,11 @@ module Kramdown
377
393
  simple_array_validator(val, :latex_headers, 6)
378
394
  end
379
395
 
380
- define(:smart_quotes, Object, %w[lsquo rsquo ldquo rdquo], <<~EOF) do |val|
396
+ SMART_QUOTES_ENTITIES = %w[lsquo rsquo ldquo rdquo].freeze
397
+ SMART_QUOTES_STR = SMART_QUOTES_ENTITIES.join(',').freeze
398
+ private_constant :SMART_QUOTES_ENTITIES, :SMART_QUOTES_STR
399
+
400
+ define(:smart_quotes, Object, SMART_QUOTES_ENTITIES, <<~EOF) do |val|
381
401
  Defines the HTML entity names or code points for smart quote output
382
402
 
383
403
  The entities identified by entity name or code point that should be
@@ -388,9 +408,13 @@ module Kramdown
388
408
  Default: lsquo,rsquo,ldquo,rdquo
389
409
  Used by: HTML/Latex converter
390
410
  EOF
391
- val = simple_array_validator(val, :smart_quotes, 4)
392
- val.map! {|v| Integer(v) rescue v }
393
- val
411
+ if val == SMART_QUOTES_STR || val == SMART_QUOTES_ENTITIES
412
+ SMART_QUOTES_ENTITIES
413
+ else
414
+ val = simple_array_validator(val, :smart_quotes, 4)
415
+ val.map! {|v| Integer(v) rescue v }
416
+ val
417
+ end
394
418
  end
395
419
 
396
420
  define(:typographic_symbols, Object, {}, <<~EOF) do |val|
@@ -562,6 +586,32 @@ module Kramdown
562
586
  Used by: HTML
563
587
  EOF
564
588
 
589
+ define(:remove_line_breaks_for_cjk, Boolean, false, <<~EOF)
590
+ Specifies whether line breaks should be removed between CJK characters
591
+
592
+ Default: false
593
+ Used by: HTML converter
594
+ EOF
595
+
596
+ define(:forbidden_inline_options, Object, %w[template], <<~EOF) do |val|
597
+ Defines the options that may not be set using the {::options} extension
598
+
599
+ The value needs to be an array of option names.
600
+
601
+ Default: [template]
602
+ Used by: HTML converter
603
+ EOF
604
+ val.map! {|item| item.kind_of?(String) ? str_to_sym(item) : item }
605
+ simple_array_validator(val, :forbidden_inline_options)
606
+ end
607
+
608
+ define(:list_indent, Integer, 2, <<~EOF)
609
+ Sets the number of spaces to use for list indentation
610
+
611
+ Default: 2
612
+ Used by: Kramdown converter
613
+ EOF
614
+
565
615
  end
566
616
 
567
617
  end
@@ -93,7 +93,9 @@ module Kramdown
93
93
  raise "The source text contains invalid characters for the used encoding #{source.encoding}"
94
94
  end
95
95
  source = source.encode('UTF-8')
96
- source.gsub(/\r\n?/, "\n").chomp + "\n"
96
+ source.gsub!(/\r\n?/, "\n")
97
+ source.chomp!
98
+ source << "\n"
97
99
  end
98
100
 
99
101
  # This helper method adds the given +text+ either to the last element in the +tree+ if it is a
@@ -16,7 +16,7 @@ module Kramdown
16
16
 
17
17
  module Parser
18
18
 
19
- # Used for parsing a HTML document.
19
+ # Used for parsing an HTML document.
20
20
  #
21
21
  # The parsing code is in the Parser module that can also be used by other parsers.
22
22
  class Html < Base
@@ -240,7 +240,14 @@ module Kramdown
240
240
  return
241
241
  when :html_element
242
242
  when :root
243
- el.children.each {|c| process(c) }
243
+ el.children.map! do |c|
244
+ if c.type == :text
245
+ process_text(c.value, !do_conversion)
246
+ else
247
+ process(c)
248
+ c
249
+ end
250
+ end.flatten!
244
251
  remove_whitespace_children(el)
245
252
  return
246
253
  else return
@@ -286,7 +293,7 @@ module Kramdown
286
293
  src = Kramdown::Utils::StringScanner.new(raw)
287
294
  result = []
288
295
  until src.eos?
289
- if (tmp = src.scan_until(/(?=#{HTML_ENTITY_RE})/))
296
+ if (tmp = src.scan_until(/(?=#{HTML_ENTITY_RE})/o))
290
297
  result << Element.new(:text, tmp)
291
298
  src.scan(HTML_ENTITY_RE)
292
299
  val = src[1] || (src[2]&.to_i) || src[3].hex
@@ -324,7 +331,7 @@ module Kramdown
324
331
  tmp = []
325
332
  last_is_p = false
326
333
  el.children.each do |c|
327
- if Element.category(c) != :block || c.type == :text
334
+ if !c.block? || c.type == :text
328
335
  unless last_is_p
329
336
  tmp << Element.new(:p, nil, nil, transparent: true)
330
337
  last_is_p = true
@@ -354,8 +361,8 @@ module Kramdown
354
361
  el.children = el.children.reject do |c|
355
362
  i += 1
356
363
  c.type == :text && c.value.strip.empty? &&
357
- (i == 0 || i == el.children.length - 1 || (Element.category(el.children[i - 1]) == :block &&
358
- Element.category(el.children[i + 1]) == :block))
364
+ (i == 0 || i == el.children.length - 1 || ((el.children[i - 1]).block? &&
365
+ (el.children[i + 1]).block?))
359
366
  end
360
367
  end
361
368
 
@@ -581,11 +588,11 @@ module Kramdown
581
588
  @src = Kramdown::Utils::StringScanner.new(adapt_source(source))
582
589
 
583
590
  while true
584
- if (result = @src.scan(/\s*#{HTML_INSTRUCTION_RE}/))
591
+ if (result = @src.scan(/\s*#{HTML_INSTRUCTION_RE}/o))
585
592
  @tree.children << Element.new(:xml_pi, result.strip, nil, category: :block)
586
- elsif (result = @src.scan(/\s*#{HTML_DOCTYPE_RE}/))
593
+ elsif (result = @src.scan(/\s*#{HTML_DOCTYPE_RE}/o))
587
594
  # ignore the doctype
588
- elsif (result = @src.scan(/\s*#{HTML_COMMENT_RE}/))
595
+ elsif (result = @src.scan(/\s*#{HTML_COMMENT_RE}/o))
589
596
  @tree.children << Element.new(:xml_comment, result.strip, nil, category: :block)
590
597
  else
591
598
  break
@@ -46,7 +46,7 @@ module Kramdown
46
46
  regexps << /(?=(?:\W|^)#{regexps.first}(?!\w))/ # regexp should only match on word boundaries
47
47
  end
48
48
  el.children.map! do |child|
49
- if child.type == :text
49
+ if child.type == :text && el.options[:content_model] != :raw
50
50
  if child.value =~ regexps.first
51
51
  result = []
52
52
  strscan = Kramdown::Utils::StringScanner.new(child.value, child.options[:location])
@@ -11,8 +11,8 @@ module Kramdown
11
11
  module Parser
12
12
  class Kramdown
13
13
 
14
- ACHARS = '[[:alnum:]]_'
15
- AUTOLINK_START_STR = "<((mailto|https?|ftps?):.+?|[-.#{ACHARS}]+@[-#{ACHARS}]+(?:\.[-#{ACHARS}]+)*\.[a-z]+)>"
14
+ ACHARS = '[[:alnum:]]-_.'
15
+ AUTOLINK_START_STR = "<((mailto|https?|ftps?):.+?|[#{ACHARS}]+?@[#{ACHARS}]+?)>"
16
16
  AUTOLINK_START = /#{AUTOLINK_START_STR}/u
17
17
 
18
18
  # Parse the autolink at the current location.
@@ -20,18 +20,32 @@ module Kramdown
20
20
  simple = (result.length == 1)
21
21
  saved_pos = @src.save_pos
22
22
 
23
- if simple && @src.pre_match =~ /\s\Z/ && @src.match?(/\s/)
23
+ if simple && @src.pre_match =~ /\s\Z|\A\Z/ && @src.match?(/\s/)
24
24
  add_text(result)
25
25
  return
26
26
  end
27
27
 
28
- if (text = @src.scan_until(/#{result}/))
29
- text.sub!(/#{result}\Z/, '')
28
+ # assign static regex to avoid allocating the same on every instance
29
+ # where +result+ equals a single-backtick. Interpolate otherwise.
30
+ if result == '`'
31
+ scan_pattern = /`/
32
+ str_sub_pattern = /`\Z/
33
+ else
34
+ scan_pattern = /#{result}/
35
+ str_sub_pattern = /#{result}\Z/
36
+ end
37
+
38
+ if (text = @src.scan_until(scan_pattern))
39
+ text.sub!(str_sub_pattern, '')
30
40
  unless simple
31
41
  text = text[1..-1] if text[0..0] == ' '
32
42
  text = text[0..-2] if text[-1..-1] == ' '
33
43
  end
34
- @tree.children << Element.new(:codespan, text, nil, location: start_line_number)
44
+ @tree.children << Element.new(:codespan, text, nil, {
45
+ codespan_delimiter: result,
46
+ location: start_line_number
47
+ })
48
+
35
49
  else
36
50
  @src.revert_pos(saved_pos)
37
51
  add_text(result)
@@ -22,7 +22,7 @@ module Kramdown
22
22
  element = (result.length == 2 ? :strong : :em)
23
23
  type = result[0..0]
24
24
 
25
- if (type == '_' && @src.pre_match =~ /[[:alpha:]-]\z/) || @src.check(/\s/) ||
25
+ if (type == '_' && @src.pre_match =~ /[[:alpha:]]-?[[:alpha:]]*\z/) || @src.check(/\s/) ||
26
26
  @tree.type == element || @stack.any? {|el, _| el.type == element }
27
27
  add_text(result)
28
28
  return
@@ -110,6 +110,12 @@ module Kramdown
110
110
  opts.select do |k, v|
111
111
  k = k.to_sym
112
112
  if Kramdown::Options.defined?(k)
113
+ if @options[:forbidden_inline_options].include?(k) ||
114
+ k == :forbidden_inline_options
115
+ warning("Option #{k} may not be set inline")
116
+ next false
117
+ end
118
+
113
119
  begin
114
120
  val = Kramdown::Options.parse(k, v)
115
121
  @options[k] = val
@@ -8,6 +8,7 @@
8
8
  #
9
9
 
10
10
  require 'kramdown/parser/kramdown/block_boundary'
11
+ require 'rexml/xmltokens'
11
12
 
12
13
  module Kramdown
13
14
  module Parser
@@ -31,7 +32,7 @@ module Kramdown
31
32
  def parse_atx_header
32
33
  return false unless after_block_boundary?
33
34
  text, id = parse_header_contents
34
- text.sub!(/[\t ]#+\z/, '') && text.rstrip!
35
+ text.sub!(/(?<!\\)#+\z/, '') && text.rstrip!
35
36
  return false if text.empty?
36
37
  add_header(@src["level"].length, text, id)
37
38
  true
@@ -40,7 +41,7 @@ module Kramdown
40
41
 
41
42
  protected
42
43
 
43
- HEADER_ID = /[\t ]{#(?<id>[A-Za-z][\w:-]*)}\z/
44
+ HEADER_ID = /[\t ]{#(?<id>#{REXML::XMLTokens::NAME_START_CHAR}#{REXML::XMLTokens::NAME_CHAR}*)}\z/
44
45
 
45
46
  # Returns header text and optional ID.
46
47
  def parse_header_contents
@@ -65,7 +65,7 @@ module Kramdown
65
65
  end
66
66
  end
67
67
 
68
- HTML_BLOCK_START = /^#{OPT_SPACE}<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/
68
+ HTML_BLOCK_START = /^#{OPT_SPACE}<(#{REXML::Parsers::BaseParser::UNAME_STR}|!--|\/)/
69
69
 
70
70
  # Parse the HTML at the current position as block-level HTML.
71
71
  def parse_block_html
@@ -74,17 +74,13 @@ module Kramdown
74
74
  @tree.children << Element.new(:xml_comment, result, nil, category: :block, location: line)
75
75
  @src.scan(TRAILING_WHITESPACE)
76
76
  true
77
- elsif (result = @src.scan(HTML_INSTRUCTION_RE))
78
- @tree.children << Element.new(:xml_pi, result, nil, category: :block, location: line)
79
- @src.scan(TRAILING_WHITESPACE)
80
- true
81
77
  else
82
- if @src.check(/^#{OPT_SPACE}#{HTML_TAG_RE}/) && !HTML_SPAN_ELEMENTS.include?(@src[1].downcase)
78
+ if @src.check(/^#{OPT_SPACE}#{HTML_TAG_RE}/o) && !HTML_SPAN_ELEMENTS.include?(@src[1].downcase)
83
79
  @src.pos += @src.matched_size
84
80
  handle_html_start_tag(line, &method(:handle_kramdown_html_tag))
85
81
  Kramdown::Parser::Html::ElementConverter.convert(@root, @tree.children.last) if @options[:html_to_native]
86
82
  true
87
- elsif @src.check(/^#{OPT_SPACE}#{HTML_TAG_CLOSE_RE}/) && !HTML_SPAN_ELEMENTS.include?(@src[1].downcase)
83
+ elsif @src.check(/^#{OPT_SPACE}#{HTML_TAG_CLOSE_RE}/o) && !HTML_SPAN_ELEMENTS.include?(@src[1].downcase)
88
84
  name = @src[1].downcase
89
85
 
90
86
  if @tree.type == :html_element && @tree.value == name
@@ -100,15 +96,13 @@ module Kramdown
100
96
  end
101
97
  define_parser(:block_html, HTML_BLOCK_START)
102
98
 
103
- HTML_SPAN_START = /<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/
99
+ HTML_SPAN_START = /<(#{REXML::Parsers::BaseParser::UNAME_STR}|!--|\/)/
104
100
 
105
101
  # Parse the HTML at the current position as span-level HTML.
106
102
  def parse_span_html
107
103
  line = @src.current_line_number
108
104
  if (result = @src.scan(HTML_COMMENT_RE))
109
105
  @tree.children << Element.new(:xml_comment, result, nil, category: :span, location: line)
110
- elsif (result = @src.scan(HTML_INSTRUCTION_RE))
111
- @tree.children << Element.new(:xml_pi, result, nil, category: :span, location: line)
112
106
  elsif (result = @src.scan(HTML_TAG_CLOSE_RE))
113
107
  warning("Found invalidly used HTML closing tag for '#{@src[1]}' on line #{line}")
114
108
  add_text(result)
@@ -44,8 +44,10 @@ module Kramdown
44
44
  [content, indentation, *PARSE_FIRST_LIST_LINE_REGEXP_CACHE[indentation]]
45
45
  end
46
46
 
47
- LIST_START_UL = /^(#{OPT_SPACE}[+*-])([\t| ].*?\n)/
48
- LIST_START_OL = /^(#{OPT_SPACE}\d+\.)([\t| ].*?\n)/
47
+ PATTERN_TAIL = /[\t| ].*?\n/
48
+
49
+ LIST_START_UL = /^(#{OPT_SPACE}[+*-])(#{PATTERN_TAIL})/
50
+ LIST_START_OL = /^(#{OPT_SPACE}\d+\.)(#{PATTERN_TAIL})/
49
51
  LIST_START = /#{LIST_START_UL}|#{LIST_START_OL}/
50
52
 
51
53
  # Parse the ordered or unordered list at the current location.
@@ -67,6 +69,7 @@ module Kramdown
67
69
  eob_found = true
68
70
  break
69
71
  elsif @src.scan(list_start_re)
72
+ list.options[:first_list_marker] ||= @src[1].strip
70
73
  item = Element.new(:li, nil, nil, location: start_line_number)
71
74
  item.value, indentation, content_re, lazy_re, indent_re =
72
75
  parse_first_list_line(@src[1].length, @src[2])
@@ -77,11 +80,7 @@ module Kramdown
77
80
  ''
78
81
  end
79
82
 
80
- list_start_re = if type == :ul
81
- /^( {0,#{[3, indentation - 1].min}}[+*-])([\t| ].*?\n)/
82
- else
83
- /^( {0,#{[3, indentation - 1].min}}\d+\.)([\t| ].*?\n)/
84
- end
83
+ list_start_re = fetch_pattern(type, indentation)
85
84
  nested_list_found = (item.value =~ LIST_START)
86
85
  last_is_blank = false
87
86
  item.value = [item.value]
@@ -148,7 +147,7 @@ module Kramdown
148
147
  end
149
148
  define_parser(:list, LIST_START)
150
149
 
151
- DEFINITION_LIST_START = /^(#{OPT_SPACE}:)([\t| ].*?\n)/
150
+ DEFINITION_LIST_START = /^(#{OPT_SPACE}:)(#{PATTERN_TAIL})/
152
151
 
153
152
  # Parse the ordered or unordered list at the current location.
154
153
  def parse_definition_list
@@ -198,7 +197,7 @@ module Kramdown
198
197
  ''
199
198
  end
200
199
 
201
- def_start_re = /^( {0,#{[3, indentation - 1].min}}:)([\t| ].*?\n)/
200
+ def_start_re = fetch_pattern(:dl, indentation)
202
201
  first_as_para = false
203
202
  last_is_blank = false
204
203
  elsif @src.check(EOB_MARKER)
@@ -252,6 +251,35 @@ module Kramdown
252
251
  end
253
252
  define_parser(:definition_list, DEFINITION_LIST_START)
254
253
 
254
+ private
255
+
256
+ # precomputed patterns for indentations 1..4 and fallback expression
257
+ # to compute pattern when indentation is outside the 1..4 range.
258
+ def fetch_pattern(type, indentation)
259
+ if type == :ul
260
+ case indentation
261
+ when 1 then %r/^( {0}[+*-])(#{PATTERN_TAIL})/o
262
+ when 2 then %r/^( {0,1}[+*-])(#{PATTERN_TAIL})/o
263
+ when 3 then %r/^( {0,2}[+*-])(#{PATTERN_TAIL})/o
264
+ else %r/^( {0,3}[+*-])(#{PATTERN_TAIL})/o
265
+ end
266
+ elsif type == :ol
267
+ case indentation
268
+ when 1 then %r/^( {0}\d+\.)(#{PATTERN_TAIL})/o
269
+ when 2 then %r/^( {0,1}\d+\.)(#{PATTERN_TAIL})/o
270
+ when 3 then %r/^( {0,2}\d+\.)(#{PATTERN_TAIL})/o
271
+ else %r/^( {0,3}\d+\.)(#{PATTERN_TAIL})/o
272
+ end
273
+ elsif type == :dl
274
+ case indentation
275
+ when 1 then %r/^( {0}:)(#{PATTERN_TAIL})/o
276
+ when 2 then %r/^( {0,1}:)(#{PATTERN_TAIL})/o
277
+ when 3 then %r/^( {0,2}:)(#{PATTERN_TAIL})/o
278
+ else %r/^( {0,3}:)(#{PATTERN_TAIL})/o
279
+ end
280
+ end
281
+ end
282
+
255
283
  end
256
284
  end
257
285
  end
@@ -21,7 +21,7 @@ module Kramdown
21
21
  if !after_block_boundary?
22
22
  return false
23
23
  elsif @src[1]
24
- @src.scan(/^#{OPT_SPACE}\\/) if @src[3]
24
+ @src.scan(/^#{OPT_SPACE}\\/o) if @src[3]
25
25
  return false
26
26
  end
27
27
 
@@ -132,8 +132,8 @@ module Kramdown
132
132
  pipe_on_line = false
133
133
  end
134
134
  else
135
- break if lines.size > 1 && !pipe_on_line && lines.first !~ /^#{TABLE_PIPE_CHECK}/
136
- pipe_on_line = (lines.size > 1 ? false : pipe_on_line) || (lines.last =~ /^#{TABLE_PIPE_CHECK}/)
135
+ break if lines.size > 1 && !pipe_on_line && lines.first !~ /^#{TABLE_PIPE_CHECK}/o
136
+ pipe_on_line = (lines.size > 1 ? false : pipe_on_line) || (lines.last =~ /^#{TABLE_PIPE_CHECK}/o)
137
137
  end
138
138
  end
139
139
  @src.revert_pos(saved_pos) and return false unless pipe_on_line
@@ -79,6 +79,8 @@ module Kramdown
79
79
  @span_parsers = [:emphasis, :codespan, :autolink, :span_html, :footnote_marker, :link,
80
80
  :smart_quotes, :inline_math, :span_extensions, :html_entity,
81
81
  :typographic_syms, :line_break, :escaped_chars]
82
+
83
+ @span_pattern_cache ||= Hash.new { |h, k| h[k] = {} }
82
84
  end
83
85
  private_class_method(:new, :allocate)
84
86
 
@@ -195,6 +197,11 @@ module Kramdown
195
197
  end.flatten!
196
198
  end
197
199
 
200
+ def span_pattern_cache(stop_re, span_start)
201
+ @span_pattern_cache[stop_re][span_start] ||= /(?=#{Regexp.union(stop_re, span_start)})/
202
+ end
203
+ private :span_pattern_cache
204
+
198
205
  # Parse all span-level elements in the source string of @src into +el+.
199
206
  #
200
207
  # If the parameter +stop_re+ (a regexp) is used, parsing is immediately stopped if the regexp
@@ -213,7 +220,7 @@ module Kramdown
213
220
  span_start, span_start_re = span_parser_regexps(parsers) if parsers
214
221
  parsers ||= @span_parsers
215
222
 
216
- used_re = (stop_re.nil? ? span_start_re : /(?=#{Regexp.union(stop_re, span_start)})/)
223
+ used_re = (stop_re.nil? ? span_start_re : span_pattern_cache(stop_re, span_start))
217
224
  stop_re_found = false
218
225
  while !@src.eos? && !stop_re_found
219
226
  if (result = @src.scan_until(used_re))
@@ -42,6 +42,8 @@ module Kramdown
42
42
 
43
43
  # Return the HTML representation of the attributes +attr+.
44
44
  def html_attributes(attr)
45
+ return '' if attr.empty?
46
+
45
47
  attr.map do |k, v|
46
48
  v.nil? || (k == 'id' && v.strip.empty?) ? '' : " #{k}=\"#{escape_html(v.to_s, :attribute)}\""
47
49
  end.join('')
@@ -68,6 +70,13 @@ module Kramdown
68
70
  str.gsub(ESCAPE_RE_FROM_TYPE[type]) {|m| ESCAPE_MAP[m] || m }
69
71
  end
70
72
 
73
+ REDUNDANT_LINE_BREAK_REGEX = /([\p{Han}\p{Hiragana}\p{Katakana}]+)\n([\p{Han}\p{Hiragana}\p{Katakana}]+)/u
74
+ def fix_cjk_line_break(str)
75
+ while str.gsub!(REDUNDANT_LINE_BREAK_REGEX, '\1\2')
76
+ end
77
+ str
78
+ end
79
+
71
80
  end
72
81
 
73
82
  end
@@ -10,6 +10,6 @@
10
10
  module Kramdown
11
11
 
12
12
  # The kramdown version.
13
- VERSION = '2.1.0'
13
+ VERSION = '2.4.0'
14
14
 
15
15
  end
data/man/man1/kramdown.1 CHANGED
@@ -118,6 +118,15 @@ This option can be used to set a prefix for footnote IDs\. This is useful when r
118
118
  Default: \[u2018]\[u2019] Used by: HTML
119
119
  .RE
120
120
  .TP
121
+ \fB\-\-forbidden\-inline\-options\fP \fIARG\fP
122
+ Defines the options that may not be set using the {::options} extension
123
+ .RS
124
+ .P
125
+ The value needs to be an array of option names\.
126
+ .P
127
+ Default: [template] Used by: HTML converter
128
+ .RE
129
+ .TP
121
130
  \fB\-\-header\-offset\fP \fIARG\fP
122
131
  Sets the output offset for headers
123
132
  .RS
@@ -165,6 +174,13 @@ If the value is a String, it has to contain a valid YAML hash and the hash has t
165
174
  Default: {} Used by: kramdown parser
166
175
  .RE
167
176
  .TP
177
+ \fB\-\-list\-indent\fP \fIARG\fP
178
+ Sets the number of spaces to use for list indentation
179
+ .RS
180
+ .P
181
+ Default: 2 Used by: Kramdown converter
182
+ .RE
183
+ .TP
168
184
  \fB\-\-math\-engine\fP \fIARG\fP
169
185
  Set the math engine
170
186
  .RS
@@ -214,6 +230,13 @@ If this option is \fBtrue\fP, the RemoveHtmlTags converter removes block HTML ta
214
230
  Default: true Used by: RemoveHtmlTags converter
215
231
  .RE
216
232
  .TP
233
+ \fB\-\-[no\-]remove\-line\-breaks\-for\-cjk\fP
234
+ Specifies whether line breaks should be removed between CJK characters
235
+ .RS
236
+ .P
237
+ Default: false Used by: HTML converter
238
+ .RE
239
+ .TP
217
240
  \fB\-\-[no\-]remove\-span\-html\-tags\fP
218
241
  Remove span HTML tags
219
242
  .RS