maruku 0.7.0 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/docs/markdown_syntax.md +9 -21
  5. data/lib/maruku/defaults.rb +1 -1
  6. data/lib/maruku/element.rb +18 -3
  7. data/lib/maruku/ext/fenced_code.rb +1 -1
  8. data/lib/maruku/ext/math/mathml_engines/blahtex.rb +1 -1
  9. data/lib/maruku/ext/math/to_html.rb +2 -9
  10. data/lib/maruku/html.rb +5 -8
  11. data/lib/maruku/input/html_helper.rb +94 -81
  12. data/lib/maruku/input/mdline.rb +6 -4
  13. data/lib/maruku/input/parse_block.rb +145 -37
  14. data/lib/maruku/input/parse_span.rb +7 -8
  15. data/lib/maruku/input/rubypants.rb +22 -9
  16. data/lib/maruku/maruku.rb +5 -0
  17. data/lib/maruku/output/to_html.rb +15 -6
  18. data/lib/maruku/output/to_latex.rb +9 -3
  19. data/lib/maruku/output/to_s.rb +0 -1
  20. data/lib/maruku/string_utils.rb +2 -2
  21. data/lib/maruku/version.rb +1 -1
  22. data/spec/block_docs/abbrev.md +18 -18
  23. data/spec/block_docs/attribute_sanitize.md +22 -0
  24. data/spec/block_docs/auto_cdata.md +48 -0
  25. data/spec/block_docs/bug_table.md +4 -4
  26. data/spec/block_docs/code4.md +79 -0
  27. data/spec/block_docs/div_without_newline.md +16 -0
  28. data/spec/block_docs/empty_cells.md +3 -9
  29. data/spec/block_docs/entities.md +6 -12
  30. data/spec/block_docs/extra_table1.md +6 -6
  31. data/spec/block_docs/fenced_code_blocks.md +12 -20
  32. data/spec/block_docs/fenced_code_blocks_highlighted.md +1 -2
  33. data/spec/block_docs/footnotes2.md +4 -1
  34. data/spec/block_docs/ignore_bad_header.md +9 -0
  35. data/spec/block_docs/issue106.md +78 -0
  36. data/spec/block_docs/issue115.md +20 -0
  37. data/spec/block_docs/issue117.md +13 -0
  38. data/spec/block_docs/issue120.md +48 -0
  39. data/spec/block_docs/issue123.md +11 -0
  40. data/spec/block_docs/issue124.md +16 -0
  41. data/spec/block_docs/issue40.md +24 -12
  42. data/spec/block_docs/issue89.md +1 -1
  43. data/spec/block_docs/lists_nested_blankline.md +14 -8
  44. data/spec/block_docs/lists_ol.md +5 -5
  45. data/spec/block_docs/lists_paraindent.md +6 -11
  46. data/spec/block_docs/math-blahtex/equations.md +12 -13
  47. data/spec/block_docs/math-blahtex/math2.md +9 -2
  48. data/spec/block_docs/math/embedded_invalid_svg.md +31 -2
  49. data/spec/block_docs/math/embedded_svg.md +41 -2
  50. data/spec/block_docs/math/equations.md +7 -2
  51. data/spec/block_docs/math/inline.md +2 -2
  52. data/spec/block_docs/math/math2.md +9 -1
  53. data/spec/block_docs/math/spaces_after_inline_math.md +17 -0
  54. data/spec/block_docs/math/table.md +2 -2
  55. data/spec/block_docs/math/table2.md +6 -6
  56. data/spec/block_docs/table_attributes.md +4 -6
  57. data/spec/block_docs/table_colspan.md +41 -0
  58. data/spec/block_docs/tables.md +10 -21
  59. data/spec/block_docs/tables2.md +74 -0
  60. data/spec/block_docs/xml_comments.md +32 -0
  61. data/spec/span_spec.rb +1 -1
  62. data/spec/spec_helper.rb +1 -0
  63. metadata +42 -28
  64. metadata.gz.sig +3 -3
  65. data/spec/block_docs/xml2.md +0 -19
@@ -40,19 +40,19 @@ module MaRuKu
40
40
  return :definition if self =~ Definition
41
41
  # I had a bug with emails and urls at the beginning of the
42
42
  # line that were mistaken for raw_html
43
- return :text if self =~ /\A[ ]{0,3}<([^:@>]+?@[^:@>]+?)>/
43
+ return :text if self =~ /\A[ ]{0,3}#{EMailAddress}/
44
44
  return :text if self =~ /\A[ ]{0,3}<http:/
45
45
  # raw html is like PHP Markdown Extra: at most three spaces before
46
46
  return :xml_instr if self =~ /\A\s*<\?/
47
- return :raw_html if self =~ %r{^[ ]{0,3}</?\s*\w+}
47
+ return :raw_html if self =~ %r{\A[ ]{0,3}</?\s*\w+}
48
48
  return :raw_html if self =~ /\A[ ]{0,3}<\!\-\-/
49
49
  return :header1 if self =~ /\A(=)+/
50
50
  return :header2 if self =~ /\A([-\s])+\z/
51
51
  return :header3 if self =~ /\A(#)+\s*\S+/
52
52
  # at least three asterisks/hyphens/underscores on a line, and only whitespace
53
53
  return :hrule if self =~ /\A(\s*[\*\-_]\s*){3,}\z/
54
- return :ulist if self =~ /\A([ ]{0,3}|\t)([\*\-\+])\s+.*/
55
- return :olist if self =~ /\A([ ]{0,3}|\t)\d+\.\s+.*/
54
+ return :ulist if self =~ /\A[ ]{0,3}([\*\-\+])\s+.*/
55
+ return :olist if self =~ /\A[ ]{0,3}\d+\.\s+.*/
56
56
  return :code if number_of_leading_spaces >= 4
57
57
  return :quote if self =~ /\A>/
58
58
  return :ald if self =~ AttributeDefinitionList
@@ -126,4 +126,6 @@ module MaRuKu
126
126
  # Table syntax: http://michelf.ca/projects/php-markdown/extra/#table
127
127
  # | -------------:| ------------------------------ |
128
128
  TableSeparator = /\A(?>\|?\s*\:?\-+\:?\s*\|?)+?\z/
129
+
130
+ EMailAddress = /<([^:@>]+?@[^:@>]+?)>/
129
131
  end
@@ -1,5 +1,3 @@
1
- require 'set'
2
-
3
1
  module MaRuKu; module In; module Markdown; module BlockLevelParser
4
2
 
5
3
  include Helpers
@@ -99,18 +97,19 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
99
97
  output << read_abbreviation(src)
100
98
  when :xml_instr
101
99
  read_xml_instruction(src, output)
102
- else # warn if we forgot something
103
- line = src.cur_line
104
- maruku_error "Ignoring line '#{line}' type = #{md_type}", src
105
- src.shift_line
100
+ else # unhandled line type at this level
101
+ # Just treat it as raw text
102
+ read_text_material(src, output)
106
103
  end
107
104
  end
108
105
 
109
106
  merge_ial(output, src, output)
110
- output.delete_if {|x| x.kind_of?(MDElement) && x.node_type == :ial }
111
-
112
- # get rid of empty line markers
113
- output.delete_if {|x| x == :empty }
107
+ output.delete_if do |x|
108
+ # Strip out IAL
109
+ (x.kind_of?(MDElement) && x.node_type == :ial) ||
110
+ # get rid of empty line markers
111
+ x == :empty
112
+ end
114
113
 
115
114
  # See for each list if we can omit the paragraphs
116
115
  # TODO: do this after
@@ -152,7 +151,7 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
152
151
  output << md_el(:definition_list, definition)
153
152
  end
154
153
  else # Start of a paragraph
155
- output << read_paragraph(src)
154
+ output.concat read_paragraph(src)
156
155
  end
157
156
  end
158
157
 
@@ -278,16 +277,6 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
278
277
  break
279
278
  when :olist, :ulist
280
279
  break if !src.next_line || src.next_line.md_type == t
281
- when :raw_html
282
- # This is a pretty awful hack to handle inline HTML
283
- # but it means double-parsing HMTL.
284
- html = parse_span([src.cur_line], src)
285
- unless html.empty? || html.first.is_a?(String)
286
- if html.first.parsed_html
287
- first_node_name = html.first.parsed_html.first_node_name
288
- end
289
- end
290
- break if first_node_name && !HTML_INLINE_ELEMS.include?(first_node_name)
291
280
  end
292
281
  break if src.cur_line.strip.empty?
293
282
  break if src.next_line && [:header1, :header2].include?(src.next_line.md_type)
@@ -297,7 +286,48 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
297
286
  end
298
287
  children = parse_span(lines, src)
299
288
 
300
- md_par(children)
289
+ pick_apart_non_inline_html(children)
290
+ end
291
+
292
+ # If there are non-inline HTML tags in the paragraph, break them out into
293
+ # their own elements and make paragraphs out of everything else.
294
+ def pick_apart_non_inline_html(children)
295
+ output = []
296
+ para_children = []
297
+
298
+ children.each do |child|
299
+ if element_is_non_inline_html?(child)
300
+ unless para_children.empty?
301
+ # Fix up paragraphs before non-inline elements having an extra space
302
+ last_child = para_children.last
303
+ if last_child.is_a?(String) && !last_child.empty?
304
+ last_child.replace last_child[0..-2]
305
+ end
306
+
307
+ output << md_par(para_children)
308
+ para_children = []
309
+ end
310
+ output << child
311
+ else
312
+ para_children << child
313
+ end
314
+ end
315
+
316
+ unless para_children.empty?
317
+ output << md_par(para_children)
318
+ end
319
+
320
+ output
321
+ end
322
+
323
+ # Is the given element an HTML element whose root is not an inline element?
324
+ def element_is_non_inline_html?(elem)
325
+ if elem.is_a?(MDElement) && elem.node_type == :raw_html && elem.parsed_html
326
+ first_node_name = elem.parsed_html.first_node_name
327
+ first_node_name && !HTML_INLINE_ELEMS.include?(elem.parsed_html.first_node_name)
328
+ else
329
+ false
330
+ end
301
331
  end
302
332
 
303
333
  # Reads one list item, either ordered or unordered.
@@ -310,14 +340,18 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
310
340
  indentation, ial = spaces_before_first_char(first)
311
341
  al = read_attribute_list(CharSource.new(ial, src)) if ial
312
342
  ial_offset = ial ? ial.length + 3 : 0
313
- lines, want_my_paragraph =
314
- read_indented_content(src, indentation, [], item_type, ial_offset)
343
+ lines, want_my_paragraph = read_indented_content(src, indentation, [], item_type, ial_offset)
344
+
345
+ # in case there is a second line and this line starts a new list, format it.
346
+ if !lines.empty? && [:ulist, :olist].include?(MaRuKu::MDLine.new(lines.first).md_type)
347
+ lines.unshift ""
348
+ end
315
349
 
316
350
  # add first line
317
351
  # Strip first '*', '-', '+' from first line
318
- stripped = first[indentation, first.size - 1]
352
+ first_changed = first.gsub(/([^\t]*)(\t)/) { $1 + " " * (TAB_SIZE - $1.length % TAB_SIZE) }
353
+ stripped = first_changed[indentation, first_changed.size - 1]
319
354
  lines.unshift stripped
320
-
321
355
  src2 = LineSource.new(lines, src, parent_offset)
322
356
  children = parse_blocks(src2)
323
357
 
@@ -512,8 +546,18 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
512
546
  out << md_ref_def(id, url, :title => title)
513
547
  end
514
548
 
515
- def split_cells(s)
516
- s.split('|').reject(&:empty?).map(&:strip)
549
+ def split_cells(s, allowBlank = false)
550
+ if allowBlank
551
+ if /^[|].*[|]$/ =~ s # handle the simple and decorated table cases
552
+ s.split('|', -1)[1..-2] # allow blank cells, but only keep the inner elements of the cells
553
+ elsif /^.*[|]$/ =~ s
554
+ s.split('|', -1)[0..-2] # allow blank cells, but only keep the inner elements of the cells
555
+ else
556
+ s.split('|', -1)
557
+ end
558
+ else
559
+ s.split('|').reject(&:empty?).map(&:strip)
560
+ end
517
561
  end
518
562
 
519
563
  def read_table(src)
@@ -530,7 +574,9 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
530
574
  # otherwise left-align.
531
575
  starts = s.start_with? ':'
532
576
  ends = s.end_with? ':'
533
- if starts && ends
577
+ if s.empty? # blank
578
+ nil
579
+ elsif starts && ends
534
580
  :center
535
581
  elsif ends
536
582
  :right
@@ -539,8 +585,11 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
539
585
  end
540
586
  end
541
587
 
588
+ align.pop if align[-1].nil? # trailing blank
542
589
  num_columns = align.size
543
590
 
591
+ head.pop if head.size == num_columns + 1 && head[-1].al.size == 0 # trailing blank
592
+
544
593
  if head.size != num_columns
545
594
  maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
546
595
  tell_user "I will ignore this table."
@@ -549,23 +598,82 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
549
598
  end
550
599
 
551
600
  rows = []
552
-
553
- while src.cur_line && src.cur_line =~ /\|/
554
- row = split_cells(src.shift_line).map do |s|
555
- md_el(:cell, parse_span(s))
601
+ while src.cur_line && src.cur_line.include?('|')
602
+ row = []
603
+ colCount = 0
604
+ colspan = 1
605
+ currElem = nil
606
+ currIdx = 0
607
+ split_cells(src.shift_line, true).map do |s|
608
+ if s.empty?
609
+ # empty cells increase the colspan of the previous cell
610
+ found = false
611
+ colspan += 1
612
+ al = (currElem &&currElem.al) || AttributeList.new
613
+ if al.size > 0
614
+ elem = find_colspan(al)
615
+ if elem != nil
616
+ elem[1] = colspan.to_s
617
+ found = true
618
+ end
619
+ end
620
+ al.push(["colspan", colspan.to_s]) unless found # also handles the case of and empty attribute list
621
+ else
622
+ colspan = 1
623
+ row[currIdx] = md_el(:cell, parse_span(s))
624
+ currElem = row[currIdx]
625
+ currIdx += 1
626
+ end
556
627
  end
557
628
 
629
+ #
630
+ # sanity check - make sure the current row has the right number of columns (including spans)
631
+ # If not, dump the table and return a break
632
+ #
633
+ num_columns = count_columns(row)
634
+ if num_columns == head.size + 1 && row[-1].al.size == 0 #trailing blank cell
635
+ row.pop
636
+ num_columns -= 1
637
+ end
558
638
  if head.size != num_columns
559
- maruku_error "Row does not have #{num_columns} columns: \n#{row.inspect}"
639
+ maruku_error "Row does not have #{head.size} columns: \n#{row.inspect} - #{num_columns}"
560
640
  tell_user "I will ignore this table."
561
- # XXX try to recover
641
+ # XXX need to recover
562
642
  return md_br
563
643
  end
564
644
  rows << row
565
645
  end
646
+ rows.unshift(head) # put the header row on the processed table
647
+ md_el(:table, rows, { :align => align })
648
+ end
649
+
650
+ #
651
+ # count the actual number of elements in a row taking into account colspans
652
+ #
653
+ def count_columns(row)
654
+ colCount = 0
655
+
656
+ row.each do |cell|
657
+ if cell.al && cell.al.size > 0
658
+ al = find_colspan(cell.al)
659
+ if al != nil
660
+ colCount += al[1].to_i
661
+ else
662
+ colCount += 1
663
+ end
664
+ else
665
+ colCount += 1
666
+ end
667
+ end
668
+
669
+ colCount
670
+ end
566
671
 
567
- children = (head + rows).flatten
568
- md_el(:table, children, { :align => align })
672
+ #
673
+ # Search an attribute list looking for a colspan
674
+ #
675
+ def find_colspan(al)
676
+ al.find {|alElem| alElem[0] == "colspan" }
569
677
  end
570
678
 
571
679
  # If current line is text, a definition list is coming
@@ -335,7 +335,7 @@ module MaRuKu::In::Markdown::SpanLevelParser
335
335
  end
336
336
  end
337
337
 
338
- # Reads a simple string (no formatting) until one of break_on_chars,
338
+ # Reads a simple string (no formatting) until one of exit_on_chars,
339
339
  # while escaping the escaped.
340
340
  # If the string is empty, it returns nil.
341
341
  # By default, raises on error if the string terminates unexpectedly. This can be
@@ -355,7 +355,7 @@ module MaRuKu::In::Markdown::SpanLevelParser
355
355
  when nil
356
356
  if warn
357
357
  maruku_error "String finished while reading (break on " +
358
- "#{exit_on_chars.inspect})" +
358
+ "#{(exit_on_chars + exit_on_strings).inspect})" +
359
359
  " already read: #{text.inspect}", src
360
360
  end
361
361
  break
@@ -447,6 +447,11 @@ module MaRuKu::In::Markdown::SpanLevelParser
447
447
  # We will read until this string
448
448
  end_string = "`" * num_ticks
449
449
 
450
+ # Try to handle empty single-ticks
451
+ if num_ticks > 1 && !src.next_matches(/.*#{Regexp.escape(end_string)}/)
452
+ con.push_element(end_string) and return
453
+ end
454
+
450
455
  code = read_simple(src, nil, nil, end_string)
451
456
 
452
457
  # We didn't find a closing batch!
@@ -454,12 +459,6 @@ module MaRuKu::In::Markdown::SpanLevelParser
454
459
  con.push_element(end_string + (code || '')) and return
455
460
  end
456
461
 
457
- # We didn't find a closing batch!
458
- if !code || src.cur_char != '`'
459
- con.push_element(end_string + (code || ''))
460
- return
461
- end
462
-
463
462
  # puts "Now I expects #{num_ticks} ticks: #{src.cur_chars(10).inspect}"
464
463
  src.ignore_chars num_ticks
465
464
 
@@ -164,8 +164,17 @@ module MaRuKu::In::Markdown::SpanLevelParser
164
164
  # Replace all matches in the input at once with the
165
165
  # same elements from "replacement".
166
166
  def apply(first, input, output)
167
- intersperse(first.split(pattern), replacement).each do |x|
168
- append_to_output(output, x)
167
+ split = first.split(pattern)
168
+ if split.empty?
169
+ first.scan(pattern).size.times do
170
+ clone_elems(replacement).each do |x|
171
+ append_to_output(output, x)
172
+ end
173
+ end
174
+ else
175
+ intersperse(first.split(pattern), replacement).each do |x|
176
+ append_to_output(output, x)
177
+ end
169
178
  end
170
179
  end
171
180
 
@@ -174,18 +183,22 @@ module MaRuKu::In::Markdown::SpanLevelParser
174
183
  # Sort of like "join" - places the elements in "elem"
175
184
  # between each adjacent element in the array.
176
185
  def intersperse(ary, elem)
177
- return ary if ary.length <= 1
186
+ return clone_elems(elem) if ary.empty?
187
+ return ary if ary.length == 1
178
188
  h, *t = ary
179
189
  t.inject([h]) do |r, e|
180
- entities = elem.map do |el|
181
- en = el.clone
182
- en.doc = doc
183
- en
184
- end
185
- r.concat entities
190
+ r.concat clone_elems(elem)
186
191
  r << e
187
192
  end
188
193
  end
194
+
195
+ def clone_elems(elems)
196
+ elems.map do |el|
197
+ en = el.clone
198
+ en.doc = doc
199
+ en
200
+ end
201
+ end
189
202
  end
190
203
 
191
204
  # A more complex rule that uses a capture group from the
@@ -9,4 +9,9 @@ class Maruku < MaRuKu::MDDocument
9
9
  self.attributes.merge! meta
10
10
  parse_doc(s) if s
11
11
  end
12
+
13
+ def to_s
14
+ warn "Maruku#to_s is deprecated and will be removed or changed in a near-future version of Maruku."
15
+ super
16
+ end
12
17
  end
@@ -4,6 +4,14 @@ require 'cgi'
4
4
  # This module groups all functions related to HTML export.
5
5
  module MaRuKu::Out::HTML
6
6
 
7
+ # Escape text for use in HTML (content or attributes) by running it through
8
+ # standard XML escaping (quotes and angle brackets and ampersands)
9
+ def self.escapeHTML(text)
10
+ CGI.escapeHTML(text)
11
+ # TODO: When we drop Rubies < 1.9.3, re-add .gsub(/[^[:print:]\n\r\t]/, '') to
12
+ # get rid of non-printable control characters.
13
+ end
14
+
7
15
  # A simple class to represent an HTML element for output.
8
16
  class HTMLElement
9
17
  attr_accessor :name
@@ -88,7 +96,7 @@ module MaRuKu::Out::HTML
88
96
 
89
97
  # Helper to create a text node
90
98
  def xtext(text)
91
- CGI.escapeHTML(text)
99
+ MaRuKu::Out::HTML.escapeHTML(text)
92
100
  end
93
101
 
94
102
  # Helper to create an element
@@ -386,15 +394,14 @@ module MaRuKu::Out::HTML
386
394
  # Pretty much the same as the HTMLElement constructor except it
387
395
  # copies standard attributes out of the Maruku Element's attributes hash.
388
396
  def html_element(name, content="", attributes={})
389
- if attributes.empty? && content.is_a?(Hash)
390
- attributes = content
391
- end
397
+ attributes = content if attributes.empty? && content.is_a?(Hash)
392
398
 
393
399
  Array(HTML4Attributes[name]).each do |att|
394
400
  if v = @attributes[att]
395
- attributes[att.to_s] = v.to_s
401
+ attributes[att.to_s] = MaRuKu::Out::HTML.escapeHTML(v.to_s)
396
402
  end
397
403
  end
404
+
398
405
  content = yield if block_given?
399
406
 
400
407
  HTMLElement.new(name, attributes, content)
@@ -798,7 +805,9 @@ module MaRuKu::Out::HTML
798
805
  def to_html_table
799
806
  num_columns = self.align.size
800
807
 
801
- head, *rows = @children.each_slice(num_columns).to_a
808
+ # The table data is passed as a multi-dimensional array
809
+ # we just need to split the head from the body
810
+ head, *rows = @children
802
811
 
803
812
  table = html_element('table')
804
813
  thead = xelem('thead')