maruku 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/docs/markdown_syntax.md +9 -21
  5. data/lib/maruku/defaults.rb +1 -1
  6. data/lib/maruku/element.rb +18 -3
  7. data/lib/maruku/ext/fenced_code.rb +1 -1
  8. data/lib/maruku/ext/math/mathml_engines/blahtex.rb +1 -1
  9. data/lib/maruku/ext/math/to_html.rb +2 -9
  10. data/lib/maruku/html.rb +5 -8
  11. data/lib/maruku/input/html_helper.rb +94 -81
  12. data/lib/maruku/input/mdline.rb +6 -4
  13. data/lib/maruku/input/parse_block.rb +145 -37
  14. data/lib/maruku/input/parse_span.rb +7 -8
  15. data/lib/maruku/input/rubypants.rb +22 -9
  16. data/lib/maruku/maruku.rb +5 -0
  17. data/lib/maruku/output/to_html.rb +15 -6
  18. data/lib/maruku/output/to_latex.rb +9 -3
  19. data/lib/maruku/output/to_s.rb +0 -1
  20. data/lib/maruku/string_utils.rb +2 -2
  21. data/lib/maruku/version.rb +1 -1
  22. data/spec/block_docs/abbrev.md +18 -18
  23. data/spec/block_docs/attribute_sanitize.md +22 -0
  24. data/spec/block_docs/auto_cdata.md +48 -0
  25. data/spec/block_docs/bug_table.md +4 -4
  26. data/spec/block_docs/code4.md +79 -0
  27. data/spec/block_docs/div_without_newline.md +16 -0
  28. data/spec/block_docs/empty_cells.md +3 -9
  29. data/spec/block_docs/entities.md +6 -12
  30. data/spec/block_docs/extra_table1.md +6 -6
  31. data/spec/block_docs/fenced_code_blocks.md +12 -20
  32. data/spec/block_docs/fenced_code_blocks_highlighted.md +1 -2
  33. data/spec/block_docs/footnotes2.md +4 -1
  34. data/spec/block_docs/ignore_bad_header.md +9 -0
  35. data/spec/block_docs/issue106.md +78 -0
  36. data/spec/block_docs/issue115.md +20 -0
  37. data/spec/block_docs/issue117.md +13 -0
  38. data/spec/block_docs/issue120.md +48 -0
  39. data/spec/block_docs/issue123.md +11 -0
  40. data/spec/block_docs/issue124.md +16 -0
  41. data/spec/block_docs/issue40.md +24 -12
  42. data/spec/block_docs/issue89.md +1 -1
  43. data/spec/block_docs/lists_nested_blankline.md +14 -8
  44. data/spec/block_docs/lists_ol.md +5 -5
  45. data/spec/block_docs/lists_paraindent.md +6 -11
  46. data/spec/block_docs/math-blahtex/equations.md +12 -13
  47. data/spec/block_docs/math-blahtex/math2.md +9 -2
  48. data/spec/block_docs/math/embedded_invalid_svg.md +31 -2
  49. data/spec/block_docs/math/embedded_svg.md +41 -2
  50. data/spec/block_docs/math/equations.md +7 -2
  51. data/spec/block_docs/math/inline.md +2 -2
  52. data/spec/block_docs/math/math2.md +9 -1
  53. data/spec/block_docs/math/spaces_after_inline_math.md +17 -0
  54. data/spec/block_docs/math/table.md +2 -2
  55. data/spec/block_docs/math/table2.md +6 -6
  56. data/spec/block_docs/table_attributes.md +4 -6
  57. data/spec/block_docs/table_colspan.md +41 -0
  58. data/spec/block_docs/tables.md +10 -21
  59. data/spec/block_docs/tables2.md +74 -0
  60. data/spec/block_docs/xml_comments.md +32 -0
  61. data/spec/span_spec.rb +1 -1
  62. data/spec/spec_helper.rb +1 -0
  63. metadata +42 -28
  64. metadata.gz.sig +3 -3
  65. data/spec/block_docs/xml2.md +0 -19
@@ -40,19 +40,19 @@ module MaRuKu
40
40
  return :definition if self =~ Definition
41
41
  # I had a bug with emails and urls at the beginning of the
42
42
  # line that were mistaken for raw_html
43
- return :text if self =~ /\A[ ]{0,3}<([^:@>]+?@[^:@>]+?)>/
43
+ return :text if self =~ /\A[ ]{0,3}#{EMailAddress}/
44
44
  return :text if self =~ /\A[ ]{0,3}<http:/
45
45
  # raw html is like PHP Markdown Extra: at most three spaces before
46
46
  return :xml_instr if self =~ /\A\s*<\?/
47
- return :raw_html if self =~ %r{^[ ]{0,3}</?\s*\w+}
47
+ return :raw_html if self =~ %r{\A[ ]{0,3}</?\s*\w+}
48
48
  return :raw_html if self =~ /\A[ ]{0,3}<\!\-\-/
49
49
  return :header1 if self =~ /\A(=)+/
50
50
  return :header2 if self =~ /\A([-\s])+\z/
51
51
  return :header3 if self =~ /\A(#)+\s*\S+/
52
52
  # at least three asterisks/hyphens/underscores on a line, and only whitespace
53
53
  return :hrule if self =~ /\A(\s*[\*\-_]\s*){3,}\z/
54
- return :ulist if self =~ /\A([ ]{0,3}|\t)([\*\-\+])\s+.*/
55
- return :olist if self =~ /\A([ ]{0,3}|\t)\d+\.\s+.*/
54
+ return :ulist if self =~ /\A[ ]{0,3}([\*\-\+])\s+.*/
55
+ return :olist if self =~ /\A[ ]{0,3}\d+\.\s+.*/
56
56
  return :code if number_of_leading_spaces >= 4
57
57
  return :quote if self =~ /\A>/
58
58
  return :ald if self =~ AttributeDefinitionList
@@ -126,4 +126,6 @@ module MaRuKu
126
126
  # Table syntax: http://michelf.ca/projects/php-markdown/extra/#table
127
127
  # | -------------:| ------------------------------ |
128
128
  TableSeparator = /\A(?>\|?\s*\:?\-+\:?\s*\|?)+?\z/
129
+
130
+ EMailAddress = /<([^:@>]+?@[^:@>]+?)>/
129
131
  end
@@ -1,5 +1,3 @@
1
- require 'set'
2
-
3
1
  module MaRuKu; module In; module Markdown; module BlockLevelParser
4
2
 
5
3
  include Helpers
@@ -99,18 +97,19 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
99
97
  output << read_abbreviation(src)
100
98
  when :xml_instr
101
99
  read_xml_instruction(src, output)
102
- else # warn if we forgot something
103
- line = src.cur_line
104
- maruku_error "Ignoring line '#{line}' type = #{md_type}", src
105
- src.shift_line
100
+ else # unhandled line type at this level
101
+ # Just treat it as raw text
102
+ read_text_material(src, output)
106
103
  end
107
104
  end
108
105
 
109
106
  merge_ial(output, src, output)
110
- output.delete_if {|x| x.kind_of?(MDElement) && x.node_type == :ial }
111
-
112
- # get rid of empty line markers
113
- output.delete_if {|x| x == :empty }
107
+ output.delete_if do |x|
108
+ # Strip out IAL
109
+ (x.kind_of?(MDElement) && x.node_type == :ial) ||
110
+ # get rid of empty line markers
111
+ x == :empty
112
+ end
114
113
 
115
114
  # See for each list if we can omit the paragraphs
116
115
  # TODO: do this after
@@ -152,7 +151,7 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
152
151
  output << md_el(:definition_list, definition)
153
152
  end
154
153
  else # Start of a paragraph
155
- output << read_paragraph(src)
154
+ output.concat read_paragraph(src)
156
155
  end
157
156
  end
158
157
 
@@ -278,16 +277,6 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
278
277
  break
279
278
  when :olist, :ulist
280
279
  break if !src.next_line || src.next_line.md_type == t
281
- when :raw_html
282
- # This is a pretty awful hack to handle inline HTML
283
- # but it means double-parsing HMTL.
284
- html = parse_span([src.cur_line], src)
285
- unless html.empty? || html.first.is_a?(String)
286
- if html.first.parsed_html
287
- first_node_name = html.first.parsed_html.first_node_name
288
- end
289
- end
290
- break if first_node_name && !HTML_INLINE_ELEMS.include?(first_node_name)
291
280
  end
292
281
  break if src.cur_line.strip.empty?
293
282
  break if src.next_line && [:header1, :header2].include?(src.next_line.md_type)
@@ -297,7 +286,48 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
297
286
  end
298
287
  children = parse_span(lines, src)
299
288
 
300
- md_par(children)
289
+ pick_apart_non_inline_html(children)
290
+ end
291
+
292
+ # If there are non-inline HTML tags in the paragraph, break them out into
293
+ # their own elements and make paragraphs out of everything else.
294
+ def pick_apart_non_inline_html(children)
295
+ output = []
296
+ para_children = []
297
+
298
+ children.each do |child|
299
+ if element_is_non_inline_html?(child)
300
+ unless para_children.empty?
301
+ # Fix up paragraphs before non-inline elements having an extra space
302
+ last_child = para_children.last
303
+ if last_child.is_a?(String) && !last_child.empty?
304
+ last_child.replace last_child[0..-2]
305
+ end
306
+
307
+ output << md_par(para_children)
308
+ para_children = []
309
+ end
310
+ output << child
311
+ else
312
+ para_children << child
313
+ end
314
+ end
315
+
316
+ unless para_children.empty?
317
+ output << md_par(para_children)
318
+ end
319
+
320
+ output
321
+ end
322
+
323
+ # Is the given element an HTML element whose root is not an inline element?
324
+ def element_is_non_inline_html?(elem)
325
+ if elem.is_a?(MDElement) && elem.node_type == :raw_html && elem.parsed_html
326
+ first_node_name = elem.parsed_html.first_node_name
327
+ first_node_name && !HTML_INLINE_ELEMS.include?(elem.parsed_html.first_node_name)
328
+ else
329
+ false
330
+ end
301
331
  end
302
332
 
303
333
  # Reads one list item, either ordered or unordered.
@@ -310,14 +340,18 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
310
340
  indentation, ial = spaces_before_first_char(first)
311
341
  al = read_attribute_list(CharSource.new(ial, src)) if ial
312
342
  ial_offset = ial ? ial.length + 3 : 0
313
- lines, want_my_paragraph =
314
- read_indented_content(src, indentation, [], item_type, ial_offset)
343
+ lines, want_my_paragraph = read_indented_content(src, indentation, [], item_type, ial_offset)
344
+
345
+ # in case there is a second line and this line starts a new list, format it.
346
+ if !lines.empty? && [:ulist, :olist].include?(MaRuKu::MDLine.new(lines.first).md_type)
347
+ lines.unshift ""
348
+ end
315
349
 
316
350
  # add first line
317
351
  # Strip first '*', '-', '+' from first line
318
- stripped = first[indentation, first.size - 1]
352
+ first_changed = first.gsub(/([^\t]*)(\t)/) { $1 + " " * (TAB_SIZE - $1.length % TAB_SIZE) }
353
+ stripped = first_changed[indentation, first_changed.size - 1]
319
354
  lines.unshift stripped
320
-
321
355
  src2 = LineSource.new(lines, src, parent_offset)
322
356
  children = parse_blocks(src2)
323
357
 
@@ -512,8 +546,18 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
512
546
  out << md_ref_def(id, url, :title => title)
513
547
  end
514
548
 
515
- def split_cells(s)
516
- s.split('|').reject(&:empty?).map(&:strip)
549
+ def split_cells(s, allowBlank = false)
550
+ if allowBlank
551
+ if /^[|].*[|]$/ =~ s # handle the simple and decorated table cases
552
+ s.split('|', -1)[1..-2] # allow blank cells, but only keep the inner elements of the cells
553
+ elsif /^.*[|]$/ =~ s
554
+ s.split('|', -1)[0..-2] # allow blank cells, but only keep the inner elements of the cells
555
+ else
556
+ s.split('|', -1)
557
+ end
558
+ else
559
+ s.split('|').reject(&:empty?).map(&:strip)
560
+ end
517
561
  end
518
562
 
519
563
  def read_table(src)
@@ -530,7 +574,9 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
530
574
  # otherwise left-align.
531
575
  starts = s.start_with? ':'
532
576
  ends = s.end_with? ':'
533
- if starts && ends
577
+ if s.empty? # blank
578
+ nil
579
+ elsif starts && ends
534
580
  :center
535
581
  elsif ends
536
582
  :right
@@ -539,8 +585,11 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
539
585
  end
540
586
  end
541
587
 
588
+ align.pop if align[-1].nil? # trailing blank
542
589
  num_columns = align.size
543
590
 
591
+ head.pop if head.size == num_columns + 1 && head[-1].al.size == 0 # trailing blank
592
+
544
593
  if head.size != num_columns
545
594
  maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
546
595
  tell_user "I will ignore this table."
@@ -549,23 +598,82 @@ module MaRuKu; module In; module Markdown; module BlockLevelParser
549
598
  end
550
599
 
551
600
  rows = []
552
-
553
- while src.cur_line && src.cur_line =~ /\|/
554
- row = split_cells(src.shift_line).map do |s|
555
- md_el(:cell, parse_span(s))
601
+ while src.cur_line && src.cur_line.include?('|')
602
+ row = []
603
+ colCount = 0
604
+ colspan = 1
605
+ currElem = nil
606
+ currIdx = 0
607
+ split_cells(src.shift_line, true).map do |s|
608
+ if s.empty?
609
+ # empty cells increase the colspan of the previous cell
610
+ found = false
611
+ colspan += 1
612
+ al = (currElem &&currElem.al) || AttributeList.new
613
+ if al.size > 0
614
+ elem = find_colspan(al)
615
+ if elem != nil
616
+ elem[1] = colspan.to_s
617
+ found = true
618
+ end
619
+ end
620
+ al.push(["colspan", colspan.to_s]) unless found # also handles the case of and empty attribute list
621
+ else
622
+ colspan = 1
623
+ row[currIdx] = md_el(:cell, parse_span(s))
624
+ currElem = row[currIdx]
625
+ currIdx += 1
626
+ end
556
627
  end
557
628
 
629
+ #
630
+ # sanity check - make sure the current row has the right number of columns (including spans)
631
+ # If not, dump the table and return a break
632
+ #
633
+ num_columns = count_columns(row)
634
+ if num_columns == head.size + 1 && row[-1].al.size == 0 #trailing blank cell
635
+ row.pop
636
+ num_columns -= 1
637
+ end
558
638
  if head.size != num_columns
559
- maruku_error "Row does not have #{num_columns} columns: \n#{row.inspect}"
639
+ maruku_error "Row does not have #{head.size} columns: \n#{row.inspect} - #{num_columns}"
560
640
  tell_user "I will ignore this table."
561
- # XXX try to recover
641
+ # XXX need to recover
562
642
  return md_br
563
643
  end
564
644
  rows << row
565
645
  end
646
+ rows.unshift(head) # put the header row on the processed table
647
+ md_el(:table, rows, { :align => align })
648
+ end
649
+
650
+ #
651
+ # count the actual number of elements in a row taking into account colspans
652
+ #
653
+ def count_columns(row)
654
+ colCount = 0
655
+
656
+ row.each do |cell|
657
+ if cell.al && cell.al.size > 0
658
+ al = find_colspan(cell.al)
659
+ if al != nil
660
+ colCount += al[1].to_i
661
+ else
662
+ colCount += 1
663
+ end
664
+ else
665
+ colCount += 1
666
+ end
667
+ end
668
+
669
+ colCount
670
+ end
566
671
 
567
- children = (head + rows).flatten
568
- md_el(:table, children, { :align => align })
672
+ #
673
+ # Search an attribute list looking for a colspan
674
+ #
675
+ def find_colspan(al)
676
+ al.find {|alElem| alElem[0] == "colspan" }
569
677
  end
570
678
 
571
679
  # If current line is text, a definition list is coming
@@ -335,7 +335,7 @@ module MaRuKu::In::Markdown::SpanLevelParser
335
335
  end
336
336
  end
337
337
 
338
- # Reads a simple string (no formatting) until one of break_on_chars,
338
+ # Reads a simple string (no formatting) until one of exit_on_chars,
339
339
  # while escaping the escaped.
340
340
  # If the string is empty, it returns nil.
341
341
  # By default, raises on error if the string terminates unexpectedly. This can be
@@ -355,7 +355,7 @@ module MaRuKu::In::Markdown::SpanLevelParser
355
355
  when nil
356
356
  if warn
357
357
  maruku_error "String finished while reading (break on " +
358
- "#{exit_on_chars.inspect})" +
358
+ "#{(exit_on_chars + exit_on_strings).inspect})" +
359
359
  " already read: #{text.inspect}", src
360
360
  end
361
361
  break
@@ -447,6 +447,11 @@ module MaRuKu::In::Markdown::SpanLevelParser
447
447
  # We will read until this string
448
448
  end_string = "`" * num_ticks
449
449
 
450
+ # Try to handle empty single-ticks
451
+ if num_ticks > 1 && !src.next_matches(/.*#{Regexp.escape(end_string)}/)
452
+ con.push_element(end_string) and return
453
+ end
454
+
450
455
  code = read_simple(src, nil, nil, end_string)
451
456
 
452
457
  # We didn't find a closing batch!
@@ -454,12 +459,6 @@ module MaRuKu::In::Markdown::SpanLevelParser
454
459
  con.push_element(end_string + (code || '')) and return
455
460
  end
456
461
 
457
- # We didn't find a closing batch!
458
- if !code || src.cur_char != '`'
459
- con.push_element(end_string + (code || ''))
460
- return
461
- end
462
-
463
462
  # puts "Now I expects #{num_ticks} ticks: #{src.cur_chars(10).inspect}"
464
463
  src.ignore_chars num_ticks
465
464
 
@@ -164,8 +164,17 @@ module MaRuKu::In::Markdown::SpanLevelParser
164
164
  # Replace all matches in the input at once with the
165
165
  # same elements from "replacement".
166
166
  def apply(first, input, output)
167
- intersperse(first.split(pattern), replacement).each do |x|
168
- append_to_output(output, x)
167
+ split = first.split(pattern)
168
+ if split.empty?
169
+ first.scan(pattern).size.times do
170
+ clone_elems(replacement).each do |x|
171
+ append_to_output(output, x)
172
+ end
173
+ end
174
+ else
175
+ intersperse(first.split(pattern), replacement).each do |x|
176
+ append_to_output(output, x)
177
+ end
169
178
  end
170
179
  end
171
180
 
@@ -174,18 +183,22 @@ module MaRuKu::In::Markdown::SpanLevelParser
174
183
  # Sort of like "join" - places the elements in "elem"
175
184
  # between each adjacent element in the array.
176
185
  def intersperse(ary, elem)
177
- return ary if ary.length <= 1
186
+ return clone_elems(elem) if ary.empty?
187
+ return ary if ary.length == 1
178
188
  h, *t = ary
179
189
  t.inject([h]) do |r, e|
180
- entities = elem.map do |el|
181
- en = el.clone
182
- en.doc = doc
183
- en
184
- end
185
- r.concat entities
190
+ r.concat clone_elems(elem)
186
191
  r << e
187
192
  end
188
193
  end
194
+
195
+ def clone_elems(elems)
196
+ elems.map do |el|
197
+ en = el.clone
198
+ en.doc = doc
199
+ en
200
+ end
201
+ end
189
202
  end
190
203
 
191
204
  # A more complex rule that uses a capture group from the
@@ -9,4 +9,9 @@ class Maruku < MaRuKu::MDDocument
9
9
  self.attributes.merge! meta
10
10
  parse_doc(s) if s
11
11
  end
12
+
13
+ def to_s
14
+ warn "Maruku#to_s is deprecated and will be removed or changed in a near-future version of Maruku."
15
+ super
16
+ end
12
17
  end
@@ -4,6 +4,14 @@ require 'cgi'
4
4
  # This module groups all functions related to HTML export.
5
5
  module MaRuKu::Out::HTML
6
6
 
7
+ # Escape text for use in HTML (content or attributes) by running it through
8
+ # standard XML escaping (quotes and angle brackets and ampersands)
9
+ def self.escapeHTML(text)
10
+ CGI.escapeHTML(text)
11
+ # TODO: When we drop Rubies < 1.9.3, re-add .gsub(/[^[:print:]\n\r\t]/, '') to
12
+ # get rid of non-printable control characters.
13
+ end
14
+
7
15
  # A simple class to represent an HTML element for output.
8
16
  class HTMLElement
9
17
  attr_accessor :name
@@ -88,7 +96,7 @@ module MaRuKu::Out::HTML
88
96
 
89
97
  # Helper to create a text node
90
98
  def xtext(text)
91
- CGI.escapeHTML(text)
99
+ MaRuKu::Out::HTML.escapeHTML(text)
92
100
  end
93
101
 
94
102
  # Helper to create an element
@@ -386,15 +394,14 @@ module MaRuKu::Out::HTML
386
394
  # Pretty much the same as the HTMLElement constructor except it
387
395
  # copies standard attributes out of the Maruku Element's attributes hash.
388
396
  def html_element(name, content="", attributes={})
389
- if attributes.empty? && content.is_a?(Hash)
390
- attributes = content
391
- end
397
+ attributes = content if attributes.empty? && content.is_a?(Hash)
392
398
 
393
399
  Array(HTML4Attributes[name]).each do |att|
394
400
  if v = @attributes[att]
395
- attributes[att.to_s] = v.to_s
401
+ attributes[att.to_s] = MaRuKu::Out::HTML.escapeHTML(v.to_s)
396
402
  end
397
403
  end
404
+
398
405
  content = yield if block_given?
399
406
 
400
407
  HTMLElement.new(name, attributes, content)
@@ -798,7 +805,9 @@ module MaRuKu::Out::HTML
798
805
  def to_html_table
799
806
  num_columns = self.align.size
800
807
 
801
- head, *rows = @children.each_slice(num_columns).to_a
808
+ # The table data is passed as a multi-dimensional array
809
+ # we just need to split the head from the body
810
+ head, *rows = @children
802
811
 
803
812
  table = html_element('table')
804
813
  thead = xelem('thead')