maruku 0.2.12 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,6 +22,9 @@ require 'maruku/structures'
22
22
  # Code for parsing whole Markdown documents
23
23
  require 'maruku/parse_doc'
24
24
 
25
+ # A class for reading and sanitizing inline HTML
26
+ require 'maruku/html_helper'
27
+
25
28
  # Code for parsing Markdown block-level elements
26
29
  require 'maruku/parse_block'
27
30
 
@@ -0,0 +1,103 @@
1
+
2
class Maruku

  # Incremental reader for a block of raw (inline) HTML.
  #
  # Lines are fed one at a time through #eat_this. Everything consumed is
  # copied into an output buffer (see #stuff_you_read) while the helper keeps
  # a stack of the currently open tags and remembers whether it is inside an
  # HTML comment. #is_finished? tells the caller when the block is balanced.
  #
  # Tags named in TO_SANITIZE are rewritten in the self-closing form
  # "<tag attrs />" and are never pushed on the tag stack.
  class HTMLHelper
    # An opening or closing tag at the start of a line: group 1 is "/" for a
    # closing tag, group 2 the tag name, group 3 the raw attribute text.
    Tag = %r{^<(/)?(\w+)([^>]*)>}m
    # A run of characters, starting at the beginning of a line, with no "<".
    EverythingElse = %r{^[^<]+}m
    # The opening marker of an HTML comment at the start of a line.
    CommentStart = %r{^<!--}x
    # The rest of a line up to and including the comment terminator.
    CommentEnd = %r{^.*-->}
    # Tags that are always emitted as self-closing elements.
    TO_SANITIZE = ['img','hr']

    # True while the reader is between "<!--" and "-->".
    attr_accessor :inside_comment

    def initialize
      @rest = ""            # input not consumed yet
      @tag_stack = []       # names of the tags opened and not yet closed
      @m = nil              # result of the last match attempt (used by #inspect)
      @already = ""         # output accumulated so far
      @inside_comment = false
    end

    # Consumes one line of HTML (given without its trailing newline; a "\n"
    # is appended here) and adds the result to the output buffer.
    #
    # Raises a RuntimeError (through #error) when a closing tag does not
    # match the innermost open tag, or when there is no open tag at all.
    def eat_this(line)
      @rest = line + "\n" + @rest

      until @rest.empty?
        if @inside_comment
          eat_inside_comment
        else
          eat_outside_comment
        end
      end
    end


    # Raises a RuntimeError carrying +s+ followed by the reader state.
    def error(s)
      raise "Error: #{s} "+ inspect
    end

    # Multi-line dump of the reader state, used in error messages.
    def inspect; "HTML READER\n comment=#{inside_comment} "+
      "match=#{@m.to_s.inspect}"+
      "\n * * * BEFORE * * *\n#{@already.inspect}"+
      "\n * * * AFTER * * *\n#{@rest.inspect}"+
      "\n * * * TAGS stack * * *\n#{@tag_stack.inspect}"
    end

    # Returns everything consumed so far, with sanitized tags rewritten.
    def stuff_you_read
      @already
    end

    # True when no comment is open and every opened tag has been closed.
    def is_finished?
      not @inside_comment and @tag_stack.empty?
    end

    private

    # One step of the main loop while inside a comment: look for the
    # terminator, otherwise copy text through unchanged.
    def eat_inside_comment
      if (@m = CommentEnd.match(@rest))
        @inside_comment = false
        consume_match
      elsif (@m = EverythingElse.match(@rest))
        consume_match
      else
        # e.g. a "<" inside a comment that does not end on this line
        consume_one_char
      end
    end

    # One step of the main loop while outside a comment.
    def eat_outside_comment
      if (@m = CommentStart.match(@rest))
        @inside_comment = true
        consume_match
      elsif (@m = Tag.match(@rest))
        @already += @m.pre_match
        @rest = @m.post_match
        eat_tag
      elsif (@m = EverythingElse.match(@rest))
        consume_match
      else
        # e.g. a stray "<" that does not start a tag, as in "a < b"
        consume_one_char
      end
    end

    # Handles the tag held in @m: updates the tag stack and emits the tag.
    def eat_tag
      is_closing = !!@m[1]
      tag = @m[2]
      attributes = @m[3]

      # "<br/>" style: strip the trailing slash from the attribute text
      is_single = false
      if attributes =~ /\A(.*)\/\Z/
        attributes = $1
        is_single = true
      end

      if TO_SANITIZE.include? tag
        # the opening and the closing form are both rewritten this way
        @already += '<%s %s />' % [tag, attributes]
      elsif is_closing
        @already += @m.to_s
        # Check for the empty stack first: with nothing open, the mismatch
        # test below would always fire and hide this more precise message.
        if @tag_stack.empty?
          error "Malformed: closing tag #{tag.inspect} "+
            "in empty list"
        end
        if @tag_stack.last != tag
          error "Malformed: tag <#{tag}> "+
            "closes <#{@tag_stack.last}>"
        end
        @tag_stack.pop
      elsif not is_single
        @tag_stack.push tag
        @already += @m.to_s
      else
        # self-closing tag: nothing to track, but it must stay in the output
        @already += @m.to_s
      end
    end

    # Moves the text matched by @m (and anything before it) to the output.
    def consume_match
      @already += @m.pre_match + @m.to_s
      @rest = @m.post_match
    end

    # Copies a single character verbatim so that the main loop always makes
    # progress, even on input none of the patterns recognise.
    def consume_one_char
      @already += @rest[0, 1]
      @rest = @rest[1..-1]
    end
  end
end
@@ -0,0 +1,50 @@
1
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
2
+ #
3
+ # This file is part of Maruku.
4
+ #
5
+ # Maruku is free software; you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation; either version 2 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # Maruku is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with Maruku; if not, write to the Free Software
17
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18
+
19
+ # Structures definition
20
+ require 'maruku/structures'
21
+
22
+ # Code for parsing whole Markdown documents
23
+ require 'maruku/parse_doc'
24
+
25
+ # Code for parsing Markdown block-level elements
26
+ require 'maruku/parse_block'
27
+
28
+ # Code for parsing Markdown span-level elements
29
+ require 'maruku/parse_span'
30
+
31
+ # Ugly things kept in a closet
32
+ require 'maruku/string_utils'
33
+
34
+ # Code for creating a table of contents
35
+ require 'maruku/toc'
36
+
37
+ # Version and URL
38
+ require 'maruku/version'
39
+
40
+
41
+ # Exporting to html
42
+ require 'maruku/to_html'
43
+
44
+ # Exporting to latex
45
+ require 'maruku/to_latex'
46
+ require 'maruku/to_latex_strings'
47
+ require 'maruku/to_latex_entities'
48
+
49
+ # Exporting to text: strips all formatting (not complete)
50
+ require 'maruku/to_s'
@@ -70,7 +70,7 @@ class Maruku
70
70
  end
71
71
  when :quote; output << read_quote
72
72
  when :code; e = read_code; output << e if e
73
- when :raw_html; output << read_raw_html
73
+ when :raw_html; e = read_raw_html; output << e if e
74
74
 
75
75
  # these do not produce output
76
76
  when :footnote_text; read_footnote_text
@@ -81,7 +81,8 @@ class Maruku
81
81
  # warn if we forgot something
82
82
  else
83
83
  node_type = cur_line_node_type
84
- $stderr.puts "Ignoring line '#{shift_line}' type = #{node_type}"
84
+ line = shift_line
85
+ # $stderr.puts "Ignoring line '#{line}' type = #{node_type}"
85
86
  end
86
87
 
87
88
  if current_metadata and output.last
@@ -155,13 +156,18 @@ class Maruku
155
156
  e
156
157
  end
157
158
 
159
# Builds the metadata hash for the text found between the braces of a
# header such as "# Title {#some-id}".
#
# s:: the raw text between "{" and "}" (may be nil or empty).
#
# Surrounding whitespace and a single leading "#" are dropped and the
# remainder is stored under :id. Returns an empty hash when nothing is
# left, so that merging the result never stores a nil id.
def parse_attributes(s)
  id = s.to_s.strip.sub(/\A#/, '')
  id.empty? ? {} : {:id => id}
end
158
163
  # reads a header like '#### header ####'
164
+
159
165
  def read_header3
160
166
  e = create_md_element(:header)
161
167
  line = shift_line.strip
162
- if line =~ HeaderWithId
168
+ if line =~ HeaderWithAttributes
163
169
  line = $1.strip
164
- e.meta[:id] = $2
170
+ e.meta.merge! parse_attributes($2)
165
171
  end
166
172
 
167
173
  e.meta[:level] = num_leading_hashes(line)
@@ -172,37 +178,39 @@ class Maruku
172
178
 
173
179
  e
174
180
  end
175
-
176
-
177
- def read_raw_html
178
- lines = []
179
-
180
- cur_line =~ %r{^<(\w+)}
181
- tag = $1
182
- # puts "Start tag = #{tag} "
183
-
184
- while cur_line
185
- break if (number_of_leading_spaces(cur_line) == 0) &&
186
- (not [:raw_html, :empty].include? cur_line_node_type)
187
181
 
188
- lines << shift_line
189
- # check for a closing tag
190
- if (lines.last =~ %r{^</(\w+)}||
191
- lines.last =~ %r{</(\w+)>\s*$}) && $1 == tag
192
- break
182
+
183
+ def read_raw_html
184
+ # raw_html = ""
185
+
186
+ h = HTMLHelper.new
187
+ begin
188
+ l=shift_line
189
+ h.eat_this l
190
+ # puts "\nBLOCK:\nhtml -> #{l.inspect}"
191
+ while cur_line and not h.is_finished?
192
+ l=shift_line
193
+ # puts "html -> #{l.inspect}"
194
+ h.eat_this l
193
195
  end
196
+ rescue Exception => e
197
+ puts e.inspect
198
+ # puts h.inspect
194
199
  end
195
200
 
196
- # dbg_describe_ary(lines, 'HTML')
197
-
198
- raw_html = lines.join("\n")
199
-
201
+ raw_html = h.stuff_you_read
202
+
200
203
  e = create_md_element(:raw_html)
201
204
 
202
205
  begin
206
+ # remove newlines and whitespace at begin
207
+ # and end of string, or else REXML gets confused
208
+ raw_html = raw_html.gsub(/\A\s*</,'<').
209
+ gsub(/>[\s\n]*\Z/,'>')
203
210
  e.meta[:parsed_html] = Document.new(raw_html)
204
211
  rescue
205
- $stderr.puts "Malformed block of HTML:\n#{raw_html}"
212
+ #$stderr.puts "Malformed block of HTML:\n#{raw_html}"
213
+ #puts h.inspect
206
214
  end
207
215
 
208
216
  e.meta[:raw_html] = raw_html
@@ -359,12 +367,16 @@ class Maruku
359
367
  while lines.last && lines.last.strip.size == 0
360
368
  lines.pop
361
369
  end
370
+
371
+ while lines.first && lines.first.strip.size == 0
372
+ lines.shift
373
+ end
362
374
 
363
375
  return nil if lines.empty?
364
376
 
365
377
  source = lines.join("\n")
366
378
  # ignore trailing lines
367
- source = source.gsub(/\n+\Z/,'')
379
+ # source = source.gsub(/\n+\Z/,'')
368
380
 
369
381
  # dbg_describe_ary(lines, 'CODE')
370
382
  e.meta[:raw_code] = source
@@ -33,7 +33,9 @@ class Maruku
33
33
  @stack = []
34
34
 
35
35
  @meta = parse_email_headers(s)
36
- lines = split_lines(@meta[:data])
36
+ data = @meta[:data]
37
+ @meta.delete :data
38
+ lines = split_lines(data)
37
39
  @children = parse_lines_as_markdown(lines)
38
40
 
39
41
  self.search_abbreviations
@@ -191,6 +191,7 @@ class Maruku
191
191
  # line that were mistaken for raw_html
192
192
  return :text if l=~EMailAddress or l=~ URL
193
193
  return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+}
194
+ return :raw_html if l =~ %r{[ ]{0,3}<\!\-\-}
194
195
  return :ulist if l =~ /^\s?([\*\-\+])\s+.*\w+/
195
196
  return :olist if l =~ /^\s?\d+\..*\w+/
196
197
  return :empty if l.strip.size == 0
@@ -200,8 +201,10 @@ class Maruku
200
201
  # at least three asterisks on a line, and only whitespace
201
202
  return :hrule if l =~ /^(\s*\*\s*){3,1000}$/
202
203
  return :hrule if l =~ /^(\s*-\s*){3,1000}$/ # or hyphens
204
+ return :hrule if l =~ /^(\s*_\s*){3,1000}$/ # or underscores
203
205
  return :quote if l =~ /^>/
204
206
  return :metadata if l =~ /^@/
207
+ return :m2ref if l =~ /^\s{0,3}\{[\w\d\s]+\}:/
205
208
  return :text
206
209
  end
207
210
 
@@ -257,6 +260,8 @@ class Maruku
257
260
 
258
261
  HeaderWithId = /^(.*)\{\#([\w_-]+)\}\s*$/
259
262
 
263
+ HeaderWithAttributes = /^(.*)\{(.*)\}\s*$/
264
+
260
265
  TabSize = 4;
261
266
 
262
267
  # if contains a pipe, it could be a table header
@@ -0,0 +1,42 @@
1
+ require 'maruku'
2
+ require 'bluecloth'
3
+
4
# Benchmark: reads a Markdown document from standard input and reports the
# average time each converter needs to parse it and to render it.
data = $stdin.read

# number of repetitions each measurement is averaged over
num = 10

stats = [
  [BlueCloth, :to_html],
  [Maruku,    :to_html],
  [Maruku,    :to_latex]
].map do |c, method|
  puts "Computing for #{c}"

  # Parsing: build the document +num+ times, keeping the last one around
  # so that the rendering pass has something to work on.
  doc = nil
  parse_started = Time.now
  1.upto(num) do |i|
    puts "#{i}"
    doc = c.new(data)
  end
  parsing = (Time.now - parse_started) / num

  # Rendering: call the export method +num+ times on the parsed document.
  render_started = Time.now
  1.upto(num) do |i|
    puts "#{i}"
    doc.send method
  end
  rendering = (Time.now - render_started) / num

  [c, method, parsing, rendering]
end

# One summary line per converter/method pair.
stats.each do |c, method, parsing, rendering|
  total = parsing + rendering
  puts(("%s (%s): parsing %0.2f sec + rendering %0.2f sec "+
        "= %0.2f sec ") % [c, method, parsing, rendering, total])
end
42
+
@@ -0,0 +1,116 @@
1
+
2
+ require 'maruku'
3
+
4
class Maruku

  # Aborts a test: raises a RuntimeError whose message contains the reason
  # +s+, the Markdown source +test+, the parsed document (doc.inspect) and
  # its HTML rendering (doc.to_html).
  def Maruku.failed(test, doc, s)
    raise "Test failed: #{s}\n*****\n#{test}\n*****\n"+
      "#{doc.inspect}\n*****\n#{doc.to_html}"
  end

  # Parses every sample in MetaTests (samples are separated by "***") and
  # checks that each one yields exactly one child, which must be a header.
  # Raises through Maruku.failed on the first sample that does not.
  def Maruku.metaTests
    # Attributes the sample headers are written to describe; nothing in
    # this method compares against it yet.
    ref = {:id => 'id1', :class => ['class1','class2'],
      :style=> 'Style is : important = for all } things'}

    MetaTests.split('***').each do |test|
      doc = Maruku.new(test)

      failed(test, doc, "children != 1") unless doc.children.size == 1

      h = doc.children[0]

      failed(test, doc, "child not header") unless h.node_type == :header
    end
  end

  # Sample documents: the same header written with different attribute
  # syntaxes, separated by "***".
  MetaTests = <<EOF

# Head # {ref1 ref2 ref3}

{ref1}: id: id1; class: class1
{ref2}: class: class2
{ref3}: style: "Style is : important = for all } things"

***

# Head # {ref1 ref3 ref2}

{ref1}: id: id1; class: class1
{ref2}: class: class2
{ref3}: style: "Style is : important = for all } things"

***

# Head # {ref1 ref2 ref3}

{ref1}: id= id1; class=class1
{ref2}: class=class2
{ref3}: style="Style is : important = for all } things"

***

# Head # {ref1 ref2 ref3}

{ref1}: id=id1 class=class1
{ref2}: class=class2
{ref3}: style="Style is : important = for all } things"

***
# Head # {ref1 ref2 ref3}

{ref1}: id:id1 class:class1
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***
# Head # {ref1 ref2 ref3}

{ref1}: id:id1 class:class1
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***

# Head # {#id1 .class1 ref2 ref3}

{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***

# Head # { #id1 .class1 ref2 ref3 }

{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***

# Head # { id=id1 class=class1 ref2 ref3 }

{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***

# Head # { id:id1 class="class1" class:"class2" style="Style is : important = for all } things"}

EOF

end
110
+
111
# Run the attribute tests only when this file itself is the program being
# executed (i.e. the script name is tests.rb), not when it is required.
Maruku.metaTests if File.basename($0) == 'tests.rb'
115
+
116
+