maruku 0.2.12 → 0.2.13

Sign up to get free protection for your applications and to get access to all the features.
@@ -22,6 +22,9 @@ require 'maruku/structures'
22
22
  # Code for parsing whole Markdown documents
23
23
  require 'maruku/parse_doc'
24
24
 
25
+ # A class for reading and sanitizing inline HTML
26
+ require 'maruku/html_helper'
27
+
25
28
  # Code for parsing Markdown block-level elements
26
29
  require 'maruku/parse_block'
27
30
 
@@ -0,0 +1,103 @@
1
+
2
class Maruku

  # Incremental reader for a block of raw inline HTML.
  #
  # Lines are fed one at a time through #eat_this. The helper keeps a stack
  # of the tags opened so far and remembers whether it is inside an HTML
  # comment, so that #is_finished? can tell the caller when the block is
  # balanced. The text consumed so far is available from #stuff_you_read;
  # tags listed in TO_SANITIZE are rewritten there in self-closing form.
  class HTMLHelper
    # An opening or closing tag. Captures: the optional "/", the tag name,
    # and the raw attribute text (including a trailing "/" if present).
    Tag = %r{^<(/)?(\w+)([^>]*)>}m
    # A run of characters containing no "<".
    EverythingElse = %r{^[^<]+}m
    CommentStart = %r{^<!--}x
    CommentEnd = %r{^.*-->}
    # Tags that are always re-emitted as '<tag attributes />' and never
    # pushed on the tag stack.
    TO_SANITIZE = ['img','hr']

    attr_accessor :inside_comment

    def initialize
      @rest = ""          # text not consumed yet
      @tag_stack = []     # names of the tags currently open
      @m = nil            # last MatchData, only used by #inspect
      @already = ""       # text consumed so far
      @inside_comment = false
    end

    # Consumes one line of HTML (a newline is appended to it).
    #
    # Raises a RuntimeError (through #error) when a closing tag does not
    # match the innermost open tag or when there is no open tag at all.
    def eat_this(line)
      @rest = line + "\n" + @rest

      # Every iteration consumes at least one character or raises, so the
      # loop always terminates.
      until @rest.empty?
        if @inside_comment
          eat_comment_text
        else
          eat_markup
        end
      end
    end

    # Raises a RuntimeError carrying +s+ and a dump of the reader state.
    def error(s)
      raise "Error: #{s} "+ inspect
    end

    def inspect; "HTML READER\n comment=#{inside_comment} "+
      "match=#{@m.to_s.inspect}"+
      "\n * * * BEFORE * * *\n#{@already.inspect}"+
      "\n * * * AFTER * * *\n#{@rest.inspect}"+
      "\n * * * TAGS stack * * *\n#{@tag_stack.inspect}"
    end

    # Returns the text consumed so far.
    def stuff_you_read
      @already
    end

    # True when no comment is open and every opened tag has been closed.
    def is_finished?
      not @inside_comment and @tag_stack.empty?
    end

    private

    # Consumes pending text while inside a comment.
    def eat_comment_text
      if @m = CommentEnd.match(@rest)
        @inside_comment = false
        @already += @m.pre_match + @m.to_s
        @rest = @m.post_match
      else
        # CommentEnd matches any line containing "-->", so a failed match
        # means the whole pending text is comment body. Taking it verbatim
        # (instead of requiring it to be free of "<") avoids looping
        # forever on comment lines that contain markup.
        @m = nil
        @already += @rest
        @rest = ""
      end
    end

    # Consumes the next comment start, tag or text run outside a comment.
    def eat_markup
      if @m = CommentStart.match(@rest)
        @inside_comment = true
        @already += @m.pre_match + @m.to_s
        @rest = @m.post_match
      elsif @m = Tag.match(@rest)
        @already += @m.pre_match
        @rest = @m.post_match
        eat_tag(!!@m[1], @m[2], @m[3])
      elsif @m = EverythingElse.match(@rest)
        @already += @m.pre_match + @m.to_s
        @rest = @m.post_match
      else
        # Nothing matched, so the pending text starts with a "<" that
        # opens neither a comment nor a complete tag. Copy that single
        # character as plain text so the loop in #eat_this makes progress.
        @already += @rest[0, 1]
        @rest = @rest[1..-1]
      end
    end

    # Records the tag just matched by Tag (still available in @m).
    def eat_tag(is_closing, tag, attributes)
      is_single = false
      if attributes =~ /\A(.*)\/\Z/
        attributes = $1
        is_single = true
      end

      if TO_SANITIZE.include? tag
        @already += '<%s %s />' % [tag, attributes]
      elsif is_closing
        @already += @m.to_s
        # The empty-stack check must come first: on an empty stack
        # @tag_stack.last is nil and would always trip the mismatch check.
        if @tag_stack.empty?
          error "Malformed: closing tag #{tag.inspect} "+
            "in empty list"
        end
        if @tag_stack.last != tag
          error "Malformed: tag <#{tag}> "+
            "closes <#{@tag_stack.last}>"
        end
        @tag_stack.pop
      else
        # Self-closing tags (<br/>) do not open a scope, but their text
        # still belongs in the output.
        @tag_stack.push tag unless is_single
        @already += @m.to_s
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
2
+ #
3
+ # This file is part of Maruku.
4
+ #
5
+ # Maruku is free software; you can redistribute it and/or modify
6
+ # it under the terms of the GNU General Public License as published by
7
+ # the Free Software Foundation; either version 2 of the License, or
8
+ # (at your option) any later version.
9
+ #
10
+ # Maruku is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License
16
+ # along with Maruku; if not, write to the Free Software
17
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18
+
19
+ # Structures definition
20
+ require 'maruku/structures'
21
+
22
+ # Code for parsing whole Markdown documents
23
+ require 'maruku/parse_doc'
24
+
25
+ # Code for parsing Markdown block-level elements
26
+ require 'maruku/parse_block'
27
+
28
+ # Code for parsing Markdown span-level elements
29
+ require 'maruku/parse_span'
30
+
31
+ # Ugly things kept in a closet
32
+ require 'maruku/string_utils'
33
+
34
+ # Code for creating a table of contents
35
+ require 'maruku/toc'
36
+
37
+ # Version and URL
38
+ require 'maruku/version'
39
+
40
+
41
+ # Exporting to html
42
+ require 'maruku/to_html'
43
+
44
+ # Exporting to latex
45
+ require 'maruku/to_latex'
46
+ require 'maruku/to_latex_strings'
47
+ require 'maruku/to_latex_entities'
48
+
49
+ # Exporting to text: strips all formatting (not complete)
50
+ require 'maruku/to_s'
@@ -70,7 +70,7 @@ class Maruku
70
70
  end
71
71
  when :quote; output << read_quote
72
72
  when :code; e = read_code; output << e if e
73
- when :raw_html; output << read_raw_html
73
+ when :raw_html; e = read_raw_html; output << e if e
74
74
 
75
75
  # these do not produce output
76
76
  when :footnote_text; read_footnote_text
@@ -81,7 +81,8 @@ class Maruku
81
81
  # warn if we forgot something
82
82
  else
83
83
  node_type = cur_line_node_type
84
- $stderr.puts "Ignoring line '#{shift_line}' type = #{node_type}"
84
+ line = shift_line
85
+ # $stderr.puts "Ignoring line '#{line}' type = #{node_type}"
85
86
  end
86
87
 
87
88
  if current_metadata and output.last
@@ -155,13 +156,18 @@ class Maruku
155
156
  e
156
157
  end
157
158
 
159
# Builds the metadata hash for the attribute text found between the braces
# at the end of a header line.
#
# s:: the raw text between "{" and "}", e.g. "#intro"
#
# Returns a hash whose only key is :id. Surrounding whitespace is ignored
# and a leading "#" is dropped only when it is actually present, so text
# without the marker is no longer truncated by one character.
def parse_attributes(s)
  {:id => s.strip.sub(/\A#/, '')}
end
158
163
  # reads a header like '#### header ####'
164
+
159
165
  def read_header3
160
166
  e = create_md_element(:header)
161
167
  line = shift_line.strip
162
- if line =~ HeaderWithId
168
+ if line =~ HeaderWithAttributes
163
169
  line = $1.strip
164
- e.meta[:id] = $2
170
+ e.meta.merge! parse_attributes($2)
165
171
  end
166
172
 
167
173
  e.meta[:level] = num_leading_hashes(line)
@@ -172,37 +178,39 @@ class Maruku
172
178
 
173
179
  e
174
180
  end
175
-
176
-
177
- def read_raw_html
178
- lines = []
179
-
180
- cur_line =~ %r{^<(\w+)}
181
- tag = $1
182
- # puts "Start tag = #{tag} "
183
-
184
- while cur_line
185
- break if (number_of_leading_spaces(cur_line) == 0) &&
186
- (not [:raw_html, :empty].include? cur_line_node_type)
187
181
 
188
- lines << shift_line
189
- # check for a closing tag
190
- if (lines.last =~ %r{^</(\w+)}||
191
- lines.last =~ %r{</(\w+)>\s*$}) && $1 == tag
192
- break
182
+
183
+ def read_raw_html
184
+ # raw_html = ""
185
+
186
+ h = HTMLHelper.new
187
+ begin
188
+ l=shift_line
189
+ h.eat_this l
190
+ # puts "\nBLOCK:\nhtml -> #{l.inspect}"
191
+ while cur_line and not h.is_finished?
192
+ l=shift_line
193
+ # puts "html -> #{l.inspect}"
194
+ h.eat_this l
193
195
  end
196
+ rescue Exception => e
197
+ puts e.inspect
198
+ # puts h.inspect
194
199
  end
195
200
 
196
- # dbg_describe_ary(lines, 'HTML')
197
-
198
- raw_html = lines.join("\n")
199
-
201
+ raw_html = h.stuff_you_read
202
+
200
203
  e = create_md_element(:raw_html)
201
204
 
202
205
  begin
206
+ # remove newlines and whitespace at the beginning
207
+ # and end of string, or else REXML gets confused
208
+ raw_html = raw_html.gsub(/\A\s*</,'<').
209
+ gsub(/>[\s\n]*\Z/,'>')
203
210
  e.meta[:parsed_html] = Document.new(raw_html)
204
211
  rescue
205
- $stderr.puts "Malformed block of HTML:\n#{raw_html}"
212
+ #$stderr.puts "Malformed block of HTML:\n#{raw_html}"
213
+ #puts h.inspect
206
214
  end
207
215
 
208
216
  e.meta[:raw_html] = raw_html
@@ -359,12 +367,16 @@ class Maruku
359
367
  while lines.last && lines.last.strip.size == 0
360
368
  lines.pop
361
369
  end
370
+
371
+ while lines.first && lines.first.strip.size == 0
372
+ lines.shift
373
+ end
362
374
 
363
375
  return nil if lines.empty?
364
376
 
365
377
  source = lines.join("\n")
366
378
  # ignore trailing lines
367
- source = source.gsub(/\n+\Z/,'')
379
+ # source = source.gsub(/\n+\Z/,'')
368
380
 
369
381
  # dbg_describe_ary(lines, 'CODE')
370
382
  e.meta[:raw_code] = source
@@ -33,7 +33,9 @@ class Maruku
33
33
  @stack = []
34
34
 
35
35
  @meta = parse_email_headers(s)
36
- lines = split_lines(@meta[:data])
36
+ data = @meta[:data]
37
+ @meta.delete :data
38
+ lines = split_lines(data)
37
39
  @children = parse_lines_as_markdown(lines)
38
40
 
39
41
  self.search_abbreviations
@@ -191,6 +191,7 @@ class Maruku
191
191
  # line that were mistaken for raw_html
192
192
  return :text if l=~EMailAddress or l=~ URL
193
193
  return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+}
194
+ return :raw_html if l =~ %r{[ ]{0,3}<\!\-\-}
194
195
  return :ulist if l =~ /^\s?([\*\-\+])\s+.*\w+/
195
196
  return :olist if l =~ /^\s?\d+\..*\w+/
196
197
  return :empty if l.strip.size == 0
@@ -200,8 +201,10 @@ class Maruku
200
201
  # at least three asterisks on a line, and only whitespace
201
202
  return :hrule if l =~ /^(\s*\*\s*){3,1000}$/
202
203
  return :hrule if l =~ /^(\s*-\s*){3,1000}$/ # or hyphens
204
+ return :hrule if l =~ /^(\s*_\s*){3,1000}$/ # or underscores
203
205
  return :quote if l =~ /^>/
204
206
  return :metadata if l =~ /^@/
207
+ return :m2ref if l =~ /^\s{0,3}\{[\w\d\s]+\}:/
205
208
  return :text
206
209
  end
207
210
 
@@ -257,6 +260,8 @@ class Maruku
257
260
 
258
261
  HeaderWithId = /^(.*)\{\#([\w_-]+)\}\s*$/
259
262
 
263
+ HeaderWithAttributes = /^(.*)\{(.*)\}\s*$/
264
+
260
265
  TabSize = 4;
261
266
 
262
267
  # if contains a pipe, it could be a table header
@@ -0,0 +1,42 @@
1
+ require 'maruku'
2
+ require 'bluecloth'
3
+
4
# Benchmark: reads a Markdown document from standard input and reports, for
# each [class, export method] pair, the mean time of `num` parsing runs and
# of `num` rendering runs.
data = $stdin.read

num = 10

candidates = [
  [BlueCloth, :to_html],
  [Maruku, :to_html],
  [Maruku, :to_latex]
]

stats = candidates.map do |c, method|
  puts "Computing for #{c}"

  # Parsing: build the document `num` times; the last one is kept so that
  # it can be rendered below.
  doc = nil
  began = Time.now
  (1..num).each do |i|
    puts "#{i}"
    doc = c.new(data)
  end
  parsing = (Time.now - began) / num

  # Rendering: export the same parsed document `num` times.
  began = Time.now
  (1..num).each do |i|
    puts "#{i}"
    doc.send method
  end
  rendering = (Time.now - began) / num

  [c, method, parsing, rendering]
end

stats.each do |c, method, parsing, rendering|
  total = parsing + rendering
  puts "%s (%s): parsing %0.2f sec + rendering %0.2f sec = %0.2f sec " %
    [c, method, parsing, rendering, total]
end
42
+
@@ -0,0 +1,116 @@
1
+
2
+ require 'maruku'
3
+
4
class Maruku

  # Aborts a test by raising a RuntimeError.
  #
  # test:: the Markdown source that was being checked
  # doc::  the object built from it; must respond to #inspect and #to_html
  # s::    short description of the failed expectation
  #
  # The message contains the description, the source, the inspected
  # document and its HTML rendering, separated by rows of asterisks.
  def Maruku.failed(test, doc, s)
    raise "Test failed: #{s}\n*****\n#{test}\n*****\n"+
      "#{doc.inspect}\n*****\n#{doc.to_html}"
  end

  # Parses every sample in MetaTests (samples are separated by '***') and
  # checks that each one produces exactly one child and that this child is
  # a header. Raises through Maruku.failed on the first violation.
  def Maruku.metaTests
    # NOTE(review): presumably the metadata each sample header is meant to
    # end up with; it is not asserted anywhere yet.
    ref = {:id => 'id1', :class => ['class1','class2'],
      :style=> 'Style is : important = for all } things'}

    MetaTests.split('***').each do |test|
      doc = Maruku.new(test)

      failed(test, doc, "children != 1") unless doc.children.size == 1

      h = doc.children[0]

      failed(test, doc, "child not header") unless h.node_type == :header
    end
  end

  MetaTests = <<EOF

# Head # {ref1 ref2 ref3}

{ref1}: id: id1; class: class1
{ref2}: class: class2
{ref3}: style: "Style is : important = for all } things"

***

# Head # {ref1 ref3 ref2}

{ref1}: id: id1; class: class1
{ref2}: class: class2
{ref3}: style: "Style is : important = for all } things"

***

# Head # {ref1 ref2 ref3}

{ref1}: id= id1; class=class1
{ref2}: class=class2
{ref3}: style="Style is : important = for all } things"

***

# Head # {ref1 ref2 ref3}

{ref1}: id=id1 class=class1
{ref2}: class=class2
{ref3}: style="Style is : important = for all } things"

***
# Head # {ref1 ref2 ref3}

{ref1}: id:id1 class:class1
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***
# Head # {ref1 ref2 ref3}

{ref1}: id:id1 class:class1
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***

# Head # {#id1 .class1 ref2 ref3}

{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***

# Head # { #id1 .class1 ref2 ref3 }

{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***

# Head # { id=id1 class=class1 ref2 ref3 }

{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***

# Head # { id:id1 class="class1" class:"class2" style="Style is : important = for all } things"}

EOF

end

# Run the checks only when this file is executed directly as tests.rb.
if File.basename($0) == 'tests.rb'
  Maruku.metaTests

end
115
+
116
+