maruku 0.2.12 → 0.2.13
Sign up to get free protection for your applications and to get access to all the features.
- data/docs/index.html +118 -0
- data/docs/markdown_extra2.html +34 -0
- data/docs/markdown_extra2.md +83 -0
- data/docs/markdown_syntax.html +265 -0
- data/docs/maruku.html +118 -0
- data/docs/proposal.html +90 -0
- data/docs/proposal.md +333 -0
- data/lib/maruku.rb +3 -0
- data/lib/maruku/html_helper.rb +103 -0
- data/lib/maruku/maruku.rb +50 -0
- data/lib/maruku/parse_block.rb +39 -27
- data/lib/maruku/parse_doc.rb +3 -1
- data/lib/maruku/string_utils.rb +5 -0
- data/lib/maruku/tests/benchmark.rb +42 -0
- data/lib/maruku/tests/tests.rb +116 -0
- data/lib/maruku/to_html.rb +11 -5
- data/lib/maruku/version.rb +1 -1
- data/tests/a.md +10 -0
- data/tests/bugs/complex_escaping.md +4 -0
- data/tests/others/inline_html.md +8 -1
- metadata +15 -2
data/lib/maruku.rb
CHANGED
@@ -22,6 +22,9 @@ require 'maruku/structures'
|
|
22
22
|
# Code for parsing whole Markdown documents
|
23
23
|
require 'maruku/parse_doc'
|
24
24
|
|
25
|
+
# A class for reading and sanitizing inline HTML
|
26
|
+
require 'maruku/html_helper'
|
27
|
+
|
25
28
|
# Code for parsing Markdown block-level elements
|
26
29
|
require 'maruku/parse_block'
|
27
30
|
|
@@ -0,0 +1,103 @@
|
|
1
|
+
|
2
|
+
class Maruku

  # Incremental reader for a block of inline HTML.
  #
  # Lines are fed one at a time through #eat_this. The helper copies what it
  # reads into an output buffer (see #stuff_you_read), keeps a stack of the
  # tags that are currently open, and tracks whether it is inside an HTML
  # comment. #is_finished? tells the caller when the block is balanced.
  #
  # Tags listed in TO_SANITIZE are always re-emitted in self-closed form.
  class HTMLHelper
    Tag = %r{^<(/)?(\w+)([^>]*)>}m
    EverythingElse = %r{^[^<]+}m
    CommentStart = %r{^<!--}x
    CommentEnd = %r{^.*-->}
    TO_SANITIZE = ['img','hr']

    attr_accessor :inside_comment

    def initialize
      @rest = ""             # input that has not been consumed yet
      @tag_stack = []        # names of the tags that are currently open
      @m = nil               # last MatchData, kept for #inspect
      @already = ""          # everything consumed (and sanitized) so far
      @inside_comment = false
    end

    # Consumes one line of input (a newline is appended to it).
    #
    # Raises a RuntimeError (through #error) when a closing tag does not
    # match the innermost open tag, when a closing tag appears with no tag
    # open, or when the remaining input cannot be recognized at all. In the
    # last case the unrecognized text is still copied to the output so that
    # nothing the caller fed in is lost.
    def eat_this(line)
      @rest += line + "\n"

      until @rest.empty?
        if @inside_comment
          read_comment_text
        else
          read_markup
        end
      end
    end

    # Raises a RuntimeError whose message is +s+ followed by the reader state.
    def error(s)
      raise "Error: #{s} "+ inspect
    end

    def inspect; "HTML READER\n comment=#{inside_comment} "+
      "match=#{@m.to_s.inspect}"+
      "\n * * * BEFORE * * *\n#{@already.inspect}"+
      "\n * * * AFTER * * *\n#{@rest.inspect}"+
      "\n * * * TAGS stack * * *\n#{@tag_stack.inspect}"
    end

    # Returns the text consumed so far.
    def stuff_you_read
      @already
    end

    # True when no comment is open and every opened tag has been closed.
    def is_finished?
      not @inside_comment and @tag_stack.empty?
    end

    private

    # Inside a comment: copy up to and including the terminator when there is
    # one, otherwise copy everything. The old code only copied up to the next
    # '<' and then looped forever, because nothing could match a comment body
    # that contained '<'.
    def read_comment_text
      if @m = CommentEnd.match(@rest)
        @inside_comment = false
        @already += @m.pre_match + @m.to_s
        @rest = @m.post_match
      else
        @already += @rest
        @rest = ""
      end
    end

    # Outside a comment: recognize a comment start, a tag, or plain text.
    def read_markup
      if @m = CommentStart.match(@rest)
        @inside_comment = true
        @already += @m.pre_match + @m.to_s
        @rest = @m.post_match
      elsif @m = Tag.match(@rest)
        @already += @m.pre_match
        @rest = @m.post_match
        record_tag
      elsif @m = EverythingElse.match(@rest)
        @already += @m.pre_match + @m.to_s
        @rest = @m.post_match
      else
        # Nothing matches (e.g. a '<' that never becomes a tag). Keep the
        # text verbatim and report it, instead of spinning forever on input
        # that will never shrink.
        leftover = @rest
        @already += leftover
        @rest = ""
        error "Malformed HTML: not complete: #{leftover.inspect}"
      end
    end

    # Copies the tag held in @m to the output and updates the tag stack.
    def record_tag
      is_closing = !!@m[1]
      tag = @m[2]
      attributes = @m[3]

      # a trailing slash marks a self-closing tag such as <br/>
      is_single = false
      if attributes =~ /\A(.*)\/\Z/
        attributes = $1
        is_single = true
      end

      if TO_SANITIZE.include? tag
        @already += '<%s %s />' % [tag, attributes]
      elsif is_closing
        @already += @m.to_s
        # The empty-stack test must come first: with an empty stack the
        # mismatch test below is always true and used to hide this message.
        if @tag_stack.empty?
          error "Malformed: closing tag #{tag.inspect} "+
            "in empty list"
        end
        if @tag_stack.last != tag
          error "Malformed: tag <#{tag}> "+
            "closes <#{@tag_stack.last}>"
        end
        @tag_stack.pop
      else
        # Self-closing tags are copied too (they used to be dropped from the
        # output); only tags that stay open are pushed on the stack.
        @already += @m.to_s
        @tag_stack.push tag unless is_single
      end
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
|
2
|
+
#
|
3
|
+
# This file is part of Maruku.
|
4
|
+
#
|
5
|
+
# Maruku is free software; you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation; either version 2 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# Maruku is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with Maruku; if not, write to the Free Software
|
17
|
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
18
|
+
|
19
|
+
# Structures definition
|
20
|
+
require 'maruku/structures'
|
21
|
+
|
22
|
+
# Code for parsing whole Markdown documents
|
23
|
+
require 'maruku/parse_doc'
|
24
|
+
|
25
|
+
# Code for parsing Markdown block-level elements
|
26
|
+
require 'maruku/parse_block'
|
27
|
+
|
28
|
+
# Code for parsing Markdown span-level elements
|
29
|
+
require 'maruku/parse_span'
|
30
|
+
|
31
|
+
# Ugly things kept in a closet
|
32
|
+
require 'maruku/string_utils'
|
33
|
+
|
34
|
+
# Code for creating a table of contents
|
35
|
+
require 'maruku/toc'
|
36
|
+
|
37
|
+
# Version and URL
|
38
|
+
require 'maruku/version'
|
39
|
+
|
40
|
+
|
41
|
+
# Exporting to html
|
42
|
+
require 'maruku/to_html'
|
43
|
+
|
44
|
+
# Exporting to latex
|
45
|
+
require 'maruku/to_latex'
|
46
|
+
require 'maruku/to_latex_strings'
|
47
|
+
require 'maruku/to_latex_entities'
|
48
|
+
|
49
|
+
# Exporting to text: strips all formatting (not complete)
|
50
|
+
require 'maruku/to_s'
|
data/lib/maruku/parse_block.rb
CHANGED
@@ -70,7 +70,7 @@ class Maruku
|
|
70
70
|
end
|
71
71
|
when :quote; output << read_quote
|
72
72
|
when :code; e = read_code; output << e if e
|
73
|
-
when :raw_html; output <<
|
73
|
+
when :raw_html; e = read_raw_html; output << e if e
|
74
74
|
|
75
75
|
# these do not produce output
|
76
76
|
when :footnote_text; read_footnote_text
|
@@ -81,7 +81,8 @@ class Maruku
|
|
81
81
|
# warn if we forgot something
|
82
82
|
else
|
83
83
|
node_type = cur_line_node_type
|
84
|
-
|
84
|
+
line = shift_line
|
85
|
+
# $stderr.puts "Ignoring line '#{line}' type = #{node_type}"
|
85
86
|
end
|
86
87
|
|
87
88
|
if current_metadata and output.last
|
@@ -155,13 +156,18 @@ class Maruku
|
|
155
156
|
e
|
156
157
|
end
|
157
158
|
|
159
|
+
# returns a hash
|
160
|
+
# Turns the text found between the braces that end a header line into a
# metadata hash.
#
# Only an id is extracted for now: surrounding whitespace is ignored and a
# single leading '#' is removed when present. The first character used to
# be dropped unconditionally, which mangled any text not starting with '#'.
# An empty attribute string yields an empty hash instead of {:id => nil}.
#
# NOTE(review): richer attribute syntax (classes, references) is not
# handled here; the whole text becomes the id. Confirm against the caller.
def parse_attributes(s)
  text = s.to_s.strip
  return {} if text.empty?
  {:id => text.sub(/\A#/, '')}
end
|
158
163
|
# reads a header like '#### header ####'
|
164
|
+
|
159
165
|
def read_header3
|
160
166
|
e = create_md_element(:header)
|
161
167
|
line = shift_line.strip
|
162
|
-
if line =~
|
168
|
+
if line =~ HeaderWithAttributes
|
163
169
|
line = $1.strip
|
164
|
-
e.meta
|
170
|
+
e.meta.merge! parse_attributes($2)
|
165
171
|
end
|
166
172
|
|
167
173
|
e.meta[:level] = num_leading_hashes(line)
|
@@ -172,37 +178,39 @@ class Maruku
|
|
172
178
|
|
173
179
|
e
|
174
180
|
end
|
175
|
-
|
176
|
-
|
177
|
-
def read_raw_html
|
178
|
-
lines = []
|
179
|
-
|
180
|
-
cur_line =~ %r{^<(\w+)}
|
181
|
-
tag = $1
|
182
|
-
# puts "Start tag = #{tag} "
|
183
|
-
|
184
|
-
while cur_line
|
185
|
-
break if (number_of_leading_spaces(cur_line) == 0) &&
|
186
|
-
(not [:raw_html, :empty].include? cur_line_node_type)
|
187
181
|
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
182
|
+
|
183
|
+
def read_raw_html
|
184
|
+
# raw_html = ""
|
185
|
+
|
186
|
+
h = HTMLHelper.new
|
187
|
+
begin
|
188
|
+
l=shift_line
|
189
|
+
h.eat_this l
|
190
|
+
# puts "\nBLOCK:\nhtml -> #{l.inspect}"
|
191
|
+
while cur_line and not h.is_finished?
|
192
|
+
l=shift_line
|
193
|
+
# puts "html -> #{l.inspect}"
|
194
|
+
h.eat_this l
|
193
195
|
end
|
196
|
+
rescue Exception => e
|
197
|
+
puts e.inspect
|
198
|
+
# puts h.inspect
|
194
199
|
end
|
195
200
|
|
196
|
-
|
197
|
-
|
198
|
-
raw_html = lines.join("\n")
|
199
|
-
|
201
|
+
raw_html = h.stuff_you_read
|
202
|
+
|
200
203
|
e = create_md_element(:raw_html)
|
201
204
|
|
202
205
|
begin
|
206
|
+
# remove newlines and whitespace at begin
|
207
|
+
# and end of string, or else REXML gets confused
|
208
|
+
raw_html = raw_html.gsub(/\A\s*</,'<').
|
209
|
+
gsub(/>[\s\n]*\Z/,'>')
|
203
210
|
e.meta[:parsed_html] = Document.new(raw_html)
|
204
211
|
rescue
|
205
|
-
|
212
|
+
#$stderr.puts "Malformed block of HTML:\n#{raw_html}"
|
213
|
+
#puts h.inspect
|
206
214
|
end
|
207
215
|
|
208
216
|
e.meta[:raw_html] = raw_html
|
@@ -359,12 +367,16 @@ class Maruku
|
|
359
367
|
while lines.last && lines.last.strip.size == 0
|
360
368
|
lines.pop
|
361
369
|
end
|
370
|
+
|
371
|
+
while lines.first && lines.first.strip.size == 0
|
372
|
+
lines.shift
|
373
|
+
end
|
362
374
|
|
363
375
|
return nil if lines.empty?
|
364
376
|
|
365
377
|
source = lines.join("\n")
|
366
378
|
# ignore trailing lines
|
367
|
-
source = source.gsub(/\n+\Z/,'')
|
379
|
+
# source = source.gsub(/\n+\Z/,'')
|
368
380
|
|
369
381
|
# dbg_describe_ary(lines, 'CODE')
|
370
382
|
e.meta[:raw_code] = source
|
data/lib/maruku/parse_doc.rb
CHANGED
data/lib/maruku/string_utils.rb
CHANGED
@@ -191,6 +191,7 @@ class Maruku
|
|
191
191
|
# line that were mistaken for raw_html
|
192
192
|
return :text if l=~EMailAddress or l=~ URL
|
193
193
|
return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+}
|
194
|
+
return :raw_html if l =~ %r{[ ]{0,3}<\!\-\-}
|
194
195
|
return :ulist if l =~ /^\s?([\*\-\+])\s+.*\w+/
|
195
196
|
return :olist if l =~ /^\s?\d+\..*\w+/
|
196
197
|
return :empty if l.strip.size == 0
|
@@ -200,8 +201,10 @@ class Maruku
|
|
200
201
|
# at least three asterisks on a line, and only whitespace
|
201
202
|
return :hrule if l =~ /^(\s*\*\s*){3,1000}$/
|
202
203
|
return :hrule if l =~ /^(\s*-\s*){3,1000}$/ # or hyphens
|
204
|
+
return :hrule if l =~ /^(\s*_\s*){3,1000}$/ # or underscores
|
203
205
|
return :quote if l =~ /^>/
|
204
206
|
return :metadata if l =~ /^@/
|
207
|
+
return :m2ref if l =~ /^\s{0,3}\{[\w\d\s]+\}:/
|
205
208
|
return :text
|
206
209
|
end
|
207
210
|
|
@@ -257,6 +260,8 @@ class Maruku
|
|
257
260
|
|
258
261
|
HeaderWithId = /^(.*)\{\#([\w_-]+)\}\s*$/
|
259
262
|
|
263
|
+
HeaderWithAttributes = /^(.*)\{(.*)\}\s*$/
|
264
|
+
|
260
265
|
TabSize = 4;
|
261
266
|
|
262
267
|
# if contains a pipe, it could be a table header
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'maruku'
|
2
|
+
require 'bluecloth'
|
3
|
+
|
4
|
+
# The Markdown document to benchmark is read from standard input.
data = $stdin.read

# Number of repetitions each measurement is averaged over.
num = 10

# Pairs of converter class and rendering method to time.
candidates = [
  [BlueCloth, :to_html],
  [Maruku, :to_html],
  [Maruku, :to_latex]
]

stats = candidates.map do |c, method|
  puts "Computing for #{c}"

  # Parsing: build the document num times; the last one is rendered below.
  doc = nil
  parse_start = Time.now
  1.upto(num) do |i|
    puts "#{i}"
    doc = c.new(data)
  end
  parsing = (Time.now - parse_start) / num

  # Rendering: call the rendering method num times on the parsed document.
  render_start = Time.now
  1.upto(num) do |i|
    puts "#{i}"
    doc.send method
  end
  rendering = (Time.now - render_start) / num

  [c, method, parsing, rendering]
end

# One summary line per candidate: average parsing, rendering and total time.
stats.each do |c, method, parsing, rendering|
  total = parsing + rendering
  puts "%s (%s): parsing %0.2f sec + rendering %0.2f sec = %0.2f sec " %
    [c, method, parsing, rendering, total]
end
|
42
|
+
|
@@ -0,0 +1,116 @@
|
|
1
|
+
|
2
|
+
require 'maruku'
|
3
|
+
|
4
|
+
class Maruku

  # Aborts a test by raising a RuntimeError that describes the failure.
  #
  # test:: the Markdown source that was being checked
  # doc::  the parsed document; its #inspect and #to_html output are put
  #        in the message
  # s::    short description of what went wrong
  def self.failed(test, doc, s)
    # The HTML used to appear as the literal text "{doc.to_html}" because
    # the '#' of the interpolation was missing.
    raise "Test failed: #{s}\n*****\n#{test}\n*****\n"+
      "#{doc.inspect}\n*****\n#{doc.to_html}"
  end

  # Parses every sample in MetaTests (the samples are separated by '***')
  # and checks that each one produces exactly one child, which must be a
  # header. Raises through Maruku.failed on the first sample that does not.
  def self.metaTests
    # Attributes every sample is written to describe.
    # NOTE(review): this is not compared against the parsed header yet.
    ref = {:id => 'id1', :class => ['class1','class2'],
      :style=> 'Style is : important = for all } things'}

    tests = MetaTests.split('***')
    tests.each do |test|
      #puts "Test: #{test.inspect}"
      doc = Maruku.new(test)

      failed(test, doc, "children != 1") unless doc.children.size == 1

      h = doc.children[0]

      failed(test, doc, "child not header") unless h.node_type == :header

      # puts doc.inspect
      # puts doc.to_html
    end
  end

MetaTests = <<EOF

# Head # {ref1 ref2 ref3}

{ref1}: id: id1; class: class1
{ref2}: class: class2
{ref3}: style: "Style is : important = for all } things"

***

# Head # {ref1 ref3 ref2}

{ref1}: id: id1; class: class1
{ref2}: class: class2
{ref3}: style: "Style is : important = for all } things"

***

# Head # {ref1 ref2 ref3}

{ref1}: id= id1; class=class1
{ref2}: class=class2
{ref3}: style="Style is : important = for all } things"

***

# Head # {ref1 ref2 ref3}

{ref1}: id=id1 class=class1
{ref2}: class=class2
{ref3}: style="Style is : important = for all } things"

***
# Head # {ref1 ref2 ref3}

{ref1}: id:id1 class:class1
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***
# Head # {ref1 ref2 ref3}

{ref1}: id:id1 class:class1
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***

# Head # {#id1 .class1 ref2 ref3}

{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***

# Head # { #id1 .class1 ref2 ref3 }

{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***

# Head # { id=id1 class=class1 ref2 ref3 }

{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***

# Head # { id:id1 class="class1" class:"class2" style="Style is : important = for all } things"}

EOF

end
|
110
|
+
|
111
|
+
if File.basename($0) == 'tests.rb'
|
112
|
+
Maruku.metaTests
|
113
|
+
|
114
|
+
end
|
115
|
+
|
116
|
+
|