maruku 0.2.12 → 0.2.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/docs/index.html +118 -0
- data/docs/markdown_extra2.html +34 -0
- data/docs/markdown_extra2.md +83 -0
- data/docs/markdown_syntax.html +265 -0
- data/docs/maruku.html +118 -0
- data/docs/proposal.html +90 -0
- data/docs/proposal.md +333 -0
- data/lib/maruku.rb +3 -0
- data/lib/maruku/html_helper.rb +103 -0
- data/lib/maruku/maruku.rb +50 -0
- data/lib/maruku/parse_block.rb +39 -27
- data/lib/maruku/parse_doc.rb +3 -1
- data/lib/maruku/string_utils.rb +5 -0
- data/lib/maruku/tests/benchmark.rb +42 -0
- data/lib/maruku/tests/tests.rb +116 -0
- data/lib/maruku/to_html.rb +11 -5
- data/lib/maruku/version.rb +1 -1
- data/tests/a.md +10 -0
- data/tests/bugs/complex_escaping.md +4 -0
- data/tests/others/inline_html.md +8 -1
- metadata +15 -2
data/lib/maruku.rb
CHANGED
@@ -22,6 +22,9 @@ require 'maruku/structures'
|
|
22
22
|
# Code for parsing whole Markdown documents
|
23
23
|
require 'maruku/parse_doc'
|
24
24
|
|
25
|
+
# A class for reading and sanitizing inline HTML
|
26
|
+
require 'maruku/html_helper'
|
27
|
+
|
25
28
|
# Code for parsing Markdown block-level elements
|
26
29
|
require 'maruku/parse_block'
|
27
30
|
|
@@ -0,0 +1,103 @@
|
|
1
|
+
|
2
|
+
class Maruku

  # Incremental reader for a block of raw HTML.
  #
  # Text is fed in with #eat_this (intended use: one line per call). The
  # helper copies what it reads into an output buffer, keeps a stack of the
  # currently open tags, and remembers whether it is inside an HTML comment.
  # #is_finished? tells the caller when every opened tag has been closed and
  # no comment is pending; #stuff_you_read returns the accumulated HTML.
  #
  # Tags listed in TO_SANITIZE are always rewritten in self-closing form and
  # never pushed on the tag stack.
  class HTMLHelper
    # An opening or closing tag: [1] = "/" when closing, [2] = tag name,
    # [3] = everything between the name and the closing ">".
    Tag = %r{^<(/)?(\w+)([^>]*)>}m
    # A run of text containing no "<".
    EverythingElse = %r{^[^<]+}m
    CommentStart = %r{^<!--}x
    # The remainder of a line up to and including "-->".
    CommentEnd = %r{^.*-->}
    # Tags that are always emitted as "<tag attrs />".
    TO_SANITIZE = ['img','hr']

    attr_accessor :inside_comment

    def initialize
      @rest = ""             # input not yet consumed
      @tag_stack = []        # names of the tags currently open
      @m = nil               # last MatchData (shown by #inspect)
      @already = ""          # output accumulated so far
      @inside_comment = false
    end

    # Consumes +line+ (a newline is appended to it) and updates the output
    # buffer, the tag stack and the comment flag.
    #
    # Raises RuntimeError (via #error) when a closing tag does not match the
    # innermost open tag, or when there is no open tag at all.
    #
    # Every pass through the loop consumes at least one character, so the
    # method always terminates, even on input such as "a < b" or a comment
    # line that starts with "<".
    def eat_this(line)
      @rest = line + "\n" + @rest

      until @rest.empty?
        if @inside_comment
          eat_inside_comment
        else
          eat_outside_comment
        end
      end
    end

    # Raises a RuntimeError whose message is +s+ followed by a dump of the
    # reader state.
    def error(s)
      raise "Error: #{s} "+ inspect
    end

    def inspect; "HTML READER\n comment=#{inside_comment} "+
      "match=#{@m.to_s.inspect}"+
      "\n * * * BEFORE * * *\n#{@already.inspect}"+
      "\n * * * AFTER * * *\n#{@rest.inspect}"+
      "\n * * * TAGS stack * * *\n#{@tag_stack.inspect}"
    end

    # The HTML accumulated so far.
    def stuff_you_read
      @already
    end

    # True when no comment is open and every opened tag has been closed.
    def is_finished?
      !@inside_comment && @tag_stack.empty?
    end

    private

    # Copies the text before the current match plus the match itself to the
    # output, and leaves the text after it as the remaining input.
    def accept_match
      @already += @m.pre_match + @m.to_s
      @rest = @m.post_match
    end

    # Inside a comment: look for the terminator; otherwise everything that is
    # left is comment text.
    def eat_inside_comment
      if @m = CommentEnd.match(@rest)
        @inside_comment = false
        accept_match
      else
        # CommentEnd matches any line containing "-->", so a failed match
        # means the terminator is not in the buffer at all. Take the whole
        # buffer, including any "<" it contains.
        @already += @rest
        @rest = ""
      end
    end

    # Outside a comment: recognise a comment start, a tag, or plain text.
    def eat_outside_comment
      if @m = CommentStart.match(@rest)
        @inside_comment = true
        accept_match
      elsif @m = Tag.match(@rest)
        @already += @m.pre_match
        @rest = @m.post_match
        handle_tag
      elsif @m = EverythingElse.match(@rest)
        accept_match
      else
        # The buffer starts with a "<" that opens neither a tag nor a
        # comment. Copy that one character through as text so the loop in
        # #eat_this keeps making progress.
        @already += @rest[0, 1]
        @rest = @rest[1..-1]
      end
    end

    # Processes the tag held in @m: writes it to the output and updates the
    # tag stack.
    def handle_tag
      is_closing = !!@m[1]
      tag = @m[2]
      attributes = @m[3]

      # A trailing "/" marks a self-closing tag such as <br/>.
      is_single = false
      if attributes =~ /\A(.*)\/\Z/
        attributes = $1
        is_single = true
      end

      if TO_SANITIZE.include? tag
        # The opening tag is written in self-closing form, so a separate
        # closing tag (</img>, </hr>) must not produce a second element.
        @already += '<%s %s />' % [tag, attributes] unless is_closing
      elsif is_closing
        @already += @m.to_s
        # Check for the empty stack first: an empty stack also fails the
        # mismatch test, which would otherwise hide this message.
        if @tag_stack.empty?
          error "Malformed: closing tag #{tag.inspect} "+
            "in empty list"
        end
        if @tag_stack.last != tag
          error "Malformed: tag <#{tag}> "+
            "closes <#{@tag_stack.last}>"
        end
        @tag_stack.pop
      else
        # Opening and self-closing tags are both copied to the output; only
        # tags that still need a closing tag go on the stack.
        @already += @m.to_s
        @tag_stack.push tag unless is_single
      end
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
|
2
|
+
#
|
3
|
+
# This file is part of Maruku.
|
4
|
+
#
|
5
|
+
# Maruku is free software; you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation; either version 2 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# Maruku is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with Maruku; if not, write to the Free Software
|
17
|
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
18
|
+
|
19
|
+
# Structures definition
|
20
|
+
require 'maruku/structures'
|
21
|
+
|
22
|
+
# Code for parsing whole Markdown documents
|
23
|
+
require 'maruku/parse_doc'
|
24
|
+
|
25
|
+
# Code for parsing Markdown block-level elements
|
26
|
+
require 'maruku/parse_block'
|
27
|
+
|
28
|
+
# Code for parsing Markdown span-level elements
|
29
|
+
require 'maruku/parse_span'
|
30
|
+
|
31
|
+
# Ugly things kept in a closet
|
32
|
+
require 'maruku/string_utils'
|
33
|
+
|
34
|
+
# Code for creating a table of contents
|
35
|
+
require 'maruku/toc'
|
36
|
+
|
37
|
+
# Version and URL
|
38
|
+
require 'maruku/version'
|
39
|
+
|
40
|
+
|
41
|
+
# Exporting to html
|
42
|
+
require 'maruku/to_html'
|
43
|
+
|
44
|
+
# Exporting to latex
|
45
|
+
require 'maruku/to_latex'
|
46
|
+
require 'maruku/to_latex_strings'
|
47
|
+
require 'maruku/to_latex_entities'
|
48
|
+
|
49
|
+
# Exporting to text: strips all formatting (not complete)
|
50
|
+
require 'maruku/to_s'
|
data/lib/maruku/parse_block.rb
CHANGED
@@ -70,7 +70,7 @@ class Maruku
|
|
70
70
|
end
|
71
71
|
when :quote; output << read_quote
|
72
72
|
when :code; e = read_code; output << e if e
|
73
|
-
when :raw_html; output <<
|
73
|
+
when :raw_html; e = read_raw_html; output << e if e
|
74
74
|
|
75
75
|
# these do not produce output
|
76
76
|
when :footnote_text; read_footnote_text
|
@@ -81,7 +81,8 @@ class Maruku
|
|
81
81
|
# warn if we forgot something
|
82
82
|
else
|
83
83
|
node_type = cur_line_node_type
|
84
|
-
|
84
|
+
line = shift_line
|
85
|
+
# $stderr.puts "Ignoring line '#{line}' type = #{node_type}"
|
85
86
|
end
|
86
87
|
|
87
88
|
if current_metadata and output.last
|
@@ -155,13 +156,18 @@ class Maruku
|
|
155
156
|
e
|
156
157
|
end
|
157
158
|
|
159
|
+
# returns an hash
|
160
|
+
# Turns the text found between the braces of a header's trailing
# "{...}" into a hash of metadata.
#
# Only the id form is understood: the text is trimmed, a leading "#"
# is removed, and what remains is returned under :id. An empty (or nil)
# attribute string yields an empty hash, so merging the result never
# overwrites an existing id with nil.
def parse_attributes(s)
  id = s.to_s.strip.sub(/\A#/, '')
  id.empty? ? {} : {:id => id}
end
|
158
163
|
# reads a header like '#### header ####'
|
164
|
+
|
159
165
|
def read_header3
|
160
166
|
e = create_md_element(:header)
|
161
167
|
line = shift_line.strip
|
162
|
-
if line =~
|
168
|
+
if line =~ HeaderWithAttributes
|
163
169
|
line = $1.strip
|
164
|
-
e.meta
|
170
|
+
e.meta.merge! parse_attributes($2)
|
165
171
|
end
|
166
172
|
|
167
173
|
e.meta[:level] = num_leading_hashes(line)
|
@@ -172,37 +178,39 @@ class Maruku
|
|
172
178
|
|
173
179
|
e
|
174
180
|
end
|
175
|
-
|
176
|
-
|
177
|
-
def read_raw_html
|
178
|
-
lines = []
|
179
|
-
|
180
|
-
cur_line =~ %r{^<(\w+)}
|
181
|
-
tag = $1
|
182
|
-
# puts "Start tag = #{tag} "
|
183
|
-
|
184
|
-
while cur_line
|
185
|
-
break if (number_of_leading_spaces(cur_line) == 0) &&
|
186
|
-
(not [:raw_html, :empty].include? cur_line_node_type)
|
187
181
|
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
182
|
+
|
183
|
+
def read_raw_html
|
184
|
+
# raw_html = ""
|
185
|
+
|
186
|
+
h = HTMLHelper.new
|
187
|
+
begin
|
188
|
+
l=shift_line
|
189
|
+
h.eat_this l
|
190
|
+
# puts "\nBLOCK:\nhtml -> #{l.inspect}"
|
191
|
+
while cur_line and not h.is_finished?
|
192
|
+
l=shift_line
|
193
|
+
# puts "html -> #{l.inspect}"
|
194
|
+
h.eat_this l
|
193
195
|
end
|
196
|
+
rescue Exception => e
|
197
|
+
puts e.inspect
|
198
|
+
# puts h.inspect
|
194
199
|
end
|
195
200
|
|
196
|
-
|
197
|
-
|
198
|
-
raw_html = lines.join("\n")
|
199
|
-
|
201
|
+
raw_html = h.stuff_you_read
|
202
|
+
|
200
203
|
e = create_md_element(:raw_html)
|
201
204
|
|
202
205
|
begin
|
206
|
+
# remove newlines and whitespace at begin
|
207
|
+
# end end of string, or else REXML gets confused
|
208
|
+
raw_html = raw_html.gsub(/\A\s*</,'<').
|
209
|
+
gsub(/>[\s\n]*\Z/,'>')
|
203
210
|
e.meta[:parsed_html] = Document.new(raw_html)
|
204
211
|
rescue
|
205
|
-
|
212
|
+
#$stderr.puts "Malformed block of HTML:\n#{raw_html}"
|
213
|
+
#puts h.inspect
|
206
214
|
end
|
207
215
|
|
208
216
|
e.meta[:raw_html] = raw_html
|
@@ -359,12 +367,16 @@ class Maruku
|
|
359
367
|
while lines.last && lines.last.strip.size == 0
|
360
368
|
lines.pop
|
361
369
|
end
|
370
|
+
|
371
|
+
while lines.first && lines.first.strip.size == 0
|
372
|
+
lines.shift
|
373
|
+
end
|
362
374
|
|
363
375
|
return nil if lines.empty?
|
364
376
|
|
365
377
|
source = lines.join("\n")
|
366
378
|
# ignore trailing lines
|
367
|
-
source = source.gsub(/\n+\Z/,'')
|
379
|
+
# source = source.gsub(/\n+\Z/,'')
|
368
380
|
|
369
381
|
# dbg_describe_ary(lines, 'CODE')
|
370
382
|
e.meta[:raw_code] = source
|
data/lib/maruku/parse_doc.rb
CHANGED
data/lib/maruku/string_utils.rb
CHANGED
@@ -191,6 +191,7 @@ class Maruku
|
|
191
191
|
# line that were mistaken for raw_html
|
192
192
|
return :text if l=~EMailAddress or l=~ URL
|
193
193
|
return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+}
|
194
|
+
return :raw_html if l =~ %r{[ ]{0,3}<\!\-\-}
|
194
195
|
return :ulist if l =~ /^\s?([\*\-\+])\s+.*\w+/
|
195
196
|
return :olist if l =~ /^\s?\d+\..*\w+/
|
196
197
|
return :empty if l.strip.size == 0
|
@@ -200,8 +201,10 @@ class Maruku
|
|
200
201
|
# at least three asterisks on a line, and only whitespace
|
201
202
|
return :hrule if l =~ /^(\s*\*\s*){3,1000}$/
|
202
203
|
return :hrule if l =~ /^(\s*-\s*){3,1000}$/ # or hyphens
|
204
|
+
return :hrule if l =~ /^(\s*_\s*){3,1000}$/ # or underscores
|
203
205
|
return :quote if l =~ /^>/
|
204
206
|
return :metadata if l =~ /^@/
|
207
|
+
return :m2ref if l =~ /^\s{0,3}\{[\w\d\s]+\}:/
|
205
208
|
return :text
|
206
209
|
end
|
207
210
|
|
@@ -257,6 +260,8 @@ class Maruku
|
|
257
260
|
|
258
261
|
HeaderWithId = /^(.*)\{\#([\w_-]+)\}\s*$/
|
259
262
|
|
263
|
+
HeaderWithAttributes = /^(.*)\{(.*)\}\s*$/
|
264
|
+
|
260
265
|
TabSize = 4;
|
261
266
|
|
262
267
|
# if contains a pipe, it could be a table header
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'maruku'
|
2
|
+
require 'bluecloth'
|
3
|
+
|
4
|
+
# Markdown source to benchmark, read once from standard input.
data = $stdin.read

# Number of repetitions per phase; reported times are per-repetition averages.
num = 10

# For every [class, export method] pair, time construction of the document
# and the export call separately, yielding
# [class, method, average parsing seconds, average rendering seconds].
stats = [
  [BlueCloth, :to_html],
  [Maruku, :to_html],
  [Maruku, :to_latex]
].map do |c, method|
  puts "Computing for #{c}"

  # Phase 1: build the document object num times; the last one is kept
  # for the rendering phase.
  doc = nil
  start = Time.now
  (1..num).each do |i|
    puts "#{i}"
    doc = c.new(data)
  end
  parsing = (Time.now - start) / num

  # Phase 2: call the export method num times on that document.
  start = Time.now
  (1..num).each do |i|
    puts "#{i}"
    doc.send method
  end
  rendering = (Time.now - start) / num

  [c, method, parsing, rendering]
end

# Summary: one line per pair.
stats.each do |c, method, parsing, rendering|
  puts(("%s (%s): parsing %0.2f sec + rendering %0.2f sec "+
    "= %0.2f sec ") % [c, method, parsing, rendering, parsing+rendering])
end
|
42
|
+
|
@@ -0,0 +1,116 @@
|
|
1
|
+
|
2
|
+
require 'maruku'
|
3
|
+
|
4
|
+
class Maruku

  # Aborts the current test by raising a RuntimeError.
  #
  # test:: the Markdown source of the failing test
  # doc::  the parsed document; its #inspect and #to_html output are
  #        included in the message
  # s::    a short description of what went wrong
  def Maruku.failed(test, doc, s)
    raise "Test failed: #{s}\n*****\n#{test}\n*****\n"+
      "#{doc.inspect}\n*****\n#{doc.to_html}"
  end

  # Parses every fragment of MetaTests (fragments are separated by "***")
  # and checks that each produces exactly one child, and that this child
  # is a header. Raises through Maruku.failed on the first violation.
  def Maruku.metaTests
    # The attributes every test header is meant to describe. Not compared
    # against the parsed header yet.
    ref = {:id => 'id1', :class => ['class1','class2'],
      :style=> 'Style is : important = for all } things'}

    MetaTests.split('***').each do |test|
      #puts "Test: #{test.inspect}"
      doc = Maruku.new(test)

      doc.children.size == 1 ||
        failed(test, doc, "children != 1")

      h = doc.children[0]

      h.node_type==:header ||
        failed(test, doc, "child not header")

      # puts doc.inspect
      # puts doc.to_html
    end
  end

MetaTests = <<EOF

# Head # {ref1 ref2 ref3}

{ref1}: id: id1; class: class1
{ref2}: class: class2
{ref3}: style: "Style is : important = for all } things"

***

# Head # {ref1 ref3 ref2}

{ref1}: id: id1; class: class1
{ref2}: class: class2
{ref3}: style: "Style is : important = for all } things"

***

# Head # {ref1 ref2 ref3}

{ref1}: id= id1; class=class1
{ref2}: class=class2
{ref3}: style="Style is : important = for all } things"

***

# Head # {ref1 ref2 ref3}

{ref1}: id=id1 class=class1
{ref2}: class=class2
{ref3}: style="Style is : important = for all } things"

***
# Head # {ref1 ref2 ref3}

{ref1}: id:id1 class:class1
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***
# Head # {ref1 ref2 ref3}

{ref1}: id:id1 class:class1
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***

# Head # {#id1 .class1 ref2 ref3}

{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***

# Head # { #id1 .class1 ref2 ref3 }

{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***

# Head # { id=id1 class=class1 ref2 ref3 }

{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***

# Head # { id:id1 class="class1" class:"class2" style="Style is : important = for all } things"}

EOF

end
|
110
|
+
|
111
|
+
# Run the metadata tests only when this file is executed directly as
# tests.rb, not when it is loaded by another script.
Maruku.metaTests if File.basename($0) == 'tests.rb'
|
115
|
+
|
116
|
+
|