maiku 0.6.1.maiku
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/maruku.rb +141 -0
- data/lib/maruku/attributes.rb +175 -0
- data/lib/maruku/defaults.rb +71 -0
- data/lib/maruku/errors_management.rb +92 -0
- data/lib/maruku/ext/div.rb +133 -0
- data/lib/maruku/ext/math.rb +41 -0
- data/lib/maruku/ext/math/elements.rb +27 -0
- data/lib/maruku/ext/math/latex_fix.rb +12 -0
- data/lib/maruku/ext/math/mathml_engines/blahtex.rb +107 -0
- data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +29 -0
- data/lib/maruku/ext/math/mathml_engines/none.rb +20 -0
- data/lib/maruku/ext/math/mathml_engines/ritex.rb +24 -0
- data/lib/maruku/ext/math/parsing.rb +119 -0
- data/lib/maruku/ext/math/to_html.rb +187 -0
- data/lib/maruku/ext/math/to_latex.rb +26 -0
- data/lib/maruku/helpers.rb +260 -0
- data/lib/maruku/input/charsource.rb +326 -0
- data/lib/maruku/input/extensions.rb +69 -0
- data/lib/maruku/input/html_helper.rb +189 -0
- data/lib/maruku/input/linesource.rb +111 -0
- data/lib/maruku/input/parse_block.rb +616 -0
- data/lib/maruku/input/parse_doc.rb +232 -0
- data/lib/maruku/input/parse_span_better.rb +746 -0
- data/lib/maruku/input/rubypants.rb +225 -0
- data/lib/maruku/input/type_detection.rb +147 -0
- data/lib/maruku/input_textile2/t2_parser.rb +163 -0
- data/lib/maruku/maruku.rb +33 -0
- data/lib/maruku/output/s5/fancy.rb +756 -0
- data/lib/maruku/output/s5/to_s5.rb +138 -0
- data/lib/maruku/output/to_html.rb +991 -0
- data/lib/maruku/output/to_latex.rb +590 -0
- data/lib/maruku/output/to_latex_entities.rb +367 -0
- data/lib/maruku/output/to_latex_strings.rb +64 -0
- data/lib/maruku/output/to_markdown.rb +164 -0
- data/lib/maruku/output/to_s.rb +56 -0
- data/lib/maruku/string_utils.rb +201 -0
- data/lib/maruku/structures.rb +167 -0
- data/lib/maruku/structures_inspect.rb +87 -0
- data/lib/maruku/structures_iterators.rb +61 -0
- data/lib/maruku/textile2.rb +1 -0
- data/lib/maruku/toc.rb +199 -0
- data/lib/maruku/usage/example1.rb +33 -0
- data/lib/maruku/version.rb +39 -0
- metadata +167 -0
@@ -0,0 +1,232 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
|
3
|
+
#
|
4
|
+
# This file is part of Maruku.
|
5
|
+
#
|
6
|
+
# Maruku is free software; you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation; either version 2 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# Maruku is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with Maruku; if not, write to the Free Software
|
18
|
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
19
|
+
#++
|
20
|
+
|
21
|
+
module MaRuKu; module In; module Markdown; module BlockLevelParser
|
22
|
+
|
23
|
+
# Parses the document source +s+ and fills in this document node:
# header attributes, children, table of contents and per-element attributes.
#
# Steps, in order:
# 1. split the leading headers from the body (parse_email_headers);
# 2. merge the headers into the document attributes;
# 3. convert the body to UTF-8 when an `encoding` attribute asks for it;
# 4. parse the body as Markdown and run the post-processing passes;
# 5. build the table of contents and default the document title from it;
# 6. expand attribute lists on every element;
# 7. run embedded code blocks when the :unsafe_features global is set.
def parse_doc(s)
  # FIXME \r\n => \n
  headers = parse_email_headers(s)
  data = headers[:data]
  headers.delete :data

  attributes.merge! headers

=begin maruku_doc
Attribute: encoding
Scope: document
Summary: Encoding for the document.

If the `encoding` attribute is specified, then the content
will be converted from the specified encoding to UTF-8.
=end

  enc = attributes[:encoding]
  attributes.delete :encoding
  needs_conversion = enc && enc.downcase != 'utf-8'
  if needs_conversion
    # Switch to ruby 1.9 String#encode
    # with backward 1.8 compatibility
    if data.respond_to?(:encode!)
      data.encode!('UTF-8', enc)
    else
      require 'iconv'
      data = Iconv.new('utf-8', enc).iconv(data)
    end
  end

  @children = parse_text_as_markdown(data)

  # These two passes always run (the markdown_extra? guard is disabled).
  search_abbreviations
  substitute_markdown_inside_raw_html

  toc = create_toc

  # use title if not set
  if !attributes[:title] && toc.header_element
    attributes[:title] = toc.header_element.to_s
  end

  # save for later use
  self.toc = toc

  # Now do the attributes magic
  each_element do |el|
    # default attribute list registered under the element's node type
    defaults = ald[el.node_type.to_s]
    expand_attribute_list(defaults, el.attributes) if defaults
    expand_attribute_list(el.al, el.attributes)
  end

=begin maruku_doc
Attribute: unsafe_features
Scope: global
Summary: Enables execution of XML instructions.

Disabled by default because of security concerns.
=end

  # TODO: remove executed code blocks
  execute_code_blocks if Maruku::Globals[:unsafe_features]
end
|
97
|
+
|
98
|
+
# Expands the attribute list +al+ into the Hash +result+.
#
# +al+ is iterated as (key, value) pairs:
# * :class values are accumulated, separated by a single space;
# * :id overwrites any previous id;
# * :ref is looked up in the document's attribute list definitions
#   (+ald+). A known reference is expanded recursively; a repeated
#   reference is reported as circular; an unknown reference is recorded
#   under :unresolved_references and also set as a boolean attribute;
# * any other key is stored under its symbol form.
#
# Returns +al+ (the value of +each+).
def expand_attribute_list(al, result)
  al.each do |k, v|
    case k
    when :class
      if result[:class]
        result[:class] += " " + v
      else
        result[:class] = v
      end
    when :id
      result[:id] = v
    when :ref
      if self.ald[v]
        already = (result[:expanded_references] ||= [])
        if already.include?(v)
          already.push v
          maruku_error "Circular reference between labels.\n\n"+
            "Label #{v.inspect} calls itself via recursion.\nThe recursion is "+
            (already.map{|x| x.inspect}.join(' => '))
        else
          already.push v
          expand_attribute_list(self.ald[v], result)
        end
      else
        # Build a fresh String every time. Storing +v+ itself and later
        # appending to it with << would modify the caller's attribute list.
        unresolved = result[:unresolved_references]
        result[:unresolved_references] =
          unresolved ? "#{unresolved} #{v}" : v.dup

        # $stderr.puts "Unresolved reference #{v.inspect} (avail: #{self.ald.keys.inspect})"
        result[v.to_sym] = true
      end
    else
      result[k.to_sym] = v
    end
  end
end
|
136
|
+
|
137
|
+
# Evaluates the Ruby source string +code+ in the context of +object+
# (via instance_eval) and returns its value.
#
# A failure is reported through maruku_error and nil is returned instead.
# RuntimeError and SyntaxError get the short "2:" report; any other
# exception is reported together with its backtrace.
#
# SECURITY NOTE: this evaluates arbitrary Ruby code.
def safe_execute_code(object, code)
  begin
    return object.instance_eval(code)
  rescue SystemExit, SignalException
    # Process-control exceptions are not evaluation errors; let them through.
    raise
  rescue RuntimeError, SyntaxError => e
    # Specific classes must come before the catch-all below, otherwise
    # this clause can never be reached.
    maruku_error "2: Exception while executing this:\n"+
      add_tabs(code, 1, ">")+
      "\nThe error was:\n"+
      add_tabs(e.inspect, 1, "|")
  rescue Exception => e
    # Exceptions expose #backtrace (there is no Exception#caller); it can
    # be nil, hence the fallback to an empty list.
    maruku_error "Exception while executing this:\n"+
      add_tabs(code, 1, ">")+
      "\nThe error was:\n"+
      add_tabs(e.inspect+"\n"+(e.backtrace || []).join("\n"), 1, "|")
  end
  nil
end
|
158
|
+
|
159
|
+
# Runs every :xml_instr element whose target is 'maruku' through
# safe_execute_code, using the element itself as the evaluation context.
# A String result is printed to standard output.
def execute_code_blocks
  each_element(:xml_instr) do |instr|
    next unless instr.target == 'maruku'

    outcome = safe_execute_code(instr, instr.code)
    puts "Result is : #{outcome.inspect}" if outcome.kind_of?(String)
  end
end
|
169
|
+
|
170
|
+
# For every (abbreviation, title) pair in +abbreviations+, walks the
# strings of the document with replace_each_string and splits the first
# literal occurrence out into an abbreviation element.
def search_abbreviations
  abbreviations.each do |abbrev, title|
    pattern = Regexp.new(Regexp.escape(abbrev))
    replace_each_string do |text|
      # bug if many abbreviations are present (agorf)
      hit = pattern.match(text)
      if hit
        abbr_el = md_abbr(abbrev.dup, title ? title.dup : nil)
        [hit.pre_match, abbr_el, hit.post_match]
      else
        text
      end
    end
  end
end
|
184
|
+
|
185
|
+
include REXML
|
186
|
+
# (PHP Markdown extra) Looks inside every :raw_html element for HTML
# elements carrying a `markdown` attribute and replaces their text nodes
# with the HTML produced by parsing that text as Markdown.
#
# Text is parsed at block level when the attribute value is 'block' or
# the tag is one of the block tags ('div'); otherwise at span level.
# The `markdown` attribute is removed afterwards.
def substitute_markdown_inside_raw_html
  each_element(:raw_html) do |raw|
    doc = raw.instance_variable_get :@parsed_html
    next unless doc # only elements whose HTML was parsed successfully

    # parse block-level markdown elements in these HTML tags
    block_tags = ['div']

    # use xpath to find elements with 'markdown' attribute
    XPath.match(doc, "//*[attribute::markdown]" ).each do |html_el|
      # should we parse block-level or span-level?
      how = html_el.attributes['markdown']
      parse_blocks = (how == 'block') || block_tags.include?(html_el.name)

      # Select the text nodes to convert.
      # NOTE(review): the path starts with "//", which looks absolute rather
      # than relative to html_el - confirm whether ".//text()" was intended.
      XPath.match(html_el, "//text()" ).each do |original_text|
        stripped = original_text.value.strip
        next unless stripped.size > 0

        parsed =
          if parse_blocks
            parse_text_as_markdown(stripped)
          else
            parse_lines_as_span([stripped])
          end
        wrapper = md_el(:dummy, parsed)

        # Insert the generated nodes in place of the original text node.
        owner = original_text.parent
        wrapper.children_to_html.each do |node|
          owner.insert_before(original_text, node)
        end
        owner.delete(original_text)
      end

      # remove 'markdown' attribute
      html_el.delete_attribute 'markdown'
    end
  end
end
|
231
|
+
|
232
|
+
end end end end
|
@@ -0,0 +1,746 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
|
3
|
+
#
|
4
|
+
# This file is part of Maruku.
|
5
|
+
#
|
6
|
+
# Maruku is free software; you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation; either version 2 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# Maruku is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with Maruku; if not, write to the Free Software
|
18
|
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
19
|
+
#++
|
20
|
+
|
21
|
+
|
22
|
+
require 'set'
|
23
|
+
|
24
|
+
module MaRuKu; module In; module Markdown; module SpanLevelParser
|
25
|
+
include MaRuKu::Helpers
|
26
|
+
|
27
|
+
# Characters that a backslash escapes in ordinary span text.
EscapedCharInText = Set.new(
  [?\\, ?`, ?*, ?_, ?{, ?}, ?[, ?], ?(, ?), ?#, ?., ?!, ?|, ?:, ?+, ?-, ?>]
)

# Same set, plus both quote characters, for use inside quoted values.
EscapedCharInQuotes = EscapedCharInText | [?', ?"]

# Characters that a backslash escapes inside inline code.
EscapedCharInInlineCode = [?\\, ?`]
|
34
|
+
|
35
|
+
# Joins +lines+ with newlines and parses the result as span-level
# Markdown via parse_span_better, passing +parent+ through.
def parse_lines_as_span(lines, parent=nil)
  joined = lines.join("\n")
  parse_span_better(joined, parent)
end
|
38
|
+
|
39
|
+
# Parses +string+ as span-level Markdown.
#
# Reports through +error+ when the argument is not a String, then wraps
# a frozen copy of the text in a CharSource (with +parent+) and hands it
# to read_span, which runs until the end of the input (cur_char is nil).
def parse_span_better(string, parent=nil)
  error "Passed #{string.class}." unless string.kind_of? String

  frozen_copy = (string + "").freeze
  read_span(CharSource.new(frozen_copy, parent), EscapedCharInText, [nil])
end
|
48
|
+
|
49
|
+
# This is the main loop for reading span elements.
#
# It's long, but not *complex* or difficult to understand.
#
# Reads from +src+ until the current character is in +exit_on_chars+ or
# the input continues with one of +exit_on_strings+; the terminator is
# not consumed. +escaped+ is the collection of characters that a
# backslash escapes. Returns the list of elements read, after IAL
# merging, trimming of one leading/trailing space and +educate+.
def read_span(src, escaped, exit_on_chars, exit_on_strings=nil)
  con = SpanContext.new
  c = d = nil
  while true
    c = src.cur_char

    # This is only an optimization which cuts 50% of the time used.
    # (but you can't use a-zA-z in exit_on_chars)
    if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z)))
      con.cur_string << src.shift_char
      next
    end

    break if exit_on_chars && exit_on_chars.include?(c)
    break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x}

    # check if there are extensions
    if check_span_extensions(src, con)
      next
    end

    case c = src.cur_char
    when ?\s # it's space (32)
      # Hard line break: two spaces followed by a newline. The string is
      # three characters long, matching the three characters skipped.
      if src.cur_chars_are "  \n"
        src.ignore_chars(3)
        con.push_element md_br()
        next
      else
        src.ignore_char
        con.push_space
      end
    when ?\n, ?\t
      src.ignore_char
      con.push_space
    when ?`
      read_inline_code(src,con)
    when ?<
      # It could be:
      # 1) HTML "<div ..."
      # 2) HTML "<!-- ..."
      # 3) url "<http:// ", "<ftp:// ..."
      # 4) email "<andrea@... ", "<mailto:andrea@..."
      # 5) on itself! "a < b "
      # 6) Start of <<guillemettes>>

      case d = src.next_char
      when ?< # guillemettes
        src.ignore_chars(2)
        con.push_char ?<
        con.push_char ?<
      when ?!
        if src.cur_chars_are '<!--'
          read_inline_html(src, con)
        else
          con.push_char src.shift_char
        end
      when ??
        read_xml_instr_span(src, con)
      when ?\s, ?\t
        con.push_char src.shift_char
      else
        if src.next_matches(/<mailto:/) or
           src.next_matches(/<[\w\.]+\@/)
          read_email_el(src, con)
        elsif src.next_matches(/<\w+:/)
          read_url_el(src, con)
        elsif src.next_matches(/<\w/)
          #puts "This is HTML: #{src.cur_chars(20)}"
          read_inline_html(src, con)
        else
          #puts "This is NOT HTML: #{src.cur_chars(20)}"
          con.push_char src.shift_char
        end
      end
    when ?\\
      d = src.next_char
      if d == ?'
        src.ignore_chars(2)
        con.push_element md_entity('apos')
      elsif d == ?"
        src.ignore_chars(2)
        con.push_element md_entity('quot')
      elsif escaped.include? d
        src.ignore_chars(2)
        con.push_char d
      else
        con.push_char src.shift_char
      end
    when ?[
      if markdown_extra? && src.next_char == ?^
        read_footnote_ref(src,con)
      else
        read_link(src, con)
      end
    when ?!
      if src.next_char == ?[
        read_image(src, con)
      else
        con.push_char src.shift_char
      end
    when ?&
      # named references
      if m = src.read_regexp(/\&([\w\d]+);/)
        con.push_element md_entity(m[1])
      # numeric
      elsif m = src.read_regexp(/\&\#(x)?([\w\d]+);/)
        num = m[1] ? m[2].hex : m[2].to_i
        con.push_element md_entity(num)
      else
        con.push_char src.shift_char
      end
    when ?*
      if not src.next_char
        maruku_error "Opening * as last char.", src, con
        # Pass the source and context, as the `_` branch below does.
        maruku_recover "Threating as literal", src, con
        con.push_char src.shift_char
      else
        follows = src.cur_chars(4)
        if follows =~ /^\*\*\*[^\s\*]/
          con.push_element read_emstrong(src,'***')
        elsif follows =~ /^\*\*[^\s\*]/
          con.push_element read_strong(src,'**')
        elsif follows =~ /^\*[^\s\*]/
          con.push_element read_em(src,'*')
        else # * is just a normal char
          con.push_char src.shift_char
        end
      end
    when ?_
      if not src.next_char
        maruku_error "Opening _ as last char", src, con
        maruku_recover "Threating as literal", src, con
        con.push_char src.shift_char
      else
        # we don't want "mod_ruby" to start an emphasis
        # so we start one only if
        # 1) there's nothing else in the span (first char)
        # or 2) the last char was a space
        # or 3) the current string is empty
        #if con.elements.empty? ||
        if (con.cur_string =~ /\s\Z/) || (con.cur_string.size == 0)
          # also, we check the next characters
          follows = src.cur_chars(4)
          if follows =~ /^\_\_\_[^\s\_]/
            con.push_element read_emstrong(src,'___')
          elsif follows =~ /^\_\_[^\s\_]/
            con.push_element read_strong(src,'__')
          elsif follows =~ /^\_[^\s\_]/
            con.push_element read_em(src,'_')
          else # _ is just a normal char
            con.push_char src.shift_char
          end
        else
          # _ is just a normal char
          con.push_char src.shift_char
        end
      end
    when ?{ # extension
      if [?#, ?., ?:].include? src.next_char
        src.ignore_char # {
        interpret_extension(src, con, [?}])
        src.ignore_char # }
      else
        con.push_char src.shift_char
      end
    when nil
      # End of input reached before any terminator.
      maruku_error( ("Unclosed span (waiting for %s"+
        "#{exit_on_strings.inspect})") % [
          exit_on_chars ? "#{exit_on_chars.inspect} or" : ""],
        src,con)
      break
    else # normal text
      con.push_char src.shift_char
    end # end case
  end # end while true
  con.push_string_if_present

  # Assign IAL to elements
  merge_ial(con.elements, src, con)

  # Remove leading space
  if (s = con.elements.first).kind_of? String
    if s[0] == ?\s then con.elements[0] = s[1, s.size-1] end
    con.elements.shift if s.size == 0
  end

  # Remove final spaces
  if (s = con.elements.last).kind_of? String
    s.chop! if s[-1] == ?\s
    con.elements.pop if s.size == 0
  end

  educated = educate(con.elements)

  educated
end
|
251
|
+
|
252
|
+
|
253
|
+
# Reads an XML processing instruction "<?target code ?>" whose "<?" is at
# the cursor and pushes an xml_instr element (target, stripped code).
# The target is the leading run of word characters, or '' if none.
def read_xml_instr_span(src, con)
  src.ignore_chars(2) # starting <?

  # read target <?target code... ?>
  match = src.read_regexp(/(\w+)/)
  target = match ? match[1] : ''

  terminator = "?>"
  body = read_simple(src, [], [], [terminator])
  src.ignore_chars terminator.size

  con.push_element md_xml_instr(target, (body || "").strip)
end
|
274
|
+
|
275
|
+
# Start: cursor on character **after** '{'
# End: cursor on '}' or EOF
#
# Dispatches on the first character: ':' (skipped), '#' and '.' go
# straight to extension_meta. Anything else is read with read_simple,
# reported through maruku_recover, and then also handed to extension_meta.
def interpret_extension(src, con, break_on_chars)
  case src.cur_char
  when ?:
    src.ignore_char # :
    extension_meta(src, con, break_on_chars)
  when ?#, ?.
    extension_meta(src, con, break_on_chars)
  else
    stuff = read_simple(src, [?}], break_on_chars, [])
    notice = "I will threat this:\n\t{#{stuff}} \n as meta-data.\n"
    if stuff =~ /^(\w+\s|[^\w])/
      extension_id = $1.strip
      notice = "I don't know what to do with extension '#{extension_id}'\n" + notice
    end
    maruku_recover notice, src, con
    extension_meta(src, con, break_on_chars)
  end
end
|
300
|
+
|
301
|
+
# Reads the meta-data that follows '{' in an extension.
#
# When the input starts with "name:", the attribute list that follows is
# an attribute list definition (ALD): it is registered in the document
# under that name and an ald element is pushed. Otherwise the attribute
# list is an inline attribute list (IAL) and only an ial element is pushed.
def extension_meta(src, con, break_on_chars)
  if m = src.read_regexp(/([^\s\:\"\']+):/)
    name = m[1]
    al = read_attribute_list(src, con, break_on_chars)
    # puts "#{name}=#{al.inspect}"
    self.doc.ald[name] = al
    con.push md_ald(name, al)
  else
    al = read_attribute_list(src, con, break_on_chars)
    # An inline list has no name, so nothing is registered in doc.ald
    # (+name+ is nil here and would create a bogus nil key).
    con.push md_ial(al)
  end
end
|
314
|
+
|
315
|
+
# Reads an autolink "<url>" whose '<' is at the cursor and pushes a url
# element built from the text up to (not including) the closing '>'.
def read_url_el(src,con)
  src.ignore_char # leading <
  target = read_simple(src, [], [?>])
  src.ignore_char # closing >

  con.push_element md_url(target)
end
|
322
|
+
|
323
|
+
# Reads an autolink "<address>" or "<mailto:address>" whose '<' is at the
# cursor and pushes an email element for the address, with any
# line-leading "mailto:" prefix removed.
def read_email_el(src,con)
  src.ignore_char # leading <
  raw = read_simple(src, [], [?>])
  src.ignore_char # closing >

  con.push_element md_email(raw.gsub(/^mailto:/,''))
end
|
331
|
+
|
332
|
+
# Reads a URL up to one of the characters in +break_on+.
#
# Reports through +error+ when the URL starts with a quote character.
# A URL wrapped in angle brackets is unwrapped. Returns the URL, or nil
# when it is empty.
def read_url(src, break_on)
  error 'Invalid char for url', src if [?',?"].include? src.cur_char

  url = read_simple(src, [], break_on) || "" # nil means an empty url

  url = url[1, url.size-2] if url[0] == ?< && url[-1] == ?>

  url.size == 0 ? nil : url
end
|
352
|
+
|
353
|
+
|
354
|
+
# Reads a value that may or may not be quoted: when the cursor is on a
# quote character the value is read with read_quoted, otherwise with
# read_simple using +escaped+ and +exit_on_chars+.
def read_quoted_or_unquoted(src, con, escaped, exit_on_chars)
  starts_with_quote = [?', ?"].include?(src.cur_char)
  if starts_with_quote
    read_quoted(src, con)
  else
    read_simple(src, escaped, exit_on_chars)
  end
end
|
362
|
+
|
363
|
+
# Tries to read a quoted value. If the stream does not start with ' or ",
# returns nil without consuming anything.
#
# Otherwise consumes the opening quote, reads up to the matching quote
# character (honouring EscapedCharInQuotes) and consumes the closing
# quote. The result is whatever read_simple returns.
def read_quoted(src, con)
  return nil unless [?', ?"].include?(src.cur_char)

  opening = src.shift_char # opening quote
  quoted = read_simple(src, EscapedCharInQuotes, [opening])
  src.ignore_char # closing quote
  quoted
end
|
377
|
+
|
378
|
+
# Reads a simple string (no formatting) until the cursor is on one of
# +exit_on_chars+ or the input continues with one of +exit_on_strings+,
# un-escaping the characters listed in +escaped+.
#
# The terminator is not consumed. Either stop list may be nil.
# Returns the text read, or nil if the text is empty.
# If the input ends before a terminator is found, the problem is reported
# through maruku_error / maruku_recover and what was read so far is
# returned.
def read_simple(src, escaped, exit_on_chars, exit_on_strings=nil)
  text = ""
  while true
    # puts "Reading simple #{text.inspect}"
    c = src.cur_char
    break if exit_on_chars && exit_on_chars.include?(c)

    break if exit_on_strings &&
             exit_on_strings.any? {|x| src.cur_chars_are x}

    case c
    when nil
      # exit_on_chars may legitimately be nil (see the guard above), so
      # it must not be dereferenced unconditionally in the message.
      s = "String finished while reading (break on "+
          "#{(exit_on_chars || []).map{|x|""<<x}.inspect})"+
          " already read: #{text.inspect}"
      maruku_error s, src
      maruku_recover "I boldly continue", src
      break
    when ?\\
      d = src.next_char
      if escaped.include? d
        # escaped character: drop the backslash, keep the character
        src.ignore_chars(2)
        text << d
      else
        text << src.shift_char
      end
    else
      text << src.shift_char
    end
  end
  # puts "Read simple #{text.inspect}"
  text.empty? ? nil : text
end
|
420
|
+
|
421
|
+
# Reads an emphasis span: skips the one-character opening delimiter,
# reads the inner span up to +delim+, skips one closing character and
# returns the em element wrapping the inner elements.
def read_em(src, delim)
  src.ignore_char # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_char # closing delimiter
  md_em(inner)
end
|
427
|
+
|
428
|
+
# Reads a strong span: skips the two-character opening delimiter, reads
# the inner span up to +delim+, skips two closing characters and returns
# the strong element wrapping the inner elements.
def read_strong(src, delim)
  src.ignore_chars(2) # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(2) # closing delimiter
  md_strong(inner)
end
|
434
|
+
|
435
|
+
# Reads a combined emphasis+strong span: skips the three-character
# opening delimiter, reads the inner span up to +delim+, skips three
# closing characters and returns the emstrong element.
def read_emstrong(src, delim)
  src.ignore_chars(3) # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(3) # closing delimiter
  md_emstrong(inner)
end
|
441
|
+
|
442
|
+
# The space character (code 32). Written with the \s escape so the
# literal does not depend on trailing whitespace surviving in the file.
SPACE = ?\s

# Matches a reference id up to and including its closing bracket;
# group 1 is the id itself (possibly empty, whitespace included).
# Earlier variant, kept for reference:
# R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
R_REF_ID = /([^\]]*)\]/
|
447
|
+
|
448
|
+
# Reads a bracketed id "[refid]". Consumes also both brackets.
#
# Whitespace right after the opening bracket is skipped. Returns the id
# (group 1 of R_REF_ID), or nil when no closing bracket can be matched.
def read_ref_id(src, con)
  src.ignore_char # [
  src.consume_whitespace
  found = src.read_regexp(R_REF_ID)
  found ? found[1] : nil
end
|
461
|
+
|
462
|
+
# Reads a footnote reference: the bracketed id at the cursor is read with
# read_ref_id and a footnote-reference element for it is pushed.
def read_footnote_ref(src,con)
  footnote_id = read_ref_id(src,con)
  con.push_element md_foot_ref(footnote_id)
end
|
466
|
+
|
467
|
+
# Reads inline HTML starting at the cursor.
#
# The remaining buffer is fed to an HTMLHelper one character at a time
# until the helper reports that it is finished. The characters fed are
# then skipped in +src+ and an html element with what the helper read is
# pushed. If the buffer runs out first, the HTML is reported as malformed.
# Any exception raised along the way is reported and a single character
# is pushed as plain text so that parsing can continue.
def read_inline_html(src, con)
  helper = HTMLHelper.new
  begin
    # This is our current buffer in the context
    buffer = src.current_remaining_buffer

    taken = 0
    while true
      if taken >= buffer.size
        maruku_error "Malformed HTML starting at #{buffer.inspect}", src, con
        break
      end

      helper.eat_this buffer[taken].chr
      taken += 1
      break if helper.is_finished?
    end
    src.ignore_chars(taken)
    con.push_element md_html(helper.stuff_you_read)

    # (An earlier version fed the whole remaining buffer to the helper at
    # once and computed the consumed size from helper.rest.)
  rescue Exception => e
    maruku_error "Bad html: \n" +
      add_tabs(e.inspect+e.backtrace.join("\n"),1,'>'),
      src,con
    maruku_recover "I will try to continue after bad HTML.", src, con
    con.push_char src.shift_char
  end
end
|
509
|
+
|
510
|
+
# Reads an inline code span whose opening backticks are at the cursor and
# pushes a code element.
#
# The span is delimited by as many backticks as it was opened with. When
# more than one backtick is used, one leading and one trailing space of
# the code are dropped. Empty or unterminated code yields an empty
# string rather than nil.
def read_inline_code(src, con)
  # Count the number of ticks
  num_ticks = 0
  while src.cur_char == ?`
    num_ticks += 1
    src.ignore_char
  end
  # We will read until this string
  end_string = "`"*num_ticks

  # read_simple returns nil when nothing was read; normalise to "" so the
  # indexing below and md_code always receive a String.
  code = read_simple(src, [], [], [end_string]) || ""

  # puts "Now I expects #{num_ticks} ticks: #{src.cur_chars(10).inspect}"
  src.ignore_chars num_ticks

  # Ignore at most one space
  if num_ticks > 1 && code[0] == SPACE
    code = code[1, code.size-1]
  end

  # drop last space
  if num_ticks > 1 && code[-1] == SPACE
    code = code[0,code.size-1]
  end

  # puts "Read `` code: #{code.inspect}; after: #{src.cur_chars(10).inspect} "
  con.push_element md_code(code)
end
|
540
|
+
|
541
|
+
# Reads a link whose opening '[' is at the cursor and pushes the
# resulting element(s) on +con+.
#
# Three forms are handled after the bracketed text:
# * "(url "title")"  - inline link (md_im_link);
# * "[ref]"          - reference link (md_link); an empty ref uses the
#                      link text as id;
# * nothing          - implicit reference, id taken from the link text.
# One space between the text and '(' or '[' is tolerated. When the link
# cannot be completed, the problem is reported and the already parsed
# children are pushed as plain content.
def read_link(src, con)
  # we read the string and see what happens
  src.ignore_char # opening bracket
  children = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket

  # ignore space
  if src.cur_char == SPACE &&
     (src.next_char == ?[ || src.next_char == ?( )
    src.shift_char
  end

  case src.cur_char
  when ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE,?\t,?)])
    if not url
      url = '' # no url is ok
    end
    src.consume_whitespace
    title = nil
    if src.cur_char != ?) # we have a title
      quote_char = src.cur_char
      title = read_quoted(src,con)

      if not title
        maruku_error 'Must quote title',src,con
      else
        # Tries to read a title with quotes:
        # this is the most ugly thing in Markdown
        if not src.next_matches(/\s*\)/)
          # if there is not a closing par ), then read
          # the rest and guess it's title with quotes
          rest = read_simple(src, [], [?)], [])
          # read_simple returns nil when the input ends here; in that
          # case there is nothing to append and the missing ')' is
          # reported below.
          if rest
            # chop the closing char
            rest.chop!
            title << quote_char << rest
          end
        end
      end
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      maruku_error 'Unclosed link',src,con
      maruku_recover "No closing ): I will not create"+
        " the link for #{children.inspect}", src, con
      con.push_elements children
      return
    end
    con.push_element md_im_link(children,url, title)
  when ?[ # link ref
    ref_id = read_ref_id(src,con)
    if ref_id
      if ref_id.size == 0
        ref_id = sanitize_ref_id(children.to_s)
      else
        ref_id = sanitize_ref_id(ref_id)
      end
      con.push_element md_link(children, ref_id)
    else
      maruku_error "Could not read ref_id", src, con
      maruku_recover "I will not create the link for "+
        "#{children.inspect}", src, con
      con.push_elements children
      return
    end
  else # empty [link]
    id = sanitize_ref_id(children.to_s) #. downcase.gsub(' ','_')
    con.push_element md_link(children, id)
  end
end # read link
|
614
|
+
|
615
|
+
# Reads an image whose opening "![" is at the cursor and pushes the
# resulting element on +con+.
#
# Three forms are handled after the bracketed alt text:
# * "(url "title")"  - inline image (md_im_image);
# * "[ref]"          - reference image (md_image); an empty ref uses the
#                      alt text as id;
# * nothing          - implicit reference, id taken from the alt text.
# One space between the alt text and '(' or '[' is tolerated.
def read_image(src, con)
  src.ignore_chars(2) # opening "!["
  alt_text = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket
  # ignore space
  if src.cur_char == SPACE &&
     (src.next_char == ?[ || src.next_char == ?( )
    src.ignore_char
  end
  case src.cur_char
  when ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE,?\t,?)])
    if not url
      error "Could not read url from #{src.cur_chars(10).inspect}",
        src,con
    end
    src.consume_whitespace
    title = nil
    if src.cur_char != ?) # we have a title
      quote_char = src.cur_char
      title = read_quoted(src,con)
      if not title
        maruku_error 'Must quote title',src,con
      else
        # Tries to read a title with quotes:
        # this is the most ugly thing in Markdown
        if not src.next_matches(/\s*\)/)
          # if there is not a closing par ), then read
          # the rest and guess it's title with quotes
          rest = read_simple(src, [], [?)], [])
          # read_simple returns nil when the input ends here; in that
          # case there is nothing to append.
          if rest
            # chop the closing char
            rest.chop!
            title << quote_char << rest
          end
        end
      end
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      # closing is nil at end of input; appending nil to a String would
      # raise TypeError while building the message.
      shown = closing ? ("" << closing) : ""
      error( "Unclosed link: '#{shown}'"+
        " Read url=#{url.inspect} title=#{title.inspect}",src,con)
    end
    con.push_element md_im_image(alt_text, url, title)
  when ?[ # link ref
    ref_id = read_ref_id(src,con)
    if not ref_id # TODO: check around
      error('Reference not closed.', src, con)
      ref_id = ""
    end
    if ref_id.size == 0
      ref_id = alt_text.to_s
    end

    ref_id = sanitize_ref_id(ref_id)

    con.push_element md_image(alt_text, ref_id)
  else # no stuff
    ref_id = sanitize_ref_id(alt_text.to_s)
    con.push_element md_image(alt_text, ref_id)
  end
end # read image
|
679
|
+
|
680
|
+
|
681
|
+
# Accumulates the result of reading a span: a list of finished elements
# plus the plain-text string currently being built.
class SpanContext
  include MaRuKu::Strings

  # Read elements
  attr_accessor :elements
  # Text collected since the last element was pushed
  attr_accessor :cur_string

  def initialize
    @elements = []
    @cur_string = ""
  end

  # Appends +e+ (a String or an MDElement) to the elements, flushing the
  # pending text first. Raises for any other kind of object.
  def push_element(e)
    raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} " if
      not (e.kind_of?(String) or e.kind_of?(MDElement))

    push_string_if_present
    @elements << e
    nil
  end
  alias push push_element

  # Pushes every item of +a+: strings are appended to the pending text,
  # anything else goes through push_element. Returns +a+.
  def push_elements(a)
    a.each do |e|
      if e.kind_of? String
        # Append the string as a whole. Re-adding it byte by byte would
        # turn every byte of a multibyte character into its own codepoint
        # on Ruby 1.9+.
        push_char e
      else
        push_element e
      end
    end
  end

  # Moves the pending text, if any, to the elements list.
  def push_string_if_present
    if @cur_string.size > 0
      @elements << @cur_string
      @cur_string = ""
    end
    nil
  end

  # Appends the character +c+ to the pending text.
  def push_char(c)
    @cur_string << c
    nil
  end

  # push space into current string if
  # there isn't one
  def push_space
    last = @cur_string[@cur_string.size-1]
    @cur_string << ?\s if last != ?\s
  end

  # Returns a human-readable dump of the elements read so far and of the
  # pending text, for use in diagnostics.
  def describe
    lines = @elements.map{|x| x.inspect}.join("\n")
    s = "Elements read in span: \n" +
      add_tabs(lines,1, ' -')+"\n"

    if @cur_string.size > 0
      s += "Current string: \n  #{@cur_string.inspect}\n"
    end
    s
  end
end # SpanContext
|
744
|
+
|
745
|
+
end end end end # module MaRuKu; module In; module Markdown; module SpanLevelParser
|
746
|
+
|