maruku 0.2.13 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/maruku +23 -15
- data/bin/maruku0.3 +37 -0
- data/bin/marutest +277 -0
- data/docs/changelog-0.3.html +99 -0
- data/docs/changelog-0.3.md +84 -0
- data/docs/faq.html +46 -0
- data/docs/faq.md +32 -0
- data/docs/index.html +629 -64
- data/docs/markdown_extra2.html +67 -14
- data/docs/markdown_syntax.html +631 -94
- data/docs/markdown_syntax_2.html +152 -0
- data/docs/maruku.html +629 -64
- data/docs/maruku.md +108 -105
- data/docs/proposal.html +362 -55
- data/docs/proposal.md +133 -169
- data/docs/todo.html +30 -0
- data/lib/maruku.rb +13 -3
- data/lib/maruku/errors_management.rb +75 -0
- data/lib/maruku/helpers.rb +164 -0
- data/lib/maruku/html_helper.rb +33 -13
- data/lib/maruku/parse_block.rb +89 -92
- data/lib/maruku/parse_doc.rb +43 -18
- data/lib/maruku/parse_span.rb +17 -46
- data/lib/maruku/parse_span_better.rb +681 -0
- data/lib/maruku/string_utils.rb +17 -10
- data/lib/maruku/structures.rb +62 -35
- data/lib/maruku/structures_iterators.rb +39 -0
- data/lib/maruku/tests/benchmark.rb +12 -4
- data/lib/maruku/tests/new_parser.rb +318 -0
- data/lib/maruku/to_html.rb +113 -44
- data/lib/maruku/to_latex.rb +32 -14
- data/lib/maruku/to_markdown.rb +110 -0
- data/lib/maruku/toc.rb +35 -1
- data/lib/maruku/version.rb +10 -1
- data/lib/test.rb +29 -0
- data/tests/others/escaping.md +6 -4
- data/tests/others/links.md +1 -1
- data/tests/others/lists_after_paragraph.md +44 -0
- data/tests/unittest/abbreviations.md +71 -0
- data/tests/unittest/blank.md +43 -0
- data/tests/unittest/blanks_in_code.md +131 -0
- data/tests/unittest/code.md +64 -0
- data/tests/unittest/code2.md +59 -0
- data/tests/unittest/code3.md +121 -0
- data/tests/unittest/easy.md +36 -0
- data/tests/unittest/email.md +39 -0
- data/tests/unittest/encoding/iso-8859-1.md +9 -0
- data/tests/unittest/encoding/utf-8.md +38 -0
- data/tests/unittest/entities.md +174 -0
- data/tests/unittest/escaping.md +97 -0
- data/tests/unittest/extra_dl.md +81 -0
- data/tests/unittest/extra_header_id.md +96 -0
- data/tests/unittest/extra_table1.md +78 -0
- data/tests/unittest/footnotes.md +120 -0
- data/tests/unittest/headers.md +64 -0
- data/tests/unittest/hrule.md +77 -0
- data/tests/unittest/images.md +114 -0
- data/tests/unittest/inline_html.md +185 -0
- data/tests/unittest/links.md +162 -0
- data/tests/unittest/list1.md +80 -0
- data/tests/unittest/list2.md +75 -0
- data/tests/unittest/list3.md +111 -0
- data/tests/unittest/list4.md +43 -0
- data/tests/unittest/lists.md +262 -0
- data/tests/unittest/lists_after_paragraph.md +280 -0
- data/tests/unittest/lists_ol.md +323 -0
- data/tests/unittest/misc_sw.md +751 -0
- data/tests/unittest/notyet/escape.md +46 -0
- data/tests/unittest/notyet/header_after_par.md +85 -0
- data/tests/unittest/notyet/ticks.md +67 -0
- data/tests/unittest/notyet/triggering.md +210 -0
- data/tests/unittest/one.md +33 -0
- data/tests/unittest/paragraph.md +34 -0
- data/tests/unittest/paragraph_rules/dont_merge_ref.md +60 -0
- data/tests/unittest/paragraph_rules/tab_is_blank.md +43 -0
- data/tests/unittest/paragraphs.md +84 -0
- data/tests/unittest/recover/recover_links.md +32 -0
- data/tests/unittest/references/long_example.md +87 -0
- data/tests/unittest/references/spaces_and_numbers.md +27 -0
- data/tests/unittest/syntax_hl.md +99 -0
- data/tests/unittest/test.md +36 -0
- data/tests/unittest/wrapping.md +88 -0
- data/tests/utf8-files/simple.md +1 -0
- metadata +139 -86
- data/lib/maruku/maruku.rb +0 -50
- data/tests/a.md +0 -10
data/lib/maruku/parse_doc.rb
CHANGED
@@ -16,14 +16,17 @@
|
|
16
16
|
# along with Maruku; if not, write to the Free Software
|
17
17
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
18
18
|
|
19
|
+
require 'iconv'
|
20
|
+
|
19
21
|
class Maruku
|
20
|
-
def initialize(s=nil)
|
22
|
+
def initialize(s=nil, meta={})
|
21
23
|
@node_type = :document
|
22
24
|
@doc = self
|
23
25
|
|
24
26
|
@refs = {}
|
25
27
|
@footnotes = {}
|
26
28
|
@abbreviations = {}
|
29
|
+
@meta = meta
|
27
30
|
|
28
31
|
parse_doc(s) if s
|
29
32
|
end
|
@@ -32,14 +35,31 @@ class Maruku
|
|
32
35
|
# setup initial stack
|
33
36
|
@stack = []
|
34
37
|
|
35
|
-
|
36
|
-
data =
|
37
|
-
|
38
|
-
|
38
|
+
meta2 = parse_email_headers(s)
|
39
|
+
data = meta2[:data]
|
40
|
+
meta2.delete :data
|
41
|
+
|
42
|
+
@meta.merge! meta2
|
43
|
+
|
44
|
+
enc = @meta[:encoding]
|
45
|
+
@meta.delete :encoding
|
46
|
+
if enc && enc.downcase != 'utf-8'
|
47
|
+
# puts "Converting from #{enc} to UTF-8."
|
48
|
+
converted = Iconv.new('utf-8', enc).iconv(data)
|
49
|
+
|
50
|
+
# puts "Data: #{data.inspect}: #{data}"
|
51
|
+
# puts "Conv: #{converted.inspect}: #{converted}"
|
52
|
+
|
53
|
+
data = converted
|
54
|
+
end
|
55
|
+
|
56
|
+
lines = Maruku.split_lines(data)
|
39
57
|
@children = parse_lines_as_markdown(lines)
|
40
58
|
|
41
|
-
|
42
|
-
|
59
|
+
if true #markdown_extra?
|
60
|
+
self.search_abbreviations
|
61
|
+
self.substitute_markdown_inside_raw_html
|
62
|
+
end
|
43
63
|
|
44
64
|
toc = create_toc
|
45
65
|
|
@@ -47,24 +67,28 @@ class Maruku
|
|
47
67
|
if not self.meta[:title] and toc.header_element
|
48
68
|
title = toc.header_element.to_s
|
49
69
|
self.meta[:title] = title
|
50
|
-
puts "Set document title to #{title}"
|
70
|
+
# puts "Set document title to #{title}"
|
51
71
|
end
|
52
72
|
|
53
73
|
# save for later use
|
54
74
|
self.toc = toc
|
55
75
|
|
56
|
-
|
76
|
+
# puts self.inspect
|
57
77
|
end
|
58
78
|
|
59
79
|
def search_abbreviations
|
60
80
|
@abbreviations.each do |abbrev, title|
|
61
|
-
|
62
|
-
self.
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
81
|
+
reg = Regexp.new(Regexp.escape(abbrev))
|
82
|
+
self.replace_each_string do |s|
|
83
|
+
if m = reg.match(s)
|
84
|
+
e = create_md_element(:abbreviation)
|
85
|
+
e.children = [abbrev.dup]
|
86
|
+
e.meta[:title] = title.dup if title
|
87
|
+
[m.pre_match, e, m.post_match]
|
88
|
+
else
|
89
|
+
s
|
90
|
+
end
|
91
|
+
end
|
68
92
|
end
|
69
93
|
end
|
70
94
|
|
@@ -72,7 +96,7 @@ class Maruku
|
|
72
96
|
# markdown=1 or markdown=block defined
|
73
97
|
def substitute_markdown_inside_raw_html
|
74
98
|
self.each_element(:raw_html) do |e|
|
75
|
-
doc = e.
|
99
|
+
doc = e.instance_variable_get :@parsed_html
|
76
100
|
if doc # valid html
|
77
101
|
# parse block-level markdown elements in these HTML tags
|
78
102
|
block_tags = ['div']
|
@@ -89,7 +113,7 @@ class Maruku
|
|
89
113
|
s = original_text.to_s.strip # XXX
|
90
114
|
el = create_md_element(:dummy,
|
91
115
|
parse_blocks ? parse_text_as_markdown(s) :
|
92
|
-
parse_lines_as_span(s) )
|
116
|
+
parse_lines_as_span([s]) )
|
93
117
|
el.children_to_html.each do |x|
|
94
118
|
e.insert_before(original_text, x)
|
95
119
|
end
|
@@ -100,5 +124,6 @@ class Maruku
|
|
100
124
|
end
|
101
125
|
end
|
102
126
|
end
|
127
|
+
|
103
128
|
|
104
129
|
end
|
data/lib/maruku/parse_span.rb
CHANGED
@@ -31,8 +31,7 @@ class Maruku
|
|
31
31
|
# first, get rid of linebreaks
|
32
32
|
res = resolve_linebreaks(lines)
|
33
33
|
|
34
|
-
span = MDElement.new
|
35
|
-
span.children = res
|
34
|
+
span = MDElement.new(:dummy, res)
|
36
35
|
|
37
36
|
# encode all escapes
|
38
37
|
span.replace_each_string { |s| s.escape_md_special }
|
@@ -97,15 +96,8 @@ class Maruku
|
|
97
96
|
|
98
97
|
for reg in [inlineHTML1, inlineHTML2]
|
99
98
|
span.map_match(reg) { |match|
|
100
|
-
raw_html =
|
101
|
-
|
102
|
-
e.meta[:raw_html] = raw_html
|
103
|
-
begin
|
104
|
-
e.meta[:parsed_html] = Document.new(raw_html)
|
105
|
-
rescue
|
106
|
-
$stderr.puts "Malformed HTML:\n#{raw_html}"
|
107
|
-
end
|
108
|
-
e
|
99
|
+
raw_html = match[1]
|
100
|
+
convert_raw_html_in_list(raw_html)
|
109
101
|
}
|
110
102
|
end
|
111
103
|
|
@@ -321,44 +313,23 @@ class Maruku
|
|
321
313
|
res
|
322
314
|
end
|
323
315
|
|
316
|
+
# Wraps a chunk of raw HTML, something like
#    <em> A</em> dopwkk *maruk* <em>A</em>
# in a :raw_html element.
#
# The text is always stored under meta[:raw_html]. meta[:parsed_html] is
# set only if Document.new accepts the text; otherwise a message goes to
# $stderr and the element is returned without that key.
def convert_raw_html_in_list(raw_html)
  html_el = create_md_element(:raw_html)
  html_el.meta[:raw_html] = raw_html
  begin
    parsed = Document.new(raw_html)
    html_el.meta[:parsed_html] = parsed
  rescue
    $stderr.puts "convert_raw_html_in_list Malformed HTML:\n#{raw_html}"
  end
  html_el
end
|
328
|
+
|
324
329
|
end
|
325
330
|
|
326
331
|
# And now the black magic that makes the part above so elegant
|
327
|
-
|
328
|
-
class MDElement
|
329
|
-
|
330
|
-
# yields to each element of specified node_type
|
331
|
-
def each_element(e_node_type, &block)
|
332
|
-
@children.each do |c|
|
333
|
-
if c.kind_of? MDElement
|
334
|
-
if (not e_node_type) || (e_node_type == c.node_type)
|
335
|
-
block.call c
|
336
|
-
end
|
337
|
-
c.each_element(e_node_type, &block)
|
338
|
-
end
|
339
|
-
end
|
340
|
-
end
|
341
|
-
|
342
|
-
# Apply passed block to each String in the hierarchy.
|
343
|
-
def replace_each_string(&block)
|
344
|
-
for c in @children
|
345
|
-
if c.kind_of? MDElement
|
346
|
-
c.replace_each_string(&block)
|
347
|
-
end
|
348
|
-
end
|
349
|
-
|
350
|
-
processed = []
|
351
|
-
until @children.empty?
|
352
|
-
c = @children.shift
|
353
|
-
if c.kind_of? String
|
354
|
-
result = block.call(c)
|
355
|
-
[*result].each do |e| processed << e end
|
356
|
-
else
|
357
|
-
processed << c
|
358
|
-
end
|
359
|
-
end
|
360
|
-
@children = processed
|
361
|
-
end
|
332
|
+
class MDElement
|
362
333
|
|
363
334
|
# Try to match the regexp to each string in the hierarchy
|
364
335
|
# (using `replace_each_string`). If the regexp match, eliminate
|
@@ -0,0 +1,681 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
class Maruku
|
4
|
+
include Helpers
|
5
|
+
|
6
|
+
# Characters that lose their special meaning after a backslash in plain
# text (this set is what the span readers pass as +escaped+).
EscapedCharInText = Set.new([
  ?\\, ?`, ?*, ?_, ?{, ?}, ?[, ?], ?(, ?), ?#, ?., ?!, ?|, ?:, ?+, ?-, ?>
])

# The same characters plus both quote characters; used while reading
# quoted strings.
EscapedCharInQuotes = Set.new(EscapedCharInText.to_a + [?', ?"])

# Backslash and backtick only.
# NOTE(review): not referenced in the code visible here; presumably meant
# for inline code -- confirm before relying on it.
EscapedCharInInlineCode = [?\\, ?`]
|
13
|
+
|
14
|
+
# Joins +lines+ with newlines and hands the resulting string to
# parse_span_better, returning whatever that returns.
def parse_lines_as_span(lines)
  joined = lines.join("\n")
  parse_span_better(joined)
end
|
17
|
+
|
18
|
+
# Parses +string+ as span-level markup and returns the elements read.
#
# A non-String argument is reported through +error+. The text is copied
# and the copy frozen before it is wrapped in a CharSource, so the
# caller's string is left untouched. End of input (nil) is the only exit
# char given to read_span.
def parse_span_better(string)
  error "Passed #{string.class}." unless string.kind_of?(String)

  frozen_copy = (string + "").freeze
  read_span(CharSource.new(frozen_copy), EscapedCharInText, [nil])
end
|
27
|
+
|
28
|
+
# This is the main loop for reading span elements.
#
# It's long, but not *complex* or difficult to understand.
#
# Consumes +src+ one construct at a time, collecting text and elements in
# a fresh SpanContext, and stops WITHOUT consuming the terminator when:
# * the current char is listed in +exit_on_chars+ (put nil in the list to
#   make end of input a regular exit), or
# * the input at the cursor starts with one of +exit_on_strings+, or
# * the input ends, which is reported through +error+.
#
# ASCII letters are copied before the exit checks run, so a letter listed
# in +exit_on_chars+ never stops the loop.
#
# +escaped+ holds the chars that are taken literally after a backslash.
# Returns the context's list of elements.
def read_span(src, escaped, exit_on_chars, exit_on_strings=nil)
  con = SpanContext.new
  c = d = nil
  while true
    c = src.cur_char

    # Fast path: plain ASCII letters go straight into the current string.
    if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z)))
      con.cur_string << src.shift_char
      next
    end

    break if exit_on_chars && exit_on_chars.include?(c)
    break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x}

    case c
    when ?\s
      # Hard line break: two trailing spaces plus the newline, i.e. exactly
      # the three chars skipped below.
      if src.cur_chars_are "  \n"
        src.ignore_chars(3)
        con.push_element create_md_element(:linebreak)
        next
      else
        src.ignore_char
        con.push_space
      end
    when ?\n, ?\t
      src.ignore_char
      con.push_space
    when ?`
      read_inline_code(src,con)
    when ?<
      # It could be:
      # 1) HTML "<div ..."
      # 2) HTML "<!-- ..."
      # 3) url "<http:// ", "<ftp:// ..."
      # 4) email "<andrea@... ", "<mailto:andrea@..."
      # 5) on itself! "a < b "

      case d = src.next_char
      when ?!
        if src.cur_chars_are '<!--'
          read_inline_html(src, con)
        else
          con.push_char src.shift_char
        end
      when ??
        # Server directives ("<?...") are not parsed: read_server_directive
        # has an empty body and consumes nothing, so calling it here would
        # spin forever on the same "<". Emit the "<" as literal text.
        con.push_char src.shift_char
      when ?\s, ?\t
        con.push_char src.shift_char
      else
        if src.next_matches(/<mailto:/) ||
           src.next_matches(/<[\w\.]+\@/)
          read_email_el(src, con)
        elsif src.next_matches(/<\w+:/)
          read_url_el(src, con)
        elsif src.next_matches(/<\w/)
          read_inline_html(src, con)
        else
          con.push_char src.shift_char
        end
      end
    when ?\\
      d = src.next_char
      if escaped.include? d
        # drop the backslash, keep the escaped char
        src.ignore_chars(2)
        con.push_char d
      else
        con.push_char src.shift_char
      end
    when ?[
      if markdown_extra? && src.next_char == ?^
        read_footnote_ref(src,con)
      else
        read_link(src, con)
      end
    when ?!
      if src.next_char == ?[
        read_image(src, con)
      else
        con.push_char src.shift_char
      end
    when ?&
      if m = src.read_regexp(/&([\w\d]+);/)
        con.push_element md_entity(m[1])
      else
        con.push_char src.shift_char
      end
    when ?*
      if not src.next_char
        error "Opening * as last char", src, con
        tell_user "Threating as literal"
        con.push_char src.shift_char
      else
        # longest delimiter first; it must be followed by a char that is
        # neither whitespace nor another delimiter char
        follows = src.cur_chars(4)
        if follows =~ /^\*\*\*[^\s\*]/
          con.push_element read_emstrong(src,'***')
        elsif follows =~ /^\*\*[^\s\*]/
          con.push_element read_strong(src,'**')
        elsif follows =~ /^\*[^\s\*]/
          con.push_element read_em(src,'*')
        else # * is just a normal char
          con.push_char src.shift_char
        end
      end
    when ?_
      if not src.next_char
        error "Opening _ as last char", src, con
        tell_user "Threating as literal"
        con.push_char src.shift_char
      else
        follows = src.cur_chars(4)
        if follows =~ /^\_\_\_[^\s\_]/
          con.push_element read_emstrong(src,'___')
        elsif follows =~ /^\_\_[^\s\_]/
          con.push_element read_strong(src,'__')
        elsif follows =~ /^\_[^\s\_]/
          con.push_element read_em(src,'_')
        else # _ is just a normal char
          con.push_char src.shift_char
        end
      end
    when nil
      # Only reached when nil is not an accepted exit char. The message is
      # built by interpolation so that a "%" inside the inspected
      # delimiters cannot be taken for a format directive.
      waiting = exit_on_chars ? "#{exit_on_chars.inspect} or" : ""
      error "Unclosed span (waiting for #{waiting}#{exit_on_strings.inspect})",
        src, con

      tell_user "I will boldly go ahead."
      break
    else # normal text
      con.push_char src.shift_char
    end # end case
  end # end while true
  con.push_string_if_present
  con.elements
end
|
170
|
+
|
171
|
+
# Reads an autolink of the form "<...>" at the cursor: skips the "<",
# takes everything up to ">" verbatim (no escapes), skips the ">" and
# pushes the resulting md_url element onto +con+.
def read_url_el(src,con)
  src.ignore_char # leading <
  target = read_simple(src, [], [?>])
  src.ignore_char # closing >

  con.push_element(md_url(target))
end
|
178
|
+
|
179
|
+
# Reads an e-mail autolink "<address>" or "<mailto:address>" at the
# cursor. The text between the angle brackets is taken verbatim, a
# leading "mailto:" is removed, and an md_email element is pushed.
def read_email_el(src,con)
  src.ignore_char # leading <
  raw = read_simple(src, [], [?>])
  src.ignore_char # closing >

  con.push_element(md_email(raw.gsub(/^mailto:/,'')))
end
|
187
|
+
|
188
|
+
# Reads a URL from +src+ up to (not including) any char in +break_on+.
#
# A quote as first char is reported through +error+; reading goes on
# regardless. If the text read is wrapped in "<" and ">", the brackets
# are stripped. Returns the URL, or nil when nothing is left.
def read_url(src, break_on)
  first = src.cur_char
  error 'Invalid char for url', src if first == ?' || first == ?"

  url = read_simple(src, [], break_on)

  bracketed = (url[0] == ?< && url[-1] == ?>)
  url = url[1, url.size-2] if bracketed

  url.size == 0 ? nil : url
end
|
205
|
+
|
206
|
+
# Tries to read a quoted value. If the stream does not start with ' or ",
# returns nil and consumes nothing. Otherwise reads up to the matching
# quote (honouring the escapes in EscapedCharInQuotes), skips that quote
# and returns the text between the quotes.
def read_quoted(src,con)
  opener = src.cur_char
  return nil unless opener == ?' || opener == ?"

  quote_char = src.shift_char # opening quote
  text = read_simple(src, EscapedCharInQuotes, [quote_char])
  src.ignore_char # closing quote
  text
end
|
220
|
+
|
221
|
+
# Reads a simple string (no formatting) until one of +exit_on_chars+ is
# the current char; that char is left unconsumed.
#
# A backslash followed by a char in +escaped+ yields just that char; any
# other backslash is kept as is. Running out of input is reported through
# +error+ and whatever was read so far is returned.
def read_simple(src, escaped, exit_on_chars)
  text = ""
  loop do
    c = src.cur_char
    break if exit_on_chars && exit_on_chars.include?(c)

    if c.nil?
      msg = "String finished while reading (break on #{exit_on_chars.inspect})"+
        " already read: #{text.inspect}"
      error msg, src
      tell_user "I boldly continue"
      break
    end

    if c == ?\\
      following = src.next_char
      if escaped.include? following
        # skip the backslash and the escaped char, keep only the latter
        src.ignore_chars(2)
        text << following
        next
      end
    end

    text << src.shift_char
  end
  text
end
|
254
|
+
|
255
|
+
# Reads an emphasis span: skips one opening char, reads the content up to
# +delim+, skips one closing char and returns md_em of the content.
def read_em(src, delim)
  src.ignore_char
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_char
  md_em(inner)
end
|
261
|
+
|
262
|
+
# Reads a strong span: skips two opening chars, reads the content up to
# +delim+, skips two closing chars and returns md_strong of the content.
def read_strong(src, delim)
  src.ignore_chars(2)
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(2)
  md_strong(inner)
end
|
268
|
+
|
269
|
+
# Reads a combined emphasis+strong span: skips three opening chars, reads
# the content up to +delim+, skips three closing chars and returns
# md_emstrong of the content.
def read_emstrong(src, delim)
  src.ignore_chars(3)
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(3)
  md_emstrong(inner)
end
|
275
|
+
|
276
|
+
# The space character (code 32), as a character literal.
SPACE = ?\s

# Matches the remainder of a bracketed reference id: group 1 is the id
# itself (no whitespace, no "]"), group 2 the optional whitespace plus the
# closing bracket.
R_REF_ID = /([^\]\s]*)(\s*\])/
|
280
|
+
|
281
|
+
# Reads a bracketed id "[refid]". Consumes also both brackets.
#
# Skips the opening bracket and any whitespace after it, then reads
# R_REF_ID. Returns the id (first group), or nil when the pattern does
# not match.
def read_ref_id(src, con)
  src.ignore_char # [
  src.consume_whitespace
  found = src.read_regexp(R_REF_ID)
  found && found[1]
end
|
294
|
+
|
295
|
+
# Reads a bracketed id at the cursor with read_ref_id and pushes the
# matching md_foot_ref element onto +con+.
def read_footnote_ref(src,con)
  id = read_ref_id(src,con)
  con.push_element(md_foot_ref(id))
end
|
299
|
+
|
300
|
+
# Reads inline HTML starting at the cursor.
#
# The rest of the buffer is fed to an HTMLHelper. If the helper consumed
# anything, what it read is pushed as an md_html element and that many
# chars are skipped; otherwise the current char is pushed as plain text.
# An incomplete tag is reported through +error+; an exception is reported
# through +maruku_error+ and the current char is pushed as plain text.
#
# NOTE(review): `Exception` below is resolved lexically; confirm whether
# it is the global class or one defined by the project.
def read_inline_html(src, con)
  h = HTMLHelper.new
  begin
    # This is our current buffer in the context
    start = src.current_remaining_buffer

    h.eat_this start
    unless h.is_finished?
      error "inline_html: Malformed:\n #{start.inspect}\n #{h.inspect}",src,con
    end

    consumed = start.size - h.rest.size
    if consumed > 0
      con.push_element md_html(h.stuff_you_read)
      src.ignore_chars(consumed)
    else
      puts "HTML helper did not work on #{start.inspect}"
      con.push_char src.shift_char
    end
  rescue Exception => e
    details = add_tabs(e.inspect+e.backtrace.join("\n"),1,'>')
    maruku_error "Bad html: \n" + details, src,con
    tell_user "I will try to continue after bad HTML."
    con.push_char src.shift_char
  end
end
|
328
|
+
|
329
|
+
# Reads an inline code span delimited by N backticks and pushes an
# md_code element.
#
# When N > 1, one space right after the opening ticks and one space right
# before the closing ticks are dropped. If the input ends before the
# closing ticks, the problem is reported through +error+ and the text
# read so far is used as the code.
def read_inline_code(src, con)
  num_ticks = 0
  while src.cur_char == ?`
    num_ticks += 1
    src.ignore_char
  end

  multi = num_ticks > 1

  # ignore space
  src.ignore_char if multi && src.cur_char == SPACE

  end_string = "`"*num_ticks

  code = ''
  loop do
    unless src.cur_char
      error("Ticks not finished: read #{code.inspect}"+
        " and waiting for #{end_string.inspect} num=#{num_ticks}",
        src,con)
      tell_user "Read invalid code block: #{code.inspect}"
      break
    end

    if src.cur_chars(num_ticks) == end_string
      src.ignore_chars num_ticks
      break
    end

    code << src.shift_char
  end

  # drop last space
  code = code[0,code.size-1] if multi && code[-1] == SPACE

  con.push_element md_code(code)
end
|
374
|
+
|
375
|
+
|
376
|
+
|
377
|
+
# Placeholder for reading a server directive; it takes no arguments,
# consumes nothing and returns nil. The sketch of the intended
# implementation is kept below:
#
#   match = gimme(/^(.*)\?>/)
#   if not match
#     error "Server directive not closed"
#   end
#   server = match[1]
#   con.found_object create_md_element(:server, server)
def read_server_directive
  nil
end
|
385
|
+
|
386
|
+
# Reads a link starting at "[".
#
# The bracketed text is read as a span first. What follows decides:
# * "(url "title")" -> an md_im_link element is pushed; a missing url
#   becomes '' and the title is optional;
# * "[ref]"         -> an md_link element is pushed;
# * anything else   -> the bracketed content is pushed as it is.
# One space between "]" and a following "[" or "(" is skipped. When the
# closing ")" or the ref id cannot be read, the problem is reported and
# the bracketed content is pushed instead of a link.
def read_link(src, con)
  # we read the string and see what happens
  src.ignore_char # opening bracket
  children = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket

  # ignore space
  if src.cur_char == SPACE &&
     (src.next_char == ?[ || src.next_char == ?( )
    src.shift_char
  end

  lead = src.cur_char
  if lead == ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE,?\t,?)]) || '' # no url is ok
    src.consume_whitespace
    # anything but ")" here must be a title
    title = (src.cur_char != ?)) ? read_quoted(src,con) : nil
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      error 'Unclosed link',src,con
      tell_user "No closing ): I will not create the link for #{children.inspect}"
      con.push_elements children
      return
    end
    con.push_element md_im_link(children,url, title)
  elsif lead == ?[ # link ref
    ref_id = read_ref_id(src,con)
    unless ref_id
      maruku_error "Could not read ref_id", src, con
      tell_user "I will not create the link for #{children.inspect}"
      con.push_elements children
      return
    end
    con.push_element md_link(children, ref_id)
  else # no stuff
    con.push_elements children
  end
end # read link
|
434
|
+
|
435
|
+
# Reads an image starting at "![".
#
# The alt text is read as a span first. What follows decides:
# * "(url "title")" -> an md_im_image element is pushed; a missing url,
#   an unquoted title and a missing ")" are each reported through +error+;
# * "[ref]"         -> an md_image element is pushed;
# * anything else   -> the alt text is pushed as it is.
# One space between "]" and a following "[" or "(" is skipped.
def read_image(src, con)
  src.ignore_chars(2) # opening "!["
  alt_text = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket
  # ignore space
  if src.cur_char == SPACE &&
     (src.next_char == ?[ || src.next_char == ?( )
    src.ignore_char
  end
  case src.cur_char
  when ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE,?\t,?)])
    if not url
      error "Could not read url from #{src.cur_chars(10).inspect}",
        src,con
    end
    src.consume_whitespace
    title = nil
    if src.cur_char != ?) # we have a title
      title = read_quoted(src,con)
      error 'Must quote title',src,con if not title
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      # closing is nil when the input ended; appending nil to a String
      # raises, so show nothing between the quotes in that case.
      found = closing ? ('' << closing) : ''
      error "Unclosed link: '#{found}'"+
        " Read url=#{url.inspect} title=#{title.inspect}",src,con
    end
    con.push_element md_im_image(alt_text, url, title)
  when ?[ # link ref
    ref_id = read_ref_id(src,con)
    con.push_element md_image(alt_text, ref_id)
  else # no stuff
    # there is no local named `children` in this method: the span read
    # above is alt_text
    con.push_elements alt_text
  end
end # read image
|
473
|
+
|
474
|
+
end
|
475
|
+
|
476
|
+
|
477
|
+
class SpanContext
|
478
|
+
include MarukuStrings
|
479
|
+
|
480
|
+
# Read elements
|
481
|
+
attr_accessor :elements
|
482
|
+
attr_accessor :cur_string
|
483
|
+
|
484
|
+
# Starts with no elements and an empty current string.
def initialize
  @elements, @cur_string = [], ""
end
|
488
|
+
|
489
|
+
# Appends +e+ to the elements, first flushing any pending text so that
# the order of text and elements is kept. Only Strings and MDElements
# are accepted; anything else raises. Returns nil.
def push_element(e)
  unless e.kind_of?(String) || e.kind_of?(MDElement)
    raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} "
  end

  push_string_if_present
  @elements << e
  nil
end
|
497
|
+
|
498
|
+
# Pushes every item of +a+ in order. Strings are appended to the current
# string, so that adjacent text ends up in one chunk; anything else goes
# through push_element. Returns +a+.
def push_elements(a)
  a.each do |e|
    if e.kind_of? String
      # Append the string in one go. Re-pushing it byte by byte hands
      # integers to <<, which breaks multibyte characters apart wherever
      # an integer argument is treated as a codepoint.
      @cur_string << e
    else
      push_element e
    end
  end
end
|
507
|
+
# Moves the pending text, if there is any, to the end of the elements and
# starts a new empty current string. Returns nil.
def push_string_if_present
  unless @cur_string.size == 0
    @elements << @cur_string
    @cur_string = ""
  end
  nil
end
|
514
|
+
|
515
|
+
# Appends +c+ to the current string. Returns nil.
def push_char(c)
  @cur_string << c
  nil
end
|
519
|
+
|
520
|
+
# Appends a space to the current string unless the string already ends
# with one; an empty current string gets the space too.
def push_space
  tail = @cur_string[-1]
  @cur_string << ?\s unless tail == ?\s
end
|
526
|
+
|
527
|
+
# Returns a human-readable dump of the context: the inspected elements,
# one per line and passed through add_tabs, followed by the pending text
# when it is not empty.
def describe
  listing = @elements.map { |el| el.inspect }.join("\n")
  report = "Elements read in span: \n" +
    add_tabs(listing,1, ' -') + "\n"

  report += "Current string: \n #{@cur_string.inspect}\n" if @cur_string.size > 0
  report
end
|
537
|
+
|
538
|
+
end
|
539
|
+
|
540
|
+
class CharSource
|
541
|
+
include MarukuStrings
|
542
|
+
|
543
|
+
# Wraps the string +s+ with the cursor on its first char.
def initialize(s)
  @buffer = s
  @buffer_index = 0
  # not read by any CharSource method visible here; kept as they were
  @elements = []
  @cur_string = ""
end
|
549
|
+
|
550
|
+
# Return the char of the buffer at the cursor (nil past the end).
def cur_char
  @buffer[@buffer_index]
end
|
552
|
+
|
553
|
+
# Return up to +n+ chars starting at the cursor, as a String.
def cur_chars(n)
  @buffer[@buffer_index, n]
end
|
555
|
+
|
556
|
+
# Return the char right after the one at the cursor (nil past the end).
def next_char
  @buffer[@buffer_index + 1]
end
|
558
|
+
|
559
|
+
# Returns the char at the cursor and moves the cursor one position on.
def shift_char
  taken = @buffer[@buffer_index]
  @buffer_index += 1
  taken
end
|
564
|
+
|
565
|
+
# Moves the cursor one position on; the value is the new cursor position.
def ignore_char
  @buffer_index = @buffer_index + 1
end
|
568
|
+
|
569
|
+
# Moves the cursor +n+ positions on. Returns nil.
def ignore_chars(n)
  @buffer_index = @buffer_index + n
  nil
end
|
573
|
+
|
574
|
+
# Returns everything from the cursor to the end of the buffer.
def current_remaining_buffer
  @buffer[@buffer_index..-1]
end
|
577
|
+
|
578
|
+
# Tells whether the buffer continues with +string+ right at the cursor.
# Returns true or false.
#
# The check compares the slice at the cursor directly. A pattern of the
# form /^.{index}string/m is not equivalent: "^" also matches after every
# newline, so on multi-line input it can report text that sits further
# down the buffer.
def cur_chars_are(string)
  @buffer[@buffer_index, string.size] == string
end
|
582
|
+
|
583
|
+
# Matches the regexp +r+ against the buffer exactly at the cursor,
# without moving it. Returns the MatchData, or nil. The match covers the
# buffer from its start, so pre-cursor text is part of m[0].
#
# The pattern is anchored with \A rather than "^": "^" also matches
# after every newline, which would let +r+ match further down the buffer
# on multi-line input.
def next_matches(r)
  /\A.{#{@buffer_index}}#{r}/m.match(@buffer)
end
|
587
|
+
|
588
|
+
# Matches the regexp +r+ against the buffer exactly at the cursor. On
# success the cursor is moved past the matched text. Returns the
# MatchData (groups numbered as in +r+), or nil with the cursor
# untouched.
#
# The pattern is anchored with \A rather than "^": "^" also matches
# after every newline, which on multi-line input would accept text that
# is not at the cursor and compute a wrong number of consumed chars.
def read_regexp(r)
  m = /\A.{#{@buffer_index}}#{r}/m.match(@buffer)
  if m
    # m[0] includes everything before the cursor; skip only what is new
    ignore_chars(m.to_s.size - @buffer_index)
  end
  m
end
|
603
|
+
|
604
|
+
# Moves the cursor past any run of spaces and tabs. Returns nil.
#
# The space is tested with a character literal, like the tab, instead of
# the integer 32, so the test holds whatever kind of value the buffer
# yields for a char.
def consume_whitespace
  while c = cur_char
    break unless c == ?\s || c == ?\t
    ignore_char
  end
end
|
615
|
+
|
616
|
+
# Appends to +out+ the run of ASCII letters (a-z, A-Z) found at the
# cursor and moves the cursor past it. Returns nil.
def read_text_chars(out)
  limit = @buffer.size
  while @buffer_index < limit
    ch = @buffer[@buffer_index]
    break unless ch
    break unless (ch>=?a && ch<=?z) || (ch>=?A && ch<=?Z)
    out << ch
    @buffer_index += 1
  end
end
|
624
|
+
|
625
|
+
# Returns a multi-line description of where the cursor is: a window of
# at most 75 chars around the cursor (newlines shown as N, tabs as T,
# "EOF" appended when the window reaches the end of the buffer), a marker
# line pointing at the cursor, the cursor position, and the whole buffer
# passed through add_tabs.
def describe
  len = 75
  before_max = @buffer_index
  after_max = @buffer.size-@buffer_index

  # Give each side half of the window at first, then let either side use
  # the room the other one does not need.
  num_after = [len/2, after_max].min
  num_before = [before_max, len-num_after].min
  num_after = [after_max, len-num_before].min

  index_start = [@buffer_index - num_before, 0].max
  index_end = [@buffer_index + num_after, @buffer.size].min
  size = index_end - index_start

  str = @buffer[index_start, size]
  str.gsub!("\n",'N')
  str.gsub!("\t",'T')
  str += "EOF" if index_end == @buffer.size

  # column of the cursor inside the window
  pre_s = [@buffer_index-index_start, 0].max
  pre_s2 = [len-pre_s,0].max
  pre = " "*(pre_s)

  "-"*len+"\n"+
  str + "\n" +
  "-"*pre_s + "|" + "-"*(pre_s2)+"\n"+
  pre + "+--- Byte #{@buffer_index}\n"+
  "Shown bytes [#{index_start} to #{size}] of #{@buffer.size}:\n"+
  add_tabs(@buffer,1,">")
end
|
673
|
+
|
674
|
+
# Returns the inspected form of the next 15 chars; handy for debug output.
def some
  upcoming = cur_chars(15)
  upcoming.inspect
end
|
677
|
+
end
|
678
|
+
|
679
|
+
|
680
|
+
|
681
|
+
|