maiku 0.6.1.maiku

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. data/lib/maruku.rb +141 -0
  2. data/lib/maruku/attributes.rb +175 -0
  3. data/lib/maruku/defaults.rb +71 -0
  4. data/lib/maruku/errors_management.rb +92 -0
  5. data/lib/maruku/ext/div.rb +133 -0
  6. data/lib/maruku/ext/math.rb +41 -0
  7. data/lib/maruku/ext/math/elements.rb +27 -0
  8. data/lib/maruku/ext/math/latex_fix.rb +12 -0
  9. data/lib/maruku/ext/math/mathml_engines/blahtex.rb +107 -0
  10. data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +29 -0
  11. data/lib/maruku/ext/math/mathml_engines/none.rb +20 -0
  12. data/lib/maruku/ext/math/mathml_engines/ritex.rb +24 -0
  13. data/lib/maruku/ext/math/parsing.rb +119 -0
  14. data/lib/maruku/ext/math/to_html.rb +187 -0
  15. data/lib/maruku/ext/math/to_latex.rb +26 -0
  16. data/lib/maruku/helpers.rb +260 -0
  17. data/lib/maruku/input/charsource.rb +326 -0
  18. data/lib/maruku/input/extensions.rb +69 -0
  19. data/lib/maruku/input/html_helper.rb +189 -0
  20. data/lib/maruku/input/linesource.rb +111 -0
  21. data/lib/maruku/input/parse_block.rb +616 -0
  22. data/lib/maruku/input/parse_doc.rb +232 -0
  23. data/lib/maruku/input/parse_span_better.rb +746 -0
  24. data/lib/maruku/input/rubypants.rb +225 -0
  25. data/lib/maruku/input/type_detection.rb +147 -0
  26. data/lib/maruku/input_textile2/t2_parser.rb +163 -0
  27. data/lib/maruku/maruku.rb +33 -0
  28. data/lib/maruku/output/s5/fancy.rb +756 -0
  29. data/lib/maruku/output/s5/to_s5.rb +138 -0
  30. data/lib/maruku/output/to_html.rb +991 -0
  31. data/lib/maruku/output/to_latex.rb +590 -0
  32. data/lib/maruku/output/to_latex_entities.rb +367 -0
  33. data/lib/maruku/output/to_latex_strings.rb +64 -0
  34. data/lib/maruku/output/to_markdown.rb +164 -0
  35. data/lib/maruku/output/to_s.rb +56 -0
  36. data/lib/maruku/string_utils.rb +201 -0
  37. data/lib/maruku/structures.rb +167 -0
  38. data/lib/maruku/structures_inspect.rb +87 -0
  39. data/lib/maruku/structures_iterators.rb +61 -0
  40. data/lib/maruku/textile2.rb +1 -0
  41. data/lib/maruku/toc.rb +199 -0
  42. data/lib/maruku/usage/example1.rb +33 -0
  43. data/lib/maruku/version.rb +39 -0
  44. metadata +167 -0
@@ -0,0 +1,232 @@
1
+ #--
2
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
+ #
4
+ # This file is part of Maruku.
5
+ #
6
+ # Maruku is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Maruku is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Maruku; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
20
+
21
+ module MaRuKu; module In; module Markdown; module BlockLevelParser
22
+
23
# Parses the whole document +s+ and populates this document node.
#
# Steps, in order: split off the email-style headers and merge them into
# the document attributes, optionally re-encode the body to UTF-8, parse
# the body as Markdown, run the Markdown-Extra post-processing, build the
# table of contents, expand attribute lists on every element and, when
# enabled globally, execute embedded code blocks.
def parse_doc(s)
  # FIXME \r\n => \n
  headers = parse_email_headers(s)
  data = headers[:data]
  headers.delete :data

  attributes.merge! headers

=begin maruku_doc
Attribute: encoding
Scope: document
Summary: Encoding for the document.

If the `encoding` attribute is specified, then the content
will be converted from the specified encoding to UTF-8.
=end

  enc = attributes[:encoding]
  attributes.delete :encoding
  if enc && enc.downcase != 'utf-8'
    # Prefer ruby 1.9 String#encode!, fall back to Iconv on ruby 1.8
    if data.respond_to?(:encode!)
      data.encode!('UTF-8', enc)
    else
      require 'iconv'
      data = Iconv.new('utf-8', enc).iconv(data)
    end
  end

  @children = parse_text_as_markdown(data)

  # Markdown-Extra post-processing (always enabled)
  search_abbreviations
  substitute_markdown_inside_raw_html

  toc = create_toc

  # Fall back to the first header as document title
  if !attributes[:title] && toc.header_element
    attributes[:title] = toc.header_element.to_s
  end

  # save for later use
  self.toc = toc

  # Expand attribute lists: per-node-type defaults first, then the
  # element's own list.
  each_element do |element|
    defaults = ald[element.node_type.to_s]
    expand_attribute_list(defaults, element.attributes) if defaults
    expand_attribute_list(element.al, element.attributes)
  end

=begin maruku_doc
Attribute: unsafe_features
Scope: global
Summary: Enables execution of XML instructions.

Disabled by default because of security concerns.
=end

  if Maruku::Globals[:unsafe_features]
    execute_code_blocks
    # TODO: remove executed code blocks
  end
end
97
+
98
# Expands the attribute list +al+ (pairs of key and value) into the
# Hash +result+.
#
# * :class values are accumulated, separated by a space
# * :id overwrites any previous id
# * :ref pulls in the attribute list definition with that name; names
#   already expanded are reported as circular, unknown names are recorded
#   under :unresolved_references and also set as a boolean attribute
# * any other key is stored under its symbolized name
def expand_attribute_list(al, result)
  al.each do |key, value|
    case key
    when :class
      previous = result[:class]
      result[:class] = previous ? previous + (" " + value) : value
    when :id
      result[:id] = value
    when :ref
      definition = self.ald[value]
      if definition
        seen = (result[:expanded_references] ||= [])
        repeated = seen.include?(value)
        seen.push value
        if repeated
          maruku_error "Circular reference between labels.\n\n"+
            "Label #{value.inspect} calls itself via recursion.\nThe recursion is "+
            (seen.map{|x| x.inspect}.join(' => '))
        else
          expand_attribute_list(definition, result)
        end
      else
        if result[:unresolved_references]
          result[:unresolved_references] << " #{value}"
        else
          result[:unresolved_references] = value
        end
        result[value.to_sym] = true
      end
    else
      result[key.to_sym] = value
    end
  end
end
136
+
137
# Evaluates +code+ in the context of +object+ (via instance_eval).
#
# Returns the value of the evaluated code. If the code raises a
# StandardError or a ScriptError (which includes SyntaxError), the
# problem is reported through maruku_error, with the offending code and
# the backtrace, and nil is returned.
#
# Process-level exceptions (Interrupt, SystemExit, NoMemoryError, ...)
# are deliberately not intercepted.
def safe_execute_code(object, code)
  object.instance_eval(code)
rescue StandardError, ScriptError => e
  # Exception#backtrace is the public accessor; Kernel#caller is private
  # and cannot be called on the exception object.
  details = ([e.inspect] + (e.backtrace || [])).join("\n")
  maruku_error "Exception while executing this:\n"+
    add_tabs(code, 1, ">")+
    "\nThe error was:\n"+
    add_tabs(details, 1, "|")
  nil
end
158
+
159
# Runs the code of every XML processing instruction whose target is
# 'maruku' (evaluated in the context of the instruction element) and
# prints the result to standard output when it is a String.
def execute_code_blocks
  each_element(:xml_instr) do |instr|
    next unless instr.target == 'maruku'
    outcome = safe_execute_code(instr, instr.code)
    puts "Result is : #{outcome.inspect}" if outcome.kind_of?(String)
  end
end
169
+
170
# For each known abbreviation, splits every string that contains it into
# [text before, abbreviation element, text after]. Only the first
# occurrence inside each string is handled per pass.
def search_abbreviations
  abbreviations.each do |abbrev, title|
    pattern = Regexp.new(Regexp.escape(abbrev))
    replace_each_string do |text|
      # bug if many abbreviations are present (agorf)
      found = pattern.match(text)
      if found
        abbr_element = md_abbr(abbrev.dup, title ? title.dup : nil)
        [found.pre_match, abbr_element, found.post_match]
      else
        text
      end
    end
  end
end
184
+
185
+ include REXML
186
# (PHP Markdown extra) Processes Markdown embedded in raw HTML.
#
# For every raw HTML element whose HTML could be parsed, looks for tags
# carrying a 'markdown' attribute, parses the text nodes *inside that tag*
# as Markdown, replaces each text node with the generated HTML nodes and
# finally removes the 'markdown' attribute.
#
# Text is parsed at block level when the attribute value is 'block' or
# the tag is a <div>; otherwise it is parsed at span level.
def substitute_markdown_inside_raw_html
  # tags whose content is always parsed as block-level Markdown
  block_tags = ['div']

  each_element(:raw_html) do |raw|
    doc = raw.instance_variable_get :@parsed_html
    next unless doc # nothing to do when the HTML was not parsed

    REXML::XPath.match(doc, "//*[attribute::markdown]").each do |tag|
      how = tag.attributes['markdown']
      parse_blocks = (how == 'block') || block_tags.include?(tag.name)

      # ".//text()" is relative to +tag+; an absolute "//text()" would
      # select every text node of the whole document.
      REXML::XPath.match(tag, ".//text()").each do |original_text|
        s = original_text.value.strip
        next if s.size == 0

        el = md_el(:dummy,
          parse_blocks ? parse_text_as_markdown(s) :
                         parse_lines_as_span([s]))
        parent = original_text.parent
        el.children_to_html.each do |node|
          parent.insert_before(original_text, node)
        end
        parent.delete(original_text)
      end

      # remove 'markdown' attribute
      tag.delete_attribute 'markdown'
    end
  end
end
231
+
232
+ end end end end
@@ -0,0 +1,746 @@
1
+ #--
2
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
+ #
4
+ # This file is part of Maruku.
5
+ #
6
+ # Maruku is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Maruku is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Maruku; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
20
+
21
+
22
+ require 'set'
23
+
24
+ module MaRuKu; module In; module Markdown; module SpanLevelParser
25
+ include MaRuKu::Helpers
26
+
27
+ EscapedCharInText =
28
+ Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>]
29
+
30
+ EscapedCharInQuotes =
31
+ Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>,?',?"]
32
+
33
+ EscapedCharInInlineCode = [?\\,?`]
34
+
35
# Joins +lines+ with newlines and parses the result as span-level
# Markdown. +parent+ is handed through to parse_span_better.
def parse_lines_as_span(lines, parent=nil)
  joined = lines.join("\n")
  parse_span_better(joined, parent)
end
38
+
39
# Parses +string+ as span-level Markdown and returns what read_span
# returns. The parser works on a frozen copy, so the caller's string is
# never modified.
def parse_span_better(string, parent=nil)
  error "Passed #{string.class}." unless string.kind_of?(String)

  frozen_copy = (string + "")
  frozen_copy.freeze
  source = CharSource.new(frozen_copy, parent)
  read_span(source, EscapedCharInText, [nil])
end
48
+
49
# This is the main loop for reading span elements.
#
# It's long, but not *complex* or difficult to understand: it looks at
# the current character of +src+ and dispatches to the reader for the
# construct that character may start, collecting everything in a
# SpanContext.
#
# src::             character source positioned at the start of the span
# escaped::         characters that a backslash turns into literals
# exit_on_chars::   stop (without consuming) when the current character
#                   is in this list; may be nil. ASCII letters cannot be
#                   used here because of the fast path below.
# exit_on_strings:: stop (without consuming) when the source is positioned
#                   on one of these strings; may be nil
#
# Returns the result of +educate+ applied to the collected elements,
# after stripping one leading and one trailing space.
def read_span(src, escaped, exit_on_chars, exit_on_strings=nil)
  con = SpanContext.new
  c = d = nil
  while true
    c = src.cur_char

    # This is only an optimization which cuts 50% of the time used.
    # (but you can't use a-zA-Z in exit_on_chars)
    if c && ((c >= ?a && c <= ?z) || (c >= ?A && c <= ?Z))
      con.cur_string << src.shift_char
      next
    end

    break if exit_on_chars && exit_on_chars.include?(c)
    break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x}

    # registered span extensions get the first chance
    next if check_span_extensions(src, con)

    case c = src.cur_char
    when ?\s
      # two spaces followed by a newline make a hard line break;
      # exactly these three characters are consumed
      if src.cur_chars_are "  \n"
        src.ignore_chars(3)
        con.push_element md_br()
        next
      else
        src.ignore_char
        con.push_space
      end
    when ?\n, ?\t
      src.ignore_char
      con.push_space
    when ?`
      read_inline_code(src,con)
    when ?<
      # It could be:
      # 1) HTML "<div ..."
      # 2) HTML "<!-- ..."
      # 3) url "<http:// ", "<ftp:// ..."
      # 4) email "<andrea@... ", "<mailto:andrea@..."
      # 5) on itself! "a < b "
      # 6) Start of <<guillemettes>>
      case d = src.next_char
      when ?< # guillemettes
        src.ignore_chars(2)
        con.push_char ?<
        con.push_char ?<
      when ?!
        if src.cur_chars_are '<!--'
          read_inline_html(src, con)
        else
          con.push_char src.shift_char
        end
      when ??
        read_xml_instr_span(src, con)
      when ?\s, ?\t
        con.push_char src.shift_char
      else
        if src.next_matches(/<mailto:/) or
           src.next_matches(/<[\w\.]+\@/)
          read_email_el(src, con)
        elsif src.next_matches(/<\w+:/)
          read_url_el(src, con)
        elsif src.next_matches(/<\w/)
          read_inline_html(src, con)
        else
          con.push_char src.shift_char
        end
      end
    when ?\\
      d = src.next_char
      if d == ?'
        src.ignore_chars(2)
        con.push_element md_entity('apos')
      elsif d == ?"
        src.ignore_chars(2)
        con.push_element md_entity('quot')
      elsif escaped.include? d
        src.ignore_chars(2)
        con.push_char d
      else
        con.push_char src.shift_char
      end
    when ?[
      if markdown_extra? && src.next_char == ?^
        read_footnote_ref(src,con)
      else
        read_link(src, con)
      end
    when ?!
      if src.next_char == ?[
        read_image(src, con)
      else
        con.push_char src.shift_char
      end
    when ?&
      # named references
      if m = src.read_regexp(/\&([\w\d]+);/)
        con.push_element md_entity(m[1])
      # numeric
      elsif m = src.read_regexp(/\&\#(x)?([\w\d]+);/)
        num = m[1] ? m[2].hex : m[2].to_i
        con.push_element md_entity(num)
      else
        con.push_char src.shift_char
      end
    when ?*
      if not src.next_char
        maruku_error "Opening * as last char.", src, con
        maruku_recover "Threating as literal", src, con
        con.push_char src.shift_char
      else
        follows = src.cur_chars(4)
        if follows =~ /^\*\*\*[^\s\*]/
          con.push_element read_emstrong(src,'***')
        elsif follows =~ /^\*\*[^\s\*]/
          con.push_element read_strong(src,'**')
        elsif follows =~ /^\*[^\s\*]/
          con.push_element read_em(src,'*')
        else # * is just a normal char
          con.push_char src.shift_char
        end
      end
    when ?_
      if not src.next_char
        maruku_error "Opening _ as last char", src, con
        maruku_recover "Threating as literal", src, con
        con.push_char src.shift_char
      else
        # we don't want "mod_ruby" to start an emphasis
        # so we start one only if
        # 1) the last char was a space
        # or 2) the current string is empty
        if (con.cur_string =~ /\s\Z/) || (con.cur_string.size == 0)
          # also, we check the next characters
          follows = src.cur_chars(4)
          if follows =~ /^\_\_\_[^\s\_]/
            con.push_element read_emstrong(src,'___')
          elsif follows =~ /^\_\_[^\s\_]/
            con.push_element read_strong(src,'__')
          elsif follows =~ /^\_[^\s\_]/
            con.push_element read_em(src,'_')
          else # _ is just a normal char
            con.push_char src.shift_char
          end
        else
          # _ is just a normal char
          con.push_char src.shift_char
        end
      end
    when ?{ # extension
      if [?#, ?., ?:].include? src.next_char
        src.ignore_char # {
        interpret_extension(src, con, [?}])
        src.ignore_char # }
      else
        con.push_char src.shift_char
      end
    when nil
      # built by interpolation so that a '%' in the delimiters cannot
      # be mistaken for a format directive
      waiting_chars = exit_on_chars ? "#{exit_on_chars.inspect} or" : ""
      maruku_error "Unclosed span (waiting for #{waiting_chars}"+
        "#{exit_on_strings.inspect})", src, con
      break
    else # normal text
      con.push_char src.shift_char
    end # end case
  end # end while true
  con.push_string_if_present

  # Assign IAL to elements
  merge_ial(con.elements, src, con)

  # Remove leading space (and the element itself if nothing is left)
  first = con.elements.first
  if first.kind_of? String
    if first[0] == ?\s
      first = con.elements[0] = first[1, first.size-1]
    end
    con.elements.shift if first.size == 0
  end

  # Remove final space (and the element itself if nothing is left)
  last = con.elements.last
  if last.kind_of? String
    last.chop! if last[-1] == ?\s
    con.elements.pop if last.size == 0
  end

  educate(con.elements)
end
251
+
252
+
253
# Reads an XML processing instruction "<?target code ?>" and pushes the
# corresponding element on +con+. The target defaults to the empty
# string; the code is stripped of surrounding whitespace.
def read_xml_instr_span(src, con)
  src.ignore_chars(2) # starting <?

  # read target <?target code... ?>
  target_match = src.read_regexp(/(\w+)/)
  target = target_match ? target_match[1] : ''

  delim = "?>"
  code = read_simple(src, [], [], [delim])
  src.ignore_chars delim.size

  con.push_element md_xml_instr(target, (code || "").strip)
end
274
+
275
# Interprets the content of a "{...}" extension.
#
# Start: cursor on the character **after** '{'
# End: cursor on '}' or EOF
#
# ':' , '#' and '.' introduce meta-data directly (a leading ':' is
# skipped). Anything else is reported through maruku_recover and then
# treated as meta-data as well.
def interpret_extension(src, con, break_on_chars)
  lead = src.cur_char
  if lead == ?:
    src.ignore_char # :
    extension_meta(src, con, break_on_chars)
  elsif [?#, ?.].include?(lead)
    extension_meta(src, con, break_on_chars)
  else
    stuff = read_simple(src, [?}], break_on_chars, [])
    notice =
      if stuff =~ /^(\w+\s|[^\w])/
        extension_id = $1.strip
        "I don't know what to do with extension '#{extension_id}'\n"+
          "I will threat this:\n\t{#{stuff}} \n as meta-data.\n"
      else
        "I will threat this:\n\t{#{stuff}} \n as meta-data.\n"
      end
    maruku_recover notice, src, con
    extension_meta(src, con, break_on_chars)
  end
end
300
+
301
# Reads meta-data found inside "{...}".
#
# When the content starts with "name:", the attribute list that follows
# is an attribute list *definition*: it is registered in the document
# under that name and pushed on +con+ as such.
#
# Otherwise it is an anonymous inline attribute list: it is only pushed
# on +con+ and nothing is registered in the document (there is no name
# to register it under).
def extension_meta(src, con, break_on_chars)
  m = src.read_regexp(/([^\s\:\"\']+):/)
  al = read_attribute_list(src, con, break_on_chars)
  if m
    name = m[1]
    self.doc.ald[name] = al
    con.push md_ald(name, al)
  else
    con.push md_ial(al)
  end
end
314
+
315
# Reads an autolink "<scheme:...>" and pushes a URL element on +con+.
def read_url_el(src,con)
  src.ignore_char # leading <
  target = read_simple(src, [], [?>])
  src.ignore_char # closing >

  url_element = md_url(target)
  con.push_element url_element
end
322
+
323
# Reads an email autolink "<user@host>" or "<mailto:user@host>" and
# pushes an email element on +con+; a "mailto:" prefix is removed.
def read_email_el(src,con)
  src.ignore_char # leading <
  raw_address = read_simple(src, [], [?>])
  src.ignore_char # closing >

  con.push_element md_email(raw_address.gsub(/^mailto:/,''))
end
331
+
332
# Reads a URL up to one of the characters in +break_on+.
# A URL wrapped in angle brackets is returned without them.
# Returns nil when the URL is empty.
def read_url(src, break_on)
  error 'Invalid char for url', src if [?',?"].include?(src.cur_char)

  url = read_simple(src, [], break_on) || ""

  # strip the surrounding <...>
  url = url[1, url.size-2] if url[0] == ?< && url[-1] == ?>

  url.size == 0 ? nil : url
end
352
+
353
+
354
# Reads a quoted value when the source is positioned on a quote
# character, otherwise a plain value terminated by +exit_on_chars+.
def read_quoted_or_unquoted(src, con, escaped, exit_on_chars)
  if [?', ?"].include?(src.cur_char)
    read_quoted(src, con)
  else
    read_simple(src, escaped, exit_on_chars)
  end
end
362
+
363
# Tries to read a quoted value, consuming both quote characters.
# Returns the text between the quotes, or nil when the stream does not
# start with ' or " (in which case nothing is consumed).
def read_quoted(src, con)
  return nil unless [?', ?"].include?(src.cur_char)

  quote_char = src.shift_char # opening quote
  content = read_simple(src, EscapedCharInQuotes, [quote_char])
  src.ignore_char # closing quote
  content
end
377
+
378
# Reads a simple string (no formatting) from +src+.
#
# src::             character source
# escaped::         characters that lose their backslash when escaped;
#                   any other backslash is kept literally. May be nil.
# exit_on_chars::   stop when the current character is in this list.
#                   May be nil.
# exit_on_strings:: stop when the source is positioned on one of these
#                   strings. May be nil.
#
# The delimiter itself is never consumed. If the source ends before a
# delimiter is found, the problem is reported through maruku_error and
# whatever was read so far is returned.
#
# Returns the text read, or nil if it is empty.
def read_simple(src, escaped, exit_on_chars, exit_on_strings=nil)
  text = "".dup
  while true
    c = src.cur_char
    break if exit_on_chars && exit_on_chars.include?(c)
    break if exit_on_strings &&
      exit_on_strings.any? {|x| src.cur_chars_are x}

    case c
    when nil
      # exit_on_chars may be nil or may contain nil (meaning EOF);
      # neither can be appended to a String
      expected = (exit_on_chars || []).compact.map{|x| "" << x}
      s = "String finished while reading (break on "+
        "#{expected.inspect})"+
        " already read: #{text.inspect}"
      maruku_error s, src
      maruku_recover "I boldly continue", src
      break
    when ?\\
      d = src.next_char
      if escaped && escaped.include?(d)
        src.ignore_chars(2)
        text << d
      else
        text << src.shift_char
      end
    else
      text << src.shift_char
    end
  end
  text.empty? ? nil : text
end
420
+
421
# Reads an emphasis span delimited by the one-character +delim+
# ('*' or '_') and returns the emphasis element.
def read_em(src, delim)
  src.ignore_char # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_char # closing delimiter
  md_em(inner)
end
427
+
428
# Reads a strong span delimited by the two-character +delim+
# ('**' or '__') and returns the strong element.
def read_strong(src, delim)
  src.ignore_chars(2) # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(2) # closing delimiter
  md_strong(inner)
end
434
+
435
# Reads a combined emphasis + strong span delimited by the
# three-character +delim+ ('***' or '___') and returns the element.
def read_emstrong(src, delim)
  src.ignore_chars(3) # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(3) # closing delimiter
  md_emstrong(inner)
end
441
+
442
+ SPACE = ?\ # = 32
443
+
444
+ # R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
445
+ # R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
446
+ R_REF_ID = Regexp.compile(/([^\]]*)\]/)
447
+
448
# Reads a bracketed id "[refid]", consuming both brackets.
# Returns the id (possibly empty), or nil when no closing bracket
# is found.
def read_ref_id(src, con)
  src.ignore_char # [
  src.consume_whitespace
  matched = src.read_regexp(R_REF_ID)
  matched ? matched[1] : nil
end
461
+
462
# Reads a footnote reference "[^id]" and pushes the corresponding
# element on +con+.
def read_footnote_ref(src,con)
  footnote_id = read_ref_id(src,con)
  con.push_element md_foot_ref(footnote_id)
end
466
+
467
# Reads an inline HTML fragment starting at the current position.
#
# Characters are fed one at a time to an HTMLHelper until it reports a
# complete fragment; the consumed characters are then skipped and the
# fragment is pushed on +con+ as an HTML element. If the buffer ends
# first, an error is reported and what was read is pushed anyway.
# Any exception is reported and the current character is pushed as a
# literal instead.
def read_inline_html(src, con)
  h = HTMLHelper.new
  begin
    # This is our current buffer in the context
    next_stuff = src.current_remaining_buffer

    consumed = 0
    finished = false
    until finished
      if consumed >= next_stuff.size
        maruku_error "Malformed HTML starting at #{next_stuff.inspect}", src, con
        break
      end

      h.eat_this next_stuff[consumed].chr
      consumed += 1
      finished = h.is_finished?
    end
    src.ignore_chars(consumed)
    con.push_element md_html(h.stuff_you_read)
  rescue Exception => e
    maruku_error "Bad html: \n" +
      add_tabs(e.inspect+e.backtrace.join("\n"),1,'>'),
      src,con
    maruku_recover "I will try to continue after bad HTML.", src, con
    con.push_char src.shift_char
  end
end
509
+
510
# Reads an inline code span delimited by one or more backticks and
# pushes a code element on +con+.
#
# The span ends at the first run of backticks matching the opening run.
# When more than one backtick is used, one leading and one trailing
# space are dropped (so that code can start or end with a backtick).
# If nothing could be read (for example the input ends right after the
# opening ticks), the code passed to md_code is nil.
def read_inline_code(src, con)
  # Count the number of ticks
  num_ticks = 0
  while src.cur_char == ?`
    num_ticks += 1
    src.ignore_char
  end
  # We will read until this string
  end_string = "`"*num_ticks

  code = read_simple(src, [], [], [end_string])
  src.ignore_chars num_ticks

  # read_simple returns nil for empty content: nothing to trim then
  if code && num_ticks > 1
    # Ignore at most one leading space
    code = code[1, code.size-1] if code[0] == SPACE
    # drop last space
    code = code[0, code.size-1] if code[-1] == SPACE
  end

  con.push_element md_code(code)
end
540
+
541
# Reads a link starting at "[" and pushes the result on +con+.
#
# Three forms are recognized after the bracketed text:
#   [text](url "title")   inline link (title optional, must be quoted)
#   [text][ref]           reference link; an empty ref uses the text
#   [text]                reference link whose id is the text
#
# When the inline form is not closed by ")" or the reference id cannot
# be read, an error is reported and the link text is pushed as plain
# elements instead of a link.
def read_link(src, con)
  # we read the string and see what happens
  src.ignore_char # opening bracket
  children = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket

  # a single space is allowed before "(" or "["
  if src.cur_char == SPACE and
     (src.next_char == ?[ or src.next_char == ?( )
    src.shift_char
  end

  case src.cur_char
  when ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE,?\t,?)])
    url = '' if not url # no url is ok
    src.consume_whitespace
    title = nil
    if src.cur_char != ?) # we have a title
      quote_char = src.cur_char
      title = read_quoted(src,con)

      if not title
        maruku_error 'Must quote title',src,con
      else
        # Tries to read a title with quotes: ![a](url "ti"tle")
        # this is the most ugly thing in Markdown
        if not src.next_matches(/\s*\)/)
          # if there is not a closing par ), then read
          # the rest and guess it's title with quotes
          rest = read_simple(src, [], [?)], [])
          # read_simple returns nil when nothing is left (e.g. EOF)
          if rest
            # chop the closing quote
            rest.chop!
            title << quote_char << rest
          end
        end
      end
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      maruku_error 'Unclosed link',src,con
      maruku_recover "No closing ): I will not create"+
        " the link for #{children.inspect}", src, con
      con.push_elements children
      return
    end
    con.push_element md_im_link(children,url, title)
  when ?[ # link ref
    ref_id = read_ref_id(src,con)
    if ref_id
      if ref_id.size == 0
        ref_id = sanitize_ref_id(children.to_s)
      else
        ref_id = sanitize_ref_id(ref_id)
      end
      con.push_element md_link(children, ref_id)
    else
      maruku_error "Could not read ref_id", src, con
      maruku_recover "I will not create the link for "+
        "#{children.inspect}", src, con
      con.push_elements children
      return
    end
  else # empty [link]
    id = sanitize_ref_id(children.to_s)
    con.push_element md_link(children, id)
  end
end # read link
614
+
615
# Reads an image starting at "![" and pushes the result on +con+.
#
# Three forms are recognized after the bracketed alternate text:
#   ![alt](url "title")   inline image (title optional, must be quoted)
#   ![alt][ref]           reference image; an empty ref uses the alt text
#   ![alt]                reference image whose id is the alt text
#
# Problems (missing url, unquoted title, missing ")" or "]") are
# reported through the error helpers.
def read_image(src, con)
  src.ignore_chars(2) # opening "!["
  alt_text = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket
  # a single space is allowed before "(" or "["
  if src.cur_char == SPACE and
     (src.next_char == ?[ or src.next_char == ?( )
    src.ignore_char
  end
  case src.cur_char
  when ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE,?\t,?)])
    if not url
      error "Could not read url from #{src.cur_chars(10).inspect}",
        src,con
    end
    src.consume_whitespace
    title = nil
    if src.cur_char != ?) # we have a title
      quote_char = src.cur_char
      title = read_quoted(src,con)
      if not title
        maruku_error 'Must quote title',src,con
      else
        # Tries to read a title with quotes: ![a](url "ti"tle")
        # this is the most ugly thing in Markdown
        if not src.next_matches(/\s*\)/)
          # if there is not a closing par ), then read
          # the rest and guess it's title with quotes
          rest = read_simple(src, [], [?)], [])
          # read_simple returns nil when nothing is left (e.g. EOF)
          if rest
            # chop the closing quote
            rest.chop!
            title << quote_char << rest
          end
        end
      end
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      # interpolation copes with closing being nil at end of input
      error("Unclosed link: '#{closing}'"+
        " Read url=#{url.inspect} title=#{title.inspect}",src,con)
    end
    con.push_element md_im_image(alt_text, url, title)
  when ?[ # link ref
    ref_id = read_ref_id(src,con)
    if not ref_id # TODO: check around
      error('Reference not closed.', src, con)
      ref_id = ""
    end
    if ref_id.size == 0
      ref_id = alt_text.to_s
    end

    ref_id = sanitize_ref_id(ref_id)

    con.push_element md_image(alt_text, ref_id)
  else # no stuff
    ref_id = sanitize_ref_id(alt_text.to_s)
    con.push_element md_image(alt_text, ref_id)
  end
end # read image
679
+
680
+
681
# Accumulator used while reading a span: a list of finished elements
# plus the plain-text string currently being built.
class SpanContext
  include MaRuKu::Strings

  # Elements read so far (Strings and MDElements)
  attr_accessor :elements
  # Text read since the last element was pushed
  attr_accessor :cur_string

  def initialize
    @elements = []
    @cur_string = ""
  end

  # Appends the element +e+ (a String or an MDElement), after flushing
  # the pending text. Raises for any other kind of object.
  # Returns nil.
  def push_element(e)
    unless e.kind_of?(String) or e.kind_of?(MDElement)
      raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} "
    end

    push_string_if_present
    @elements << e
    nil
  end
  alias push push_element

  # Appends every item of +a+: Strings are added to the pending text,
  # anything else is pushed as an element.
  def push_elements(a)
    a.each do |e|
      if e.kind_of? String
        # Append the string as a whole: appending it byte by byte would
        # turn each byte of a multi-byte character into its own codepoint.
        @cur_string << e
      else
        push_element e
      end
    end
  end

  # Moves the pending text, if any, to the element list. Returns nil.
  def push_string_if_present
    if @cur_string.size > 0
      @elements << @cur_string
      @cur_string = ""
    end
    nil
  end

  # Appends the character +c+ to the pending text. Returns nil.
  def push_char(c)
    @cur_string << c
    nil
  end

  # Appends a space to the pending text unless it already ends with one.
  def push_space
    @cur_string << ?\s if @cur_string[-1] != ?\s
  end

  # Returns a human-readable dump of the elements read so far and of the
  # pending text, for error messages.
  def describe
    lines = @elements.map{|x| x.inspect}.join("\n")
    s = "Elements read in span: \n" +
      add_tabs(lines,1, ' -')+"\n"

    if @cur_string.size > 0
      s += "Current string: \n #{@cur_string.inspect}\n"
    end
    s
  end
end # SpanContext
744
+
745
+ end end end end # module MaRuKu; module In; module Markdown; module SpanLevelParser
746
+