maiku 0.6.1.maiku

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. data/lib/maruku.rb +141 -0
  2. data/lib/maruku/attributes.rb +175 -0
  3. data/lib/maruku/defaults.rb +71 -0
  4. data/lib/maruku/errors_management.rb +92 -0
  5. data/lib/maruku/ext/div.rb +133 -0
  6. data/lib/maruku/ext/math.rb +41 -0
  7. data/lib/maruku/ext/math/elements.rb +27 -0
  8. data/lib/maruku/ext/math/latex_fix.rb +12 -0
  9. data/lib/maruku/ext/math/mathml_engines/blahtex.rb +107 -0
  10. data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +29 -0
  11. data/lib/maruku/ext/math/mathml_engines/none.rb +20 -0
  12. data/lib/maruku/ext/math/mathml_engines/ritex.rb +24 -0
  13. data/lib/maruku/ext/math/parsing.rb +119 -0
  14. data/lib/maruku/ext/math/to_html.rb +187 -0
  15. data/lib/maruku/ext/math/to_latex.rb +26 -0
  16. data/lib/maruku/helpers.rb +260 -0
  17. data/lib/maruku/input/charsource.rb +326 -0
  18. data/lib/maruku/input/extensions.rb +69 -0
  19. data/lib/maruku/input/html_helper.rb +189 -0
  20. data/lib/maruku/input/linesource.rb +111 -0
  21. data/lib/maruku/input/parse_block.rb +616 -0
  22. data/lib/maruku/input/parse_doc.rb +232 -0
  23. data/lib/maruku/input/parse_span_better.rb +746 -0
  24. data/lib/maruku/input/rubypants.rb +225 -0
  25. data/lib/maruku/input/type_detection.rb +147 -0
  26. data/lib/maruku/input_textile2/t2_parser.rb +163 -0
  27. data/lib/maruku/maruku.rb +33 -0
  28. data/lib/maruku/output/s5/fancy.rb +756 -0
  29. data/lib/maruku/output/s5/to_s5.rb +138 -0
  30. data/lib/maruku/output/to_html.rb +991 -0
  31. data/lib/maruku/output/to_latex.rb +590 -0
  32. data/lib/maruku/output/to_latex_entities.rb +367 -0
  33. data/lib/maruku/output/to_latex_strings.rb +64 -0
  34. data/lib/maruku/output/to_markdown.rb +164 -0
  35. data/lib/maruku/output/to_s.rb +56 -0
  36. data/lib/maruku/string_utils.rb +201 -0
  37. data/lib/maruku/structures.rb +167 -0
  38. data/lib/maruku/structures_inspect.rb +87 -0
  39. data/lib/maruku/structures_iterators.rb +61 -0
  40. data/lib/maruku/textile2.rb +1 -0
  41. data/lib/maruku/toc.rb +199 -0
  42. data/lib/maruku/usage/example1.rb +33 -0
  43. data/lib/maruku/version.rb +39 -0
  44. metadata +167 -0
@@ -0,0 +1,232 @@
1
+ #--
2
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
+ #
4
+ # This file is part of Maruku.
5
+ #
6
+ # Maruku is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Maruku is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Maruku; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
20
+
21
+ module MaRuKu; module In; module Markdown; module BlockLevelParser
22
+
23
# Parses a whole document held in the string +s+.
#
# Steps, in order:
# 1. read the leading headers and merge them into the document attributes;
# 2. transcode the body to UTF-8 when an `encoding` attribute asks for it;
# 3. parse the body into @children;
# 4. expand abbreviations and markdown embedded in raw HTML;
# 5. build the table of contents (and derive a title from it if none is set);
# 6. expand default and inline attribute lists on every element;
# 7. optionally execute embedded code when unsafe features are enabled.
def parse_doc(s)
  # FIXME \r\n => \n
  headers = parse_email_headers(s)
  body = headers[:data]
  headers.delete :data

  self.attributes.merge! headers

=begin maruku_doc
Attribute: encoding
Scope: document
Summary: Encoding for the document.

If the `encoding` attribute is specified, then the content
will be converted from the specified encoding to UTF-8.
=end

  source_encoding = self.attributes[:encoding]
  self.attributes.delete :encoding
  if source_encoding && source_encoding.downcase != 'utf-8'
    # Ruby 1.9 strings can transcode themselves;
    # Ruby 1.8 falls back to Iconv.
    if body.respond_to?(:encode!)
      body.encode!('UTF-8', source_encoding)
    else
      require 'iconv'
      body = Iconv.new('utf-8', source_encoding).iconv(body)
    end
  end

  @children = parse_text_as_markdown(body)

  # These used to be conditional on markdown_extra?; they now always run.
  self.search_abbreviations
  self.substitute_markdown_inside_raw_html

  toc = create_toc

  # fall back to the TOC header element when no title attribute was given
  if !self.attributes[:title] && toc.header_element
    self.attributes[:title] = toc.header_element.to_s
  end

  # save for later use
  self.toc = toc

  # Attribute lists: first the defaults registered for the node type,
  # then the element's own list.
  each_element do |element|
    defaults = self.ald[element.node_type.to_s]
    expand_attribute_list(defaults, element.attributes) if defaults
    expand_attribute_list(element.al, element.attributes)
  end

=begin maruku_doc
Attribute: unsafe_features
Scope: global
Summary: Enables execution of XML instructions.

Disabled by default because of security concerns.
=end

  # TODO: remove executed code blocks
  self.execute_code_blocks if Maruku::Globals[:unsafe_features]
end
97
+
98
# Expands the attribute list +al+ (an enumerable of [key, value] pairs)
# into the Hash +result+.
#
# * :class values are accumulated, separated by a space.
# * :id overwrites any previous id.
# * :ref names another attribute list in self.ald, which is expanded
#   recursively; names already expanded are tracked in
#   result[:expanded_references] so that a cycle is reported through
#   maruku_error instead of recursing forever.
# * a :ref with no matching list is appended to
#   result[:unresolved_references] and also set as a boolean attribute.
# * any other key is stored under its symbol.
def expand_attribute_list(al, result)
  al.each do |k, v|
    case k
    when :class
      if not result[:class]
        result[:class] = v
      else
        result[:class] += " " + v
      end
    when :id
      result[:id] = v
    when :ref
      if self.ald[v]
        already = (result[:expanded_references] ||= [])
        if not already.include?(v)
          already.push v
          expand_attribute_list(self.ald[v], result)
        else
          already.push v
          maruku_error "Circular reference between labels.\n\n"+
            "Label #{v.inspect} calls itself via recursion.\nThe recursion is "+
            (already.map{|x| x.inspect}.join(' => '))
        end
      else
        if not result[:unresolved_references]
          # Store a copy: the string is appended to in place below, and
          # +v+ still belongs to the (possibly shared) attribute list.
          result[:unresolved_references] = v.dup
        else
          result[:unresolved_references] << " #{v}"
        end

        result[v.to_sym] = true
      end
    else
      result[k.to_sym] = v
    end
  end
end
136
+
137
# Evaluates +code+ (a String) in the context of +object+ via instance_eval.
#
# Returns the value of the evaluated code, or nil when evaluation failed.
# Failures are reported through maruku_error together with the offending
# code and the backtrace.
#
# StandardError covers ordinary runtime failures; ScriptError covers
# SyntaxError/LoadError raised while compiling the snippet. Process-level
# exceptions (Interrupt, SystemExit, ...) are deliberately not swallowed.
def safe_execute_code(object, code)
  begin
    return object.instance_eval(code)
  rescue StandardError, ScriptError => e
    trace = (e.backtrace || []).join("\n")
    maruku_error "Exception while executing this:\n"+
      add_tabs(code, 1, ">")+
      "\nThe error was:\n"+
      add_tabs(e.inspect+"\n"+trace, 1, "|")
  end
  nil
end
158
+
159
# Runs every XML processing instruction whose target is 'maruku'
# through safe_execute_code, evaluating it in the context of the
# instruction element itself. String results are printed to stdout.
def execute_code_blocks
  self.each_element(:xml_instr) do |instr|
    next unless instr.target == 'maruku'

    outcome = safe_execute_code(instr, instr.code)
    puts "Result is : #{outcome.inspect}" if outcome.kind_of?(String)
  end
end
169
+
170
# For each registered abbreviation, rewrites the document strings that
# contain it: the string is split into the text before the match, an
# abbreviation element, and the text after the match.
#
# Known issue noted by the original author (agorf): this misbehaves when
# many abbreviations are present.
def search_abbreviations
  self.abbreviations.each do |abbrev, title|
    pattern = Regexp.new(Regexp.escape(abbrev))
    self.replace_each_string do |text|
      found = pattern.match(text)
      # no occurrence: leave the string untouched
      next text unless found

      abbr_element = md_abbr(abbrev.dup, title ? title.dup : nil)
      [found.pre_match, abbr_element, found.post_match]
    end
  end
end
184
+
185
+ include REXML
186
# (PHP Markdown extra) Searches the parsed raw HTML elements for tags that
# carry a `markdown` attribute (markdown=1 or markdown=block), replaces
# their text nodes with the HTML produced from parsing that text as
# Markdown, and finally removes the `markdown` attribute.
#
# NOTE(review): the text lookup uses the XPath "//text()", which is an
# absolute path; it looks like it may select text nodes outside the tagged
# element as well -- confirm against REXML before relying on it.
def substitute_markdown_inside_raw_html
  # content of these HTML tags is always parsed as block-level markdown
  block_tags = ['div']

  self.each_element(:raw_html) do |raw|
    doc = raw.instance_variable_get :@parsed_html
    next unless doc # only valid html has a parsed tree

    # use xpath to find elements with 'markdown' attribute
    XPath.match(doc, "//*[attribute::markdown]").each do |tag|
      # should we parse block-level or span-level?
      how = tag.attributes['markdown']
      parse_blocks = (how == 'block') || block_tags.include?(tag.name)

      # Select all text elements
      XPath.match(tag, "//text()").each do |original_text|
        s = original_text.value.strip
        next unless s.size > 0

        parsed = if parse_blocks
          parse_text_as_markdown(s)
        else
          parse_lines_as_span([s])
        end
        el = md_el(:dummy, parsed)

        # put the rendered nodes where the text was, then drop the text
        holder = original_text.parent
        el.children_to_html.each do |node|
          holder.insert_before(original_text, node)
        end
        holder.delete(original_text)
      end

      # remove 'markdown' attribute
      tag.delete_attribute 'markdown'
    end
  end
end
231
+
232
+ end end end end
@@ -0,0 +1,746 @@
1
+ #--
2
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
+ #
4
+ # This file is part of Maruku.
5
+ #
6
+ # Maruku is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Maruku is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Maruku; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
20
+
21
+
22
+ require 'set'
23
+
24
+ module MaRuKu; module In; module Markdown; module SpanLevelParser
25
+ include MaRuKu::Helpers
26
+
27
# Characters that a backslash may escape in ordinary text.
EscapedCharInText = Set.new [
  ?\\, ?`, ?*, ?_, ?{, ?}, ?[, ?], ?(, ?),
  ?#, ?., ?!, ?|, ?:, ?+, ?-, ?>
]

# Characters that a backslash may escape inside a quoted string:
# the same set as in text, plus the two quote characters.
EscapedCharInQuotes = Set.new [
  ?\\, ?`, ?*, ?_, ?{, ?}, ?[, ?], ?(, ?),
  ?#, ?., ?!, ?|, ?:, ?+, ?-, ?>,
  ?', ?"
]

# Characters that a backslash may escape inside inline code.
EscapedCharInInlineCode = [?\\, ?`]
34
+
35
# Joins +lines+ with newlines and parses the result as span-level markdown.
# +parent+ is handed through to parse_span_better unchanged.
def parse_lines_as_span(lines, parent=nil)
  joined = lines.join("\n")
  parse_span_better(joined, parent)
end
38
+
39
# Parses +string+ as span-level markdown and returns what read_span
# produces for it. A non-String argument is reported through `error`.
#
# The text is copied and frozen before being wrapped in a CharSource, and
# the span is read until end of input (nil is the only exit character).
def parse_span_better(string, parent=nil)
  error "Passed #{string.class}." unless string.kind_of?(String)

  frozen_copy = (string + "")
  frozen_copy.freeze
  source = CharSource.new(frozen_copy, parent)
  read_span(source, EscapedCharInText, [nil])
end
48
+
49
# This is the main loop for reading span elements.
#
# It's long, but not *complex* or difficult to understand: it looks at the
# current character of +src+ and dispatches to the reader for the construct
# that character may start, collecting the results in a SpanContext.
#
# src::             character source positioned at the start of the span
# escaped::         collection of characters that a backslash escapes
# exit_on_chars::   characters that terminate the span (may contain nil,
#                   which stands for end of input); letters a-z/A-Z cannot
#                   be used here because of the fast path below
# exit_on_strings:: strings that terminate the span, or nil
#
# The terminator itself is not consumed. Returns the collected elements
# after IAL merging, trimming of one leading/trailing space, and `educate`.
def read_span(src, escaped, exit_on_chars, exit_on_strings=nil)
  con = SpanContext.new
  c = d = nil
  while true
    c = src.cur_char

    # This is only an optimization which cuts 50% of the time used.
    # (but you can't use a-zA-z in exit_on_chars)
    if c && ((c >= ?a && c <= ?z) || (c >= ?A && c <= ?Z))
      con.cur_string << src.shift_char
      next
    end

    break if exit_on_chars && exit_on_chars.include?(c)
    break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x}

    # check if there are extensions
    next if check_span_extensions(src, con)

    case c = src.cur_char
    when ?\s # it's space (32)
      # Markdown hard line break: two spaces followed by a newline.
      # The literal is three characters long, matching ignore_chars(3).
      if src.cur_chars_are "  \n"
        src.ignore_chars(3)
        con.push_element md_br()
        next
      else
        src.ignore_char
        con.push_space
      end
    when ?\n, ?\t
      src.ignore_char
      con.push_space
    when ?`
      read_inline_code(src, con)
    when ?<
      # It could be:
      # 1) HTML "<div ..."
      # 2) HTML "<!-- ..."
      # 3) url "<http:// ", "<ftp:// ..."
      # 4) email "<andrea@... ", "<mailto:andrea@..."
      # 5) on itself! "a < b "
      # 6) Start of <<guillemettes>>
      case d = src.next_char
      when ?< # guillemettes
        src.ignore_chars(2)
        con.push_char ?<
        con.push_char ?<
      when ?!
        if src.cur_chars_are '<!--'
          read_inline_html(src, con)
        else
          con.push_char src.shift_char
        end
      when ??
        read_xml_instr_span(src, con)
      when ?\s, ?\t
        con.push_char src.shift_char
      else
        if src.next_matches(/<mailto:/) or
           src.next_matches(/<[\w\.]+\@/)
          read_email_el(src, con)
        elsif src.next_matches(/<\w+:/)
          read_url_el(src, con)
        elsif src.next_matches(/<\w/)
          read_inline_html(src, con)
        else
          con.push_char src.shift_char
        end
      end
    when ?\\
      d = src.next_char
      if d == ?'
        src.ignore_chars(2)
        con.push_element md_entity('apos')
      elsif d == ?"
        src.ignore_chars(2)
        con.push_element md_entity('quot')
      elsif escaped.include? d
        src.ignore_chars(2)
        con.push_char d
      else
        con.push_char src.shift_char
      end
    when ?[
      if markdown_extra? && src.next_char == ?^
        read_footnote_ref(src, con)
      else
        read_link(src, con)
      end
    when ?!
      if src.next_char == ?[
        read_image(src, con)
      else
        con.push_char src.shift_char
      end
    when ?&
      # named references
      if m = src.read_regexp(/\&([\w\d]+);/)
        con.push_element md_entity(m[1])
      # numeric
      elsif m = src.read_regexp(/\&\#(x)?([\w\d]+);/)
        num = m[1] ? m[2].hex : m[2].to_i
        con.push_element md_entity(num)
      else
        con.push_char src.shift_char
      end
    when ?*
      if not src.next_char
        maruku_error "Opening * as last char.", src, con
        maruku_recover "Threating as literal", src, con
        con.push_char src.shift_char
      else
        follows = src.cur_chars(4)
        if follows =~ /^\*\*\*[^\s\*]/
          con.push_element read_emstrong(src, '***')
        elsif follows =~ /^\*\*[^\s\*]/
          con.push_element read_strong(src, '**')
        elsif follows =~ /^\*[^\s\*]/
          con.push_element read_em(src, '*')
        else # * is just a normal char
          con.push_char src.shift_char
        end
      end
    when ?_
      if not src.next_char
        maruku_error "Opening _ as last char", src, con
        maruku_recover "Threating as literal", src, con
        con.push_char src.shift_char
      else
        # we don't want "mod_ruby" to start an emphasis
        # so we start one only if
        # 1) the last char was a space
        # or 2) the current string is empty
        if (con.cur_string =~ /\s\Z/) || (con.cur_string.size == 0)
          # also, we check the next characters
          follows = src.cur_chars(4)
          if follows =~ /^\_\_\_[^\s\_]/
            con.push_element read_emstrong(src, '___')
          elsif follows =~ /^\_\_[^\s\_]/
            con.push_element read_strong(src, '__')
          elsif follows =~ /^\_[^\s\_]/
            con.push_element read_em(src, '_')
          else # _ is just a normal char
            con.push_char src.shift_char
          end
        else
          # _ is just a normal char
          con.push_char src.shift_char
        end
      end
    when ?{ # extension
      if [?#, ?., ?:].include? src.next_char
        src.ignore_char # {
        interpret_extension(src, con, [?}])
        src.ignore_char # }
      else
        con.push_char src.shift_char
      end
    when nil
      # end of input reached before any terminator
      maruku_error( ("Unclosed span (waiting for %s"+
        "#{exit_on_strings.inspect})") % [
          exit_on_chars ? "#{exit_on_chars.inspect} or" : ""],
        src, con)
      break
    else # normal text
      con.push_char src.shift_char
    end # end case
  end # end while true
  con.push_string_if_present

  # Assign IAL to elements
  merge_ial(con.elements, src, con)

  # Remove leading space. The emptiness check is made on the string that
  # is actually stored after trimming, so a first element that consisted
  # of a single space is dropped instead of lingering as "".
  first = con.elements.first
  if first.kind_of? String
    if first[0] == ?\s
      first = con.elements[0] = first[1, first.size-1]
    end
    con.elements.shift if first.size == 0
  end

  # Remove final spaces
  last = con.elements.last
  if last.kind_of? String
    last.chop! if last[-1] == ?\s
    con.elements.pop if last.size == 0
  end

  educate(con.elements)
end
251
+
252
+
253
# Reads an XML processing instruction "<?target code ?>" starting at the
# opening "<?" and pushes an xml_instr element built from the target (empty
# string when no word follows "<?") and the stripped code.
def read_xml_instr_span(src, con)
  closing = "?>"

  src.ignore_chars(2) # starting <?

  # read target <?target code... ?>
  matched = src.read_regexp(/(\w+)/)
  target = matched ? matched[1] : ''

  # nothing is escaped and no single character stops the read;
  # only the closing delimiter does
  body = read_simple(src, [], [], [closing])
  src.ignore_chars closing.size

  body = (body || "").strip
  con.push_element md_xml_instr(target, body)
end
274
+
275
# Interprets the content of a "{...}" extension.
#
# Start: cursor on character **after** '{'
# End: cursor on '}' or EOF
#
# A leading ':' is skipped; ':', '#' and '.' all lead to extension_meta.
# Anything else is read up to the break characters, reported through
# maruku_recover, and then handed to extension_meta as well.
def interpret_extension(src, con, break_on_chars)
  lead = src.cur_char

  if lead == ?:
    src.ignore_char # :
    extension_meta(src, con, break_on_chars)
  elsif lead == ?# || lead == ?.
    extension_meta(src, con, break_on_chars)
  else
    stuff = read_simple(src, [?}], break_on_chars, [])
    if stuff =~ /^(\w+\s|[^\w])/
      extension_id = $1.strip
      # no named extensions are known, so everything is treated as meta-data
      maruku_recover "I don't know what to do with extension '#{extension_id}'\n"+
        "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
    else
      maruku_recover "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
    end
    extension_meta(src, con, break_on_chars)
  end
end
300
+
301
# Reads the meta-data inside a "{...}" extension.
#
# When the text starts with "name:", the attribute list that follows is an
# attribute list *definition*: it is registered in the document's ald table
# under that name and an ALD element is pushed. Otherwise the attribute list
# is an *inline* one and only an IAL element is pushed.
def extension_meta(src, con, break_on_chars)
  if m = src.read_regexp(/([^\s\:\"\']+):/)
    name = m[1]
    al = read_attribute_list(src, con, break_on_chars)
    self.doc.ald[name] = al
    con.push md_ald(name, al)
  else
    al = read_attribute_list(src, con, break_on_chars)
    # An inline list has no name, so nothing is registered in the ald
    # table (the previous code stored it under a nil key).
    con.push md_ial(al)
  end
end
314
+
315
# Reads an autolink "<url>" starting at '<' and pushes a url element
# built from the text between the angle brackets.
def read_url_el(src,con)
  src.ignore_char # leading <
  target = read_simple(src, [], [?>])
  src.ignore_char # closing >

  link = md_url(target)
  con.push_element link
end
322
+
323
# Reads an e-mail autolink "<address>" or "<mailto:address>" starting at
# '<' and pushes an email element; a "mailto:" prefix is stripped.
def read_email_el(src,con)
  src.ignore_char # leading <
  raw = read_simple(src, [], [?>])
  src.ignore_char # closing >

  con.push_element md_email(raw.gsub(/^mailto:/,''))
end
331
+
332
# Reads a URL up to (not including) one of the +break_on+ characters.
#
# A URL starting with a quote character is reported through `error`.
# Surrounding angle brackets ("<...>") are removed.
# Returns the URL string, or nil when it is empty.
def read_url(src, break_on)
  error 'Invalid char for url', src if [?',?"].include?(src.cur_char)

  url = read_simple(src, [], break_on) || "" # read_simple gives nil for empty

  enclosed = (url[0] == ?< && url[-1] == ?>)
  url = url[1, url.size-2] if enclosed

  url.size == 0 ? nil : url
end
352
+
353
+
354
# Reads a value that may or may not be quoted: when the cursor is on a
# quote character the value is read with read_quoted, otherwise with
# read_simple using the given escapes and exit characters.
def read_quoted_or_unquoted(src, con, escaped, exit_on_chars)
  if [?', ?"].include?(src.cur_char)
    read_quoted(src, con)
  else
    read_simple(src, escaped, exit_on_chars)
  end
end
362
+
363
# Tries to read a quoted value and returns its content (what read_simple
# yields between the quotes). Both quote characters are consumed.
# If the stream does not start with ' or ", returns nil and consumes nothing.
def read_quoted(src, con)
  opener = src.cur_char
  return nil unless opener == ?' || opener == ?"

  quote_char = src.shift_char # opening quote
  content = read_simple(src, EscapedCharInQuotes, [quote_char])
  src.ignore_char # closing quote
  return content
end
377
+
378
# Reads a simple string (no formatting) from +src+.
#
# src::             character source
# escaped::         characters that lose their preceding backslash
# exit_on_chars::   characters that stop the read, or nil
# exit_on_strings:: strings that stop the read, or nil
#
# The terminator is left in the stream. If the input ends before any
# terminator is seen, the problem is reported through maruku_error /
# maruku_recover and whatever was read so far is returned.
# Returns nil when nothing was read.
def read_simple(src, escaped, exit_on_chars, exit_on_strings=nil)
  text = ""
  while true
    c = src.cur_char
    break if exit_on_chars && exit_on_chars.include?(c)
    break if exit_on_strings &&
      exit_on_strings.any? {|x| src.cur_chars_are x}

    case c
    when nil
      # exit_on_chars is allowed to be nil (see the guard above), so it
      # must not be dereferenced blindly when building the message.
      waiting_for = (exit_on_chars || []).compact.map{|x| "" << x}
      s = "String finished while reading (break on "+
        "#{waiting_for.inspect})"+
        " already read: #{text.inspect}"
      maruku_error s, src
      maruku_recover "I boldly continue", src
      break
    when ?\\
      d = src.next_char
      if escaped.include? d
        # drop the backslash, keep the escaped character
        src.ignore_chars(2)
        text << d
      else
        text << src.shift_char
      end
    else
      text << src.shift_char
    end
  end
  text.empty? ? nil : text
end
420
+
421
# Reads an emphasis span delimited by the one-character +delim+:
# skips the opener, reads the inner span up to +delim+, skips the closer,
# and returns the em element.
def read_em(src, delim)
  src.ignore_char
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_char
  md_em(inner)
end
427
+
428
# Reads a strong span delimited by the two-character +delim+:
# skips the opener, reads the inner span up to +delim+, skips the closer,
# and returns the strong element.
def read_strong(src, delim)
  src.ignore_chars(2)
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(2)
  md_strong(inner)
end
434
+
435
# Reads a combined emphasis+strong span delimited by the three-character
# +delim+: skips the opener, reads the inner span up to +delim+, skips the
# closer, and returns the emstrong element.
def read_emstrong(src, delim)
  src.ignore_chars(3)
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(3)
  md_emstrong(inner)
end
441
+
442
# The space character (code 32).
SPACE = ?\s

# Matches the remainder of a bracketed reference id: everything up to the
# closing ']' is captured in group 1, and the ']' itself is consumed.
# (An earlier variant, /([^\]\s]*)(\s*\])/, stopped the id at whitespace.)
R_REF_ID = /([^\]]*)\]/
447
+
448
# Reads a bracketed id "[refid]". Consumes also both brackets.
# Whitespace right after the opening bracket is skipped.
# Returns the id text, or nil when no closing bracket can be matched.
def read_ref_id(src, con)
  src.ignore_char # [
  src.consume_whitespace
  found = src.read_regexp(R_REF_ID)
  found ? found[1] : nil
end
461
+
462
# Reads a footnote reference: the bracketed id at the cursor is consumed
# and a footnote-reference element for it is pushed.
def read_footnote_ref(src,con)
  footnote_id = read_ref_id(src,con)
  con.push_element md_foot_ref(footnote_id)
end
466
+
467
# Reads a piece of inline HTML starting at the cursor.
#
# The remaining buffer is fed to an HTMLHelper one character at a time
# until the helper says it is finished; the consumed characters are then
# skipped and an html element with what the helper read is pushed.
# If the buffer runs out first, the HTML is reported as malformed.
#
# Any ordinary error raised along the way is reported, and the current
# character is pushed as literal text so that parsing can go on.
# Only StandardError is rescued, so interrupts and exit requests still
# propagate.
def read_inline_html(src, con)
  h = HTMLHelper.new
  begin
    # This is our current buffer in the context
    next_stuff = src.current_remaining_buffer

    consumed = 0
    while true
      if consumed >= next_stuff.size
        maruku_error "Malformed HTML starting at #{next_stuff.inspect}", src, con
        break
      end

      h.eat_this next_stuff[consumed].chr
      consumed += 1
      break if h.is_finished?
    end
    src.ignore_chars(consumed)
    con.push_element md_html(h.stuff_you_read)
  rescue StandardError => e
    maruku_error "Bad html: \n" +
      add_tabs(e.inspect+e.backtrace.join("\n"),1,'>'),
      src,con
    maruku_recover "I will try to continue after bad HTML.", src, con
    con.push_char src.shift_char
  end
end
509
+
510
# Reads an inline code span starting at its opening backticks and pushes a
# code element.
#
# The span is closed by the same number of backticks that opened it. When
# more than one backtick is used, one space right after the opener and one
# right before the closer are dropped, so that code starting or ending
# with a backtick can be written.
def read_inline_code(src, con)
  # Count the number of ticks
  num_ticks = 0
  while src.cur_char == ?`
    num_ticks += 1
    src.ignore_char
  end
  # We will read until this string
  end_string = "`"*num_ticks

  # read_simple returns nil when nothing was read (e.g. the input ends
  # right after the opening ticks); treat that as empty code so the
  # indexing below cannot fail.
  code = read_simple(src, [], [], [end_string]) || ""

  src.ignore_chars num_ticks

  # Ignore at most one space
  if num_ticks > 1 && code[0] == SPACE
    code = code[1, code.size-1]
  end

  # drop last space
  if num_ticks > 1 && code[-1] == SPACE
    code = code[0,code.size-1]
  end

  con.push_element md_code(code)
end
540
+
541
# Reads a link starting at its opening '[' and pushes the result.
#
# Three forms are handled after the bracketed link text:
# * "(url "title")" -- an immediate link;
# * "[ref id]"      -- a reference link (an empty id falls back to the
#                      link text);
# * nothing          -- a reference link whose id is the link text.
# One space between the text and the '(' or '[' is tolerated.
#
# When the closing ')' is missing or the reference id cannot be read, the
# problem is reported and the link text is pushed back as plain elements.
def read_link(src, con)
  # we read the string and see what happens
  src.ignore_char # opening bracket
  children = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket

  # ignore space
  if src.cur_char == SPACE && [?[, ?(].include?(src.next_char)
    src.shift_char
  end

  case src.cur_char
  when ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE, ?\t, ?)])
    if not url
      url = '' # no url is ok
    end
    src.consume_whitespace
    title = nil
    if src.cur_char != ?) # we have a title
      quote_char = src.cur_char
      title = read_quoted(src, con)

      if not title
        maruku_error 'Must quote title', src, con
      else
        # Tries to read a title with quotes: ![a](url "ti"tle")
        # this is the most ugly thing in Markdown
        if not src.next_matches(/\s*\)/)
          # if there is not a closing par ), then read
          # the rest and guess it's title with quotes
          rest = read_simple(src, [], [?)], [])
          # read_simple gives nil when the input ends here; in that case
          # there is nothing to append to the title
          if rest
            # chop the closing char
            rest.chop!
            title << quote_char << rest
          end
        end
      end
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      maruku_error 'Unclosed link', src, con
      maruku_recover "No closing ): I will not create"+
        " the link for #{children.inspect}", src, con
      con.push_elements children
      return
    end
    con.push_element md_im_link(children, url, title)
  when ?[ # link ref
    ref_id = read_ref_id(src, con)
    if ref_id
      if ref_id.size == 0
        ref_id = sanitize_ref_id(children.to_s)
      else
        ref_id = sanitize_ref_id(ref_id)
      end
      con.push_element md_link(children, ref_id)
    else
      maruku_error "Could not read ref_id", src, con
      maruku_recover "I will not create the link for "+
        "#{children.inspect}", src, con
      con.push_elements children
      return
    end
  else # empty [link]
    id = sanitize_ref_id(children.to_s)
    con.push_element md_link(children, id)
  end
end # read link
614
+
615
# Reads an image starting at its opening "![" and pushes the result.
#
# Three forms are handled after the bracketed alternative text:
# * "(url "title")" -- an immediate image;
# * "[ref id]"      -- a reference image (an empty id falls back to the
#                      alternative text);
# * nothing          -- a reference image whose id is the alternative text.
# One space between the text and the '(' or '[' is tolerated.
# Problems (missing url, unquoted title, missing ')', unclosed reference)
# are reported and the image element is still pushed.
def read_image(src, con)
  src.ignore_chars(2) # opening "!["
  alt_text = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket
  # ignore space
  if src.cur_char == SPACE && [?[, ?(].include?(src.next_char)
    src.ignore_char
  end
  case src.cur_char
  when ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE, ?\t, ?)])
    if not url
      error "Could not read url from #{src.cur_chars(10).inspect}",
        src, con
    end
    src.consume_whitespace
    title = nil
    if src.cur_char != ?) # we have a title
      quote_char = src.cur_char
      title = read_quoted(src, con)
      if not title
        maruku_error 'Must quote title', src, con
      else
        # Tries to read a title with quotes: ![a](url "ti"tle")
        # this is the most ugly thing in Markdown
        if not src.next_matches(/\s*\)/)
          # if there is not a closing par ), then read
          # the rest and guess it's title with quotes
          rest = read_simple(src, [], [?)], [])
          # read_simple gives nil when the input ends here; in that case
          # there is nothing to append to the title
          if rest
            # chop the closing char
            rest.chop!
            title << quote_char << rest
          end
        end
      end
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      # closing is nil at end of input, so it cannot be appended directly
      shown = closing ? ("" << closing) : ""
      error("Unclosed link: '#{shown}'"+
        " Read url=#{url.inspect} title=#{title.inspect}", src, con)
    end
    con.push_element md_im_image(alt_text, url, title)
  when ?[ # link ref
    ref_id = read_ref_id(src, con)
    if not ref_id # TODO: check around
      error('Reference not closed.', src, con)
      ref_id = ""
    end
    if ref_id.size == 0
      ref_id = alt_text.to_s
    end

    ref_id = sanitize_ref_id(ref_id)

    con.push_element md_image(alt_text, ref_id)
  else # no stuff
    ref_id = sanitize_ref_id(alt_text.to_s)
    con.push_element md_image(alt_text, ref_id)
  end
end # read link
679
+
680
+
681
# Accumulates what has been read so far in a span: a list of finished
# elements (Strings and MDElements) plus the string currently being built
# character by character.
class SpanContext
  include MaRuKu::Strings

  # Elements read so far
  attr_accessor :elements
  # Text collected since the last element was pushed
  attr_accessor :cur_string

  def initialize
    @elements = []
    @cur_string = ""
  end

  # Appends +e+ (a String or an MDElement) to the elements, after flushing
  # the pending string. Raises for any other kind of object.
  def push_element(e)
    unless e.kind_of?(String) || e.kind_of?(MDElement)
      raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} "
    end

    push_string_if_present
    @elements << e
    nil
  end
  alias push push_element

  # Pushes every item of +a+: strings are appended to the pending string,
  # anything else goes through push_element.
  def push_elements(a)
    a.each do |e|
      if e.kind_of? String
        # Append the string as a whole. Appending it byte by byte would
        # turn each byte of a multi-byte character into a separate
        # codepoint on Ruby 1.9+, corrupting non-ASCII text.
        @cur_string << e
      else
        push_element e
      end
    end
  end

  # Moves the pending string, if not empty, to the elements.
  def push_string_if_present
    if @cur_string.size > 0
      @elements << @cur_string
      @cur_string = ""
    end
    nil
  end

  # Appends a single character to the pending string.
  def push_char(c)
    @cur_string << c
    nil
  end

  # push space into current string if
  # there isn't one
  def push_space
    last = @cur_string[@cur_string.size-1]
    @cur_string << ?\s if last != ?\s
  end

  # Human-readable dump of the elements and the pending string,
  # for error messages.
  def describe
    lines = @elements.map{|x| x.inspect}.join("\n")
    s = "Elements read in span: \n" +
      add_tabs(lines,1, ' -')+"\n"

    if @cur_string.size > 0
      s += "Current string: \n #{@cur_string.inspect}\n"
    end
    s
  end
end # SpanContext
744
+
745
+ end end end end # module MaRuKu; module In; module Markdown; module SpanLevelParser
746
+