maruku 0.2.13 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. data/bin/maruku +23 -15
  2. data/bin/maruku0.3 +37 -0
  3. data/bin/marutest +277 -0
  4. data/docs/changelog-0.3.html +99 -0
  5. data/docs/changelog-0.3.md +84 -0
  6. data/docs/faq.html +46 -0
  7. data/docs/faq.md +32 -0
  8. data/docs/index.html +629 -64
  9. data/docs/markdown_extra2.html +67 -14
  10. data/docs/markdown_syntax.html +631 -94
  11. data/docs/markdown_syntax_2.html +152 -0
  12. data/docs/maruku.html +629 -64
  13. data/docs/maruku.md +108 -105
  14. data/docs/proposal.html +362 -55
  15. data/docs/proposal.md +133 -169
  16. data/docs/todo.html +30 -0
  17. data/lib/maruku.rb +13 -3
  18. data/lib/maruku/errors_management.rb +75 -0
  19. data/lib/maruku/helpers.rb +164 -0
  20. data/lib/maruku/html_helper.rb +33 -13
  21. data/lib/maruku/parse_block.rb +89 -92
  22. data/lib/maruku/parse_doc.rb +43 -18
  23. data/lib/maruku/parse_span.rb +17 -46
  24. data/lib/maruku/parse_span_better.rb +681 -0
  25. data/lib/maruku/string_utils.rb +17 -10
  26. data/lib/maruku/structures.rb +62 -35
  27. data/lib/maruku/structures_iterators.rb +39 -0
  28. data/lib/maruku/tests/benchmark.rb +12 -4
  29. data/lib/maruku/tests/new_parser.rb +318 -0
  30. data/lib/maruku/to_html.rb +113 -44
  31. data/lib/maruku/to_latex.rb +32 -14
  32. data/lib/maruku/to_markdown.rb +110 -0
  33. data/lib/maruku/toc.rb +35 -1
  34. data/lib/maruku/version.rb +10 -1
  35. data/lib/test.rb +29 -0
  36. data/tests/others/escaping.md +6 -4
  37. data/tests/others/links.md +1 -1
  38. data/tests/others/lists_after_paragraph.md +44 -0
  39. data/tests/unittest/abbreviations.md +71 -0
  40. data/tests/unittest/blank.md +43 -0
  41. data/tests/unittest/blanks_in_code.md +131 -0
  42. data/tests/unittest/code.md +64 -0
  43. data/tests/unittest/code2.md +59 -0
  44. data/tests/unittest/code3.md +121 -0
  45. data/tests/unittest/easy.md +36 -0
  46. data/tests/unittest/email.md +39 -0
  47. data/tests/unittest/encoding/iso-8859-1.md +9 -0
  48. data/tests/unittest/encoding/utf-8.md +38 -0
  49. data/tests/unittest/entities.md +174 -0
  50. data/tests/unittest/escaping.md +97 -0
  51. data/tests/unittest/extra_dl.md +81 -0
  52. data/tests/unittest/extra_header_id.md +96 -0
  53. data/tests/unittest/extra_table1.md +78 -0
  54. data/tests/unittest/footnotes.md +120 -0
  55. data/tests/unittest/headers.md +64 -0
  56. data/tests/unittest/hrule.md +77 -0
  57. data/tests/unittest/images.md +114 -0
  58. data/tests/unittest/inline_html.md +185 -0
  59. data/tests/unittest/links.md +162 -0
  60. data/tests/unittest/list1.md +80 -0
  61. data/tests/unittest/list2.md +75 -0
  62. data/tests/unittest/list3.md +111 -0
  63. data/tests/unittest/list4.md +43 -0
  64. data/tests/unittest/lists.md +262 -0
  65. data/tests/unittest/lists_after_paragraph.md +280 -0
  66. data/tests/unittest/lists_ol.md +323 -0
  67. data/tests/unittest/misc_sw.md +751 -0
  68. data/tests/unittest/notyet/escape.md +46 -0
  69. data/tests/unittest/notyet/header_after_par.md +85 -0
  70. data/tests/unittest/notyet/ticks.md +67 -0
  71. data/tests/unittest/notyet/triggering.md +210 -0
  72. data/tests/unittest/one.md +33 -0
  73. data/tests/unittest/paragraph.md +34 -0
  74. data/tests/unittest/paragraph_rules/dont_merge_ref.md +60 -0
  75. data/tests/unittest/paragraph_rules/tab_is_blank.md +43 -0
  76. data/tests/unittest/paragraphs.md +84 -0
  77. data/tests/unittest/recover/recover_links.md +32 -0
  78. data/tests/unittest/references/long_example.md +87 -0
  79. data/tests/unittest/references/spaces_and_numbers.md +27 -0
  80. data/tests/unittest/syntax_hl.md +99 -0
  81. data/tests/unittest/test.md +36 -0
  82. data/tests/unittest/wrapping.md +88 -0
  83. data/tests/utf8-files/simple.md +1 -0
  84. metadata +139 -86
  85. data/lib/maruku/maruku.rb +0 -50
  86. data/tests/a.md +0 -10
@@ -16,14 +16,17 @@
16
16
  # along with Maruku; if not, write to the Free Software
17
17
  # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18
18
 
19
+ require 'iconv'
20
+
19
21
  class Maruku
20
- def initialize(s=nil)
22
+ def initialize(s=nil, meta={})
21
23
  @node_type = :document
22
24
  @doc = self
23
25
 
24
26
  @refs = {}
25
27
  @footnotes = {}
26
28
  @abbreviations = {}
29
+ @meta = meta
27
30
 
28
31
  parse_doc(s) if s
29
32
  end
@@ -32,14 +35,31 @@ class Maruku
32
35
  # setup initial stack
33
36
  @stack = []
34
37
 
35
- @meta = parse_email_headers(s)
36
- data = @meta[:data]
37
- @meta.delete :data
38
- lines = split_lines(data)
38
+ meta2 = parse_email_headers(s)
39
+ data = meta2[:data]
40
+ meta2.delete :data
41
+
42
+ @meta.merge! meta2
43
+
44
+ enc = @meta[:encoding]
45
+ @meta.delete :encoding
46
+ if enc && enc.downcase != 'utf-8'
47
+ # puts "Converting from #{enc} to UTF-8."
48
+ converted = Iconv.new('utf-8', enc).iconv(data)
49
+
50
+ # puts "Data: #{data.inspect}: #{data}"
51
+ # puts "Conv: #{converted.inspect}: #{converted}"
52
+
53
+ data = converted
54
+ end
55
+
56
+ lines = Maruku.split_lines(data)
39
57
  @children = parse_lines_as_markdown(lines)
40
58
 
41
- self.search_abbreviations
42
- self.substitute_markdown_inside_raw_html
59
+ if true #markdown_extra?
60
+ self.search_abbreviations
61
+ self.substitute_markdown_inside_raw_html
62
+ end
43
63
 
44
64
  toc = create_toc
45
65
 
@@ -47,24 +67,28 @@ class Maruku
47
67
  if not self.meta[:title] and toc.header_element
48
68
  title = toc.header_element.to_s
49
69
  self.meta[:title] = title
50
- puts "Set document title to #{title}"
70
+ # puts "Set document title to #{title}"
51
71
  end
52
72
 
53
73
  # save for later use
54
74
  self.toc = toc
55
75
 
56
- #puts toc.inspect
76
+ # puts self.inspect
57
77
  end
58
78
 
59
79
  def search_abbreviations
60
80
  @abbreviations.each do |abbrev, title|
61
- # puts "#{abbrev} => #{title}"
62
- self.map_match(Regexp.new(Regexp.escape(abbrev))) {
63
- e = create_md_element(:abbreviation)
64
- e.children = [abbrev.dup]
65
- e.meta[:title] = title.dup if title
66
- e
67
- }
81
+ reg = Regexp.new(Regexp.escape(abbrev))
82
+ self.replace_each_string do |s|
83
+ if m = reg.match(s)
84
+ e = create_md_element(:abbreviation)
85
+ e.children = [abbrev.dup]
86
+ e.meta[:title] = title.dup if title
87
+ [m.pre_match, e, m.post_match]
88
+ else
89
+ s
90
+ end
91
+ end
68
92
  end
69
93
  end
70
94
 
@@ -72,7 +96,7 @@ class Maruku
72
96
  # markdown=1 or markdown=block defined
73
97
  def substitute_markdown_inside_raw_html
74
98
  self.each_element(:raw_html) do |e|
75
- doc = e.meta[:parsed_html]
99
+ doc = e.instance_variable_get :@parsed_html
76
100
  if doc # valid html
77
101
  # parse block-level markdown elements in these HTML tags
78
102
  block_tags = ['div']
@@ -89,7 +113,7 @@ class Maruku
89
113
  s = original_text.to_s.strip # XXX
90
114
  el = create_md_element(:dummy,
91
115
  parse_blocks ? parse_text_as_markdown(s) :
92
- parse_lines_as_span(s) )
116
+ parse_lines_as_span([s]) )
93
117
  el.children_to_html.each do |x|
94
118
  e.insert_before(original_text, x)
95
119
  end
@@ -100,5 +124,6 @@ class Maruku
100
124
  end
101
125
  end
102
126
  end
127
+
103
128
 
104
129
  end
@@ -31,8 +31,7 @@ class Maruku
31
31
  # first, get rid of linebreaks
32
32
  res = resolve_linebreaks(lines)
33
33
 
34
- span = MDElement.new
35
- span.children = res
34
+ span = MDElement.new(:dummy, res)
36
35
 
37
36
  # encode all escapes
38
37
  span.replace_each_string { |s| s.escape_md_special }
@@ -97,15 +96,8 @@ class Maruku
97
96
 
98
97
  for reg in [inlineHTML1, inlineHTML2]
99
98
  span.map_match(reg) { |match|
100
- raw_html = (match[1] || raise("No html?"))
101
- e = create_md_element(:raw_html)
102
- e.meta[:raw_html] = raw_html
103
- begin
104
- e.meta[:parsed_html] = Document.new(raw_html)
105
- rescue
106
- $stderr.puts "Malformed HTML:\n#{raw_html}"
107
- end
108
- e
99
+ raw_html = match[1]
100
+ convert_raw_html_in_list(raw_html)
109
101
  }
110
102
  end
111
103
 
@@ -321,44 +313,23 @@ class Maruku
321
313
  res
322
314
  end
323
315
 
316
+ # raw_html is something like
317
+ # <em> A</em> dopwkk *maruk* <em>A</em>
318
+ def convert_raw_html_in_list(raw_html)
319
+ e = create_md_element(:raw_html)
320
+ e.meta[:raw_html] = raw_html
321
+ begin
322
+ e.meta[:parsed_html] = Document.new(raw_html)
323
+ rescue
324
+ $stderr.puts "convert_raw_html_in_list Malformed HTML:\n#{raw_html}"
325
+ end
326
+ e
327
+ end
328
+
324
329
  end
325
330
 
326
331
  # And now the black magic that makes the part above so elegant
327
-
328
- class MDElement
329
-
330
- # yields to each element of specified node_type
331
- def each_element(e_node_type, &block)
332
- @children.each do |c|
333
- if c.kind_of? MDElement
334
- if (not e_node_type) || (e_node_type == c.node_type)
335
- block.call c
336
- end
337
- c.each_element(e_node_type, &block)
338
- end
339
- end
340
- end
341
-
342
- # Apply passed block to each String in the hierarchy.
343
- def replace_each_string(&block)
344
- for c in @children
345
- if c.kind_of? MDElement
346
- c.replace_each_string(&block)
347
- end
348
- end
349
-
350
- processed = []
351
- until @children.empty?
352
- c = @children.shift
353
- if c.kind_of? String
354
- result = block.call(c)
355
- [*result].each do |e| processed << e end
356
- else
357
- processed << c
358
- end
359
- end
360
- @children = processed
361
- end
332
+ class MDElement
362
333
 
363
334
  # Try to match the regexp to each string in the hierarchy
364
335
  # (using `replace_each_string`). If the regexp match, eliminate
@@ -0,0 +1,681 @@
1
+ require 'set'
2
+
3
+ class Maruku
4
+ include Helpers
5
+
6
+ EscapedCharInText =
7
+ Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>]
8
+
9
+ EscapedCharInQuotes =
10
+ Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>,?',?"]
11
+
12
+ EscapedCharInInlineCode = [?\\,?`]
13
+
14
+ def parse_lines_as_span(lines)
15
+ parse_span_better lines.join("\n")
16
+ end
17
+
18
+ def parse_span_better(string)
19
+ if not string.kind_of? String then
20
+ error "Passed #{string.class}." end
21
+
22
+ st = (string + "")
23
+ st.freeze
24
+ src = CharSource.new(st)
25
+ read_span(src, EscapedCharInText, [nil])
26
+ end
27
+
28
+ # This is the main loop for reading span elements
29
+ #
30
+ # It's long, but not *complex* or difficult to understand.
31
+ #
32
+ #
33
+ def read_span(src, escaped, exit_on_chars, exit_on_strings=nil)
34
+ con = SpanContext.new
35
+ c = d = nil
36
+ while true
37
+ c = src.cur_char
38
+
39
+ if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z)))
40
+ # src.read_text_chars con.cur_string
41
+ con.cur_string << src.shift_char
42
+ next
43
+ end
44
+
45
+ break if exit_on_chars && exit_on_chars.include?(c)
46
+ break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x}
47
+
48
+ case c
49
+ when ?\ # it's space (32)
50
+ if src.cur_chars_are " \n"
51
+ src.ignore_chars(3)
52
+ con.push_element create_md_element(:linebreak)
53
+ next
54
+ else
55
+ src.ignore_char
56
+ con.push_space
57
+ end
58
+ when ?\n, ?\t
59
+ src.ignore_char
60
+ con.push_space
61
+ when ?`
62
+ read_inline_code(src,con)
63
+ when ?<
64
+ # It could be:
65
+ # 1) HTML "<div ..."
66
+ # 2) HTML "<!-- ..."
67
+ # 3) url "<http:// ", "<ftp:// ..."
68
+ # 4) email "<andrea@... ", "<mailto:andrea@..."
69
+ # 5) on itself! "a < b "
70
+
71
+ case d = src.next_char
72
+ when ?!;
73
+ if src.cur_chars_are '<!--'
74
+ read_inline_html(src, con)
75
+ else
76
+ con.push_char src.shift_char
77
+ end
78
+ when ??; read_server_directive
79
+ when ?\ , ?\t
80
+ con.push_char src.shift_char
81
+ else;
82
+ if src.next_matches(/<mailto:/) or
83
+ src.next_matches(/<[\w\.]+\@/)
84
+ read_email_el(src, con)
85
+ elsif src.next_matches(/<\w+:/)
86
+ read_url_el(src, con)
87
+ elsif src.next_matches(/<\w/)
88
+ # puts "This is HTML: #{src.cur_chars(20)}"
89
+ read_inline_html(src, con)
90
+ else
91
+ # puts "This is NOT HTML: #{src.cur_chars(20)}"
92
+ con.push_char src.shift_char
93
+ end
94
+ end
95
+ when ?\\
96
+ d = src.next_char
97
+ if escaped.include? d
98
+ src.ignore_chars(2)
99
+ con.push_char d
100
+ else
101
+ con.push_char src.shift_char
102
+ end
103
+ when ?[
104
+ if markdown_extra? && src.next_char == ?^
105
+ read_footnote_ref(src,con)
106
+ else
107
+ read_link(src, con)
108
+ end
109
+ when ?!
110
+ if src.next_char == ?[
111
+ read_image(src, con)
112
+ else
113
+ con.push_char src.shift_char
114
+ end
115
+ when ?&
116
+ if m = src.read_regexp(/&([\w\d]+);/)
117
+ con.push_element md_entity(m[1])
118
+ else
119
+ con.push_char src.shift_char
120
+ end
121
+ when ?*
122
+ if not src.next_char
123
+ error "Opening * as last char", src, con
124
+ tell_user "Threating as literal"
125
+ con.push_char src.shift_char
126
+ else
127
+ follows = src.cur_chars(4)
128
+ if follows =~ /^\*\*\*[^\s\*]/
129
+ con.push_element read_emstrong(src,'***')
130
+ elsif follows =~ /^\*\*[^\s\*]/
131
+ con.push_element read_strong(src,'**')
132
+ elsif follows =~ /^\*[^\s\*]/
133
+ con.push_element read_em(src,'*')
134
+ else # * is just a normal char
135
+ con.push_char src.shift_char
136
+ end
137
+ end
138
+ when ?_
139
+ if not src.next_char
140
+ error "Opening _ as last char", src, con
141
+ tell_user "Threating as literal"
142
+ con.push_char src.shift_char
143
+ else
144
+ follows = src.cur_chars(4)
145
+ if follows =~ /^\_\_\_[^\s\_]/
146
+ con.push_element read_emstrong(src,'___')
147
+ elsif follows =~ /^\_\_[^\s\_]/
148
+ con.push_element read_strong(src,'__')
149
+ elsif follows =~ /^\_[^\s\_]/
150
+ con.push_element read_em(src,'_')
151
+ else # _ is just a normal char
152
+ con.push_char src.shift_char
153
+ end
154
+ end
155
+ when nil
156
+ error ("Unclosed span (waiting for %s"+
157
+ "#{exit_on_strings.inspect})") % [
158
+ exit_on_chars ? "#{exit_on_chars.inspect} or" : ""],
159
+ src,con
160
+
161
+ tell_user "I will boldly go ahead."
162
+ break
163
+ else # normal text
164
+ con.push_char src.shift_char
165
+ end # end case
166
+ end # end while true
167
+ con.push_string_if_present
168
+ con.elements
169
+ end
170
+
171
+ def read_url_el(src,con)
172
+ src.ignore_char # leading <
173
+ url = read_simple(src, [], [?>])
174
+ src.ignore_char # closing >
175
+
176
+ con.push_element md_url(url)
177
+ end
178
+
179
+ def read_email_el(src,con)
180
+ src.ignore_char # leading <
181
+ mail = read_simple(src, [], [?>])
182
+ src.ignore_char # closing >
183
+
184
+ address = mail.gsub(/^mailto:/,'')
185
+ con.push_element md_email(address)
186
+ end
187
+
188
+ def read_url(src, break_on)
189
+ if [?',?"].include? src.cur_char
190
+ error 'Invalid char for url', src
191
+ end
192
+
193
+ url = read_simple(src, [], break_on)
194
+
195
+ if url[0] == ?< && url[-1] == ?>
196
+ url = url[1, url.size-2]
197
+ end
198
+
199
+ if url.size == 0
200
+ return nil
201
+ end
202
+
203
+ url
204
+ end
205
+
206
+ # Tries to read a quoted value. If stream does not
207
+ # start with ' or ", returns nil.
208
+ def read_quoted(src,con)
209
+ case src.cur_char
210
+ when ?', ?"
211
+ quote_char = src.shift_char # opening quote
212
+ string = read_simple(src, EscapedCharInQuotes, [quote_char])
213
+ src.ignore_char # closing quote
214
+ return string
215
+ else
216
+ # puts "Asked to read quote from: #{src.cur_chars(10).inspect}"
217
+ return nil
218
+ end
219
+ end
220
+
221
+ # Reads a simple string (no formatting) until one of break_on_chars,
222
+ # while escaping the escaped
223
+ def read_simple(src, escaped, exit_on_chars)
224
+ text = ""
225
+ while true
226
+ # puts "Reading simple #{text.inspect}"
227
+ c = src.cur_char
228
+ if exit_on_chars && exit_on_chars.include?(c)
229
+ # puts (" breaking on "<<c)+" contained in "+exit_on_chars.inspect
230
+ break
231
+ end
232
+ case c
233
+ when nil
234
+ s= "String finished while reading (break on #{exit_on_chars.inspect})"+
235
+ " already read: #{text.inspect}"
236
+ error s, src
237
+ tell_user "I boldly continue"
238
+ break
239
+ when ?\\
240
+ d = src.next_char
241
+ if escaped.include? d
242
+ src.ignore_chars(2)
243
+ text << d
244
+ else
245
+ text << src.shift_char
246
+ end
247
+ else
248
+ text << src.shift_char
249
+ end
250
+ end
251
+ # puts "Read simple #{text.inspect}"
252
+ text
253
+ end
254
+
255
+ def read_em(src, delim)
256
+ src.ignore_char
257
+ children = read_span(src, EscapedCharInText, nil, [delim])
258
+ src.ignore_char
259
+ md_em(children)
260
+ end
261
+
262
+ def read_strong(src, delim)
263
+ src.ignore_chars(2)
264
+ children = read_span(src, EscapedCharInText, nil, [delim])
265
+ src.ignore_chars(2)
266
+ md_strong(children)
267
+ end
268
+
269
+ def read_emstrong(src, delim)
270
+ src.ignore_chars(3)
271
+ children = read_span(src, EscapedCharInText, nil, [delim])
272
+ src.ignore_chars(3)
273
+ md_emstrong(children)
274
+ end
275
+
276
+ SPACE = ?\ # = 32
277
+
278
+ # R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
279
+ R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
280
+
281
+ # Reads a bracketed id "[refid]". Consumes also both brackets.
282
+ def read_ref_id(src, con)
283
+ src.ignore_char # [
284
+ src.consume_whitespace
285
+ # puts "Next: #{src.cur_chars(10).inspect}"
286
+ if m = src.read_regexp(R_REF_ID)
287
+ # puts "Got: #{m[1].inspect} Ignored: #{m[2].inspect}"
288
+ # puts "Then: #{src.cur_chars(10).inspect}"
289
+ m[1]
290
+ else
291
+ nil
292
+ end
293
+ end
294
+
295
+ def read_footnote_ref(src,con)
296
+ ref = read_ref_id(src,con)
297
+ con.push_element md_foot_ref(ref)
298
+ end
299
+
300
+ def read_inline_html(src, con)
301
+ h = HTMLHelper.new
302
+ begin
303
+ # This is our current buffer in the context
304
+ start = src.current_remaining_buffer
305
+
306
+ h.eat_this start
307
+ if not h.is_finished?
308
+ error "inline_html: Malformed:\n "+
309
+ "#{start.inspect}\n #{h.inspect}",src,con
310
+ end
311
+
312
+ consumed = start.size - h.rest.size
313
+ if consumed > 0
314
+ con.push_element md_html(h.stuff_you_read)
315
+ src.ignore_chars(consumed)
316
+ else
317
+ puts "HTML helper did not work on #{start.inspect}"
318
+ con.push_char src.shift_char
319
+ end
320
+ rescue Exception => e
321
+ maruku_error "Bad html: \n" +
322
+ add_tabs(e.inspect+e.backtrace.join("\n"),1,'>'),
323
+ src,con
324
+ tell_user "I will try to continue after bad HTML."
325
+ con.push_char src.shift_char
326
+ end
327
+ end
328
+
329
+ def read_inline_code(src, con)
330
+ num_ticks = 0
331
+
332
+ while src.cur_char == ?`
333
+ num_ticks += 1
334
+ src.ignore_char
335
+ end
336
+
337
+
338
+ # ignore space
339
+ if num_ticks > 1 && src.cur_char == SPACE
340
+ src.ignore_char
341
+ end
342
+
343
+ # puts "Ticks: #{num_ticks } next: #{src.some} "
344
+
345
+ end_string = "`"*num_ticks
346
+
347
+ code = ''
348
+ while true
349
+ if not src.cur_char
350
+ error("Ticks not finished: read #{code.inspect}"+
351
+ " and waiting for #{end_string.inspect} num=#{num_ticks}",
352
+ src,con)
353
+ tell_user "Read invalid code block: #{code.inspect}"
354
+ break
355
+ end
356
+
357
+ if src.cur_chars(num_ticks) ==end_string # bah
358
+ # puts "Breaking on #{src.some} (end:#{end_string.inspect})"
359
+ src.ignore_chars num_ticks
360
+ break
361
+ end
362
+
363
+ code << src.shift_char
364
+ end
365
+
366
+ # drop last space
367
+ if num_ticks > 1 && code[-1] == SPACE
368
+ code = code[0,code.size-1]
369
+ end
370
+
371
+ # puts "Read `` code: #{code.inspect}; after: #{src.cur_chars(10).inspect} "
372
+ con.push_element md_code(code)
373
+ end
374
+
375
+
376
+
377
+ def read_server_directive
378
+ # match = gimme(/^(.*)\?>/)
379
+ # if not match
380
+ # error "Server directive not closed"
381
+ # end
382
+ # server = match[1]
383
+ # con.found_object create_md_element(:server, server)
384
+ end
385
+
386
+ def read_link(src, con)
387
+ # we read the string and see what happens
388
+ src.ignore_char # opening bracket
389
+ children = read_span(src, EscapedCharInText, [?]])
390
+ src.ignore_char # closing bracket
391
+
392
+ # ignore space
393
+ if src.cur_char == SPACE and
394
+ (src.next_char == ?[ or src.next_char == ?( )
395
+ src.shift_char
396
+ end
397
+ case src.cur_char
398
+ when ?(
399
+ src.ignore_char # opening (
400
+ src.consume_whitespace
401
+ url = read_url(src, [SPACE,?\t,?)])
402
+ if not url
403
+ url = '' # no url is ok
404
+ end
405
+ src.consume_whitespace
406
+ title = nil
407
+ if src.cur_char != ?) # we have a title
408
+ title = read_quoted(src,con)
409
+ end
410
+ src.consume_whitespace
411
+ closing = src.shift_char # closing )
412
+ if closing != ?)
413
+ error 'Unclosed link',src,con
414
+ tell_user "No closing ): I will not create"+
415
+ " the link for #{children.inspect}"
416
+ con.push_elements children
417
+ return
418
+ end
419
+ con.push_element md_im_link(children,url, title)
420
+ when ?[ # link ref
421
+ ref_id = read_ref_id(src,con)
422
+ if ref_id
423
+ con.push_element md_link(children, ref_id)
424
+ else
425
+ maruku_error "Could not read ref_id", src, con
426
+ tell_user "I will not create the link for #{children.inspect}"
427
+ con.push_elements children
428
+ return
429
+ end
430
+ else # no stuff
431
+ con.push_elements children
432
+ end
433
+ end # read link
434
+
435
+ def read_image(src, con)
436
+ src.ignore_chars(2) # opening "!["
437
+ alt_text = read_span(src, EscapedCharInText, [?]])
438
+ src.ignore_char # closing bracket
439
+ # ignore space
440
+ if src.cur_char == SPACE and
441
+ (src.next_char == ?[ or src.next_char == ?( )
442
+ src.ignore_char
443
+ end
444
+ case src.cur_char
445
+ when ?(
446
+ src.ignore_char # opening (
447
+ src.consume_whitespace
448
+ url = read_url(src, [SPACE,?\t,?)])
449
+ if not url
450
+ error "Could not read url from #{src.cur_chars(10).inspect}",
451
+ src,con
452
+ end
453
+ src.consume_whitespace
454
+ title = nil
455
+ if src.cur_char != ?) # we have a title
456
+ title = read_quoted(src,con)
457
+ error 'Must quote title',src,con if not title
458
+ end
459
+ src.consume_whitespace
460
+ closing = src.shift_char # closing )
461
+ if closing != ?)
462
+ error ("Unclosed link: '"<<closing<<"'")+
463
+ " Read url=#{url.inspect} title=#{title.inspect}",src,con
464
+ end
465
+ con.push_element md_im_image(alt_text, url, title)
466
+ when ?[ # link ref
467
+ ref_id = read_ref_id(src,con)
468
+ con.push_element md_image(alt_text, ref_id)
469
+ else # no stuff
470
+ con.push_elements children
471
+ end
472
+ end # read link
473
+
474
+ end
475
+
476
+
477
+ class SpanContext
478
+ include MarukuStrings
479
+
480
+ # Read elements
481
+ attr_accessor :elements
482
+ attr_accessor :cur_string
483
+
484
+ def initialize
485
+ @elements = []
486
+ @cur_string = ""
487
+ end
488
+
489
+ def push_element(e)
490
+ raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} " if
491
+ not (e.kind_of?(String) or e.kind_of?(MDElement))
492
+
493
+ push_string_if_present
494
+ @elements << e
495
+ nil
496
+ end
497
+
498
+ def push_elements(a)
499
+ for e in a
500
+ if e.kind_of? String
501
+ e.each_byte do |b| push_char b end
502
+ else
503
+ push_element e
504
+ end
505
+ end
506
+ end
507
+ def push_string_if_present
508
+ if @cur_string.size > 0
509
+ @elements << @cur_string
510
+ @cur_string = ""
511
+ end
512
+ nil
513
+ end
514
+
515
+ def push_char(c)
516
+ @cur_string << c
517
+ nil
518
+ end
519
+
520
+ # push space into current string if
521
+ # there isn't one
522
+ def push_space
523
+ last = @cur_string[@cur_string.size-1]
524
+ @cur_string << ?\ if last != ?\
525
+ end
526
+
527
+ def describe
528
+ lines = @elements.map{|x| x.inspect}.join("\n")
529
+ s = "Elements read in span: \n" +
530
+ add_tabs(lines,1, ' -')+"\n"
531
+
532
+ if @cur_string.size > 0
533
+ s += "Current string: \n #{@cur_string.inspect}\n"
534
+ end
535
+ s
536
+ end
537
+
538
+ end
539
+
540
+ class CharSource
541
+ include MarukuStrings
542
+
543
+ def initialize(s)
544
+ @elements = []
545
+ @cur_string = ""
546
+ @buffer = s
547
+ @buffer_index = 0
548
+ end
549
+
550
+ # Return current char as a FixNum (or nil).
551
+ def cur_char; @buffer[@buffer_index] end
552
+
553
+ # Return the next n chars as a String.
554
+ def cur_chars(n); @buffer[@buffer_index,n] end
555
+
556
+ # Return the char after current char as a FixNum (or nil).
557
+ def next_char; @buffer[@buffer_index+1] end
558
+
559
+ def shift_char
560
+ c = @buffer[@buffer_index]
561
+ @buffer_index+=1
562
+ c
563
+ end
564
+
565
+ def ignore_char
566
+ @buffer_index+=1
567
+ end
568
+
569
+ def ignore_chars(n)
570
+ @buffer_index+=n
571
+ nil
572
+ end
573
+
574
+ def current_remaining_buffer
575
+ @buffer[@buffer_index, @buffer.size-@buffer_index]
576
+ end
577
+
578
+ def cur_chars_are(string)
579
+ r2 = /^.{#{@buffer_index}}#{Regexp.escape string}/m
580
+ @buffer =~ r2
581
+ end
582
+
583
+ def next_matches(r)
584
+ r2 = /^.{#{@buffer_index}}#{r}/m
585
+ r2.match @buffer
586
+ end
587
+
588
+ def read_regexp(r)
589
+ r2 = /^.{#{@buffer_index}}#{r}/m
590
+ m = r2.match @buffer
591
+ if m
592
+ consumed = m.to_s.size - @buffer_index
593
+ # puts "Consumed #{consumed} chars (entire is #{m.to_s.inspect})"
594
+ ignore_chars consumed
595
+ else
596
+ # puts "Could not read regexp #{r2.inspect} from buffer "+
597
+ # " index=#{@buffer_index}"
598
+ # puts "Cur chars = #{cur_chars(20).inspect}"
599
+ # puts "Matches? = #{cur_chars(20) =~ r}"
600
+ end
601
+ m
602
+ end
603
+
604
+ def consume_whitespace
605
+ while c = cur_char
606
+ if (c == 32 || c == ?\t)
607
+ # puts "ignoring #{c}"
608
+ ignore_char
609
+ else
610
+ # puts "#{c} is not ws: "<<c
611
+ break
612
+ end
613
+ end
614
+ end
615
+
616
+ def read_text_chars(out)
617
+ s = @buffer.size; c=nil
618
+ while @buffer_index < s && (c=@buffer[@buffer_index]) &&
619
+ ((c>=?a && c<=?z) || (c>=?A && c<=?Z))
620
+ out << c
621
+ @buffer_index += 1
622
+ end
623
+ end
624
+
625
+ def describe
626
+
627
+ len = 75
628
+ num_before = [len/2, @buffer_index].min
629
+ num_after = [len/2, @buffer.size-@buffer_index].min
630
+ num_before_max = @buffer_index
631
+ num_after_max = @buffer.size-@buffer_index
632
+
633
+ # puts "num #{num_before} #{num_after}"
634
+ num_before = [num_before_max, len-num_after].min
635
+ num_after = [num_after_max, len-num_before].min
636
+ # puts "num #{num_before} #{num_after}"
637
+
638
+ index_start = [@buffer_index - num_before, 0].max
639
+ index_end = [@buffer_index + num_after, @buffer.size].min
640
+
641
+ size = index_end- index_start
642
+
643
+ # puts "- #{index_start} #{size}"
644
+
645
+ str = @buffer[index_start, size]
646
+ str.gsub!("\n",'N')
647
+ str.gsub!("\t",'T')
648
+
649
+ if index_end == @buffer.size
650
+ str += "EOF"
651
+ end
652
+
653
+ pre_s = @buffer_index-index_start
654
+ pre_s = [pre_s, 0].max
655
+ pre_s2 = [len-pre_s,0].max
656
+ # puts "pre_S = #{pre_s}"
657
+ pre =" "*(pre_s)
658
+
659
+ "-"*len+"\n"+
660
+ str + "\n" +
661
+ "-"*pre_s + "|" + "-"*(pre_s2)+"\n"+
662
+ # pre + "|\n"+
663
+ pre + "+--- Byte #{@buffer_index}\n"+
664
+
665
+
666
+ "Shown bytes [#{index_start} to #{size}] of #{@buffer.size}:\n"+
667
+ add_tabs(@buffer,1,">")
668
+
669
+ # "CharSource: At character #{@buffer_index} of block "+
670
+ # " beginning with:\n #{@buffer[0,50].inspect} ...\n"+
671
+ # " before: \n ... #{cur_chars(50).inspect} ... "
672
+ end
673
+
674
+ def some
675
+ cur_chars(15).inspect
676
+ end
677
+ end
678
+
679
+
680
+
681
+