maruku 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. data/bin/{maruku0.3 → marudown} +6 -14
  2. data/bin/maruku +1 -1
  3. data/bin/marutest +37 -9
  4. data/docs/TOFIX.html +22 -0
  5. data/docs/TOFIX.md +3 -0
  6. data/docs/changelog-0.2.13.html +30 -0
  7. data/docs/changelog-0.2.13.md +6 -0
  8. data/docs/changelog-0.3.html +19 -5
  9. data/docs/faq.html +51 -40
  10. data/docs/faq.md +3 -3
  11. data/docs/hidden_o_n_squared.md +10 -0
  12. data/docs/index.html +84 -396
  13. data/docs/markdown_syntax.html +139 -330
  14. data/docs/markdown_syntax.md +80 -93
  15. data/docs/maruku.html +84 -396
  16. data/docs/maruku.md +88 -158
  17. data/docs/proposal.html +13 -106
  18. data/docs/proposal.md +3 -3
  19. data/docs/todo.html +38 -28
  20. data/lib/maruku.rb +77 -11
  21. data/lib/maruku/attributes.rb +186 -0
  22. data/lib/maruku/defaults.rb +40 -0
  23. data/lib/maruku/errors_management.rb +55 -39
  24. data/lib/maruku/helpers.rb +156 -72
  25. data/lib/maruku/input/charsource.rb +319 -0
  26. data/lib/maruku/{html_helper.rb → input/html_helper.rb} +30 -9
  27. data/lib/maruku/input/linesource.rb +111 -0
  28. data/lib/maruku/input/parse_block.rb +562 -0
  29. data/lib/maruku/{parse_doc.rb → input/parse_doc.rb} +60 -28
  30. data/lib/maruku/{parse_span_better.rb → input/parse_span_better.rb} +226 -256
  31. data/lib/maruku/input/type_detection.rb +137 -0
  32. data/lib/maruku/maruku.rb +33 -0
  33. data/lib/maruku/{to_html.rb → output/to_html.rb} +151 -132
  34. data/lib/maruku/{to_latex.rb → output/to_latex.rb} +31 -35
  35. data/lib/maruku/{to_latex_entities.rb → output/to_latex_entities.rb} +25 -3
  36. data/lib/maruku/output/to_latex_strings.rb +64 -0
  37. data/lib/maruku/output/to_markdown.rb +164 -0
  38. data/lib/maruku/{to_s.rb → output/to_s.rb} +6 -0
  39. data/lib/maruku/string_utils.rb +12 -181
  40. data/lib/maruku/structures.rb +91 -67
  41. data/lib/maruku/structures_inspect.rb +78 -0
  42. data/lib/maruku/structures_iterators.rb +24 -2
  43. data/lib/maruku/tests/benchmark.rb +41 -9
  44. data/lib/maruku/tests/new_parser.rb +317 -286
  45. data/lib/maruku/tests/tests.rb +20 -0
  46. data/lib/maruku/toc.rb +64 -64
  47. data/lib/maruku/usage/example1.rb +33 -0
  48. data/lib/maruku/version.rb +8 -2
  49. data/tests/unittest/abbreviations.md +27 -16
  50. data/tests/unittest/attributes/attributes.md +89 -0
  51. data/tests/unittest/attributes/circular.md +51 -0
  52. data/tests/unittest/attributes/default.md +47 -0
  53. data/tests/unittest/blank.md +10 -6
  54. data/tests/unittest/blanks_in_code.md +26 -26
  55. data/tests/unittest/code.md +9 -9
  56. data/tests/unittest/code2.md +12 -13
  57. data/tests/unittest/code3.md +34 -34
  58. data/tests/unittest/easy.md +9 -7
  59. data/tests/unittest/email.md +9 -7
  60. data/tests/unittest/encoding/iso-8859-1.md +41 -4
  61. data/tests/unittest/encoding/utf-8.md +6 -5
  62. data/tests/unittest/entities.md +52 -80
  63. data/tests/unittest/escaping.md +47 -35
  64. data/tests/unittest/extra_dl.md +19 -29
  65. data/tests/unittest/extra_header_id.md +31 -24
  66. data/tests/unittest/extra_table1.md +14 -32
  67. data/tests/unittest/footnotes.md +58 -42
  68. data/tests/unittest/headers.md +11 -11
  69. data/tests/unittest/hrule.md +14 -24
  70. data/tests/unittest/images.md +41 -26
  71. data/tests/unittest/inline_html.md +104 -56
  72. data/tests/unittest/inline_html2.md +38 -0
  73. data/tests/unittest/links.md +74 -33
  74. data/tests/unittest/list1.md +18 -15
  75. data/tests/unittest/list2.md +31 -13
  76. data/tests/unittest/list3.md +29 -28
  77. data/tests/unittest/list4.md +103 -12
  78. data/tests/unittest/lists.md +86 -53
  79. data/tests/unittest/lists6.md +53 -0
  80. data/tests/unittest/lists7.md +31 -0
  81. data/tests/unittest/lists_after_paragraph.md +105 -71
  82. data/tests/unittest/lists_ol.md +149 -73
  83. data/tests/unittest/misc_sw.md +366 -326
  84. data/tests/unittest/notyet/escape.md +10 -10
  85. data/tests/unittest/notyet/header_after_par.md +20 -14
  86. data/tests/unittest/notyet/ticks.md +8 -35
  87. data/tests/unittest/notyet/triggering.md +72 -45
  88. data/tests/unittest/olist.md +78 -0
  89. data/tests/unittest/one.md +5 -3
  90. data/tests/unittest/paragraph.md +5 -3
  91. data/tests/unittest/paragraph_rules/dont_merge_ref.md +15 -9
  92. data/tests/unittest/paragraph_rules/tab_is_blank.md +9 -5
  93. data/tests/unittest/paragraphs.md +21 -26
  94. data/tests/unittest/recover/recover_links.md +6 -5
  95. data/tests/unittest/references/long_example.md +39 -30
  96. data/tests/unittest/references/spaces_and_numbers.md +2 -2
  97. data/tests/unittest/syntax_hl.md +33 -31
  98. data/tests/unittest/test.md +4 -6
  99. data/tests/unittest/wrapping.md +43 -26
  100. metadata +160 -139
  101. data/docs/markdown_extra2.html +0 -87
  102. data/docs/markdown_extra2.md +0 -83
  103. data/docs/markdown_syntax_2.html +0 -152
  104. data/lib/maruku/parse_block.rb +0 -564
  105. data/lib/maruku/parse_span.rb +0 -451
  106. data/lib/maruku/to_latex_strings.rb +0 -59
  107. data/lib/maruku/to_markdown.rb +0 -110
  108. data/lib/test.rb +0 -29
@@ -1,451 +0,0 @@
1
- # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
2
- #
3
- # This file is part of Maruku.
4
- #
5
- # Maruku is free software; you can redistribute it and/or modify
6
- # it under the terms of the GNU General Public License as published by
7
- # the Free Software Foundation; either version 2 of the License, or
8
- # (at your option) any later version.
9
- #
10
- # Maruku is distributed in the hope that it will be useful,
11
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- # GNU General Public License for more details.
14
- #
15
- # You should have received a copy of the GNU General Public License
16
- # along with Maruku; if not, write to the Free Software
17
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18
-
19
-
20
- # There are two black-magic methods `match_couple_of` and `map_match`,
21
- # defined at the end of the file, that make the function
22
- # `parse_lines_as_span` so elegant.
23
-
24
class Maruku

  # Takes care of all span-level formatting: inline code, immediate links,
  # inline HTML, footnote references, images, links, e-mail addresses,
  # entities and emphasis.
  #
  # +lines+ is an array of strings that must not contain block-level
  # elements. Returns the children of a temporary :dummy element, i.e. an
  # array mixing Strings and the elements created by +create_md_element+.
  #
  # The order of processing is significant:
  #   1. inline code
  #   2. immediate links
  #   3. inline HTML
  #   4. everything else
  def parse_lines_as_span(lines)
    # first, get rid of linebreaks
    span = MDElement.new(:dummy, resolve_linebreaks(lines))

    # encode all escapes, so that escaped markers are invisible to the
    # passes below; they are decoded again at the very end
    span.replace_each_string { |s| s.escape_md_special }

    # ``code`` markers first, then `single tick` markers
    ['``', '`'].each do |ticks|
      span.match_couple_of(ticks) do |children, _open, _close|
        code = create_md_element(:inline_code)
        # this is now opaque to processing
        code.meta[:raw_code] = children.join('').it_was_a_code_block
        code
      end
    end

    # Detect any immediate link: <http://www.google.com>
    # we expect an http: or something: at the beginning
    span.map_match(/<(\w+:[^\>]+)>/) do |match|
      link = create_md_element(:immediate_link, [])
      link.meta[:url] = match[1]
      link
    end

    # Inline HTML (the support is pretty basic for now).
    # This searches for a matching open/close pair...
    paired_html = %r{
      (         # put everything in 1
      <         # open
      (\w+)     # opening tag in 2
      >         # close
      .*        # anything
      </\2>     # match closing tag
      )
    }x

    # ...and this one for a single self-closed tag.
    single_html = %r{
      (         # put everything in 1
      <         # open
      \w+       # tag name
      [^<>]*    # anything except angle brackets
      />        # closing tag
      )
    }x

    [paired_html, single_html].each do |html_regexp|
      span.map_match(html_regexp) { |match| convert_raw_html_in_list(match[1]) }
    end

    # Detect footnotes references: [^1]
    span.map_match(/\[(\^[^\]]+)\]/) do |match|
      footnote = create_md_element(:footnote_reference)
      footnote.meta[:footnote_id] = match[1].strip.downcase
      footnote
    end

    # Detect any image like ![Alt text][id]
    span.map_match(/\!\[([^\]]+)\]\s?\[([^\]]*)\]/) do |match|
      alt = match[1]
      id = match[2].strip.downcase
      # An empty id ("![Alt text][]") falls back to the alt text.
      # (The original read an undefined local here and raised NameError.)
      id = alt.strip.downcase if id.empty?

      image = create_md_element(:image)
      image.meta[:ref_id] = id
      image
    end

    # Detect any image with immediate url: ![Alt](url "title")
    # a dummy ref is created and put in the symbol table
    immediate_image = /!\[([^\]]+)\]\s?\(([^\s\)]*)(?:\s+["'](.*)["'])?\)/
    span.map_match(immediate_image) do |match|
      # create a dummy id
      id = "dummy_#{@refs.size}"
      @refs[id] = {:url => match[2].strip, :title => match[3]}

      image = create_md_element(:image)
      image.meta[:ref_id] = id
      image
    end

    # an id reference: "[id]", "[ id ]"
    reg_id_ref = %r{
      \[         # opening bracket
      ([^\]]*)   # 0 or more non-closing bracket (this is too permissive)
      \]         # closing bracket
    }x

    # a url: anything up to whitespace or a closing bracket/parenthesis
    reg_url = %r{([^\s\]\)]+)}

    # [bah](http://www.google.com "Google.com"),
    # [bah](http://www.google.com),
    # [empty]()
    reg_url_and_title = %r{
      \(                     # opening
      \s*                    # whitespace
      #{reg_url}?            # url = 1 might be empty
      (?:\s+["'](.*)["'])?   # optional title = 2
      \s*                    # whitespace
      \)                     # closing
    }x

    # Detect a link like [text][id]
    span.map_match(/\[([^\]]+)\]\s?\[([^\]]*)\]/) do |match|
      text = match[1]
      id = match[2].strip.downcase
      id = text.strip.downcase if id.empty?

      # parse_lines_as_span expects an array of lines, so wrap the text
      link = create_md_element(:link, parse_lines_as_span([text]))
      link.meta[:ref_id] = id
      link
    end

    # NOTE: the original ran a second pass here, meant for links with an
    # immediate url, but it reused the image regexp above. Every such
    # match has already been consumed by the image pass, so it could never
    # fire; inline links are handled by the bracket-matching pass below.

    # Detect any link like [Google engine][google]
    span.match_couple_of('[',  # opening bracket
      %r{\]                    # closing bracket
         [ ]?                  # optional whitespace
         #{reg_id_ref}         # ref id, with $1 being the reference
        }x
    ) do |children, _open, close|
      id = close[1].strip.downcase
      id = children.join.strip.downcase if id.empty?

      link = create_md_element(:link, children)
      link.meta[:ref_id] = id
      link
    end

    # Detect any link with immediate url: [Google](http://www.google.com)
    # XXX Note that the url can be empty: [Empty]()
    # a dummy ref is created and put in the symbol table
    span.match_couple_of('[',  # opening bracket
      %r{\]                    # closing bracket
         [ ]?                  # optional whitespace
         #{reg_url_and_title}  # with $1 being the url and $2 being the title
        }x
    ) do |children, _open, close|
      url = close[1]
      # The title is capture group 2; the original read group 3, which
      # does not exist, so titles were silently dropped.
      title = close[2]

      # create a dummy id
      id = "dummy_#{@refs.size}"
      @refs[id] = {:url => url}
      @refs[id][:title] = title if title

      link = create_md_element(:link, children)
      link.meta[:ref_id] = id
      link
    end

    # Detect an email address <andrea@invalid.it>
    span.map_match(EMailAddress) do |match|
      address = create_md_element(:email_address, [])
      address.meta[:email] = match[1]
      address
    end

    # Detect HTML entities
    span.map_match(/&([\w\d]+);/) do |match|
      entity = create_md_element(:entity, [])
      entity.meta[:entity_name] = match[1]
      entity
    end

    # And now the easy stuff; longest delimiters first.

    # ***strong and em*** / ___strong and em___
    %w[*** ___].each do |delimiter|
      span.match_couple_of(delimiter) do |children, _open, _close|
        create_md_element(:strong, [create_md_element(:emphasis, children)])
      end
    end

    # **strong** / __strong__
    %w[** __].each do |delimiter|
      span.match_couple_of(delimiter) do |children, _open, _close|
        create_md_element(:strong, children)
      end
    end

    # *emphasis* / _emphasis_
    %w[* _].each do |delimiter|
      span.match_couple_of(delimiter) do |children, _open, _close|
        create_md_element(:emphasis, children)
      end
    end

    # finally, unescape the special characters
    span.replace_each_string { |s| s.unescape_md_special }

    span.children
  end

  # Joins +lines+ (each stripped) with single spaces, cutting the text
  # wherever +force_linebreak?+ is true for a line.
  #
  # Returns an array containing Strings and :linebreak elements.
  def resolve_linebreaks(lines)
    pieces = []
    current = ""
    lines.each do |line|
      stripped = line.strip
      current = current.empty? ? stripped : "#{current} #{stripped}"
      next unless force_linebreak?(line)

      pieces << current
      pieces << create_md_element(:linebreak)
      current = ""
    end
    pieces << current unless current.empty?
    pieces
  end

  # Wraps a piece of raw HTML into a :raw_html element.
  #
  # +raw_html+ is something like
  #   <em> A</em> dopwkk *maruk* <em>A</em>
  #
  # The text is stored in meta[:raw_html]. If it can be parsed by
  # Document.new the result goes in meta[:parsed_html]; otherwise a
  # warning is printed on stderr and meta[:parsed_html] is left unset.
  def convert_raw_html_in_list(raw_html)
    element = create_md_element(:raw_html)
    element.meta[:raw_html] = raw_html
    begin
      element.meta[:parsed_html] = Document.new(raw_html)
    rescue
      $stderr.puts "convert_raw_html_in_list Malformed HTML:\n#{raw_html}"
    end
    element
  end

end
330
-
331
- # And now the black magic that makes the part above so elegant
332
# The "black magic" used by Maruku#parse_lines_as_span: two helpers that
# rewrite the Strings found among an element's children.
class MDElement

  # Tries to match +regexp+ against each string in the hierarchy (using
  # +replace_each_string+). Every match is replaced by the text before it,
  # the result of the block, and then the rest of the string is scanned
  # again:
  #
  #   ..., matched_string, ... -> ..., pre_match, block.call(match), post_match
  #
  # The block receives the MatchData and may return a single object or an
  # array of objects.
  #
  # A zero-width match stops the scan of the current string; without this
  # the remaining text would never shrink and the loop would not end.
  def map_match(regexp, &block)
    replace_each_string do |s|
      processed = []
      rest = s
      while (match = regexp.match(rest))
        break if match[0].empty?

        # save the pre_match
        processed << match.pre_match unless match.pre_match.empty?
        # transform the match and append whatever the block produced
        processed.concat([*block.call(match)])
        # go on with the rest of the string
        rest = match.post_match
      end
      processed << rest unless rest.empty?
      processed
    end
  end

  # Finds couples of delimiters among the children (Strings and
  # MDElements) and replaces each delimited run with the block's result.
  #
  # +open+ and +close+ are two delimiters (like '[' and ']') given as
  # Strings or Regexps. If +close+ is omitted it defaults to +open+.
  #
  # MDElement children are processed recursively first. The block is
  # called with |contained children, opening MatchData, closing MatchData|
  # and its return value is inserted as a single child. An opening
  # delimiter with no closing one is kept as plain text.
  #
  # Adjacent Strings are merged at the end. The merge builds new Strings,
  # so strings handed in by the caller are never modified (and may be
  # frozen). Returns the new array of children.
  def match_couple_of(open, close=nil, &block)
    open_regexp = delimiter_regexp(open)
    close_regexp = delimiter_regexp(close || open)

    # Do the same to children first
    @children.each do |child|
      child.match_couple_of(open_regexp, close_regexp, &block) if child.kind_of?(MDElement)
    end

    processed = []
    until @children.empty?
      child = @children.shift
      match1 = child.kind_of?(String) ? open_regexp.match(child) : nil
      unless match1
        processed << child
        next
      end

      # we found the opening: the pre match is done, the post match is
      # put back so that the closing delimiter is searched in it
      processed << match1.pre_match unless match1.pre_match.empty?
      @children.unshift(match1.post_match) unless match1.post_match.empty?

      contained = []
      match2 = nil
      until @children.empty? || match2
        candidate = @children.shift
        match2 = close_regexp.match(candidate) if candidate.kind_of?(String)
        if match2
          # the pre match is contained, the post match is processed again
          contained << match2.pre_match unless match2.pre_match.empty?
          @children.unshift(match2.post_match) unless match2.post_match.empty?
        else
          contained << candidate
        end
      end

      if match2
        processed << block.call(contained, match1, match2)
      else
        # no closing delimiter: keep the opening one as text and give the
        # swallowed children back, in their original order
        processed << match1.to_s
        @children.unshift(*contained)
      end
    end

    @children = merge_adjacent_strings(processed)
  end

  private

  # Turns a String delimiter into a Regexp matching it literally;
  # Regexps are returned untouched.
  def delimiter_regexp(delimiter)
    delimiter.kind_of?(Regexp) ? delimiter : Regexp.new(Regexp.escape(delimiter))
  end

  # Returns a copy of +items+ where every run of adjacent Strings is
  # joined into one new String; other objects are kept as they are.
  def merge_adjacent_strings(items)
    items.each_with_object([]) do |item, merged|
      if item.kind_of?(String) && merged.last.kind_of?(String)
        merged[-1] = merged.last + item
      else
        merged << item
      end
    end
  end

end
@@ -1,59 +0,0 @@
1
-
2
-
3
# LaTeX escaping helpers added to String.
class String

  # These are TeX's special characters; they are escaped with a leading
  # backslash. Stored as integer character codes.
  LATEX_ADD_SLASH = ['{', '}', '$', '&', '#', '_', '%'].map { |x| x.getbyte(0) }.freeze

  # These, we transform to {\tt \char<ascii code>}.
  # Stored as integer character codes.
  LATEX_TO_CHARCODE = ['^', '~', '>', '<'].map { |x| x.getbyte(0) }.freeze

  # Other things that are good on the eyes: regexp => replacement pairs
  # applied by +to_latex+ after escaping.
  OtherGoodies = {
    /(\s)LaTeX/ => '\1\\LaTeX\\xspace ', # XXX not if already \latex
    # 'HTML' => '\\textsc{html}\\xspace ',
    # 'PDF' => '\\textsc{pdf}\\xspace '
  }.freeze

  # Returns the one-character string for +char+.
  #
  # +char+ is normally an integer byte value (0..255); a String is
  # returned as it is, so both calling conventions work.
  def int_to_string(char)
    char.kind_of?(Integer) ? char.chr : char.to_s
  end

  # Returns a copy of +s+ with TeX's special characters escaped:
  #
  # * characters in LATEX_TO_CHARCODE become {\tt \char<code>}
  # * characters in LATEX_ADD_SLASH get a leading backslash
  # * a backslash becomes $\backslash$
  #
  # The string is walked character by character, so multi-byte characters
  # are copied through untouched instead of being split into bytes.
  def escape_to_latex(s)
    backslash = "\\".getbyte(0)
    s.each_char.map do |ch|
      # only single-byte characters can be special
      code = ch.bytesize == 1 ? ch.getbyte(0) : nil
      if LATEX_TO_CHARCODE.include?(code)
        "{\\tt \\char#{code}}"
      elsif LATEX_ADD_SLASH.include?(code)
        "\\#{ch}"
      elsif code == backslash
        # there is no backslash in cmr10 fonts
        "$\\backslash$"
      else
        ch
      end
    end.join
  end

  # Returns this string escaped for LaTeX (see +escape_to_latex+), with
  # the OtherGoodies substitutions applied afterwards. The receiver is not
  # modified.
  def to_latex
    OtherGoodies.inject(escape_to_latex(self)) do |text, (pattern, replacement)|
      text.gsub(pattern, replacement)
    end
  end

end