maruku 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (108) hide show
  1. data/bin/{maruku0.3 → marudown} +6 -14
  2. data/bin/maruku +1 -1
  3. data/bin/marutest +37 -9
  4. data/docs/TOFIX.html +22 -0
  5. data/docs/TOFIX.md +3 -0
  6. data/docs/changelog-0.2.13.html +30 -0
  7. data/docs/changelog-0.2.13.md +6 -0
  8. data/docs/changelog-0.3.html +19 -5
  9. data/docs/faq.html +51 -40
  10. data/docs/faq.md +3 -3
  11. data/docs/hidden_o_n_squared.md +10 -0
  12. data/docs/index.html +84 -396
  13. data/docs/markdown_syntax.html +139 -330
  14. data/docs/markdown_syntax.md +80 -93
  15. data/docs/maruku.html +84 -396
  16. data/docs/maruku.md +88 -158
  17. data/docs/proposal.html +13 -106
  18. data/docs/proposal.md +3 -3
  19. data/docs/todo.html +38 -28
  20. data/lib/maruku.rb +77 -11
  21. data/lib/maruku/attributes.rb +186 -0
  22. data/lib/maruku/defaults.rb +40 -0
  23. data/lib/maruku/errors_management.rb +55 -39
  24. data/lib/maruku/helpers.rb +156 -72
  25. data/lib/maruku/input/charsource.rb +319 -0
  26. data/lib/maruku/{html_helper.rb → input/html_helper.rb} +30 -9
  27. data/lib/maruku/input/linesource.rb +111 -0
  28. data/lib/maruku/input/parse_block.rb +562 -0
  29. data/lib/maruku/{parse_doc.rb → input/parse_doc.rb} +60 -28
  30. data/lib/maruku/{parse_span_better.rb → input/parse_span_better.rb} +226 -256
  31. data/lib/maruku/input/type_detection.rb +137 -0
  32. data/lib/maruku/maruku.rb +33 -0
  33. data/lib/maruku/{to_html.rb → output/to_html.rb} +151 -132
  34. data/lib/maruku/{to_latex.rb → output/to_latex.rb} +31 -35
  35. data/lib/maruku/{to_latex_entities.rb → output/to_latex_entities.rb} +25 -3
  36. data/lib/maruku/output/to_latex_strings.rb +64 -0
  37. data/lib/maruku/output/to_markdown.rb +164 -0
  38. data/lib/maruku/{to_s.rb → output/to_s.rb} +6 -0
  39. data/lib/maruku/string_utils.rb +12 -181
  40. data/lib/maruku/structures.rb +91 -67
  41. data/lib/maruku/structures_inspect.rb +78 -0
  42. data/lib/maruku/structures_iterators.rb +24 -2
  43. data/lib/maruku/tests/benchmark.rb +41 -9
  44. data/lib/maruku/tests/new_parser.rb +317 -286
  45. data/lib/maruku/tests/tests.rb +20 -0
  46. data/lib/maruku/toc.rb +64 -64
  47. data/lib/maruku/usage/example1.rb +33 -0
  48. data/lib/maruku/version.rb +8 -2
  49. data/tests/unittest/abbreviations.md +27 -16
  50. data/tests/unittest/attributes/attributes.md +89 -0
  51. data/tests/unittest/attributes/circular.md +51 -0
  52. data/tests/unittest/attributes/default.md +47 -0
  53. data/tests/unittest/blank.md +10 -6
  54. data/tests/unittest/blanks_in_code.md +26 -26
  55. data/tests/unittest/code.md +9 -9
  56. data/tests/unittest/code2.md +12 -13
  57. data/tests/unittest/code3.md +34 -34
  58. data/tests/unittest/easy.md +9 -7
  59. data/tests/unittest/email.md +9 -7
  60. data/tests/unittest/encoding/iso-8859-1.md +41 -4
  61. data/tests/unittest/encoding/utf-8.md +6 -5
  62. data/tests/unittest/entities.md +52 -80
  63. data/tests/unittest/escaping.md +47 -35
  64. data/tests/unittest/extra_dl.md +19 -29
  65. data/tests/unittest/extra_header_id.md +31 -24
  66. data/tests/unittest/extra_table1.md +14 -32
  67. data/tests/unittest/footnotes.md +58 -42
  68. data/tests/unittest/headers.md +11 -11
  69. data/tests/unittest/hrule.md +14 -24
  70. data/tests/unittest/images.md +41 -26
  71. data/tests/unittest/inline_html.md +104 -56
  72. data/tests/unittest/inline_html2.md +38 -0
  73. data/tests/unittest/links.md +74 -33
  74. data/tests/unittest/list1.md +18 -15
  75. data/tests/unittest/list2.md +31 -13
  76. data/tests/unittest/list3.md +29 -28
  77. data/tests/unittest/list4.md +103 -12
  78. data/tests/unittest/lists.md +86 -53
  79. data/tests/unittest/lists6.md +53 -0
  80. data/tests/unittest/lists7.md +31 -0
  81. data/tests/unittest/lists_after_paragraph.md +105 -71
  82. data/tests/unittest/lists_ol.md +149 -73
  83. data/tests/unittest/misc_sw.md +366 -326
  84. data/tests/unittest/notyet/escape.md +10 -10
  85. data/tests/unittest/notyet/header_after_par.md +20 -14
  86. data/tests/unittest/notyet/ticks.md +8 -35
  87. data/tests/unittest/notyet/triggering.md +72 -45
  88. data/tests/unittest/olist.md +78 -0
  89. data/tests/unittest/one.md +5 -3
  90. data/tests/unittest/paragraph.md +5 -3
  91. data/tests/unittest/paragraph_rules/dont_merge_ref.md +15 -9
  92. data/tests/unittest/paragraph_rules/tab_is_blank.md +9 -5
  93. data/tests/unittest/paragraphs.md +21 -26
  94. data/tests/unittest/recover/recover_links.md +6 -5
  95. data/tests/unittest/references/long_example.md +39 -30
  96. data/tests/unittest/references/spaces_and_numbers.md +2 -2
  97. data/tests/unittest/syntax_hl.md +33 -31
  98. data/tests/unittest/test.md +4 -6
  99. data/tests/unittest/wrapping.md +43 -26
  100. metadata +160 -139
  101. data/docs/markdown_extra2.html +0 -87
  102. data/docs/markdown_extra2.md +0 -83
  103. data/docs/markdown_syntax_2.html +0 -152
  104. data/lib/maruku/parse_block.rb +0 -564
  105. data/lib/maruku/parse_span.rb +0 -451
  106. data/lib/maruku/to_latex_strings.rb +0 -59
  107. data/lib/maruku/to_markdown.rb +0 -110
  108. data/lib/test.rb +0 -29
@@ -1,451 +0,0 @@
1
- # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
2
- #
3
- # This file is part of Maruku.
4
- #
5
- # Maruku is free software; you can redistribute it and/or modify
6
- # it under the terms of the GNU General Public License as published by
7
- # the Free Software Foundation; either version 2 of the License, or
8
- # (at your option) any later version.
9
- #
10
- # Maruku is distributed in the hope that it will be useful,
11
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- # GNU General Public License for more details.
14
- #
15
- # You should have received a copy of the GNU General Public License
16
- # along with Maruku; if not, write to the Free Software
17
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18
-
19
-
20
- # There are two black-magic methods `match_couple_of` and `map_match`,
21
- # defined at the end of the file, that make the function
22
- # `parse_lines_as_span` so elegant.
23
-
24
class Maruku

  # Takes care of all span-level formatting, links, images, etc.
  #
  # Lines must not contain block-level elements.
  #
  # lines:: a collection of strings (anything that responds to +each+),
  #         one entry per source line of the span.
  #
  # Returns the children of the parsed span: an array mixing Strings and
  # the elements built by +create_md_element+.
  #
  # Immediate links and images register a "dummy_N" entry in @refs as a
  # side effect.
  def parse_lines_as_span(lines)
    # first, get rid of linebreaks
    res = resolve_linebreaks(lines)

    span = MDElement.new(:dummy, res)

    # encode all escapes
    span.replace_each_string { |s| s.escape_md_special }

    # The order of processing is significant:
    # 1. inline code
    # 2. immediate links
    # 3. inline HTML
    # 4. everything else

    # Code spans are opaque to further processing: the raw text is stored
    # in the metadata and the element gets no children.
    inline_code = lambda { |children, _open, _close|
      e = create_md_element(:inline_code)
      e.meta[:raw_code] = children.join('').it_was_a_code_block
      e
    }

    # search for ``code`` markers first, then for `single tick` markers
    span.match_couple_of('``', &inline_code)
    span.match_couple_of('`', &inline_code)

    # Detect any immediate link: <http://www.google.com>
    # we expect an http: or something: at the beginning
    span.map_match(/<(\w+:[^\>]+)>/) { |match|
      e = create_md_element(:immediate_link, [])
      e.meta[:url] = match[1]
      e
    }

    # Search for inline HTML (the support is pretty basic for now)

    # this searches for a matching block
    inline_html_pair = %r{
      (           # put everything in 1
        <         # open
        (\w+)     # opening tag in 2
        >         # close
        .*        # anything
        </\2>     # match closing tag
      )
    }x

    # this searches for only one block (a self-closed tag)
    inline_html_single = %r{
      (           # put everything in 1
        <         # open
        \w+       # tag name
        [^<>]*    # anything except angle brackets
        />        # closing tag
      )
    }x

    [inline_html_pair, inline_html_single].each do |reg|
      span.map_match(reg) { |match| convert_raw_html_in_list(match[1]) }
    end

    # Detect footnotes references: [^1]
    span.map_match(/\[(\^[^\]]+)\]/) { |match|
      e = create_md_element(:footnote_reference)
      e.meta[:footnote_id] = match[1].strip.downcase
      e
    }

    # Detect any image like ![Alt text][url]
    span.map_match(/\!\[([^\]]+)\]\s?\[([^\]]*)\]/) { |match|
      alt = match[1]
      id = match[2].strip.downcase

      # ![Alt text][] -- an empty id falls back to the alt text.
      # (This used to read an undefined local named `text`.)
      id = alt.strip.downcase if id.empty?

      e = create_md_element(:image)
      e.meta[:ref_id] = id
      e
    }

    # Detect any image with immediate url: ![Alt](url "title")
    # a dummy ref is created and put in the symbol table
    link1 = /!\[([^\]]+)\]\s?\(([^\s\)]*)(?:\s+["'](.*)["'])?\)/
    span.map_match(link1) { |match|
      url = match[2].strip
      title = match[3]

      # create a dummy id
      id = "dummy_#{@refs.size}"
      @refs[id] = {:url=>url, :title=>title}

      e = create_md_element(:image)
      e.meta[:ref_id] = id
      e
    }

    # an id reference: "[id]", "[ id ]"
    reg_id_ref = %r{
      \[         # opening bracket
      ([^\]]*)   # 0 or more non-closing bracket (this is too permissive)
      \]         # closing bracket
    }x

    # a url: anything up to whitespace, "]" or ")"; capture 1 is the url
    reg_url = %r{([^\s\]\)]+)}

    # [bah](http://www.google.com "Google.com"),
    # [bah](http://www.google.com),
    # [empty]()
    reg_url_and_title = %r{
      \(                     # opening
      \s*                    # whitespace
      #{reg_url}?            # url = 1 might be empty
      (?:\s+["'](.*)["'])?   # optional title = 2
      \s*                    # whitespace
      \)                     # closing
    }x

    # Detect a link like [Link text][id]
    span.map_match(/\[([^\]]+)\]\s?\[([^\]]*)\]/) { |match|
      text = match[1]
      id = match[2].strip.downcase
      id = text.strip.downcase if id.empty?

      # The link text is a single line: wrap it in an Array, because
      # resolve_linebreaks iterates its argument with #each and a bare
      # String only responds to #each on Ruby 1.8.
      children = parse_lines_as_span([text])
      e = create_md_element(:link, children)
      e.meta[:ref_id] = id
      e
    }

    # Same pattern as the immediate image above (note the leading "!"),
    # but producing a :link element.
    # NOTE(review): images were already consumed by the earlier pass, so
    # this pass looks like a copy-paste leftover -- confirm before removing.
    span.map_match(link1) { |match|
      text = match[1]
      children = parse_lines_as_span([text])

      url = match[2].strip
      title = match[3]

      # create a dummy id
      id = "dummy_#{@refs.size}"
      @refs[id] = {:url=>url, :title=>title}

      e = create_md_element(:link, children)
      e.meta[:ref_id] = id
      e
    }

    # Detect any link like [Google engine][google]
    ref_link_close = %r{
      \]              # closing bracket
      [ ]?            # optional whitespace
      #{reg_id_ref}   # ref id, capture 1 is the reference
    }x
    span.match_couple_of('[', ref_link_close) { |children, _open, close|
      id = close[1].strip.downcase
      id = children.join.strip.downcase if id.empty?

      e = create_md_element(:link, children)
      e.meta[:ref_id] = id
      e
    }

    # Detect any link with immediate url: [Google](http://www.google.com)
    # XXX Note that the url can be empty: [Empty]()
    # a dummy ref is created and put in the symbol table
    inline_link_close = %r{
      \]                     # closing bracket
      [ ]?                   # optional whitespace
      #{reg_url_and_title}   # capture 1 is the url, capture 2 the title
    }x
    span.match_couple_of('[', inline_link_close) { |children, _open, close|
      url = close[1]
      # The combined regexp has exactly two groups (url, title); the old
      # code read group 3, which is always nil, so titles were dropped.
      title = close[2]

      # create a dummy id
      id = "dummy_#{@refs.size}"
      @refs[id] = {:url=>url}
      @refs[id][:title] = title if title

      e = create_md_element(:link, children)
      e.meta[:ref_id] = id
      e
    }

    # Detect an email address <andrea@invalid.it>
    span.map_match(EMailAddress) { |match|
      e = create_md_element(:email_address, [])
      e.meta[:email] = match[1]
      e
    }

    # Detect HTML entities
    span.map_match(/&([\w\d]+);/) { |match|
      e = create_md_element(:entity, [])
      e.meta[:entity_name] = match[1]
      e
    }

    # And now the easy stuff. Longer delimiters go first so that "***"
    # is not consumed as "**" followed by "*".

    # search for ***strong and em*** and ___strong and em___
    strong_em = lambda { |children, _open, _close|
      create_md_element(:strong, [create_md_element(:emphasis, children)])
    }
    span.match_couple_of('***', &strong_em)
    span.match_couple_of('___', &strong_em)

    # search for **strong** and __strong__
    strong = lambda { |children, _open, _close| create_md_element(:strong, children) }
    span.match_couple_of('**', &strong)
    span.match_couple_of('__', &strong)

    # search for *emphasis* and _emphasis_
    emphasis = lambda { |children, _open, _close| create_md_element(:emphasis, children) }
    span.match_couple_of('*', &emphasis)
    span.match_couple_of('_', &emphasis)

    # finally, unescape the special characters
    span.replace_each_string { |s| s.unescape_md_special }

    span.children
  end

  # Joins consecutive lines into single strings, separated by one space,
  # and inserts a :linebreak element after every line for which
  # +force_linebreak?+ is true.
  #
  # Returns an array containing Strings or :linebreak elements.
  def resolve_linebreaks(lines)
    res = []
    s = ""
    lines.each do |l|
      s += " " unless s.empty?
      s += l.strip
      if force_linebreak?(l)
        res << s
        res << create_md_element(:linebreak)
        s = ""
      end
    end
    res << s unless s.empty?
    res
  end

  # Wraps a fragment of inline HTML in a :raw_html element.
  #
  # raw_html is something like
  #   <em> A</em> dopwkk *maruk* <em>A</em>
  #
  # The raw text is always stored in meta[:raw_html]. meta[:parsed_html]
  # is set only when Document.new accepts the fragment; a malformed
  # fragment is reported on $stderr and the element is returned without it.
  def convert_raw_html_in_list(raw_html)
    e = create_md_element(:raw_html)
    e.meta[:raw_html] = raw_html
    begin
      e.meta[:parsed_html] = Document.new(raw_html)
    rescue
      $stderr.puts "convert_raw_html_in_list Malformed HTML:\n#{raw_html}"
    end
    e
  end

end
330
-
331
- # And now the black magic that makes the part above so elegant
332
class MDElement

  # Try to match the regexp to each string in the hierarchy
  # (using `replace_each_string`). If the regexp matches, eliminate
  # the matching string and substitute it with the pre_match, the
  # result of the block, and the post_match
  #
  #   ..., matched_string, ... -> ..., pre_match, block.call(match), post_match
  #
  # The block receives the MatchData and may return a single object or
  # an array of objects.
  def map_match(regexp, &block)
    replace_each_string { |s|
      processed = []
      while (match = regexp.match(s))
        # A zero-width match consumes nothing, so the scan could never
        # advance past it: stop here instead of looping forever.
        break if match[0].empty?

        # save the pre_match
        pre = match.pre_match
        processed << pre unless pre.empty?
        # transform the match and append whatever the block produced
        processed.concat([*block.call(match)])
        # go on with the rest of the string
        s = match.post_match
      end
      processed << s unless s.empty?
      processed
    }
  end

  # Finds couples of delimiters in a hierarchy of Strings and MDElements.
  #
  # Open and close are two delimiters (like '[' and ']'), or two Regexp.
  # Plain strings are matched literally.
  #
  # If you don't pass close, it defaults to open.
  #
  # The block is called with |contained children, match1, match2| (the
  # MatchData of the opening and closing delimiters); its result replaces
  # the delimiters and everything between them. An opening delimiter with
  # no closing one is left in the text as-is.
  #
  # Replaces @children with the rewritten list. Adjacent strings are
  # merged into new String objects, so the strings that were passed in
  # are never modified (and may be frozen).
  def match_couple_of(open, close=nil, &block)
    close ||= open
    open_regexp = open.kind_of?(Regexp) ? open : Regexp.new(Regexp.escape(open))
    close_regexp = close.kind_of?(Regexp) ? close : Regexp.new(Regexp.escape(close))

    # Do the same to children first
    @children.each do |child|
      child.match_couple_of(open_regexp, close_regexp, &block) if child.kind_of?(MDElement)
    end

    processed_children = []

    # @children is used as a work queue: pieces that still have to be
    # scanned are pushed back at the front.
    until @children.empty?
      c = @children.shift
      match1 = c.kind_of?(String) ? open_regexp.match(c) : nil

      unless match1
        # an element, or a string without an opening delimiter
        processed_children << c
        next
      end

      # we found the opening, now search for the closing:
      # the pre match is processed
      processed_children << match1.pre_match unless match1.pre_match.empty?
      # we will process again the post_match
      @children.unshift match1.post_match unless match1.post_match.empty?

      contained = []
      found_closing = false
      until @children.empty? || found_closing
        inner = @children.shift
        match2 = inner.kind_of?(String) ? close_regexp.match(inner) : nil

        unless match2
          contained << inner
          next
        end

        found_closing = true
        # the pre match is contained
        contained << match2.pre_match unless match2.pre_match.empty?
        # we will process again the post_match
        @children.unshift match2.post_match unless match2.post_match.empty?

        # And now we call the block
        processed_children << block.call(contained, match1, match2)
      end

      unless found_closing
        # No closing delimiter: keep the opening text as plain text and
        # put what was scanned back in the queue, in its original order.
        processed_children << match1.to_s
        contained.reverse_each { |piece| @children.unshift piece }
      end
    end

    # rebuild strings: merge runs of adjacent strings without mutating them
    rebuilt = []
    processed_children.each do |piece|
      if piece.kind_of?(String) && rebuilt.last.kind_of?(String)
        rebuilt[-1] = rebuilt.last + piece
      else
        rebuilt << piece
      end
    end
    @children = rebuilt
  end
end
@@ -1,59 +0,0 @@
1
-
2
-
3
class String

  # These are TeX's special characters, stored as byte codes.
  # (Built with unpack because 'x'[0] is an Integer only on Ruby 1.8;
  # on later versions it is a one-character String.)
  LATEX_ADD_SLASH = ['{', '}', '$', '&', '#', '_', '%'].map { |x| x.unpack('C').first }

  # These, we transform to {\tt \char<ascii code>}
  LATEX_TO_CHARCODE = ['^', '~', '>', '<'].map { |x| x.unpack('C').first }

  # Returns the one-character string for the byte code +char+.
  def int_to_string(char)
    char.chr
  end

  # Returns a copy of +s+ with TeX's special characters escaped:
  # * the characters in LATEX_TO_CHARCODE become {\tt \char<code>}
  # * the characters in LATEX_ADD_SLASH get a backslash in front
  # * a backslash becomes $\backslash$
  # Every other character, multi-byte ones included, is copied unchanged.
  def escape_to_latex(s)
    # The character class lists the backslash plus every character of the
    # two constants above; keep them in sync.
    s.gsub(/[\\{}\$&\#_%\^~<>]/) do |c|
      code = c.unpack('C').first
      if LATEX_TO_CHARCODE.include?(code)
        "{\\tt \\char#{code}}"
      elsif LATEX_ADD_SLASH.include?(code)
        "\\" + c
      else
        # only the backslash is left;
        # there is no backslash in cmr10 fonts
        "$\\backslash$"
      end
    end
  end

  # Escapes special characters for LaTeX output, then applies the
  # OtherGoodies substitutions. Returns a new string; the receiver is
  # left untouched.
  def to_latex
    s = escape_to_latex(self)
    OtherGoodies.each do |k, v|
      s.gsub!(k, v)
    end
    s
  end

  # other things that are good on the eyes
  OtherGoodies = {
    /(\s)LaTeX/ => '\1\\LaTeX\\xspace ', # XXX not if already \latex
    # 'HTML' => '\\textsc{html}\\xspace ',
    # 'PDF' => '\\textsc{pdf}\\xspace '
  }

end