maruku 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (108) hide show
  1. data/bin/{maruku0.3 → marudown} +6 -14
  2. data/bin/maruku +1 -1
  3. data/bin/marutest +37 -9
  4. data/docs/TOFIX.html +22 -0
  5. data/docs/TOFIX.md +3 -0
  6. data/docs/changelog-0.2.13.html +30 -0
  7. data/docs/changelog-0.2.13.md +6 -0
  8. data/docs/changelog-0.3.html +19 -5
  9. data/docs/faq.html +51 -40
  10. data/docs/faq.md +3 -3
  11. data/docs/hidden_o_n_squared.md +10 -0
  12. data/docs/index.html +84 -396
  13. data/docs/markdown_syntax.html +139 -330
  14. data/docs/markdown_syntax.md +80 -93
  15. data/docs/maruku.html +84 -396
  16. data/docs/maruku.md +88 -158
  17. data/docs/proposal.html +13 -106
  18. data/docs/proposal.md +3 -3
  19. data/docs/todo.html +38 -28
  20. data/lib/maruku.rb +77 -11
  21. data/lib/maruku/attributes.rb +186 -0
  22. data/lib/maruku/defaults.rb +40 -0
  23. data/lib/maruku/errors_management.rb +55 -39
  24. data/lib/maruku/helpers.rb +156 -72
  25. data/lib/maruku/input/charsource.rb +319 -0
  26. data/lib/maruku/{html_helper.rb → input/html_helper.rb} +30 -9
  27. data/lib/maruku/input/linesource.rb +111 -0
  28. data/lib/maruku/input/parse_block.rb +562 -0
  29. data/lib/maruku/{parse_doc.rb → input/parse_doc.rb} +60 -28
  30. data/lib/maruku/{parse_span_better.rb → input/parse_span_better.rb} +226 -256
  31. data/lib/maruku/input/type_detection.rb +137 -0
  32. data/lib/maruku/maruku.rb +33 -0
  33. data/lib/maruku/{to_html.rb → output/to_html.rb} +151 -132
  34. data/lib/maruku/{to_latex.rb → output/to_latex.rb} +31 -35
  35. data/lib/maruku/{to_latex_entities.rb → output/to_latex_entities.rb} +25 -3
  36. data/lib/maruku/output/to_latex_strings.rb +64 -0
  37. data/lib/maruku/output/to_markdown.rb +164 -0
  38. data/lib/maruku/{to_s.rb → output/to_s.rb} +6 -0
  39. data/lib/maruku/string_utils.rb +12 -181
  40. data/lib/maruku/structures.rb +91 -67
  41. data/lib/maruku/structures_inspect.rb +78 -0
  42. data/lib/maruku/structures_iterators.rb +24 -2
  43. data/lib/maruku/tests/benchmark.rb +41 -9
  44. data/lib/maruku/tests/new_parser.rb +317 -286
  45. data/lib/maruku/tests/tests.rb +20 -0
  46. data/lib/maruku/toc.rb +64 -64
  47. data/lib/maruku/usage/example1.rb +33 -0
  48. data/lib/maruku/version.rb +8 -2
  49. data/tests/unittest/abbreviations.md +27 -16
  50. data/tests/unittest/attributes/attributes.md +89 -0
  51. data/tests/unittest/attributes/circular.md +51 -0
  52. data/tests/unittest/attributes/default.md +47 -0
  53. data/tests/unittest/blank.md +10 -6
  54. data/tests/unittest/blanks_in_code.md +26 -26
  55. data/tests/unittest/code.md +9 -9
  56. data/tests/unittest/code2.md +12 -13
  57. data/tests/unittest/code3.md +34 -34
  58. data/tests/unittest/easy.md +9 -7
  59. data/tests/unittest/email.md +9 -7
  60. data/tests/unittest/encoding/iso-8859-1.md +41 -4
  61. data/tests/unittest/encoding/utf-8.md +6 -5
  62. data/tests/unittest/entities.md +52 -80
  63. data/tests/unittest/escaping.md +47 -35
  64. data/tests/unittest/extra_dl.md +19 -29
  65. data/tests/unittest/extra_header_id.md +31 -24
  66. data/tests/unittest/extra_table1.md +14 -32
  67. data/tests/unittest/footnotes.md +58 -42
  68. data/tests/unittest/headers.md +11 -11
  69. data/tests/unittest/hrule.md +14 -24
  70. data/tests/unittest/images.md +41 -26
  71. data/tests/unittest/inline_html.md +104 -56
  72. data/tests/unittest/inline_html2.md +38 -0
  73. data/tests/unittest/links.md +74 -33
  74. data/tests/unittest/list1.md +18 -15
  75. data/tests/unittest/list2.md +31 -13
  76. data/tests/unittest/list3.md +29 -28
  77. data/tests/unittest/list4.md +103 -12
  78. data/tests/unittest/lists.md +86 -53
  79. data/tests/unittest/lists6.md +53 -0
  80. data/tests/unittest/lists7.md +31 -0
  81. data/tests/unittest/lists_after_paragraph.md +105 -71
  82. data/tests/unittest/lists_ol.md +149 -73
  83. data/tests/unittest/misc_sw.md +366 -326
  84. data/tests/unittest/notyet/escape.md +10 -10
  85. data/tests/unittest/notyet/header_after_par.md +20 -14
  86. data/tests/unittest/notyet/ticks.md +8 -35
  87. data/tests/unittest/notyet/triggering.md +72 -45
  88. data/tests/unittest/olist.md +78 -0
  89. data/tests/unittest/one.md +5 -3
  90. data/tests/unittest/paragraph.md +5 -3
  91. data/tests/unittest/paragraph_rules/dont_merge_ref.md +15 -9
  92. data/tests/unittest/paragraph_rules/tab_is_blank.md +9 -5
  93. data/tests/unittest/paragraphs.md +21 -26
  94. data/tests/unittest/recover/recover_links.md +6 -5
  95. data/tests/unittest/references/long_example.md +39 -30
  96. data/tests/unittest/references/spaces_and_numbers.md +2 -2
  97. data/tests/unittest/syntax_hl.md +33 -31
  98. data/tests/unittest/test.md +4 -6
  99. data/tests/unittest/wrapping.md +43 -26
  100. metadata +160 -139
  101. data/docs/markdown_extra2.html +0 -87
  102. data/docs/markdown_extra2.md +0 -83
  103. data/docs/markdown_syntax_2.html +0 -152
  104. data/lib/maruku/parse_block.rb +0 -564
  105. data/lib/maruku/parse_span.rb +0 -451
  106. data/lib/maruku/to_latex_strings.rb +0 -59
  107. data/lib/maruku/to_markdown.rb +0 -110
  108. data/lib/test.rb +0 -29
@@ -1,152 +0,0 @@
1
- <?xml version='1.0'?>
2
- <!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Strict//EN'
3
- 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>
4
- <html lang='en' xml:lang='en' xmlns='http://www.w3.org/1999/xhtml'><head><title>Markdown: Syntax</title></head><body><h1 id='markdown_syntax'>Markdown: Syntax</h1><ul id='ProjectSubmenu'>
5
- <li><a href='/projects/markdown/' title='Markdown Project Page'>Main</a></li>
6
- <li><a href='/projects/markdown/basics' title='Markdown Basics'>Basics</a></li>
7
- <li><a class='selected' title='Markdown Syntax Documentation'>Syntax</a></li>
8
- <li><a href='/projects/markdown/license' title='Pricing and License Information'>License</a></li>
9
- <li><a href='/projects/markdown/dingus' title='Online Markdown Web Form'>Dingus</a></li>
10
- </ul><ul><li><p><a href='#overview'>Overview</a></p><ul><li><a href='#philosophy'>Philosophy</a></li><li><a href='#html'>Inline HTML</a></li><li><a href='#autoescape'>Automatic Escaping for Special Characters</a></li></ul></li><li><p><a href='#block'>Block Elements</a></p><ul><li><a href='#p'>Paragraphs and Line Breaks</a></li><li><a href='#header'>Headers</a></li><li><a href='#blockquote'>Blockquotes</a></li><li><a href='#list'>Lists</a></li><li><a href='#precode'>Code Blocks</a></li><li><a href='#hr'>Horizontal Rules</a></li></ul></li><li><p><a href='#span'>Span Elements</a></p><ul><li><a href='#link'>Links</a></li><li><a href='#em'>Emphasis</a></li><li><a href='#code'>Code</a></li><li><a href='#img'>Images</a></li></ul></li><li><p><a href='#misc'>Miscellaneous</a></p><ul><li><a href='#backslash'>Backslash Escapes</a></li><li><a href='#autolink'>Automatic Links</a></li></ul></li></ul><p><strong>Note:</strong> This document is itself written using Markdown; you can <a href='/projects/markdown/syntax.text'>see the source for it by adding &apos;.text&apos; to the URL</a>.</p><hr /><h2 id='overview'>Overview</h2><h3 id='philosophy'>Philosophy</h3><p>Markdown is intended to be as easy-to-read and easy-to-write as is feasible.</p><p>Readability, however, is emphasized above all else. A Markdown-formatted document should be publishable as-is, as plain text, without looking like it&apos;s been marked up with tags or formatting instructions. 
While Markdown&apos;s syntax has been influenced by several existing text-to-HTML filters -- including <a href='http://docutils.sourceforge.net/mirror/setext.html'>Setext</a>, <a href='http://www.aaronsw.com/2002/atx/'>atx</a>, <a href='http://textism.com/tools/textile/'>Textile</a>, <a href='http://docutils.sourceforge.net/rst.html'>reStructuredText</a>, <a href='http://www.triptico.com/software/grutatxt.html'>Grutatext</a>, and <a href='http://ettext.taint.org/doc/'>EtText</a> -- the single biggest source of inspiration for Markdown&apos;s syntax is the format of plain text email.</p><p>To this end, Markdown&apos;s syntax is comprised entirely of punctuation characters, which punctuation characters have been carefully chosen so as to look like what they mean. E.g., asterisks around a word actually look like *emphasis*. Markdown lists look like, well, lists. Even blockquotes look like quoted passages of text, assuming you&apos;ve ever used email.</p><h3 id='html'>Inline HTML</h3><p>Markdown&apos;s syntax is intended for one purpose: to be used as a format for <em>writing</em> for the web.</p><p>Markdown is not a replacement for HTML, or even close to it. Its syntax is very small, corresponding only to a very small subset of HTML tags. The idea is <em>not</em> to create a syntax that makes it easier to insert HTML tags. In my opinion, HTML tags are already easy to insert. The idea for Markdown is to make it easy to read, write, and edit prose. HTML is a <em>publishing</em> format; Markdown is a <em>writing</em> format. Thus, Markdown&apos;s formatting syntax only addresses issues that can be conveyed in plain text.</p><p>For any markup that is not covered by Markdown&apos;s syntax, you simply use HTML itself. There&apos;s no need to preface it or delimit it to indicate that you&apos;re switching from Markdown to HTML; you just use the tags.</p><p>The only restrictions are that block-level HTML elements -- e.g. 
<tt style='background-color: #f0f0e0;'>&lt;div&gt;</tt>, <tt style='background-color: #f0f0e0;'>&lt;table&gt;</tt>, <tt style='background-color: #f0f0e0;'>&lt;pre&gt;</tt>, <tt style='background-color: #f0f0e0;'>&lt;p&gt;</tt>, etc. -- must be separated from surrounding content by blank lines, and the start and end tags of the block should not be indented with tabs or spaces. Markdown is smart enough not to add extra (unwanted) <tt style='background-color: #f0f0e0;'>&lt;p&gt;</tt> tags around HTML block-level tags.</p><p>For example, to add an HTML table to a Markdown article:</p><pre style='background-color: #f0f0e0;'>This is a regular paragraph.
11
- &lt;table&gt;
12
- &lt;tr&gt;
13
- &lt;td&gt;Foo&lt;/td&gt;
14
- &lt;/tr&gt;
15
- &lt;/table&gt;
16
- This is another regular paragraph.</pre><p>Note that Markdown formatting syntax is not processed within block-level HTML tags. E.g., you can&apos;t use Markdown-style <tt style='background-color: #f0f0e0;'>*emphasis*</tt> inside an HTML block.</p><p>Span-level HTML tags -- e.g. <tt style='background-color: #f0f0e0;'>&lt;span&gt;</tt>, <tt style='background-color: #f0f0e0;'>&lt;cite&gt;</tt>, or <tt style='background-color: #f0f0e0;'>&lt;del&gt;</tt> -- can be used anywhere in a Markdown paragraph, list item, or header. If you want, you can even use HTML tags instead of Markdown formatting; e.g. if you&apos;d prefer to use HTML <tt style='background-color: #f0f0e0;'>&lt;a&gt;</tt> or <tt style='background-color: #f0f0e0;'>&lt;img&gt;</tt> tags instead of Markdown&apos;s link or image syntax, go right ahead.</p><p>Unlike block-level HTML tags, Markdown syntax <em>is</em> processed within span-level tags.</p><h3 id='autoescape'>Automatic Escaping for Special Characters</h3><p>In HTML, there are two characters that demand special treatment: <tt style='background-color: #f0f0e0;'>&lt;</tt> and <tt style='background-color: #f0f0e0;'>&amp;</tt>. Left angle brackets are used to start tags; ampersands are used to denote HTML entities. If you want to use them as literal characters, you must escape them as entities, e.g. <tt style='background-color: #f0f0e0;'>&amp;lt;</tt>, and <tt style='background-color: #f0f0e0;'>&amp;amp;</tt>.</p><p>Ampersands in particular are bedeviling for web writers. If you want to write about &apos;AT&amp;T&apos;, you need to write &apos;<tt style='background-color: #f0f0e0;'>AT&amp;amp;T</tt>&apos;. You even need to escape ampersands within URLs. 
Thus, if you want to link to:</p><pre style='background-color: #f0f0e0;'>http://images.google.com/images?num=30&amp;q=larry+bird</pre><p>you need to encode the URL as:</p><pre style='background-color: #f0f0e0;'>http://images.google.com/images?num=30&amp;amp;q=larry+bird</pre><p>in your anchor tag <tt style='background-color: #f0f0e0;'>href</tt> attribute. Needless to say, this is easy to forget, and is probably the single most common source of HTML validation errors in otherwise well-marked-up web sites.</p><p>Markdown allows you to use these characters naturally, taking care of all the necessary escaping for you. If you use an ampersand as part of an HTML entity, it remains unchanged; otherwise it will be translated into <tt style='background-color: #f0f0e0;'>&amp;amp;</tt>.</p><p>So, if you want to include a copyright symbol in your article, you can write:</p><pre style='background-color: #f0f0e0;'>&amp;copy;</pre><p>and Markdown will leave it alone. But if you write:</p><pre style='background-color: #f0f0e0;'>AT&amp;T</pre><p>Markdown will translate it to:</p><pre style='background-color: #f0f0e0;'>AT&amp;amp;T</pre><p>Similarly, because Markdown supports <a href='#html'>inline HTML</a>, if you use angle brackets as delimiters for HTML tags, Markdown will treat them as such. But if you write:</p><pre style='background-color: #f0f0e0;'>4 &lt; 5</pre><p>Markdown will translate it to:</p><pre style='background-color: #f0f0e0;'>4 &amp;lt; 5</pre><p>However, inside Markdown code spans and blocks, angle brackets and ampersands are <em>always</em> encoded automatically. This makes it easy to use Markdown to write about HTML code. 
(As opposed to raw HTML, which is a terrible format for writing about HTML syntax, because every single <tt style='background-color: #f0f0e0;'>&lt;</tt> and <tt style='background-color: #f0f0e0;'>&amp;</tt> in your example code needs to be escaped.)</p><hr /><h2 id='block'>Block Elements</h2><h3 id='p'>Paragraphs and Line Breaks</h3><p>A paragraph is simply one or more consecutive lines of text, separated by one or more blank lines. (A blank line is any line that looks like a blank line -- a line containing nothing but spaces or tabs is considered blank.) Normal paragraphs should not be indented with spaces or tabs.</p><p>The implication of the &quot;one or more consecutive lines of text&quot; rule is that Markdown supports &quot;hard-wrapped&quot; text paragraphs. This differs significantly from most other text-to-HTML formatters (including Movable Type&apos;s &quot;Convert Line Breaks&quot; option) which translate every line break character in a paragraph into a <tt style='background-color: #f0f0e0;'>&lt;br /&gt;</tt> tag.</p><p>When you <em>do</em> want to insert a <tt style='background-color: #f0f0e0;'>&lt;br /&gt;</tt> break tag using Markdown, you end a line with two or more spaces, then type return.</p><p>Yes, this takes a tad more effort to create a <tt style='background-color: #f0f0e0;'>&lt;br /&gt;</tt>, but a simplistic &quot;every line break is a <tt style='background-color: #f0f0e0;'>&lt;br /&gt;</tt>&quot; rule wouldn&apos;t work for Markdown. 
Markdown&apos;s email-style <a href='#blockquote'>blockquoting</a> and multi-paragraph <a href='#list'>list items</a> work best -- and look better -- when you format them with hard breaks.</p><h3 id='header'>Headers</h3><p>Markdown supports two styles of headers, <a href='http://docutils.sourceforge.net/mirror/setext.html'>Setext</a> and <a href='http://www.aaronsw.com/2002/atx/'>atx</a>.</p><p>Setext-style headers are &quot;underlined&quot; using equal signs (for first-level headers) and dashes (for second-level headers). For example:</p><pre style='background-color: #f0f0e0;'>This is an H1
17
- =============
18
- This is an H2
19
- -------------</pre><p>Any number of underlining <tt style='background-color: #f0f0e0;'>=</tt>&apos;s or <tt style='background-color: #f0f0e0;'>-</tt>&apos;s will work.</p><p>Atx-style headers use 1-6 hash characters at the start of the line, corresponding to header levels 1-6. For example:</p><pre style='background-color: #f0f0e0;'># This is an H1
20
- ## This is an H2
21
- ###### This is an H6</pre><p>Optionally, you may &quot;close&quot; atx-style headers. This is purely cosmetic -- you can use this if you think it looks better. The closing hashes don&apos;t even need to match the number of hashes used to open the header. (The number of opening hashes determines the header level.) :</p><pre style='background-color: #f0f0e0;'># This is an H1 #
22
- ## This is an H2 ##
23
- ### This is an H3 ######</pre><h3 id='blockquote'>Blockquotes</h3><p>Markdown uses email-style <tt style='background-color: #f0f0e0;'>&gt;</tt> characters for blockquoting. If you&apos;re familiar with quoting passages of text in an email message, then you know how to create a blockquote in Markdown. It looks best if you hard wrap the text and put a <tt style='background-color: #f0f0e0;'>&gt;</tt> before every line:</p><pre style='background-color: #f0f0e0;'>&gt; This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
24
- &gt; consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
25
- &gt; Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
26
- &gt;
27
- &gt; Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
28
- &gt; id sem consectetuer libero luctus adipiscing.</pre><p>Markdown allows you to be lazy and only put the <tt style='background-color: #f0f0e0;'>&gt;</tt> before the first line of a hard-wrapped paragraph:</p><pre style='background-color: #f0f0e0;'>&gt; This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
29
- consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
30
- Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
31
- &gt; Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
32
- id sem consectetuer libero luctus adipiscing.</pre><p>Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by adding additional levels of <tt style='background-color: #f0f0e0;'>&gt;</tt>:</p><pre style='background-color: #f0f0e0;'>&gt; This is the first level of quoting.
33
- &gt;
34
- &gt; &gt; This is nested blockquote.
35
- &gt;
36
- &gt; Back to the first level.</pre><p>Blockquotes can contain other Markdown elements, including headers, lists, and code blocks:</p><pre style='background-color: #f0f0e0;'>&gt; ## This is a header.
37
- &gt;
38
- &gt; 1. This is the first list item.
39
- &gt; 2. This is the second list item.
40
- &gt;
41
- &gt; Here&apos;s some example code:
42
- &gt;
43
- &gt; return shell_exec(&quot;echo $input | $markdown_script&quot;);</pre><p>Any decent text editor should make email-style quoting easy. For example, with BBEdit, you can make a selection and choose Increase Quote Level from the Text menu.</p><h3 id='list'>Lists</h3><p>Markdown supports ordered (numbered) and unordered (bulleted) lists.</p><p>Unordered lists use asterisks, pluses, and hyphens -- interchangeably -- as list markers:</p><pre style='background-color: #f0f0e0;'>* Red
44
- * Green
45
- * Blue</pre><p>is equivalent to:</p><pre style='background-color: #f0f0e0;'>+ Red
46
- + Green
47
- + Blue</pre><p>and:</p><pre style='background-color: #f0f0e0;'>- Red
48
- - Green
49
- - Blue</pre><p>Ordered lists use numbers followed by periods:</p><pre style='background-color: #f0f0e0;'>1. Bird
50
- 2. McHale
51
- 3. Parish</pre><p>It&apos;s important to note that the actual numbers you use to mark the list have no effect on the HTML output Markdown produces. The HTML Markdown produces from the above list is:</p><pre style='background-color: #f0f0e0;'>&lt;ol&gt;
52
- &lt;li&gt;Bird&lt;/li&gt;
53
- &lt;li&gt;McHale&lt;/li&gt;
54
- &lt;li&gt;Parish&lt;/li&gt;
55
- &lt;/ol&gt;</pre><p>If you instead wrote the list in Markdown like this:</p><pre style='background-color: #f0f0e0;'>1. Bird
56
- 1. McHale
57
- 1. Parish</pre><p>or even:</p><pre style='background-color: #f0f0e0;'>3. Bird
58
- 1. McHale
59
- 8. Parish</pre><p>you&apos;d get the exact same HTML output. The point is, if you want to, you can use ordinal numbers in your ordered Markdown lists, so that the numbers in your source match the numbers in your published HTML. But if you want to be lazy, you don&apos;t have to.</p><p>If you do use lazy list numbering, however, you should still start the list with the number 1. At some point in the future, Markdown may support starting ordered lists at an arbitrary number.</p><p>List markers typically start at the left margin, but may be indented by up to three spaces. List markers must be followed by one or more spaces or a tab.</p><p>To make lists look nice, you can wrap items with hanging indents:</p><pre style='background-color: #f0f0e0;'>* Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
60
- Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
61
- viverra nec, fringilla in, laoreet vitae, risus.
62
- * Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
63
- Suspendisse id sem consectetuer libero luctus adipiscing.</pre><p>But if you want to be lazy, you don&apos;t have to:</p><pre style='background-color: #f0f0e0;'>* Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
64
- Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
65
- viverra nec, fringilla in, laoreet vitae, risus.
66
- * Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
67
- Suspendisse id sem consectetuer libero luctus adipiscing.</pre><p>If list items are separated by blank lines, Markdown will wrap the items in <tt style='background-color: #f0f0e0;'>&lt;p&gt;</tt> tags in the HTML output. For example, this input:</p><pre style='background-color: #f0f0e0;'>* Bird
68
- * Magic</pre><p>will turn into:</p><pre style='background-color: #f0f0e0;'>&lt;ul&gt;
69
- &lt;li&gt;Bird&lt;/li&gt;
70
- &lt;li&gt;Magic&lt;/li&gt;
71
- &lt;/ul&gt;</pre><p>But this:</p><pre style='background-color: #f0f0e0;'>* Bird
72
- * Magic</pre><p>will turn into:</p><pre style='background-color: #f0f0e0;'>&lt;ul&gt;
73
- &lt;li&gt;&lt;p&gt;Bird&lt;/p&gt;&lt;/li&gt;
74
- &lt;li&gt;&lt;p&gt;Magic&lt;/p&gt;&lt;/li&gt;
75
- &lt;/ul&gt;</pre><p>List items may consist of multiple paragraphs. Each subsequent paragraph in a list item must be indented by either 4 spaces or one tab:</p><pre style='background-color: #f0f0e0;'>1. This is a list item with two paragraphs. Lorem ipsum dolor
76
- sit amet, consectetuer adipiscing elit. Aliquam hendrerit
77
- mi posuere lectus.
78
- Vestibulum enim wisi, viverra nec, fringilla in, laoreet
79
- vitae, risus. Donec sit amet nisl. Aliquam semper ipsum
80
- sit amet velit.
81
- 2. Suspendisse id sem consectetuer libero luctus adipiscing.</pre><p>It looks nice if you indent every line of the subsequent paragraphs, but here again, Markdown will allow you to be lazy:</p><pre style='background-color: #f0f0e0;'>* This is a list item with two paragraphs.
82
- This is the second paragraph in the list item. You&apos;re
83
- only required to indent the first line. Lorem ipsum dolor
84
- sit amet, consectetuer adipiscing elit.
85
- * Another item in the same list.</pre><p>To put a blockquote within a list item, the blockquote&apos;s <tt style='background-color: #f0f0e0;'>&gt;</tt> delimiters need to be indented:</p><pre style='background-color: #f0f0e0;'>* A list item with a blockquote:
86
- &gt; This is a blockquote
87
- &gt; inside a list item.</pre><p>To put a code block within a list item, the code block needs to be indented <em>twice</em> -- 8 spaces or two tabs:</p><pre style='background-color: #f0f0e0;'>* A list item with a code block:
88
- &lt;code goes here&gt;</pre><p>It&apos;s worth noting that it&apos;s possible to trigger an ordered list by accident, by writing something like this:</p><pre style='background-color: #f0f0e0;'>1986. What a great season.</pre><p>In other words, a <em>number-period-space</em> sequence at the beginning of a line. To avoid this, you can backslash-escape the period:</p><pre style='background-color: #f0f0e0;'>1986\. What a great season.</pre><h3 id='precode'>Code Blocks</h3><p>Pre-formatted code blocks are used for writing about programming or markup source code. Rather than forming normal paragraphs, the lines of a code block are interpreted literally. Markdown wraps a code block in both <tt style='background-color: #f0f0e0;'>&lt;pre&gt;</tt> and <tt style='background-color: #f0f0e0;'>&lt;code&gt;</tt> tags.</p><p>To produce a code block in Markdown, simply indent every line of the block by at least 4 spaces or 1 tab. For example, given this input:</p><pre style='background-color: #f0f0e0;'>This is a normal paragraph:
89
- This is a code block.</pre><p>Markdown will generate:</p><pre style='background-color: #f0f0e0;'>&lt;p&gt;This is a normal paragraph:&lt;/p&gt;
90
- &lt;pre&gt;&lt;code&gt;This is a code block.
91
- &lt;/code&gt;&lt;/pre&gt;</pre><p>One level of indentation -- 4 spaces or 1 tab -- is removed from each line of the code block. For example, this:</p><pre style='background-color: #f0f0e0;'>Here is an example of AppleScript:
92
- tell application &quot;Foo&quot;
93
- beep
94
- end tell</pre><p>will turn into:</p><pre style='background-color: #f0f0e0;'>&lt;p&gt;Here is an example of AppleScript:&lt;/p&gt;
95
- &lt;pre&gt;&lt;code&gt;tell application &quot;Foo&quot;
96
- beep
97
- end tell
98
- &lt;/code&gt;&lt;/pre&gt;</pre><p>A code block continues until it reaches a line that is not indented (or the end of the article).</p><p>Within a code block, ampersands (<tt style='background-color: #f0f0e0;'>&amp;</tt>) and angle brackets (<tt style='background-color: #f0f0e0;'>&lt;</tt> and <tt style='background-color: #f0f0e0;'>&gt;</tt>) are automatically converted into HTML entities. This makes it very easy to include example HTML source code using Markdown -- just paste it and indent it, and Markdown will handle the hassle of encoding the ampersands and angle brackets. For example, this:</p><pre style='background-color: #f0f0e0;'> &lt;div class=&quot;footer&quot;&gt;
99
- &amp;copy; 2004 Foo Corporation
100
- &lt;/div&gt;</pre><p>will turn into:</p><pre style='background-color: #f0f0e0;'>&lt;pre&gt;&lt;code&gt;&amp;lt;div class=&quot;footer&quot;&amp;gt;
101
- &amp;amp;copy; 2004 Foo Corporation
102
- &amp;lt;/div&amp;gt;
103
- &lt;/code&gt;&lt;/pre&gt;</pre><p>Regular Markdown syntax is not processed within code blocks. E.g., asterisks are just literal asterisks within a code block. This means it&apos;s also easy to use Markdown to write about Markdown&apos;s own syntax.</p><h3 id='hr'>Horizontal Rules</h3><p>You can produce a horizontal rule tag (<tt style='background-color: #f0f0e0;'>&lt;hr /&gt;</tt>) by placing three or more hyphens, asterisks, or underscores on a line by themselves. If you wish, you may use spaces between the hyphens or asterisks. Each of the following lines will produce a horizontal rule:</p><pre style='background-color: #f0f0e0;'>* * *
104
- ***
105
- *****
106
- - - -
107
- ---------------------------------------
108
- _ _ _</pre><hr /><h2 id='span'>Span Elements</h2><h3 id='link'>Links</h3><p>Markdown supports two styles of links: <em>inline</em> and <em>reference</em>.</p><p>In both styles, the link text is delimited by [square brackets].</p><p>To create an inline link, use a set of regular parentheses immediately after the link text&apos;s closing square bracket. Inside the parentheses, put the URL where you want the link to point, along with an <em>optional</em> title for the link, surrounded in quotes. For example:</p><pre style='background-color: #f0f0e0;'>This is [an example](http://example.com/ &quot;Title&quot;) inline link.
109
- [This link](http://example.net/) has no title attribute.</pre><p>Will produce:</p><pre style='background-color: #f0f0e0;'>&lt;p&gt;This is &lt;a href=&quot;http://example.com/&quot; title=&quot;Title&quot;&gt;
110
- an example&lt;/a&gt; inline link.&lt;/p&gt;
111
- &lt;p&gt;&lt;a href=&quot;http://example.net/&quot;&gt;This link&lt;/a&gt; has no
112
- title attribute.&lt;/p&gt;</pre><p>If you&apos;re referring to a local resource on the same server, you can use relative paths:</p><pre style='background-color: #f0f0e0;'>See my [About](/about/) page for details. </pre><p>Reference-style links use a second set of square brackets, inside which you place a label of your choosing to identify the link:</p><pre style='background-color: #f0f0e0;'>This is [an example][id] reference-style link.</pre><p>You can optionally use a space to separate the sets of brackets:</p><pre style='background-color: #f0f0e0;'>This is [an example] [id] reference-style link.</pre><p>Then, anywhere in the document, you define your link label like this, on a line by itself:</p><pre style='background-color: #f0f0e0;'>[id]: http://example.com/ &quot;Optional Title Here&quot;</pre><p>That is:</p><ul><li>Square brackets containing the link identifier (optionally indented from the left margin using up to three spaces);</li><li>followed by a colon;</li><li>followed by one or more spaces (or tabs);</li><li>followed by the URL for the link;</li><li>optionally followed by a title attribute for the link, enclosed in double or single quotes.</li></ul><p>The link URL may, optionally, be surrounded by angle brackets:</p><pre style='background-color: #f0f0e0;'>[id]: &lt;http://example.com/&gt; &quot;Optional Title Here&quot;</pre><p>You can put the title attribute on the next line and use extra spaces or tabs for padding, which tends to look better with longer URLs:</p><pre style='background-color: #f0f0e0;'>[id]: http://example.com/longish/path/to/resource/here
113
- &quot;Optional Title Here&quot;</pre><p>Link definitions are only used for creating links during Markdown processing, and are stripped from your document in the HTML output.</p><p>Link definition names may consist of letters, numbers, spaces, and punctuation -- but they are <em>not</em> case sensitive. E.g. these two links:</p><pre style='background-color: #f0f0e0;'>[link text][a]
114
- [link text][A]</pre><p>are equivalent.</p><p>The <em>implicit link name</em> shortcut allows you to omit the name of the link, in which case the link text itself is used as the name. Just use an empty set of square brackets -- e.g., to link the word &quot;Google&quot; to the google.com web site, you could simply write:</p><pre style='background-color: #f0f0e0;'>[Google][]</pre><p>And then define the link:</p><pre style='background-color: #f0f0e0;'>[Google]: http://google.com/</pre><p>Because link names may contain spaces, this shortcut even works for multiple words in the link text:</p><pre style='background-color: #f0f0e0;'>Visit [Daring Fireball][] for more information.</pre><p>And then define the link:</p><pre style='background-color: #f0f0e0;'>
115
- [Daring Fireball]: http://daringfireball.net/</pre><p>Link definitions can be placed anywhere in your Markdown document. I tend to put them immediately after each paragraph in which they&apos;re used, but if you want, you can put them all at the end of your document, sort of like footnotes.</p><p>Here&apos;s an example of reference links in action:</p><pre style='background-color: #f0f0e0;'>I get 10 times more traffic from [Google] [1] than from
116
- [Yahoo] [2] or [MSN] [3].
117
- [1]: http://google.com/ &quot;Google&quot;
118
- [2]: http://search.yahoo.com/ &quot;Yahoo Search&quot;
119
- [3]: http://search.msn.com/ &quot;MSN Search&quot;</pre><p>Using the implicit link name shortcut, you could instead write:</p><pre style='background-color: #f0f0e0;'>I get 10 times more traffic from [Google][] than from
120
- [Yahoo][] or [MSN][].
121
- [google]: http://google.com/ &quot;Google&quot;
122
- [yahoo]: http://search.yahoo.com/ &quot;Yahoo Search&quot;
123
- [msn]: http://search.msn.com/ &quot;MSN Search&quot;</pre><p>Both of the above examples will produce the following HTML output:</p><pre style='background-color: #f0f0e0;'>&lt;p&gt;I get 10 times more traffic from &lt;a href=&quot;http://google.com/&quot;
124
- title=&quot;Google&quot;&gt;Google&lt;/a&gt; than from
125
- &lt;a href=&quot;http://search.yahoo.com/&quot; title=&quot;Yahoo Search&quot;&gt;Yahoo&lt;/a&gt;
126
- or &lt;a href=&quot;http://search.msn.com/&quot; title=&quot;MSN Search&quot;&gt;MSN&lt;/a&gt;.&lt;/p&gt;</pre><p>For comparison, here is the same paragraph written using Markdown&apos;s inline link style:</p><pre style='background-color: #f0f0e0;'>I get 10 times more traffic from [Google](http://google.com/ &quot;Google&quot;)
127
- than from [Yahoo](http://search.yahoo.com/ &quot;Yahoo Search&quot;) or
128
- [MSN](http://search.msn.com/ &quot;MSN Search&quot;).</pre><p>The point of reference-style links is not that they&apos;re easier to write. The point is that with reference-style links, your document source is vastly more readable. Compare the above examples: using reference-style links, the paragraph itself is only 81 characters long; with inline-style links, it&apos;s 176 characters; and as raw HTML, it&apos;s 234 characters. In the raw HTML, there&apos;s more markup than there is text.</p><p>With Markdown&apos;s reference-style links, a source document much more closely resembles the final output, as rendered in a browser. By allowing you to move the markup-related metadata out of the paragraph, you can add links without interrupting the narrative flow of your prose.</p><h3 id='em'>Emphasis</h3><p>Markdown treats asterisks (<tt style='background-color: #f0f0e0;'>*</tt>) and underscores (<tt style='background-color: #f0f0e0;'>_</tt>) as indicators of emphasis. Text wrapped with one <tt style='background-color: #f0f0e0;'>*</tt> or <tt style='background-color: #f0f0e0;'>_</tt> will be wrapped with an HTML <tt style='background-color: #f0f0e0;'>&lt;em&gt;</tt> tag; double <tt style='background-color: #f0f0e0;'>*</tt>&apos;s or <tt style='background-color: #f0f0e0;'>_</tt>&apos;s will be wrapped with an HTML <tt style='background-color: #f0f0e0;'>&lt;strong&gt;</tt> tag. E.g., this input:</p><pre style='background-color: #f0f0e0;'>*single asterisks*
129
- _single underscores_
130
- **double asterisks**
131
- __double underscores__</pre><p>will produce:</p><pre style='background-color: #f0f0e0;'>&lt;em&gt;single asterisks&lt;/em&gt;
132
- &lt;em&gt;single underscores&lt;/em&gt;
133
- &lt;strong&gt;double asterisks&lt;/strong&gt;
134
- &lt;strong&gt;double underscores&lt;/strong&gt;</pre><p>You can use whichever style you prefer; the lone restriction is that the same character must be used to open and close an emphasis span.</p><p>Emphasis can be used in the middle of a word:</p><pre style='background-color: #f0f0e0;'>un*fucking*believable</pre><p>But if you surround an <tt style='background-color: #f0f0e0;'>*</tt> or <tt style='background-color: #f0f0e0;'>_</tt> with spaces, it&apos;ll be treated as a literal asterisk or underscore.</p><p>To produce a literal asterisk or underscore at a position where it would otherwise be used as an emphasis delimiter, you can backslash escape it:</p><pre style='background-color: #f0f0e0;'>\*this text is surrounded by literal asterisks\*</pre><h3 id='code'>Code</h3><p>To indicate a span of code, wrap it with backtick quotes (<tt style='background-color: #f0f0e0;'> ` </tt>). Unlike a pre-formatted code block, a code span indicates code within a normal paragraph. For example:</p><pre style='background-color: #f0f0e0;'>Use the `printf()` function.</pre><p>will produce:</p><pre style='background-color: #f0f0e0;'>&lt;p&gt;Use the &lt;code&gt;printf()&lt;/code&gt; function.&lt;/p&gt;</pre><p>To include a literal backtick character within a code span, you can use multiple backticks as the opening and closing delimiters:</p><pre style='background-color: #f0f0e0;'>``There is a literal backtick (`) here.``</pre><p>which will produce this:</p><pre style='background-color: #f0f0e0;'>&lt;p&gt;&lt;code&gt;There is a literal backtick (`) here.&lt;/code&gt;&lt;/p&gt;</pre><p>The backtick delimiters surrounding a code span may include spaces -- one after the opening, one before the closing. This allows you to place literal backtick characters at the beginning or end of a code span:</p><pre style='background-color: #f0f0e0;'>A single backtick in a code span: `` ` ``
135
- A backtick-delimited string in a code span: `` `foo` ``</pre><p>will produce:</p><pre style='background-color: #f0f0e0;'>&lt;p&gt;A single backtick in a code span: &lt;code&gt;`&lt;/code&gt;&lt;/p&gt;
136
- &lt;p&gt;A backtick-delimited string in a code span: &lt;code&gt;`foo`&lt;/code&gt;&lt;/p&gt;</pre><p>With a code span, ampersands and angle brackets are encoded as HTML entities automatically, which makes it easy to include example HTML tags. Markdown will turn this:</p><pre style='background-color: #f0f0e0;'>Please don&apos;t use any `&lt;blink&gt;` tags.</pre><p>into:</p><pre style='background-color: #f0f0e0;'>&lt;p&gt;Please don&apos;t use any &lt;code&gt;&amp;lt;blink&amp;gt;&lt;/code&gt; tags.&lt;/p&gt;</pre><p>You can write this:</p><pre style='background-color: #f0f0e0;'>`&amp;#8212;` is the decimal-encoded equivalent of `&amp;mdash;`.</pre><p>to produce:</p><pre style='background-color: #f0f0e0;'>&lt;p&gt;&lt;code&gt;&amp;amp;#8212;&lt;/code&gt; is the decimal-encoded
137
- equivalent of &lt;code&gt;&amp;amp;mdash;&lt;/code&gt;.&lt;/p&gt;</pre><h3 id='img'>Images</h3><p>Admittedly, it&apos;s fairly difficult to devise a &quot;natural&quot; syntax for placing images into a plain text document format.</p><p>Markdown uses an image syntax that is intended to resemble the syntax for links, allowing for two styles: <em>inline</em> and <em>reference</em>.</p><p>Inline image syntax looks like this:</p><pre style='background-color: #f0f0e0;'>![Alt text](/path/to/img.jpg)
138
- ![Alt text](/path/to/img.jpg &quot;Optional title&quot;)</pre><p>That is:</p><ul><li>An exclamation mark: <tt style='background-color: #f0f0e0;'>!</tt>;</li><li>followed by a set of square brackets, containing the <tt style='background-color: #f0f0e0;'>alt</tt> attribute text for the image;</li><li>followed by a set of parentheses, containing the URL or path to the image, and an optional <tt style='background-color: #f0f0e0;'>title</tt> attribute enclosed in double or single quotes.</li></ul><p>Reference-style image syntax looks like this:</p><pre style='background-color: #f0f0e0;'>![Alt text][id]</pre><p>Where &quot;id&quot; is the name of a defined image reference. Image references are defined using syntax identical to link references:</p><pre style='background-color: #f0f0e0;'>[id]: url/to/image &quot;Optional title attribute&quot;</pre><p>As of this writing, Markdown has no syntax for specifying the dimensions of an image; if this is important to you, you can simply use regular HTML <tt style='background-color: #f0f0e0;'>&lt;img&gt;</tt> tags.</p><hr /><h2 id='misc'>Miscellaneous</h2><h3 id='autolink'>Automatic Links</h3><p>Markdown supports a shortcut style for creating &quot;automatic&quot; links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this:</p><pre style='background-color: #f0f0e0;'>&lt;http://example.com/&gt;</pre><p>Markdown will turn this into:</p><pre style='background-color: #f0f0e0;'>&lt;a href=&quot;http://example.com/&quot;&gt;http://example.com/&lt;/a&gt;</pre><p>Automatic links for email addresses work similarly, except that Markdown will also perform a bit of randomized decimal and hex entity-encoding to help obscure your address from address-harvesting spambots. 
For example, Markdown will turn this:</p><pre style='background-color: #f0f0e0;'>&lt;address@example.com&gt;</pre><p>into something like this:</p><pre style='background-color: #f0f0e0;'>&lt;a href=&quot;&amp;#x6D;&amp;#x61;i&amp;#x6C;&amp;#x74;&amp;#x6F;:&amp;#x61;&amp;#x64;&amp;#x64;&amp;#x72;&amp;#x65;
139
- &amp;#115;&amp;#115;&amp;#64;&amp;#101;&amp;#120;&amp;#x61;&amp;#109;&amp;#x70;&amp;#x6C;e&amp;#x2E;&amp;#99;&amp;#111;
140
- &amp;#109;&quot;&gt;&amp;#x61;&amp;#x64;&amp;#x64;&amp;#x72;&amp;#x65;&amp;#115;&amp;#115;&amp;#64;&amp;#101;&amp;#120;&amp;#x61;
141
- &amp;#109;&amp;#x70;&amp;#x6C;e&amp;#x2E;&amp;#99;&amp;#111;&amp;#109;&lt;/a&gt;</pre><p>which will render in a browser as a clickable link to &quot;address@example.com&quot;.</p><p>(This sort of entity-encoding trick will indeed fool many, if not most, address-harvesting bots, but it definitely won&apos;t fool all of them. It&apos;s better than nothing, but an address published in this way will probably eventually start receiving spam.)</p><h3 id='backslash'>Backslash Escapes</h3><p>Markdown allows you to use backslash escapes to generate literal characters which would otherwise have special meaning in Markdown&apos;s formatting syntax. For example, if you wanted to surround a word with literal asterisks (instead of an HTML <tt style='background-color: #f0f0e0;'>&lt;em&gt;</tt> tag), you can use backslashes before the asterisks, like this:</p><pre style='background-color: #f0f0e0;'>\*literal asterisks\*</pre><p>Markdown provides backslash escapes for the following characters:</p><pre style='background-color: #f0f0e0;'>\ backslash
142
- ` backtick
143
- * asterisk
144
- _ underscore
145
- {} curly braces
146
- [] square brackets
147
- () parentheses
148
- # hash mark
149
- + plus sign
150
- - minus sign (hyphen)
151
- . dot
152
- ! exclamation mark</pre><div class='maruku_signature'><hr /><span style='font-size: small; font-style: italic'>Created by <a href='http://maruku.rubyforge.org' title='Maruku: a Markdown interpreter'>Maruku</a> at 14:55 on Sunday, December 31st, 2006.</span></div></body></html>
@@ -1,564 +0,0 @@
1
- # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
2
- #
3
- # This file is part of Maruku.
4
- #
5
- # Maruku is free software; you can redistribute it and/or modify
6
- # it under the terms of the GNU General Public License as published by
7
- # the Free Software Foundation; either version 2 of the License, or
8
- # (at your option) any later version.
9
- #
10
- # Maruku is distributed in the hope that it will be useful,
11
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- # GNU General Public License for more details.
14
- #
15
- # You should have received a copy of the GNU General Public License
16
- # along with Maruku; if not, write to the Free Software
17
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18
-
19
- class Maruku
20
- include Helpers
21
-
22
- # Splits the string and calls parse_lines_as_markdown
23
# Parses a Markdown document given as a single string.
# The text is split into lines with Maruku.split_lines and the resulting
# array is handed to parse_lines_as_markdown, whose result is returned.
def parse_text_as_markdown(text)
  parse_lines_as_markdown(Maruku.split_lines(text))
end
27
-
28
# Parses an array of lines into an array of block-level elements.
#
# The lines are pushed on @stack, so that cur_line, next_line and shift_line
# operate on them, and popped again before returning. The read_* methods
# call back into this method for nested content (list items, quotes, ...).
#
# A hash returned by read_metadata in one iteration is merged into the last
# output element at the end of the following iteration.
#
# Returns the array of elements.
def parse_lines_as_markdown(lines)
  @stack.push lines
  output = []; current_metadata = just_read_metadata = nil
  # run state machine
  while cur_line
    case cur_line_node_type
    when :empty
      shift_line
    when :text
      if cur_line =~ MightBeTableHeader and
         (next_line && next_line =~ TableSeparator)
        output << read_table
      elsif [:header1,:header2].include? next_line_node_type
        output << read_header12
      elsif eventually_comes_a_def_list
        definition = read_definition
        # consecutive definitions are collected in a single list
        if output.last && output.last.node_type == :definition_list
          output.last.children << definition
        else
          output << create_md_element(:definition_list, [definition])
        end
      else # Start of a paragraph
        output << read_paragraph
      end
    when :header2, :hrule
      # a :header2 line that reaches this point was not consumed by
      # read_header12; it is emitted as an hrule
      shift_line
      output << create_md_element(:hrule)
    when :header3
      output << read_header3
    when :ulist, :olist
      list_type = cur_line_node_type == :ulist ? :ul : :ol
      li = read_list_item
      # append to current list if we have one
      if output.last && output.last.node_type == list_type
        output.last.children << li
      else
        output << create_md_element(list_type, [li])
      end
    when :quote;    output << read_quote
    when :code;     e = read_code; output << e if e
    when :raw_html; e = read_raw_html; output << e if e

    # of the following, only :ref_definition appends to the output
    when :footnote_text;  read_footnote_text
    when :ref_definition; output << read_ref_definition
    when :abbreviation;   read_abbreviation
    when :metadata;       just_read_metadata = read_metadata

    # warn if we forgot something
    else
      node_type = cur_line_node_type
      line = shift_line
      tell_user "Ignoring line '#{line}' type = #{node_type}"
    end

    if current_metadata and output.last
      output.last.meta.merge! current_metadata
      current_metadata = nil
    end
    current_metadata = just_read_metadata
    just_read_metadata = nil
  end
  # pop the stack
  @stack.pop

  unwrap_single_paragraphs(output)
  output
end

# Internal helper of parse_lines_as_markdown.
# For every :ul, :ol and :definition_list in +elements+ whose items all have
# a false :want_my_paragraph, the children of each item (or of each entry of
# meta[:definitions]) are replaced by the children of its first child; list
# items additionally become :li_span.
# Items without any child are left empty instead of raising NoMethodError.
def unwrap_single_paragraphs(elements)
  elements.each do |c|
    if [:ul,:ol].include? c.node_type
      if c.children.all? {|li| !li.meta[:want_my_paragraph]}
        c.children.each do |d|
          d.node_type = :li_span
          first = d.children[0]
          d.children = first.children if first
        end
      end
    end
    if c.node_type == :definition_list
      if c.children.all? {|defi| !defi.meta[:want_my_paragraph]}
        c.children.each do |definition|
          definition.meta[:definitions].each do |dd|
            first = dd.children[0]
            dd.children = first.children if first
          end
        end
      end
    end
  end
end
private :unwrap_single_paragraphs
126
-
127
# Builds an MDElement of the given type with the given children and
# metadata, sets its +doc+ to this object and returns it.
def create_md_element(node_type, children=[], meta = {})
  element = MDElement.new(node_type, children, meta)
  element.doc = self
  element
end
132
-
133
# Line-cursor helpers: they all work on the last array pushed on @stack.

# The array of lines currently being parsed.
def top
  @stack.last
end

# Type of the current line, as classified by line_node_type.
def cur_line_node_type
  line_node_type top.first
end

# The current line, or nil when no lines are left.
def cur_line
  return nil if top.empty?
  top.first
end

# The line following the current one, or nil when there is none.
def next_line
  return nil if top.empty?
  top[1]
end

# Type of the line following the current one; nil when fewer than two
# lines are left.
def next_line_node_type
  return nil if top.size < 2
  line_node_type(top[1])
end

# Removes the current line from the array and returns it.
def shift_line
  top.shift
end
140
-
141
- # reads a header (with ----- or ========)
142
# Reads a two-line header: a text line followed by an underline line.
# The text may carry an explicit id (matched by HeaderWithId); otherwise an
# id is generated. The level is 2 when the underline is a :header2 line
# and 1 otherwise. Both lines are consumed.
def read_header12
  header = create_md_element(:header)
  title = shift_line.strip
  if title =~ HeaderWithId
    title = $1.strip
    header.meta[:id] = $2
  end
  header.children = parse_lines_as_span [ title ]

  # the current line is now the underline
  header.meta[:level] = (cur_line_node_type == :header2) ? 2 : 1
  shift_line

  # generate an id if one is not provided
  header.meta[:id] = header.generate_id unless header.meta[:id]

  header
end
159
-
160
- # returns a hash
161
# Builds the metadata hash for a header attribute string: everything
# after the first character of +s+ is stored under :id.
# NOTE(review): the dropped character is presumably the leading '#' of an
# id attribute - confirm against HeaderWithAttributes.
def parse_attributes(s)
  remainder = s[1..-1]
  {:id => remainder}
end
164
- # reads a header like '#### header ####'
165
-
166
# Reads a one-line header written with leading hashes ('#### header ####').
# Attributes matched by HeaderWithAttributes are merged into the metadata;
# the level is the number of leading hashes, and an id is generated when
# none was given.
def read_header3
  header = create_md_element(:header)
  text = shift_line.strip
  if text =~ HeaderWithAttributes
    text = $1.strip
    header.meta.merge! parse_attributes($2)
  end

  header.meta[:level] = num_leading_hashes(text)
  header.children = parse_lines_as_span [strip_hashes(text)]

  # generate an id if one is not provided
  header.meta[:id] = header.generate_id unless header.meta[:id]

  header
end
182
-
183
-
184
# Reads a block of raw HTML.
# Lines are fed to an HTMLHelper, joined by "\n", until the helper reports
# that it is finished or no lines are left. Errors raised while reading are
# reported with tell_user and whatever was read so far is still returned.
#
# Only StandardError is rescued, so that Interrupt, SystemExit and similar
# exceptions are not swallowed by the parser.
#
# Returns the result of md_html for the text that was read.
def read_raw_html
  h = HTMLHelper.new
  begin
    h.eat_this shift_line
    while cur_line and not h.is_finished?
      h.eat_this "\n"+shift_line
    end
  rescue StandardError => e
    tell_user e.inspect + e.backtrace.join("\n")
  end

  md_html(h.stuff_you_read)
end
206
-
207
# Reads the lines of a paragraph and returns md_par of their span content.
# Reading stops at the end of input, at a quote, '#' header, empty, raw
# HTML or reference definition line, at a blank line, or when the line
# after the current one is a header underline.
def read_paragraph
  stoppers = [:quote,:header3,:empty,:raw_html,:ref_definition]
  lines = []
  while cur_line
    break if stoppers.include?(cur_line_node_type)
    break if cur_line.strip.size == 0
    # leave the current line to read_header12
    break if [:header1,:header2].include? next_line_node_type

    lines << shift_line
  end

  md_par(parse_lines_as_span(lines))
end
223
-
224
-
225
-
226
- # Reads one list item, either ordered or unordered.
227
# Reads one list item, either ordered or unordered, and returns a :li
# element. meta[:want_my_paragraph] is set when read_indented_content asks
# for it or when the item has more than one child.
def read_list_item
  item_type = cur_line_node_type
  first = shift_line

  # the content of the item starts at the first character after the marker
  indentation = spaces_before_first_char(first)
  rest, want_my_paragraph =
    read_indented_content(indentation, [:ulist, :olist], item_type)

  # put the first line, without its list marker, in front of the others
  rest.unshift first[indentation, first.size-1]

  item = create_md_element(:li)
  item.children = parse_lines_as_markdown(rest)
  item.meta[:want_my_paragraph] = want_my_paragraph|| (item.children.size>1)
  item
end
247
-
248
# Consumes one abbreviation line and records it in @abbreviations, using
# the first capture of Abbreviation as key and the second as description.
def read_abbreviation
  shift_line =~ Abbreviation
  abbrev, description = $1, $2

  @abbreviations[abbrev] = description
end
255
-
256
# Reads the text of a footnote and stores it in @footnotes under its id
# as a :footnote element. The first line is matched against FootnoteText
# (captures: id, text); the following lines come from
# read_indented_content, called with a fixed indentation of 4.
def read_footnote_text
  first = shift_line
  first =~ FootnoteText
  id, text = $1, $2

  lines, _want_my_paragraph =
    read_indented_content(4, [:footnote_text], :footnote_text)

  # add first line, unless it is blank
  lines.unshift text if text && text.strip != ""

  @footnotes[id] = create_md_element(:footnote, parse_lines_as_markdown(lines))
end
282
-
283
-
284
- # This is the only ugly function in the code base.
285
- # It is used to read list items, descriptions, footnote text
286
# Collects the continuation lines of a list item, definition or footnote.
#
# indentation:: number of columns removed from each line with strip_indent,
#               and minimum number of leading spaces required after a
#               blank line
# break_list::  line types that end the content before any blank line
# item_type::   line type of the item being read
#
# Returns [lines, want_my_paragraph]. The flag is truthy when content was
# accepted after a blank line, or when a blank line was seen and reading
# stopped in front of a line of type +item_type+.
def read_indented_content(indentation, break_list, item_type)
  collected = []
  saw_empty = false
  saw_anything_after = false

  while cur_line
    if cur_line_node_type == :empty
      # blank lines are kept for now; trailing ones are dropped below
      saw_empty = true
      collected << shift_line
      next
    end

    if !saw_empty
      break if break_list.include? cur_line_node_type
    elsif number_of_leading_spaces(cur_line) < indentation
      # after a white line we expect things to be properly aligned
      break
    else
      saw_anything_after = true
    end

    stripped = strip_indent(shift_line, indentation)
    collected << stripped

    # You are only required to indent the first line of a child paragraph:
    # the text lines that follow it are taken as well.
    if line_node_type(stripped) == :text
      collected << strip_indent(shift_line, indentation) while cur_line && (cur_line_node_type == :text)
    end
  end

  want_my_paragraph = saw_anything_after ||
    (saw_empty && (cur_line && (cur_line_node_type == item_type)))

  # drop trailing blank lines
  collected.pop while collected.last && (line_node_type(collected.last) == :empty)

  return collected, want_my_paragraph
end
336
-
337
-
338
# Reads consecutive :quote lines, removes the quote marker from each with
# unquote and parses the result as Markdown. Returns a :quote element.
def read_quote
  quoted = []
  quoted << unquote(shift_line) while cur_line && line_node_type(cur_line) == :quote

  quote = create_md_element(:quote)
  quote.children = parse_lines_as_markdown(quoted)
  quote
end
350
-
351
# Reads an indented code block: consecutive :code and :empty lines, each
# passed through strip_indent with 4 columns. Blank lines at both ends are
# dropped. Returns a :code element with the source in meta[:raw_code], or
# nil when nothing but blank lines was read.
def read_code
  e = create_md_element(:code)

  lines = []
  lines << strip_indent(shift_line, 4) while cur_line && ([:code, :empty].include? cur_line_node_type)

  lines.pop while lines.last && lines.last.strip.size == 0
  lines.shift while lines.first && lines.first.strip.size == 0

  return nil if lines.empty?

  e.meta[:raw_code] = lines.join("\n")
  e
end
378
-
379
- # Reads a series of metadata lines with empty lines in between
380
# Reads a series of metadata lines, possibly with empty lines in between,
# and returns the merged hash of everything parse_metadata produced.
# Reading stops at the first line that is neither :metadata nor :empty.
def read_metadata
  collected = {}
  while cur_line
    type = cur_line_node_type
    if type == :empty
      shift_line
    elsif type == :metadata
      collected.merge! parse_metadata(shift_line)
    else
      break
    end
  end
  collected
end
391
-
392
- # parse one metadata line
393
- # TODO: read quote-delimited values
394
# Parses one metadata line of the form '@ key: value; key: value'.
#
# The first character is dropped, then the line is split on ';' into
# 'key: value' pairs. Each pair is split at its FIRST ':' only, so that
# values containing a colon (URLs, times) are kept whole. Blank segments,
# e.g. after a trailing ';', are skipped. Key and value are passed through
# normalize_key_and_value before being stored.
#
# Returns a hash with symbol keys.
# TODO: read quote-delimited values
def parse_metadata(l)
  hash = {}
  # remove leading '@'
  l = l[1, l.size].strip
  l.split(';').each do |kv|
    next if kv.strip.empty?
    k, v = kv.split(':', 2)
    k, v = normalize_key_and_value(k, v)

    hash[k.to_sym] = v
  end
  hash
end
406
-
407
-
408
-
409
# Reads a link reference definition ('[id]: url "title" key=value ...').
#
# When the following line is not itself a reference definition and is
# indented by 1 to 3 spaces, it is taken as the continuation of this one.
# The id is stripped and downcased; url, title and any additional
# key=value pairs are stored in refs[id].
#
# If the line does not match LinkRegex the problem is reported with +error+
# and, in case +error+ returns, a :linebreak element is returned (the same
# recovery read_table uses) instead of failing on a nil match.
#
# Returns the result of md_ref_def.
def read_ref_definition
  line = shift_line

  # if link is incomplete, shift next line
  if cur_line && (cur_line_node_type != :ref_definition) &&
     ([1,2,3].include? number_of_leading_spaces(cur_line) )
    line += " "+ shift_line
  end

  match = LinkRegex.match(line)
  if not match
    error "Link does not respect format: '#{line}'"
    return create_md_element(:linebreak)
  end

  id = match[1].strip.downcase
  url = match[2]; title = match[3]

  hash = self.refs[id] = {:url=>url,:title=>title}

  # whatever follows the title is read as key=value pairs
  stuff = match[4]
  if stuff
    stuff.split.each do |couple|
      k, v = couple.split('=')
      v ||= ""
      # remove surrounding double quotes
      if v[0,1]=='"' then v = v[1, v.size-2] end
      hash[k.to_sym] = v
    end
  end

  md_ref_def(id, url, {:title=>title})
end
446
-
447
# Splits one table line on '|' and returns its stripped, non-empty cells.
def split_cells(s)
  s.strip.split('|').select{|x|x.strip.size>0}.map{|x|x.strip}
end

# Reads a table: a header line, a separator line and every following line
# that contains a '|'.
#
# The separator cells are matched against Sep to get the alignment of each
# column: :center when both captures are present, :right when only the
# second one is, :left otherwise. The header and every row must have as
# many cells as the separator; if not, the problem is reported with +error+
# and a :linebreak element is returned in place of the table.
#
# Returns a :table element whose children are the head cells followed by
# the cells of each row, with the alignments in meta[:align].
def read_table
  head = split_cells(shift_line).map{|s|
    create_md_element(:head_cell, parse_lines_as_span([s]))}

  separator = split_cells(shift_line)

  align = separator.map { |s| s =~ Sep
    if $1 and $2 then :center elsif $2 then :right else :left end }

  num_columns = align.size

  if head.size != num_columns
    error "Head does not have #{num_columns} columns: \n#{head.inspect}"
    # XXX try to recover
    return create_md_element(:linebreak)
  end

  rows = []

  while cur_line && cur_line =~ /\|/
    row = split_cells(shift_line).map{|s|
      create_md_element(:cell, parse_lines_as_span([s]))}
    # check the row that was just read, not the header again
    if row.size != num_columns
      error "Row does not have #{num_columns} columns: \n#{row.inspect}"
      # XXX try to recover
      return create_md_element(:linebreak)
    end
    rows << row
  end

  e = create_md_element(:table)
  e.meta[:align] = align
  e.children = (head+rows).flatten
  e
end
487
-
488
- # If current line is text, a definition list is coming
489
- # if 1) text,empty,[text,empty]*,definition
490
-
491
# Tells whether a definition list starts at the current line: the string
# from create_next_string must begin with one or more text lines, at most
# one empty line and then a definition. Returns the match position
# (truthy) or nil.
def eventually_comes_a_def_list
  create_next_string =~ %r{^t+e?d}x
end
497
-
498
- # Returns the type of next line as a string
499
- # breaks at first :definition
500
# Encodes the types of the upcoming lines as a string, one character per
# line: "t" for text, "e" for empty, "d" for definition and "o" for
# anything else. The scan stops after the first definition or after the
# second empty line.
def create_next_string
  codes = []
  empties = 0
  top.each do |line|
    code =
      case line_node_type(line)
      when :text
        "t"
      when :empty
        empties += 1
        "e"
      when :definition
        "d"
      else
        "o"
      end
    codes << code
    break if code == "d" || empties > 1
  end
  codes.join
end
514
-
515
# Reads one entry of a definition list: one or more term lines, one
# optional empty line, then one or more definitions.
#
# Each term becomes a :definition_term element. Each definition becomes a
# :definition_data element built from the first capture of Definition
# followed by the lines returned by read_indented_content.
#
# Returns a :definition element holding terms and definitions both as
# children and in meta[:terms] / meta[:definitions].
# meta[:want_my_paragraph] is truthy when an empty line separated the terms
# from the definitions or when read_indented_content asked for it.
#
# Raises RuntimeError when the terms are not followed by a definition.
def read_definition
  # Read one or more terms
  terms = []
  while cur_line && cur_line_node_type == :text
    terms << create_md_element(:definition_term, parse_lines_as_span([shift_line]))
  end

  want_my_paragraph = false

  raise "Chunky Bacon!" if not cur_line

  # one optional empty
  if cur_line_node_type == :empty
    want_my_paragraph = true
    shift_line
  end

  # the input may have ended with the empty line: check before asking
  # for the type of a line that does not exist
  raise "Chunky Bacon!" if cur_line.nil? || cur_line_node_type != :definition

  # Read one or more definitions
  definitions = []
  while cur_line && cur_line_node_type == :definition
    first = shift_line
    first =~ Definition
    first = $1

    lines, w_m_p =
      read_indented_content(4, [:definition], :definition)
    want_my_paragraph ||= w_m_p

    lines.unshift first

    children = parse_lines_as_markdown(lines)
    definitions << create_md_element(:definition_data, children)
  end

  definition = create_md_element(:definition)
  definition.meta[:terms] = terms
  definition.meta[:definitions] = definitions
  definition.children = terms + definitions
  definition.meta[:want_my_paragraph] = want_my_paragraph
  definition
end
563
- end
564
-