remi-maruku 0.5.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (183) hide show
  1. data/Rakefile +73 -0
  2. data/bin/marudown +29 -0
  3. data/bin/maruku +176 -0
  4. data/bin/marutest +338 -0
  5. data/bin/marutex +31 -0
  6. data/docs/changelog.md +334 -0
  7. data/docs/div_syntax.md +36 -0
  8. data/docs/entity_test.md +23 -0
  9. data/docs/markdown_syntax.md +899 -0
  10. data/docs/maruku.md +346 -0
  11. data/docs/math.md +194 -0
  12. data/docs/other_stuff.md +51 -0
  13. data/docs/proposal.md +309 -0
  14. data/lib/maruku.rb +141 -0
  15. data/lib/maruku/attributes.rb +227 -0
  16. data/lib/maruku/defaults.rb +70 -0
  17. data/lib/maruku/errors_management.rb +92 -0
  18. data/lib/maruku/ext/div.rb +100 -0
  19. data/lib/maruku/ext/math.rb +41 -0
  20. data/lib/maruku/ext/math/elements.rb +27 -0
  21. data/lib/maruku/ext/math/latex_fix.rb +11 -0
  22. data/lib/maruku/ext/math/mathml_engines/blahtex.rb +104 -0
  23. data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +29 -0
  24. data/lib/maruku/ext/math/mathml_engines/none.rb +20 -0
  25. data/lib/maruku/ext/math/mathml_engines/ritex.rb +24 -0
  26. data/lib/maruku/ext/math/parsing.rb +105 -0
  27. data/lib/maruku/ext/math/to_html.rb +170 -0
  28. data/lib/maruku/ext/math/to_latex.rb +22 -0
  29. data/lib/maruku/helpers.rb +260 -0
  30. data/lib/maruku/input/charsource.rb +326 -0
  31. data/lib/maruku/input/extensions.rb +69 -0
  32. data/lib/maruku/input/html_helper.rb +189 -0
  33. data/lib/maruku/input/linesource.rb +111 -0
  34. data/lib/maruku/input/parse_block.rb +613 -0
  35. data/lib/maruku/input/parse_doc.rb +227 -0
  36. data/lib/maruku/input/parse_span_better.rb +732 -0
  37. data/lib/maruku/input/rubypants.rb +225 -0
  38. data/lib/maruku/input/type_detection.rb +144 -0
  39. data/lib/maruku/input_textile2/t2_parser.rb +163 -0
  40. data/lib/maruku/maruku.rb +33 -0
  41. data/lib/maruku/output/s5/fancy.rb +756 -0
  42. data/lib/maruku/output/s5/to_s5.rb +125 -0
  43. data/lib/maruku/output/to_html.rb +971 -0
  44. data/lib/maruku/output/to_latex.rb +563 -0
  45. data/lib/maruku/output/to_latex_entities.rb +367 -0
  46. data/lib/maruku/output/to_latex_strings.rb +64 -0
  47. data/lib/maruku/output/to_markdown.rb +164 -0
  48. data/lib/maruku/output/to_s.rb +53 -0
  49. data/lib/maruku/string_utils.rb +191 -0
  50. data/lib/maruku/structures.rb +165 -0
  51. data/lib/maruku/structures_inspect.rb +87 -0
  52. data/lib/maruku/structures_iterators.rb +61 -0
  53. data/lib/maruku/tests/benchmark.rb +82 -0
  54. data/lib/maruku/tests/new_parser.rb +370 -0
  55. data/lib/maruku/tests/tests.rb +136 -0
  56. data/lib/maruku/textile2.rb +1 -0
  57. data/lib/maruku/toc.rb +199 -0
  58. data/lib/maruku/usage/example1.rb +33 -0
  59. data/lib/maruku/version.rb +40 -0
  60. data/tests/bugs/code_in_links.md +16 -0
  61. data/tests/bugs/complex_escaping.md +4 -0
  62. data/tests/math/syntax.md +46 -0
  63. data/tests/math_usage/document.md +13 -0
  64. data/tests/others/abbreviations.md +11 -0
  65. data/tests/others/blank.md +4 -0
  66. data/tests/others/code.md +5 -0
  67. data/tests/others/code2.md +8 -0
  68. data/tests/others/code3.md +16 -0
  69. data/tests/others/email.md +4 -0
  70. data/tests/others/entities.md +19 -0
  71. data/tests/others/escaping.md +16 -0
  72. data/tests/others/extra_dl.md +101 -0
  73. data/tests/others/extra_header_id.md +13 -0
  74. data/tests/others/extra_table1.md +40 -0
  75. data/tests/others/footnotes.md +17 -0
  76. data/tests/others/headers.md +10 -0
  77. data/tests/others/hrule.md +10 -0
  78. data/tests/others/images.md +20 -0
  79. data/tests/others/inline_html.md +42 -0
  80. data/tests/others/links.md +38 -0
  81. data/tests/others/list1.md +4 -0
  82. data/tests/others/list2.md +5 -0
  83. data/tests/others/list3.md +8 -0
  84. data/tests/others/lists.md +32 -0
  85. data/tests/others/lists_after_paragraph.md +44 -0
  86. data/tests/others/lists_ol.md +39 -0
  87. data/tests/others/misc_sw.md +105 -0
  88. data/tests/others/one.md +1 -0
  89. data/tests/others/paragraphs.md +13 -0
  90. data/tests/others/sss06.md +352 -0
  91. data/tests/others/test.md +4 -0
  92. data/tests/s5/s5profiling.md +48 -0
  93. data/tests/unittest/abbreviations.md +72 -0
  94. data/tests/unittest/alt.md +30 -0
  95. data/tests/unittest/attributes/att2.md +34 -0
  96. data/tests/unittest/attributes/att3.md +45 -0
  97. data/tests/unittest/attributes/attributes.md +82 -0
  98. data/tests/unittest/attributes/circular.md +43 -0
  99. data/tests/unittest/attributes/default.md +38 -0
  100. data/tests/unittest/blank.md +39 -0
  101. data/tests/unittest/blanks_in_code.md +106 -0
  102. data/tests/unittest/bug_def.md +29 -0
  103. data/tests/unittest/bug_table.md +67 -0
  104. data/tests/unittest/code.md +53 -0
  105. data/tests/unittest/code2.md +46 -0
  106. data/tests/unittest/code3.md +102 -0
  107. data/tests/unittest/data_loss.md +42 -0
  108. data/tests/unittest/divs/div1.md +204 -0
  109. data/tests/unittest/divs/div2.md +34 -0
  110. data/tests/unittest/divs/div3_nest.md +62 -0
  111. data/tests/unittest/easy.md +28 -0
  112. data/tests/unittest/email.md +33 -0
  113. data/tests/unittest/encoding/iso-8859-1.md +38 -0
  114. data/tests/unittest/encoding/utf-8.md +33 -0
  115. data/tests/unittest/entities.md +124 -0
  116. data/tests/unittest/escaping.md +89 -0
  117. data/tests/unittest/extra_dl.md +72 -0
  118. data/tests/unittest/extra_header_id.md +86 -0
  119. data/tests/unittest/extra_table1.md +55 -0
  120. data/tests/unittest/footnotes.md +126 -0
  121. data/tests/unittest/headers.md +54 -0
  122. data/tests/unittest/hex_entities.md +50 -0
  123. data/tests/unittest/hrule.md +60 -0
  124. data/tests/unittest/html2.md +38 -0
  125. data/tests/unittest/html3.md +47 -0
  126. data/tests/unittest/html4.md +42 -0
  127. data/tests/unittest/html5.md +38 -0
  128. data/tests/unittest/ie.md +82 -0
  129. data/tests/unittest/images.md +114 -0
  130. data/tests/unittest/images2.md +46 -0
  131. data/tests/unittest/inline_html.md +260 -0
  132. data/tests/unittest/inline_html2.md +36 -0
  133. data/tests/unittest/links.md +197 -0
  134. data/tests/unittest/list1.md +66 -0
  135. data/tests/unittest/list2.md +76 -0
  136. data/tests/unittest/list3.md +88 -0
  137. data/tests/unittest/list4.md +116 -0
  138. data/tests/unittest/lists.md +241 -0
  139. data/tests/unittest/lists11.md +31 -0
  140. data/tests/unittest/lists6.md +54 -0
  141. data/tests/unittest/lists7.md +79 -0
  142. data/tests/unittest/lists7b.md +136 -0
  143. data/tests/unittest/lists8.md +83 -0
  144. data/tests/unittest/lists9.md +85 -0
  145. data/tests/unittest/lists_after_paragraph.md +268 -0
  146. data/tests/unittest/lists_ol.md +324 -0
  147. data/tests/unittest/loss.md +29 -0
  148. data/tests/unittest/math/equations.md +69 -0
  149. data/tests/unittest/math/inline.md +66 -0
  150. data/tests/unittest/math/math2.md +110 -0
  151. data/tests/unittest/math/notmath.md +40 -0
  152. data/tests/unittest/math/table.md +43 -0
  153. data/tests/unittest/math/table2.md +60 -0
  154. data/tests/unittest/misc_sw.md +615 -0
  155. data/tests/unittest/notyet/escape.md +36 -0
  156. data/tests/unittest/notyet/header_after_par.md +81 -0
  157. data/tests/unittest/notyet/ticks.md +31 -0
  158. data/tests/unittest/notyet/triggering.md +206 -0
  159. data/tests/unittest/olist.md +64 -0
  160. data/tests/unittest/one.md +28 -0
  161. data/tests/unittest/paragraph.md +29 -0
  162. data/tests/unittest/paragraph_rules/dont_merge_ref.md +57 -0
  163. data/tests/unittest/paragraph_rules/tab_is_blank.md +39 -0
  164. data/tests/unittest/paragraphs.md +66 -0
  165. data/tests/unittest/pending/amps.md +29 -0
  166. data/tests/unittest/pending/empty_cells.md +53 -0
  167. data/tests/unittest/pending/link.md +103 -0
  168. data/tests/unittest/recover/recover_links.md +28 -0
  169. data/tests/unittest/references/long_example.md +88 -0
  170. data/tests/unittest/references/spaces_and_numbers.md +28 -0
  171. data/tests/unittest/smartypants.md +171 -0
  172. data/tests/unittest/syntax_hl.md +80 -0
  173. data/tests/unittest/table_attributes.md +52 -0
  174. data/tests/unittest/test.md +32 -0
  175. data/tests/unittest/wrapping.md +88 -0
  176. data/tests/unittest/xml.md +54 -0
  177. data/tests/unittest/xml2.md +34 -0
  178. data/tests/unittest/xml3.md +44 -0
  179. data/tests/unittest/xml_instruction.md +72 -0
  180. data/tests/utf8-files/simple.md +1 -0
  181. data/unit_test_block.sh +5 -0
  182. data/unit_test_span.sh +2 -0
  183. metadata +243 -0
@@ -0,0 +1,69 @@
1
module MaRuKu; module In; module Markdown

  # Hash: trigger character (an element of SpanExtension#chars, compared
  # against `src.cur_char`) -> Array of SpanExtension registered for it.
  SpanExtensionsTrigger = {}

  # Describes one registered span-level extension.
  class SpanExtension
    # triggering chars
    attr_accessor :chars
    # triggering regexp
    attr_accessor :regexp
    # handler, invoked as block.call(doc, src, con)
    attr_accessor :block
  end

  # Hash String -> Extension
  SpanExtensions = {}

  # Hash Regexp -> Block (handler invoked as block.call(doc, src, con))
  BlockExtensions = {}

  # Tries the span extensions registered for the current character of +src+.
  #
  # An extension is attempted only when it has a regexp and that regexp
  # matches according to `src.next_matches`. Returns true as soon as one
  # handler returns a truthy value, false otherwise ("not special").
  def check_span_extensions(src, con)
    candidates = SpanExtensionsTrigger[src.cur_char]
    return false unless candidates

    candidates.each do |ext|
      next unless ext.regexp && src.next_matches(ext.regexp)
      return true if ext.block.call(doc, src, con)
    end
    false # not special
  end

  # Registers a span-level extension.
  #
  # args[:chars]   - one trigger character or an Array of them
  # args[:regexp]  - regexp that must match for the handler to be tried
  # args[:handler] - callable taking (doc, src, con); required
  #
  # Raises RuntimeError ("No blocks passed") when no handler is given.
  def self.register_span_extension(args)
    e = SpanExtension.new
    e.chars = [*args[:chars]]
    e.regexp = args[:regexp]
    e.block = args[:handler] || raise("No blocks passed")
    e.chars.each do |c|
      (SpanExtensionsTrigger[c] ||= []).push e
    end
  end

  # Registers a block-level extension.
  #
  # args[:regexp]  - regexp tested against each line; required
  # args[:handler] - callable taking (doc, src, con); required
  #
  # Raises RuntimeError when either is missing. A nil regexp used to be
  # stored as a key and made every later lookup fail on `nil.match`.
  def self.register_block_extension(args)
    regexp = args[:regexp] || raise("No regexp passed")
    BlockExtensions[regexp] = (args[:handler] || raise("No blocks passed"))
  end

  # Offers +line+ to every registered block extension whose regexp matches.
  # Returns true as soon as a handler accepts (returns truthy), else false.
  def check_block_extensions(src, con, line)
    BlockExtensions.each do |reg, block|
      next unless reg.match(line)
      return true if block.call(doc, src, con)
    end
    false # not special
  end

  # Returns the first MatchData produced by a registered block-extension
  # regexp against +line+, or false when none matches.
  def any_matching_block_extension?(line)
    BlockExtensions.each_key do |reg|
      m = reg.match(line)
      return m if m
    end
    false
  end

end end end
@@ -0,0 +1,189 @@
1
+ #--
2
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
+ #
4
+ # This file is part of Maruku.
5
+ #
6
+ # Maruku is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Maruku is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Maruku; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
20
+
21
+
22
+ module MaRuKu; module In; module Markdown; module SpanLevelParser
23
+
24
+ # This class helps me read and sanitize HTML blocks
25
+
26
+ # I tried to do this with REXML, but wasn't able to. (suggestions?)
27
+
28
# Incremental reader for raw HTML.
#
# Text is fed with #eat_this; the reader moves between three states
# (:inside_element, :inside_tag, :inside_comment), keeps a stack of the
# currently open tags, and accumulates what it consumed in #stuff_you_read.
# Whatever was not consumed is available through #rest.
class HTMLHelper
  include MaRuKu::Strings

  Tag = %r{^<(/)?(\w+)\s*([^>]*)>}m
  PartialTag = %r{^<.*}m

  EverythingElse = %r{^[^<]+}m
  CommentStart = %r{^<!--}x
  CommentEnd = %r{^.*-->}
  # Tags that are always re-emitted in self-closed form (<tag ... />)
  TO_SANITIZE = ['img','hr','br']

  # Text not consumed yet
  attr_reader :rest

  # One of :inside_element, :inside_tag, :inside_comment
  attr_accessor :state

  # Debugging hook; intentionally a no-op.
  def my_debug(s)
  end

  def initialize
    @rest = ""
    @tag_stack = []
    @m = nil
    @already = ""
    @partial_tag = nil
    self.state = :inside_element
  end

  # Consumes +line+ (prepended to any text still pending in @rest).
  #
  # Loops until the pending text is exhausted, or until the reader is
  # finished (see #is_finished?) after having read at least one tag or
  # comment start during this call.
  def eat_this(line)
    @rest = line + @rest
    things_read = 0
    until @rest.empty?
      case self.state
      when :inside_comment then eat_inside_comment
      when :inside_element then things_read += eat_inside_element
      when :inside_tag     then eat_inside_tag
      else
        raise "Bug bug: state = #{self.state.inspect}"
      end

      break if is_finished? and things_read>0
    end
  end

  # Processes the tag currently held in @m (a match of Tag): appends it to
  # the consumed text and updates the open-tag stack.
  # Raises (through #error) on a closing tag that does not match the stack.
  def handle_tag()
    @already += @m.pre_match
    @rest = @m.post_match

    is_closing = !!@m[1]
    tag = @m[2]
    attributes = @m[3].to_s

    # A trailing slash in the attributes marks a self-closed tag
    is_single = false
    if attributes[-1] == ?/
      attributes = attributes[0, attributes.size-1]
      is_single = true
    end

    my_debug "Attributes: #{attributes.inspect}"
    my_debug "READ TAG #{@m.to_s.inspect} tag = #{tag} closing? #{is_closing} single = #{is_single}"

    if TO_SANITIZE.include? tag
      # Rewritten in self-closed form; never pushed on the stack
      attributes.strip!
      if attributes.size > 0
        @already += '<%s %s />' % [tag, attributes]
      else
        @already += '<%s />' % [tag]
      end
    elsif is_closing
      @already += @m.to_s
      if @tag_stack.empty?
        error "Malformed: closing tag #{tag.inspect} "+
          "in empty list"
      end
      if @tag_stack.last != tag
        error "Malformed: tag <#{tag}> "+
          "closes <#{@tag_stack.last}>"
      end
      @tag_stack.pop
    else
      @already += @m.to_s

      if not is_single
        @tag_stack.push(tag)
        my_debug "Pushing #{tag.inspect} when read #{@m.to_s.inspect}"
      end
    end
  end

  # Raises Exception (sic: callers rescue Exception) with the reader's
  # state appended to the message.
  def error(s)
    raise Exception, "Error: #{s} \n"+ inspect, caller
  end

  def inspect; "HTML READER\n state=#{self.state} "+
    "match=#{@m.to_s.inspect}\n"+
    "Tag stack = #{@tag_stack.inspect} \n"+
    "Before:\n"+
    add_tabs(@already,1,'|')+"\n"+
    "After:\n"+
    add_tabs(@rest,1,'|')+"\n"
  end

  # Everything consumed so far (with sanitized tags rewritten).
  def stuff_you_read
    @already
  end

  # True when the reader is between elements and no tag is left open.
  def is_finished?
    (self.state == :inside_element) and @tag_stack.empty?
  end

  private

  # Moves the text up to and including the current match into @already.
  def consume_match
    @already += @m.pre_match + @m.to_s
    @rest = @m.post_match
  end

  # :inside_comment - swallow text up to the end of the comment, if present.
  def eat_inside_comment
    if @m = CommentEnd.match(@rest)
      consume_match
      self.state = :inside_element
    else
      @already += @rest
      @rest = ""
      self.state = :inside_comment
    end
  end

  # :inside_element - returns how many "things" (comment starts or complete
  # tags) were read: 1 or 0.
  def eat_inside_element
    if @m = CommentStart.match(@rest)
      consume_match
      self.state = :inside_comment
      1
    elsif @m = Tag.match(@rest)
      my_debug "#{@state}: Tag: #{@m.to_s.inspect}"
      handle_tag
      self.state = :inside_element
      1
    elsif @m = PartialTag.match(@rest)
      my_debug "#{@state}: PartialTag: #{@m.to_s.inspect}"
      @already += @m.pre_match
      @rest = @m.post_match
      @partial_tag = @m.to_s
      self.state = :inside_tag
      0
    elsif @m = EverythingElse.match(@rest)
      my_debug "#{@state}: Everything: #{@m.to_s.inspect}"
      consume_match
      self.state = :inside_element
      0
    else
      error "Malformed HTML: not complete: #{@rest.inspect}"
    end
  end

  # :inside_tag - keep accumulating until the closing '>' arrives, then
  # hand the completed tag back to the :inside_element state.
  def eat_inside_tag
    if @m = /^[^>]*>/.match(@rest)
      my_debug "#{@state}: inside_tag: matched #{@m.to_s.inspect}"
      @partial_tag += @m.to_s
      my_debug "#{@state}: inside_tag: matched TOTAL: #{@partial_tag.to_s.inspect}"
      @rest = @partial_tag + @m.post_match
      @partial_tag = nil
      self.state = :inside_element
    else
      @partial_tag += @rest
      @rest = ""
      self.state = :inside_tag
    end
  end
end # html helper
188
+
189
+ end end end end
@@ -0,0 +1,111 @@
1
+ #--
2
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
+ #
4
+ # This file is part of Maruku.
5
+ #
6
+ # Maruku is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Maruku is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Maruku; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
20
+
21
+
22
+ module MaRuKu; module In; module Markdown; module BlockLevelParser
23
+
24
+ # This represents a source of lines that can be consumed.
25
+ #
26
+ # It is the twin of CharSource.
27
+ #
28
+
29
# A consumable sequence of lines with a cursor.
#
# A LineSource may be nested: +parent+ and +parent_offset+ are used only to
# translate a local index into an original line number.
class LineSource
  include MaRuKu::Strings
  attr_reader :parent

  # lines         - Array of lines (must not be nil)
  # parent        - enclosing LineSource, if any
  # parent_offset - index in the parent at which these lines start
  def initialize(lines, parent=nil, parent_offset=nil)
    raise "NIL lines? " unless lines
    @lines = lines
    @lines_index = 0
    @parent = parent
    @parent_offset = parent_offset
  end

  # Line under the cursor (nil past the end).
  def cur_line
    @lines[@lines_index]
  end

  # Line following the cursor (nil when there is none).
  def next_line
    @lines[@lines_index + 1]
  end

  # Returns the current line and advances the cursor.
  # Raises RuntimeError when the cursor is already past the end.
  def shift_line
    raise "Over the rainbow" if @lines_index >= @lines.size
    current = @lines[@lines_index]
    @lines_index += 1
    current
  end

  # Advances the cursor without returning the line.
  # Raises RuntimeError when the cursor is already past the end.
  def ignore_line
    raise "Over the rainbow" if @lines_index >= @lines.size
    @lines_index += 1
  end

  # Human-readable dump of up to 3 lines of context on each side of the
  # cursor, each with its md_type; the current line is marked with '--> '.
  def describe
    report = "At line #{original_line_number(@lines_index)}\n"

    context = 3 # lines
    first = [@lines_index - context, 0].max
    last  = [@lines_index + context, @lines.size - 1].min

    (first..last).each do |i|
      marker = (i == @lines_index) ? '--> ' : ' '
      text = @lines[i]
      report += "%10s %4s|%s" % [text.md_type.to_s, marker, text]
      report += "|\n"
    end

    report
  end

  # Translates a local index into a line number, adding the parent's
  # number for @parent_offset when nested (1-based at the root).
  def original_line_number(index)
    return 1 + index unless @parent
    index + @parent.original_line_number(@parent_offset)
  end

  def cur_index
    @lines_index
  end

  # Returns the types of the upcoming lines as a string of one-letter
  # codes: "t" text, "e" empty, "d" definition, "o" anything else.
  # Stops after the first definition or after the second empty line.
  def tell_me_the_future
    future = ""
    empties = 0
    (@lines_index..@lines.size-1).each do |i|
      code =
        case @lines[i].md_type
        when :text then "t"
        when :empty
          empties += 1
          "e"
        when :definition then "d"
        else "o"
        end
      future += code
      break if code == "d" || empties > 1
    end
    future
  end

end # linesource
109
+
110
+ end end end end # block
111
+
@@ -0,0 +1,613 @@
1
+ #--
2
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
+ #
4
+ # This file is part of Maruku.
5
+ #
6
+ # Maruku is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Maruku is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Maruku; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
20
+
21
+
22
+ module MaRuKu; module In; module Markdown; module BlockLevelParser
23
+
24
+ include Helpers
25
+ include MaRuKu::Strings
26
+ include MaRuKu::In::Markdown::SpanLevelParser
27
+
28
# Array of the blocks parsed so far, with a debugging helper.
class BlockContext < Array
  # Returns a string listing the inspect form of (at most) the last five
  # elements, one per line.
  def describe
    n = 5
    tail = size > n ? last(n) : self
    listing = tail.map { |element| "\n -" + element.inspect }
    "Last #{n} elements: " + listing.join
  end
end
36
+
37
+ # Splits the string and calls parse_lines_as_markdown
38
# Splits +text+ with split_lines, wraps the lines in a LineSource and
# returns the result of parse_blocks on it.
def parse_text_as_markdown(text)
  parse_blocks(LineSource.new(split_lines(text)))
end
43
+
44
+ # Input is a LineSource
45
# Runs the block-level state machine over +src+ (a LineSource) and returns
# a BlockContext with the parsed elements.
#
# Each iteration first offers the current line to the block extensions,
# then dispatches on the line's md_type. After the loop, IAL elements and
# :empty markers are removed, and lists / definition lists whose items do
# not want their paragraph get the wrapping first child unwrapped.
def parse_blocks(src)
  output = BlockContext.new

  # run state machine
  while src.cur_line

    next if check_block_extensions(src, output, src.cur_line)

    line_type = src.cur_line.md_type
    case line_type
    when :empty
      output.push :empty
      src.ignore_line
    when :ial
      m = InlineAttributeList.match src.shift_line
      content = m[1] || ""
      interpret_extension(CharSource.new(content, src), output, [nil])
    when :ald
      output.push read_ald(src)
    when :text
      # paragraph, or table, or definition list
      read_text_material(src, output)
    when :header2, :hrule
      # a setext underline with no text above it is treated as a rule
      src.shift_line
      output.push md_hrule()
    when :header3
      output.push read_header3(src)
    when :ulist, :olist
      list_type = (line_type == :ulist) ? :ul : :ol
      li = read_list_item(src)
      # append to current list if we have one
      previous = output.last
      if previous.kind_of?(MDElement) && previous.node_type == list_type
        previous.children << li
      else
        output.push md_el(list_type, [li])
      end
    when :quote
      output.push read_quote(src)
    when :code
      e = read_code(src)
      output << e if e
    when :raw_html
      e = read_raw_html(src)
      output << e if e
    when :footnote_text
      output.push read_footnote_text(src)
    when :ref_definition
      # at the very start of a nested source the line is read as text
      if src.parent && (src.cur_index == 0)
        read_text_material(src, output)
      else
        read_ref_definition(src, output)
      end
    when :abbreviation
      output.push read_abbreviation(src)
    when :xml_instr
      read_xml_instruction(src, output)
    when :metadata
      maruku_error "Please use the new meta-data syntax: \n"+
        " http://maruku.rubyforge.org/proposal.html\n", src
      src.ignore_line
    else # warn if we forgot something
      md_type = src.cur_line.md_type
      line = src.cur_line
      maruku_error "Ignoring line '#{line}' type = #{md_type}", src
      src.shift_line
    end
  end

  merge_ial(output, src, output)
  output.delete_if { |x| x.kind_of?(MDElement) && x.node_type == :ial }

  # get rid of empty line markers
  output.delete_if { |x| x == :empty }

  # See for each list if we can omit the paragraphs and use li_span
  output.each do |c|
    if [:ul, :ol].include?(c.node_type) &&
       c.children.none? { |li| li.want_my_paragraph }
      c.children.each do |item|
        item.node_type = :li_span
        item.children = item.children[0].children
      end
    end

    if c.node_type == :definition_list &&
       c.children.none? { |defi| defi.want_my_paragraph }
      c.children.each do |definition|
        definition.definitions.each do |dd|
          dd.children = dd.children[0].children
        end
      end
    end
  end

  output
end
142
+
143
# Reads the construct that starts with a :text line and appends it to
# +output+. In order of precedence: a table (header line followed by a
# separator line), a setext header (next line is :header1/:header2), a
# definition (merged into a preceding definition list when there is one),
# or a plain paragraph.
def read_text_material(src, output)
  following = src.next_line
  table_ahead = (src.cur_line =~ MightBeTableHeader) &&
                following && (following =~ TableSeparator)

  if table_ahead
    output.push read_table(src)
  elsif [:header1, :header2].include?(src.next_line.md_type)
    output.push read_header12(src)
  elsif eventually_comes_a_def_list(src)
    definition = read_definition(src)
    current = output.last
    if current.kind_of?(MDElement) && current.node_type == :definition_list
      current.children << definition
    else
      output.push md_el(:definition_list, [definition])
    end
  else # Start of a paragraph
    output.push read_paragraph(src)
  end
end
161
+
162
+
163
# Reads an attribute-list-definition line: stores the parsed attribute
# list in self.ald under its id and returns the md_ald element.
# When the line does not match AttributeDefinitionList, reports the
# problem with maruku_error and returns nil.
def read_ald(src)
  line = src.shift_line
  unless line =~ AttributeDefinitionList
    maruku_error "Bug Bug:\n#{line.inspect}"
    return nil
  end

  id, raw_list = $1, $2
  # arguments after the source: context = nil, break_on = [nil]
  attributes = read_attribute_list(CharSource.new(raw_list, src), nil, [nil])
  self.ald[id] = attributes
  md_ald(id, attributes)
end
174
+
175
+ # reads a header (with ----- or ========)
176
# Reads a two-line header: the title line, then the underline.
# The level is 2 when the underline's md_type is :header2, otherwise 1.
# With the new meta-data syntax enabled, a trailing "{...}" on the title
# is parsed as an attribute list and removed from the title.
def read_header12(src)
  title = src.shift_line.strip
  attributes = nil

  # Check if there is an IAL
  if new_meta_data? and title =~ /^(.*)\{(.*)\}\s*$/
    title, ial = $1.strip, $2
    attributes = read_attribute_list(CharSource.new(ial, src), nil, [nil])
  end

  text = parse_lines_as_span([title])
  level = (src.cur_line.md_type == :header2) ? 2 : 1
  src.shift_line # consume the underline
  md_header(level, text, attributes)
end
190
+
191
+ # reads a header like '#### header ####'
192
# Reads a one-line header such as '#### header ####'.
# The level comes from num_leading_hashes and the title from strip_hashes.
# With the new meta-data syntax enabled, a trailing "{...}" is parsed as
# an attribute list and removed from the line first.
def read_header3(src)
  header_line = src.shift_line.strip
  attributes = nil

  # Check if there is an IAL
  if new_meta_data? and header_line =~ /^(.*)\{(.*)\}\s*$/
    header_line, ial = $1.strip, $2
    attributes = read_attribute_list(CharSource.new(ial, src), nil, [nil])
  end

  level = num_leading_hashes(header_line)
  text = parse_lines_as_span([strip_hashes(header_line)])
  md_header(level, text, attributes)
end
205
+
206
# Reads an XML processing instruction ("<?target code ?>"), possibly
# spanning several lines, and appends the result to +output+.
#
# * When the target is 'mrk' and MaRuKu::Globals[:unsafe_features] is set,
#   the code is run through safe_execute_code and its (non-String) result
#   is appended to +output+; a String result raises "Not expected".
# * Otherwise an md_xml_instr(target, code) element is appended.
#
# Text after the closing "?>" on the last line is reported with
# maruku_error. An instruction that is never closed before the end of the
# source is also reported with maruku_error instead of reading past the
# end of the source.
def read_xml_instruction(src, output)
  m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
  raise "BugBug" if not m
  target = m[2] || ''
  code = m[3]

  # Accumulate lines until the terminator shows up
  until code =~ /\?>/
    unless src.cur_line
      maruku_error "Unterminated XML instruction (missing '?>'):\n"+
        add_tabs(code, 1, '|'), src
      break
    end
    code += "\n"+src.shift_line
  end

  if code =~ /\?>/ && code !~ /\?>\s*$/
    garbage = (/\?>(.*)$/.match(code))[1]
    maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n"+
      add_tabs(code, 1, '|'), src
  end
  code = code.gsub(/\?>\s*$/, '')

  if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
    result = safe_execute_code(self, code)
    if result
      if result.kind_of? String
        raise "Not expected"
      else
        output.push(*result)
      end
    end
  else
    output.push md_xml_instr(target, code)
  end
end
234
+
235
# Reads a block of raw HTML by feeding lines to an HTMLHelper until it
# reports that it is finished or the source runs out, and returns
# md_html of what the helper read.
#
# Any exception raised while reading is reported with maruku_error, and so
# is non-blank text the helper left unconsumed.
def read_raw_html(src)
  helper = HTMLHelper.new
  begin
    helper.eat_this(src.shift_line)
    while src.cur_line && !helper.is_finished?
      helper.eat_this "\n" + src.shift_line
    end
  rescue Exception => e
    # the helper signals malformed HTML by raising Exception itself
    ex = e.inspect + e.backtrace.join("\n")
    maruku_error "Bad block-level HTML:\n#{add_tabs(ex,1,'|')}\n", src
  end

  unless helper.rest =~ /^\s*$/
    maruku_error "Could you please format this better?\n"+
      "I see that #{helper.rest.inspect} is left after the raw HTML.", src
  end

  md_html(helper.stuff_you_read)
end
256
+
257
# Collects the lines of a paragraph and returns md_par of their span parse.
#
# The first line is always taken. Collection then stops at the end of the
# source, or before a line that
# * is a quote, '#'-style header, empty line, reference definition or IAL,
# * is a list item followed by another list item of the same kind,
# * is blank once stripped,
# * is followed by a header underline (:header1 / :header2), or
# * matches a registered block extension.
def read_paragraph(src)
  lines = [src.shift_line]

  while (current = src.cur_line)
    kind = current.md_type
    break if [:quote, :header3, :empty, :ref_definition, :ial].include?(kind)
    # a single list-looking line does not break the paragraph
    break if [:olist, :ulist].include?(kind) && src.next_line.md_type == kind
    break if current.strip.size == 0
    break if [:header1, :header2].include?(src.next_line.md_type)
    break if any_matching_block_extension?(current)

    lines << src.shift_line
  end

  md_par(parse_lines_as_span(lines, src))
end
278
+
279
+ # Reads one list item, either ordered or unordered.
280
# Reads one list item (ordered or unordered) and returns md_li for it.
#
# The first line has its marker removed (everything before
# spaces_before_first_char), the continuation lines come from
# read_indented_content, and the whole item is parsed recursively as
# blocks. The item keeps its paragraph when read_indented_content says so
# or when it contains more than one block.
def read_list_item(src)
  parent_offset = src.cur_index

  item_type = src.cur_line.md_type
  first = src.shift_line

  indentation = spaces_before_first_char(first)
  # Ugly things going on inside `read_indented_content`
  lines, want_my_paragraph =
    read_indented_content(src, indentation, [:ulist, :olist, :ial], item_type)

  # Strip first '*', '-', '+' from first line, then put it in front
  lines.unshift first[indentation, first.size-1]

  children = parse_blocks(LineSource.new(lines, src, parent_offset))
  md_li(children, want_my_paragraph || (children.size>1))
end
305
+
306
# Reads an abbreviation definition line, records it in self.abbreviations
# and returns the md_abbr_def element.
# A line that does not match Abbreviation, or an empty abbreviation, is
# reported with maruku_error; processing continues afterwards.
def read_abbreviation(src)
  line = src.shift_line
  unless line =~ Abbreviation
    maruku_error "Bug: it's Andrea's fault. Tell him.\n#{line.inspect}"
  end

  abbr, desc = $1, $2

  if !abbr || abbr.size == 0
    maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
  end

  self.abbreviations[abbr] = desc
  md_abbr_def(abbr, desc)
end
322
+
323
# Reads a footnote definition: the first line (matched against
# FootnoteText for id and text) plus the continuation lines gathered by
# read_indented_content with an indentation of 4. The lines are parsed
# recursively as blocks; the resulting md_footnote element is stored in
# self.footnotes under its id and returned.
def read_footnote_text(src)
  parent_offset = src.cur_index
  first = src.shift_line

  maruku_error "Bug (it's Andrea's fault)" unless first =~ FootnoteText
  id, text = $1, $2

  # Ugly things going on inside `read_indented_content`
  indentation = 4
  lines, _want_my_paragraph =
    read_indented_content(src, indentation, [:footnote_text], :footnote_text)

  # add first line, unless it is blank
  lines.unshift text if text && text.strip != ""

  children = parse_blocks(LineSource.new(lines, src, parent_offset))

  footnote = md_footnote(id, children)
  self.footnotes[id] = footnote
  footnote
end
356
+
357
+
358
+ # This is the only ugly function in the code base.
359
+ # It is used to read list items, descriptions, footnote text
360
# Collects the continuation lines of an item (list item, description,
# footnote text) and returns [lines, want_my_paragraph].
#
# src         - source to consume lines from
# indentation - indent removed from each collected line with strip_indent
# break_list  - md_types that end the item while no empty line has been
#               seen yet
# item_type   - md_type of the item being read
#
# Empty lines are collected as they are. After an empty line, a line
# indented less than +indentation+ ends the item. Once a collected line is
# of type :text, the :text lines right after it are taken as well (only
# the first line of a child paragraph has to be indented).
#
# want_my_paragraph is truthy when content followed an empty line, or when
# an empty line was seen and the next line in +src+ is again of type
# +item_type+. Trailing empty lines are dropped from the result.
def read_indented_content(src, indentation, break_list, item_type)
  collected = []
  blank_seen = false
  content_after_blank = false

  while (line = src.cur_line)
    if line.md_type == :empty
      blank_seen = true
      collected << src.shift_line
      next
    end

    if blank_seen
      # after a white line we expect things to be properly aligned
      break if number_of_leading_spaces(line) < indentation
      content_after_blank = true
    elsif break_list.include?(line.md_type)
      break
    end

    stripped = strip_indent(src.shift_line, indentation)
    collected << stripped

    # You are only required to indent the first line of
    # a child paragraph.
    next unless stripped.md_type == :text
    while src.cur_line && (src.cur_line.md_type == :text)
      collected << strip_indent(src.shift_line, indentation)
    end
  end

  follower = src.cur_line
  want_my_paragraph = content_after_blank ||
    (blank_seen && (follower && (follower.md_type == item_type)))

  collected.pop while collected.last && (collected.last.md_type == :empty)

  return collected, want_my_paragraph
end
415
+
416
+
417
# Reads consecutive :quote lines, removes the quote marker from each with
# unquote, parses the result recursively as blocks and returns md_quote.
def read_quote(src)
  parent_offset = src.cur_index

  quoted = []
  quoted << unquote(src.shift_line) while src.cur_line && src.cur_line.md_type == :quote

  md_quote(parse_blocks(LineSource.new(quoted, src, parent_offset)))
end
431
+
432
# Reads an indented code block: consecutive :code and :empty lines, each
# with 4 columns of indent stripped. Blank lines at both ends are dropped.
# Returns md_codeblock of the lines joined with "\n", or nil when nothing
# but blank lines was read.
def read_code(src)
  body = []
  while (line = src.cur_line) && [:code, :empty].include?(line.md_type)
    body << strip_indent(src.shift_line, 4)
  end

  body.pop while body.last && body.last.strip.size == 0
  body.shift while body.first && body.first.strip.size == 0

  return nil if body.empty?

  md_codeblock(body.join("\n"))
end
456
+
457
# Reads a series of metadata lines, possibly separated by empty lines.
#
# src:: line source positioned on the first candidate line.
#
# :empty lines are consumed and ignored; each :metadata line is consumed and
# the result of +parse_metadata+ is merged into the accumulated hash (later
# keys overwrite earlier ones). Reading stops at the first line of any other
# type, which is left in +src+.
#
# Returns the accumulated hash (empty when no metadata line was found).
def read_metadata(src)
  collected = {}
  while (line = src.cur_line)
    type = line.md_type
    if type == :metadata
      collected.merge!(parse_metadata(src.shift_line))
    elsif type == :empty
      src.shift_line
    else
      break
    end
  end
  collected
end
469
+
470
+
471
# Reads a link reference definition and registers it.
#
# src:: line source positioned on the definition line.
# out:: array that receives the element built by +md_ref_def+.
#
# If the following line is not itself a :ref_definition and is indented by
# 1 to 3 spaces, it is treated as the continuation of the definition and
# appended (separated by one space) before matching.
#
# The joined text is matched against LinkRegex; groups 1..3 are used as id,
# url and title, and group 4, when present, as a whitespace-separated list
# of key=value attributes. The id is normalised with +sanitize_ref_id+ and
# the data is stored in +self.refs[id]+.
#
# When the text does not match LinkRegex, an error is reported through
# +maruku_error+, nothing is registered and nil is returned. Otherwise the
# result of +out.push+ is returned.
def read_ref_definition(src, out)
  line = src.shift_line

  # if link is incomplete, shift next line
  if src.cur_line && (src.cur_line.md_type != :ref_definition) &&
     [1, 2, 3].include?(number_of_leading_spaces(src.cur_line))
    line += " " + src.shift_line
  end

  match = LinkRegex.match(line)
  unless match
    maruku_error "Link does not respect format: '#{line}'"
    return
  end

  id    = sanitize_ref_id(match[1])
  url   = match[2]
  title = match[3]

  hash = self.refs[id] = {:url => url, :title => title}

  stuff = match[4]
  if stuff
    stuff.split.each do |couple|
      # Split on the first "=" only, so that values may themselves contain
      # "=" (e.g. query strings) without being truncated.
      k, v = couple.split('=', 2)
      # A token such as "=" or "=value" carries no attribute name.
      next if k.empty?
      v ||= ""
      # Drop the surrounding quote characters; a lone '"' yields "".
      v = v[1, v.size - 2] || "" if v[0, 1] == '"'
      hash[k.to_sym] = v
    end
  end

  out.push md_ref_def(id, url, {:title => title})
end
509
+
510
# Splits a table line into its cells.
#
# s:: the raw line.
#
# The line is stripped and split on "|". Zero-length pieces (such as the one
# produced by a leading "|") are discarded, while whitespace-only pieces are
# kept and come out as empty strings, which is what allows empty cells.
#
# Returns an array of stripped cell strings.
def split_cells(s)
  pieces = s.strip.split('|')
  pieces.reject { |piece| piece.empty? }.map { |piece| piece.strip }
end
515
+
516
# Reads a table: one header line, one separator line, then every following
# line that contains a "|".
#
# src:: line source positioned on the header line.
#
# The number of columns is taken from the separator line. The alignment of
# each column is derived from the first two capture groups of Sep matched
# against the separator cell: both set => :center, only the second =>
# :right, otherwise :left.
#
# If the header, or any row, does not have exactly that many cells, an error
# is reported through +maruku_error+ / +tell_user+ and +md_br()+ is returned
# in place of the table.
#
# Returns a :table element whose children are the head cells followed by
# all row cells (flattened), with the alignments stored under :align.
def read_table(src)
  head = split_cells(src.shift_line).map { |s| md_el(:head_cell, parse_lines_as_span([s])) }

  separator = split_cells(src.shift_line)

  align = separator.map do |s|
    s =~ Sep
    first_group  = Regexp.last_match(1)
    second_group = Regexp.last_match(2)
    if first_group && second_group
      :center
    elsif second_group
      :right
    else
      :left
    end
  end

  num_columns = align.size

  if head.size != num_columns
    maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
    tell_user "I will ignore this table."
    # XXX try to recover
    return md_br()
  end

  rows = []

  while src.cur_line && src.cur_line =~ /\|/
    row = split_cells(src.shift_line).map { |s| md_el(:cell, parse_lines_as_span([s])) }
    # Validate the row that was just read (not the head, which has already
    # been checked above).
    if row.size != num_columns
      maruku_error "Row does not have #{num_columns} columns: \n#{row.inspect}"
      tell_user "I will ignore this table."
      # XXX try to recover
      return md_br()
    end
    rows << row
  end

  children = (head + rows).flatten
  md_el(:table, children, {:align => align})
end
550
+
551
# Tells whether a definition list starts at the current position.
#
# src:: line source; only +tell_me_the_future+ is called on it.
#
# The value returned by +tell_me_the_future+ is matched against
# %r{^t+e?d}x, i.e. one or more "t", an optional "e", then a "d".
# Presumably these letters stand for upcoming text, empty and definition
# lines; confirm against LineSource#tell_me_the_future.
#
# Returns the match position (truthy) when the pattern matches, nil
# otherwise.
def eventually_comes_a_def_list(src)
  upcoming = src.tell_me_the_future
  upcoming =~ %r{^t+e?d}x
end
560
+
561
+
562
# Reads one definition-list entry: one or more terms, an optional empty
# line, then one or more definitions.
#
# src:: line source positioned on the first term line.
#
# Each consecutive :text line becomes a :definition_term element. For each
# consecutive :definition line, the first capture group of Definition is
# taken as the first line of the definition body, the indented continuation
# is collected with +read_indented_content+, and the whole body is parsed
# with +parse_blocks+ into a :definition_data element.
#
# :want_my_paragraph is true when an empty line separated the terms from the
# definitions, or when +read_indented_content+ reported it for any
# definition.
#
# Raises RuntimeError ("Chunky Bacon!") when the terms are not followed by a
# definition line, including when the source ends right after the terms or
# after the optional empty line.
#
# Returns a :definition element whose children are the terms followed by the
# definitions; both lists are also stored in its metadata.
def read_definition(src)
  # Read one or more terms
  terms = []
  while src.cur_line && src.cur_line.md_type == :text
    terms << md_el(:definition_term, parse_lines_as_span([src.shift_line]))
  end

  want_my_paragraph = false

  raise "Chunky Bacon!" if not src.cur_line

  # one optional empty
  if src.cur_line.md_type == :empty
    want_my_paragraph = true
    src.shift_line
  end

  # The source may have ended with the empty line just consumed, so check
  # for nil before asking for the line type.
  raise "Chunky Bacon!" if src.cur_line.nil? || src.cur_line.md_type != :definition

  # Read one or more definitions
  definitions = []
  while src.cur_line && src.cur_line.md_type == :definition
    parent_offset = src.cur_index

    # keep only the text captured by Definition's first group
    first = src.shift_line
    first =~ Definition
    first = Regexp.last_match(1)

    lines, w_m_p = read_indented_content(src, 4, [:definition], :definition)
    want_my_paragraph ||= w_m_p

    lines.unshift first

    src2 = LineSource.new(lines, src, parent_offset)
    children = parse_blocks(src2)
    definitions << md_el(:definition_data, children)
  end

  md_el(:definition, terms + definitions, {
    :terms => terms,
    :definitions => definitions,
    :want_my_paragraph => want_my_paragraph
  })
end
610
+ end # BlockLevelParser
611
+ end # MaRuKu
612
+ end
613
+ end