patcito-maruku 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. data/AUTHORS +23 -0
  2. data/LICENSE +340 -0
  3. data/README.md +73 -0
  4. data/bin/maruku +196 -0
  5. data/bin/marutex +4 -0
  6. data/data/entities.xml +261 -0
  7. data/docs/changelog.md +334 -0
  8. data/docs/div_syntax.md +36 -0
  9. data/docs/entity_test.md +23 -0
  10. data/docs/markdown_syntax.md +899 -0
  11. data/docs/maruku.md +346 -0
  12. data/docs/math.md +194 -0
  13. data/docs/other_stuff.md +51 -0
  14. data/docs/proposal.md +309 -0
  15. data/docs/website/src/bluecloth.md +25 -0
  16. data/docs/website/src/download.md +31 -0
  17. data/docs/website/src/maruku.md +261 -0
  18. data/docs/website/src/proposal.md +271 -0
  19. data/lib/maruku.rb +132 -0
  20. data/lib/maruku/attributes.rb +138 -0
  21. data/lib/maruku/defaults.rb +69 -0
  22. data/lib/maruku/errors.rb +89 -0
  23. data/lib/maruku/ext/div.rb +121 -0
  24. data/lib/maruku/ext/fenced_code.rb +78 -0
  25. data/lib/maruku/ext/math.rb +37 -0
  26. data/lib/maruku/ext/math/elements.rb +21 -0
  27. data/lib/maruku/ext/math/latex_fix.rb +12 -0
  28. data/lib/maruku/ext/math/mathml_engines/blahtex.rb +93 -0
  29. data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +39 -0
  30. data/lib/maruku/ext/math/mathml_engines/none.rb +21 -0
  31. data/lib/maruku/ext/math/mathml_engines/ritex.rb +24 -0
  32. data/lib/maruku/ext/math/parsing.rb +125 -0
  33. data/lib/maruku/ext/math/to_html.rb +237 -0
  34. data/lib/maruku/ext/math/to_latex.rb +36 -0
  35. data/lib/maruku/ext/yaml.rb +43 -0
  36. data/lib/maruku/helpers.rb +214 -0
  37. data/lib/maruku/input/charsource.rb +326 -0
  38. data/lib/maruku/input/extensions.rb +69 -0
  39. data/lib/maruku/input/html_helper.rb +189 -0
  40. data/lib/maruku/input/linesource.rb +111 -0
  41. data/lib/maruku/input/parse_block.rb +608 -0
  42. data/lib/maruku/input/parse_doc.rb +240 -0
  43. data/lib/maruku/input/parse_span_better.rb +746 -0
  44. data/lib/maruku/input/rubypants.rb +225 -0
  45. data/lib/maruku/input/type_detection.rb +147 -0
  46. data/lib/maruku/input_textile2/t2_parser.rb +163 -0
  47. data/lib/maruku/maruku.rb +31 -0
  48. data/lib/maruku/output/s5/fancy.rb +756 -0
  49. data/lib/maruku/output/s5/to_s5.rb +138 -0
  50. data/lib/maruku/output/to_html.rb +994 -0
  51. data/lib/maruku/output/to_latex.rb +580 -0
  52. data/lib/maruku/output/to_latex_entities.rb +101 -0
  53. data/lib/maruku/output/to_latex_strings.rb +64 -0
  54. data/lib/maruku/output/to_markdown.rb +164 -0
  55. data/lib/maruku/output/to_s.rb +54 -0
  56. data/lib/maruku/string_utils.rb +185 -0
  57. data/lib/maruku/structures.rb +143 -0
  58. data/lib/maruku/structures_inspect.rb +51 -0
  59. data/lib/maruku/structures_iterators.rb +48 -0
  60. data/lib/maruku/textile2.rb +1 -0
  61. data/lib/maruku/toc.rb +214 -0
  62. data/lib/maruku/usage/example1.rb +33 -0
  63. data/lib/maruku/version +0 -0
  64. data/lib/maruku/version.rb +54 -0
  65. data/spec/block_docs/abbreviations.md +52 -0
  66. data/spec/block_docs/alt.md +17 -0
  67. data/spec/block_docs/attributes/att2.md +20 -0
  68. data/spec/block_docs/attributes/att3.md +28 -0
  69. data/spec/block_docs/attributes/attributes.md +57 -0
  70. data/spec/block_docs/attributes/circular.md +26 -0
  71. data/spec/block_docs/attributes/default.md +22 -0
  72. data/spec/block_docs/blank.md +24 -0
  73. data/spec/block_docs/blanks_in_code.md +75 -0
  74. data/spec/block_docs/bug_def.md +16 -0
  75. data/spec/block_docs/bug_table.md +46 -0
  76. data/spec/block_docs/code.md +34 -0
  77. data/spec/block_docs/code2.md +28 -0
  78. data/spec/block_docs/code3.md +71 -0
  79. data/spec/block_docs/data_loss.md +25 -0
  80. data/spec/block_docs/divs/div1.md +167 -0
  81. data/spec/block_docs/divs/div2.md +21 -0
  82. data/spec/block_docs/divs/div3_nest.md +45 -0
  83. data/spec/block_docs/easy.md +15 -0
  84. data/spec/block_docs/email.md +20 -0
  85. data/spec/block_docs/encoding/iso-8859-1.md +23 -0
  86. data/spec/block_docs/encoding/utf-8.md +18 -0
  87. data/spec/block_docs/entities.md +94 -0
  88. data/spec/block_docs/escaping.md +67 -0
  89. data/spec/block_docs/extra_dl.md +52 -0
  90. data/spec/block_docs/extra_header_id.md +63 -0
  91. data/spec/block_docs/extra_table1.md +37 -0
  92. data/spec/block_docs/footnotes.md +97 -0
  93. data/spec/block_docs/headers.md +37 -0
  94. data/spec/block_docs/hex_entities.md +37 -0
  95. data/spec/block_docs/hrule.md +39 -0
  96. data/spec/block_docs/html2.md +22 -0
  97. data/spec/block_docs/html3.md +31 -0
  98. data/spec/block_docs/html4.md +25 -0
  99. data/spec/block_docs/html5.md +23 -0
  100. data/spec/block_docs/ie.md +49 -0
  101. data/spec/block_docs/images.md +90 -0
  102. data/spec/block_docs/images2.md +31 -0
  103. data/spec/block_docs/inline_html.md +152 -0
  104. data/spec/block_docs/inline_html2.md +21 -0
  105. data/spec/block_docs/links.md +152 -0
  106. data/spec/block_docs/links2.md +22 -0
  107. data/spec/block_docs/list1.md +46 -0
  108. data/spec/block_docs/list12.md +28 -0
  109. data/spec/block_docs/list2.md +56 -0
  110. data/spec/block_docs/list3.md +64 -0
  111. data/spec/block_docs/list4.md +89 -0
  112. data/spec/block_docs/lists.md +192 -0
  113. data/spec/block_docs/lists10.md +34 -0
  114. data/spec/block_docs/lists11.md +23 -0
  115. data/spec/block_docs/lists6.md +41 -0
  116. data/spec/block_docs/lists9.md +64 -0
  117. data/spec/block_docs/lists_after_paragraph.md +208 -0
  118. data/spec/block_docs/lists_ol.md +262 -0
  119. data/spec/block_docs/loss.md +16 -0
  120. data/spec/block_docs/math/equations.md +45 -0
  121. data/spec/block_docs/math/inline.md +46 -0
  122. data/spec/block_docs/math/math2.md +45 -0
  123. data/spec/block_docs/math/notmath.md +25 -0
  124. data/spec/block_docs/math/table.md +25 -0
  125. data/spec/block_docs/math/table2.md +42 -0
  126. data/spec/block_docs/misc_sw.md +525 -0
  127. data/spec/block_docs/notyet/escape.md +21 -0
  128. data/spec/block_docs/notyet/header_after_par.md +58 -0
  129. data/spec/block_docs/notyet/ticks.md +18 -0
  130. data/spec/block_docs/notyet/triggering.md +157 -0
  131. data/spec/block_docs/olist.md +45 -0
  132. data/spec/block_docs/one.md +15 -0
  133. data/spec/block_docs/paragraph.md +16 -0
  134. data/spec/block_docs/paragraph_rules/dont_merge_ref.md +42 -0
  135. data/spec/block_docs/paragraph_rules/tab_is_blank.md +24 -0
  136. data/spec/block_docs/paragraphs.md +46 -0
  137. data/spec/block_docs/pending/amps.md +15 -0
  138. data/spec/block_docs/pending/empty_cells.md +37 -0
  139. data/spec/block_docs/pending/link.md +72 -0
  140. data/spec/block_docs/pending/ref.md +21 -0
  141. data/spec/block_docs/recover/recover_links.md +15 -0
  142. data/spec/block_docs/red_tests/abbrev.md +679 -0
  143. data/spec/block_docs/red_tests/lists7.md +32 -0
  144. data/spec/block_docs/red_tests/lists7b.md +65 -0
  145. data/spec/block_docs/red_tests/lists8.md +42 -0
  146. data/spec/block_docs/red_tests/ref.md +23 -0
  147. data/spec/block_docs/red_tests/xml.md +35 -0
  148. data/spec/block_docs/references/long_example.md +71 -0
  149. data/spec/block_docs/references/spaces_and_numbers.md +15 -0
  150. data/spec/block_docs/smartypants.md +114 -0
  151. data/spec/block_docs/syntax_hl.md +52 -0
  152. data/spec/block_docs/table_attributes.md +34 -0
  153. data/spec/block_docs/test.md +19 -0
  154. data/spec/block_docs/underscore_in_words.md +15 -0
  155. data/spec/block_docs/wrapping.md +67 -0
  156. data/spec/block_docs/xml2.md +19 -0
  157. data/spec/block_docs/xml3.md +26 -0
  158. data/spec/block_docs/xml_instruction.md +52 -0
  159. data/spec/block_spec.rb +49 -0
  160. data/spec/span_spec.rb +254 -0
  161. data/spec/spec_helper.rb +6 -0
  162. metadata +247 -0
@@ -0,0 +1,69 @@
1
+ module MaRuKu; module In; module Markdown
2
+
3
+
4
+ # Hash Fixnum -> name
5
+ SpanExtensionsTrigger = {}
6
+
7
+
8
+ class SpanExtension
9
+ # trigging chars
10
+ attr_accessor :chars
11
+ # trigging regexp
12
+ attr_accessor :regexp
13
+ # lambda
14
+ attr_accessor :block
15
+ end
16
+
17
+ # Hash String -> Extension
18
+ SpanExtensions = {}
19
+
20
+ def check_span_extensions(src, con)
21
+ c = src.cur_char
22
+ if extensions = SpanExtensionsTrigger[c]
23
+ extensions.each do |e|
24
+ if e.regexp && (match = src.next_matches(e.regexp))
25
+ return true if e.block.call(doc, src, con)
26
+ end
27
+ end
28
+ end
29
+ return false # not special
30
+ end
31
+
32
+ def self.register_span_extension(args)
33
+ e = SpanExtension.new
34
+ e.chars = [*args[:chars]]
35
+ e.regexp = args[:regexp]
36
+ e.block = args[:handler] || raise("No blocks passed")
37
+ e.chars.each do |c|
38
+ (SpanExtensionsTrigger[c] ||= []).push e
39
+ end
40
+ end
41
+
42
+ def self.register_block_extension(args)
43
+ regexp = args[:regexp]
44
+ BlockExtensions[regexp] = (args[:handler] || raise("No blocks passed"))
45
+ end
46
+
47
+ # Hash Regexp -> Block
48
+ BlockExtensions = {}
49
+
50
+ def check_block_extensions(src, con, line)
51
+ BlockExtensions.each do |reg, block|
52
+ if m = reg.match(line)
53
+ block = BlockExtensions[reg]
54
+ accepted = block.call(doc, src, con)
55
+ return true if accepted
56
+ end
57
+ end
58
+ return false # not special
59
+ end
60
+
61
+ def any_matching_block_extension?(line)
62
+ BlockExtensions.each_key do |reg|
63
+ m = reg.match(line)
64
+ return m if m
65
+ end
66
+ return false
67
+ end
68
+
69
+ end end end
@@ -0,0 +1,189 @@
1
+ #--
2
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
+ #
4
+ # This file is part of Maruku.
5
+ #
6
+ # Maruku is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Maruku is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Maruku; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
20
+
21
+
22
+ module MaRuKu; module In; module Markdown; module SpanLevelParser
23
+
24
+ # This class helps me read and sanitize HTML blocks
25
+
26
+ # I tried to do this with REXML, but wasn't able to. (suggestions?)
27
+
28
+ class HTMLHelper
29
+ include MaRuKu::Strings
30
+
31
+ Tag = %r{^<(/)?(\w+)\s*([^>]*?)>}m
32
+ PartialTag = %r{^<.*}m
33
+
34
+ EverythingElse = %r{^[^<]+}m
35
+ CommentStart = %r{^<!--}x
36
+ CommentEnd = %r{^.*?-->}
37
+ TO_SANITIZE = ['img','hr','br']
38
+
39
+ attr_reader :rest
40
+
41
+ def my_debug(s)
42
+ # puts "---"*10+"\n"+inspect+"\t>>>\t"s
43
+ end
44
+
45
+ def initialize
46
+ @rest = ""
47
+ @tag_stack = []
48
+ @m = nil
49
+ @already = ""
50
+ self.state = :inside_element
51
+ end
52
+
53
+ attr_accessor :state # = :inside_element, :inside_tag, :inside_comment,
54
+
55
+ def eat_this(line)
56
+ @rest = line + @rest
57
+ things_read = 0
58
+ until @rest.empty?
59
+ case self.state
60
+ when :inside_comment
61
+ if @m = CommentEnd.match(@rest)
62
+ @already += @m.pre_match + @m.to_s
63
+ @rest = @m.post_match
64
+ self.state = :inside_element
65
+ else
66
+ @already += @rest
67
+ @rest = ""
68
+ self.state = :inside_comment
69
+ end
70
+ when :inside_element
71
+ if @m = CommentStart.match(@rest)
72
+ things_read += 1
73
+ @already += @m.pre_match + @m.to_s
74
+ @rest = @m.post_match
75
+ self.state = :inside_comment
76
+ elsif @m = Tag.match(@rest) then
77
+ my_debug "#{@state}: Tag: #{@m.to_s.inspect}"
78
+ things_read += 1
79
+ handle_tag
80
+ self.state = :inside_element
81
+ elsif @m = PartialTag.match(@rest) then
82
+ my_debug "#{@state}: PartialTag: #{@m.to_s.inspect}"
83
+ @already += @m.pre_match
84
+ @rest = @m.post_match
85
+ @partial_tag = @m.to_s
86
+ self.state = :inside_tag
87
+ elsif @m = EverythingElse.match(@rest)
88
+ my_debug "#{@state}: Everything: #{@m.to_s.inspect}"
89
+ @already += @m.pre_match + @m.to_s
90
+ @rest = @m.post_match
91
+ self.state = :inside_element
92
+ else
93
+ error "Malformed HTML: not complete: #{@rest.inspect}"
94
+ end
95
+ when :inside_tag
96
+ if @m = /^[^>]*>/.match(@rest) then
97
+ my_debug "#{@state}: inside_tag: matched #{@m.to_s.inspect}"
98
+ @partial_tag += @m.to_s
99
+ my_debug "#{@state}: inside_tag: matched TOTAL: #{@partial_tag.to_s.inspect}"
100
+ @rest = @partial_tag + @m.post_match
101
+ @partial_tag = nil
102
+ self.state = :inside_element
103
+ else
104
+ @partial_tag += @rest
105
+ @rest = ""
106
+ self.state = :inside_tag
107
+ end
108
+ else
109
+ raise "Bug bug: state = #{self.state.inspect}"
110
+ end # not inside comment
111
+
112
+ # puts inspect
113
+ # puts "Read: #{@tag_stack.inspect}"
114
+ break if is_finished? and things_read>0
115
+ end
116
+ end
117
+
118
+ def handle_tag()
119
+ @already += @m.pre_match
120
+ @rest = @m.post_match
121
+
122
+ is_closing = !!@m[1]
123
+ tag = @m[2]
124
+ attributes = @m[3].to_s
125
+
126
+ is_single = false
127
+ if attributes[-1] == ?/ # =~ /\A(.*)\/\Z/
128
+ attributes = attributes[0, attributes.size-1]
129
+ is_single = true
130
+ end
131
+
132
+ my_debug "Attributes: #{attributes.inspect}"
133
+ my_debug "READ TAG #{@m.to_s.inspect} tag = #{tag} closing? #{is_closing} single = #{is_single}"
134
+
135
+ if TO_SANITIZE.include? tag
136
+ attributes.strip!
137
+ # puts "Attributes: #{attributes.inspect}"
138
+ if attributes.size > 0
139
+ @already += '<%s %s />' % [tag, attributes]
140
+ else
141
+ @already += '<%s />' % [tag]
142
+ end
143
+ elsif is_closing
144
+ @already += @m.to_s
145
+ if @tag_stack.empty?
146
+ error "Malformed: closing tag #{tag.inspect} "+
147
+ "in empty list"
148
+ end
149
+ if @tag_stack.last != tag
150
+ error "Malformed: tag <#{tag}> "+
151
+ "closes <#{@tag_stack.last}>"
152
+ end
153
+ @tag_stack.pop
154
+ else
155
+ @already += @m.to_s
156
+
157
+ if not is_single
158
+ @tag_stack.push(tag)
159
+ my_debug "Pushing #{tag.inspect} when read #{@m.to_s.inspect}"
160
+ end
161
+ end
162
+ end
163
+ def error(s)
164
+ raise Exception, "Error: #{s} \n"+ inspect, caller
165
+ end
166
+
167
+ def inspect; "HTML READER\n state=#{self.state} "+
168
+ "match=#{@m.to_s.inspect}\n"+
169
+ "Tag stack = #{@tag_stack.inspect} \n"+
170
+ "Before:\n"+
171
+ @already.gsub(/^/, '|')+"\n"+
172
+ "After:\n"+
173
+ @rest.gsub(/^/, '|')+"\n"
174
+
175
+ end
176
+
177
+
178
+ def stuff_you_read
179
+ @already
180
+ end
181
+
182
+ def rest() @rest end
183
+
184
+ def is_finished?
185
+ (self.state == :inside_element) and @tag_stack.empty?
186
+ end
187
+ end # html helper
188
+
189
+ end end end end
@@ -0,0 +1,111 @@
1
+ #--
2
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
+ #
4
+ # This file is part of Maruku.
5
+ #
6
+ # Maruku is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Maruku is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Maruku; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
20
+
21
+
22
+ module MaRuKu; module In; module Markdown; module BlockLevelParser
23
+
24
+ # This represents a source of lines that can be consumed.
25
+ #
26
+ # It is the twin of CharSource.
27
+ #
28
+
29
+ class LineSource
30
+ include MaRuKu::Strings
31
+ attr_reader :parent
32
+
33
+ def initialize(lines, parent=nil, parent_offset=nil)
34
+ raise "NIL lines? " if not lines
35
+ @lines = lines
36
+ @lines_index = 0
37
+ @parent = parent
38
+ @parent_offset = parent_offset
39
+ end
40
+
41
+ def cur_line() @lines[@lines_index] end
42
+ def next_line() @lines[@lines_index+1] end
43
+
44
+ def shift_line()
45
+ raise "Over the rainbow" if @lines_index >= @lines.size
46
+ l = @lines[@lines_index]
47
+ @lines_index += 1
48
+ return l
49
+ end
50
+
51
+ def ignore_line
52
+ raise "Over the rainbow" if @lines_index >= @lines.size
53
+ @lines_index += 1
54
+ end
55
+
56
+ def describe
57
+ s = "At line #{original_line_number(@lines_index)}\n"
58
+
59
+ context = 3 # lines
60
+ from = [@lines_index-context, 0].max
61
+ to = [@lines_index+context, @lines.size-1].min
62
+
63
+ for i in from..to
64
+ prefix = (i == @lines_index) ? '--> ' : ' ';
65
+ l = @lines[i]
66
+ s += "%10s %4s|%s" %
67
+ [@lines[i].md_type.to_s, prefix, l]
68
+
69
+ s += "|\n"
70
+ end
71
+
72
+ # if @parent
73
+ # s << "Parent context is: \n"
74
+ # s << @parent.describe.gsub(/^/, '|')
75
+ # end
76
+ s
77
+ end
78
+
79
+ def original_line_number(index)
80
+ if @parent
81
+ return index + @parent.original_line_number(@parent_offset)
82
+ else
83
+ 1 + index
84
+ end
85
+ end
86
+
87
+ def cur_index
88
+ @lines_index
89
+ end
90
+
91
+ # Returns the type of next line as a string
92
+ # breaks at first :definition
93
+ def tell_me_the_future
94
+ s = ""; num_e = 0;
95
+ for i in @lines_index..@lines.size-1
96
+ c = case @lines[i].md_type
97
+ when :text; "t"
98
+ when :empty; num_e+=1; "e"
99
+ when :definition; "d"
100
+ else "o"
101
+ end
102
+ s += c
103
+ break if c == "d" or num_e>1
104
+ end
105
+ s
106
+ end
107
+
108
+ end # linesource
109
+
110
+ end end end end # block
111
+
@@ -0,0 +1,608 @@
1
+ #--
2
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
+ #
4
+ # This file is part of Maruku.
5
+ #
6
+ # Maruku is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Maruku is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Maruku; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
20
+
21
+
22
+ module MaRuKu; module In; module Markdown; module BlockLevelParser
23
+
24
+ include Helpers
25
+ include MaRuKu::Strings
26
+ include MaRuKu::In::Markdown::SpanLevelParser
27
+
28
+ class BlockContext < Array
29
+ def describe
30
+ n = 5
31
+ desc = size > n ? self[-n,n] : self
32
+ "Last #{n} elements: "+
33
+ desc.map{|x| "\n -" + x.inspect}.join
34
+ end
35
+ end
36
+
37
+ # Splits the string and calls parse_lines_as_markdown
38
+ def parse_text_as_markdown(text)
39
+ lines = split_lines(text)
40
+ src = LineSource.new(lines)
41
+ return parse_blocks(src)
42
+ end
43
+
44
+ # Input is a LineSource
45
+ def parse_blocks(src)
46
+ output = BlockContext.new
47
+
48
+ # run state machine
49
+ while src.cur_line
50
+
51
+ next if check_block_extensions(src, output, src.cur_line)
52
+
53
+ # Prints detected type (useful for debugging)
54
+ # puts "#{src.cur_line.md_type}|#{src.cur_line}"
55
+ case src.cur_line.md_type
56
+ when :empty;
57
+ output.push :empty
58
+ src.ignore_line
59
+ when :ial
60
+ m = InlineAttributeList.match src.shift_line
61
+ content = m[1] || ""
62
+ # puts "Content: #{content.inspect}"
63
+ src2 = CharSource.new(content, src)
64
+ interpret_extension(src2, output, [nil])
65
+ when :ald
66
+ output.push read_ald(src)
67
+ when :text
68
+ # paragraph, or table, or definition list
69
+ read_text_material(src, output)
70
+ when :header2, :hrule
71
+ # hrule
72
+ src.shift_line
73
+ output.push md_hrule()
74
+ when :header3
75
+ output.push read_header3(src)
76
+ when :ulist, :olist
77
+ list_type = src.cur_line.md_type == :ulist ? :ul : :ol
78
+ li = read_list_item(src)
79
+ # append to current list if we have one
80
+ if output.last.kind_of?(MDElement) &&
81
+ output.last.node_type == list_type then
82
+ output.last.children << li
83
+ else
84
+ output.push md_el(list_type, [li])
85
+ end
86
+ when :quote; output.push read_quote(src)
87
+ when :code; e = read_code(src); output << e if e
88
+ when :raw_html; e = read_raw_html(src); output << e if e
89
+
90
+ when :footnote_text; output.push read_footnote_text(src)
91
+ when :ref_definition;
92
+ if src.parent && (src.cur_index == 0)
93
+ read_text_material(src, output)
94
+ else
95
+ read_ref_definition(src, output)
96
+ end
97
+ when :abbreviation; output.push read_abbreviation(src)
98
+ when :xml_instr; read_xml_instruction(src, output)
99
+ when :metadata;
100
+ maruku_error "Please use the new meta-data syntax: \n"+
101
+ " http://maruku.rubyforge.org/proposal.html\n", src
102
+ src.ignore_line
103
+ else # warn if we forgot something
104
+ md_type = src.cur_line.md_type
105
+ line = src.cur_line
106
+ maruku_error "Ignoring line '#{line}' type = #{md_type}", src
107
+ src.shift_line
108
+ end
109
+ end
110
+
111
+ merge_ial(output, src, output)
112
+ output.delete_if {|x| x.kind_of?(MDElement) &&
113
+ x.node_type == :ial}
114
+
115
+ # get rid of empty line markers
116
+ output.delete_if {|x| x == :empty}
117
+ # See for each list if we can omit the paragraphs and use li_span
118
+ # TODO: do this after
119
+ output.each do |c|
120
+ # Remove paragraphs that we can get rid of
121
+ if [:ul,:ol].include? c.node_type
122
+ if c.children.all? {|li| !li.want_my_paragraph} then
123
+ c.children.each do |d|
124
+ d.node_type = :li_span
125
+ d.children = d.children[0].children
126
+ end
127
+ end
128
+ end
129
+ if c.node_type == :definition_list
130
+ if c.children.all?{|defi| !defi.want_my_paragraph} then
131
+ c.children.each do |definition|
132
+ definition.definitions.each do |dd|
133
+ dd.children = dd.children[0].children
134
+ end
135
+ end
136
+ end
137
+ end
138
+ end
139
+
140
+ output
141
+ end
142
+
143
+ def read_text_material(src, output)
144
+ if src.cur_line =~ MightBeTableHeader and
145
+ (src.next_line && src.next_line =~ TableSeparator)
146
+ output.push read_table(src)
147
+ elsif [:header1,:header2].include? src.next_line.md_type
148
+ output.push read_header12(src)
149
+ elsif eventually_comes_a_def_list(src)
150
+ definition = read_definition(src)
151
+ if output.last.kind_of?(MDElement) &&
152
+ output.last.node_type == :definition_list then
153
+ output.last.children << definition
154
+ else
155
+ output.push md_el(:definition_list, [definition])
156
+ end
157
+ else # Start of a paragraph
158
+ output.push read_paragraph(src)
159
+ end
160
+ end
161
+
162
+
163
+ def read_ald(src)
164
+ if (l=src.shift_line) =~ AttributeDefinitionList
165
+ id = $1; al=$2;
166
+ al = read_attribute_list(CharSource.new(al,src), context=nil, break_on=[nil])
167
+ self.ald[id] = al;
168
+ return md_ald(id, al)
169
+ else
170
+ maruku_error "Bug Bug:\n#{l.inspect}"
171
+ return nil
172
+ end
173
+ end
174
+
175
+ # reads a header (with ----- or ========)
176
+ def read_header12(src)
177
+ line = src.shift_line.strip
178
+ al = nil
179
+ # Check if there is an IAL
180
+ if new_meta_data? and line =~ /^(.*?)\{(.*?)\}\s*$/
181
+ line = $1.strip
182
+ ial = $2
183
+ al = read_attribute_list(CharSource.new(ial,src), context=nil, break_on=[nil])
184
+ end
185
+ text = parse_lines_as_span [ line ]
186
+ level = src.cur_line.md_type == :header2 ? 2 : 1;
187
+ src.shift_line
188
+ return md_header(level, text, al)
189
+ end
190
+
191
+ # reads a header like '#### header ####'
192
+ def read_header3(src)
193
+ line = src.shift_line.strip
194
+ al = nil
195
+ # Check if there is an IAL
196
+ if new_meta_data? and line =~ /^(.*?)\{(.*?)\}\s*$/
197
+ line = $1.strip
198
+ ial = $2
199
+ al = read_attribute_list(CharSource.new(ial,src), context=nil, break_on=[nil])
200
+ end
201
+ level = line[/^#+/].size
202
+ text = parse_lines_as_span [line.gsub(/\A#+|#+\Z/, '')]
203
+ return md_header(level, text, al)
204
+ end
205
+
206
+ def read_xml_instruction(src, output)
207
+ m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
208
+ raise "BugBug" if not m
209
+ target = m[2] || ''
210
+ code = m[3]
211
+ until code =~ /\?>/
212
+ code += "\n"+src.shift_line
213
+ end
214
+ if not code =~ (/\?>\s*$/)
215
+ garbage = (/\?>(.*)$/.match(code))[1]
216
+ maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n"+
217
+ code.gsub(/^/, '|'), src
218
+ end
219
+ code.gsub!(/\?>\s*$/, '')
220
+
221
+ if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
222
+ result = safe_execute_code(self, code)
223
+ if result
224
+ if result.kind_of? String
225
+ raise "Not expected"
226
+ else
227
+ output.push(*result)
228
+ end
229
+ end
230
+ else
231
+ output.push md_xml_instr(target, code)
232
+ end
233
+ end
234
+
235
+ def read_raw_html(src)
236
+ h = HTMLHelper.new
237
+ begin
238
+ h.eat_this(l=src.shift_line)
239
+ # puts "\nBLOCK:\nhtml -> #{l.inspect}"
240
+ while src.cur_line and not h.is_finished?
241
+ l=src.shift_line
242
+ # puts "html -> #{l.inspect}"
243
+ h.eat_this "\n"+l
244
+ end
245
+ rescue Exception => e
246
+ ex = e.inspect + e.backtrace.join("\n")
247
+ maruku_error "Bad block-level HTML:\n#{ex.gsub(/^/, '|')}\n", src
248
+ end
249
+ if not (h.rest =~ /^\s*$/)
250
+ maruku_error "Could you please format this better?\n"+
251
+ "I see that #{h.rest.inspect} is left after the raw HTML.", src
252
+ end
253
+ raw_html = h.stuff_you_read
254
+
255
+ return md_html(raw_html)
256
+ end
257
+
258
+ def read_paragraph(src)
259
+ lines = [src.shift_line]
260
+ while src.cur_line
261
+ # :olist does not break
262
+ case t = src.cur_line.md_type
263
+ when :quote,:header3,:empty,:ref_definition,:ial #,:xml_instr,:raw_html
264
+ break
265
+ when :olist,:ulist
266
+ break if src.next_line.md_type == t
267
+ end
268
+ break if src.cur_line.strip.size == 0
269
+ break if [:header1,:header2].include? src.next_line.md_type
270
+ break if any_matching_block_extension?(src.cur_line)
271
+
272
+ lines << src.shift_line
273
+ end
274
+ children = parse_lines_as_span(lines, src)
275
+
276
+ return md_par(children)
277
+ end
278
+
279
+ # Reads one list item, either ordered or unordered.
280
+ def read_list_item(src)
281
+ parent_offset = src.cur_index
282
+
283
+ item_type = src.cur_line.md_type
284
+ first = src.shift_line
285
+
286
+ indentation, ial = spaces_before_first_char(first)
287
+ al = read_attribute_list(CharSource.new(ial,src), context=nil, break_on=[nil]) if ial
288
+ break_list = [:ulist, :olist, :ial]
289
+ # Ugly things going on inside `read_indented_content`
290
+ lines, want_my_paragraph =
291
+ read_indented_content(src,indentation, break_list, item_type)
292
+
293
+ # add first line
294
+ # Strip first '*', '-', '+' from first line
295
+ stripped = first[indentation, first.size-1]
296
+ lines.unshift stripped
297
+
298
+
299
+ src2 = LineSource.new(lines, src, parent_offset)
300
+ children = parse_blocks(src2)
301
+ with_par = want_my_paragraph || (children.size>1)
302
+
303
+ return md_li(children, with_par, al)
304
+ end
305
+
306
+ def read_abbreviation(src)
307
+ if not (l=src.shift_line) =~ Abbreviation
308
+ maruku_error "Bug: it's Andrea's fault. Tell him.\n#{l.inspect}"
309
+ end
310
+
311
+ abbr = $1
312
+ desc = $2
313
+
314
+ if (not abbr) or (abbr.size==0)
315
+ maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
316
+ end
317
+
318
+ self.abbreviations[abbr] = desc
319
+
320
+ return md_abbr_def(abbr, desc)
321
+ end
322
+
323
+ def read_footnote_text(src)
324
+ parent_offset = src.cur_index
325
+
326
+ first = src.shift_line
327
+
328
+ if not first =~ FootnoteText
329
+ maruku_error "Bug (it's Andrea's fault)"
330
+ end
331
+
332
+ id = $1
333
+ text = $2
334
+
335
+ # Ugly things going on inside `read_indented_content`
336
+ indentation = 4 #first.size-text.size
337
+
338
+ # puts "id =_#{id}_; text=_#{text}_ indent=#{indentation}"
339
+
340
+ break_list = [:footnote_text, :ref_definition, :definition, :abbreviation]
341
+ item_type = :footnote_text
342
+ lines, want_my_paragraph =
343
+ read_indented_content(src,indentation, break_list, item_type)
344
+
345
+ # add first line
346
+ if text && text.strip != "" then lines.unshift text end
347
+
348
+ src2 = LineSource.new(lines, src, parent_offset)
349
+ children = parse_blocks(src2)
350
+
351
+ e = md_footnote(id, children)
352
+ self.footnotes[id] = e
353
+ return e
354
+ end
355
+
356
+
357
+ # This is the only ugly function in the code base.
358
+ # It is used to read list items, descriptions, footnote text
359
+ def read_indented_content(src, indentation, break_list, item_type)
360
+ lines =[]
361
+ # collect all indented lines
362
+ saw_empty = false; saw_anything_after = false
363
+ while src.cur_line
364
+ # puts "Reading indent = #{indentation} #{src.cur_line.inspect}"
365
+ #puts "#{src.cur_line.md_type} #{src.cur_line.inspect}"
366
+ if src.cur_line.md_type == :empty
367
+ saw_empty = true
368
+ lines << src.shift_line
369
+ next
370
+ end
371
+
372
+ # after a white line
373
+ if saw_empty
374
+ # we expect things to be properly aligned
375
+ if (ns=number_of_leading_spaces(src.cur_line)) < indentation
376
+ #puts "breaking for spaces, only #{ns}: #{src.cur_line}"
377
+ break
378
+ end
379
+ saw_anything_after = true
380
+ else
381
+ # if src.cur_line[0] != ?\
382
+ break if break_list.include? src.cur_line.md_type
383
+ # end
384
+ # break if src.cur_line.md_type != :text
385
+ end
386
+
387
+
388
+ stripped = strip_indent(src.shift_line, indentation)
389
+ lines << stripped
390
+
391
+ #puts "Accepted as #{stripped.inspect}"
392
+
393
+ # You are only required to indent the first line of
394
+ # a child paragraph.
395
+ if stripped.md_type == :text
396
+ while src.cur_line && (src.cur_line.md_type == :text)
397
+ lines << strip_indent(src.shift_line, indentation)
398
+ end
399
+ end
400
+ end
401
+
402
+ want_my_paragraph = saw_anything_after ||
403
+ (saw_empty && (src.cur_line && (src.cur_line.md_type == item_type)))
404
+
405
+ # create a new context
406
+
407
+ while lines.last && (lines.last.md_type == :empty)
408
+ lines.pop
409
+ end
410
+
411
+ return lines, want_my_paragraph
412
+ end
413
+
414
+
415
+ def read_quote(src)
416
+ parent_offset = src.cur_index
417
+
418
+ lines = []
419
+ # collect all indented lines
420
+ while src.cur_line && src.cur_line.md_type == :quote
421
+ lines << unquote(src.shift_line)
422
+ end
423
+
424
+ src2 = LineSource.new(lines, src, parent_offset)
425
+ children = parse_blocks(src2)
426
+ return md_quote(children)
427
+ end
428
+
429
+ def read_code(src)
430
+ # collect all indented lines
431
+ lines = []
432
+ while src.cur_line && ([:code, :empty].include? src.cur_line.md_type)
433
+ lines << strip_indent(src.shift_line, 4)
434
+ end
435
+
436
+ #while lines.last && (lines.last.md_type == :empty )
437
+ while lines.last && lines.last.strip.size == 0
438
+ lines.pop
439
+ end
440
+
441
+ while lines.first && lines.first.strip.size == 0
442
+ lines.shift
443
+ end
444
+
445
+ return nil if lines.empty?
446
+
447
+ source = lines.join("\n")
448
+
449
+
450
+ return md_codeblock(source)
451
+ end
452
+
453
+ # Reads a series of metadata lines with empty lines in between
454
+ def read_metadata(src)
455
+ hash = {}
456
+ while src.cur_line
457
+ case src.cur_line.md_type
458
+ when :empty; src.shift_line
459
+ when :metadata; hash.merge! parse_metadata(src.shift_line)
460
+ else break
461
+ end
462
+ end
463
+ hash
464
+ end
465
+
466
+
467
+ def read_ref_definition(src, out)
468
+ line = src.shift_line
469
+
470
+
471
+ # if link is incomplete, shift next line
472
+ if src.cur_line && !([:footnote_text, :ref_definition, :definition, :abbreviation].include? src.cur_line.md_type) &&
473
+ ([1,2,3].include? number_of_leading_spaces(src.cur_line) )
474
+ line += " "+ src.shift_line
475
+ end
476
+
477
+ # puts "total= #{line}"
478
+
479
+ match = LinkRegex.match(line)
480
+ if not match
481
+ maruku_error "Link does not respect format: '#{line}'"
482
+ return
483
+ end
484
+
485
+ id = match[1]; url = match[2]; title = match[3];
486
+ id = sanitize_ref_id(id)
487
+
488
+ hash = self.refs[id] = {:url=>url,:title=>title}
489
+
490
+ stuff=match[4]
491
+
492
+ if stuff
493
+ stuff.split.each do |couple|
494
+ # puts "found #{couple}"
495
+ k, v = couple.split('=')
496
+ v ||= ""
497
+ if v[0,1]=='"' then v = v[1, v.size-2] end
498
+ # puts "key:_#{k}_ value=_#{v}_"
499
+ hash[k.to_sym] = v
500
+ end
501
+ end
502
+ # puts hash.inspect
503
+
504
+ out.push md_ref_def(id, url, meta={:title=>title})
505
+ end
506
+
507
+ def split_cells(s)
508
+ # s.strip.split('|').select{|x|x.strip.size>0}.map{|x|x.strip}
509
+ # changed to allow empty cells
510
+ s.strip.split('|').select{|x|x.size>0}.map{|x|x.strip}
511
+ end
512
+
513
+ def read_table(src)
514
+ head = split_cells(src.shift_line).map{|s| md_el(:head_cell, parse_lines_as_span([s])) }
515
+
516
+ separator=split_cells(src.shift_line)
517
+
518
+ align = separator.map { |s| s =~ Sep
519
+ if $1 and $2 then :center elsif $2 then :right else :left end }
520
+
521
+ num_columns = align.size
522
+
523
+ if head.size != num_columns
524
+ maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
525
+ tell_user "I will ignore this table."
526
+ # XXX try to recover
527
+ return md_br()
528
+ end
529
+
530
+ rows = []
531
+
532
+ while src.cur_line && src.cur_line =~ /\|/
533
+ row = split_cells(src.shift_line).map{|s|
534
+ md_el(:cell, parse_lines_as_span([s]))}
535
+ if head.size != num_columns
536
+ maruku_error "Row does not have #{num_columns} columns: \n#{row.inspect}"
537
+ tell_user "I will ignore this table."
538
+ # XXX try to recover
539
+ return md_br()
540
+ end
541
+ rows << row
542
+ end
543
+
544
+ children = (head+rows).flatten
545
+ return md_el(:table, children, {:align => align})
546
+ end
547
+
548
+ # If current line is text, a definition list is coming
549
+ # if 1) text,empty,[text,empty]*,definition
550
+
551
+ def eventually_comes_a_def_list(src)
552
+ future = src.tell_me_the_future
553
+ ok = future =~ %r{^t+e?d}x
554
+ # puts "future: #{future} - #{ok}"
555
+ ok
556
+ end
557
+
558
+
559
+ def read_definition(src)
560
+ # Read one or more terms
561
+ terms = []
562
+ while src.cur_line && src.cur_line.md_type == :text
563
+ terms << md_el(:definition_term, parse_lines_as_span([src.shift_line]))
564
+ end
565
+
566
+ want_my_paragraph = false
567
+
568
+ raise "Chunky Bacon!" if not src.cur_line
569
+
570
+ # one optional empty
571
+ if src.cur_line.md_type == :empty
572
+ want_my_paragraph = true
573
+ src.shift_line
574
+ end
575
+
576
+ raise "Chunky Bacon!" if src.cur_line.md_type != :definition
577
+
578
+ # Read one or more definitions
579
+ definitions = []
580
+ while src.cur_line && src.cur_line.md_type == :definition
581
+ parent_offset = src.cur_index
582
+
583
+ first = src.shift_line
584
+ first =~ Definition
585
+ first = $1
586
+
587
+ # I know, it's ugly!!!
588
+
589
+ lines, w_m_p =
590
+ read_indented_content(src,4, [:definition], :definition)
591
+ want_my_paragraph ||= w_m_p
592
+
593
+ lines.unshift first
594
+
595
+ src2 = LineSource.new(lines, src, parent_offset)
596
+ children = parse_blocks(src2)
597
+ definitions << md_el(:definition_data, children)
598
+ end
599
+
600
+ return md_el(:definition, terms+definitions, {
601
+ :terms => terms,
602
+ :definitions => definitions,
603
+ :want_my_paragraph => want_my_paragraph})
604
+ end
605
+ end # BlockLevelParser
606
+ end # MaRuKu
607
+ end
608
+ end