patcito-maruku 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (162) hide show
  1. data/AUTHORS +23 -0
  2. data/LICENSE +340 -0
  3. data/README.md +73 -0
  4. data/bin/maruku +196 -0
  5. data/bin/marutex +4 -0
  6. data/data/entities.xml +261 -0
  7. data/docs/changelog.md +334 -0
  8. data/docs/div_syntax.md +36 -0
  9. data/docs/entity_test.md +23 -0
  10. data/docs/markdown_syntax.md +899 -0
  11. data/docs/maruku.md +346 -0
  12. data/docs/math.md +194 -0
  13. data/docs/other_stuff.md +51 -0
  14. data/docs/proposal.md +309 -0
  15. data/docs/website/src/bluecloth.md +25 -0
  16. data/docs/website/src/download.md +31 -0
  17. data/docs/website/src/maruku.md +261 -0
  18. data/docs/website/src/proposal.md +271 -0
  19. data/lib/maruku.rb +132 -0
  20. data/lib/maruku/attributes.rb +138 -0
  21. data/lib/maruku/defaults.rb +69 -0
  22. data/lib/maruku/errors.rb +89 -0
  23. data/lib/maruku/ext/div.rb +121 -0
  24. data/lib/maruku/ext/fenced_code.rb +78 -0
  25. data/lib/maruku/ext/math.rb +37 -0
  26. data/lib/maruku/ext/math/elements.rb +21 -0
  27. data/lib/maruku/ext/math/latex_fix.rb +12 -0
  28. data/lib/maruku/ext/math/mathml_engines/blahtex.rb +93 -0
  29. data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +39 -0
  30. data/lib/maruku/ext/math/mathml_engines/none.rb +21 -0
  31. data/lib/maruku/ext/math/mathml_engines/ritex.rb +24 -0
  32. data/lib/maruku/ext/math/parsing.rb +125 -0
  33. data/lib/maruku/ext/math/to_html.rb +237 -0
  34. data/lib/maruku/ext/math/to_latex.rb +36 -0
  35. data/lib/maruku/ext/yaml.rb +43 -0
  36. data/lib/maruku/helpers.rb +214 -0
  37. data/lib/maruku/input/charsource.rb +326 -0
  38. data/lib/maruku/input/extensions.rb +69 -0
  39. data/lib/maruku/input/html_helper.rb +189 -0
  40. data/lib/maruku/input/linesource.rb +111 -0
  41. data/lib/maruku/input/parse_block.rb +608 -0
  42. data/lib/maruku/input/parse_doc.rb +240 -0
  43. data/lib/maruku/input/parse_span_better.rb +746 -0
  44. data/lib/maruku/input/rubypants.rb +225 -0
  45. data/lib/maruku/input/type_detection.rb +147 -0
  46. data/lib/maruku/input_textile2/t2_parser.rb +163 -0
  47. data/lib/maruku/maruku.rb +31 -0
  48. data/lib/maruku/output/s5/fancy.rb +756 -0
  49. data/lib/maruku/output/s5/to_s5.rb +138 -0
  50. data/lib/maruku/output/to_html.rb +994 -0
  51. data/lib/maruku/output/to_latex.rb +580 -0
  52. data/lib/maruku/output/to_latex_entities.rb +101 -0
  53. data/lib/maruku/output/to_latex_strings.rb +64 -0
  54. data/lib/maruku/output/to_markdown.rb +164 -0
  55. data/lib/maruku/output/to_s.rb +54 -0
  56. data/lib/maruku/string_utils.rb +185 -0
  57. data/lib/maruku/structures.rb +143 -0
  58. data/lib/maruku/structures_inspect.rb +51 -0
  59. data/lib/maruku/structures_iterators.rb +48 -0
  60. data/lib/maruku/textile2.rb +1 -0
  61. data/lib/maruku/toc.rb +214 -0
  62. data/lib/maruku/usage/example1.rb +33 -0
  63. data/lib/maruku/version +0 -0
  64. data/lib/maruku/version.rb +54 -0
  65. data/spec/block_docs/abbreviations.md +52 -0
  66. data/spec/block_docs/alt.md +17 -0
  67. data/spec/block_docs/attributes/att2.md +20 -0
  68. data/spec/block_docs/attributes/att3.md +28 -0
  69. data/spec/block_docs/attributes/attributes.md +57 -0
  70. data/spec/block_docs/attributes/circular.md +26 -0
  71. data/spec/block_docs/attributes/default.md +22 -0
  72. data/spec/block_docs/blank.md +24 -0
  73. data/spec/block_docs/blanks_in_code.md +75 -0
  74. data/spec/block_docs/bug_def.md +16 -0
  75. data/spec/block_docs/bug_table.md +46 -0
  76. data/spec/block_docs/code.md +34 -0
  77. data/spec/block_docs/code2.md +28 -0
  78. data/spec/block_docs/code3.md +71 -0
  79. data/spec/block_docs/data_loss.md +25 -0
  80. data/spec/block_docs/divs/div1.md +167 -0
  81. data/spec/block_docs/divs/div2.md +21 -0
  82. data/spec/block_docs/divs/div3_nest.md +45 -0
  83. data/spec/block_docs/easy.md +15 -0
  84. data/spec/block_docs/email.md +20 -0
  85. data/spec/block_docs/encoding/iso-8859-1.md +23 -0
  86. data/spec/block_docs/encoding/utf-8.md +18 -0
  87. data/spec/block_docs/entities.md +94 -0
  88. data/spec/block_docs/escaping.md +67 -0
  89. data/spec/block_docs/extra_dl.md +52 -0
  90. data/spec/block_docs/extra_header_id.md +63 -0
  91. data/spec/block_docs/extra_table1.md +37 -0
  92. data/spec/block_docs/footnotes.md +97 -0
  93. data/spec/block_docs/headers.md +37 -0
  94. data/spec/block_docs/hex_entities.md +37 -0
  95. data/spec/block_docs/hrule.md +39 -0
  96. data/spec/block_docs/html2.md +22 -0
  97. data/spec/block_docs/html3.md +31 -0
  98. data/spec/block_docs/html4.md +25 -0
  99. data/spec/block_docs/html5.md +23 -0
  100. data/spec/block_docs/ie.md +49 -0
  101. data/spec/block_docs/images.md +90 -0
  102. data/spec/block_docs/images2.md +31 -0
  103. data/spec/block_docs/inline_html.md +152 -0
  104. data/spec/block_docs/inline_html2.md +21 -0
  105. data/spec/block_docs/links.md +152 -0
  106. data/spec/block_docs/links2.md +22 -0
  107. data/spec/block_docs/list1.md +46 -0
  108. data/spec/block_docs/list12.md +28 -0
  109. data/spec/block_docs/list2.md +56 -0
  110. data/spec/block_docs/list3.md +64 -0
  111. data/spec/block_docs/list4.md +89 -0
  112. data/spec/block_docs/lists.md +192 -0
  113. data/spec/block_docs/lists10.md +34 -0
  114. data/spec/block_docs/lists11.md +23 -0
  115. data/spec/block_docs/lists6.md +41 -0
  116. data/spec/block_docs/lists9.md +64 -0
  117. data/spec/block_docs/lists_after_paragraph.md +208 -0
  118. data/spec/block_docs/lists_ol.md +262 -0
  119. data/spec/block_docs/loss.md +16 -0
  120. data/spec/block_docs/math/equations.md +45 -0
  121. data/spec/block_docs/math/inline.md +46 -0
  122. data/spec/block_docs/math/math2.md +45 -0
  123. data/spec/block_docs/math/notmath.md +25 -0
  124. data/spec/block_docs/math/table.md +25 -0
  125. data/spec/block_docs/math/table2.md +42 -0
  126. data/spec/block_docs/misc_sw.md +525 -0
  127. data/spec/block_docs/notyet/escape.md +21 -0
  128. data/spec/block_docs/notyet/header_after_par.md +58 -0
  129. data/spec/block_docs/notyet/ticks.md +18 -0
  130. data/spec/block_docs/notyet/triggering.md +157 -0
  131. data/spec/block_docs/olist.md +45 -0
  132. data/spec/block_docs/one.md +15 -0
  133. data/spec/block_docs/paragraph.md +16 -0
  134. data/spec/block_docs/paragraph_rules/dont_merge_ref.md +42 -0
  135. data/spec/block_docs/paragraph_rules/tab_is_blank.md +24 -0
  136. data/spec/block_docs/paragraphs.md +46 -0
  137. data/spec/block_docs/pending/amps.md +15 -0
  138. data/spec/block_docs/pending/empty_cells.md +37 -0
  139. data/spec/block_docs/pending/link.md +72 -0
  140. data/spec/block_docs/pending/ref.md +21 -0
  141. data/spec/block_docs/recover/recover_links.md +15 -0
  142. data/spec/block_docs/red_tests/abbrev.md +679 -0
  143. data/spec/block_docs/red_tests/lists7.md +32 -0
  144. data/spec/block_docs/red_tests/lists7b.md +65 -0
  145. data/spec/block_docs/red_tests/lists8.md +42 -0
  146. data/spec/block_docs/red_tests/ref.md +23 -0
  147. data/spec/block_docs/red_tests/xml.md +35 -0
  148. data/spec/block_docs/references/long_example.md +71 -0
  149. data/spec/block_docs/references/spaces_and_numbers.md +15 -0
  150. data/spec/block_docs/smartypants.md +114 -0
  151. data/spec/block_docs/syntax_hl.md +52 -0
  152. data/spec/block_docs/table_attributes.md +34 -0
  153. data/spec/block_docs/test.md +19 -0
  154. data/spec/block_docs/underscore_in_words.md +15 -0
  155. data/spec/block_docs/wrapping.md +67 -0
  156. data/spec/block_docs/xml2.md +19 -0
  157. data/spec/block_docs/xml3.md +26 -0
  158. data/spec/block_docs/xml_instruction.md +52 -0
  159. data/spec/block_spec.rb +49 -0
  160. data/spec/span_spec.rb +254 -0
  161. data/spec/spec_helper.rb +6 -0
  162. metadata +247 -0
@@ -0,0 +1,69 @@
1
module MaRuKu
  module In
    module Markdown
      # Hash: triggering character -> Array of SpanExtension registered for it
      SpanExtensionsTrigger = {}

      # Describes one registered span-level extension.
      class SpanExtension
        # triggering chars
        attr_accessor :chars
        # triggering regexp
        attr_accessor :regexp
        # handler, invoked as block.call(doc, src, con)
        attr_accessor :block
      end

      # Hash String -> Extension
      SpanExtensions = {}

      # Hash Regexp -> Block
      BlockExtensions = {}

      # Registers a span-level extension.
      #
      # +args+ is a Hash with the keys:
      # :chars::   one triggering character, or a list of them
      # :regexp::  regexp the upcoming input must match for the handler to run
      # :handler:: callable invoked as handler.call(doc, src, con)
      #
      # Raises "No blocks passed" when :handler is missing.
      def self.register_span_extension(args)
        handler = args[:handler] || raise("No blocks passed")
        ext = SpanExtension.new
        ext.chars = [*args[:chars]]
        ext.regexp = args[:regexp]
        ext.block = handler
        ext.chars.each do |ch|
          (SpanExtensionsTrigger[ch] ||= []).push ext
        end
      end

      # Registers a block-level extension: the handler stored under
      # args[:regexp] is offered every line that regexp matches.
      #
      # Raises "No blocks passed" when :handler is missing.
      def self.register_block_extension(args)
        handler = args[:handler] || raise("No blocks passed")
        BlockExtensions[args[:regexp]] = handler
      end

      # Offers the current position of +src+ to the span extensions that are
      # triggered by its current character. An extension is only consulted
      # when it has a regexp and src.next_matches accepts that regexp.
      #
      # Returns true as soon as one handler returns a true value,
      # false otherwise.
      def check_span_extensions(src, con)
        candidates = SpanExtensionsTrigger[src.cur_char]
        return false unless candidates

        candidates.each do |ext|
          next unless ext.regexp && src.next_matches(ext.regexp)
          return true if ext.block.call(doc, src, con)
        end
        false # not special
      end

      # Offers +line+ to every registered block extension whose regexp
      # matches it, in registration order.
      #
      # Returns true as soon as one handler accepts (returns a true value),
      # false otherwise.
      def check_block_extensions(src, con, line)
        BlockExtensions.each do |pattern, handler|
          next unless pattern.match(line)
          return true if handler.call(doc, src, con)
        end
        false # not special
      end

      # Returns the MatchData of the first registered block-extension regexp
      # that matches +line+, or false when none does.
      def any_matching_block_extension?(line)
        BlockExtensions.each_key do |pattern|
          found = pattern.match(line)
          return found if found
        end
        false
      end
    end
  end
end
@@ -0,0 +1,189 @@
1
+ #--
2
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
+ #
4
+ # This file is part of Maruku.
5
+ #
6
+ # Maruku is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Maruku is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Maruku; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
20
+
21
+
22
+ module MaRuKu; module In; module Markdown; module SpanLevelParser
23
+
24
+ # This class helps me read and sanitize HTML blocks
25
+
26
+ # I tried to do this with REXML, but wasn't able to. (suggestions?)
27
+
28
# Incremental reader for a block of raw HTML. Text is fed in through
# eat_this; the helper accumulates what it has consumed, keeps a stack of
# the tags that are currently open, and reports through is_finished? when
# it is back at element level with no open tags.
class HTMLHelper
  include MaRuKu::Strings

  # A complete tag: optional "/", tag name, attribute text.
  Tag = %r{^<(/)?(\w+)\s*([^>]*?)>}m
  # Tried after Tag fails: a line-initial "<" and everything after it.
  PartialTag = %r{^<.*}m

  # A run of characters that contains no "<".
  EverythingElse = %r{^[^<]+}m
  CommentStart = %r{^<!--}x
  CommentEnd = %r{^.*?-->}
  # Tags that handle_tag always re-emits in the "<tag ... />" form.
  TO_SANITIZE = ['img','hr','br']

  # Input that has been handed over but not consumed yet.
  attr_reader :rest

  # One of :inside_element, :inside_tag, :inside_comment.
  attr_accessor :state

  # Debugging hook; intentionally a no-op.
  def my_debug(s)
    # puts "---"*10+"\n"+inspect+"\t>>>\t"s
  end

  def initialize
    @rest = ""
    @tag_stack = []
    @m = nil
    @already = ""
    self.state = :inside_element
  end

  # Feeds +line+ to the reader and consumes as much input as possible.
  #
  # Stops early, possibly leaving input in +rest+, once at least one tag or
  # comment start was read during this call and is_finished? holds.
  # Malformed input is reported by raising through +error+.
  def eat_this(line)
    @rest = line + @rest
    things_read = 0
    until @rest.empty?
      case state
      when :inside_comment then consume_comment
      when :inside_element then things_read += 1 if consume_element
      when :inside_tag then consume_partial_tag
      else
        raise "Bug bug: state = #{state.inspect}"
      end

      # puts inspect
      # puts "Read: #{@tag_stack.inspect}"
      break if is_finished? && things_read > 0
    end
  end

  # Processes the tag currently held in @m (a Tag match): appends it to the
  # consumed text and updates the stack of open tags.
  def handle_tag
    @already += @m.pre_match
    @rest = @m.post_match

    is_closing = !!@m[1]
    tag = @m[2]
    attributes = @m[3].to_s

    # A trailing "/" in the attribute text marks a self-closing tag.
    is_single = attributes[-1] == ?/ # =~ /\A(.*)\/\Z/
    attributes = attributes[0, attributes.size - 1] if is_single

    my_debug "Attributes: #{attributes.inspect}"
    my_debug "READ TAG #{@m.to_s.inspect} tag = #{tag} closing? #{is_closing} single = #{is_single}"

    if TO_SANITIZE.include? tag
      attributes = attributes.strip
      # puts "Attributes: #{attributes.inspect}"
      if attributes.size > 0
        @already += '<%s %s />' % [tag, attributes]
      else
        @already += '<%s />' % [tag]
      end
    elsif is_closing
      @already += @m.to_s
      if @tag_stack.empty?
        error "Malformed: closing tag #{tag.inspect} "+
          "in empty list"
      end
      if @tag_stack.last != tag
        error "Malformed: tag <#{tag}> "+
          "closes <#{@tag_stack.last}>"
      end
      @tag_stack.pop
    else
      @already += @m.to_s

      unless is_single
        @tag_stack.push(tag)
        my_debug "Pushing #{tag.inspect} when read #{@m.to_s.inspect}"
      end
    end
  end

  # Raises an Exception carrying +s+ and a dump of the reader state.
  def error(s)
    raise Exception, "Error: #{s} \n"+ inspect, caller
  end

  # Multi-line dump of the reader state: current state, last match, tag
  # stack, consumed text and pending text (each line prefixed with "|").
  def inspect
    [
      "HTML READER\n state=#{state} ",
      "match=#{@m.to_s.inspect}\n",
      "Tag stack = #{@tag_stack.inspect} \n",
      "Before:\n",
      @already.gsub(/^/, '|'), "\n",
      "After:\n",
      @rest.gsub(/^/, '|'), "\n"
    ].join
  end

  # Everything consumed so far (with TO_SANITIZE tags rewritten).
  def stuff_you_read
    @already
  end

  # True when the reader is at element level and no tag is left open.
  def is_finished?
    (state == :inside_element) && @tag_stack.empty?
  end

  private

  # :inside_comment step: consume up to and including "-->" if present,
  # otherwise swallow all pending input and stay inside the comment.
  def consume_comment
    if @m = CommentEnd.match(@rest)
      @already += @m.pre_match + @m.to_s
      @rest = @m.post_match
      self.state = :inside_element
    else
      @already += @rest
      @rest = ""
      self.state = :inside_comment
    end
  end

  # :inside_element step. Returns true when a comment start or a complete
  # tag was read (these count towards the early exit of eat_this).
  def consume_element
    if @m = CommentStart.match(@rest)
      @already += @m.pre_match + @m.to_s
      @rest = @m.post_match
      self.state = :inside_comment
      true
    elsif @m = Tag.match(@rest)
      my_debug "#{@state}: Tag: #{@m.to_s.inspect}"
      handle_tag
      self.state = :inside_element
      true
    elsif @m = PartialTag.match(@rest)
      my_debug "#{@state}: PartialTag: #{@m.to_s.inspect}"
      @already += @m.pre_match
      @rest = @m.post_match
      @partial_tag = @m.to_s
      self.state = :inside_tag
      false
    elsif @m = EverythingElse.match(@rest)
      my_debug "#{@state}: Everything: #{@m.to_s.inspect}"
      @already += @m.pre_match + @m.to_s
      @rest = @m.post_match
      self.state = :inside_element
      false
    else
      error "Malformed HTML: not complete: #{@rest.inspect}"
    end
  end

  # :inside_tag step: once the closing ">" shows up, push the completed tag
  # text back in front of the pending input so it is re-read as a whole tag;
  # until then keep collecting input into @partial_tag.
  def consume_partial_tag
    if @m = /^[^>]*>/.match(@rest)
      my_debug "#{@state}: inside_tag: matched #{@m.to_s.inspect}"
      @partial_tag += @m.to_s
      my_debug "#{@state}: inside_tag: matched TOTAL: #{@partial_tag.to_s.inspect}"
      @rest = @partial_tag + @m.post_match
      @partial_tag = nil
      self.state = :inside_element
    else
      @partial_tag += @rest
      @rest = ""
      self.state = :inside_tag
    end
  end
end # html helper
188
+
189
+ end end end end
@@ -0,0 +1,111 @@
1
+ #--
2
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
+ #
4
+ # This file is part of Maruku.
5
+ #
6
+ # Maruku is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Maruku is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Maruku; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
20
+
21
+
22
+ module MaRuKu; module In; module Markdown; module BlockLevelParser
23
+
24
+ # This represents a source of lines that can be consumed.
25
+ #
26
+ # It is the twin of CharSource.
27
+ #
28
+
29
# A sequence of lines with a read cursor.
class LineSource
  include MaRuKu::Strings
  attr_reader :parent

  # lines::         the lines to read; must not be nil/false
  # parent::        the source these lines were taken from, if any
  # parent_offset:: index inside +parent+ used to translate line numbers
  def initialize(lines, parent=nil, parent_offset=nil)
    raise "NIL lines? " unless lines
    @lines = lines
    @lines_index = 0
    @parent = parent
    @parent_offset = parent_offset
  end

  # The line under the cursor, or nil past the end.
  def cur_line
    @lines[@lines_index]
  end

  # The line after the cursor, or nil when there is none.
  def next_line
    @lines[@lines_index + 1]
  end

  # Returns the current line and advances the cursor.
  # Raises "Over the rainbow" when no line is left.
  def shift_line
    raise "Over the rainbow" if @lines_index >= @lines.size
    consumed = @lines[@lines_index]
    @lines_index += 1
    consumed
  end

  # Advances the cursor without returning the line.
  # Raises "Over the rainbow" when no line is left.
  def ignore_line
    raise "Over the rainbow" if @lines_index >= @lines.size
    @lines_index += 1
  end

  # Human-readable dump of the current position: the original line number
  # followed by up to 3 lines of context on each side, each shown with its
  # md_type and with the current line marked by '--> '.
  def describe
    header = "At line #{original_line_number(@lines_index)}\n"

    radius = 3 # lines
    first = [@lines_index - radius, 0].max
    last = [@lines_index + radius, @lines.size - 1].min

    rows = (first..last).map do |i|
      marker = i == @lines_index ? '--> ' : ' '
      ("%10s %4s|%s" % [@lines[i].md_type.to_s, marker, @lines[i]]) + "|\n"
    end

    # if @parent
    #   s << "Parent context is: \n"
    #   s << @parent.describe.gsub(/^/, '|')
    # end
    header + rows.join
  end

  # Translates +index+ into a line number: 1-based for a root source,
  # otherwise offset by the parent's number for @parent_offset.
  def original_line_number(index)
    return 1 + index unless @parent
    index + @parent.original_line_number(@parent_offset)
  end

  def cur_index
    @lines_index
  end

  # Encodes the md_type of the upcoming lines, one character per line
  # ("t" text, "e" empty, "d" definition, "o" anything else).
  # Stops after the first :definition or after the second empty line.
  def tell_me_the_future
    codes = ""
    empties = 0
    (@lines_index...@lines.size).each do |i|
      code =
        case @lines[i].md_type
        when :text then "t"
        when :empty
          empties += 1
          "e"
        when :definition then "d"
        else "o"
        end
      codes += code
      break if code == "d" || empties > 1
    end
    codes
  end
end # linesource
109
+
110
+ end end end end # block
111
+
@@ -0,0 +1,608 @@
1
+ #--
2
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
+ #
4
+ # This file is part of Maruku.
5
+ #
6
+ # Maruku is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Maruku is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Maruku; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
20
+
21
+
22
+ module MaRuKu; module In; module Markdown; module BlockLevelParser
23
+
24
+ include Helpers
25
+ include MaRuKu::Strings
26
+ include MaRuKu::In::Markdown::SpanLevelParser
27
+
28
# Array of the block-level elements parsed so far.
class BlockContext < Array
  # Returns a one-line-per-element dump (via inspect) of at most the
  # last 5 elements.
  def describe
    limit = 5
    listing = last(limit).map { |element| "\n -" + element.inspect }.join
    "Last #{limit} elements: " + listing
  end
end
36
+
37
# Splits +text+ into lines (split_lines), wraps them in a LineSource and
# returns the result of parse_blocks on that source.
def parse_text_as_markdown(text)
  parse_blocks(LineSource.new(split_lines(text)))
end
43
+
44
# Runs the block-level state machine over +src+ (a LineSource) until it is
# exhausted and returns a BlockContext with the parsed elements.
def parse_blocks(src)
  output = BlockContext.new

  while src.cur_line
    # Registered block extensions get the first chance at every line.
    next if check_block_extensions(src, output, src.cur_line)

    # Prints detected type (useful for debugging)
    # puts "#{src.cur_line.md_type}|#{src.cur_line}"
    kind = src.cur_line.md_type
    case kind
    when :empty
      output.push :empty
      src.ignore_line
    when :ial
      m = InlineAttributeList.match src.shift_line
      content = m[1] || ""
      # puts "Content: #{content.inspect}"
      interpret_extension(CharSource.new(content, src), output, [nil])
    when :ald
      output.push read_ald(src)
    when :text
      # paragraph, or table, or definition list
      read_text_material(src, output)
    when :header2, :hrule
      # hrule
      src.shift_line
      output.push md_hrule()
    when :header3
      output.push read_header3(src)
    when :ulist, :olist
      list_type = kind == :ulist ? :ul : :ol
      li = read_list_item(src)
      # append to current list if we have one
      previous = output.last
      if previous.kind_of?(MDElement) && previous.node_type == list_type
        previous.children << li
      else
        output.push md_el(list_type, [li])
      end
    when :quote
      output.push read_quote(src)
    when :code
      e = read_code(src)
      output << e if e
    when :raw_html
      e = read_raw_html(src)
      output << e if e
    when :footnote_text
      output.push read_footnote_text(src)
    when :ref_definition
      # As the very first line of a nested source it is read as text.
      if src.parent && (src.cur_index == 0)
        read_text_material(src, output)
      else
        read_ref_definition(src, output)
      end
    when :abbreviation
      output.push read_abbreviation(src)
    when :xml_instr
      read_xml_instruction(src, output)
    when :metadata
      maruku_error "Please use the new meta-data syntax: \n"+
        " http://maruku.rubyforge.org/proposal.html\n", src
      src.ignore_line
    else # warn if we forgot something
      line = src.cur_line
      maruku_error "Ignoring line '#{line}' type = #{kind}", src
      src.shift_line
    end
  end

  merge_ial(output, src, output)

  # get rid of IAL elements and of empty line markers
  output.delete_if do |x|
    (x.kind_of?(MDElement) && x.node_type == :ial) || x == :empty
  end

  # See for each list if we can omit the paragraphs and use li_span
  # TODO: do this after
  output.each do |c|
    # Remove paragraphs that we can get rid of
    if [:ul, :ol].include?(c.node_type) &&
       c.children.all? { |li| !li.want_my_paragraph }
      c.children.each do |item|
        item.node_type = :li_span
        item.children = item.children[0].children
      end
    end
    if c.node_type == :definition_list &&
       c.children.all? { |defi| !defi.want_my_paragraph }
      c.children.each do |definition|
        definition.definitions.each do |dd|
          dd.children = dd.children[0].children
        end
      end
    end
  end

  output
end
142
+
143
# Handles a :text line: depending on what follows it is read as a table,
# a setext-style header, a definition (appended to a directly preceding
# definition list when there is one) or a paragraph. The result is added
# to +output+.
def read_text_material(src, output)
  upcoming = src.next_line
  table_start = src.cur_line =~ MightBeTableHeader &&
                upcoming && upcoming =~ TableSeparator

  if table_start
    output.push read_table(src)
  elsif [:header1, :header2].include?(upcoming.md_type)
    output.push read_header12(src)
  elsif eventually_comes_a_def_list(src)
    definition = read_definition(src)
    previous = output.last
    if previous.kind_of?(MDElement) && previous.node_type == :definition_list
      previous.children << definition
    else
      output.push md_el(:definition_list, [definition])
    end
  else # Start of a paragraph
    output.push read_paragraph(src)
  end
end
161
+
162
+
163
# Consumes one attribute-definition-list line, stores the parsed attribute
# list in self.ald under its id and returns the md_ald element.
# Reports an error and returns nil when the line does not match
# AttributeDefinitionList.
def read_ald(src)
  line = src.shift_line
  unless line =~ AttributeDefinitionList
    maruku_error "Bug Bug:\n#{line.inspect}"
    return nil
  end

  id = Regexp.last_match(1)
  raw_attributes = Regexp.last_match(2)
  al = read_attribute_list(CharSource.new(raw_attributes, src), nil, [nil])
  self.ald[id] = al
  md_ald(id, al)
end
174
+
175
# Reads a two-line header (text line followed by ----- or ========).
# The level is 2 when the underline is typed :header2, otherwise 1.
# With the new meta-data syntax enabled, a trailing "{...}" on the text
# line is parsed as an attribute list.
def read_header12(src)
  title = src.shift_line.strip
  al = nil
  # Check if there is an IAL
  if new_meta_data? && (with_ial = /^(.*?)\{(.*?)\}\s*$/.match(title))
    title = with_ial[1].strip
    al = read_attribute_list(CharSource.new(with_ial[2], src), nil, [nil])
  end
  text = parse_lines_as_span [title]
  level = src.cur_line.md_type == :header2 ? 2 : 1
  src.shift_line # consume the underline
  md_header(level, text, al)
end
190
+
191
# Reads a header like '#### header ####'. The level is the number of
# leading '#'; leading and trailing runs of '#' are removed from the text.
# With the new meta-data syntax enabled, a trailing "{...}" is parsed as an
# attribute list.
def read_header3(src)
  heading = src.shift_line.strip
  al = nil
  # Check if there is an IAL
  if new_meta_data? && (with_ial = /^(.*?)\{(.*?)\}\s*$/.match(heading))
    heading = with_ial[1].strip
    al = read_attribute_list(CharSource.new(with_ial[2], src), nil, [nil])
  end
  level = heading[/^#+/].size
  bare_title = heading.gsub(/\A#+|#+\Z/, '')
  md_header(level, parse_lines_as_span([bare_title]), al)
end
205
+
206
# Reads an XML processing instruction ("<?target code ?>"), possibly
# spanning several lines, and adds the result to +output+.
#
# * For target 'mrk', when MaRuKu::Globals[:unsafe_features] is set, the
#   code is run through safe_execute_code and its (non-String) result is
#   pushed onto +output+.
# * Otherwise an md_xml_instr element is pushed.
#
# Text following "?>" on the last line is reported through maruku_error.
# An instruction that is never closed is reported the same way instead of
# reading past the end of +src+; what was collected is still emitted.
#
# Raises "BugBug" when the first line does not look like an instruction.
def read_xml_instruction(src, output)
  m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
  raise "BugBug" if not m
  target = m[2] || ''
  code = m[3]

  until code =~ /\?>/
    # Guard against running off the end of the input: shift_line would
    # raise there and hide the real problem from the user.
    unless src.cur_line
      maruku_error "XML instruction is not terminated by '?>':\n"+
        code.gsub(/^/, '|'), src
      break
    end
    code += "\n" + src.shift_line
  end

  terminator = /\?>(.*)$/.match(code)
  if terminator && code !~ /\?>\s*$/
    garbage = terminator[1]
    maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n"+
      code.gsub(/^/, '|'), src
  end
  code.gsub!(/\?>\s*$/, '')

  if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
    result = safe_execute_code(self, code)
    if result
      raise "Not expected" if result.kind_of? String
      output.push(*result)
    end
  else
    output.push md_xml_instr(target, code)
  end
end
234
+
235
# Reads a block of raw HTML: lines are fed to an HTMLHelper until it
# reports the block as finished or +src+ runs out. Parsing failures and
# leftover non-blank text after the block are reported through
# maruku_error. Returns md_html of whatever the helper read.
def read_raw_html(src)
  helper = HTMLHelper.new
  begin
    helper.eat_this(src.shift_line)
    # puts "\nBLOCK:\nhtml -> #{l.inspect}"
    while src.cur_line && !helper.is_finished?
      # puts "html -> #{l.inspect}"
      helper.eat_this("\n" + src.shift_line)
    end
  rescue Exception => e
    ex = e.inspect + e.backtrace.join("\n")
    maruku_error "Bad block-level HTML:\n#{ex.gsub(/^/, '|')}\n", src
  end

  unless helper.rest =~ /^\s*$/
    maruku_error "Could you please format this better?\n"+
      "I see that #{helper.rest.inspect} is left after the raw HTML.", src
  end

  md_html(helper.stuff_you_read)
end
257
+
258
# Collects the lines of one paragraph and returns md_par of their span
# parse. The first line is always taken; a following line ends the
# paragraph when it
# * is a quote, '#'-header, empty line, ref definition or IAL,
# * is a list line that is followed by another list line of the same type
#   (a lone list-looking line does not break the paragraph),
# * is blank,
# * is followed by a header underline, or
# * matches a registered block extension.
def read_paragraph(src)
  lines = [src.shift_line]
  hard_breaks = [:quote, :header3, :empty, :ref_definition, :ial] #,:xml_instr,:raw_html

  while (current = src.cur_line)
    kind = current.md_type
    break if hard_breaks.include?(kind)
    break if [:olist, :ulist].include?(kind) && src.next_line.md_type == kind
    break if current.strip.size == 0
    break if [:header1, :header2].include?(src.next_line.md_type)
    break if any_matching_block_extension?(current)

    lines << src.shift_line
  end

  md_par(parse_lines_as_span(lines, src))
end
278
+
279
# Reads one list item, either ordered or unordered, and returns the md_li
# element. The item body (first line without its marker plus the indented
# continuation lines) is parsed recursively with parse_blocks.
def read_list_item(src)
  parent_offset = src.cur_index
  item_type = src.cur_line.md_type
  first = src.shift_line

  indentation, ial = spaces_before_first_char(first)
  al = ial ? read_attribute_list(CharSource.new(ial, src), nil, [nil]) : nil

  # Ugly things going on inside `read_indented_content`
  lines, want_my_paragraph =
    read_indented_content(src, indentation, [:ulist, :olist, :ial], item_type)

  # add first line
  # Strip first '*', '-', '+' from first line
  lines.unshift first[indentation, first.size - 1]

  children = parse_blocks(LineSource.new(lines, src, parent_offset))
  with_par = want_my_paragraph || (children.size > 1)

  md_li(children, with_par, al)
end
305
+
306
# Consumes one abbreviation definition line, records it in
# self.abbreviations and returns the md_abbr_def element. A line that does
# not match Abbreviation, or an empty abbreviation, is reported through
# maruku_error.
def read_abbreviation(src)
  line = src.shift_line
  unless line =~ Abbreviation
    maruku_error "Bug: it's Andrea's fault. Tell him.\n#{line.inspect}"
  end

  abbr = Regexp.last_match(1)
  desc = Regexp.last_match(2)

  if !abbr || abbr.size == 0
    maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
  end

  self.abbreviations[abbr] = desc

  md_abbr_def(abbr, desc)
end
322
+
323
# Reads a footnote definition: the first line supplies the id and the
# opening text, continuation lines are collected with an indentation of 4.
# The body is parsed recursively; the resulting md_footnote element is
# stored in self.footnotes under its id and returned.
def read_footnote_text(src)
  parent_offset = src.cur_index
  first = src.shift_line

  maruku_error "Bug (it's Andrea's fault)" unless first =~ FootnoteText

  id = Regexp.last_match(1)
  text = Regexp.last_match(2)

  # Ugly things going on inside `read_indented_content`
  indentation = 4 #first.size-text.size

  # puts "id =_#{id}_; text=_#{text}_ indent=#{indentation}"

  break_list = [:footnote_text, :ref_definition, :definition, :abbreviation]
  lines, _want_my_paragraph =
    read_indented_content(src, indentation, break_list, :footnote_text)

  # add first line
  lines.unshift text if text && text.strip != ""

  children = parse_blocks(LineSource.new(lines, src, parent_offset))

  footnote = md_footnote(id, children)
  self.footnotes[id] = footnote
  footnote
end
355
+
356
+
357
# This is the only ugly function in the code base.
# It is used to read list items, descriptions, footnote text.
#
# Collects the continuation lines of an item from +src+:
# * empty lines are always taken;
# * before any empty line was seen, reading stops at a line whose md_type
#   is in +break_list+;
# * after an empty line, reading stops at a line with fewer than
#   +indentation+ leading spaces;
# * once a :text line has been taken, the :text lines directly following
#   it are taken too, whatever their indentation.
#
# Returns [lines, want_my_paragraph]: the collected lines with
# +indentation+ stripped and trailing empty lines removed, and a flag that
# is true when content followed an empty line; when an empty line was seen
# without content after it, the flag tells whether the next line in +src+
# is of type +item_type+ (nil when +src+ is exhausted).
def read_indented_content(src, indentation, break_list, item_type)
  lines = []
  # collect all indented lines
  saw_empty = false
  saw_anything_after = false

  while (current = src.cur_line)
    # puts "Reading indent = #{indentation} #{src.cur_line.inspect}"
    #puts "#{src.cur_line.md_type} #{src.cur_line.inspect}"
    if current.md_type == :empty
      saw_empty = true
      lines << src.shift_line
      next
    end

    if saw_empty
      # after a white line we expect things to be properly aligned
      break if number_of_leading_spaces(current) < indentation
      saw_anything_after = true
    elsif break_list.include?(current.md_type)
      break
    end

    stripped = strip_indent(src.shift_line, indentation)
    lines << stripped

    #puts "Accepted as #{stripped.inspect}"

    # You are only required to indent the first line of
    # a child paragraph.
    next unless stripped.md_type == :text
    while src.cur_line && (src.cur_line.md_type == :text)
      lines << strip_indent(src.shift_line, indentation)
    end
  end

  want_my_paragraph = saw_anything_after ||
    (saw_empty && (src.cur_line && (src.cur_line.md_type == item_type)))

  # drop the empty lines collected at the end
  lines.pop while lines.last && (lines.last.md_type == :empty)

  return lines, want_my_paragraph
end
413
+
414
+
415
# Reads a run of consecutive :quote lines, removes the quote marker from
# each (unquote), parses the result recursively and returns md_quote.
def read_quote(src)
  parent_offset = src.cur_index

  # collect all quoted lines
  quoted = []
  quoted << unquote(src.shift_line) while src.cur_line && src.cur_line.md_type == :quote

  md_quote(parse_blocks(LineSource.new(quoted, src, parent_offset)))
end
428
+
429
# Reads an indented code block: consumes the run of :code and :empty
# lines, strips 4 columns of indentation and drops blank lines at both
# ends. Returns md_codeblock of the joined text, or nil when nothing but
# blank lines was found.
def read_code(src)
  # collect all indented lines
  collected = []
  while src.cur_line && [:code, :empty].include?(src.cur_line.md_type)
    collected << strip_indent(src.shift_line, 4)
  end

  #while lines.last && (lines.last.md_type == :empty )
  collected.pop while collected.last && collected.last.strip.size == 0
  collected.shift while collected.first && collected.first.strip.size == 0

  return nil if collected.empty?

  md_codeblock(collected.join("\n"))
end
452
+
453
# Reads a series of metadata lines with empty lines in between.
# Empty lines are skipped, every :metadata line is parsed with
# parse_metadata and merged into the result; reading stops at the first
# line of any other type. Returns the merged Hash.
def read_metadata(src)
  collected = {}
  while (line = src.cur_line)
    kind = line.md_type
    if kind == :empty
      src.shift_line
    elsif kind == :metadata
      collected.merge! parse_metadata(src.shift_line)
    else
      break
    end
  end
  collected
end
465
+
466
+
467
# Reads a link reference definition, registers it in self.refs under its
# sanitized id and pushes an md_ref_def element onto +out+.
#
# The definition may continue on the next line when that line is indented
# by 1 to 3 spaces and is not itself a footnote, reference, definition or
# abbreviation. Whatever LinkRegex captures after the title is read as
# whitespace-separated key=value pairs (value optionally in double
# quotes) and stored in the reference hash next to :url and :title.
#
# A line that does not match LinkRegex is reported through maruku_error
# and nothing is pushed.
def read_ref_definition(src, out)
  line = src.shift_line

  # if link is incomplete, shift next line
  following = src.cur_line
  if following &&
     ![:footnote_text, :ref_definition, :definition, :abbreviation].include?(following.md_type) &&
     [1, 2, 3].include?(number_of_leading_spaces(following))
    line += " " + src.shift_line
  end

  # puts "total= #{line}"

  match = LinkRegex.match(line)
  if not match
    maruku_error "Link does not respect format: '#{line}'", src
    return
  end

  id = sanitize_ref_id(match[1])
  url = match[2]
  title = match[3]

  hash = self.refs[id] = {:url=>url,:title=>title}

  stuff = match[4]
  if stuff
    stuff.split.each do |couple|
      # Split on the first '=' only, so that values may contain '='.
      k, v = couple.split('=', 2)
      # A token without a key (e.g. a stray "=") carries no attribute.
      next if k.nil? || k.empty?
      v ||= ""
      if v[0,1]=='"' then v = v[1, v.size-2] end
      # puts "key:_#{k}_ value=_#{v}_"
      hash[k.to_sym] = v
    end
  end
  # puts hash.inspect

  out.push md_ref_def(id, url, {:title=>title})
end
506
+
507
# Splits one table line into its cell texts.
#
# The line is stripped and cut at every '|'. Pieces that are completely
# empty (for instance the one in front of a leading pipe) are dropped, while
# pieces made only of whitespace are kept and become "" cells. Every
# remaining piece is stripped.
def split_cells(s)
  pieces = s.strip.split('|')
  kept = pieces.reject { |piece| piece.empty? }
  kept.map { |piece| piece.strip }
end
512
+
513
# Reads a table from +src+: a header line, a separator line, and then every
# following line that contains a '|'.
#
# Each header cell becomes a :head_cell element and each body cell a :cell
# element, their text parsed as span content. The separator line fixes the
# number of columns and, through the captures of Sep, the alignment of each
# column: both captures set means :center, only the second means :right,
# anything else means :left. (NOTE(review): Sep is defined elsewhere;
# presumably captures 1 and 2 are the leading and trailing colon -- confirm.)
#
# When the header, or any body row, does not have as many cells as the
# separator line, the problem is reported through +maruku_error+ /
# +tell_user+ and the table is abandoned: md_br() is returned instead. Lines
# consumed up to that point are not restored.
#
# Returns a :table element whose children are all head cells followed by all
# body cells (flattened) and whose :align attribute lists the column
# alignments.
def read_table(src)
  head = split_cells(src.shift_line).map do |text|
    md_el(:head_cell, parse_lines_as_span([text]))
  end

  align = split_cells(src.shift_line).map do |marker|
    match = Sep.match(marker)
    before = match && match[1]
    after = match && match[2]
    if before && after
      :center
    elsif after
      :right
    else
      :left
    end
  end

  num_columns = align.size

  if head.size != num_columns
    maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
    tell_user "I will ignore this table."
    # XXX try to recover
    return md_br()
  end

  rows = []
  while src.cur_line && src.cur_line =~ /\|/
    row = split_cells(src.shift_line).map do |text|
      md_el(:cell, parse_lines_as_span([text]))
    end

    # Validate the row that was just read (the header was checked above).
    if row.size != num_columns
      maruku_error "Row does not have #{num_columns} columns: \n#{row.inspect}"
      tell_user "I will ignore this table."
      # XXX try to recover
      return md_br()
    end

    rows << row
  end

  children = (head + rows).flatten
  md_el(:table, children, {:align => align})
end
547
+
548
# Looks ahead in +src+ to decide whether a definition list starts here.
#
# The string returned by src.tell_me_the_future has to begin (at the start
# of a line) with one or more "t", then at most one "e", then a "d".
# NOTE(review): presumably each letter stands for the type of one upcoming
# line (text, empty, definition) -- confirm against tell_me_the_future.
#
# Returns the offset of the match (truthy) or nil when there is none.
def eventually_comes_a_def_list(src)
  src.tell_me_the_future =~ %r{^t+e?d}x
end
557
+
558
+
559
# Reads one definition-list entry from +src+: one or more term lines, an
# optional empty line, and one or more definitions.
#
# Every consecutive :text line becomes a :definition_term element. Each
# :definition line starts a definition: its text is capture 1 of the
# Definition regexp, followed by the lines returned by
# +read_indented_content+ (called with an indentation of 4). Those lines are
# parsed as blocks through a nested LineSource and wrapped in a
# :definition_data element.
#
# The :want_my_paragraph attribute is true when an empty line separated the
# terms from the definitions, or when +read_indented_content+ reported it
# for any of the definitions.
#
# Raises RuntimeError ("Chunky Bacon!") when the input ends before a
# definition line, or when the line following the terms (and the optional
# empty line) is not a :definition line.
#
# Returns a :definition element whose children are the terms followed by the
# definitions; both lists are also exposed as the :terms and :definitions
# attributes.
def read_definition(src)
  # Read one or more terms
  terms = []
  while src.cur_line && src.cur_line.md_type == :text
    terms << md_el(:definition_term, parse_lines_as_span([src.shift_line]))
  end

  raise "Chunky Bacon!" unless src.cur_line

  # One optional empty line
  want_my_paragraph = false
  if src.cur_line.md_type == :empty
    want_my_paragraph = true
    src.shift_line
  end

  # The input may end right after the empty line, so check for nil before
  # asking for the line type.
  if src.cur_line.nil? || src.cur_line.md_type != :definition
    raise "Chunky Bacon!"
  end

  # Read one or more definitions
  definitions = []
  while src.cur_line && src.cur_line.md_type == :definition
    parent_offset = src.cur_index

    # Keep only the text captured by Definition from the first line.
    src.shift_line =~ Definition
    first = Regexp.last_match(1)

    lines, nested_wants_paragraph =
      read_indented_content(src, 4, [:definition], :definition)
    want_my_paragraph ||= nested_wants_paragraph

    lines.unshift first

    nested = LineSource.new(lines, src, parent_offset)
    definitions << md_el(:definition_data, parse_blocks(nested))
  end

  md_el(:definition, terms + definitions, {
    :terms => terms,
    :definitions => definitions,
    :want_my_paragraph => want_my_paragraph})
end
605
+ end # BlockLevelParser
606
+ end # MaRuKu
607
+ end
608
+ end