bai-maruku 0.5.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (190) hide show
  1. data/Rakefile +102 -0
  2. data/bin/marudown +29 -0
  3. data/bin/maruku +181 -0
  4. data/bin/marutest +345 -0
  5. data/bin/marutex +31 -0
  6. data/docs/changelog.md +334 -0
  7. data/docs/div_syntax.md +36 -0
  8. data/docs/entity_test.md +23 -0
  9. data/docs/markdown_syntax.md +899 -0
  10. data/docs/maruku.md +346 -0
  11. data/docs/math.md +194 -0
  12. data/docs/other_stuff.md +51 -0
  13. data/docs/proposal.md +309 -0
  14. data/lib/maruku.rb +141 -0
  15. data/lib/maruku/attributes.rb +227 -0
  16. data/lib/maruku/defaults.rb +71 -0
  17. data/lib/maruku/errors_management.rb +92 -0
  18. data/lib/maruku/ext/div.rb +133 -0
  19. data/lib/maruku/ext/math.rb +41 -0
  20. data/lib/maruku/ext/math/elements.rb +27 -0
  21. data/lib/maruku/ext/math/latex_fix.rb +12 -0
  22. data/lib/maruku/ext/math/mathml_engines/blahtex.rb +107 -0
  23. data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +29 -0
  24. data/lib/maruku/ext/math/mathml_engines/none.rb +20 -0
  25. data/lib/maruku/ext/math/mathml_engines/ritex.rb +24 -0
  26. data/lib/maruku/ext/math/parsing.rb +119 -0
  27. data/lib/maruku/ext/math/to_html.rb +187 -0
  28. data/lib/maruku/ext/math/to_latex.rb +26 -0
  29. data/lib/maruku/helpers.rb +260 -0
  30. data/lib/maruku/input/charsource.rb +326 -0
  31. data/lib/maruku/input/extensions.rb +69 -0
  32. data/lib/maruku/input/html_helper.rb +189 -0
  33. data/lib/maruku/input/linesource.rb +111 -0
  34. data/lib/maruku/input/parse_block.rb +614 -0
  35. data/lib/maruku/input/parse_doc.rb +227 -0
  36. data/lib/maruku/input/parse_span_better.rb +746 -0
  37. data/lib/maruku/input/rubypants.rb +225 -0
  38. data/lib/maruku/input/type_detection.rb +147 -0
  39. data/lib/maruku/input_textile2/t2_parser.rb +163 -0
  40. data/lib/maruku/maruku.rb +33 -0
  41. data/lib/maruku/output/s5/fancy.rb +756 -0
  42. data/lib/maruku/output/s5/to_s5.rb +138 -0
  43. data/lib/maruku/output/to_html.rb +991 -0
  44. data/lib/maruku/output/to_latex.rb +590 -0
  45. data/lib/maruku/output/to_latex_entities.rb +367 -0
  46. data/lib/maruku/output/to_latex_strings.rb +64 -0
  47. data/lib/maruku/output/to_markdown.rb +164 -0
  48. data/lib/maruku/output/to_s.rb +56 -0
  49. data/lib/maruku/string_utils.rb +191 -0
  50. data/lib/maruku/structures.rb +167 -0
  51. data/lib/maruku/structures_inspect.rb +87 -0
  52. data/lib/maruku/structures_iterators.rb +61 -0
  53. data/lib/maruku/tests/benchmark.rb +82 -0
  54. data/lib/maruku/tests/new_parser.rb +373 -0
  55. data/lib/maruku/tests/tests.rb +136 -0
  56. data/lib/maruku/textile2.rb +1 -0
  57. data/lib/maruku/toc.rb +199 -0
  58. data/lib/maruku/usage/example1.rb +33 -0
  59. data/lib/maruku/version.rb +40 -0
  60. data/maruku.gemspec +32 -0
  61. data/tests/bugs/code_in_links.md +101 -0
  62. data/tests/bugs/complex_escaping.md +38 -0
  63. data/tests/math/syntax.md +46 -0
  64. data/tests/math_usage/document.md +13 -0
  65. data/tests/others/abbreviations.md +11 -0
  66. data/tests/others/blank.md +4 -0
  67. data/tests/others/code.md +5 -0
  68. data/tests/others/code2.md +8 -0
  69. data/tests/others/code3.md +16 -0
  70. data/tests/others/email.md +4 -0
  71. data/tests/others/entities.md +19 -0
  72. data/tests/others/escaping.md +16 -0
  73. data/tests/others/extra_dl.md +101 -0
  74. data/tests/others/extra_header_id.md +13 -0
  75. data/tests/others/extra_table1.md +40 -0
  76. data/tests/others/footnotes.md +17 -0
  77. data/tests/others/headers.md +10 -0
  78. data/tests/others/hrule.md +10 -0
  79. data/tests/others/images.md +20 -0
  80. data/tests/others/inline_html.md +42 -0
  81. data/tests/others/links.md +38 -0
  82. data/tests/others/list1.md +4 -0
  83. data/tests/others/list2.md +5 -0
  84. data/tests/others/list3.md +8 -0
  85. data/tests/others/lists.md +32 -0
  86. data/tests/others/lists_after_paragraph.md +44 -0
  87. data/tests/others/lists_ol.md +39 -0
  88. data/tests/others/misc_sw.md +105 -0
  89. data/tests/others/one.md +1 -0
  90. data/tests/others/paragraphs.md +13 -0
  91. data/tests/others/sss06.md +352 -0
  92. data/tests/others/test.md +4 -0
  93. data/tests/s5/s5profiling.md +48 -0
  94. data/tests/unittest/abbreviations.md +64 -0
  95. data/tests/unittest/alt.md +29 -0
  96. data/tests/unittest/attributes/att2.md +32 -0
  97. data/tests/unittest/attributes/att3.md +40 -0
  98. data/tests/unittest/attributes/attributes.md +69 -0
  99. data/tests/unittest/attributes/circular.md +38 -0
  100. data/tests/unittest/attributes/default.md +34 -0
  101. data/tests/unittest/blank.md +36 -0
  102. data/tests/unittest/blanks_in_code.md +87 -0
  103. data/tests/unittest/bug_def.md +28 -0
  104. data/tests/unittest/bug_table.md +58 -0
  105. data/tests/unittest/code.md +46 -0
  106. data/tests/unittest/code2.md +40 -0
  107. data/tests/unittest/code3.md +83 -0
  108. data/tests/unittest/data_loss.md +37 -0
  109. data/tests/unittest/divs/div1.md +179 -0
  110. data/tests/unittest/divs/div2.md +33 -0
  111. data/tests/unittest/divs/div3_nest.md +57 -0
  112. data/tests/unittest/easy.md +27 -0
  113. data/tests/unittest/email.md +32 -0
  114. data/tests/unittest/encoding/iso-8859-1.md +35 -0
  115. data/tests/unittest/encoding/utf-8.md +30 -0
  116. data/tests/unittest/entities.md +106 -0
  117. data/tests/unittest/escaping.md +79 -0
  118. data/tests/unittest/extra_dl.md +64 -0
  119. data/tests/unittest/extra_header_id.md +75 -0
  120. data/tests/unittest/extra_table1.md +49 -0
  121. data/tests/unittest/footnotes.md +109 -0
  122. data/tests/unittest/headers.md +49 -0
  123. data/tests/unittest/hex_entities.md +49 -0
  124. data/tests/unittest/hrule.md +51 -0
  125. data/tests/unittest/html2.md +34 -0
  126. data/tests/unittest/html3.md +43 -0
  127. data/tests/unittest/html4.md +37 -0
  128. data/tests/unittest/html5.md +35 -0
  129. data/tests/unittest/ie.md +61 -0
  130. data/tests/unittest/images.md +102 -0
  131. data/tests/unittest/images2.md +43 -0
  132. data/tests/unittest/inline_html.md +187 -0
  133. data/tests/unittest/inline_html2.md +33 -0
  134. data/tests/unittest/links.md +164 -0
  135. data/tests/unittest/links2.md +34 -0
  136. data/tests/unittest/list1.md +58 -0
  137. data/tests/unittest/list12.md +40 -0
  138. data/tests/unittest/list2.md +68 -0
  139. data/tests/unittest/list3.md +76 -0
  140. data/tests/unittest/list4.md +101 -0
  141. data/tests/unittest/lists.md +204 -0
  142. data/tests/unittest/lists10.md +46 -0
  143. data/tests/unittest/lists11.md +28 -0
  144. data/tests/unittest/lists6.md +53 -0
  145. data/tests/unittest/lists9.md +76 -0
  146. data/tests/unittest/lists_after_paragraph.md +220 -0
  147. data/tests/unittest/lists_ol.md +274 -0
  148. data/tests/unittest/loss.md +28 -0
  149. data/tests/unittest/math/equations.md +86 -0
  150. data/tests/unittest/math/inline.md +58 -0
  151. data/tests/unittest/math/math2.md +88 -0
  152. data/tests/unittest/math/notmath.md +37 -0
  153. data/tests/unittest/math/table.md +52 -0
  154. data/tests/unittest/math/table2.md +54 -0
  155. data/tests/unittest/misc_sw.md +537 -0
  156. data/tests/unittest/notyet/escape.md +33 -0
  157. data/tests/unittest/notyet/header_after_par.md +70 -0
  158. data/tests/unittest/notyet/ticks.md +30 -0
  159. data/tests/unittest/notyet/triggering.md +169 -0
  160. data/tests/unittest/olist.md +57 -0
  161. data/tests/unittest/one.md +27 -0
  162. data/tests/unittest/paragraph.md +28 -0
  163. data/tests/unittest/paragraph_rules/dont_merge_ref.md +54 -0
  164. data/tests/unittest/paragraph_rules/tab_is_blank.md +36 -0
  165. data/tests/unittest/paragraphs.md +58 -0
  166. data/tests/unittest/pending/amps.md +27 -0
  167. data/tests/unittest/pending/empty_cells.md +49 -0
  168. data/tests/unittest/pending/link.md +84 -0
  169. data/tests/unittest/pending/ref.md +33 -0
  170. data/tests/unittest/recover/recover_links.md +27 -0
  171. data/tests/unittest/red_tests/abbrev.md +1388 -0
  172. data/tests/unittest/red_tests/lists7.md +68 -0
  173. data/tests/unittest/red_tests/lists7b.md +128 -0
  174. data/tests/unittest/red_tests/lists8.md +76 -0
  175. data/tests/unittest/references/long_example.md +83 -0
  176. data/tests/unittest/references/spaces_and_numbers.md +27 -0
  177. data/tests/unittest/smartypants.md +126 -0
  178. data/tests/unittest/syntax_hl.md +64 -0
  179. data/tests/unittest/table_attributes.md +46 -0
  180. data/tests/unittest/test.md +31 -0
  181. data/tests/unittest/underscore_in_words.md +27 -0
  182. data/tests/unittest/wrapping.md +79 -0
  183. data/tests/unittest/xml.md +45 -0
  184. data/tests/unittest/xml2.md +31 -0
  185. data/tests/unittest/xml3.md +38 -0
  186. data/tests/unittest/xml_instruction.md +64 -0
  187. data/tests/utf8-files/simple.md +1 -0
  188. data/unit_test_block.sh +5 -0
  189. data/unit_test_span.sh +3 -0
  190. metadata +251 -0
@@ -0,0 +1,227 @@
1
+ #--
2
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
+ #
4
+ # This file is part of Maruku.
5
+ #
6
+ # Maruku is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Maruku is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Maruku; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
20
+
21
+
22
+ require 'iconv'
23
+
24
+
25
+ module MaRuKu; module In; module Markdown; module BlockLevelParser
26
+
27
# Parses the complete document text +s+ and fills in this document node.
#
# Steps, in order:
# 1. splits off the leading email-style headers and merges them into
#    the document attributes;
# 2. converts the body to UTF-8 when an `encoding` attribute is given;
# 3. parses the body as Markdown into @children;
# 4. applies abbreviations and Markdown inside raw HTML;
# 5. builds the table of contents and derives a default title from it;
# 6. expands default and per-element attribute lists;
# 7. executes XML instructions when the global unsafe_features is set.
def parse_doc(s)
  # FIXME \r\n => \n
  headers = parse_email_headers(s)
  data = headers[:data]
  headers.delete :data

  self.attributes.merge! headers

=begin maruku_doc
Attribute: encoding
Scope: document
Summary: Encoding for the document.

If the `encoding` attribute is specified, then the content
will be converted from the specified encoding to UTF-8.

Conversion happens using the `iconv` library.
=end

  enc = self.attributes[:encoding]
  self.attributes.delete :encoding
  data = Iconv.new('utf-8', enc).iconv(data) if enc && enc.downcase != 'utf-8'

  @children = parse_text_as_markdown(data)

  # Markdown Extra processing is unconditional here (markdown_extra? is
  # not consulted).
  self.search_abbreviations
  self.substitute_markdown_inside_raw_html

  toc = create_toc

  # Fall back to the TOC header element when no title was given
  if !self.attributes[:title] && toc.header_element
    self.attributes[:title] = toc.header_element.to_s
  end

  # save for later use
  self.toc = toc

  # Now do the attributes magic: defaults registered for the node type
  # are applied first, then the element's own attribute list.
  each_element do |el|
    defaults = self.ald[el.node_type.to_s]
    expand_attribute_list(defaults, el.attributes) if defaults
    expand_attribute_list(el.al, el.attributes)
  end

=begin maruku_doc
Attribute: unsafe_features
Scope: global
Summary: Enables execution of XML instructions.

Disabled by default because of security concerns.
=end

  # TODO: remove executed code blocks
  self.execute_code_blocks if Maruku::Globals[:unsafe_features]
end
99
+
100
# Expands the attribute list +al+ into the Hash +result+.
#
# +al+ is iterated as (key, value) pairs:
# * :class values accumulate in result[:class], space separated;
# * :id overwrites result[:id];
# * :ref names a list in self.ald, which is expanded recursively.
#   Names already expanded are tracked in result[:expanded_references];
#   meeting one again is reported through maruku_error as a circular
#   reference. Names missing from self.ald are collected (space
#   separated) in result[:unresolved_references] and also set as a
#   boolean attribute result[name.to_sym] = true;
# * any other key is stored as result[key.to_sym].
#
# +result+ is modified in place; the strings held by +al+ are not.
def expand_attribute_list(al, result)
  al.each do |k, v|
    case k
    when :class
      if not result[:class]
        result[:class] = v
      else
        result[:class] += " " + v
      end
    when :id
      result[:id] = v
    when :ref
      if self.ald[v]
        already = (result[:expanded_references] ||= [])
        if not already.include?(v)
          already.push v
          expand_attribute_list(self.ald[v], result)
        else
          already.push v
          maruku_error "Circular reference between labels.\n\n"+
            "Label #{v.inspect} calls itself via recursion.\nThe recursion is "+
            (already.map{|x| x.inspect}.join(' => '))
        end
      else
        if not result[:unresolved_references]
          # Keep a private copy: the << in the other branch appends in
          # place and must not alter the string that belongs to +al+.
          result[:unresolved_references] = v.dup
        else
          result[:unresolved_references] << " #{v}"
        end

        result[v.to_sym] = true
      end
    else
      result[k.to_sym]=v
    end
  end
end
138
+
139
# Evaluates the Ruby source string +code+ with +object+ as self.
#
# Returns the value of the evaluated code. If the code raises, the
# failure (code, exception and backtrace) is reported through
# maruku_error and nil is returned.
#
# Rescued: StandardError, ScriptError (which covers SyntaxError and
# LoadError) and SystemStackError. Process-control exceptions such as
# SystemExit and Interrupt are deliberately left to propagate.
#
# SECURITY: this runs arbitrary code taken from the document; in this
# file it is only reached when the global :unsafe_features is set.
def safe_execute_code(object, code)
  begin
    return object.instance_eval(code)
  rescue StandardError, ScriptError, SystemStackError => e
    # Exception#backtrace may be nil for exceptions that were never raised
    trace = (e.backtrace || []).join("\n")
    maruku_error "Exception while executing this:\n"+
      add_tabs(code, 1, ">")+
      "\nThe error was:\n"+
      add_tabs(e.inspect+"\n"+trace, 1, "|")
  end
  nil
end
160
+
161
# Runs every XML instruction whose target is 'maruku' through
# safe_execute_code, with the instruction element itself as receiver.
# A String result is echoed (inspected) on standard output; any other
# result is discarded.
def execute_code_blocks
  self.each_element(:xml_instr) do |instr|
    next unless instr.target == 'maruku'

    outcome = safe_execute_code(instr, instr.code)
    puts "Result is : #{outcome.inspect}" if outcome.kind_of?(String)
  end
end
171
+
172
# Replaces occurrences of each known abbreviation in the document's
# strings with an abbreviation element.
#
# For every (abbrev, title) pair in self.abbreviations, each string
# handed out by replace_each_string is split around ALL literal
# occurrences of +abbrev+; the block returns
# [text, abbr, text, abbr, ..., text]. A string without any occurrence
# is returned unchanged. For a string containing exactly one
# occurrence the result is [pre_match, abbr, post_match].
def search_abbreviations
  self.abbreviations.each do |abbrev, title|
    reg = Regexp.new(Regexp.escape(abbrev))
    self.replace_each_string do |s|
      pieces = []
      rest = s
      while (m = reg.match(rest))
        pieces << m.pre_match
        pieces << md_abbr(abbrev.dup, title ? title.dup : nil)
        rest = m.post_match
        # An empty abbreviation matches everywhere without consuming
        # input; stop after one match so the loop terminates.
        break if m[0].empty?
      end
      pieces.empty? ? s : (pieces << rest)
    end
  end
end
186
+
187
include REXML

# (PHP Markdown extra) Searches raw HTML elements for tags that carry a
# `markdown` attribute and parses the text inside them as Markdown.
#
# For each :raw_html element whose @parsed_html document is set:
# * every element with a `markdown` attribute is located via XPath;
# * its text is parsed at block level when the attribute value is
#   'block' or the tag is a 'div', otherwise at span level;
# * the `markdown` attribute is removed;
# * each non-blank text node below that element is replaced by the
#   nodes produced by children_to_html of the parsed result.
#
# Only text nodes that are descendants of the marked element are
# touched (relative path ".//text()").
def substitute_markdown_inside_raw_html
  self.each_element(:raw_html) do |raw|
    doc = raw.instance_variable_get :@parsed_html
    next unless doc # nil when the HTML was not valid

    # parse block-level markdown elements in these HTML tags
    block_tags = ['div']

    # use xpath to find elements with 'markdown' attribute
    XPath.match(doc, "//*[attribute::markdown]" ).each do |e|
      # should we parse block-level or span-level?
      parse_blocks = (e.attributes['markdown'] == 'block') ||
                     block_tags.include?(e.name)
      # remove 'markdown' attribute
      e.delete_attribute 'markdown'

      # Select all text nodes below e. XPath.match returns an Array, so
      # the tree can be edited while walking the result.
      XPath.match(e, ".//text()" ).each do |original_text|
        s = original_text.value.strip
        next if s.size == 0

        el = md_el(:dummy,
                   parse_blocks ? parse_text_as_markdown(s) :
                                  parse_lines_as_span([s]) )
        parent = original_text.parent
        el.children_to_html.each do |x|
          parent.insert_before(original_text, x)
        end
        parent.delete(original_text)
      end
    end
  end
end
226
+
227
+ end end end end
@@ -0,0 +1,746 @@
1
+ #--
2
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
+ #
4
+ # This file is part of Maruku.
5
+ #
6
+ # Maruku is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Maruku is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Maruku; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
20
+
21
+
22
+ require 'set'
23
+
24
+ module MaRuKu; module In; module Markdown; module SpanLevelParser
25
+ include MaRuKu::Helpers
26
+
27
# Characters that a backslash escapes in ordinary span text.
EscapedCharInText = Set.new [
  ?\\, ?`, ?*, ?_, ?{, ?}, ?[, ?], ?(, ?), ?#, ?., ?!, ?|, ?:, ?+, ?-, ?>
]

# The same characters plus both quote characters, used when reading
# quoted strings.
EscapedCharInQuotes = EscapedCharInText | [?', ?"]

# Characters that a backslash escapes inside inline code.
EscapedCharInInlineCode = [?\\, ?`]
34
+
35
# Joins +lines+ with newlines and parses the result as span-level
# Markdown via parse_span_better. +parent+ is passed through unchanged.
def parse_lines_as_span(lines, parent=nil)
  text = lines.join("\n")
  parse_span_better(text, parent)
end
38
+
39
# Parses +string+ as span-level Markdown and returns what read_span
# returns for it.
#
# A frozen copy of +string+ is wrapped in a CharSource (together with
# +parent+) and read up to end of input (exit char nil) using the
# EscapedCharInText escapes. A non-String argument is reported via
# +error+.
def parse_span_better(string, parent=nil)
  unless string.kind_of?(String)
    error "Passed #{string.class}."
  end

  buffer = (string + "").freeze
  read_span(CharSource.new(buffer, parent), EscapedCharInText, [nil])
end
48
+
49
# This is the main loop for reading span elements.
#
# It's long, but not *complex* or difficult to understand.
#
# Reads from +src+ until the current character is one of
# +exit_on_chars+ (an Array that may contain nil for end of input, or
# nil for "no exit characters") or the input continues with one of
# +exit_on_strings+ (Array of Strings, or nil). The terminator itself
# is NOT consumed. +escaped+ is the collection of characters that a
# backslash escapes.
#
# Elements are accumulated in a fresh SpanContext; afterwards inline
# attribute lists are merged (merge_ial), one leading and one trailing
# space are trimmed, and the result of educate(elements) is returned.
#
# Note: a-z and A-Z take a fast path, so they cannot be used in
# +exit_on_chars+.
def read_span(src, escaped, exit_on_chars, exit_on_strings=nil)
  con = SpanContext.new
  c = d = nil
  while true
    c = src.cur_char

    # This is only an optimization which cuts 50% of the time used.
    # (but you can't use a-zA-z in exit_on_chars)
    if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z)))
      con.cur_string << src.shift_char
      next
    end

    break if exit_on_chars && exit_on_chars.include?(c)
    break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x}

    # check if there are extensions
    if check_span_extensions(src, con)
      next
    end

    case c = src.cur_char
    when ?\s # it's space (32)
      # Hard line break: two spaces followed by a newline (3 chars)
      if src.cur_chars_are "  \n"
        src.ignore_chars(3)
        con.push_element md_br()
        next
      else
        src.ignore_char
        con.push_space
      end
    when ?\n, ?\t
      src.ignore_char
      con.push_space
    when ?`
      read_inline_code(src,con)
    when ?<
      # It could be:
      # 1) HTML "<div ..."
      # 2) HTML "<!-- ..."
      # 3) url "<http:// ", "<ftp:// ..."
      # 4) email "<andrea@... ", "<mailto:andrea@..."
      # 5) on itself! "a < b "
      # 6) Start of <<guillemettes>>

      case d = src.next_char
      when ?<; # guillemettes
        src.ignore_chars(2)
        con.push_char ?<
        con.push_char ?<
      when ?!;
        if src.cur_chars_are '<!--'
          read_inline_html(src, con)
        else
          con.push_char src.shift_char
        end
      when ??
        read_xml_instr_span(src, con)
      when ?\s, ?\t
        con.push_char src.shift_char
      else
        if src.next_matches(/<mailto:/) or
           src.next_matches(/<[\w\.]+\@/)
          read_email_el(src, con)
        elsif src.next_matches(/<\w+:/)
          read_url_el(src, con)
        elsif src.next_matches(/<\w/)
          #puts "This is HTML: #{src.cur_chars(20)}"
          read_inline_html(src, con)
        else
          #puts "This is NOT HTML: #{src.cur_chars(20)}"
          con.push_char src.shift_char
        end
      end
    when ?\\
      d = src.next_char
      if d == ?'
        src.ignore_chars(2)
        con.push_element md_entity('apos')
      elsif d == ?"
        src.ignore_chars(2)
        con.push_element md_entity('quot')
      elsif escaped.include? d
        src.ignore_chars(2)
        con.push_char d
      else
        con.push_char src.shift_char
      end
    when ?[
      if markdown_extra? && src.next_char == ?^
        read_footnote_ref(src,con)
      else
        read_link(src, con)
      end
    when ?!
      if src.next_char == ?[
        read_image(src, con)
      else
        con.push_char src.shift_char
      end
    when ?&
      # named references
      if m = src.read_regexp(/\&([\w\d]+);/)
        con.push_element md_entity(m[1])
      # numeric
      elsif m = src.read_regexp(/\&\#(x)?([\w\d]+);/)
        num = m[1] ? m[2].hex : m[2].to_i
        con.push_element md_entity(num)
      else
        con.push_char src.shift_char
      end
    when ?*
      if not src.next_char
        maruku_error "Opening * as last char.", src, con
        maruku_recover "Threating as literal", src, con
        con.push_char src.shift_char
      else
        follows = src.cur_chars(4)
        if follows =~ /^\*\*\*[^\s\*]/
          con.push_element read_emstrong(src,'***')
        elsif follows =~ /^\*\*[^\s\*]/
          con.push_element read_strong(src,'**')
        elsif follows =~ /^\*[^\s\*]/
          con.push_element read_em(src,'*')
        else # * is just a normal char
          con.push_char src.shift_char
        end
      end
    when ?_
      if not src.next_char
        maruku_error "Opening _ as last char", src, con
        maruku_recover "Threating as literal", src, con
        con.push_char src.shift_char
      else
        # we don't want "mod_ruby" to start an emphasis
        # so we start one only if
        # 1) there's nothing else in the span (first char)
        # or 2) the last char was a space
        # or 3) the current string is empty
        #if con.elements.empty? ||
        if (con.cur_string =~ /\s\Z/) || (con.cur_string.size == 0)
          # also, we check the next characters
          follows = src.cur_chars(4)
          if follows =~ /^\_\_\_[^\s\_]/
            con.push_element read_emstrong(src,'___')
          elsif follows =~ /^\_\_[^\s\_]/
            con.push_element read_strong(src,'__')
          elsif follows =~ /^\_[^\s\_]/
            con.push_element read_em(src,'_')
          else # _ is just a normal char
            con.push_char src.shift_char
          end
        else
          # _ is just a normal char
          con.push_char src.shift_char
        end
      end
    when ?{ # extension
      if [?#, ?., ?:].include? src.next_char
        src.ignore_char # {
        interpret_extension(src, con, [?}])
        src.ignore_char # }
      else
        con.push_char src.shift_char
      end
    when nil
      # End of input reached before any terminator was seen
      maruku_error( ("Unclosed span (waiting for %s"+
        "#{exit_on_strings.inspect})") % [
          exit_on_chars ? "#{exit_on_chars.inspect} or" : ""],
        src,con)
      break
    else # normal text
      con.push_char src.shift_char
    end # end case
  end # end while true
  con.push_string_if_present

  # Assign IAL to elements
  merge_ial(con.elements, src, con)

  # Remove leading space
  if (s = con.elements.first).kind_of? String
    if s[0] == ?\s then con.elements[0] = s[1, s.size-1] end
    con.elements.shift if s.size == 0
  end

  # Remove final spaces
  if (s = con.elements.last).kind_of? String
    s.chop! if s[-1] == ?\s
    con.elements.pop if s.size == 0
  end

  educated = educate(con.elements)

  educated
end
251
+
252
+
253
# Reads an XML processing instruction "<?target code ?>" and pushes the
# corresponding md_xml_instr element onto +con+.
#
# Start: cursor on the opening "<?". The target is the first run of
# word characters (empty string when there is none); the code is
# everything up to "?>", stripped of surrounding whitespace.
def read_xml_instr_span(src, con)
  src.ignore_chars(2) # starting <?

  # read target <?target code... ?>
  m = src.read_regexp(/(\w+)/)
  target = m ? m[1] : ''

  terminator = "?>"
  body = read_simple(src, [], [], [terminator])
  src.ignore_chars terminator.size

  con.push_element md_xml_instr(target, (body || "").strip)
end
274
+
275
# Interprets the content of a "{...}" extension.
#
# Start: cursor on the character **after** '{'
# End: cursor on '}' or EOF
#
# ":" is skipped and the rest handed to extension_meta; "#" and "."
# go to extension_meta directly. Anything else is read up to
# +break_on_chars+, reported through maruku_recover, and then
# extension_meta is called as well.
def interpret_extension(src, con, break_on_chars)
  case src.cur_char
  when ?:
    src.ignore_char # :
    extension_meta(src, con, break_on_chars)
  when ?#, ?.
    extension_meta(src, con, break_on_chars)
  else
    stuff = read_simple(src, [?}], break_on_chars, [])
    if stuff =~ /^(\w+\s|[^\w])/
      # No extension ids are recognized here; say so before falling back
      extension_id = $1.strip
      maruku_recover "I don't know what to do with extension '#{extension_id}'\n"+
        "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
    else
      maruku_recover "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
    end
    extension_meta(src, con, break_on_chars)
  end
end
300
+
301
# Reads the meta-data content of a "{...}" extension.
#
# When the input starts with "name:" this is an attribute list
# definition: the list that follows is stored in self.doc.ald[name]
# and an md_ald element is pushed onto +con+.
#
# Otherwise it is an inline attribute list: it is read and pushed onto
# +con+ as an md_ial element. Nothing is registered in self.doc.ald in
# this case, because there is no name to register it under.
def extension_meta(src, con, break_on_chars)
  if m = src.read_regexp(/([^\s\:\"\']+):/)
    name = m[1]
    al = read_attribute_list(src, con, break_on_chars)
    # puts "#{name}=#{al.inspect}"
    self.doc.ald[name] = al
    con.push md_ald(name, al)
  else
    al = read_attribute_list(src, con, break_on_chars)
    con.push md_ial(al)
  end
end
314
+
315
# Reads an autolink "<url>" and pushes an md_url element onto +con+.
# Start: cursor on '<'. The URL is everything up to the next '>',
# which is consumed as well.
def read_url_el(src,con)
  src.ignore_char # leading <
  target = read_simple(src, [], [?>])
  src.ignore_char # closing >

  con.push_element(md_url(target))
end
322
+
323
# Reads an email autolink "<address>" or "<mailto:address>" and pushes
# an md_email element onto +con+.
# Start: cursor on '<'. The closing '>' is consumed; a leading
# "mailto:" is removed from the address.
def read_email_el(src,con)
  src.ignore_char # leading <
  raw = read_simple(src, [], [?>])
  src.ignore_char # closing >

  con.push_element md_email(raw.gsub(/^mailto:/,''))
end
331
+
332
# Reads a URL from +src+ up to one of the characters in +break_on+.
#
# A quote as first character is reported through +error+. Surrounding
# angle brackets ("<url>") are removed. Returns the URL, or nil when
# nothing (or only "<>") was read.
def read_url(src, break_on)
  error 'Invalid char for url', src if [?',?"].include? src.cur_char

  url = read_simple(src, [], break_on)
  url = "" if not url # empty url

  url = url[1, url.size-2] if url[0] == ?< && url[-1] == ?>

  url.size == 0 ? nil : url
end
352
+
353
+
354
# Reads a value that may or may not be quoted: when the cursor is on
# ' or " the value is read with read_quoted, otherwise with
# read_simple using +escaped+ and +exit_on_chars+.
def read_quoted_or_unquoted(src, con, escaped, exit_on_chars)
  case src.cur_char
  when ?', ?" then read_quoted(src, con)
  else read_simple(src, escaped, exit_on_chars)
  end
end
362
+
363
# Tries to read a quoted value. If the stream does not start with
# ' or ", returns nil without consuming anything.
#
# Otherwise the opening quote, the content (read with the
# EscapedCharInQuotes escapes up to the same quote character) and the
# closing quote are consumed, and the content is returned. The content
# comes from read_simple, so an empty quoted value also yields nil.
def read_quoted(src, con)
  return nil unless [?', ?"].include?(src.cur_char)

  quote_char = src.shift_char # opening quote
  content = read_simple(src, EscapedCharInQuotes, [quote_char])
  src.ignore_char # closing quote
  content
end
377
+
378
# Reads a simple string (no formatting) until the cursor is on one of
# +exit_on_chars+ or the input continues with one of +exit_on_strings+
# (either may be nil). The terminator is NOT consumed.
#
# A backslash followed by a character contained in +escaped+ yields
# that character alone; any other backslash is kept literally.
#
# Returns the text read, or nil if it is empty.
#
# If the input ends before a terminator is found, the problem is
# reported through maruku_error / maruku_recover and whatever was read
# so far is returned.
def read_simple(src, escaped, exit_on_chars, exit_on_strings=nil)
  text = ""
  while true
    c = src.cur_char
    break if exit_on_chars && exit_on_chars.include?(c)
    break if exit_on_strings &&
             exit_on_strings.any? {|x| src.cur_chars_are x}

    case c
    when nil
      # exit_on_chars may legitimately be nil (see the guard above)
      s= "String finished while reading (break on "+
         "#{(exit_on_chars || []).map{|x|""<<x}.inspect})"+
         " already read: #{text.inspect}"
      maruku_error s, src
      maruku_recover "I boldly continue", src
      break
    when ?\\
      d = src.next_char
      if escaped.include? d
        src.ignore_chars(2)
        text << d
      else
        text << src.shift_char
      end
    else
      text << src.shift_char
    end
  end
  text.empty? ? nil : text
end
420
+
421
# Reads an emphasis span delimited by the one-character +delim+.
# Start: cursor on the opening delimiter. The content is read with
# read_span up to +delim+; one character is skipped on either side.
# Returns the md_em element.
def read_em(src, delim)
  src.ignore_char # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_char # closing delimiter
  md_em(inner)
end
427
+
428
# Reads a strong span delimited by the two-character +delim+.
# Start: cursor on the opening delimiter. The content is read with
# read_span up to +delim+; two characters are skipped on either side.
# Returns the md_strong element.
def read_strong(src, delim)
  src.ignore_chars(2) # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(2) # closing delimiter
  md_strong(inner)
end
434
+
435
# Reads a combined emphasis+strong span delimited by the
# three-character +delim+.
# Start: cursor on the opening delimiter. The content is read with
# read_span up to +delim+; three characters are skipped on either side.
# Returns the md_emstrong element.
def read_emstrong(src, delim)
  src.ignore_chars(3) # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(3) # closing delimiter
  md_emstrong(inner)
end
441
+
442
# The space character (code 32).
SPACE = ?\s

# Matches the remainder of a bracketed reference id: everything up to
# the closing "]" is captured in group 1; the bracket itself is part of
# the match.
R_REF_ID = /([^\]]*)\]/
447
+
448
# Reads a bracketed id "[refid]". Consumes also both brackets.
#
# Start: cursor on '['. Whitespace right after the bracket is skipped.
# Returns the text between the brackets (possibly empty), or nil when
# R_REF_ID does not match at the cursor.
def read_ref_id(src, con)
  src.ignore_char # [
  src.consume_whitespace
  m = src.read_regexp(R_REF_ID)
  m ? m[1] : nil
end
461
+
462
# Reads a footnote reference: the bracketed id at the cursor is read
# with read_ref_id and pushed onto +con+ as an md_foot_ref element.
def read_footnote_ref(src,con)
  con.push_element md_foot_ref(read_ref_id(src,con))
end
466
+
467
# Reads an inline HTML fragment starting at the cursor and pushes an
# md_html element onto +con+.
#
# The remaining buffer is fed to an HTMLHelper one character at a time
# until the helper reports that it is finished. If the buffer runs out
# first, a "Malformed HTML" error is reported and whatever the helper
# read is still pushed. The characters fed are then consumed from +src+.
#
# If anything raises along the way, the error is reported and the
# current character is pushed as literal text instead.
#
# NOTE(review): `rescue Exception` is kept as is; HTMLHelper is defined
# elsewhere and may raise outside StandardError. Confirm before
# narrowing it.
def read_inline_html(src, con)
  h = HTMLHelper.new
  begin
    # This is our current buffer in the context
    next_stuff = src.current_remaining_buffer

    consumed = 0
    finished = false
    until finished
      if consumed >= next_stuff.size
        maruku_error "Malformed HTML starting at #{next_stuff.inspect}", src, con
        break
      end

      h.eat_this next_stuff[consumed].chr
      consumed += 1
      finished = h.is_finished?
    end
    src.ignore_chars(consumed)
    con.push_element md_html(h.stuff_you_read)
  rescue Exception => e
    maruku_error "Bad html: \n" +
      add_tabs(e.inspect+e.backtrace.join("\n"),1,'>'),
      src,con
    maruku_recover "I will try to continue after bad HTML.", src, con
    con.push_char src.shift_char
  end
end
509
+
510
# Reads an inline code span and pushes an md_code element onto +con+.
#
# Start: cursor on the first backtick. The run of opening backticks is
# counted, the code is read up to the same number of backticks, and the
# closing run is consumed. With more than one backtick, at most one
# leading and one trailing space are dropped from the code.
#
# An empty or unterminated span (read_simple returns nil) yields an
# empty code string.
def read_inline_code(src, con)
  # Count the number of ticks
  num_ticks = 0
  while src.cur_char == ?`
    num_ticks += 1
    src.ignore_char
  end
  # We will read until this string
  end_string = "`"*num_ticks

  code = read_simple(src, [], [], [end_string])
  # read_simple returns nil for empty text; the indexing below needs a String
  code ||= ""

  src.ignore_chars num_ticks

  # Ignore at most one space
  if num_ticks > 1 && code[0] == SPACE
    code = code[1, code.size-1]
  end

  # drop last space
  if num_ticks > 1 && code[-1] == SPACE
    code = code[0,code.size-1]
  end

  con.push_element md_code(code)
end
540
+
541
# Reads a link starting at '[' and pushes the resulting element(s)
# onto +con+.
#
# The bracketed text is read as a span. What follows decides the kind:
# * "(url "title")": inline link, pushed as md_im_link(children, url,
#   title). A missing url becomes ''. A title that itself contains the
#   quote character is reassembled from the remaining text. Without a
#   closing ")" the error is reported and the children are pushed as
#   plain elements.
# * "[id]": reference link, pushed as md_link(children, ref_id); an
#   empty id falls back to the sanitized link text. If the id cannot be
#   read, the children are pushed as plain elements.
# * anything else: "[text]" alone, a reference link whose id is the
#   sanitized link text.
#
# One space between "]" and a following "[" or "(" is skipped.
def read_link(src, con)
  # we read the string and see what happens
  src.ignore_char # opening bracket
  children = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket

  # ignore space
  if src.cur_char == SPACE && [?[, ?(].include?(src.next_char)
    src.shift_char
  end

  case src.cur_char
  when ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE,?\t,?)])
    if not url
      url = '' # no url is ok
    end
    src.consume_whitespace
    title = nil
    if src.cur_char != ?) # we have a title
      quote_char = src.cur_char
      title = read_quoted(src,con)

      if not title
        maruku_error 'Must quote title',src,con
      else
        # Tries to read a title with quotes: ![a](url "ti"tle")
        # this is the most ugly thing in Markdown
        if not src.next_matches(/\s*\)/)
          # if there is not a closing par ), then read
          # the rest and guess it's title with quotes.
          # read_simple returns nil when the input ends right here.
          rest = read_simple(src, [], [?)], []) || ""
          # chop the closing char
          rest.chop!
          title << quote_char << rest
        end
      end
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      maruku_error 'Unclosed link',src,con
      maruku_recover "No closing ): I will not create"+
        " the link for #{children.inspect}", src, con
      con.push_elements children
      return
    end
    con.push_element md_im_link(children,url, title)
  when ?[ # link ref
    ref_id = read_ref_id(src,con)
    if ref_id
      if ref_id.size == 0
        ref_id = sanitize_ref_id(children.to_s)
      else
        ref_id = sanitize_ref_id(ref_id)
      end
      con.push_element md_link(children, ref_id)
    else
      maruku_error "Could not read ref_id", src, con
      maruku_recover "I will not create the link for "+
        "#{children.inspect}", src, con
      con.push_elements children
      return
    end
  else # empty [link]
    id = sanitize_ref_id(children.to_s) #. downcase.gsub(' ','_')
    con.push_element md_link(children, id)
  end
end # read link
614
+
615
# Reads an image starting at "![" and pushes the resulting element onto +con+.
#
# Three forms are distinguished by the character following the closing "]":
# * "(" -- inline image with a url and an optional quoted title
# * "[" -- reference image with an explicit (possibly empty) reference id
# * anything else -- reference image whose id is derived from the alt text
#
# Problems are reported through error / maruku_error; parsing then carries on
# and an element is still pushed with whatever was read.
#
# src:: character source positioned on the "!" of "!["
# con:: span context receiving the image element
def read_image(src, con)
  src.ignore_chars(2) # opening "!["
  alt_text = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket

  # ignore one space between "]" and a following "[" or "("
  if src.cur_char == SPACE &&
     (src.next_char == ?[ || src.next_char == ?( )
    src.ignore_char
  end

  case src.cur_char
  when ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE, ?\t, ?)])
    if not url
      error "Could not read url from #{src.cur_chars(10).inspect}",
            src, con
    end
    src.consume_whitespace

    title = nil
    if src.cur_char != ?) # we have a title
      quote_char = src.cur_char
      title = read_quoted(src, con)
      if not title
        maruku_error 'Must quote title', src, con
      else
        # Tries to read a title with quotes: ![a](url "ti"tle")
        # this is the most ugly thing in Markdown
        if not src.next_matches(/\s*\)/)
          # if there is not a closing par ), then read
          # the rest and guess it's title with quotes
          rest = read_simple(src, [], [?)], [])
          # chop the closing char
          rest.chop!
          title << quote_char << rest
        end
      end
    end

    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      # closing can be nil (presumably when the input ends before ")" --
      # confirm against the char source's shift_char). Appending nil with
      # String#<< raises TypeError, which would replace the Markdown error
      # report with a crash, so render it explicitly. Integer#chr keeps the
      # message unchanged on rubies where characters are integers.
      found = closing.is_a?(Integer) ? closing.chr : closing.to_s
      error("Unclosed link: '" + found + "'" +
            " Read url=#{url.inspect} title=#{title.inspect}", src, con)
    end
    con.push_element md_im_image(alt_text, url, title)
  when ?[ # link ref
    ref_id = read_ref_id(src, con)
    if not ref_id # TODO: check around
      error('Reference not closed.', src, con)
      ref_id = ""
    end
    # An empty id means the alt text itself is the reference.
    if ref_id.size == 0
      ref_id = alt_text.to_s
    end

    ref_id = sanitize_ref_id(ref_id)

    con.push_element md_image(alt_text, ref_id)
  else # no stuff: the alt text is the reference
    ref_id = sanitize_ref_id(alt_text.to_s)
    con.push_element md_image(alt_text, ref_id)
  end
end # read_image
679
+
680
+
681
# Accumulates the result of parsing a span: a list of finished elements
# (strings and MDElements) plus a buffer holding the text currently being
# collected. The buffer is moved into the element list whenever a new element
# is pushed.
class SpanContext
  include MaRuKu::Strings

  # Elements read so far
  attr_accessor :elements
  # Text collected since the last element was pushed
  attr_accessor :cur_string

  def initialize
    @elements = []
    @cur_string = ""
  end

  # Appends +e+ (a String or an MDElement) to the element list, after first
  # flushing any buffered text. Raises for any other kind of object.
  # Returns nil.
  def push_element(e)
    unless e.kind_of?(String) || e.kind_of?(MDElement)
      raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} "
    end

    push_string_if_present
    @elements << e
    nil
  end
  alias push push_element

  # Pushes every item of +a+: strings are appended to the text buffer,
  # everything else goes through push_element.
  def push_elements(a)
    a.each do |e|
      if e.kind_of? String
        # Append the string itself. Feeding it byte by byte through
        # String#<< treats each byte as a codepoint on Ruby 1.9+, which
        # corrupts multi-byte characters.
        @cur_string << e
      else
        push_element e
      end
    end
  end

  # Moves the buffered text, if any, into the element list and starts a new
  # empty buffer. Returns nil.
  def push_string_if_present
    if @cur_string.size > 0
      @elements << @cur_string
      @cur_string = ""
    end
    nil
  end

  # Appends the character +c+ to the text buffer. Returns nil.
  def push_char(c)
    @cur_string << c
    nil
  end

  # push space into current string if
  # there isn't one
  def push_space
    last = @cur_string[@cur_string.size-1]
    @cur_string << ?\s if last != ?\s
  end

  # Returns a human-readable dump of the elements read so far and of the
  # text buffer when it is not empty.
  def describe
    lines = @elements.map{|x| x.inspect}.join("\n")
    s = "Elements read in span: \n" +
      add_tabs(lines,1, ' -')+"\n"

    if @cur_string.size > 0
      s += "Current string: \n #{@cur_string.inspect}\n"
    end
    s
  end
end # SpanContext
744
+
745
+ end end end end # module MaRuKu; module In; module Markdown; module SpanLevelParser
746
+