remi-maruku 0.5.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (183) hide show
  1. data/Rakefile +73 -0
  2. data/bin/marudown +29 -0
  3. data/bin/maruku +176 -0
  4. data/bin/marutest +338 -0
  5. data/bin/marutex +31 -0
  6. data/docs/changelog.md +334 -0
  7. data/docs/div_syntax.md +36 -0
  8. data/docs/entity_test.md +23 -0
  9. data/docs/markdown_syntax.md +899 -0
  10. data/docs/maruku.md +346 -0
  11. data/docs/math.md +194 -0
  12. data/docs/other_stuff.md +51 -0
  13. data/docs/proposal.md +309 -0
  14. data/lib/maruku.rb +141 -0
  15. data/lib/maruku/attributes.rb +227 -0
  16. data/lib/maruku/defaults.rb +70 -0
  17. data/lib/maruku/errors_management.rb +92 -0
  18. data/lib/maruku/ext/div.rb +100 -0
  19. data/lib/maruku/ext/math.rb +41 -0
  20. data/lib/maruku/ext/math/elements.rb +27 -0
  21. data/lib/maruku/ext/math/latex_fix.rb +11 -0
  22. data/lib/maruku/ext/math/mathml_engines/blahtex.rb +104 -0
  23. data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +29 -0
  24. data/lib/maruku/ext/math/mathml_engines/none.rb +20 -0
  25. data/lib/maruku/ext/math/mathml_engines/ritex.rb +24 -0
  26. data/lib/maruku/ext/math/parsing.rb +105 -0
  27. data/lib/maruku/ext/math/to_html.rb +170 -0
  28. data/lib/maruku/ext/math/to_latex.rb +22 -0
  29. data/lib/maruku/helpers.rb +260 -0
  30. data/lib/maruku/input/charsource.rb +326 -0
  31. data/lib/maruku/input/extensions.rb +69 -0
  32. data/lib/maruku/input/html_helper.rb +189 -0
  33. data/lib/maruku/input/linesource.rb +111 -0
  34. data/lib/maruku/input/parse_block.rb +613 -0
  35. data/lib/maruku/input/parse_doc.rb +227 -0
  36. data/lib/maruku/input/parse_span_better.rb +732 -0
  37. data/lib/maruku/input/rubypants.rb +225 -0
  38. data/lib/maruku/input/type_detection.rb +144 -0
  39. data/lib/maruku/input_textile2/t2_parser.rb +163 -0
  40. data/lib/maruku/maruku.rb +33 -0
  41. data/lib/maruku/output/s5/fancy.rb +756 -0
  42. data/lib/maruku/output/s5/to_s5.rb +125 -0
  43. data/lib/maruku/output/to_html.rb +971 -0
  44. data/lib/maruku/output/to_latex.rb +563 -0
  45. data/lib/maruku/output/to_latex_entities.rb +367 -0
  46. data/lib/maruku/output/to_latex_strings.rb +64 -0
  47. data/lib/maruku/output/to_markdown.rb +164 -0
  48. data/lib/maruku/output/to_s.rb +53 -0
  49. data/lib/maruku/string_utils.rb +191 -0
  50. data/lib/maruku/structures.rb +165 -0
  51. data/lib/maruku/structures_inspect.rb +87 -0
  52. data/lib/maruku/structures_iterators.rb +61 -0
  53. data/lib/maruku/tests/benchmark.rb +82 -0
  54. data/lib/maruku/tests/new_parser.rb +370 -0
  55. data/lib/maruku/tests/tests.rb +136 -0
  56. data/lib/maruku/textile2.rb +1 -0
  57. data/lib/maruku/toc.rb +199 -0
  58. data/lib/maruku/usage/example1.rb +33 -0
  59. data/lib/maruku/version.rb +40 -0
  60. data/tests/bugs/code_in_links.md +16 -0
  61. data/tests/bugs/complex_escaping.md +4 -0
  62. data/tests/math/syntax.md +46 -0
  63. data/tests/math_usage/document.md +13 -0
  64. data/tests/others/abbreviations.md +11 -0
  65. data/tests/others/blank.md +4 -0
  66. data/tests/others/code.md +5 -0
  67. data/tests/others/code2.md +8 -0
  68. data/tests/others/code3.md +16 -0
  69. data/tests/others/email.md +4 -0
  70. data/tests/others/entities.md +19 -0
  71. data/tests/others/escaping.md +16 -0
  72. data/tests/others/extra_dl.md +101 -0
  73. data/tests/others/extra_header_id.md +13 -0
  74. data/tests/others/extra_table1.md +40 -0
  75. data/tests/others/footnotes.md +17 -0
  76. data/tests/others/headers.md +10 -0
  77. data/tests/others/hrule.md +10 -0
  78. data/tests/others/images.md +20 -0
  79. data/tests/others/inline_html.md +42 -0
  80. data/tests/others/links.md +38 -0
  81. data/tests/others/list1.md +4 -0
  82. data/tests/others/list2.md +5 -0
  83. data/tests/others/list3.md +8 -0
  84. data/tests/others/lists.md +32 -0
  85. data/tests/others/lists_after_paragraph.md +44 -0
  86. data/tests/others/lists_ol.md +39 -0
  87. data/tests/others/misc_sw.md +105 -0
  88. data/tests/others/one.md +1 -0
  89. data/tests/others/paragraphs.md +13 -0
  90. data/tests/others/sss06.md +352 -0
  91. data/tests/others/test.md +4 -0
  92. data/tests/s5/s5profiling.md +48 -0
  93. data/tests/unittest/abbreviations.md +72 -0
  94. data/tests/unittest/alt.md +30 -0
  95. data/tests/unittest/attributes/att2.md +34 -0
  96. data/tests/unittest/attributes/att3.md +45 -0
  97. data/tests/unittest/attributes/attributes.md +82 -0
  98. data/tests/unittest/attributes/circular.md +43 -0
  99. data/tests/unittest/attributes/default.md +38 -0
  100. data/tests/unittest/blank.md +39 -0
  101. data/tests/unittest/blanks_in_code.md +106 -0
  102. data/tests/unittest/bug_def.md +29 -0
  103. data/tests/unittest/bug_table.md +67 -0
  104. data/tests/unittest/code.md +53 -0
  105. data/tests/unittest/code2.md +46 -0
  106. data/tests/unittest/code3.md +102 -0
  107. data/tests/unittest/data_loss.md +42 -0
  108. data/tests/unittest/divs/div1.md +204 -0
  109. data/tests/unittest/divs/div2.md +34 -0
  110. data/tests/unittest/divs/div3_nest.md +62 -0
  111. data/tests/unittest/easy.md +28 -0
  112. data/tests/unittest/email.md +33 -0
  113. data/tests/unittest/encoding/iso-8859-1.md +38 -0
  114. data/tests/unittest/encoding/utf-8.md +33 -0
  115. data/tests/unittest/entities.md +124 -0
  116. data/tests/unittest/escaping.md +89 -0
  117. data/tests/unittest/extra_dl.md +72 -0
  118. data/tests/unittest/extra_header_id.md +86 -0
  119. data/tests/unittest/extra_table1.md +55 -0
  120. data/tests/unittest/footnotes.md +126 -0
  121. data/tests/unittest/headers.md +54 -0
  122. data/tests/unittest/hex_entities.md +50 -0
  123. data/tests/unittest/hrule.md +60 -0
  124. data/tests/unittest/html2.md +38 -0
  125. data/tests/unittest/html3.md +47 -0
  126. data/tests/unittest/html4.md +42 -0
  127. data/tests/unittest/html5.md +38 -0
  128. data/tests/unittest/ie.md +82 -0
  129. data/tests/unittest/images.md +114 -0
  130. data/tests/unittest/images2.md +46 -0
  131. data/tests/unittest/inline_html.md +260 -0
  132. data/tests/unittest/inline_html2.md +36 -0
  133. data/tests/unittest/links.md +197 -0
  134. data/tests/unittest/list1.md +66 -0
  135. data/tests/unittest/list2.md +76 -0
  136. data/tests/unittest/list3.md +88 -0
  137. data/tests/unittest/list4.md +116 -0
  138. data/tests/unittest/lists.md +241 -0
  139. data/tests/unittest/lists11.md +31 -0
  140. data/tests/unittest/lists6.md +54 -0
  141. data/tests/unittest/lists7.md +79 -0
  142. data/tests/unittest/lists7b.md +136 -0
  143. data/tests/unittest/lists8.md +83 -0
  144. data/tests/unittest/lists9.md +85 -0
  145. data/tests/unittest/lists_after_paragraph.md +268 -0
  146. data/tests/unittest/lists_ol.md +324 -0
  147. data/tests/unittest/loss.md +29 -0
  148. data/tests/unittest/math/equations.md +69 -0
  149. data/tests/unittest/math/inline.md +66 -0
  150. data/tests/unittest/math/math2.md +110 -0
  151. data/tests/unittest/math/notmath.md +40 -0
  152. data/tests/unittest/math/table.md +43 -0
  153. data/tests/unittest/math/table2.md +60 -0
  154. data/tests/unittest/misc_sw.md +615 -0
  155. data/tests/unittest/notyet/escape.md +36 -0
  156. data/tests/unittest/notyet/header_after_par.md +81 -0
  157. data/tests/unittest/notyet/ticks.md +31 -0
  158. data/tests/unittest/notyet/triggering.md +206 -0
  159. data/tests/unittest/olist.md +64 -0
  160. data/tests/unittest/one.md +28 -0
  161. data/tests/unittest/paragraph.md +29 -0
  162. data/tests/unittest/paragraph_rules/dont_merge_ref.md +57 -0
  163. data/tests/unittest/paragraph_rules/tab_is_blank.md +39 -0
  164. data/tests/unittest/paragraphs.md +66 -0
  165. data/tests/unittest/pending/amps.md +29 -0
  166. data/tests/unittest/pending/empty_cells.md +53 -0
  167. data/tests/unittest/pending/link.md +103 -0
  168. data/tests/unittest/recover/recover_links.md +28 -0
  169. data/tests/unittest/references/long_example.md +88 -0
  170. data/tests/unittest/references/spaces_and_numbers.md +28 -0
  171. data/tests/unittest/smartypants.md +171 -0
  172. data/tests/unittest/syntax_hl.md +80 -0
  173. data/tests/unittest/table_attributes.md +52 -0
  174. data/tests/unittest/test.md +32 -0
  175. data/tests/unittest/wrapping.md +88 -0
  176. data/tests/unittest/xml.md +54 -0
  177. data/tests/unittest/xml2.md +34 -0
  178. data/tests/unittest/xml3.md +44 -0
  179. data/tests/unittest/xml_instruction.md +72 -0
  180. data/tests/utf8-files/simple.md +1 -0
  181. data/unit_test_block.sh +5 -0
  182. data/unit_test_span.sh +2 -0
  183. metadata +243 -0
@@ -0,0 +1,227 @@
1
+ #--
2
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
+ #
4
+ # This file is part of Maruku.
5
+ #
6
+ # Maruku is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Maruku is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Maruku; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
20
+
21
+
22
+ require 'iconv'
23
+
24
+
25
+ module MaRuKu; module In; module Markdown; module BlockLevelParser
26
+
27
+ def parse_doc(s)
28
+ # FIXME \r\n => \n
29
+ meta2 = parse_email_headers(s)
30
+ data = meta2[:data]
31
+ meta2.delete :data
32
+
33
+ self.attributes.merge! meta2
34
+
35
+ =begin maruku_doc
36
+ Attribute: encoding
37
+ Scope: document
38
+ Summary: Encoding for the document.
39
+
40
+ If the `encoding` attribute is specified, then the content
41
+ will be converted from the specified encoding to UTF-8.
42
+
43
+ Conversion happens using the `iconv` library.
44
+ =end
45
+
46
+ enc = self.attributes[:encoding]
47
+ self.attributes.delete :encoding
48
+ if enc && enc.downcase != 'utf-8'
49
+ converted = Iconv.new('utf-8', enc).iconv(data)
50
+
51
+ # puts "Data: #{data.inspect}: #{data}"
52
+ # puts "Conv: #{converted.inspect}: #{converted}"
53
+
54
+ data = converted
55
+ end
56
+
57
+ @children = parse_text_as_markdown(data)
58
+
59
+ if true #markdown_extra?
60
+ self.search_abbreviations
61
+ self.substitute_markdown_inside_raw_html
62
+ end
63
+
64
+ toc = create_toc
65
+
66
+ # use title if not set
67
+ if not self.attributes[:title] and toc.header_element
68
+ title = toc.header_element.to_s
69
+ self.attributes[:title] = title
70
+ # puts "Set document title to #{title}"
71
+ end
72
+
73
+ # save for later use
74
+ self.toc = toc
75
+
76
+ # Now do the attributes magic
77
+ each_element do |e|
78
+ # default attribute list
79
+ if default = self.ald[e.node_type.to_s]
80
+ expand_attribute_list(default, e.attributes)
81
+ end
82
+ expand_attribute_list(e.al, e.attributes)
83
+ # puts "#{e.node_type}: #{e.attributes.inspect}"
84
+ end
85
+
86
+ =begin maruku_doc
87
+ Attribute: unsafe_features
88
+ Scope: global
89
+ Summary: Enables execution of XML instructions.
90
+
91
+ Disabled by default because of security concerns.
92
+ =end
93
+
94
+ if Maruku::Globals[:unsafe_features]
95
+ self.execute_code_blocks
96
+ # TODO: remove executed code blocks
97
+ end
98
+ end
99
+
100
# Expands an attribute list into a Hash.
#
# al::     enumerable of [key, value] pairs (the attribute list)
# result:: Hash receiving the expanded attributes; modified in place
#
# Handling per key:
# * :class  - values accumulate, separated by a single space
# * :id     - the last value wins
# * :ref    - a label known to self.ald is expanded recursively; meeting
#             the same label again is reported through maruku_error as a
#             circular reference. An unknown label is appended to
#             result[:unresolved_references] and also stored as a
#             boolean attribute (result[label.to_sym] = true).
# * others  - stored under the key converted to a Symbol
#
# The strings held by +al+ are never modified.
def expand_attribute_list(al, result)
  al.each do |k, v|
    case k
    when :class
      result[:class] = result[:class] ? result[:class] + " " + v : v
    when :id
      result[:id] = v
    when :ref
      if self.ald[v]
        already = (result[:expanded_references] ||= [])
        seen_before = already.include?(v)
        already.push v
        if seen_before
          maruku_error "Circular reference between labels.\n\n"+
            "Label #{v.inspect} calls itself via recursion.\nThe recursion is "+
            (already.map{|x| x.inspect}.join(' => '))
        else
          expand_attribute_list(self.ald[v], result)
        end
      else
        # Build a new string every time: storing +v+ itself and appending
        # to it later would change the label inside the caller's list.
        known = result[:unresolved_references]
        result[:unresolved_references] = known ? "#{known} #{v}" : v.dup

        result[v.to_sym] = true
      end
    else
      result[k.to_sym] = v
    end
  end
end
138
+
139
# Evaluates +code+ (a String of Ruby source) with +object+ as self, using
# instance_eval, and returns the resulting value.
#
# If compiling or running the code raises, the failure is reported through
# maruku_error (offending code, exception and backtrace) and nil is
# returned instead.
#
# NOTE(review): this runs arbitrary Ruby taken from the document. In this
# file it is reached only through execute_code_blocks.
def safe_execute_code(object, code)
  object.instance_eval(code)
rescue ScriptError, StandardError => e
  # ScriptError covers the SyntaxError raised for malformed code, which is
  # not a StandardError. Signals and SystemExit are deliberately not
  # swallowed.
  # Exception has no public #caller; the stack is in #backtrace.
  details = e.inspect + "\n" + (e.backtrace || []).join("\n")
  maruku_error "Exception while executing this:\n"+
    add_tabs(code, 1, ">")+
    "\nThe error was:\n"+
    add_tabs(details, 1, "|")
  nil
end
160
+
161
# Runs every <?maruku ... ?> XML instruction of the document through
# safe_execute_code, with the instruction element itself as the
# evaluation context. A String result is printed to standard output;
# instructions with another target are ignored.
def execute_code_blocks
  each_element(:xml_instr) do |instr|
    next unless instr.target == 'maruku'

    outcome = safe_execute_code(instr, instr.code)
    puts "Result is : #{outcome.inspect}" if outcome.kind_of?(String)
  end
end
171
+
172
# For each (abbreviation, title) pair in self.abbreviations, walks the
# document strings with replace_each_string and splits a string at the
# first literal occurrence of the abbreviation into
# [text before, md_abbr element, text after]. Strings without a match
# are returned unchanged.
def search_abbreviations
  abbreviations.each do |abbrev, title|
    pattern = Regexp.new(Regexp.escape(abbrev))
    replace_each_string do |text|
      # bug if many abbreviations are present (agorf)
      found = pattern.match(text)
      next text unless found

      abbr_title = title ? title.dup : nil
      element = md_abbr(abbrev.dup, abbr_title)
      [found.pre_match, element, found.post_match]
    end
  end
end
186
+
187
+ include REXML
188
# (PHP Markdown extra) Processes Markdown written inside raw HTML.
#
# For every :raw_html element whose @parsed_html document is set, each
# tag carrying a `markdown` attribute has that attribute removed and the
# text nodes below it replaced by the HTML nodes generated from their
# (stripped) text. The text is parsed at block level when the attribute
# value is 'block' or the tag is a 'div', and at span level otherwise.
# Text outside such tags is left untouched.
def substitute_markdown_inside_raw_html
  # parse block-level markdown elements in these HTML tags
  block_tags = ['div']

  self.each_element(:raw_html) do |raw|
    doc = raw.instance_variable_get :@parsed_html
    next unless doc # not valid html

    # use xpath to find elements with 'markdown' attribute
    REXML::XPath.match(doc, "//*[attribute::markdown]").each do |tag|
      # should we parse block-level or span-level?
      parse_blocks = (tag.attributes['markdown'] == 'block') ||
                     block_tags.include?(tag.name)
      # remove 'markdown' attribute
      tag.delete_attribute 'markdown'

      # Select the text nodes below this tag only. The path must be
      # relative (".//"): a leading "//" starts from the document root
      # and would select every text node of the document.
      REXML::XPath.match(tag, ".//text()").each do |original_text|
        s = original_text.value.strip
        next if s.size == 0

        el = md_el(:dummy,
                   parse_blocks ? parse_text_as_markdown(s) :
                                  parse_lines_as_span([s]))
        parent = original_text.parent
        el.children_to_html.each do |x|
          parent.insert_before(original_text, x)
        end
        parent.delete(original_text)
      end
    end
  end
end
226
+
227
+ end end end end
@@ -0,0 +1,732 @@
1
+ #--
2
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
+ #
4
+ # This file is part of Maruku.
5
+ #
6
+ # Maruku is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Maruku is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Maruku; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
20
+
21
+
22
+ require 'set'
23
+
24
+ module MaRuKu; module In; module Markdown; module SpanLevelParser
25
+ include MaRuKu::Helpers
26
+
27
+ EscapedCharInText =
28
+ Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>]
29
+
30
+ EscapedCharInQuotes =
31
+ Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>,?',?"]
32
+
33
+ EscapedCharInInlineCode = [?\\,?`]
34
+
35
+ def parse_lines_as_span(lines, parent=nil)
36
+ parse_span_better lines.join("\n"), parent
37
+ end
38
+
39
+ def parse_span_better(string, parent=nil)
40
+ if not string.kind_of? String then
41
+ error "Passed #{string.class}." end
42
+
43
+ st = (string + "")
44
+ st.freeze
45
+ src = CharSource.new(st, parent)
46
+ read_span(src, EscapedCharInText, [nil])
47
+ end
48
+
49
+ # This is the main loop for reading span elements
50
+ #
51
+ # It's long, but not *complex* or difficult to understand.
52
+ #
53
+ #
54
+ def read_span(src, escaped, exit_on_chars, exit_on_strings=nil)
55
+ con = SpanContext.new
56
+ c = d = nil
57
+ while true
58
+ c = src.cur_char
59
+
60
+ # This is only an optimization which cuts 50% of the time used.
61
+ # (but you can't use a-zA-z in exit_on_chars)
62
+ if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z)))
63
+ con.cur_string << src.shift_char
64
+ next
65
+ end
66
+
67
+ break if exit_on_chars && exit_on_chars.include?(c)
68
+ break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x}
69
+
70
+ # check if there are extensions
71
+ if check_span_extensions(src, con)
72
+ next
73
+ end
74
+
75
+ case c = src.cur_char
76
+ when ?\ # it's space (32)
77
+ if src.cur_chars_are " \n"
78
+ src.ignore_chars(3)
79
+ con.push_element md_br()
80
+ next
81
+ else
82
+ src.ignore_char
83
+ con.push_space
84
+ end
85
+ when ?\n, ?\t
86
+ src.ignore_char
87
+ con.push_space
88
+ when ?`
89
+ read_inline_code(src,con)
90
+ when ?<
91
+ # It could be:
92
+ # 1) HTML "<div ..."
93
+ # 2) HTML "<!-- ..."
94
+ # 3) url "<http:// ", "<ftp:// ..."
95
+ # 4) email "<andrea@... ", "<mailto:andrea@..."
96
+ # 5) on itself! "a < b "
97
+ # 6) Start of <<guillemettes>>
98
+
99
+ case d = src.next_char
100
+ when ?<; # guillemettes
101
+ src.ignore_chars(2)
102
+ con.push_char ?<
103
+ con.push_char ?<
104
+ when ?!;
105
+ if src.cur_chars_are '<!--'
106
+ read_inline_html(src, con)
107
+ else
108
+ con.push_char src.shift_char
109
+ end
110
+ when ??
111
+ read_xml_instr_span(src, con)
112
+ when ?\ , ?\t
113
+ con.push_char src.shift_char
114
+ else
115
+ if src.next_matches(/<mailto:/) or
116
+ src.next_matches(/<[\w\.]+\@/)
117
+ read_email_el(src, con)
118
+ elsif src.next_matches(/<\w+:/)
119
+ read_url_el(src, con)
120
+ elsif src.next_matches(/<\w/)
121
+ #puts "This is HTML: #{src.cur_chars(20)}"
122
+ read_inline_html(src, con)
123
+ else
124
+ #puts "This is NOT HTML: #{src.cur_chars(20)}"
125
+ con.push_char src.shift_char
126
+ end
127
+ end
128
+ when ?\\
129
+ d = src.next_char
130
+ if d == ?'
131
+ src.ignore_chars(2)
132
+ con.push_element md_entity('apos')
133
+ elsif d == ?"
134
+ src.ignore_chars(2)
135
+ con.push_element md_entity('quot')
136
+ elsif escaped.include? d
137
+ src.ignore_chars(2)
138
+ con.push_char d
139
+ else
140
+ con.push_char src.shift_char
141
+ end
142
+ when ?[
143
+ if markdown_extra? && src.next_char == ?^
144
+ read_footnote_ref(src,con)
145
+ else
146
+ read_link(src, con)
147
+ end
148
+ when ?!
149
+ if src.next_char == ?[
150
+ read_image(src, con)
151
+ else
152
+ con.push_char src.shift_char
153
+ end
154
+ when ?&
155
+ # named references
156
+ if m = src.read_regexp(/\&([\w\d]+);/)
157
+ con.push_element md_entity(m[1])
158
+ # numeric
159
+ elsif m = src.read_regexp(/\&\#(x)?([\w\d]+);/)
160
+ num = m[1] ? m[2].hex : m[2].to_i
161
+ con.push_element md_entity(num)
162
+ else
163
+ con.push_char src.shift_char
164
+ end
165
+ when ?*
166
+ if not src.next_char
167
+ maruku_error "Opening * as last char.", src, con
168
+ maruku_recover "Threating as literal"
169
+ con.push_char src.shift_char
170
+ else
171
+ follows = src.cur_chars(4)
172
+ if follows =~ /^\*\*\*[^\s\*]/
173
+ con.push_element read_emstrong(src,'***')
174
+ elsif follows =~ /^\*\*[^\s\*]/
175
+ con.push_element read_strong(src,'**')
176
+ elsif follows =~ /^\*[^\s\*]/
177
+ con.push_element read_em(src,'*')
178
+ else # * is just a normal char
179
+ con.push_char src.shift_char
180
+ end
181
+ end
182
+ when ?_
183
+ if not src.next_char
184
+ maruku_error "Opening _ as last char", src, con
185
+ maruku_recover "Threating as literal", src, con
186
+ con.push_char src.shift_char
187
+ else
188
+ follows = src.cur_chars(4)
189
+ if follows =~ /^\_\_\_[^\s\_]/
190
+ con.push_element read_emstrong(src,'___')
191
+ elsif follows =~ /^\_\_[^\s\_]/
192
+ con.push_element read_strong(src,'__')
193
+ elsif follows =~ /^\_[^\s\_]/
194
+ con.push_element read_em(src,'_')
195
+ else # _ is just a normal char
196
+ con.push_char src.shift_char
197
+ end
198
+ end
199
+ when ?{ # extension
200
+ if [?#, ?., ?:].include? src.next_char
201
+ src.ignore_char # {
202
+ interpret_extension(src, con, [?}])
203
+ src.ignore_char # }
204
+ else
205
+ con.push_char src.shift_char
206
+ end
207
+ when nil
208
+ maruku_error( ("Unclosed span (waiting for %s"+
209
+ "#{exit_on_strings.inspect})") % [
210
+ exit_on_chars ? "#{exit_on_chars.inspect} or" : ""],
211
+ src,con)
212
+ break
213
+ else # normal text
214
+ con.push_char src.shift_char
215
+ end # end case
216
+ end # end while true
217
+ con.push_string_if_present
218
+
219
+ # Assign IAL to elements
220
+ merge_ial(con.elements, src, con)
221
+
222
+
223
+ # Remove leading space
224
+ if (s = con.elements.first).kind_of? String
225
+ if s[0] == ?\ then con.elements[0] = s[1, s.size-1] end
226
+ con.elements.shift if s.size == 0
227
+ end
228
+
229
+ # Remove final spaces
230
+ if (s = con.elements.last).kind_of? String
231
+ s.chop! if s[-1] == ?\
232
+ con.elements.pop if s.size == 0
233
+ end
234
+
235
+ educated = educate(con.elements)
236
+
237
+ educated
238
+ end
239
+
240
+
241
+ def read_xml_instr_span(src, con)
242
+ src.ignore_chars(2) # starting <?
243
+
244
+ # read target <?target code... ?>
245
+ target = if m = src.read_regexp(/(\w+)/)
246
+ m[1]
247
+ else
248
+ ''
249
+ end
250
+
251
+ delim = "?>"
252
+
253
+ code =
254
+ read_simple(src, escaped=[], break_on_chars=[],
255
+ break_on_strings=[delim])
256
+
257
+ src.ignore_chars delim.size
258
+
259
+ code = (code || "").strip
260
+ con.push_element md_xml_instr(target, code)
261
+ end
262
+
263
+ # Start: cursor on character **after** '{'
264
+ # End: cursor on '}' or EOF
265
+ def interpret_extension(src, con, break_on_chars)
266
+ case src.cur_char
267
+ when ?:
268
+ src.ignore_char # :
269
+ extension_meta(src, con, break_on_chars)
270
+ when ?#, ?.
271
+ extension_meta(src, con, break_on_chars)
272
+ else
273
+ stuff = read_simple(src, escaped=[?}], break_on_chars, [])
274
+ if stuff =~ /^(\w+\s|[^\w])/
275
+ extension_id = $1.strip
276
+ if false
277
+ else
278
+ maruku_recover "I don't know what to do with extension '#{extension_id}'\n"+
279
+ "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
280
+ extension_meta(src, con, break_on_chars)
281
+ end
282
+ else
283
+ maruku_recover "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
284
+ extension_meta(src, con, break_on_chars)
285
+ end
286
+ end
287
+ end
288
+
289
# Reads the content of a "{...}" extension as attribute-list meta-data.
#
# If the text at the cursor starts with "name:", what follows is an ALD
# (attribute list definition): the list is registered in self.doc.ald
# under that name and an md_ald element is pushed on +con+.
# Otherwise the list is an IAL (inline attribute list): an md_ial element
# is pushed and the ALD table is left untouched.
def extension_meta(src, con, break_on_chars)
  if m = src.read_regexp(/([^\s\:\"\']+):/)
    name = m[1]
    al = read_attribute_list(src, con, break_on_chars)
    # puts "#{name}=#{al.inspect}"
    self.doc.ald[name] = al
    con.push md_ald(name, al)
  else
    # An IAL has no name, so there is nothing to register: writing to
    # the ALD table here would store the list under a nil key.
    al = read_attribute_list(src, con, break_on_chars)
    con.push md_ial(al)
  end
end
302
+
303
+ def read_url_el(src,con)
304
+ src.ignore_char # leading <
305
+ url = read_simple(src, [], [?>])
306
+ src.ignore_char # closing >
307
+
308
+ con.push_element md_url(url)
309
+ end
310
+
311
+ def read_email_el(src,con)
312
+ src.ignore_char # leading <
313
+ mail = read_simple(src, [], [?>])
314
+ src.ignore_char # closing >
315
+
316
+ address = mail.gsub(/^mailto:/,'')
317
+ con.push_element md_email(address)
318
+ end
319
+
320
# Reads a URL from +src+, stopping at one of the +break_on+ characters.
#
# A quote at the cursor is reported through +error+. A URL wrapped in
# angle brackets ("<...>") is returned without them. Returns nil when
# the resulting URL is empty.
def read_url(src, break_on)
  error 'Invalid char for url', src if [?',?"].include?(src.cur_char)

  url = read_simple(src, [], break_on) || ""
  url = url[1, url.size-2] if url[0] == ?< && url[-1] == ?>

  url.size == 0 ? nil : url
end
340
+
341
+
342
+ def read_quoted_or_unquoted(src, con, escaped, exit_on_chars)
343
+ case src.cur_char
344
+ when ?', ?"
345
+ read_quoted(src, con)
346
+ else
347
+ read_simple(src, escaped, exit_on_chars)
348
+ end
349
+ end
350
+
351
# Tries to read a quoted value: consumes the opening quote, the text up
# to the matching quote (read with read_simple and the
# EscapedCharInQuotes escapes) and the closing quote.
# Returns the text read; returns nil, consuming nothing, when the cursor
# is not on ' or ".
def read_quoted(src, con)
  # puts "Asked to read quote from: #{src.cur_chars(10).inspect}"
  return nil unless [?', ?"].include?(src.cur_char)

  quote_char = src.shift_char # opening quote
  string = read_simple(src, EscapedCharInQuotes, [quote_char])
  src.ignore_char # closing quote
  string
end
365
+
366
# Reads a simple string (no formatting) from +src+ until the cursor is on
# one of +exit_on_chars+ or at the start of one of +exit_on_strings+.
# The delimiter is left in the stream. Either list may be nil.
#
# A backslash followed by one of the +escaped+ characters yields that
# character alone; any other backslash is kept as it is.
#
# Returns the text read, or nil if it is empty. If the input ends before
# a delimiter is found, the problem is reported through maruku_error and
# maruku_recover, and the text read so far is returned.
def read_simple(src, escaped, exit_on_chars, exit_on_strings=nil)
  text = ""
  while true
    # puts "Reading simple #{text.inspect}"
    c = src.cur_char
    break if exit_on_chars && exit_on_chars.include?(c)
    break if exit_on_strings &&
             exit_on_strings.any? {|x| src.cur_chars_are x}

    case c
    when nil
      # exit_on_chars is optional: describing the delimiters must not
      # fail while reporting the real problem.
      delimiters = (exit_on_chars || []).map{|x|""<<x}
      s= "String finished while reading (break on "+
         "#{delimiters.inspect})"+
         " already read: #{text.inspect}"
      maruku_error s, src
      maruku_recover "I boldly continue", src
      break
    when ?\\
      d = src.next_char
      if escaped && escaped.include?(d)
        src.ignore_chars(2)
        text << d
      else
        text << src.shift_char
      end
    else
      text << src.shift_char
    end
  end
  # puts "Read simple #{text.inspect}"
  text.empty? ? nil : text
end
408
+
409
+ def read_em(src, delim)
410
+ src.ignore_char
411
+ children = read_span(src, EscapedCharInText, nil, [delim])
412
+ src.ignore_char
413
+ md_em(children)
414
+ end
415
+
416
+ def read_strong(src, delim)
417
+ src.ignore_chars(2)
418
+ children = read_span(src, EscapedCharInText, nil, [delim])
419
+ src.ignore_chars(2)
420
+ md_strong(children)
421
+ end
422
+
423
+ def read_emstrong(src, delim)
424
+ src.ignore_chars(3)
425
+ children = read_span(src, EscapedCharInText, nil, [delim])
426
+ src.ignore_chars(3)
427
+ md_emstrong(children)
428
+ end
429
+
430
+ SPACE = ?\ # = 32
431
+
432
+ # R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
433
+ R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
434
+
435
+ # Reads a bracketed id "[refid]". Consumes also both brackets.
436
+ def read_ref_id(src, con)
437
+ src.ignore_char # [
438
+ src.consume_whitespace
439
+ # puts "Next: #{src.cur_chars(10).inspect}"
440
+ if m = src.read_regexp(R_REF_ID)
441
+ # puts "Got: #{m[1].inspect} Ignored: #{m[2].inspect}"
442
+ # puts "Then: #{src.cur_chars(10).inspect}"
443
+ m[1]
444
+ else
445
+ nil
446
+ end
447
+ end
448
+
449
+ def read_footnote_ref(src,con)
450
+ ref = read_ref_id(src,con)
451
+ con.push_element md_foot_ref(ref)
452
+ end
453
+
454
+ def read_inline_html(src, con)
455
+ h = HTMLHelper.new
456
+ begin
457
+ # This is our current buffer in the context
458
+ next_stuff = src.current_remaining_buffer
459
+
460
+ consumed = 0
461
+ while true
462
+ if consumed >= next_stuff.size
463
+ maruku_error "Malformed HTML starting at #{next_stuff.inspect}", src, con
464
+ break
465
+ end
466
+
467
+ h.eat_this next_stuff[consumed].chr; consumed += 1
468
+ break if h.is_finished?
469
+ end
470
+ src.ignore_chars(consumed)
471
+ con.push_element md_html(h.stuff_you_read)
472
+
473
+ #start = src.current_remaining_buffer
474
+ # h.eat_this start
475
+ # if not h.is_finished?
476
+ # error "inline_html: Malformed:\n "+
477
+ # "#{start.inspect}\n #{h.inspect}",src,con
478
+ # end
479
+ #
480
+ # consumed = start.size - h.rest.size
481
+ # if consumed > 0
482
+ # con.push_element md_html(h.stuff_you_read)
483
+ # src.ignore_chars(consumed)
484
+ # else
485
+ # puts "HTML helper did not work on #{start.inspect}"
486
+ # con.push_char src.shift_char
487
+ # end
488
+ rescue Exception => e
489
+ maruku_error "Bad html: \n" +
490
+ add_tabs(e.inspect+e.backtrace.join("\n"),1,'>'),
491
+ src,con
492
+ maruku_recover "I will try to continue after bad HTML.", src, con
493
+ con.push_char src.shift_char
494
+ end
495
+ end
496
+
497
# Reads an inline code span and pushes an md_code element on +con+.
#
# The cursor is on the first opening backtick. The opening run of
# backticks is counted and the code extends up to the next run of the
# same length, which is consumed as well. When more than one backtick is
# used, one leading and one trailing space of the code are dropped.
# Empty code (nothing before the closing ticks, or input ending right
# after the opening ticks) gives md_code("").
def read_inline_code(src, con)
  # Count the number of ticks
  num_ticks = 0
  while src.cur_char == ?`
    num_ticks += 1
    src.ignore_char
  end
  # We will read until this string
  end_string = "`"*num_ticks

  # read_simple returns nil when it read nothing; keep working on a
  # string so the space trimming below cannot fail on nil.
  code = read_simple(src, [], [], [end_string]) || ""

  # puts "Now I expects #{num_ticks} ticks: #{src.cur_chars(10).inspect}"
  src.ignore_chars num_ticks

  if num_ticks > 1
    # Ignore at most one space
    code = code[1, code.size-1] if code[0] == SPACE
    # drop last space
    code = code[0, code.size-1] if code[-1] == SPACE
  end

  # puts "Read `` code: #{code.inspect}; after: #{src.cur_chars(10).inspect} "
  con.push_element md_code(code)
end
527
+
528
+ def read_link(src, con)
529
+ # we read the string and see what happens
530
+ src.ignore_char # opening bracket
531
+ children = read_span(src, EscapedCharInText, [?]])
532
+ src.ignore_char # closing bracket
533
+
534
+ # ignore space
535
+ if src.cur_char == SPACE and
536
+ (src.next_char == ?[ or src.next_char == ?( )
537
+ src.shift_char
538
+ end
539
+
540
+ case src.cur_char
541
+ when ?(
542
+ src.ignore_char # opening (
543
+ src.consume_whitespace
544
+ url = read_url(src, [SPACE,?\t,?)])
545
+ if not url
546
+ url = '' # no url is ok
547
+ end
548
+ src.consume_whitespace
549
+ title = nil
550
+ if src.cur_char != ?) # we have a title
551
+ quote_char = src.cur_char
552
+ title = read_quoted(src,con)
553
+
554
+ if not title
555
+ maruku_error 'Must quote title',src,con
556
+ else
557
+ # Tries to read a title with quotes: ![a](url "ti"tle")
558
+ # this is the most ugly thing in Markdown
559
+ if not src.next_matches(/\s*\)/)
560
+ # if there is not a closing par ), then read
561
+ # the rest and guess it's title with quotes
562
+ rest = read_simple(src, escaped=[], break_on_chars=[?)],
563
+ break_on_strings=[])
564
+ # chop the closing char
565
+ rest.chop!
566
+ title << quote_char << rest
567
+ end
568
+ end
569
+ end
570
+ src.consume_whitespace
571
+ closing = src.shift_char # closing )
572
+ if closing != ?)
573
+ maruku_error 'Unclosed link',src,con
574
+ maruku_recover "No closing ): I will not create"+
575
+ " the link for #{children.inspect}", src, con
576
+ con.push_elements children
577
+ return
578
+ end
579
+ con.push_element md_im_link(children,url, title)
580
+ when ?[ # link ref
581
+ ref_id = read_ref_id(src,con)
582
+ if ref_id
583
+ if ref_id.size == 0
584
+ ref_id = sanitize_ref_id(children.to_s)
585
+ else
586
+ ref_id = sanitize_ref_id(ref_id)
587
+ end
588
+ con.push_element md_link(children, ref_id)
589
+ else
590
+ maruku_error "Could not read ref_id", src, con
591
+ maruku_recover "I will not create the link for "+
592
+ "#{children.inspect}", src, con
593
+ con.push_elements children
594
+ return
595
+ end
596
+ else # empty [link]
597
+ id = sanitize_ref_id(children.to_s) #. downcase.gsub(' ','_')
598
+ con.push_element md_link(children, id)
599
+ end
600
+ end # read link
601
+
602
+ def read_image(src, con)
603
+ src.ignore_chars(2) # opening "!["
604
+ alt_text = read_span(src, EscapedCharInText, [?]])
605
+ src.ignore_char # closing bracket
606
+ # ignore space
607
+ if src.cur_char == SPACE and
608
+ (src.next_char == ?[ or src.next_char == ?( )
609
+ src.ignore_char
610
+ end
611
+ case src.cur_char
612
+ when ?(
613
+ src.ignore_char # opening (
614
+ src.consume_whitespace
615
+ url = read_url(src, [SPACE,?\t,?)])
616
+ if not url
617
+ error "Could not read url from #{src.cur_chars(10).inspect}",
618
+ src,con
619
+ end
620
+ src.consume_whitespace
621
+ title = nil
622
+ if src.cur_char != ?) # we have a title
623
+ quote_char = src.cur_char
624
+ title = read_quoted(src,con)
625
+ if not title
626
+ maruku_error 'Must quote title',src,con
627
+ else
628
+ # Tries to read a title with quotes: ![a](url "ti"tle")
629
+ # this is the most ugly thing in Markdown
630
+ if not src.next_matches(/\s*\)/)
631
+ # if there is not a closing par ), then read
632
+ # the rest and guess it's title with quotes
633
+ rest = read_simple(src, escaped=[], break_on_chars=[?)],
634
+ break_on_strings=[])
635
+ # chop the closing char
636
+ rest.chop!
637
+ title << quote_char << rest
638
+ end
639
+ end
640
+ end
641
+ src.consume_whitespace
642
+ closing = src.shift_char # closing )
643
+ if closing != ?)
644
+ error( ("Unclosed link: '"<<closing<<"'")+
645
+ " Read url=#{url.inspect} title=#{title.inspect}",src,con)
646
+ end
647
+ con.push_element md_im_image(alt_text, url, title)
648
+ when ?[ # link ref
649
+ ref_id = read_ref_id(src,con)
650
+ if not ref_id # TODO: check around
651
+ error('Reference not closed.', src, con)
652
+ ref_id = ""
653
+ end
654
+ if ref_id.size == 0
655
+ ref_id = alt_text.to_s
656
+ end
657
+
658
+ ref_id = sanitize_ref_id(ref_id)
659
+
660
+ con.push_element md_image(alt_text, ref_id)
661
+ else # no stuff
662
+ ref_id = sanitize_ref_id(alt_text.to_s)
663
+ con.push_element md_image(alt_text, ref_id)
664
+ end
665
+ end # read link
666
+
667
+
668
# Collects what the span-level parser reads: a list of finished elements
# (Strings and MDElements) plus the string currently being accumulated
# character by character.
class SpanContext
  include MaRuKu::Strings

  # Read elements
  attr_accessor :elements
  # Text accumulated since the last element was pushed
  attr_accessor :cur_string

  def initialize
    @elements = []
    @cur_string = ""
  end

  # Flushes the pending string, then appends +e+ to the elements.
  # Raises a RuntimeError unless +e+ is a String or an MDElement.
  # Returns nil.
  def push_element(e)
    unless e.kind_of?(String) or e.kind_of?(MDElement)
      raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} "
    end

    push_string_if_present
    @elements << e
    nil
  end
  alias push push_element

  # Pushes every item of +a+: a String is appended to the pending string,
  # anything else goes through push_element.
  def push_elements(a)
    a.each do |e|
      if e.kind_of? String
        # Append the string as a whole: re-adding it byte by byte turns
        # each byte of a multibyte character into a separate character
        # on Ruby 1.9 and later.
        @cur_string << e
      else
        push_element e
      end
    end
  end

  # Moves the pending string, if not empty, to the elements. Returns nil.
  def push_string_if_present
    if @cur_string.size > 0
      @elements << @cur_string
      @cur_string = ""
    end
    nil
  end

  # Appends one character to the pending string. Returns nil.
  def push_char(c)
    @cur_string << c
    nil
  end

  # push space into current string if
  # there isn't one
  def push_space
    # A plain ' ' is used instead of the ?\  character literal, whose
    # meaning depends on an easily lost trailing space.
    @cur_string << ' ' unless @cur_string[-1, 1] == ' '
  end

  # Returns a multi-line description of the elements read and of the
  # pending string, for error messages.
  def describe
    lines = @elements.map{|x| x.inspect}.join("\n")
    s = "Elements read in span: \n" +
        add_tabs(lines,1, ' -')+"\n"

    if @cur_string.size > 0
      s += "Current string: \n #{@cur_string.inspect}\n"
    end
    s
  end
end # SpanContext
730
+
731
+ end end end end # module MaRuKu; module In; module Markdown; module SpanLevelParser
732
+