patcito-maruku 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (162) hide show
  1. data/AUTHORS +23 -0
  2. data/LICENSE +340 -0
  3. data/README.md +73 -0
  4. data/bin/maruku +196 -0
  5. data/bin/marutex +4 -0
  6. data/data/entities.xml +261 -0
  7. data/docs/changelog.md +334 -0
  8. data/docs/div_syntax.md +36 -0
  9. data/docs/entity_test.md +23 -0
  10. data/docs/markdown_syntax.md +899 -0
  11. data/docs/maruku.md +346 -0
  12. data/docs/math.md +194 -0
  13. data/docs/other_stuff.md +51 -0
  14. data/docs/proposal.md +309 -0
  15. data/docs/website/src/bluecloth.md +25 -0
  16. data/docs/website/src/download.md +31 -0
  17. data/docs/website/src/maruku.md +261 -0
  18. data/docs/website/src/proposal.md +271 -0
  19. data/lib/maruku.rb +132 -0
  20. data/lib/maruku/attributes.rb +138 -0
  21. data/lib/maruku/defaults.rb +69 -0
  22. data/lib/maruku/errors.rb +89 -0
  23. data/lib/maruku/ext/div.rb +121 -0
  24. data/lib/maruku/ext/fenced_code.rb +78 -0
  25. data/lib/maruku/ext/math.rb +37 -0
  26. data/lib/maruku/ext/math/elements.rb +21 -0
  27. data/lib/maruku/ext/math/latex_fix.rb +12 -0
  28. data/lib/maruku/ext/math/mathml_engines/blahtex.rb +93 -0
  29. data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +39 -0
  30. data/lib/maruku/ext/math/mathml_engines/none.rb +21 -0
  31. data/lib/maruku/ext/math/mathml_engines/ritex.rb +24 -0
  32. data/lib/maruku/ext/math/parsing.rb +125 -0
  33. data/lib/maruku/ext/math/to_html.rb +237 -0
  34. data/lib/maruku/ext/math/to_latex.rb +36 -0
  35. data/lib/maruku/ext/yaml.rb +43 -0
  36. data/lib/maruku/helpers.rb +214 -0
  37. data/lib/maruku/input/charsource.rb +326 -0
  38. data/lib/maruku/input/extensions.rb +69 -0
  39. data/lib/maruku/input/html_helper.rb +189 -0
  40. data/lib/maruku/input/linesource.rb +111 -0
  41. data/lib/maruku/input/parse_block.rb +608 -0
  42. data/lib/maruku/input/parse_doc.rb +240 -0
  43. data/lib/maruku/input/parse_span_better.rb +746 -0
  44. data/lib/maruku/input/rubypants.rb +225 -0
  45. data/lib/maruku/input/type_detection.rb +147 -0
  46. data/lib/maruku/input_textile2/t2_parser.rb +163 -0
  47. data/lib/maruku/maruku.rb +31 -0
  48. data/lib/maruku/output/s5/fancy.rb +756 -0
  49. data/lib/maruku/output/s5/to_s5.rb +138 -0
  50. data/lib/maruku/output/to_html.rb +994 -0
  51. data/lib/maruku/output/to_latex.rb +580 -0
  52. data/lib/maruku/output/to_latex_entities.rb +101 -0
  53. data/lib/maruku/output/to_latex_strings.rb +64 -0
  54. data/lib/maruku/output/to_markdown.rb +164 -0
  55. data/lib/maruku/output/to_s.rb +54 -0
  56. data/lib/maruku/string_utils.rb +185 -0
  57. data/lib/maruku/structures.rb +143 -0
  58. data/lib/maruku/structures_inspect.rb +51 -0
  59. data/lib/maruku/structures_iterators.rb +48 -0
  60. data/lib/maruku/textile2.rb +1 -0
  61. data/lib/maruku/toc.rb +214 -0
  62. data/lib/maruku/usage/example1.rb +33 -0
  63. data/lib/maruku/version +0 -0
  64. data/lib/maruku/version.rb +54 -0
  65. data/spec/block_docs/abbreviations.md +52 -0
  66. data/spec/block_docs/alt.md +17 -0
  67. data/spec/block_docs/attributes/att2.md +20 -0
  68. data/spec/block_docs/attributes/att3.md +28 -0
  69. data/spec/block_docs/attributes/attributes.md +57 -0
  70. data/spec/block_docs/attributes/circular.md +26 -0
  71. data/spec/block_docs/attributes/default.md +22 -0
  72. data/spec/block_docs/blank.md +24 -0
  73. data/spec/block_docs/blanks_in_code.md +75 -0
  74. data/spec/block_docs/bug_def.md +16 -0
  75. data/spec/block_docs/bug_table.md +46 -0
  76. data/spec/block_docs/code.md +34 -0
  77. data/spec/block_docs/code2.md +28 -0
  78. data/spec/block_docs/code3.md +71 -0
  79. data/spec/block_docs/data_loss.md +25 -0
  80. data/spec/block_docs/divs/div1.md +167 -0
  81. data/spec/block_docs/divs/div2.md +21 -0
  82. data/spec/block_docs/divs/div3_nest.md +45 -0
  83. data/spec/block_docs/easy.md +15 -0
  84. data/spec/block_docs/email.md +20 -0
  85. data/spec/block_docs/encoding/iso-8859-1.md +23 -0
  86. data/spec/block_docs/encoding/utf-8.md +18 -0
  87. data/spec/block_docs/entities.md +94 -0
  88. data/spec/block_docs/escaping.md +67 -0
  89. data/spec/block_docs/extra_dl.md +52 -0
  90. data/spec/block_docs/extra_header_id.md +63 -0
  91. data/spec/block_docs/extra_table1.md +37 -0
  92. data/spec/block_docs/footnotes.md +97 -0
  93. data/spec/block_docs/headers.md +37 -0
  94. data/spec/block_docs/hex_entities.md +37 -0
  95. data/spec/block_docs/hrule.md +39 -0
  96. data/spec/block_docs/html2.md +22 -0
  97. data/spec/block_docs/html3.md +31 -0
  98. data/spec/block_docs/html4.md +25 -0
  99. data/spec/block_docs/html5.md +23 -0
  100. data/spec/block_docs/ie.md +49 -0
  101. data/spec/block_docs/images.md +90 -0
  102. data/spec/block_docs/images2.md +31 -0
  103. data/spec/block_docs/inline_html.md +152 -0
  104. data/spec/block_docs/inline_html2.md +21 -0
  105. data/spec/block_docs/links.md +152 -0
  106. data/spec/block_docs/links2.md +22 -0
  107. data/spec/block_docs/list1.md +46 -0
  108. data/spec/block_docs/list12.md +28 -0
  109. data/spec/block_docs/list2.md +56 -0
  110. data/spec/block_docs/list3.md +64 -0
  111. data/spec/block_docs/list4.md +89 -0
  112. data/spec/block_docs/lists.md +192 -0
  113. data/spec/block_docs/lists10.md +34 -0
  114. data/spec/block_docs/lists11.md +23 -0
  115. data/spec/block_docs/lists6.md +41 -0
  116. data/spec/block_docs/lists9.md +64 -0
  117. data/spec/block_docs/lists_after_paragraph.md +208 -0
  118. data/spec/block_docs/lists_ol.md +262 -0
  119. data/spec/block_docs/loss.md +16 -0
  120. data/spec/block_docs/math/equations.md +45 -0
  121. data/spec/block_docs/math/inline.md +46 -0
  122. data/spec/block_docs/math/math2.md +45 -0
  123. data/spec/block_docs/math/notmath.md +25 -0
  124. data/spec/block_docs/math/table.md +25 -0
  125. data/spec/block_docs/math/table2.md +42 -0
  126. data/spec/block_docs/misc_sw.md +525 -0
  127. data/spec/block_docs/notyet/escape.md +21 -0
  128. data/spec/block_docs/notyet/header_after_par.md +58 -0
  129. data/spec/block_docs/notyet/ticks.md +18 -0
  130. data/spec/block_docs/notyet/triggering.md +157 -0
  131. data/spec/block_docs/olist.md +45 -0
  132. data/spec/block_docs/one.md +15 -0
  133. data/spec/block_docs/paragraph.md +16 -0
  134. data/spec/block_docs/paragraph_rules/dont_merge_ref.md +42 -0
  135. data/spec/block_docs/paragraph_rules/tab_is_blank.md +24 -0
  136. data/spec/block_docs/paragraphs.md +46 -0
  137. data/spec/block_docs/pending/amps.md +15 -0
  138. data/spec/block_docs/pending/empty_cells.md +37 -0
  139. data/spec/block_docs/pending/link.md +72 -0
  140. data/spec/block_docs/pending/ref.md +21 -0
  141. data/spec/block_docs/recover/recover_links.md +15 -0
  142. data/spec/block_docs/red_tests/abbrev.md +679 -0
  143. data/spec/block_docs/red_tests/lists7.md +32 -0
  144. data/spec/block_docs/red_tests/lists7b.md +65 -0
  145. data/spec/block_docs/red_tests/lists8.md +42 -0
  146. data/spec/block_docs/red_tests/ref.md +23 -0
  147. data/spec/block_docs/red_tests/xml.md +35 -0
  148. data/spec/block_docs/references/long_example.md +71 -0
  149. data/spec/block_docs/references/spaces_and_numbers.md +15 -0
  150. data/spec/block_docs/smartypants.md +114 -0
  151. data/spec/block_docs/syntax_hl.md +52 -0
  152. data/spec/block_docs/table_attributes.md +34 -0
  153. data/spec/block_docs/test.md +19 -0
  154. data/spec/block_docs/underscore_in_words.md +15 -0
  155. data/spec/block_docs/wrapping.md +67 -0
  156. data/spec/block_docs/xml2.md +19 -0
  157. data/spec/block_docs/xml3.md +26 -0
  158. data/spec/block_docs/xml_instruction.md +52 -0
  159. data/spec/block_spec.rb +49 -0
  160. data/spec/span_spec.rb +254 -0
  161. data/spec/spec_helper.rb +6 -0
  162. metadata +247 -0
@@ -0,0 +1,240 @@
1
+ #--
2
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
+ #
4
+ # This file is part of Maruku.
5
+ #
6
+ # Maruku is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Maruku is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Maruku; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
20
+
21
+
22
+ require 'iconv'
23
+
24
+
25
+ module MaRuKu; module In; module Markdown; module BlockLevelParser
26
+
27
# Parses the whole document source +s+ and fills in this document node.
#
# In order: reads the header metadata (YAML front matter when
# Maruku::Globals[:yaml_frontmatter] is set, e-mail style headers otherwise),
# merges it into the document attributes, converts the body to UTF-8 when an
# `encoding` attribute asks for it, parses the body into @children, runs the
# abbreviation and raw-HTML passes, builds the table of contents, expands the
# attribute lists of every element and, when Maruku::Globals[:unsafe_features]
# is set, executes the embedded code blocks.
def parse_doc(s)
  # FIXME \r\n => \n
  meta2 =
    if Maruku::Globals[:yaml_frontmatter]
      parse_yaml_headers(s)
    else
      parse_email_headers(s)
    end

  # The header parser hands the document body back under :data; everything
  # else is document-level metadata. (A stray debugging `puts` of the
  # metadata used to live here; parsing must not write to stdout.)
  data = meta2.delete(:data)

  self.attributes.merge! meta2

=begin maruku_doc
Attribute: encoding
Scope: document
Summary: Encoding for the document.

If the `encoding` attribute is specified, then the content
will be converted from the specified encoding to UTF-8.

Conversion happens using the `iconv` library.
=end

  enc = self.attributes[:encoding]
  self.attributes.delete :encoding
  if enc && enc.downcase != 'utf-8'
    data = Iconv.new('utf-8', enc).iconv(data)
  end

  @children = parse_text_as_markdown(data)

  if true #markdown_extra?
    self.search_abbreviations
    self.substitute_markdown_inside_raw_html
  end

  toc = create_toc

  # use the TOC's header element as title if none was given explicitly
  if not self.attributes[:title] and toc.header_element
    title = toc.header_element.to_s
    self.attributes[:title] = title
  end

  # save for later use
  self.toc = toc

  # Now do the attributes magic
  each_element do |e|
    # default attribute list registered under the node type, if any
    if default = self.ald[e.node_type.to_s]
      expand_attribute_list(default, e.attributes)
    end
    expand_attribute_list(e.al, e.attributes)
  end

=begin maruku_doc
Attribute: unsafe_features
Scope: global
Summary: Enables execution of XML instructions.

Disabled by default because of security concerns.
=end

  if Maruku::Globals[:unsafe_features]
    self.execute_code_blocks
    # TODO: remove executed code blocks
  end
end
105
+
106
# Expands the attribute list +al+ into the Hash +result+.
#
# +al+ is iterated as (key, value) pairs:
# * :class  -- values are accumulated, separated by a single space
# * :id     -- the value replaces any previous id
# * :ref    -- if self.ald has an entry for the value, that list is expanded
#              recursively (names already expanded are tracked in
#              result[:expanded_references]; a repeat is reported through
#              maruku_error as a circular reference). Otherwise the name is
#              appended to result[:unresolved_references] and also stored
#              as a boolean attribute (result[name.to_sym] = true).
# * others  -- stored under key.to_sym
#
# +result+ is modified in place; +al+ is never modified.
def expand_attribute_list(al, result)
  al.each do |k, v|
    case k
    when :class
      if not result[:class]
        result[:class] = v
      else
        result[:class] += " " + v
      end
    when :id
      result[:id] = v
    when :ref
      if self.ald[v]
        already = (result[:expanded_references] ||= [])
        if not already.include?(v)
          already.push v
          expand_attribute_list(self.ald[v], result)
        else
          already.push v
          maruku_error "Circular reference between labels.\n\n"+
            "Label #{v.inspect} calls itself via recursion.\nThe recursion is "+
            (already.map{|x| x.inspect}.join(' => '))
        end
      else
        if not result[:unresolved_references]
          # Store a copy: later names are appended with <<, and appending
          # to +v+ itself would corrupt the string held by the attribute
          # list (which may be a shared default list).
          result[:unresolved_references] = v.dup
        else
          result[:unresolved_references] << " #{v}"
        end

        result[v.to_sym] = true
      end
    else
      result[k.to_sym] = v
    end
  end
end
144
+
145
# Evaluates the Ruby source +code+ in the context of +object+
# (via instance_eval) and returns the value of the evaluation.
#
# Errors raised by the evaluated code, including SyntaxError and other
# ScriptErrors, are reported through maruku_error and +nil+ is returned.
# Non-error exceptions such as Interrupt or SystemExit are deliberately
# not intercepted.
def safe_execute_code(object, code)
  begin
    return object.instance_eval(code)
  rescue StandardError, ScriptError => e
    # NOTE: the backtrace comes from Exception#backtrace; `e.caller` is a
    # private Kernel method and calling it here raised NoMethodError.
    trace = e.backtrace || []
    maruku_error "Exception while executing this:\n"+
      code.gsub(/^/, ">")+
      "\nThe error was:\n"+
      (e.inspect+"\n"+trace.join("\n")).gsub(/^/, "|")
  end
  nil
end
166
+
167
# Runs the code of every :xml_instr element whose target is 'maruku'
# through safe_execute_code, using the element itself as the evaluation
# context. String results are echoed to standard output.
def execute_code_blocks
  self.each_element(:xml_instr) do |instr|
    next unless instr.target == 'maruku'

    outcome = safe_execute_code(instr, instr.code)
    puts "Result is : #{outcome.inspect}" if outcome.kind_of?(String)
  end
end
177
+
178
# For every (abbreviation, title) pair in self.abbreviations, walks the
# document strings via replace_each_string and splits the first literal
# occurrence of the abbreviation out into an md_abbr element. Strings
# without a match are handed back unchanged.
def search_abbreviations
  self.abbreviations.each do |abbrev, title|
    pattern = Regexp.new(Regexp.escape(abbrev))
    self.replace_each_string do |text|
      # bug if many abbreviations are present (agorf)
      found = pattern.match(text)
      if found
        abbr_element = md_abbr(abbrev.dup, title ? title.dup : nil)
        [found.pre_match, abbr_element, found.post_match]
      else
        text
      end
    end
  end
end
192
+
193
+ include REXML
194
# (PHP Markdown extra) Search for elements that have
# markdown=1 or markdown=block defined.
#
# For every :raw_html element whose @parsed_html is set, each HTML element
# carrying a `markdown` attribute has the non-blank text nodes beneath it
# parsed as Markdown and replaced by the resulting HTML nodes. Text is
# parsed at block level when markdown="block" or the tag is listed in
# block_tags, and at span level otherwise. The `markdown` attribute is
# removed afterwards.
def substitute_markdown_inside_raw_html
  self.each_element(:raw_html) do |raw|
    doc = raw.instance_variable_get :@parsed_html
    next unless doc # only valid html has been parsed

    # parse block-level markdown elements in these HTML tags
    block_tags = ['div']

    # use xpath to find elements with 'markdown' attribute
    XPath.match(doc, "//*[attribute::markdown]").each do |html_el|
      # should we parse block-level or span-level?
      how = html_el.attributes['markdown']
      parse_blocks = (how == 'block') || block_tags.include?(html_el.name)

      # Select the text nodes *below this element*. The path has to be
      # relative (".//"): a bare "//" restarts at the document root and
      # would also rewrite text outside the marked element.
      XPath.match(html_el, ".//text()").each do |original_text|
        s = original_text.value.strip
        next unless s.size > 0

        el = md_el(:dummy,
          parse_blocks ? parse_text_as_markdown(s) :
                         parse_lines_as_span([s]))
        parent = original_text.parent
        el.children_to_html.each do |x|
          parent.insert_before(original_text, x)
        end
        parent.delete(original_text)
      end

      # remove 'markdown' attribute
      html_el.delete_attribute 'markdown'
    end
  end
end
239
+
240
+ end end end end
@@ -0,0 +1,746 @@
1
+ #--
2
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
+ #
4
+ # This file is part of Maruku.
5
+ #
6
+ # Maruku is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Maruku is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Maruku; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
20
+
21
+
22
+ require 'set'
23
+
24
+ module MaRuKu; module In; module Markdown; module SpanLevelParser
25
+ include MaRuKu::Helpers
26
+
27
# Characters that lose their special meaning after a backslash in
# ordinary text.
EscapedCharInText = Set.new [
  ?\\, ?`, ?*, ?_, ?{, ?}, ?[, ?], ?(, ?), ?#, ?., ?!, ?|, ?:, ?+, ?-, ?>
]

# Inside a quoted value both quote characters can be escaped as well.
EscapedCharInQuotes = Set.new(EscapedCharInText.to_a + [?', ?"])

# Escapable characters inside inline code.
EscapedCharInInlineCode = [?\\, ?`]
34
+
35
# Joins +lines+ with newlines and parses the result as span-level
# Markdown via parse_span_better, forwarding +parent+.
def parse_lines_as_span(lines, parent=nil)
  joined = lines.join("\n")
  parse_span_better(joined, parent)
end
38
+
39
# Parses +string+ as span-level Markdown and returns what read_span
# produces for it. A frozen copy of the string is wrapped in a CharSource
# (together with +parent+) and read up to the end of input (exit char nil).
# Anything that is not a String is reported through +error+.
def parse_span_better(string, parent=nil)
  unless string.kind_of? String
    error "Passed #{string.class}."
  end

  frozen_copy = (string + "").freeze
  source = CharSource.new(frozen_copy, parent)
  read_span(source, EscapedCharInText, [nil])
end
48
+
49
# This is the main loop for reading span elements.
#
# Reads from +src+ until the current character is one of +exit_on_chars+
# or the input continues with one of +exit_on_strings+ (either may be nil).
# The terminator itself is left in +src+. +escaped+ is the set of
# characters a backslash may escape.
#
# Returns the result of +educate+ applied to the collected elements
# (strings and MDElements), after IALs have been merged and one
# leading/trailing space has been trimmed.
#
# It's long, but not *complex* or difficult to understand.
def read_span(src, escaped, exit_on_chars, exit_on_strings=nil)
  con = SpanContext.new
  c = d = nil
  while true
    c = src.cur_char

    # This is only an optimization which cuts 50% of the time used.
    # (but you can't use a-zA-z in exit_on_chars)
    if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z)))
      con.cur_string << src.shift_char
      next
    end

    break if exit_on_chars && exit_on_chars.include?(c)
    break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x}

    # check if there are extensions
    if check_span_extensions(src, con)
      next
    end

    case c = src.cur_char
    when ?\s # it's space (32)
      # Hard line break: two spaces followed by a newline. The matched
      # string must be as long as the three characters consumed below.
      if src.cur_chars_are "  \n"
        src.ignore_chars(3)
        con.push_element md_br()
        next
      else
        src.ignore_char
        con.push_space
      end
    when ?\n, ?\t
      src.ignore_char
      con.push_space
    when ?`
      read_inline_code(src,con)
    when ?<
      # It could be:
      # 1) HTML "<div ..."
      # 2) HTML "<!-- ..."
      # 3) url "<http:// ", "<ftp:// ..."
      # 4) email "<andrea@... ", "<mailto:andrea@..."
      # 5) on itself! "a < b "
      # 6) Start of <<guillemettes>>

      case d = src.next_char
      when ?< # guillemettes
        src.ignore_chars(2)
        con.push_char ?<
        con.push_char ?<
      when ?!
        if src.cur_chars_are '<!--'
          read_inline_html(src, con)
        else
          con.push_char src.shift_char
        end
      when ??
        read_xml_instr_span(src, con)
      when ?\s, ?\t
        con.push_char src.shift_char
      else
        if src.next_matches(/<mailto:/) or
           src.next_matches(/<[\w\.]+\@/)
          read_email_el(src, con)
        elsif src.next_matches(/<\w+:/)
          read_url_el(src, con)
        elsif src.next_matches(/<\w/)
          read_inline_html(src, con)
        else
          con.push_char src.shift_char
        end
      end
    when ?\\
      d = src.next_char
      if d == ?'
        src.ignore_chars(2)
        con.push_element md_entity('apos')
      elsif d == ?"
        src.ignore_chars(2)
        con.push_element md_entity('quot')
      elsif escaped.include? d
        src.ignore_chars(2)
        con.push_char d
      else
        con.push_char src.shift_char
      end
    when ?[
      if markdown_extra? && src.next_char == ?^
        read_footnote_ref(src,con)
      else
        read_link(src, con)
      end
    when ?!
      if src.next_char == ?[
        read_image(src, con)
      else
        con.push_char src.shift_char
      end
    when ?&
      # named references
      if m = src.read_regexp(/\&([\w\d]+);/)
        con.push_element md_entity(m[1])
      # numeric
      elsif m = src.read_regexp(/\&\#(x)?([\w\d]+);/)
        num = m[1] ? m[2].hex : m[2].to_i
        con.push_element md_entity(num)
      else
        con.push_char src.shift_char
      end
    when ?*
      if not src.next_char
        maruku_error "Opening * as last char.", src, con
        # pass the position/context like the `_` branch below does
        maruku_recover "Threating as literal", src, con
        con.push_char src.shift_char
      else
        follows = src.cur_chars(4)
        if follows =~ /^\*\*\*[^\s\*]/
          con.push_element read_emstrong(src,'***')
        elsif follows =~ /^\*\*[^\s\*]/
          con.push_element read_strong(src,'**')
        elsif follows =~ /^\*[^\s\*]/
          con.push_element read_em(src,'*')
        else # * is just a normal char
          con.push_char src.shift_char
        end
      end
    when ?_
      if not src.next_char
        maruku_error "Opening _ as last char", src, con
        maruku_recover "Threating as literal", src, con
        con.push_char src.shift_char
      else
        # we don't want "mod_ruby" to start an emphasis
        # so we start one only if
        # 1) the last char was a space
        # or 2) the current string is empty
        if (con.cur_string =~ /\s\Z/) || (con.cur_string.size == 0)
          # also, we check the next characters
          follows = src.cur_chars(4)
          if follows =~ /^\_\_\_[^\s\_]/
            con.push_element read_emstrong(src,'___')
          elsif follows =~ /^\_\_[^\s\_]/
            con.push_element read_strong(src,'__')
          elsif follows =~ /^\_[^\s\_]/
            con.push_element read_em(src,'_')
          else # _ is just a normal char
            con.push_char src.shift_char
          end
        else
          # _ is just a normal char
          con.push_char src.shift_char
        end
      end
    when ?{ # extension
      if [?#, ?., ?:].include? src.next_char
        src.ignore_char # {
        interpret_extension(src, con, [?}])
        src.ignore_char # }
      else
        con.push_char src.shift_char
      end
    when nil
      # input ended before any terminator was seen
      maruku_error( ("Unclosed span (waiting for %s"+
        "#{exit_on_strings.inspect})") % [
          exit_on_chars ? "#{exit_on_chars.inspect} or" : ""],
        src,con)
      break
    else # normal text
      con.push_char src.shift_char
    end # end case
  end # end while true
  con.push_string_if_present

  # Assign IAL to elements
  merge_ial(con.elements, src, con)

  # Remove leading space
  if (s = con.elements.first).kind_of? String
    if s[0] == ?\s then con.elements[0] = s[1, s.size-1] end
    con.elements.shift if s.size == 0
  end

  # Remove final spaces
  if (s = con.elements.last).kind_of? String
    s.chop! if s[-1] == ?\s
    con.elements.pop if s.size == 0
  end

  educated = educate(con.elements)

  educated
end
251
+
252
+
253
# Reads an XML processing instruction "<?target code ?>" from +src+ and
# pushes the corresponding md_xml_instr element onto +con+.
#
# The target is the first run of word characters after "<?" (empty string
# if there is none); the code is everything up to "?>", stripped of
# surrounding whitespace (empty string if nothing was read).
def read_xml_instr_span(src, con)
  src.ignore_chars(2) # starting <?

  # read target <?target code... ?>
  target_match = src.read_regexp(/(\w+)/)
  target = target_match ? target_match[1] : ''

  delim = "?>"
  raw_code = read_simple(src, [], [], [delim])
  src.ignore_chars delim.size

  con.push_element md_xml_instr(target, (raw_code || "").strip)
end
274
+
275
# Interprets the content of a "{...}" extension.
#
# Start: cursor on the character **after** '{'
# End: cursor on '}' or EOF
#
# ':' is skipped and, like '#' and '.', leads straight to extension_meta.
# Anything else is read with read_simple, reported through maruku_recover
# and then handed to extension_meta as well.
def interpret_extension(src, con, break_on_chars)
  lead = src.cur_char

  if lead == ?:
    src.ignore_char # :
    return extension_meta(src, con, break_on_chars)
  end

  if lead == ?# || lead == ?.
    return extension_meta(src, con, break_on_chars)
  end

  stuff = read_simple(src, [?}], break_on_chars, [])
  if stuff =~ /^(\w+\s|[^\w])/
    extension_id = $1.strip
    maruku_recover "I don't know what to do with extension '#{extension_id}'\n"+
      "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
  else
    maruku_recover "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
  end
  extension_meta(src, con, break_on_chars)
end
300
+
301
# Reads the meta-data that follows '{' (or '{:').
#
# If the input starts with "name:" this is an attribute list *definition*:
# the list is registered in self.doc.ald under that name and an md_ald
# element is pushed onto +con+. Otherwise it is an inline attribute list
# and only an md_ial element is pushed.
def extension_meta(src, con, break_on_chars)
  if m = src.read_regexp(/([^\s\:\"\']+?):/)
    name = m[1]
    al = read_attribute_list(src, con, break_on_chars)
    self.doc.ald[name] = al
    con.push md_ald(name, al)
  else
    al = read_attribute_list(src, con, break_on_chars)
    # An inline list has no name, so nothing is registered in the ALD
    # table (this branch used to store it under a nil key).
    con.push md_ial(al)
  end
end
314
+
315
# Reads an autolink "<url>" from +src+ (cursor on '<') and pushes an
# md_url element built from the text between the angle brackets.
def read_url_el(src,con)
  src.ignore_char # leading <
  target = read_simple(src, [], [?>])
  src.ignore_char # closing >

  link = md_url(target)
  con.push_element link
end
322
+
323
# Reads an e-mail autolink "<address>" or "<mailto:address>" from +src+
# (cursor on '<') and pushes an md_email element for the address, with a
# leading "mailto:" removed.
def read_email_el(src,con)
  src.ignore_char # leading <
  raw = read_simple(src, [], [?>])
  src.ignore_char # closing >

  con.push_element md_email(raw.gsub(/^mailto:/,''))
end
331
+
332
# Reads a URL from +src+ up to one of the characters in +break_on+.
#
# A URL starting with a quote character is reported through +error+.
# One pair of enclosing angle brackets is removed. Returns the URL, or
# nil when it is empty.
def read_url(src, break_on)
  error 'Invalid char for url', src if [?',?"].include? src.cur_char

  url = read_simple(src, [], break_on) || "" # nil means empty url

  wrapped = (url[0] == ?< && url[-1] == ?>)
  url = url[1, url.size-2] if wrapped

  url.size == 0 ? nil : url
end
352
+
353
+
354
# Reads a value from +src+: via read_quoted when the cursor is on a
# single or double quote, via read_simple (with +escaped+ and
# +exit_on_chars+) otherwise.
def read_quoted_or_unquoted(src, con, escaped, exit_on_chars)
  if [?', ?"].include?(src.cur_char)
    read_quoted(src, con)
  else
    read_simple(src, escaped, exit_on_chars)
  end
end
362
+
363
# Tries to read a quoted value. If the stream does not start with ' or ",
# nothing is consumed and nil is returned. Otherwise the opening quote is
# consumed, the content is read up to the same quote character (honouring
# the escapes in EscapedCharInQuotes), the closing quote is skipped and
# the content is returned.
def read_quoted(src, con)
  opener = src.cur_char
  return nil unless opener == ?' || opener == ?"

  quote_char = src.shift_char # opening quote
  content = read_simple(src, EscapedCharInQuotes, [quote_char])
  src.ignore_char # closing quote
  return content
end
377
+
378
# Reads a simple string (no formatting) from +src+ until the current
# character is one of +exit_on_chars+ or the input continues with one of
# +exit_on_strings+ (either may be nil). The terminator is not consumed.
#
# A backslash followed by a character contained in +escaped+ yields that
# character alone; any other backslash is kept literally.
#
# If the input ends before a terminator is found, the problem is reported
# through maruku_error / maruku_recover and whatever was read so far is
# returned.
#
# Returns the string read, or nil if it is empty.
def read_simple(src, escaped, exit_on_chars, exit_on_strings=nil)
  text = ""
  while true
    c = src.cur_char
    break if exit_on_chars && exit_on_chars.include?(c)

    break if exit_on_strings &&
      exit_on_strings.any? {|x| src.cur_chars_are x}

    case c
    when nil
      # +exit_on_chars+ may legitimately be nil (see the guard above), so
      # it must not be dereferenced blindly while building the message.
      s= "String finished while reading (break on "+
        "#{(exit_on_chars || []).map{|x|""<<x}.inspect})"+
        " already read: #{text.inspect}"
      maruku_error s, src
      maruku_recover "I boldly continue", src
      break
    when ?\\
      d = src.next_char
      if escaped.include? d
        src.ignore_chars(2)
        text << d
      else
        text << src.shift_char
      end
    else
      text << src.shift_char
    end
  end
  text.empty? ? nil : text
end
420
+
421
# Reads an emphasis span delimited by the one-character +delim+
# (cursor on the opening delimiter) and returns the md_em element.
def read_em(src, delim)
  src.ignore_char # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_char # closing delimiter
  md_em(inner)
end
427
+
428
# Reads a strong span delimited by the two-character +delim+
# (cursor on the opening delimiter) and returns the md_strong element.
def read_strong(src, delim)
  src.ignore_chars(2) # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(2) # closing delimiter
  md_strong(inner)
end
434
+
435
# Reads a combined emphasis+strong span delimited by the three-character
# +delim+ (cursor on the opening delimiter) and returns the md_emstrong
# element.
def read_emstrong(src, delim)
  src.ignore_chars(3) # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(3) # closing delimiter
  md_emstrong(inner)
end
441
+
442
# The space character (code 32).
SPACE = ?\s

# Matches the remainder of a bracketed reference id: captures everything
# up to (not including) the closing ']' and consumes the bracket.
# (An earlier variant was /([^\]\s]*)(\s*\])/.)
R_REF_ID = /([^\]]*?)\]/
447
+
448
# Reads a bracketed id "[refid]". Consumes also both brackets.
# Whitespace right after the opening bracket is skipped. Returns the id
# (group 1 of R_REF_ID), or nil when the pattern does not match.
def read_ref_id(src, con)
  src.ignore_char # [
  src.consume_whitespace
  found = src.read_regexp(R_REF_ID)
  found ? found[1] : nil
end
461
+
462
# Reads a footnote reference (a bracketed id, see read_ref_id) from +src+
# and pushes the matching md_foot_ref element onto +con+.
def read_footnote_ref(src,con)
  con.push_element md_foot_ref(read_ref_id(src,con))
end
466
+
467
# Reads an inline HTML fragment starting at the cursor of +src+.
#
# The remaining buffer is fed to an HTMLHelper one character at a time
# until the helper reports that it is finished (or the buffer runs out,
# which is reported as malformed HTML). The consumed characters are then
# skipped in +src+ and an md_html element with the text the helper read
# is pushed onto +con+.
#
# If the helper raises an error, it is reported and the current character
# is pushed as literal text so that parsing can go on.
def read_inline_html(src, con)
  h = HTMLHelper.new
  begin
    # This is our current buffer in the context
    next_stuff = src.current_remaining_buffer

    consumed = 0
    while true
      if consumed >= next_stuff.size
        maruku_error "Malformed HTML starting at #{next_stuff.inspect}", src, con
        break
      end

      h.eat_this next_stuff[consumed].chr; consumed += 1
      break if h.is_finished?
    end
    src.ignore_chars(consumed)
    con.push_element md_html(h.stuff_you_read)
  rescue StandardError => e
    # Only ordinary errors are treated as "bad HTML"; Interrupt, SystemExit
    # and similar must keep propagating instead of being swallowed here.
    maruku_error "Bad html: \n" +
      (e.inspect+e.backtrace.join("\n")).gsub(/^/, '>'),
      src,con
    maruku_recover "I will try to continue after bad HTML.", src, con
    con.push_char src.shift_char
  end
end
509
+
510
# Reads an inline code span (cursor on the first backtick) and pushes
# the md_code element onto +con+.
#
# The span is opened by N backticks and closed by the same number. When
# N > 1, one leading and one trailing space of the content are dropped.
# Empty content (read_simple returned nil, e.g. for an unterminated "``")
# is passed to md_code as nil, exactly as for single-tick spans.
def read_inline_code(src, con)
  # Count the number of ticks
  num_ticks = 0
  while src.cur_char == ?`
    num_ticks += 1
    src.ignore_char
  end
  # We will read until this string
  end_string = "`"*num_ticks

  code = read_simple(src, [], [], [end_string])

  src.ignore_chars num_ticks

  # +code+ is nil when nothing was read; indexing it would raise.
  if code && num_ticks > 1
    # Ignore at most one leading space
    code = code[1, code.size-1] if code[0] == SPACE
    # drop last space
    code = code[0, code.size-1] if code[-1] == SPACE
  end

  con.push_element md_code(code)
end
540
+
541
# Reads a link starting at '[' and pushes the result onto +con+.
#
# After the bracketed link text, three forms are recognized:
# * "(url "title")"  -- inline link, pushed as md_im_link
# * "[ref id]"       -- reference link, pushed as md_link; an empty id
#                       falls back to the sanitized link text
# * anything else    -- implicit reference "[text]", pushed as md_link
#                       with the sanitized link text as id
#
# When the closing ')' or the reference id cannot be read, the problem is
# reported and the link text is pushed back as plain elements.
def read_link(src, con)
  # we read the string and see what happens
  src.ignore_char # opening bracket
  children = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket

  # ignore a single space between "]" and "[" / "("
  if src.cur_char == SPACE and
     (src.next_char == ?[ or src.next_char == ?( )
    src.shift_char
  end

  case src.cur_char
  when ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE,?\t,?)])
    if not url
      url = '' # no url is ok
    end
    src.consume_whitespace
    title = nil
    if src.cur_char != ?) # we have a title
      quote_char = src.cur_char
      title = read_quoted(src,con)

      if not title
        maruku_error 'Must quote title',src,con
      else
        # Tries to read a title with quotes: ![a](url "ti"tle")
        # this is the most ugly thing in Markdown
        if not src.next_matches(/\s*\)/)
          # if there is not a closing par ), then read
          # the rest and guess it's title with quotes
          rest = read_simple(src, [], [?)], [])
          # +rest+ is nil when the input ends right here; there is then
          # nothing to append (and nothing to chop).
          if rest
            # chop the closing char
            rest.chop!
            title << quote_char << rest
          end
        end
      end
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      maruku_error 'Unclosed link',src,con
      maruku_recover "No closing ): I will not create"+
        " the link for #{children.inspect}", src, con
      con.push_elements children
      return
    end
    con.push_element md_im_link(children,url, title)
  when ?[ # link ref
    ref_id = read_ref_id(src,con)
    if ref_id
      if ref_id.size == 0
        ref_id = sanitize_ref_id(children.to_s)
      else
        ref_id = sanitize_ref_id(ref_id)
      end
      con.push_element md_link(children, ref_id)
    else
      maruku_error "Could not read ref_id", src, con
      maruku_recover "I will not create the link for "+
        "#{children.inspect}", src, con
      con.push_elements children
      return
    end
  else # empty [link]
    id = sanitize_ref_id(children.to_s) #. downcase.gsub(' ','_')
    con.push_element md_link(children, id)
  end
end # read link
614
+
615
# Reads an image starting at "![" and pushes the result onto +con+.
#
# After the bracketed alt text, three forms are recognized:
# * "(url "title")"  -- inline image, pushed as md_im_image
# * "[ref id]"       -- reference image, pushed as md_image; an empty id
#                       falls back to the alt text
# * anything else    -- implicit reference, pushed as md_image with the
#                       sanitized alt text as id
#
# Problems (missing url, unquoted title, missing ')' or ']') are reported
# through +error+ / +maruku_error+.
def read_image(src, con)
  src.ignore_chars(2) # opening "!["
  alt_text = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket
  # ignore a single space between "]" and "[" / "("
  if src.cur_char == SPACE and
     (src.next_char == ?[ or src.next_char == ?( )
    src.ignore_char
  end
  case src.cur_char
  when ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE,?\t,?)])
    if not url
      error "Could not read url from #{src.cur_chars(10).inspect}",
        src,con
    end
    src.consume_whitespace
    title = nil
    if src.cur_char != ?) # we have a title
      quote_char = src.cur_char
      title = read_quoted(src,con)
      if not title
        maruku_error 'Must quote title',src,con
      else
        # Tries to read a title with quotes: ![a](url "ti"tle")
        # this is the most ugly thing in Markdown
        if not src.next_matches(/\s*\)/)
          # if there is not a closing par ), then read
          # the rest and guess it's title with quotes
          rest = read_simple(src, [], [?)], [])
          # +rest+ is nil when the input ends right here; there is then
          # nothing to append (and nothing to chop).
          if rest
            # chop the closing char
            rest.chop!
            title << quote_char << rest
          end
        end
      end
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      # +closing+ is nil at end of input; appending nil would raise
      error( ("Unclosed link: '"<<(closing || "")<<"'")+
        " Read url=#{url.inspect} title=#{title.inspect}",src,con)
    end
    con.push_element md_im_image(alt_text, url, title)
  when ?[ # link ref
    ref_id = read_ref_id(src,con)
    if not ref_id # TODO: check around
      error('Reference not closed.', src, con)
      ref_id = ""
    end
    if ref_id.size == 0
      ref_id = alt_text.to_s
    end

    ref_id = sanitize_ref_id(ref_id)

    con.push_element md_image(alt_text, ref_id)
  else # no stuff
    ref_id = sanitize_ref_id(alt_text.to_s)
    con.push_element md_image(alt_text, ref_id)
  end
end # read image
679
+
680
+
681
# Accumulator used while reading a span: a list of finished elements
# (Strings and MDElements) plus the string currently being built.
class SpanContext
  include MaRuKu::Strings

  # Elements read so far
  attr_accessor :elements
  # Text collected since the last element was pushed
  attr_accessor :cur_string

  def initialize
    @elements = []
    @cur_string = ""
  end

  # Appends +e+ (a String or an MDElement) to the elements, flushing the
  # pending string first. Raises for any other kind of object.
  def push_element(e)
    raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} " if
      not (e.kind_of?(String) or e.kind_of?(MDElement))

    push_string_if_present
    @elements << e
    nil
  end
  alias push push_element

  # Appends every item of +a+: strings are merged into the pending
  # string, anything else goes through push_element.
  def push_elements(a)
    a.each do |e|
      if e.kind_of? String
        # Append the string as a whole. Re-adding it byte by byte turns
        # every byte of a multibyte character into a character of its own
        # wherever String#<< treats an Integer as a codepoint.
        @cur_string << e
      else
        push_element e
      end
    end
  end

  # Moves the pending string, if not empty, to the elements.
  def push_string_if_present
    if @cur_string.size > 0
      @elements << @cur_string
      @cur_string = ""
    end
    nil
  end

  # Appends the character +c+ to the pending string.
  def push_char(c)
    @cur_string << c
    nil
  end

  # push space into current string if
  # there isn't one
  def push_space
    last = @cur_string[@cur_string.size-1]
    @cur_string << ?\s if last != ?\s
  end

  # Human-readable dump of the elements and the pending string.
  def describe
    lines = @elements.map{|x| x.inspect}.join("\n")
    s = "Elements read in span: \n" +
      lines.gsub(/^/, ' -')+"\n"

    if @cur_string.size > 0
      s += "Current string: \n #{@cur_string.inspect}\n"
    end
    s
  end
end # SpanContext
744
+
745
+ end end end end # module MaRuKu; module In; module Markdown; module SpanLevelParser
746
+