maruku 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (108) hide show
  1. data/bin/{maruku0.3 → marudown} +6 -14
  2. data/bin/maruku +1 -1
  3. data/bin/marutest +37 -9
  4. data/docs/TOFIX.html +22 -0
  5. data/docs/TOFIX.md +3 -0
  6. data/docs/changelog-0.2.13.html +30 -0
  7. data/docs/changelog-0.2.13.md +6 -0
  8. data/docs/changelog-0.3.html +19 -5
  9. data/docs/faq.html +51 -40
  10. data/docs/faq.md +3 -3
  11. data/docs/hidden_o_n_squared.md +10 -0
  12. data/docs/index.html +84 -396
  13. data/docs/markdown_syntax.html +139 -330
  14. data/docs/markdown_syntax.md +80 -93
  15. data/docs/maruku.html +84 -396
  16. data/docs/maruku.md +88 -158
  17. data/docs/proposal.html +13 -106
  18. data/docs/proposal.md +3 -3
  19. data/docs/todo.html +38 -28
  20. data/lib/maruku.rb +77 -11
  21. data/lib/maruku/attributes.rb +186 -0
  22. data/lib/maruku/defaults.rb +40 -0
  23. data/lib/maruku/errors_management.rb +55 -39
  24. data/lib/maruku/helpers.rb +156 -72
  25. data/lib/maruku/input/charsource.rb +319 -0
  26. data/lib/maruku/{html_helper.rb → input/html_helper.rb} +30 -9
  27. data/lib/maruku/input/linesource.rb +111 -0
  28. data/lib/maruku/input/parse_block.rb +562 -0
  29. data/lib/maruku/{parse_doc.rb → input/parse_doc.rb} +60 -28
  30. data/lib/maruku/{parse_span_better.rb → input/parse_span_better.rb} +226 -256
  31. data/lib/maruku/input/type_detection.rb +137 -0
  32. data/lib/maruku/maruku.rb +33 -0
  33. data/lib/maruku/{to_html.rb → output/to_html.rb} +151 -132
  34. data/lib/maruku/{to_latex.rb → output/to_latex.rb} +31 -35
  35. data/lib/maruku/{to_latex_entities.rb → output/to_latex_entities.rb} +25 -3
  36. data/lib/maruku/output/to_latex_strings.rb +64 -0
  37. data/lib/maruku/output/to_markdown.rb +164 -0
  38. data/lib/maruku/{to_s.rb → output/to_s.rb} +6 -0
  39. data/lib/maruku/string_utils.rb +12 -181
  40. data/lib/maruku/structures.rb +91 -67
  41. data/lib/maruku/structures_inspect.rb +78 -0
  42. data/lib/maruku/structures_iterators.rb +24 -2
  43. data/lib/maruku/tests/benchmark.rb +41 -9
  44. data/lib/maruku/tests/new_parser.rb +317 -286
  45. data/lib/maruku/tests/tests.rb +20 -0
  46. data/lib/maruku/toc.rb +64 -64
  47. data/lib/maruku/usage/example1.rb +33 -0
  48. data/lib/maruku/version.rb +8 -2
  49. data/tests/unittest/abbreviations.md +27 -16
  50. data/tests/unittest/attributes/attributes.md +89 -0
  51. data/tests/unittest/attributes/circular.md +51 -0
  52. data/tests/unittest/attributes/default.md +47 -0
  53. data/tests/unittest/blank.md +10 -6
  54. data/tests/unittest/blanks_in_code.md +26 -26
  55. data/tests/unittest/code.md +9 -9
  56. data/tests/unittest/code2.md +12 -13
  57. data/tests/unittest/code3.md +34 -34
  58. data/tests/unittest/easy.md +9 -7
  59. data/tests/unittest/email.md +9 -7
  60. data/tests/unittest/encoding/iso-8859-1.md +41 -4
  61. data/tests/unittest/encoding/utf-8.md +6 -5
  62. data/tests/unittest/entities.md +52 -80
  63. data/tests/unittest/escaping.md +47 -35
  64. data/tests/unittest/extra_dl.md +19 -29
  65. data/tests/unittest/extra_header_id.md +31 -24
  66. data/tests/unittest/extra_table1.md +14 -32
  67. data/tests/unittest/footnotes.md +58 -42
  68. data/tests/unittest/headers.md +11 -11
  69. data/tests/unittest/hrule.md +14 -24
  70. data/tests/unittest/images.md +41 -26
  71. data/tests/unittest/inline_html.md +104 -56
  72. data/tests/unittest/inline_html2.md +38 -0
  73. data/tests/unittest/links.md +74 -33
  74. data/tests/unittest/list1.md +18 -15
  75. data/tests/unittest/list2.md +31 -13
  76. data/tests/unittest/list3.md +29 -28
  77. data/tests/unittest/list4.md +103 -12
  78. data/tests/unittest/lists.md +86 -53
  79. data/tests/unittest/lists6.md +53 -0
  80. data/tests/unittest/lists7.md +31 -0
  81. data/tests/unittest/lists_after_paragraph.md +105 -71
  82. data/tests/unittest/lists_ol.md +149 -73
  83. data/tests/unittest/misc_sw.md +366 -326
  84. data/tests/unittest/notyet/escape.md +10 -10
  85. data/tests/unittest/notyet/header_after_par.md +20 -14
  86. data/tests/unittest/notyet/ticks.md +8 -35
  87. data/tests/unittest/notyet/triggering.md +72 -45
  88. data/tests/unittest/olist.md +78 -0
  89. data/tests/unittest/one.md +5 -3
  90. data/tests/unittest/paragraph.md +5 -3
  91. data/tests/unittest/paragraph_rules/dont_merge_ref.md +15 -9
  92. data/tests/unittest/paragraph_rules/tab_is_blank.md +9 -5
  93. data/tests/unittest/paragraphs.md +21 -26
  94. data/tests/unittest/recover/recover_links.md +6 -5
  95. data/tests/unittest/references/long_example.md +39 -30
  96. data/tests/unittest/references/spaces_and_numbers.md +2 -2
  97. data/tests/unittest/syntax_hl.md +33 -31
  98. data/tests/unittest/test.md +4 -6
  99. data/tests/unittest/wrapping.md +43 -26
  100. metadata +160 -139
  101. data/docs/markdown_extra2.html +0 -87
  102. data/docs/markdown_extra2.md +0 -83
  103. data/docs/markdown_syntax_2.html +0 -152
  104. data/lib/maruku/parse_block.rb +0 -564
  105. data/lib/maruku/parse_span.rb +0 -451
  106. data/lib/maruku/to_latex_strings.rb +0 -59
  107. data/lib/maruku/to_markdown.rb +0 -110
  108. data/lib/test.rb +0 -29
@@ -1,3 +1,4 @@
1
+ #--
1
2
  # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
2
3
  #
3
4
  # This file is part of Maruku.
@@ -15,8 +16,11 @@
15
16
  # You should have received a copy of the GNU General Public License
16
17
  # along with Maruku; if not, write to the Free Software
17
18
  # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
18
20
 
19
21
 
22
+ module MaRuKu
23
+
20
24
  class MDElement
21
25
 
22
26
  # Strips all formatting from the string
@@ -36,4 +40,6 @@ class MDElement
36
40
  title.gsub!(/[^\w_]/,'')
37
41
  title
38
42
  end
43
+ end
44
+
39
45
  end
@@ -1,3 +1,4 @@
1
+ #--
1
2
  # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
2
3
  #
3
4
  # This file is part of Maruku.
@@ -15,29 +16,19 @@
15
16
  # You should have received a copy of the GNU General Public License
16
17
  # along with Maruku; if not, write to the Free Software
17
18
  # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
18
20
 
19
- module MarukuStrings
21
+
22
+ # Boring stuff with strings.
23
+ module MaRuKu; module Strings
24
+
20
25
  def add_tabs(s,n=1,char="\t")
21
26
  s.split("\n").map{|x| char*n+x }.join("\n")
22
27
  end
23
- end
24
- class MDElement
25
- include MarukuStrings
26
- end
27
-
28
- class Maruku
29
28
 
30
- # Split a string into lines, and chomps the newline
31
- def Maruku.split_lines_old(s)
32
- a = []
33
- s.each_line do |l|
34
- l = l.chomp
35
- a << l
36
- end
37
- a
38
- end
39
-
40
- def Maruku.split_lines(s)
29
+ TabSize = 4;
30
+
31
+ def split_lines(s)
41
32
  s.split("\n")
42
33
  end
43
34
 
@@ -111,7 +102,7 @@ class Maruku
111
102
  # ' 1. Hello' # => 5
112
103
 
113
104
  def spaces_before_first_char(s)
114
- case line_node_type(s)
105
+ case s.md_type
115
106
  when :ulist
116
107
  i=0;
117
108
  # skip whitespace if present
@@ -179,7 +170,7 @@ class Maruku
179
170
  def dbg_describe_ary(a, prefix='')
180
171
  i = 0
181
172
  a.each do |l|
182
- tell_user "#{prefix} (#{i+=1})##{l}#"
173
+ puts "#{prefix} (#{i+=1})# #{l.inspect}"
183
174
  end
184
175
  end
185
176
 
@@ -187,165 +178,5 @@ class Maruku
187
178
  l =~ / $/
188
179
  end
189
180
 
190
- def line_node_type(l)
191
- # raw html is like PHP Markdown Extra: at most three spaces before
192
- return :code if number_of_leading_spaces(l)>=4
193
- return :footnote_text if l =~ FootnoteText
194
- return :ref_definition if l =~ LinkRegex or l=~ IncompleteLink
195
- return :abbreviation if l =~ Abbreviation
196
- return :definition if l =~ Definition
197
- # I had a bug with emails and urls at the beginning of the
198
- # line that were mistaken for raw_html
199
- return :text if l=~EMailAddress or l=~ URL
200
- return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+}
201
- return :raw_html if l =~ %r{[ ]{0,3}<\!\-\-}
202
- return :ulist if l =~ /^\s?([\*\-\+])\s+.*\w+/
203
- return :olist if l =~ /^\s?\d+\..*\w+/
204
- return :empty if l.strip.size == 0
205
- return :header1 if l =~ /^(=)+/
206
- return :header2 if l =~ /^([-\s])+$/
207
- return :header3 if l =~ /^(#)+\s*\S+/
208
- # at least three asterisks on a line, and only whitespace
209
- return :hrule if l =~ /^(\s*\*\s*){3,1000}$/
210
- return :hrule if l =~ /^(\s*-\s*){3,1000}$/ # or hyphens
211
- return :hrule if l =~ /^(\s*_\s*){3,1000}$/ # or underscores
212
- return :quote if l =~ /^>/
213
- return :metadata if l =~ /^@/
214
- return :m2ref if l =~ /^\s{0,3}\{[\w\d\s]+\}:/
215
- return :text
216
- end
217
-
218
- # Example:
219
- # ^:blah blah
220
- # ^: blah blah
221
- # ^ : blah blah
222
- Definition = %r{
223
- ^ # begin of line
224
- [ ]{0,3} # up to 3 spaces
225
- : # colon
226
- \s* # whitespace
227
- (\S.*) # the text = $1
228
- $ # end of line
229
- }x
230
-
231
- # Example:
232
- # *[HTML]: Hyper Text Markup Language
233
- Abbreviation = %r{
234
- ^ # begin of line
235
- \* # one asterisk
236
- \[ # opening bracket
237
- ([^\]]+) # any non-closing bracket: id = $1
238
- \] # closing bracket
239
- : # colon
240
- \s* # whitespace
241
- (\S.*\S)* # definition=$2
242
- \s* # strip this whitespace
243
- $ # end of line
244
- }x
245
-
246
- FootnoteText = %r{
247
- ^\s*\[(\^.+)\]: # id = $1 (including '^')
248
- \s*(\S.*)?$ # text = $2 (not obb.)
249
- }x
250
-
251
- # This regex is taken from BlueCloth sources
252
- # Link defs are in the form: ^[id]: \n? url "optional title"
253
- LinkRegex = %r{
254
- ^[ ]*\[([^\]]+)\]: # id = $1
255
- [ ]*
256
- <?(\S+)>? # url = $2
257
- [ ]*
258
- (?:# Titles are delimited by "quotes" or (parens).
259
- ["(']
260
- (.+?) # title = $3
261
- [")'] # Matching ) or "
262
- \s*(.+)? # stuff = $4
263
- )? # title is optional
264
- }x
265
-
266
- IncompleteLink = %r{^\s*\[(.+)\]:\s*$}
267
-
268
- HeaderWithId = /^(.*)\{\#([\w_-]+)\}\s*$/
269
-
270
- HeaderWithAttributes = /^(.*)\{(.*)\}\s*$/
271
-
272
- TabSize = 4;
273
-
274
- # if contains a pipe, it could be a table header
275
- MightBeTableHeader = %r{\|}
276
- # -------------:
277
- Sep = /\s*(\:)?\s*-+\s*(\:)?\s*/
278
- # | -------------:| ------------------------------ |
279
- TableSeparator = %r{^(\|?#{Sep}\|?)+\s*$}
280
-
281
-
282
- EMailAddress = /<([^:]+@[^:]+)>/
283
- URL = /^<http:/
284
181
  end
285
-
286
- class String
287
- S = 230
288
- MarkdownEscaped =
289
- [["\\",S+0],
290
- ['`',S+1],
291
- ['*',S+2],
292
- ['_',S+3],['{',S+4],['}',S+5],['[',S+6],[']',S+7],
293
- ['(',S+8],[')',S+9],['#',S+10],['.',S+11],
294
- ['!',S+12],
295
- # PHP Markdown extra
296
- ['|',S+13],[':',S+14], ["+",S+15], ["-",S+16], [">",S+17]]
297
-
298
- MarkdownAdd = 200
299
-
300
-
301
- def escape_md_special!
302
- MarkdownEscaped.each do |c|
303
- escape_sequence = "\\#{c[0]}"
304
- #puts "Escaping -#{escape_sequence}-"
305
- escaped ="0"; escaped[0]=c[1]
306
- gsub!(escape_sequence, escaped)
307
- end
308
-
309
- # But if you surround an * or _ with spaces,
310
- # it’ll be treated as a literal asterisk or underscore.
311
- escaped_ast = [S+2].pack('c')
312
- gsub!(/(\s)\*(\s|$)/, '\1%s\2' % [escaped_ast] )
313
- escaped_under = [S+3].pack('c')
314
- gsub!(/(\s)_(\s|$)/, '\1%s\2' % [escaped_under])
315
-
316
- self
317
- end
318
-
319
- def unescape_md_special!
320
- for i in 0..size-1
321
- for e in MarkdownEscaped
322
- if self[i] == e[1]
323
- self[i,1] = e[0]
324
- end
325
- end
326
- end
327
- self
328
- end
329
-
330
- # Resubstitute '\<char>' as this was a code block
331
- def it_was_a_code_block
332
- s = ""; tmp =" "
333
- each_byte do |b|
334
- tmp[0] = b
335
- found = false
336
- for e in MarkdownEscaped
337
- if b == e[1]
338
- s << '\\'
339
- s << e[0]
340
- found = true
341
- end
342
- end
343
- s << tmp if not found
344
- end
345
- s
346
- end
347
-
348
- def unescape_md_special; dup.unescape_md_special! end
349
- def escape_md_special; dup. escape_md_special! end
350
-
351
- end
182
+ end
@@ -1,3 +1,4 @@
1
+ #--
1
2
  # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
2
3
  #
3
4
  # This file is part of Maruku.
@@ -15,9 +16,34 @@
15
16
  # You should have received a copy of the GNU General Public License
16
17
  # along with Maruku; if not, write to the Free Software
17
18
  # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18
- #
19
+ #++
20
+
21
+
19
22
 
23
+ class Module
24
+ def safe_attr_accessor2(symbol, klass)
25
+ attr_reader symbol
26
+ code = <<-EOF
27
+ def #{symbol}=(val)
28
+ if not val.kind_of? #{klass}
29
+ s = "Could not assign an object of type \#{val.class} to #{symbol}.\n"
30
+ s += "Tried to assign\n\#{val.inspect}\nto #{symbol} of object\n"
31
+ s += "\#{self.inspect}"
32
+ raise s
33
+ end
34
+ @#{symbol} = val
35
+ end
36
+
37
+ EOF
38
+ module_eval code
39
+ end
20
40
 
41
+ def safe_attr_accessor(symbol, klass)
42
+ attr_accessor symbol
43
+ end
44
+ end
45
+
46
+ module MaRuKu
21
47
  # I did not want to have a class for each possible element.
22
48
  # Instead I opted to have only the class "MDElement"
23
49
  # that represents eveything in the document (paragraphs, headers, etc).
@@ -47,90 +73,88 @@
47
73
  # from whichever MDElement in the hierarchy.
48
74
 
49
75
  class MDElement
50
- # XXX List not complete
51
- # Allowed: :document, :paragraph, :ul, :ol, :li,
52
- # :li_span, :strong, :emphasis, :link, :email
53
- attr_accessor :node_type
76
+ # See helpers.rb for the list of allowed #node_type values
77
+ safe_attr_accessor :node_type, Symbol
78
+
54
79
  # Children are either Strings or MDElement
55
- attr_accessor :children
56
- # Hash for metadata
57
- # contains :id for :link1
58
- # :li :want_my_paragraph
59
- # :header: :level
60
- # code, inline_code: :raw_code
61
- attr_reader :meta
62
- # reference of containing document (document has list of ref)
80
+ safe_attr_accessor :children, Array
81
+
82
+
83
+ # safe_attr_accessor :meta, Hash
84
+
85
+ # An attribute list, may not be nil
86
+ safe_attr_accessor :al, Array #Maruku::AttributeList
87
+
88
+ # These are the processed attributes
89
+ safe_attr_accessor :attributes, Hash
90
+
91
+ # Reference of the document (which is of class Maruku)
63
92
  attr_accessor :doc
64
93
 
65
- def initialize(node_type_=:unset, children_=[], meta_={} )
94
+ def initialize(node_type=:unset, children=[], meta={},
95
+ al=MaRuKu::AttributeList.new )
66
96
  super();
67
- raise 'children is nil' if not children_
68
- raise 'meta is nil' if not meta_
97
+ self.children = children
98
+ self.node_type = node_type
69
99
 
70
- @children = children_
71
- @node_type = node_type_
72
- @meta = meta_
100
+ @attributes = {}
101
+
102
+ meta.each do |symbol, value|
103
+ self.instance_eval "
104
+ def #{symbol}; @#{symbol}; end
105
+ def #{symbol}=(val); @#{symbol}=val; end"
106
+ self.send "#{symbol}=", value
107
+ end
108
+
109
+ self.al = al || AttributeList.new
110
+
111
+ self.meta_priv = meta
73
112
  end
74
113
 
114
+ attr_accessor :meta_priv
115
+
75
116
  def ==(o)
76
117
  ok = o.kind_of?(MDElement) &&
77
118
  (self.node_type == o.node_type) &&
78
- (self.meta == o.meta) &&
119
+ (self.meta_priv == o.meta_priv) &&
79
120
  (self.children == o.children)
80
- ok
81
- end
82
-
83
- def inspect(compact=true)
84
- if compact
85
- i2 = inspect2
86
- return i2 if i2
87
- end
88
121
 
89
- "md_el(:%s,%s %s)" %
90
- [
91
- @node_type,
92
- children_inspect(compact),
93
- if @meta.size>0 then
94
- ', '+@meta.inspect
95
- else '' end
96
- ]
97
- end
98
-
99
- def children_inspect(compact=true)
100
- s = @children.inspect_more(compact,', ')
101
- if @children.empty?
102
- "[]"
103
- elsif s.size < 70
104
- s
105
- else
106
- "[\n"+
107
- add_tabs(@children.inspect_more(compact,",\n ",false))+
108
- "\n]"
122
+ if not ok
123
+ # puts "This:\n"+self.inspect+"\nis different from\n"+o.inspect+"\n\n"
109
124
  end
125
+ ok
110
126
  end
111
-
112
127
  end
113
128
 
114
- class String
115
- alias inspect_more inspect
116
- end
129
+ # This represents the whole document and holds global data.
117
130
 
118
- class Array
119
- def inspect_more(compact, join_string, add_brackets=true)
120
- s = map {|x|
121
- x.kind_of?(String) ? x.inspect :
122
- x.kind_of?(MDElement) ? x.inspect(compact) :
123
- (raise "WTF #{x.class} #{x.inspect}")
124
- }.join(join_string)
131
+ class MDDocument
132
+ safe_attr_accessor :refs, Hash
133
+ safe_attr_accessor :footnotes, Hash
134
+
135
+ # This is an hash. The key might be nil.
136
+ safe_attr_accessor :abbreviations, Hash
137
+
138
+ # Attribute lists definition
139
+ safe_attr_accessor :ald, Hash
140
+
141
+ # The order in which footnotes are used. Contains the id.
142
+ safe_attr_accessor :footnotes_order, Array
143
+
144
+ def initialize(s=nil)
145
+ super(:document)
146
+ @doc = self
147
+
148
+ self.refs = {}
149
+ self.footnotes = {}
150
+ self.footnotes_order = []
151
+ self.abbreviations = {}
152
+ self.ald = {}
125
153
 
126
- add_brackets ? "[#{s}]" : s
154
+ parse_doc(s) if s
127
155
  end
128
156
  end
129
- # The Maruku class represent the whole document
130
- # and holds global data.
131
157
 
132
- class Maruku < MDElement
133
- attr_accessor :refs
134
- attr_accessor :footnotes
135
- attr_accessor :abbreviations
136
- end
158
+
159
+ end # MaRuKu
160
+
@@ -0,0 +1,78 @@
1
+ #--
2
+ # Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
3
+ #
4
+ # This file is part of Maruku.
5
+ #
6
+ # Maruku is free software; you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation; either version 2 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # Maruku is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License
17
+ # along with Maruku; if not, write to the Free Software
18
+ # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
+ #++
20
+
21
+
22
+
23
+ class String
24
+ alias inspect_more inspect
25
+ end
26
+
27
+ class Array
28
+ def inspect_more(compact, join_string, add_brackets=true)
29
+ s = map {|x|
30
+ x.kind_of?(String) ? x.inspect :
31
+ x.kind_of?(MaRuKu::MDElement) ? x.inspect(compact) :
32
+ (raise "WTF #{x.class} #{x.inspect}")
33
+ }.join(join_string)
34
+
35
+ add_brackets ? "[#{s}]" : s
36
+ end
37
+ end
38
+
39
+ class Hash
40
+ def inspect_ordered
41
+ "{"+map{|k,v| k.inspect + "=>"+v.inspect}.join(',')+"}"
42
+ end
43
+ end
44
+
45
+ module MaRuKu
46
+ class MDElement
47
+ def inspect(compact=true)
48
+ if compact
49
+ i2 = inspect2
50
+ return i2 if i2
51
+ end
52
+
53
+ "md_el(:%s,%s,%s,%s)" %
54
+ [
55
+ self.node_type,
56
+ children_inspect(compact),
57
+ @meta_priv.inspect_ordered,
58
+ self.al.inspect
59
+ ]
60
+ end
61
+
62
+ def children_inspect(compact=true)
63
+ s = @children.inspect_more(compact,', ')
64
+ if @children.empty?
65
+ "[]"
66
+ elsif s.size < 70
67
+ s
68
+ else
69
+ "[\n"+
70
+ add_tabs(@children.inspect_more(compact,",\n",false))+
71
+ "\n]"
72
+ end
73
+ end
74
+
75
+ end
76
+
77
+ end
78
+