creole 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. data/lib/creole.rb +264 -278
  2. data/test/test_creole.rb +622 -6
  3. metadata +1 -2
  4. data/test/testcases.rb +0 -631
@@ -27,14 +27,22 @@ require 'uri'
27
27
  # You can customize the created anchor/image markup by overriding
28
28
  # make_*_anchor/make_image.
29
29
 
30
- module Creole
30
+ # Main Creole parser class. Call Creole#parse to parse Creole
31
+ # formatted text.
32
+ #
33
+ # This class is not reentrant. A separate instance is needed for
34
+ # each thread that needs to convert Creole to HTML.
35
+ #
36
+ # Inherit this to provide custom handling of links. The overrideable
37
+ # methods are: make_local_link
38
+ class Creole
31
39
 
32
- VERSION = '0.3.3'
40
+ VERSION = '0.3.4'
33
41
 
34
- # CreoleParseError is raised when the Creole parser encounters
42
+ # CreoleError is raised when the Creole parser encounters
35
43
  # something unexpected. This is generally not thrown unless there is
36
44
  # a bug in the parser.
37
- class CreoleParseError < Exception; end
45
+ class CreoleError < Exception; end
38
46
 
39
47
  # Convert the argument in Creole format to HTML and return the
40
48
  # result. Example:
@@ -42,331 +50,309 @@ module Creole
42
50
  # Creole.creolize("**Hello //World//**")
43
51
  # #=> "<p><strong>Hello <em>World</em></strong></p>"
44
52
  #
45
- # This is an alias for calling CreoleParser#parse:
46
- # CreoleParser.new.parse(creole)
53
+ # This is an alias for calling Creole#parse:
54
+ # Creole.new.parse(creole)
47
55
  def self.creolize(creole)
48
- CreoleParser.new.parse(creole)
56
+ new.parse(creole)
57
+ end
58
+
59
+ # Create a new Creole instance.
60
+ def initialize
61
+ @base = nil
62
+ @allowed_schemes = [ 'http', 'https', 'ftp', 'ftps' ]
63
+ @uri_scheme_re = @allowed_schemes.join('|')
49
64
  end
50
65
 
51
- # Main Creole parser class. Call CreoleParser#parse to parse Creole
52
- # formatted text.
66
+ # Parse and convert the argument in Creole text to HTML and return
67
+ # the result. The resulting HTML does not contain <html> and
68
+ # <body> tags.
53
69
  #
54
- # This class is not reentrant. A separate instance is needed for
55
- # each thread that needs to convert Creole to HTML.
70
+ # Example:
56
71
  #
57
- # Inherit this to provide custom handling of links. The overrideable
58
- # methods are: make_local_link
59
- class CreoleParser
60
-
61
- # Create a new CreoleParser instance.
62
- def initialize
63
- @base = nil
64
- @allowed_schemes = [ 'http', 'https', 'ftp', 'ftps' ]
65
- @uri_scheme_re = @allowed_schemes.join('|')
66
- end
67
-
68
- # Parse and convert the argument in Creole text to HTML and return
69
- # the result. The resulting HTML does not contain <html> and
70
- # <body> tags.
71
- #
72
- # Example:
73
- #
74
- # parser = CreoleParser.new
75
- # parser.parse("**Hello //World//**")
76
- # #=> "<p><strong>Hello <em>World</em></strong></p>"
77
- def parse(string)
78
- @out = ""
79
- @strong = false
80
- @p = false
81
- @stack = []
82
- parse_block(string)
83
- return @out
84
- end
72
+ # parser = Creole.new
73
+ # parser.parse("**Hello //World//**")
74
+ # #=> "<p><strong>Hello <em>World</em></strong></p>"
75
+ def parse(string)
76
+ @out = ""
77
+ @p = false
78
+ @stack = []
79
+ parse_block(string)
80
+ @out
81
+ end
85
82
 
86
- # Escape any characters with special meaning in HTML using HTML
87
- # entities.
88
- private
89
- def escape_html(string)
90
- CGI::escapeHTML(string)
91
- end
83
+ protected
92
84
 
93
- # Escape any characters with special meaning in URLs using URL
94
- # encoding.
95
- private
96
- def escape_url(string)
97
- CGI::escape(string)
98
- end
85
+ # Escape any characters with special meaning in HTML using HTML
86
+ # entities.
87
+ def escape_html(string)
88
+ CGI::escapeHTML(string)
89
+ end
99
90
 
100
- private
91
+ # Escape any characters with special meaning in URLs using URL
92
+ # encoding.
93
+ def escape_url(string)
94
+ CGI::escape(string)
95
+ end
101
96
 
102
- def start_tag(tag)
103
- @stack.push(tag)
104
- @out << '<' << tag << '>'
105
- end
97
+ def start_tag(tag)
98
+ @stack.push(tag)
99
+ @out << '<' << tag << '>'
100
+ end
106
101
 
107
- def end_tag
108
- @out << '</' << @stack.pop << '>'
109
- end
102
+ def end_tag
103
+ @out << '</' << @stack.pop << '>'
104
+ end
110
105
 
111
- def toggle_tag(tag, match)
112
- if @stack.include?(tag)
113
- if @stack.last == tag
114
- end_tag
115
- else
116
- @out << escape_html(match)
117
- end
106
+ def toggle_tag(tag, match)
107
+ if @stack.include?(tag)
108
+ if @stack.last == tag
109
+ end_tag
118
110
  else
119
- start_tag(tag)
111
+ @out << escape_html(match)
120
112
  end
113
+ else
114
+ start_tag(tag)
121
115
  end
116
+ end
122
117
 
123
- def end_paragraph
124
- end_tag while !@stack.empty?
125
- @p = false
126
- end
118
+ def end_paragraph
119
+ end_tag while !@stack.empty?
120
+ @p = false
121
+ end
127
122
 
128
- def start_paragraph
129
- if @p
130
- @out << ' ' if @out[-1,1] != ' '
131
- else
132
- end_paragraph
133
- start_tag('p')
134
- @p = true
135
- end
123
+ def start_paragraph
124
+ if @p
125
+ @out << ' ' if @out[-1,1] != ' '
126
+ else
127
+ end_paragraph
128
+ start_tag('p')
129
+ @p = true
136
130
  end
131
+ end
137
132
 
138
- # Create anchor markup for direct links. This
139
- # method can be overridden to generate custom
140
- # markup, for example to add html additional attributes.
141
- private
142
- def make_direct_anchor(uri, text)
143
- '<a href="' << escape_html(uri) << '">' << escape_html(text) << '</a>'
144
- end
133
+ # Create anchor markup for direct links. This
134
+ # method can be overridden to generate custom
135
+ # markup, for example to add additional HTML attributes.
136
+ def make_direct_anchor(uri, text)
137
+ '<a href="' << escape_html(uri) << '">' << escape_html(text) << '</a>'
138
+ end
145
139
 
146
- # Create anchor markup for explicit links. This
147
- # method can be overridden to generate custom
148
- # markup, for example to add html additional attributes.
149
- private
150
- def make_explicit_anchor(uri, text)
151
- '<a href="' << escape_html(uri) << '">' << escape_html(text) << '</a>'
152
- end
140
+ # Create anchor markup for explicit links. This
141
+ # method can be overridden to generate custom
142
+ # markup, for example to add additional HTML attributes.
143
+ def make_explicit_anchor(uri, text)
144
+ '<a href="' << escape_html(uri) << '">' << escape_html(text) << '</a>'
145
+ end
153
146
 
154
- # Translate an explicit local link to a desired URL that is
155
- # properly URL-escaped. The default behaviour is to convert local
156
- # links directly, escaping any characters that have special
157
- # meaning in URLs. Relative URLs in local links are not handled.
158
- #
159
- # Examples:
160
- #
161
- # make_local_link("LocalLink") #=> "LocalLink"
162
- # make_local_link("/Foo/Bar") #=> "%2FFoo%2FBar"
163
- #
164
- # Must ensure that the result is properly URL-escaped. The caller
165
- # will handle HTML escaping as necessary. HTML links will not be
166
- # inserted if the function returns nil.
167
- #
168
- # Example custom behaviour:
169
- #
170
- # make_local_link("LocalLink") #=> "/LocalLink"
171
- # make_local_link("Wikipedia:Bread") #=> "http://en.wikipedia.org/wiki/Bread"
172
- private
173
- def make_local_link(link) #:doc:
174
- escape_url(link)
175
- end
147
+ # Translate an explicit local link to a desired URL that is
148
+ # properly URL-escaped. The default behaviour is to convert local
149
+ # links directly, escaping any characters that have special
150
+ # meaning in URLs. Relative URLs in local links are not handled.
151
+ #
152
+ # Examples:
153
+ #
154
+ # make_local_link("LocalLink") #=> "LocalLink"
155
+ # make_local_link("/Foo/Bar") #=> "%2FFoo%2FBar"
156
+ #
157
+ # Must ensure that the result is properly URL-escaped. The caller
158
+ # will handle HTML escaping as necessary. HTML links will not be
159
+ # inserted if the function returns nil.
160
+ #
161
+ # Example custom behaviour:
162
+ #
163
+ # make_local_link("LocalLink") #=> "/LocalLink"
164
+ # make_local_link("Wikipedia:Bread") #=> "http://en.wikipedia.org/wiki/Bread"
165
+ def make_local_link(link) #:doc:
166
+ escape_url(link)
167
+ end
176
168
 
177
- # Sanatize a direct url (e.g. http://wikipedia.org/). The default
178
- # behaviour returns the original link as-is.
179
- #
180
- # Must ensure that the result is properly URL-escaped. The caller
181
- # will handle HTML escaping as necessary. Links will not be
182
- # converted to HTML links if the function returns link.
183
- #
184
- # Custom versions of this function in inherited classes can
185
- # implement specific link handling behaviour, such as redirection
186
- # to intermediate pages (for example, for notifing the user that
187
- # he is leaving the site).
188
- private
189
- def make_direct_link(url) #:doc:
190
- return url
191
- end
169
+ # Sanitize a direct url (e.g. http://wikipedia.org/). The default
170
+ # behaviour returns the original link as-is.
171
+ #
172
+ # Must ensure that the result is properly URL-escaped. The caller
173
+ # will handle HTML escaping as necessary. Links will not be
174
+ # converted to HTML links if the function returns nil.
175
+ #
176
+ # Custom versions of this function in inherited classes can
177
+ # implement specific link handling behaviour, such as redirection
178
+ # to intermediate pages (for example, for notifying the user that
179
+ # he is leaving the site).
180
+ def make_direct_link(url) #:doc:
181
+ url
182
+ end
192
183
 
193
- # Sanatize and prefix image URLs. When images are encountered in
194
- # Creole text, this function is called to obtain the actual URL of
195
- # the image. The default behaviour is to return the image link
196
- # as-is. No image tags are inserted if the function returns nil.
197
- #
198
- # Custom version of the method can be used to sanatize URLs
199
- # (e.g. remove query-parts), inhibit off-site images, or add a
200
- # base URL, for example:
201
- #
202
- # def make_image_link(url)
203
- # URI.join("http://mywiki.org/images/", url)
204
- # end
205
- private
206
- def make_image_link(url) #:doc:
207
- return url
208
- end
184
+ # Sanitize and prefix image URLs. When images are encountered in
185
+ # Creole text, this function is called to obtain the actual URL of
186
+ # the image. The default behaviour is to return the image link
187
+ # as-is. No image tags are inserted if the function returns nil.
188
+ #
189
+ # Custom version of the method can be used to sanitize URLs
190
+ # (e.g. remove query-parts), inhibit off-site images, or add a
191
+ # base URL, for example:
192
+ #
193
+ # def make_image_link(url)
194
+ # URI.join("http://mywiki.org/images/", url)
195
+ # end
196
+ def make_image_link(url) #:doc:
197
+ url
198
+ end
209
199
 
210
- # Create image markup. This
211
- # method can be overridden to generate custom
212
- # markup, for example to add html additional attributes or
213
- # to put divs around the imgs.
214
- private
215
- def make_image(uri, alt)
216
- if alt
217
- '<img src="' << escape_html(uri) << '" alt="' << escape_html(alt) << '"/>'
218
- else
219
- '<img src="' << escape_html(uri) << '"/>'
220
- end
200
+ # Create image markup. This
201
+ # method can be overridden to generate custom
202
+ # markup, for example to add additional HTML attributes or
203
+ # to put divs around the imgs.
204
+ def make_image(uri, alt)
205
+ if alt
206
+ '<img src="' << escape_html(uri) << '" alt="' << escape_html(alt) << '"/>'
207
+ else
208
+ '<img src="' << escape_html(uri) << '"/>'
221
209
  end
210
+ end
222
211
 
223
- private
224
- def make_explicit_link(link)
225
- begin
226
- uri = URI.parse(link)
227
- return uri.to_s if uri.scheme && @allowed_schemes.include?(uri.scheme)
228
- rescue URI::InvalidURIError
229
- end
230
- return make_local_link(link)
212
+ def make_explicit_link(link)
213
+ begin
214
+ uri = URI.parse(link)
215
+ return uri.to_s if uri.scheme && @allowed_schemes.include?(uri.scheme)
216
+ rescue URI::InvalidURIError
231
217
  end
218
+ make_local_link(link)
219
+ end
232
220
 
233
- def parse_inline(str)
234
- until str.empty?
235
- case str
236
- when /\A(\~)?((https?|ftps?):\/\/\S+?)(?=([,.?!:;"'\)])?(\s|$))/
237
- if $1
238
- @out << escape_html($2)
239
- else
240
- if uri = make_direct_link($2)
241
- @out << make_direct_anchor(uri, $2)
242
- else
243
- @out << escape_html($&)
244
- end
245
- end
246
- when /\A\[\[\s*([^|]*?)\s*(\|\s*(.*?))?\s*\]\]/m
247
- link = $1
248
- if uri = make_explicit_link(link)
249
- @out << make_explicit_anchor(uri, $3 || link)
250
- else
251
- @out << escape_html($&)
252
- end
253
- when /\A\{\{\{(.*)\}\}\}/
254
- @out << '<tt>' << escape_html($1) << '</tt>'
255
- when /\A\{\{\s*(.*?)\s*(\|\s*(.*?)\s*)?\}\}/
256
- if uri = make_image_link($1)
257
- @out << make_image(uri, $3)
221
+ def parse_inline(str)
222
+ until str.empty?
223
+ case str
224
+ when /\A(\~)?((https?|ftps?):\/\/\S+?)(?=([,.?!:;"'\)])?(\s|$))/
225
+ if $1
226
+ @out << escape_html($2)
227
+ else
228
+ if uri = make_direct_link($2)
229
+ @out << make_direct_anchor(uri, $2)
258
230
  else
259
231
  @out << escape_html($&)
260
232
  end
261
- when /\A~([^\s])/
262
- @out << escape_html($1)
263
- when /\A\w+/
264
- @out << $&
265
- when /\A\s+/
266
- @out << ' ' if @out[-1,1] != ' '
267
- when /\A\*\*/
268
- toggle_tag 'strong', $&
269
- when /\A\/\//
270
- toggle_tag 'em', $&
271
- when /\A\\\\/
272
- @out << '<br/>'
273
- when /./
233
+ end
234
+ when /\A\[\[\s*([^|]*?)\s*(\|\s*(.*?))?\s*\]\]/m
235
+ link = $1
236
+ if uri = make_explicit_link(link)
237
+ @out << make_explicit_anchor(uri, $3 || link)
238
+ else
274
239
  @out << escape_html($&)
240
+ end
241
+ when /\A\{\{\{(.*)\}\}\}/
242
+ @out << '<tt>' << escape_html($1) << '</tt>'
243
+ when /\A\{\{\s*(.*?)\s*(\|\s*(.*?)\s*)?\}\}/
244
+ if uri = make_image_link($1)
245
+ @out << make_image(uri, $3)
275
246
  else
276
- raise CreoleParseError, "Parse error at #{str[0,30].inspect}"
247
+ @out << escape_html($&)
277
248
  end
278
- str = $'
249
+ when /\A~([^\s])/
250
+ @out << escape_html($1)
251
+ when /\A\w+/
252
+ @out << $&
253
+ when /\A\s+/
254
+ @out << ' ' if @out[-1,1] != ' '
255
+ when /\A\*\*/
256
+ toggle_tag 'strong', $&
257
+ when /\A\/\//
258
+ toggle_tag 'em', $&
259
+ when /\A\\\\/
260
+ @out << '<br/>'
261
+ when /./
262
+ @out << escape_html($&)
263
+ else
264
+ raise CreoleError, "Parse error at #{str[0,30].inspect}"
279
265
  end
266
+ str = $'
280
267
  end
268
+ end
281
269
 
282
- def parse_table_row(str)
283
- @out << '<tr>'
284
- str.scan(/\s*\|(=)?\s*((\[\[.*?\]\]|\{\{.*?\}\}|[^|~]|~.)*)(?=\||$)/) do
285
- if !$2.empty? || !$'.empty?
286
- @out << ($1 ? '<th>' : '<td>')
287
- parse_inline($2) if $2
288
- end_tag while @stack.last != 'table'
289
- @out << ($1 ? '</th>' : '</td>')
290
- end
270
+ def parse_table_row(str)
271
+ @out << '<tr>'
272
+ str.scan(/\s*\|(=)?\s*((\[\[.*?\]\]|\{\{.*?\}\}|[^|~]|~.)*)(?=\||$)/) do
273
+ if !$2.empty? || !$'.empty?
274
+ @out << ($1 ? '<th>' : '<td>')
275
+ parse_inline($2) if $2
276
+ end_tag while @stack.last != 'table'
277
+ @out << ($1 ? '</th>' : '</td>')
291
278
  end
292
- @out << '</tr>'
293
279
  end
280
+ @out << '</tr>'
281
+ end
294
282
 
295
- def make_nowikiblock(input)
296
- input.gsub(/^ (?=\}\}\})/, '')
297
- end
283
+ def make_nowikiblock(input)
284
+ input.gsub(/^ (?=\}\}\})/, '')
285
+ end
298
286
 
299
- def ulol?(x); x == 'ul' || x == 'ol'; end
287
+ def ulol?(x); x == 'ul' || x == 'ol'; end
300
288
 
301
- def parse_block(str)
302
- until str.empty?
303
- case str
304
- when /\A\{\{\{\r?\n(.*?)\r?\n\}\}\}/m
305
- end_paragraph
306
- nowikiblock = make_nowikiblock($1)
307
- @out << '<pre>' << escape_html(nowikiblock) << '</pre>'
308
- when /\A\s*-{4,}\s*$/
309
- end_paragraph
310
- @out << '<hr/>'
311
- when /\A\s*(={1,6})\s*(.*?)\s*=*\s*$(\r?\n)?/
289
+ def parse_block(str)
290
+ until str.empty?
291
+ case str
292
+ when /\A\{\{\{\r?\n(.*?)\r?\n\}\}\}/m
293
+ end_paragraph
294
+ nowikiblock = make_nowikiblock($1)
295
+ @out << '<pre>' << escape_html(nowikiblock) << '</pre>'
296
+ when /\A\s*-{4,}\s*$/
297
+ end_paragraph
298
+ @out << '<hr/>'
299
+ when /\A\s*(={1,6})\s*(.*?)\s*=*\s*$(\r?\n)?/
300
+ end_paragraph
301
+ level = $1.size
302
+ @out << "<h#{level}>" << escape_html($2) << "</h#{level}>"
303
+ when /\A[ \t]*\|.*$(\r?\n)?/
304
+ if !@stack.include?('table')
312
305
  end_paragraph
313
- level = $1.size
314
- @out << "<h#{level}>" << escape_html($2) << "</h#{level}>"
315
- when /\A[ \t]*\|.*$(\r?\n)?/
316
- if !@stack.include?('table')
317
- end_paragraph
318
- start_tag('table')
306
+ start_tag('table')
307
+ end
308
+ parse_table_row($&)
309
+ when /\A\s*$(\r?\n)?/
310
+ end_paragraph
311
+ when /\A(\s*([*#]+)\s*(.*?))$(\r?\n)?/
312
+ line, bullet, item = $1, $2, $3
313
+ tag = (bullet[0,1] == '*' ? 'ul' : 'ol')
314
+ if bullet[0,1] == '#' || bullet.size != 2 || @stack.find {|x| ulol?(x) }
315
+ count = @stack.select { |x| ulol?(x) }.size
316
+
317
+ while !@stack.empty? && count > bullet.size
318
+ count -= 1 if ulol?(@stack.last)
319
+ end_tag
319
320
  end
320
- parse_table_row($&)
321
- when /\A\s*$(\r?\n)?/
322
- end_paragraph
323
- when /\A(\s*([*#]+)\s*(.*?))$(\r?\n)?/
324
- line, bullet, item = $1, $2, $3
325
- tag = (bullet[0,1] == '*' ? 'ul' : 'ol')
326
- if bullet[0,1] == '#' || bullet.size != 2 || @stack.find {|x| ulol?(x) }
327
- count = @stack.select { |x| ulol?(x) }.size
328
-
329
- while !@stack.empty? && count > bullet.size
330
- count -= 1 if ulol?(@stack.last)
331
- end_tag
332
- end
333
321
 
334
- end_tag while !@stack.empty? && @stack.last != 'li'
322
+ end_tag while !@stack.empty? && @stack.last != 'li'
335
323
 
336
- if @stack.last == 'li' && count == bullet.size
324
+ if @stack.last == 'li' && count == bullet.size
325
+ end_tag
326
+ if @stack.last != tag
337
327
  end_tag
338
- if @stack.last != tag
339
- end_tag
340
- count -= 1
341
- end
342
- end
343
-
344
- while count < bullet.size
345
- start_tag tag
346
- count += 1
347
- start_tag 'li' if count < bullet.size
328
+ count -= 1
348
329
  end
330
+ end
349
331
 
350
- @p = true
351
- start_tag('li')
352
- parse_inline(item)
353
- else
354
- start_paragraph
355
- parse_inline(line)
332
+ while count < bullet.size
333
+ start_tag tag
334
+ count += 1
335
+ start_tag 'li' if count < bullet.size
356
336
  end
357
- when /\A([ \t]*\S+.*?)$(\r?\n)?/
358
- start_paragraph
359
- parse_inline($1)
337
+
338
+ @p = true
339
+ start_tag('li')
340
+ parse_inline(item)
360
341
  else
361
- raise CreoleParseError, "Parse error at #{str[0,30].inspect}"
342
+ start_paragraph
343
+ parse_inline(line)
362
344
  end
363
- #p [$&, $']
364
- str = $'
345
+ when /\A([ \t]*\S+.*?)$(\r?\n)?/
346
+ start_paragraph
347
+ parse_inline($1)
348
+ else
349
+ raise CreoleError, "Parse error at #{str[0,30].inspect}"
365
350
  end
366
- end_paragraph
367
- return @out
351
+ #p [$&, $']
352
+ str = $'
368
353
  end
354
+ end_paragraph
355
+ @out
356
+ end
369
357
 
370
- end # class CreoleParser
371
-
372
- end # module Creole
358
+ end # class Creole