creole 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/creole.rb +264 -278
- data/test/test_creole.rb +622 -6
- metadata +1 -2
- data/test/testcases.rb +0 -631
data/lib/creole.rb
CHANGED
@@ -27,14 +27,22 @@ require 'uri'
|
|
27
27
|
# You can customize the created anchor/image markup by overriding
|
28
28
|
# make_*_anchor/make_image.
|
29
29
|
|
30
|
-
|
30
|
+
# Main Creole parser class. Call Creole#parse to parse Creole
|
31
|
+
# formatted text.
|
32
|
+
#
|
33
|
+
# This class is not reentrant. A separate instance is needed for
|
34
|
+
# each thread that needs to convert Creole to HTML.
|
35
|
+
#
|
36
|
+
# Inherit this to provide custom handling of links. The overrideable
|
37
|
+
# methods are: make_local_link
|
38
|
+
class Creole
|
31
39
|
|
32
|
-
VERSION = '0.3.
|
40
|
+
VERSION = '0.3.4'
|
33
41
|
|
34
|
-
#
|
42
|
+
# CreoleError is raised when the Creole parser encounters
|
35
43
|
# something unexpected. This is generally not thrown unless there is
|
36
44
|
# a bug in the parser.
|
37
|
-
class
|
45
|
+
# Derives from StandardError (not Exception) so that a plain `rescue`
# clause in calling code catches parser failures.
class CreoleError < StandardError; end
|
38
46
|
|
39
47
|
# Convert the argument in Creole format to HTML and return the
|
40
48
|
# result. Example:
|
@@ -42,331 +50,309 @@ module Creole
|
|
42
50
|
# Creole.creolize("**Hello //World//**")
|
43
51
|
# #=> "<p><strong>Hello <em>World</em></strong></p>"
|
44
52
|
#
|
45
|
-
# This is an alias for calling
|
46
|
-
#
|
53
|
+
# This is an alias for calling Creole#parse:
|
54
|
+
# Creole.new.parse(creole)
|
47
55
|
def self.creolize(creole)
|
48
|
-
|
56
|
+
new.parse(creole)
|
57
|
+
end
|
58
|
+
|
59
|
+
# Create a new Creole parser instance.
|
60
|
+
def initialize
|
61
|
+
@base = nil
|
62
|
+
@allowed_schemes = [ 'http', 'https', 'ftp', 'ftps' ]
|
63
|
+
@uri_scheme_re = @allowed_schemes.join('|')
|
49
64
|
end
|
50
65
|
|
51
|
-
#
|
52
|
-
#
|
66
|
+
# Parse and convert the argument in Creole text to HTML and return
|
67
|
+
# the result. The resulting HTML does not contain <html> and
|
68
|
+
# <body> tags.
|
53
69
|
#
|
54
|
-
#
|
55
|
-
# each thread that needs to convert Creole to HTML.
|
70
|
+
# Example:
|
56
71
|
#
|
57
|
-
#
|
58
|
-
#
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
# Parse and convert the argument in Creole text to HTML and return
|
69
|
-
# the result. The resulting HTML does not contain <html> and
|
70
|
-
# <body> tags.
|
71
|
-
#
|
72
|
-
# Example:
|
73
|
-
#
|
74
|
-
# parser = CreoleParser.new
|
75
|
-
# parser.parse("**Hello //World//**")
|
76
|
-
# #=> "<p><strong>Hello <em>World</em></strong></p>"
|
77
|
-
def parse(string)
|
78
|
-
@out = ""
|
79
|
-
@strong = false
|
80
|
-
@p = false
|
81
|
-
@stack = []
|
82
|
-
parse_block(string)
|
83
|
-
return @out
|
84
|
-
end
|
72
|
+
# parser = Creole.new
|
73
|
+
# parser.parse("**Hello //World//**")
|
74
|
+
# #=> "<p><strong>Hello <em>World</em></strong></p>"
|
75
|
+
def parse(string)
|
76
|
+
@out = ""
|
77
|
+
@p = false
|
78
|
+
@stack = []
|
79
|
+
parse_block(string)
|
80
|
+
@out
|
81
|
+
end
|
85
82
|
|
86
|
-
|
87
|
-
# entities.
|
88
|
-
private
|
89
|
-
def escape_html(string)
|
90
|
-
CGI::escapeHTML(string)
|
91
|
-
end
|
83
|
+
protected
|
92
84
|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
end
|
85
|
+
# Escape any characters with special meaning in HTML using HTML
|
86
|
+
# entities.
|
87
|
+
def escape_html(string)
|
88
|
+
CGI::escapeHTML(string)
|
89
|
+
end
|
99
90
|
|
100
|
-
|
91
|
+
# Escape any characters with special meaning in URLs using URL
|
92
|
+
# encoding.
|
93
|
+
def escape_url(string)
|
94
|
+
CGI::escape(string)
|
95
|
+
end
|
101
96
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
97
|
+
def start_tag(tag)
|
98
|
+
@stack.push(tag)
|
99
|
+
@out << '<' << tag << '>'
|
100
|
+
end
|
106
101
|
|
107
|
-
|
108
|
-
|
109
|
-
|
102
|
+
def end_tag
|
103
|
+
@out << '</' << @stack.pop << '>'
|
104
|
+
end
|
110
105
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
else
|
116
|
-
@out << escape_html(match)
|
117
|
-
end
|
106
|
+
def toggle_tag(tag, match)
|
107
|
+
if @stack.include?(tag)
|
108
|
+
if @stack.last == tag
|
109
|
+
end_tag
|
118
110
|
else
|
119
|
-
|
111
|
+
@out << escape_html(match)
|
120
112
|
end
|
113
|
+
else
|
114
|
+
start_tag(tag)
|
121
115
|
end
|
116
|
+
end
|
122
117
|
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
118
|
+
def end_paragraph
|
119
|
+
end_tag while !@stack.empty?
|
120
|
+
@p = false
|
121
|
+
end
|
127
122
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
end
|
123
|
+
def start_paragraph
|
124
|
+
if @p
|
125
|
+
@out << ' ' if @out[-1,1] != ' '
|
126
|
+
else
|
127
|
+
end_paragraph
|
128
|
+
start_tag('p')
|
129
|
+
@p = true
|
136
130
|
end
|
131
|
+
end
|
137
132
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
end
|
133
|
+
# Create anchor markup for direct links. This
|
134
|
+
# method can be overridden to generate custom
|
135
|
+
# markup, for example to add additional HTML attributes.
|
136
|
+
def make_direct_anchor(uri, text)
|
137
|
+
'<a href="' << escape_html(uri) << '">' << escape_html(text) << '</a>'
|
138
|
+
end
|
145
139
|
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
end
|
140
|
+
# Create anchor markup for explicit links. This
|
141
|
+
# method can be overridden to generate custom
|
142
|
+
# markup, for example to add additional HTML attributes.
|
143
|
+
def make_explicit_anchor(uri, text)
|
144
|
+
'<a href="' << escape_html(uri) << '">' << escape_html(text) << '</a>'
|
145
|
+
end
|
153
146
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
end
|
147
|
+
# Translate an explicit local link to a desired URL that is
|
148
|
+
# properly URL-escaped. The default behaviour is to convert local
|
149
|
+
# links directly, escaping any characters that have special
|
150
|
+
# meaning in URLs. Relative URLs in local links are not handled.
|
151
|
+
#
|
152
|
+
# Examples:
|
153
|
+
#
|
154
|
+
# make_local_link("LocalLink") #=> "LocalLink"
|
155
|
+
# make_local_link("/Foo/Bar") #=> "%2FFoo%2FBar"
|
156
|
+
#
|
157
|
+
# Must ensure that the result is properly URL-escaped. The caller
|
158
|
+
# will handle HTML escaping as necessary. HTML links will not be
|
159
|
+
# inserted if the function returns nil.
|
160
|
+
#
|
161
|
+
# Example custom behaviour:
|
162
|
+
#
|
163
|
+
# make_local_link("LocalLink") #=> "/LocalLink"
|
164
|
+
# make_local_link("Wikipedia:Bread") #=> "http://en.wikipedia.org/wiki/Bread"
|
165
|
+
def make_local_link(link) #:doc:
|
166
|
+
escape_url(link)
|
167
|
+
end
|
176
168
|
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
end
|
169
|
+
# Sanitize a direct URL (e.g. http://wikipedia.org/). The default
|
170
|
+
# behaviour returns the original link as-is.
|
171
|
+
#
|
172
|
+
# Must ensure that the result is properly URL-escaped. The caller
|
173
|
+
# will handle HTML escaping as necessary. Links will not be
|
174
|
+
# converted to HTML links if the function returns nil.
|
175
|
+
#
|
176
|
+
# Custom versions of this function in inherited classes can
|
177
|
+
# implement specific link handling behaviour, such as redirection
|
178
|
+
# to intermediate pages (for example, for notifying the user that
|
179
|
+
# he is leaving the site).
|
180
|
+
def make_direct_link(url) #:doc:
|
181
|
+
url
|
182
|
+
end
|
192
183
|
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
end
|
184
|
+
# Sanitize and prefix image URLs. When images are encountered in
|
185
|
+
# Creole text, this function is called to obtain the actual URL of
|
186
|
+
# the image. The default behaviour is to return the image link
|
187
|
+
# as-is. No image tags are inserted if the function returns nil.
|
188
|
+
#
|
189
|
+
# Custom versions of this method can be used to sanitize URLs
|
190
|
+
# (e.g. remove query-parts), inhibit off-site images, or add a
|
191
|
+
# base URL, for example:
|
192
|
+
#
|
193
|
+
# def make_image_link(url)
|
194
|
+
# URI.join("http://mywiki.org/images/", url)
|
195
|
+
# end
|
196
|
+
def make_image_link(url) #:doc:
|
197
|
+
url
|
198
|
+
end
|
209
199
|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
'<img src="' << escape_html(uri) << '"/>'
|
220
|
-
end
|
200
|
+
# Create image markup. This
|
201
|
+
# method can be overridden to generate custom
|
202
|
+
# markup, for example to add additional HTML attributes or
|
203
|
+
# to put divs around the imgs.
|
204
|
+
def make_image(uri, alt)
|
205
|
+
if alt
|
206
|
+
'<img src="' << escape_html(uri) << '" alt="' << escape_html(alt) << '"/>'
|
207
|
+
else
|
208
|
+
'<img src="' << escape_html(uri) << '"/>'
|
221
209
|
end
|
210
|
+
end
|
222
211
|
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
rescue URI::InvalidURIError
|
229
|
-
end
|
230
|
-
return make_local_link(link)
|
212
|
+
def make_explicit_link(link)
|
213
|
+
begin
|
214
|
+
uri = URI.parse(link)
|
215
|
+
return uri.to_s if uri.scheme && @allowed_schemes.include?(uri.scheme)
|
216
|
+
rescue URI::InvalidURIError
|
231
217
|
end
|
218
|
+
make_local_link(link)
|
219
|
+
end
|
232
220
|
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
else
|
243
|
-
@out << escape_html($&)
|
244
|
-
end
|
245
|
-
end
|
246
|
-
when /\A\[\[\s*([^|]*?)\s*(\|\s*(.*?))?\s*\]\]/m
|
247
|
-
link = $1
|
248
|
-
if uri = make_explicit_link(link)
|
249
|
-
@out << make_explicit_anchor(uri, $3 || link)
|
250
|
-
else
|
251
|
-
@out << escape_html($&)
|
252
|
-
end
|
253
|
-
when /\A\{\{\{(.*)\}\}\}/
|
254
|
-
@out << '<tt>' << escape_html($1) << '</tt>'
|
255
|
-
when /\A\{\{\s*(.*?)\s*(\|\s*(.*?)\s*)?\}\}/
|
256
|
-
if uri = make_image_link($1)
|
257
|
-
@out << make_image(uri, $3)
|
221
|
+
def parse_inline(str)
|
222
|
+
until str.empty?
|
223
|
+
case str
|
224
|
+
when /\A(\~)?((https?|ftps?):\/\/\S+?)(?=([,.?!:;"'\)])?(\s|$))/
|
225
|
+
if $1
|
226
|
+
@out << escape_html($2)
|
227
|
+
else
|
228
|
+
if uri = make_direct_link($2)
|
229
|
+
@out << make_direct_anchor(uri, $2)
|
258
230
|
else
|
259
231
|
@out << escape_html($&)
|
260
232
|
end
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
when /\A\*\*/
|
268
|
-
toggle_tag 'strong', $&
|
269
|
-
when /\A\/\//
|
270
|
-
toggle_tag 'em', $&
|
271
|
-
when /\A\\\\/
|
272
|
-
@out << '<br/>'
|
273
|
-
when /./
|
233
|
+
end
|
234
|
+
when /\A\[\[\s*([^|]*?)\s*(\|\s*(.*?))?\s*\]\]/m
|
235
|
+
link = $1
|
236
|
+
if uri = make_explicit_link(link)
|
237
|
+
@out << make_explicit_anchor(uri, $3 || link)
|
238
|
+
else
|
274
239
|
@out << escape_html($&)
|
240
|
+
end
|
241
|
+
when /\A\{\{\{(.*)\}\}\}/
|
242
|
+
@out << '<tt>' << escape_html($1) << '</tt>'
|
243
|
+
when /\A\{\{\s*(.*?)\s*(\|\s*(.*?)\s*)?\}\}/
|
244
|
+
if uri = make_image_link($1)
|
245
|
+
@out << make_image(uri, $3)
|
275
246
|
else
|
276
|
-
|
247
|
+
@out << escape_html($&)
|
277
248
|
end
|
278
|
-
|
249
|
+
when /\A~([^\s])/
|
250
|
+
@out << escape_html($1)
|
251
|
+
when /\A\w+/
|
252
|
+
@out << $&
|
253
|
+
when /\A\s+/
|
254
|
+
@out << ' ' if @out[-1,1] != ' '
|
255
|
+
when /\A\*\*/
|
256
|
+
toggle_tag 'strong', $&
|
257
|
+
when /\A\/\//
|
258
|
+
toggle_tag 'em', $&
|
259
|
+
when /\A\\\\/
|
260
|
+
@out << '<br/>'
|
261
|
+
when /./
|
262
|
+
@out << escape_html($&)
|
263
|
+
else
|
264
|
+
raise CreoleError, "Parse error at #{str[0,30].inspect}"
|
279
265
|
end
|
266
|
+
str = $'
|
280
267
|
end
|
268
|
+
end
|
281
269
|
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
end
|
270
|
+
def parse_table_row(str)
|
271
|
+
@out << '<tr>'
|
272
|
+
str.scan(/\s*\|(=)?\s*((\[\[.*?\]\]|\{\{.*?\}\}|[^|~]|~.)*)(?=\||$)/) do
|
273
|
+
if !$2.empty? || !$'.empty?
|
274
|
+
@out << ($1 ? '<th>' : '<td>')
|
275
|
+
parse_inline($2) if $2
|
276
|
+
end_tag while @stack.last != 'table'
|
277
|
+
@out << ($1 ? '</th>' : '</td>')
|
291
278
|
end
|
292
|
-
@out << '</tr>'
|
293
279
|
end
|
280
|
+
@out << '</tr>'
|
281
|
+
end
|
294
282
|
|
295
|
-
|
296
|
-
|
297
|
-
|
283
|
+
def make_nowikiblock(input)
|
284
|
+
input.gsub(/^ (?=\}\}\})/, '')
|
285
|
+
end
|
298
286
|
|
299
|
-
|
287
|
+
# True when +x+ is one of the list container tag names, 'ul' or 'ol'.
def ulol?(x); ['ul', 'ol'].include?(x); end
|
300
288
|
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
289
|
+
def parse_block(str)
|
290
|
+
until str.empty?
|
291
|
+
case str
|
292
|
+
when /\A\{\{\{\r?\n(.*?)\r?\n\}\}\}/m
|
293
|
+
end_paragraph
|
294
|
+
nowikiblock = make_nowikiblock($1)
|
295
|
+
@out << '<pre>' << escape_html(nowikiblock) << '</pre>'
|
296
|
+
when /\A\s*-{4,}\s*$/
|
297
|
+
end_paragraph
|
298
|
+
@out << '<hr/>'
|
299
|
+
when /\A\s*(={1,6})\s*(.*?)\s*=*\s*$(\r?\n)?/
|
300
|
+
end_paragraph
|
301
|
+
level = $1.size
|
302
|
+
@out << "<h#{level}>" << escape_html($2) << "</h#{level}>"
|
303
|
+
when /\A[ \t]*\|.*$(\r?\n)?/
|
304
|
+
if !@stack.include?('table')
|
312
305
|
end_paragraph
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
306
|
+
start_tag('table')
|
307
|
+
end
|
308
|
+
parse_table_row($&)
|
309
|
+
when /\A\s*$(\r?\n)?/
|
310
|
+
end_paragraph
|
311
|
+
when /\A(\s*([*#]+)\s*(.*?))$(\r?\n)?/
|
312
|
+
line, bullet, item = $1, $2, $3
|
313
|
+
tag = (bullet[0,1] == '*' ? 'ul' : 'ol')
|
314
|
+
if bullet[0,1] == '#' || bullet.size != 2 || @stack.find {|x| ulol?(x) }
|
315
|
+
count = @stack.select { |x| ulol?(x) }.size
|
316
|
+
|
317
|
+
while !@stack.empty? && count > bullet.size
|
318
|
+
count -= 1 if ulol?(@stack.last)
|
319
|
+
end_tag
|
319
320
|
end
|
320
|
-
parse_table_row($&)
|
321
|
-
when /\A\s*$(\r?\n)?/
|
322
|
-
end_paragraph
|
323
|
-
when /\A(\s*([*#]+)\s*(.*?))$(\r?\n)?/
|
324
|
-
line, bullet, item = $1, $2, $3
|
325
|
-
tag = (bullet[0,1] == '*' ? 'ul' : 'ol')
|
326
|
-
if bullet[0,1] == '#' || bullet.size != 2 || @stack.find {|x| ulol?(x) }
|
327
|
-
count = @stack.select { |x| ulol?(x) }.size
|
328
|
-
|
329
|
-
while !@stack.empty? && count > bullet.size
|
330
|
-
count -= 1 if ulol?(@stack.last)
|
331
|
-
end_tag
|
332
|
-
end
|
333
321
|
|
334
|
-
|
322
|
+
end_tag while !@stack.empty? && @stack.last != 'li'
|
335
323
|
|
336
|
-
|
324
|
+
if @stack.last == 'li' && count == bullet.size
|
325
|
+
end_tag
|
326
|
+
if @stack.last != tag
|
337
327
|
end_tag
|
338
|
-
|
339
|
-
end_tag
|
340
|
-
count -= 1
|
341
|
-
end
|
342
|
-
end
|
343
|
-
|
344
|
-
while count < bullet.size
|
345
|
-
start_tag tag
|
346
|
-
count += 1
|
347
|
-
start_tag 'li' if count < bullet.size
|
328
|
+
count -= 1
|
348
329
|
end
|
330
|
+
end
|
349
331
|
|
350
|
-
|
351
|
-
start_tag
|
352
|
-
|
353
|
-
|
354
|
-
start_paragraph
|
355
|
-
parse_inline(line)
|
332
|
+
while count < bullet.size
|
333
|
+
start_tag tag
|
334
|
+
count += 1
|
335
|
+
start_tag 'li' if count < bullet.size
|
356
336
|
end
|
357
|
-
|
358
|
-
|
359
|
-
|
337
|
+
|
338
|
+
@p = true
|
339
|
+
start_tag('li')
|
340
|
+
parse_inline(item)
|
360
341
|
else
|
361
|
-
|
342
|
+
start_paragraph
|
343
|
+
parse_inline(line)
|
362
344
|
end
|
363
|
-
|
364
|
-
|
345
|
+
when /\A([ \t]*\S+.*?)$(\r?\n)?/
|
346
|
+
start_paragraph
|
347
|
+
parse_inline($1)
|
348
|
+
else
|
349
|
+
raise CreoleError, "Parse error at #{str[0,30].inspect}"
|
365
350
|
end
|
366
|
-
|
367
|
-
|
351
|
+
#p [$&, $']
|
352
|
+
str = $'
|
368
353
|
end
|
354
|
+
end_paragraph
|
355
|
+
@out
|
356
|
+
end
|
369
357
|
|
370
|
-
|
371
|
-
|
372
|
-
end # module Creole
|
358
|
+
end # class Creole
|