kron4eg-wikicreole 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/COPYING +340 -0
- data/Changelog +20 -0
- data/LICENSE +52 -0
- data/README +14 -0
- data/Rakefile +26 -0
- data/lib/wiki_creole.rb +973 -0
- data/test/profiling.rb +22 -0
- data/test/test_all.rb +891 -0
- data/test/test_amp.html +3 -0
- data/test/test_amp.markup +2 -0
- data/test/test_block.html +131 -0
- data/test/test_block.markup +69 -0
- data/test/test_escape.html +33 -0
- data/test/test_escape.markup +35 -0
- data/test/test_inline.html +67 -0
- data/test/test_inline.markup +48 -0
- data/test/test_jsp_wiki.html +157 -0
- data/test/test_jsp_wiki.markup +100 -0
- data/test/test_nested_lists.html +10 -0
- data/test/test_nested_lists.markup +4 -0
- data/test/test_specialchars.html +29 -0
- data/test/test_specialchars.markup +18 -0
- metadata +76 -0
data/lib/wiki_creole.rb
ADDED
@@ -0,0 +1,973 @@
|
|
1
|
+
# WikiCreole implements the Wiki Creole markup language,
|
2
|
+
# version 1.0, as described at http://www.wikicreole.org. It
|
3
|
+
# reads Creole 1.0 markup and returns XHTML.
|
4
|
+
#
|
5
|
+
# Author:: Gordon McCreight (mailto:wikicreole.to.gordon@mccreight.com)
|
6
|
+
# Copyright:: Copyright (c) 2008 Gordon McCreight
|
7
|
+
# License:: Distributed under the same terms as Ruby (see the LICENSE file)
|
8
|
+
# Version:: 0.1.3
|
9
|
+
# Date:: 2009-02-05
|
10
|
+
#
|
11
|
+
# == Synopsis
|
12
|
+
# Most likely you'll just want to do:
|
13
|
+
# require 'rubygems'
|
14
|
+
# require 'wiki_creole'
|
15
|
+
# xhtml = WikiCreole.creole_parse(wiki_creole_markup)
|
16
|
+
# If you want to override the default behaviors, make sure to look at the other
|
17
|
+
# public methods.
|
18
|
+
#
|
19
|
+
# == Official Markup
|
20
|
+
#
|
21
|
+
# Here is a summary of the official Creole 1.0 markup
|
22
|
+
# elements. See http://www.wikicreole.org for the full
|
23
|
+
# details.
|
24
|
+
#
|
25
|
+
# Headings:
|
26
|
+
# = heading 1 -> <h1>heading 1</h1>
|
27
|
+
# == heading 2 -> <h2>heading 2</h2>
|
28
|
+
# ...
|
29
|
+
# ====== heading 6 -> <h6>heading 6</h6>
|
30
|
+
#
|
31
|
+
# Various inline markup:
|
32
|
+
# ** bold ** -> <strong> bold </strong>
|
33
|
+
# // italics // -> <em> italics </em>
|
34
|
+
# **// both //** -> <strong><em> both </em></strong>
|
35
|
+
# [[ link ]] -> <a href="link">link</a>
|
36
|
+
# [[ link | text ]] -> <a href="link">text</a>
|
37
|
+
# http://cpan.org -> <a href="http://cpan.org">http://cpan.org</a>
|
38
|
+
# line \\ break -> line <br /> break
|
39
|
+
# {{img.jpg|alt}} -> <img src="img.jpg" alt="alt" />
|
40
|
+
#
|
41
|
+
# Lists:
|
42
|
+
# * unordered list <ul><li>unordered list</li>
|
43
|
+
# * second item <li>second item</li>
|
44
|
+
# ## nested ordered -> <ol><li>nested ordered</li>
|
45
|
+
# *** uber-nested <ul><li>uber-nested</li></ul>
|
46
|
+
# * back to level 1 </ol><li>back to level 1</li></ul>
|
47
|
+
#
|
48
|
+
# Tables:
|
49
|
+
# |= h1 |= h2 -> <table><tr><th>h1</th><th>h2</th></tr>
|
50
|
+
# | c1 | c2 <tr><td>c1</td><td>c2</td></tr></table>
|
51
|
+
#
|
52
|
+
# Nowiki (Preformatted):
|
53
|
+
# {{{ <pre>
|
54
|
+
# ** not bold ** ** not bold **
|
55
|
+
# escaped HTML: -> escaped HTML:
|
56
|
+
# <i> test </i> &lt;i&gt; test &lt;/i&gt;
|
57
|
+
# }}} </pre>
|
58
|
+
#
|
59
|
+
# {{{ inline\\also }}} -> <tt>inline\\also</tt>
|
60
|
+
#
|
61
|
+
# Escape Character:
|
62
|
+
# ~** not bold ** -> ** not bold **
|
63
|
+
# tilde: ~~ -> tilde: ~
|
64
|
+
#
|
65
|
+
# Paragraphs are separated by other blocks and blank lines.
|
66
|
+
# Inline markup can usually be combined, overlapped, etc. List
|
67
|
+
# items and plugin text can span lines.
|
68
|
+
#
|
69
|
+
# == Extended Markup
|
70
|
+
#
|
71
|
+
# In addition to the official markup above, WikiCreole also supports
|
72
|
+
# the following markup:
|
73
|
+
#
|
74
|
+
# Plugins:
|
75
|
+
# << plugin >> -> whatever you want (see WikiCreole.creole_plugin)
|
76
|
+
# <<< plugin >>> -> whatever you want (see WikiCreole.creole_plugin)
|
77
|
+
# Triple-bracket syntax has priority, in order to allow you to embed
|
78
|
+
# double-brackets in plugins, such as to embed Perl code.
|
79
|
+
#
|
80
|
+
# Inline:
|
81
|
+
# ## monospace ## -> <tt> monospace </tt>
|
82
|
+
# ^^ superscript ^^ -> <sup> superscript </sup>
|
83
|
+
# ,, subscript ,, -> <sub> subscript </sub>
|
84
|
+
# __ underline __ -> <u> underline </u>
|
85
|
+
# (TM) -> ™
|
86
|
+
# (R) -> ®
|
87
|
+
# (C) -> ©
|
88
|
+
# ... -> …
|
89
|
+
# -- -> –
|
90
|
+
#
|
91
|
+
# Indented Paragraphs:
|
92
|
+
# :this -> <div style="margin-left:2em"><p>this
|
93
|
+
# is indented is indented</p>
|
94
|
+
# :: more indented <div style="margin-left:2em"><p> more
|
95
|
+
# indented</div></div>
|
96
|
+
#
|
97
|
+
# Definition Lists:
|
98
|
+
# ; Title -> <dl><dt>Title</dt>
|
99
|
+
# : item 1 : item 2 <dd>item 1</dd><dd>item 2</dd>
|
100
|
+
# ; Title 2 : item2a <dt>Title 2</dt><dd>item 2a</dd></dl>
|
101
|
+
#
|
102
|
+
# == Acknowledgements
|
103
|
+
# Most of this code is ported from Jason Burnett's excellent Perl-based
|
104
|
+
# converter which can be found here:
|
105
|
+
# http://search.cpan.org/~jburnett/Text-WikiCreole/
|
106
|
+
# He, in turn, acknowledges the Document::Parser perl module.
|
107
|
+
#
|
108
|
+
# Also, some of the tests are taken from Lars Christensen's implementation of
|
109
|
+
# the Creole parser. You can find his code at:
|
110
|
+
# http://github.com/larsch/creole/tree/master
|
111
|
+
#
|
112
|
+
# Other tests come from the wikicreole website itself, here:
|
113
|
+
# http://www.wikicreole.org/
|
114
|
+
|
115
|
+
class WikiCreole
|
116
|
+
|
117
|
+
# Reads Creole 1.0 markup and returns XHTML.
#
#   xhtml = WikiCreole.creole_parse(wiki_creole_markup)
#
# Anything that is not a String, and the empty String, yields "".
# Otherwise the grammar is prepared via +init+ and the markup is parsed as
# a :top chunk.
def self.creole_parse(s)
  # Guard first so that init/parse only ever see a non-empty String.
  return "" if !(String === s) || s.empty?

  init
  parse(s, :top)
end
|
127
|
+
|
128
|
+
# Registers the handler for plugin markup. Two plugin syntaxes are
# recognized: << plugin content >> and <<< plugin content >>>
#
# The block receives the text between the delimiters (the delimiters
# themselves are not included) and returns the text to be displayed.
# For example, a plugin that upcases its content:
#
#   WikiCreole.creole_plugin {|s| s.upcase }
#
# While no handler is registered, plugin markup is left as is, including
# the surrounding << >>. Calling this method without a block stores nil,
# i.e. removes a previously registered handler.
def self.creole_plugin(&blk)
  @plugin_function = blk
end
|
143
|
+
|
144
|
+
# Registers a block used to customize the URL of [[ links ]], e.g. to
# prefix a hostname, port, etc.
#
# The block receives the URL part of the link (with leading and trailing
# whitespace stripped) and returns the URL to emit. For example, to
# prepend http://my.domain/ to the page name:
#
#   WikiCreole.creole_link {|s| "http://my.domain/#{s}" }
#
# Calling this method without a block stores nil, i.e. removes a
# previously registered block.
def self.creole_link(&blk)
  @link_function = blk
end
|
157
|
+
|
158
|
+
# Counterpart of creole_link for "bare" links, i.e. URLs that appear in
# the text without surrounding brackets. The block receives the URL and
# returns the replacement.
#
#   WikiCreole.creole_barelink {|s| "#{s}.html" }
def self.creole_barelink(&blk)
  @barelink_function = blk
end
|
165
|
+
|
166
|
+
# Counterpart of creole_link for image URLs. The block receives the image
# source and returns the replacement.
#
#   WikiCreole.creole_img {|s| "http://my.domain/#{s}" }
def self.creole_img(&blk)
  @img_function = blk
end
|
172
|
+
|
173
|
+
# Gives the registered link function complete control over [[ link ]]
# markup instead of just the URL.
#
# Register your function with WikiCreole.creole_link() and then call
# creole_customlinks(). From then on the function receives the entire
# link markup chunk, such as
# <tt>[[ some_wiki_page | page description ]]</tt>, and must return HTML.
#
# This has no effect on "bare" link markup, such as
# http://cpan.org
def self.creole_customlinks
  # Neither the href part nor the link itself emits tags of its own anymore.
  @@chunks_hash[:href].update(:open => "", :close => "")

  link = @@chunks_hash[:link]
  link.update(:open => "", :close => "")
  # Without :contains the parser no longer splits the link into sub-chunks.
  link.delete(:contains)
  # Hand the raw chunk to the user function; pass it through when none is set.
  link[:filter] = Proc.new {|markup|
    @link_function ? @link_function.call(markup) : markup
  }
end
|
192
|
+
|
193
|
+
# Same idea as creole_customlinks, but for "bare" link markup: the
# function registered with creole_barelink receives the whole bare-link
# chunk and its return value is emitted without any surrounding tags.
def self.creole_custombarelinks
  ilink = @@chunks_hash[:ilink]
  ilink.update(:open => "", :close => "")
  # Pass the chunk through unchanged when no function is registered.
  ilink[:filter] = Proc.new {|markup|
    @barelink_function ? @barelink_function.call(markup) : markup
  }
end
|
202
|
+
|
203
|
+
# Same idea as creole_customlinks, but for images: the function
# registered with creole_img receives the whole image chunk and its
# return value is emitted without any surrounding tags.
def self.creole_customimgs
  img = @@chunks_hash[:img]
  img.update(:open => "", :close => "")
  # Without :contains the parser no longer splits the chunk into src/alt.
  img.delete(:contains)
  # Pass the chunk through unchanged when no function is registered.
  img[:filter] = Proc.new {|markup|
    @img_function ? @img_function.call(markup) : markup
  }
end
|
213
|
+
|
214
|
+
# Customizes the opening and/or closing text emitted for a piece of
# Creole markup. For example, to assign a CSS class to list items:
#
#   WikiCreole.creole_tag(:li, :open, "<li class=myclass>")
#
# +tag+::  name of the markup chunk, as a Symbol or a String
# +type+:: :open or :close (Symbol or String)
# +text+:: replacement text; defaults to the empty string
#
# Returns +text+ when the tag was updated and nil when +type+ is neither
# open nor close or +tag+ is unknown.
#
# The tags that may be of interest are:
#
#   br          dd          dl
#   dt          em          h1
#   h2          h3          h4
#   h5          h6          hr
#   ilink       img         inowiki
#   ip          li          link
#   mono        nowiki      ol
#   p           strong      sub
#   sup         table       td
#   th          tr          u
#   ul
#
# Those should be self-explanatory, except for inowiki (inline nowiki),
# ilink (bare links, e.g. http://www.cpan.org) and ip (indented
# paragraph).
def self.creole_tag(tag, type, text="")
  type = type.to_sym
  return unless [:open, :close].include?(type)

  # The grammar table is keyed by Symbols. Normalize +tag+ the same way
  # +type+ is normalized so that "li" works as well as :li; values that
  # cannot be symbolized simply fall through to the unknown-tag case.
  tag = tag.to_sym if tag.respond_to?(:to_sym)
  return unless @@chunks_hash.has_key?(tag)

  @@chunks_hash[tag][type] = text
end
|
243
|
+
|
244
|
+
# Returns a printable summary of all current tags:
#
#   puts WikiCreole.creole_tags()
#
# One line is produced per chunk, sorted by chunk name, in the form
# "name: open(...) close(...)". Chunks whose opening text contains no
# "<" are left out, and newlines inside the tag text are shown as a
# literal \n.
def self.creole_tags
  names = @@chunks_hash.keys.sort_by {|name| name.to_s }

  lines = names.map do |name|
    spec = @@chunks_hash[name]
    opening = spec[:open] || ""
    closing = spec[:close] || ""
    next unless opening =~ /</m   # not an HTML tag - skip it

    opening = opening.gsub(/\n/m, "\\n")
    closing = closing.gsub(/\n/m, "\\n")
    "#{name}: open(#{opening}) close(#{closing})\n"
  end

  lines.compact.join
end
|
261
|
+
|
262
|
+
private
|
263
|
+
|
264
|
+
# Characters that may indicate inline wiki markup.
SPECIALCHARS = [
  '^', '\\', '*', '/', '_', ',', '{', '[', '<',
  '~', '|', "\n", '#', ':', ';', '(', '-', '.'
]

# "Plain content" characters: ASCII printable (codes 32 to 126) minus
# everything listed in SPECIALCHARS.
PLAINCHARS = (32..126).collect {|code| code.chr } - SPECIALCHARS

# Non-plain text inline widgets.
INLINE = [
  'strong', 'em', 'br', 'esc', 'img', 'link', 'ilink', 'inowiki',
  'sub', 'sup', 'mono', 'u', 'plug', 'plug2', 'tm', 'reg', 'copy',
  'ndash', 'ellipsis', 'amp'
]

# Every inline widget, including plain text and the catch-all.
ALL_INLINE = INLINE + ['plain', 'any']

# Block-level chunks that may appear at the top level.
BLOCKS = [
  'h1', 'h2', 'h3', 'hr', 'nowiki', 'h4', 'h5', 'h6', 'ul', 'ol',
  'table', 'p', 'ip', 'dl', 'plug', 'plug2', 'blank'
]

# Regex source for end of line (or string); used by several entries of
# the grammar table.
EOL = '(?:\n|$)'.freeze

# User-registered callbacks (see creole_plugin, creole_barelink,
# creole_link and creole_img); nil until one is registered.
@plugin_function = nil
@barelink_function = nil
@link_function = nil
@img_function = nil

# Set to true by init once the grammar has been precompiled.
@is_initialized = false
|
290
|
+
|
291
|
+
@@chunks_hash = {
|
292
|
+
:top => {
|
293
|
+
:contains => BLOCKS,
|
294
|
+
},
|
295
|
+
:blank => {
|
296
|
+
:curpat => "(?= *#{EOL})",
|
297
|
+
:fwpat => "(?=(?:^|\n) *#{EOL})",
|
298
|
+
:stops => '(?=\S)',
|
299
|
+
:hint => ["\n"],
|
300
|
+
:filter => Proc.new { "" }, # whitespace into the bit bucket
|
301
|
+
:open => "", :close => "",
|
302
|
+
},
|
303
|
+
:p => {
|
304
|
+
:curpat => '(?=.)',
|
305
|
+
:stops => ['blank', 'ip', 'h', 'hr', 'nowiki', 'ul', 'ol', 'dl', 'table'],
|
306
|
+
:hint => PLAINCHARS,
|
307
|
+
:contains => ALL_INLINE,
|
308
|
+
:filter => Proc.new {|s| s.chomp },
|
309
|
+
:open => "<p>", :close => "</p>\n\n",
|
310
|
+
},
|
311
|
+
:ip => {
|
312
|
+
:curpat => '(?=:)',
|
313
|
+
:fwpat => '\n(?=:)',
|
314
|
+
:stops => ['blank', 'h', 'hr', 'nowiki', 'ul', 'ol', 'dl', 'table'],
|
315
|
+
:hint => [':'],
|
316
|
+
:contains => ['p', 'ip'],
|
317
|
+
:filter => Proc.new {|s|
|
318
|
+
s.sub!(/:/, '')
|
319
|
+
s.sub!(/\n:/m, "\n")
|
320
|
+
s
|
321
|
+
},
|
322
|
+
:open => "<div style=\"margin-left: 2em\">", :close => "</div>\n",
|
323
|
+
},
|
324
|
+
:dl => {
|
325
|
+
:curpat => '(?=;)',
|
326
|
+
:fwpat => '\n(?=;)',
|
327
|
+
:stops => ['blank', 'h', 'hr', 'nowiki', 'ul', 'ol', 'table'],
|
328
|
+
:hint => [';'],
|
329
|
+
:contains => ['dt', 'dd'],
|
330
|
+
:open => "<dl>\n", :close => "</dl>\n",
|
331
|
+
},
|
332
|
+
:dt => {
|
333
|
+
:curpat => '(?=;)',
|
334
|
+
:fwpat => '\n(?=;)',
|
335
|
+
:stops => '(?=:|\n)',
|
336
|
+
:hint => [';'],
|
337
|
+
:contains => ALL_INLINE,
|
338
|
+
:filter => Proc.new {|s|
|
339
|
+
s.sub!(/^;\s*/, '')
|
340
|
+
s
|
341
|
+
},
|
342
|
+
:open => " <dt>", :close => "</dt>\n",
|
343
|
+
},
|
344
|
+
:dd => {
|
345
|
+
:curpat => '(?=\n|:)',
|
346
|
+
:fwpat => '(?:\n|:)',
|
347
|
+
:stops => '.(?=:)|\n(?=;)',
|
348
|
+
:hint => [':', "\n"],
|
349
|
+
:contains => ALL_INLINE,
|
350
|
+
:filter => Proc.new {|s|
|
351
|
+
s.sub!(/(?:\n|:)\s*/m, '')
|
352
|
+
s.sub!(/\s*$/m, '')
|
353
|
+
s
|
354
|
+
},
|
355
|
+
:open => " <dd>", :close => "</dd>\n",
|
356
|
+
},
|
357
|
+
:table => {
|
358
|
+
:curpat => '(?= *\|.)',
|
359
|
+
:fwpat => '\n(?= *\|.)',
|
360
|
+
:stops => '\n(?= *[^\|])',
|
361
|
+
:contains => ['tr'],
|
362
|
+
:hint => ['|', ' '],
|
363
|
+
:open => "<table>\n", :close => "</table>\n\n",
|
364
|
+
},
|
365
|
+
:tr => {
|
366
|
+
:curpat => '(?= *\|)',
|
367
|
+
:stops => '\n',
|
368
|
+
:contains => ['td', 'th'],
|
369
|
+
:hint => ['|', ' '],
|
370
|
+
:filter => Proc.new {|s|
|
371
|
+
s.sub!(/^ */, '')
|
372
|
+
s.sub!(/\| *$/, '')
|
373
|
+
s
|
374
|
+
},
|
375
|
+
:open => " <tr>\n", :close => " </tr>\n",
|
376
|
+
},
|
377
|
+
:td => {
|
378
|
+
:curpat => '(?=\|[^=])',
|
379
|
+
# this gnarly regex fixes ambiguous '|' for links/imgs/nowiki in tables
|
380
|
+
:stops => '[^~](?=\|(?!(?:[^\[]*\]\])|(?:[^\{]*\}\})))',
|
381
|
+
:contains => ALL_INLINE,
|
382
|
+
:hint => ['|'],
|
383
|
+
:filter => Proc.new {|s|
|
384
|
+
s.sub!(/^ *\| */, '')
|
385
|
+
s.sub!(/\s*$/m, '')
|
386
|
+
s
|
387
|
+
},
|
388
|
+
:open => " <td>", :close => "</td>\n",
|
389
|
+
},
|
390
|
+
:th => {
|
391
|
+
:curpat => '(?=\|=)',
|
392
|
+
# this gnarly regex fixes ambiguous '|' for links/imgs/nowiki in tables
|
393
|
+
:stops => '[^~](?=\|(?!(?:[^\[]*\]\])|(?:[^\{]*\}\})))',
|
394
|
+
:contains => ALL_INLINE,
|
395
|
+
:hint => ['|'],
|
396
|
+
:filter => Proc.new {|s|
|
397
|
+
s.sub!(/^ *\|= */, '')
|
398
|
+
s.sub!(/\s*$/m, '')
|
399
|
+
s
|
400
|
+
},
|
401
|
+
:open => " <th>", :close => "</th>\n",
|
402
|
+
},
|
403
|
+
:ul => {
|
404
|
+
:curpat => '(?=(?:`| *)\*[^*])',
|
405
|
+
:fwpat => '(?=\n(?:`| *)\*[^*])',
|
406
|
+
:stops => ['blank', 'ip', 'h', 'nowiki', 'li', 'table', 'hr', 'dl'],
|
407
|
+
:contains => ['ul', 'ol', 'li'],
|
408
|
+
:hint => ['*', ' '],
|
409
|
+
:filter => Proc.new {|s|
|
410
|
+
s = strip_list(s)
|
411
|
+
s
|
412
|
+
},
|
413
|
+
:open => "<ul>\n", :close => "</ul>\n",
|
414
|
+
},
|
415
|
+
:ol => {
|
416
|
+
:curpat => '(?=(?:`| *)\#[^#])',
|
417
|
+
:fwpat => '(?=\n(?:`| *)\#[^#])',
|
418
|
+
:stops => ['blank', 'ip', 'h', 'nowiki', 'li', 'table', 'hr', 'dl'],
|
419
|
+
:contains => ['ul', 'ol', 'li'],
|
420
|
+
:hint => ['#', ' '],
|
421
|
+
:filter => Proc.new {|s|
|
422
|
+
s = strip_list(s)
|
423
|
+
s
|
424
|
+
},
|
425
|
+
:open => "<ol>\n", :close => "</ol>\n",
|
426
|
+
},
|
427
|
+
:li => {
|
428
|
+
:curpat => '(?=`[^*#])',
|
429
|
+
:fwpat => '\n(?=`[^*#])',
|
430
|
+
:stops => '\n(?=`)',
|
431
|
+
:hint => ['`'],
|
432
|
+
:filter => Proc.new {|s|
|
433
|
+
s.sub!(/` */, '')
|
434
|
+
s.chomp!
|
435
|
+
s
|
436
|
+
},
|
437
|
+
:contains => ALL_INLINE,
|
438
|
+
:open => " <li>", :close => "</li>\n",
|
439
|
+
},
|
440
|
+
:nowiki => {
|
441
|
+
:curpat => '(?=\{\{\{ *\n)',
|
442
|
+
:fwpat => '\n(?=\{\{\{ *\n)',
|
443
|
+
:stops => "\n\\}\\}\\} *#{EOL}",
|
444
|
+
:hint => ['{'],
|
445
|
+
:filter => Proc.new {|s|
|
446
|
+
s[0,3] = ''
|
447
|
+
s.sub!(/\}{3}\s*$/, '')
|
448
|
+
s.gsub!(/&/, '&')
|
449
|
+
s.gsub!(/</, '<')
|
450
|
+
s.gsub!(/>/, '>')
|
451
|
+
s
|
452
|
+
},
|
453
|
+
:open => "<pre>", :close => "</pre>\n\n",
|
454
|
+
},
|
455
|
+
:hr => {
|
456
|
+
:curpat => "(?= *-{4,} *#{EOL})",
|
457
|
+
:fwpat => "\n(?= *-{4,} *#{EOL})",
|
458
|
+
:hint => ['-', ' '],
|
459
|
+
:stops => EOL,
|
460
|
+
:open => "<hr />\n\n", :close => "",
|
461
|
+
:filter => Proc.new { "" } # ----- into the bit bucket
|
462
|
+
},
|
463
|
+
:h => { :curpat => '(?=(?:^|\n) *=)' }, # matches any heading
|
464
|
+
:h1 => {
|
465
|
+
:curpat => '(?= *=[^=])',
|
466
|
+
:hint => ['=', ' '],
|
467
|
+
:stops => '\n',
|
468
|
+
:contains => ALL_INLINE,
|
469
|
+
:open => "<h1>", :close => "</h1>\n\n",
|
470
|
+
:filter => Proc.new {|s|
|
471
|
+
s = strip_leading_and_trailing_eq_and_whitespace(s)
|
472
|
+
s
|
473
|
+
},
|
474
|
+
},
|
475
|
+
:h2 => {
|
476
|
+
:curpat => '(?= *={2}[^=])',
|
477
|
+
:hint => ['=', ' '],
|
478
|
+
:stops => '\n',
|
479
|
+
:contains => ALL_INLINE,
|
480
|
+
:open => "<h2>", :close => "</h2>\n\n",
|
481
|
+
:filter => Proc.new {|s|
|
482
|
+
s = strip_leading_and_trailing_eq_and_whitespace(s)
|
483
|
+
s
|
484
|
+
},
|
485
|
+
},
|
486
|
+
:h3 => {
|
487
|
+
:curpat => '(?= *={3}[^=])',
|
488
|
+
:hint => ['=', ' '],
|
489
|
+
:stops => '\n',
|
490
|
+
:contains => ALL_INLINE,
|
491
|
+
:open => "<h3>", :close => "</h3>\n\n",
|
492
|
+
:filter => Proc.new {|s|
|
493
|
+
s = strip_leading_and_trailing_eq_and_whitespace(s)
|
494
|
+
s
|
495
|
+
},
|
496
|
+
},
|
497
|
+
:h4 => {
|
498
|
+
:curpat => '(?= *={4}[^=])',
|
499
|
+
:hint => ['=', ' '],
|
500
|
+
:stops => '\n',
|
501
|
+
:contains => ALL_INLINE,
|
502
|
+
:open => "<h4>", :close => "</h4>\n\n",
|
503
|
+
:filter => Proc.new {|s|
|
504
|
+
s = strip_leading_and_trailing_eq_and_whitespace(s)
|
505
|
+
s
|
506
|
+
},
|
507
|
+
},
|
508
|
+
:h5 => {
|
509
|
+
:curpat => '(?= *={5}[^=])',
|
510
|
+
:hint => ['=', ' '],
|
511
|
+
:stops => '\n',
|
512
|
+
:contains => ALL_INLINE,
|
513
|
+
:open => "<h5>", :close => "</h5>\n\n",
|
514
|
+
:filter => Proc.new {|s|
|
515
|
+
s = strip_leading_and_trailing_eq_and_whitespace(s)
|
516
|
+
s
|
517
|
+
},
|
518
|
+
},
|
519
|
+
:h6 => {
|
520
|
+
:curpat => '(?= *={6,})',
|
521
|
+
:hint => ['=', ' '],
|
522
|
+
:stops => '\n',
|
523
|
+
:contains => ALL_INLINE,
|
524
|
+
:open => "<h6>", :close => "</h6>\n\n",
|
525
|
+
:filter => Proc.new {|s|
|
526
|
+
s = strip_leading_and_trailing_eq_and_whitespace(s)
|
527
|
+
s
|
528
|
+
},
|
529
|
+
},
|
530
|
+
:plain => {
|
531
|
+
:curpat => '(?=[^*/_,^\\{\[<|])',
|
532
|
+
:stops => INLINE,
|
533
|
+
:hint => PLAINCHARS,
|
534
|
+
:open => '', :close => ''
|
535
|
+
},
|
536
|
+
:any => { # catch-all
|
537
|
+
:curpat => '(?=.)',
|
538
|
+
:stops => INLINE,
|
539
|
+
:open => '', :close => ''
|
540
|
+
},
|
541
|
+
:br => {
|
542
|
+
:curpat => '(?=\\\\\\\\)',
|
543
|
+
:stops => '\\\\\\\\',
|
544
|
+
:hint => ['\\'],
|
545
|
+
:filter => Proc.new { "" },
|
546
|
+
:open => '<br />', :close => '',
|
547
|
+
},
|
548
|
+
:esc => {
|
549
|
+
:curpat => '(?=~[\S])',
|
550
|
+
:stops => '~.',
|
551
|
+
:hint => ['~'],
|
552
|
+
:filter => Proc.new {|s|
|
553
|
+
s.sub!(/^./m, '')
|
554
|
+
s
|
555
|
+
},
|
556
|
+
:open => '', :close => '',
|
557
|
+
},
|
558
|
+
:inowiki => {
|
559
|
+
:curpat => '(?=\{{3}.*?\}*\}{3})',
|
560
|
+
:stops => '.*?\}*\}{3}',
|
561
|
+
:hint => ['{'],
|
562
|
+
:filter => Proc.new {|s|
|
563
|
+
s[0,3] = ''
|
564
|
+
s.sub!(/\}{3}\s*$/, '')
|
565
|
+
s.gsub!(/&/, '&')
|
566
|
+
s.gsub!(/</, '<')
|
567
|
+
s.gsub!(/>/, '>')
|
568
|
+
s
|
569
|
+
},
|
570
|
+
:open => "<tt>", :close => "</tt>",
|
571
|
+
},
|
572
|
+
:plug => {
|
573
|
+
:curpat => '(?=\<{3}.*?\>*\>{3})',
|
574
|
+
:stops => '.*?\>*\>{3}',
|
575
|
+
:hint => ['<'],
|
576
|
+
:filter => Proc.new {|s|
|
577
|
+
s[0,3] = ''
|
578
|
+
s.sub!(/\>{3}$/, '')
|
579
|
+
if @plugin_function
|
580
|
+
s = @plugin_function.call(s)
|
581
|
+
else
|
582
|
+
s = "<<<#{s}>>>"
|
583
|
+
end
|
584
|
+
s
|
585
|
+
},
|
586
|
+
:open => "", :close => "",
|
587
|
+
},
|
588
|
+
:plug2 => {
|
589
|
+
:curpat => '(?=\<{2}.*?\>*\>{2})',
|
590
|
+
:stops => '.*?\>*\>{2}',
|
591
|
+
:hint => ['<'],
|
592
|
+
:filter => Proc.new {|s|
|
593
|
+
s[0,2] = ''
|
594
|
+
s.sub!(/\>{2}$/, '')
|
595
|
+
if @plugin_function
|
596
|
+
s = @plugin_function.call(s)
|
597
|
+
else
|
598
|
+
s = "<<#{s}>>"
|
599
|
+
end
|
600
|
+
s
|
601
|
+
},
|
602
|
+
:open => "", :close => "",
|
603
|
+
},
|
604
|
+
:ilink => {
|
605
|
+
:curpat => '(?=(?:https?|ftp):\/\/)',
|
606
|
+
# This following is the [:punct:] character class with the / and ? removed
|
607
|
+
# so that URLs like http://www.somesite.com/ will match the trailing
|
608
|
+
# slash. URLs with a trailing ? will also work. Trailing ? is sometimes
|
609
|
+
# used to ensure that browsers don't cache the page.
|
610
|
+
:stops => '(?=[!"#$%&\'()*+,-.:;<=>@\[\\]^_`{|}~]?(?:\s|$))',
|
611
|
+
:hint => ['h', 'f'],
|
612
|
+
:filter => Proc.new {|s|
|
613
|
+
s.sub!(/^\s*/, '')
|
614
|
+
s.sub!(/\s*$/, '')
|
615
|
+
if @barelink_function
|
616
|
+
s = @barelink_function.call(s)
|
617
|
+
end
|
618
|
+
s = "href=\"#{s}\">#{s}"
|
619
|
+
s
|
620
|
+
},
|
621
|
+
:open => "<a ", :close=> "</a>",
|
622
|
+
},
|
623
|
+
:link => {
|
624
|
+
:curpat => '(?=\[\[[^\n]+?\]\])',
|
625
|
+
:stops => '\]\]',
|
626
|
+
:hint => ['['],
|
627
|
+
:contains => ['href', 'atext'],
|
628
|
+
:filter => Proc.new {|s|
|
629
|
+
s[0,2] = ''
|
630
|
+
s[-2,2] = ''
|
631
|
+
s += "|#{s}" if ! s.index(/\|/) # text = url unless given
|
632
|
+
s
|
633
|
+
},
|
634
|
+
:open => "<a ", :close => "</a>",
|
635
|
+
},
|
636
|
+
:href => {
|
637
|
+
:curpat => '(?=[^\|])',
|
638
|
+
:stops => '(?=\|)',
|
639
|
+
:filter => Proc.new {|s|
|
640
|
+
s.sub!(/^\s*/, '')
|
641
|
+
s.sub!(/\s*$/, '')
|
642
|
+
if @link_function
|
643
|
+
s = @link_function.call(s)
|
644
|
+
end
|
645
|
+
s
|
646
|
+
},
|
647
|
+
:open => 'href="', :close => '">',
|
648
|
+
},
|
649
|
+
:atext => {
|
650
|
+
:curpat => '(?=\|)',
|
651
|
+
:stops => '\n',
|
652
|
+
:hint => ['|'],
|
653
|
+
:contains => ALL_INLINE,
|
654
|
+
:filter => Proc.new {|s|
|
655
|
+
s.sub!(/^\|\s*/, '')
|
656
|
+
s.sub!(/\s*$/, '')
|
657
|
+
s
|
658
|
+
},
|
659
|
+
:open => '', :close => '',
|
660
|
+
},
|
661
|
+
:img => {
|
662
|
+
:curpat => '(?=\{\{[^\{][^\n]*?\}\})',
|
663
|
+
:stops => '\}\}',
|
664
|
+
:hint => ['{'],
|
665
|
+
:contains => ['imgsrc', 'imgalt'],
|
666
|
+
:filter => Proc.new {|s|
|
667
|
+
s[0,2] = ''
|
668
|
+
s.sub!(/\}\}$/, '')
|
669
|
+
s
|
670
|
+
},
|
671
|
+
:open => "<img ", :close => " />",
|
672
|
+
},
|
673
|
+
:imgalt => {
|
674
|
+
:curpat => '(?=\|)',
|
675
|
+
:stops => '\n',
|
676
|
+
:hint => ['|'],
|
677
|
+
:filter => Proc.new {|s|
|
678
|
+
s.sub!(/^\|\s*/, '')
|
679
|
+
s.sub!(/\s*$/, '')
|
680
|
+
s
|
681
|
+
},
|
682
|
+
:open => ' alt="', :close => '"',
|
683
|
+
},
|
684
|
+
:imgsrc => {
|
685
|
+
:curpat => '(?=[^\|])',
|
686
|
+
:stops => '(?=\|)',
|
687
|
+
:filter => Proc.new {|s|
|
688
|
+
s.sub!(/^\|\s*/, '')
|
689
|
+
s.sub!(/\s*$/, '')
|
690
|
+
if @img_function
|
691
|
+
s = @img_function.call(s)
|
692
|
+
end
|
693
|
+
s
|
694
|
+
},
|
695
|
+
:open => 'src="', :close => '"',
|
696
|
+
},
|
697
|
+
:strong => {
|
698
|
+
:curpat => '(?=\*\*)',
|
699
|
+
:stops => '\*\*.*?\*\*',
|
700
|
+
:hint => ['*'],
|
701
|
+
:contains => ALL_INLINE,
|
702
|
+
:filter => Proc.new {|s|
|
703
|
+
s[0,2] = ''
|
704
|
+
s.sub!(/\*\*$/, '')
|
705
|
+
s
|
706
|
+
},
|
707
|
+
:open => "<strong>", :close => "</strong>",
|
708
|
+
},
|
709
|
+
:em => {
|
710
|
+
# This could use a negative lookback assertion to let you know whether
|
711
|
+
# it's part of a URL or not. That would be helpful if the URL had been
|
712
|
+
# escaped. Currently, it will just become italic after the // since
|
713
|
+
# it didn't process the URL.
|
714
|
+
:curpat => '(?=\/\/)',
|
715
|
+
# Removed a negative lookback assertion (?<!:) from the Perl version
|
716
|
+
# and replaced it with [^:] Not sure of the consequences, however, as
|
717
|
+
# of this version, Ruby does not have negative lookback assertions, so
|
718
|
+
# I had to do it.
|
719
|
+
:stops => '\/\/.*?[^:]\/\/',
|
720
|
+
:hint => ['/'],
|
721
|
+
:contains => ALL_INLINE,
|
722
|
+
:filter => Proc.new {|s|
|
723
|
+
s[0,2] = ''
|
724
|
+
s.sub!(/\/\/$/, '')
|
725
|
+
s
|
726
|
+
},
|
727
|
+
:open => "<em>", :close => "</em>",
|
728
|
+
},
|
729
|
+
:mono => {
|
730
|
+
:curpat => '(?=\#\#)',
|
731
|
+
:stops => '\#\#.*?\#\#',
|
732
|
+
:hint => ['#'],
|
733
|
+
:contains => ALL_INLINE,
|
734
|
+
:filter => Proc.new {|s|
|
735
|
+
s[0,2] = ''
|
736
|
+
s.sub!(/\#\#$/, '')
|
737
|
+
s
|
738
|
+
},
|
739
|
+
:open => "<tt>", :close => "</tt>",
|
740
|
+
},
|
741
|
+
:sub => {
|
742
|
+
:curpat => '(?=,,)',
|
743
|
+
:stops => ',,.*?,,',
|
744
|
+
:hint => [','],
|
745
|
+
:contains => ALL_INLINE,
|
746
|
+
:filter => Proc.new {|s|
|
747
|
+
s[0,2] = ''
|
748
|
+
s.sub!(/\,\,$/, '')
|
749
|
+
s
|
750
|
+
},
|
751
|
+
:open => "<sub>", :close => "</sub>",
|
752
|
+
},
|
753
|
+
:sup => {
|
754
|
+
:curpat => '(?=\^\^)',
|
755
|
+
:stops => '\^\^.*?\^\^',
|
756
|
+
:hint => ['^'],
|
757
|
+
:contains => ALL_INLINE,
|
758
|
+
:filter => Proc.new {|s|
|
759
|
+
s[0,2] = ''
|
760
|
+
s.sub!(/\^\^$/, '')
|
761
|
+
s
|
762
|
+
},
|
763
|
+
:open => "<sup>", :close => "</sup>",
|
764
|
+
},
|
765
|
+
:u => {
|
766
|
+
:curpat => '(?=__)',
|
767
|
+
:stops => '__.*?__',
|
768
|
+
:hint => ['_'],
|
769
|
+
:contains => ALL_INLINE,
|
770
|
+
:filter => Proc.new {|s|
|
771
|
+
s[0,2] = ''
|
772
|
+
s.sub!(/__$/, '')
|
773
|
+
s
|
774
|
+
},
|
775
|
+
:open => "<u>", :close => "</u>",
|
776
|
+
},
|
777
|
+
:amp => {
|
778
|
+
:curpat => '(?=\&(?!\w+\;))',
|
779
|
+
:stops => '.',
|
780
|
+
:hint => ['&'],
|
781
|
+
:filter => Proc.new { "&" },
|
782
|
+
:open => "", :close => "",
|
783
|
+
},
|
784
|
+
:tm => {
|
785
|
+
:curpat => '(?=\(TM\))',
|
786
|
+
:stops => '\(TM\)',
|
787
|
+
:hint => ['('],
|
788
|
+
:filter => Proc.new { "™" },
|
789
|
+
:open => "", :close => "",
|
790
|
+
},
|
791
|
+
:reg => {
|
792
|
+
:curpat => '(?=\(R\))',
|
793
|
+
:stops => '\(R\)',
|
794
|
+
:hint => ['('],
|
795
|
+
:filter => Proc.new { "®" },
|
796
|
+
:open => "", :close => "",
|
797
|
+
},
|
798
|
+
:copy => {
|
799
|
+
:curpat => '(?=\(C\))',
|
800
|
+
:stops => '\(C\)',
|
801
|
+
:hint => ['('],
|
802
|
+
:filter => Proc.new { "©" },
|
803
|
+
:open => "", :close => "",
|
804
|
+
},
|
805
|
+
:ndash => {
|
806
|
+
:curpat => '(?=--)',
|
807
|
+
:stops => '--',
|
808
|
+
:hint => ['-'],
|
809
|
+
:filter => Proc.new { "–" },
|
810
|
+
:open => "", :close => "",
|
811
|
+
},
|
812
|
+
:ellipsis => {
|
813
|
+
:curpat => '(?=\.\.\.)',
|
814
|
+
:stops => '\.\.\.',
|
815
|
+
:hint => ['.'],
|
816
|
+
:filter => Proc.new { "…" },
|
817
|
+
:open => "", :close => "",
|
818
|
+
},
|
819
|
+
}
|
820
|
+
|
821
|
+
# Removes the heading markers around a heading chunk: leading whitespace,
# "=" signs and whitespace, then the same at the end of the line.
#
# The string is modified in place and also returned.
def self.strip_leading_and_trailing_eq_and_whitespace(s)
  [/^\s*=*\s*/, /\s*=*\s*$/].each {|edge| s.sub!(edge, '') }
  s
end
|
826
|
+
|
827
|
+
# Removes one level of list markup from a list chunk: the first "*" or
# "#" marker in the string, and the marker following each newline, are
# replaced (together with the backticks or spaces directly in front of
# them) by a single backtick. The backtick is what the :li, :ul and :ol
# patterns look for.
#
# The string is modified in place and also returned.
def self.strip_list(s)
  s.sub!(/(?:`*| *)[*#]/) { '`' }
  s.gsub!(/\n(?:`*| *)[*#]/m) { "\n`" }
  s
end
|
832
|
+
|
833
|
+
# Runs +str+ through the :filter proc of the grammar entry named +chunk+
# and returns the proc's result.
def self.filter_string_x_with_chunk_filter_y(str, chunk)
  chunk_filter = @@chunks_hash[chunk][:filter]
  chunk_filter.call(str)
end
|
836
|
+
|
837
|
+
# Converts the markup in +tref+ to HTML, treating it as the content of a
# chunk of type +chunk+ (a key of the grammar table).
#
# The string is consumed left to right: get_sub_chunk_for identifies the
# type of the sub-chunk starting at the current position, the sub-chunk's
# precompiled :delim regex determines where it ends, and its text is
# filtered, parsed recursively if the sub-chunk has :contains, and wrapped
# in the sub-chunk's :open / :close text.
#
# Returns the generated HTML as a String.
def self.parse(tref, chunk)
  sub_chunk = nil     # type of the chunk that starts at last_pos, once known
  pos = 0             # current scan position
  last_pos = 0        # where the chunk being processed begins
  html = []
  first_try = true    # false once an iteration ran without a sub_chunk

  loop do
    if sub_chunk # we've determined what type of sub_chunk this is
      spec = @@chunks_hash[sub_chunk]

      # The chunk runs to the end of its delimiter match, or to the end of
      # the string when the delimiter never matches.
      pos = tref.index(spec[:delim], pos) ? Regexp.last_match.end(0) : tref.length

      html << spec[:open]

      t = tref[last_pos, pos - last_pos]                   # grab the chunk
      t = spec[:filter].call(t) if spec.has_key?(:filter)  # filter it, if applicable

      last_pos = pos   # this chunk ends here, and the next one begins

      # Recurse if it contains other chunks, otherwise emit it as is.
      html << (t && spec.has_key?(:contains) ? parse(t, sub_chunk) : t)

      html << spec[:close]
    else
      unless first_try
        # No sub-chunk could be identified for the second time, so nothing
        # would ever advance +pos+ again. The nested list test leaves a
        # single dangling newline behind; that case is accepted silently
        # (a workaround until the grammar itself is fixed), anything else
        # is reported.
        dangling_newline = pos == tref.length - 1 && tref[pos..tref.length] == "\n"
        $stderr.puts "ERROR: endless loop detected" unless dangling_newline
        break
      end
      first_try = false
    end

    break if pos && pos == tref.length                 # we've eaten the whole string
    sub_chunk = get_sub_chunk_for(tref, chunk, pos)    # more string to come
  end

  html.join
end
|
898
|
+
|
899
|
+
# Determines which kind of chunk starts at position +pos+ of +tref+, given
# that we are inside a chunk of type +chunk+.
#
# The chunk types hinted at by the character at +pos+ are tried first;
# if none of their patterns match, every type listed in the enclosing
# chunk's :contains is tried in order. Returns the name of the first
# matching chunk type as a Symbol, or nil when nothing matches.
def self.get_sub_chunk_for(tref, chunk, pos)
  spec = @@chunks_hash[chunk]
  starts_here = lambda {|name|
    tref.index(@@chunks_hash[name.to_sym][:curpatcmp], pos)
  }

  # get a hint about the next chunk from its first character
  first_char = tref[pos, 1]
  hinted = spec[:calculated_hint_array_for][first_char].to_a.find {|name| starts_here.call(name) }
  return hinted if hinted

  # the hint didn't help. Check all the chunk types which this chunk contains
  contained = spec[:contains].to_a.find {|name| starts_here.call(name) }
  contained && contained.to_sym
end
|
919
|
+
|
920
|
+
# Builds the source of the regex that matches any of the patterns which
# interrupt the chunk named +chunk+.
#
# When the chunk's :stops is an Array of chunk names, the result is the
# alternation of those chunks' :fwpat patterns (falling back to :curpat
# for chunks without :fwpat). Otherwise :stops is returned as is.
def self.delim(chunk)
  stops = @@chunks_hash[chunk][:stops]
  return stops unless Array === stops

  alternatives = stops.map do |stop|
    stop_spec = @@chunks_hash[stop.to_sym]
    pattern = stop_spec.has_key?(:fwpat) ? stop_spec[:fwpat] : stop_spec[:curpat]
    pattern + "|"
  end

  alternatives.join.chop   # drop the trailing "|"
end
|
940
|
+
|
941
|
+
# One-time optimization of the grammar - speeds the parser up a ton.
#
# For every entry of the grammar table this stores:
# :curpatcmp::                 the :curpat pattern compiled with a leading \G
# :delim::                     the compiled result of delim() for entries with :stops
# :calculated_hint_array_for:: for entries with :contains, a Hash mapping
#                              each hint character to the contained chunk
#                              types (Symbols) that list it in their :hint
#
# Subsequent calls return immediately.
def self.init
  return if @is_initialized

  @is_initialized = true

  @@chunks_hash.keys.each do |name|
    spec = @@chunks_hash[name]

    # precompile the regexes
    spec[:curpatcmp] = Regexp.compile('\G' + spec[:curpat], Regexp::MULTILINE) if spec.has_key?(:curpat)
    spec[:delim] = Regexp.compile(delim(name), Regexp::MULTILINE) if spec.has_key?(:stops)

    next unless spec.has_key?(:contains)

    # store hints about each contained chunk to speed up identification
    hints = spec[:calculated_hint_array_for] = {}
    spec[:contains].each do |contained|
      contained = contained.to_sym
      (@@chunks_hash[contained][:hint] || []).each do |hint|
        (hints[hint] ||= []) << contained
      end
    end
  end
end
|
972
|
+
|
973
|
+
end
|