gmccreight-WikiCreole 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING +340 -0
- data/Changelog +34 -0
- data/LICENSE +52 -0
- data/README +14 -0
- data/Rakefile +26 -0
- data/lib/wiki_creole.rb +973 -0
- data/test/profiling.rb +22 -0
- data/test/test_all.rb +891 -0
- data/test/test_amp.html +3 -0
- data/test/test_amp.markup +2 -0
- data/test/test_block.html +131 -0
- data/test/test_block.markup +69 -0
- data/test/test_escape.html +33 -0
- data/test/test_escape.markup +35 -0
- data/test/test_inline.html +67 -0
- data/test/test_inline.markup +48 -0
- data/test/test_jsp_wiki.html +157 -0
- data/test/test_jsp_wiki.markup +100 -0
- data/test/test_nested_lists.html +10 -0
- data/test/test_nested_lists.markup +4 -0
- data/test/test_release_attributes.rb +37 -0
- data/test/test_specialchars.html +29 -0
- data/test/test_specialchars.markup +18 -0
- metadata +77 -0
data/lib/wiki_creole.rb
ADDED
@@ -0,0 +1,973 @@
|
|
1
|
+
# WikiCreole implements the Wiki Creole markup language,
|
2
|
+
# version 1.0, as described at http://www.wikicreole.org. It
|
3
|
+
# reads Creole 1.0 markup and returns XHTML.
|
4
|
+
#
|
5
|
+
# Author:: Gordon McCreight (mailto:wikicreole.to.gordon@mccreight.com)
|
6
|
+
# Copyright:: Copyright (c) 2008 Gordon McCreight
|
7
|
+
# License:: Distributes under the same terms as Ruby (see the LICENSE file)
|
8
|
+
# Version:: 0.1.4
|
9
|
+
# Date:: 2009-08-11
|
10
|
+
#
|
11
|
+
# == Synopsis
|
12
|
+
# Most likely you'll just want to do:
|
13
|
+
# require 'rubygems'
|
14
|
+
# require 'wiki_creole'
|
15
|
+
# xhtml = WikiCreole.creole_parse(wiki_creole_markup)
|
16
|
+
# If you want to override the default behaviors, make sure to look at the other
|
17
|
+
# public methods.
|
18
|
+
#
|
19
|
+
# == Official Markup
|
20
|
+
#
|
21
|
+
# Here is a summary of the official Creole 1.0 markup
|
22
|
+
# elements. See http://www.wikicreole.org for the full
|
23
|
+
# details.
|
24
|
+
#
|
25
|
+
# Headings:
|
26
|
+
# = heading 1 -> <h1>heading 1</h1>
|
27
|
+
# == heading 2 -> <h2>heading 2</h2>
|
28
|
+
# ...
|
29
|
+
# ====== heading 6 -> <h6>heading 6</h6>
|
30
|
+
#
|
31
|
+
# Various inline markup:
|
32
|
+
# ** bold ** -> <strong> bold </strong>
|
33
|
+
# // italics // -> <em> italics </em>
|
34
|
+
# **// both //** -> <strong><em> both </em></strong>
|
35
|
+
# [[ link ]] -> <a href="link">link</a>
|
36
|
+
# [[ link | text ]] -> <a href="link">text</a>
|
37
|
+
# http://cpan.org -> <a href="http://cpan.org">http://cpan.org</a>
|
38
|
+
# line \\ break -> line <br /> break
|
39
|
+
# {{img.jpg|alt}} -> <img src="img.jpg" alt="alt">
|
40
|
+
#
|
41
|
+
# Lists:
|
42
|
+
# * unordered list <ul><li>unordered list</li>
|
43
|
+
# * second item <li>second item</li>
|
44
|
+
# ## nested ordered -> <ol><li>nested ordered</li>
|
45
|
+
# *** uber-nested <ul><li>uber-nested</li></ul>
|
46
|
+
# * back to level 1 </ol><li>back to level 1</li></ul>
|
47
|
+
#
|
48
|
+
# Tables:
|
49
|
+
# |= h1 |= h2 -> <table><tr><th>h1</th><th>h2</th></tr>
|
50
|
+
# | c1 | c2 <tr><td>c1</td><td>c2</td></tr></table>
|
51
|
+
#
|
52
|
+
# Nowiki (Preformatted):
|
53
|
+
# {{{ <pre>
|
54
|
+
# ** not bold ** ** not bold **
|
55
|
+
# escaped HTML: -> escaped HTML:
|
56
|
+
# <i> test </i>        &lt;i&gt; test &lt;/i&gt;
|
57
|
+
# }}}                  </pre>
|
58
|
+
#
|
59
|
+
# {{{ inline\\also }}} -> <tt>inline\\also</tt>
|
60
|
+
#
|
61
|
+
# Escape Character:
|
62
|
+
# ~** not bold ** -> ** not bold **
|
63
|
+
# tilde: ~~ -> tilde: ~
|
64
|
+
#
|
65
|
+
# Paragraphs are separated by other blocks and blank lines.
|
66
|
+
# Inline markup can usually be combined, overlapped, etc. List
|
67
|
+
# items and plugin text can span lines.
|
68
|
+
#
|
69
|
+
# == Extended Markup
|
70
|
+
#
|
71
|
+
# In addition to the official markup above, WikiCreole also supports
|
72
|
+
# the following markup:
|
73
|
+
#
|
74
|
+
# Plugins:
|
75
|
+
# << plugin >> -> whatever you want (see WikiCreole.creole_plugin)
|
76
|
+
# <<< plugin >>> -> whatever you want (see WikiCreole.creole_plugin)
|
77
|
+
# Triple-bracket syntax has priority, in order to allow you to embed
|
78
|
+
# double-brackets in plugins, such as to embed Perl code.
|
79
|
+
#
|
80
|
+
# Inline:
|
81
|
+
# ## monospace ## -> <tt> monospace </tt>
|
82
|
+
# ^^ superscript ^^ -> <sup> superscript </sup>
|
83
|
+
# ,, subscript ,, -> <sub> subscript </sub>
|
84
|
+
# __ underline __ -> <u> underline </u>
|
85
|
+
# (TM) -> ™
|
86
|
+
# (R) -> ®
|
87
|
+
# (C) -> ©
|
88
|
+
# ... -> …
|
89
|
+
# -- -> –
|
90
|
+
#
|
91
|
+
# Indented Paragraphs:
|
92
|
+
# :this -> <div style="margin-left:2em"><p>this
|
93
|
+
# is indented is indented</p>
|
94
|
+
# :: more indented <div style="margin-left:2em"><p> more
|
95
|
+
# indented</div></div>
|
96
|
+
#
|
97
|
+
# Definition Lists:
|
98
|
+
# ; Title -> <dl><dt>Title</dt>
|
99
|
+
# : item 1 : item 2 <dd>item 1</dd><dd>item 2</dd>
|
100
|
+
# ; Title 2 : item2a <dt>Title 2</dt><dd>item 2a</dd></dl>
|
101
|
+
#
|
102
|
+
# == Acknowledgements
|
103
|
+
# Most of this code is ported from Jason Burnett's excellent Perl-based
|
104
|
+
# converter which can be found here:
|
105
|
+
# http://search.cpan.org/~jburnett/Text-WikiCreole/
|
106
|
+
# He, in turn, acknowledges the Document::Parser perl module.
|
107
|
+
#
|
108
|
+
# Also, some of the tests are taken from Lars Christensen's implementation of
|
109
|
+
# the Creole parser. You can find his code at:
|
110
|
+
# http://github.com/larsch/creole/tree/master
|
111
|
+
#
|
112
|
+
# Other tests come from the wikicreole website itself, here:
|
113
|
+
# http://www.wikicreole.org/
|
114
|
+
|
115
|
+
class WikiCreole
|
116
|
+
|
117
|
+
# Reads Creole 1.0 markup and returns XHTML.
|
118
|
+
#
|
119
|
+
# xhtml = WikiCreole.creole_parse(wiki_creole_markup)
|
120
|
+
def self.creole_parse(s)
  # Anything that is not a non-empty String produces an empty document.
  if s.is_a?(String) && !s.empty?
    init             # compiles the grammar on first use, no-op afterwards
    parse(s, :top)   # :top is the root chunk, which contains all block chunks
  else
    ""
  end
end
|
127
|
+
|
128
|
+
# Creole 1.0 supports two plugin syntaxes: << plugin content >> and
|
129
|
+
# <<< plugin content >>>
|
130
|
+
#
|
131
|
+
# Write a function that receives the text between the <<>>
|
132
|
+
# delimiters (not including the delimiters) and
|
133
|
+
# returns the text to be displayed. For example, here is a
|
134
|
+
# simple plugin that converts plugin text to uppercase:
|
135
|
+
#
|
136
|
+
# WikiCreole.creole_plugin {|s| s.upcase }
|
137
|
+
#
|
138
|
+
# If you do not register a plugin function, plugin markup will be left
|
139
|
+
# as is, including the surrounding << >>.
|
140
|
+
def self.creole_plugin(&handler)
  # Remember the block; the :plug and :plug2 filters call it with the text
  # found between the plugin delimiters. Calling without a block clears it.
  @plugin_function = handler
end
|
143
|
+
|
144
|
+
# You may wish to customize [[ links ]], such as to prefix a hostname,
|
145
|
+
# port, etc.
|
146
|
+
#
|
147
|
+
# Write a function, similar to the plugin function, which receives the
|
148
|
+
# URL part of the link (with leading and trailing whitespace stripped)
|
149
|
+
# and returns the customized link. For example, to prepend
|
150
|
+
# http://my.domain/
|
151
|
+
# to pagename:
|
152
|
+
#
|
153
|
+
# WikiCreole.creole_link {|s| "http://my.domain/#{s}" }
|
154
|
+
def self.creole_link(&handler)
  # Remember the block; the :href filter calls it with the stripped link
  # target. Calling without a block clears it.
  @link_function = handler
end
|
157
|
+
|
158
|
+
# Same purpose as creole_link, but for "bare" link markup. Bare links are
|
159
|
+
# the links which are in the text but not surrounded by brackets.
|
160
|
+
#
|
161
|
+
# WikiCreole.creole_barelink {|s| "#{s}.html" }
|
162
|
+
def self.creole_barelink(&handler)
  # Remember the block; the :ilink filter calls it with the stripped bare
  # URL. Calling without a block clears it.
  @barelink_function = handler
end
|
165
|
+
|
166
|
+
# Same purpose as creole_link, but for image URLs.
|
167
|
+
#
|
168
|
+
# WikiCreole.creole_img {|s| "http://my.domain/#{s}" }
|
169
|
+
def self.creole_img(&handler)
  # Remember the block; the :imgsrc filter calls it with the stripped image
  # URL. Calling without a block clears it.
  @img_function = handler
end
|
172
|
+
|
173
|
+
# If you want complete control over links, rather than just modifying
|
174
|
+
# the URL, register your link markup function with WikiCreole.creole_link()
|
175
|
+
# as above and then call creole_customlinks(). Now your function will receive
|
176
|
+
# the entire link markup chunk, such as <tt>[[ some_wiki_page | page description ]]</tt>
|
177
|
+
# and must return HTML.
|
178
|
+
#
|
179
|
+
# This has no effect on "bare" link markup, such as
|
180
|
+
# http://cpan.org
|
181
|
+
def self.creole_customlinks
  # Blank out the generated tag fragments for both the link and its href.
  [:href, :link].each do |name|
    @@chunks_hash[name][:open] = ""
    @@chunks_hash[name][:close] = ""
  end

  link = @@chunks_hash[:link]
  # Without :contains the parser no longer recurses into the link, so the
  # filter below receives the whole markup chunk.
  link.delete(:contains)
  # Hand the chunk to the registered link function; pass it through
  # unchanged when none is registered.
  link[:filter] = Proc.new { |markup|
    @link_function ? @link_function.call(markup) : markup
  }
end
|
192
|
+
|
193
|
+
# Same purpose as creole_customlinks, but for "bare" link markup.
|
194
|
+
def self.creole_custombarelinks
  bare_link = @@chunks_hash[:ilink]
  # Blank out the generated <a ...> wrapper.
  bare_link[:open] = ""
  bare_link[:close] = ""
  # Hand the chunk to the registered bare-link function; pass it through
  # unchanged when none is registered.
  bare_link[:filter] = Proc.new { |markup|
    @barelink_function ? @barelink_function.call(markup) : markup
  }
end
|
202
|
+
|
203
|
+
# Similar to creole_customlinks, but for images.
|
204
|
+
def self.creole_customimgs
  image = @@chunks_hash[:img]
  # Blank out the generated <img ... /> wrapper.
  image[:open] = ""
  image[:close] = ""
  # Without :contains the parser no longer recurses into the image, so the
  # filter below receives the whole markup chunk.
  image.delete(:contains)
  # Hand the chunk to the registered image function; pass it through
  # unchanged when none is registered.
  image[:filter] = Proc.new { |markup|
    @img_function ? @img_function.call(markup) : markup
  }
end
|
213
|
+
|
214
|
+
# You may wish to customize the opening and/or closing tags
|
215
|
+
# for the various bits of Creole markup. For example, to
|
216
|
+
# assign a CSS class to list items:
|
217
|
+
# WikiCreole.creole_tag(:li, :open, "<li class=myclass>")
|
218
|
+
#
|
219
|
+
# The tags that may be of interest are:
|
220
|
+
#
|
221
|
+
# br dd dl
|
222
|
+
# dt em h1
|
223
|
+
# h2 h3 h4
|
224
|
+
# h5 h6 hr
|
225
|
+
# ilink img inowiki
|
226
|
+
# ip li link
|
227
|
+
# mono nowiki ol
|
228
|
+
# p strong sub
|
229
|
+
# sup table td
|
230
|
+
# th tr u
|
231
|
+
# ul
|
232
|
+
#
|
233
|
+
# Those should be self-explanatory, except for inowiki (inline nowiki),
|
234
|
+
# ilink (bare links, e.g.
|
235
|
+
# http://www.cpan.org
|
236
|
+
# ) and ip (indented paragraph).
|
237
|
+
def self.creole_tag(tag, type, text="")
  # The grammar is keyed by symbols, so accept "li" as well as :li. The
  # type argument was already normalised this way; tag was not, which made
  # string tag names silently do nothing.
  tag = tag.to_sym if tag.respond_to?(:to_sym)
  type = type.to_sym

  # Only the opening and closing markup can be replaced, and only for
  # chunks that exist in the grammar; anything else is ignored (returns nil).
  return unless [:open, :close].include?(type)
  return unless @@chunks_hash.has_key?(tag)

  @@chunks_hash[tag][type] = text
end
|
243
|
+
|
244
|
+
# See all current tags:
|
245
|
+
# puts WikiCreole.creole_tags()
|
246
|
+
#
|
247
|
+
def self.creole_tags
  sorted_names = @@chunks_hash.keys.map { |name| name.to_s }.sort

  report = sorted_names.map do |name|
    definition = @@chunks_hash[name.to_sym]
    opening = definition[:open] || ""
    closing = definition[:close] || ""

    # Only chunks whose opening markup contains a "<" are listed.
    next unless opening =~ /</m

    # Show embedded newlines as a literal \n so each tag stays on one line.
    opening, closing = [opening, closing].map { |markup| markup.gsub(/\n/m, "\\n") }
    "#{name}: open(#{opening}) close(#{closing})\n"
  end

  report.compact.join
end
|
261
|
+
|
262
|
+
private
|
263
|
+
|
264
|
+
# characters that may indicate inline wiki markup
|
265
|
+
SPECIALCHARS = [
  '^', '\\', '*', '/', '_', ',', '{', '[', '<',
  '~', '|', "\n", '#', ':', ';', '(', '-', '.'
]

# Plain characters: every printable ASCII character (codes 32 to 126)
# that is not listed in SPECIALCHARS.
PLAINCHARS = (32..126).map { |code| code.chr } - SPECIALCHARS

# Names of the inline chunks other than plain text.
INLINE = %w{
  strong em br esc img link ilink inowiki
  sub sup mono u plug plug2 tm reg copy ndash ellipsis amp
}

# Every inline chunk name, including the plain-text and catch-all chunks.
ALL_INLINE = INLINE + ['plain', 'any']

# Names of the block-level chunks, in the order they are listed for :top.
BLOCKS = %w{h1 h2 h3 hr nowiki h4 h5 h6 ul ol table p ip dl plug plug2 blank}

# Handy - interpolated into several patterns in @@chunks_hash.
EOL = '(?:\n|$)'.freeze # end of line (or string)

# Callbacks registered through creole_plugin, creole_barelink, creole_link
# and creole_img; nil until one is registered.
@plugin_function = nil
@barelink_function = nil
@link_function = nil
@img_function = nil

# Set to true by init once the grammar has been compiled.
@is_initialized = false
|
290
|
+
|
291
|
+
@@chunks_hash = {
|
292
|
+
:top => {
|
293
|
+
:contains => BLOCKS,
|
294
|
+
},
|
295
|
+
:blank => {
|
296
|
+
:curpat => "(?= *#{EOL})",
|
297
|
+
:fwpat => "(?=(?:^|\n) *#{EOL})",
|
298
|
+
:stops => '(?=\S)',
|
299
|
+
:hint => ["\n"],
|
300
|
+
:filter => Proc.new { "" }, # whitespace into the bit bucket
|
301
|
+
:open => "", :close => "",
|
302
|
+
},
|
303
|
+
:p => {
|
304
|
+
:curpat => '(?=.)',
|
305
|
+
:stops => ['blank', 'ip', 'h', 'hr', 'nowiki', 'ul', 'ol', 'dl', 'table'],
|
306
|
+
:hint => PLAINCHARS,
|
307
|
+
:contains => ALL_INLINE,
|
308
|
+
:filter => Proc.new {|s| s.chomp },
|
309
|
+
:open => "<p>", :close => "</p>\n\n",
|
310
|
+
},
|
311
|
+
:ip => {
|
312
|
+
:curpat => '(?=:)',
|
313
|
+
:fwpat => '\n(?=:)',
|
314
|
+
:stops => ['blank', 'h', 'hr', 'nowiki', 'ul', 'ol', 'dl', 'table'],
|
315
|
+
:hint => [':'],
|
316
|
+
:contains => ['p', 'ip'],
|
317
|
+
:filter => Proc.new {|s|
|
318
|
+
s.sub!(/:/, '')
|
319
|
+
s.sub!(/\n:/m, "\n")
|
320
|
+
s
|
321
|
+
},
|
322
|
+
:open => "<div style=\"margin-left: 2em\">", :close => "</div>\n",
|
323
|
+
},
|
324
|
+
:dl => {
|
325
|
+
:curpat => '(?=;)',
|
326
|
+
:fwpat => '\n(?=;)',
|
327
|
+
:stops => ['blank', 'h', 'hr', 'nowiki', 'ul', 'ol', 'table'],
|
328
|
+
:hint => [';'],
|
329
|
+
:contains => ['dt', 'dd'],
|
330
|
+
:open => "<dl>\n", :close => "</dl>\n",
|
331
|
+
},
|
332
|
+
:dt => {
|
333
|
+
:curpat => '(?=;)',
|
334
|
+
:fwpat => '\n(?=;)',
|
335
|
+
:stops => '(?=:|\n)',
|
336
|
+
:hint => [';'],
|
337
|
+
:contains => ALL_INLINE,
|
338
|
+
:filter => Proc.new {|s|
|
339
|
+
s.sub!(/^;\s*/, '')
|
340
|
+
s
|
341
|
+
},
|
342
|
+
:open => " <dt>", :close => "</dt>\n",
|
343
|
+
},
|
344
|
+
:dd => {
|
345
|
+
:curpat => '(?=\n|:)',
|
346
|
+
:fwpat => '(?:\n|:)',
|
347
|
+
:stops => '.(?=:)|\n(?=;)',
|
348
|
+
:hint => [':', "\n"],
|
349
|
+
:contains => ALL_INLINE,
|
350
|
+
:filter => Proc.new {|s|
|
351
|
+
s.sub!(/(?:\n|:)\s*/m, '')
|
352
|
+
s.sub!(/\s*$/m, '')
|
353
|
+
s
|
354
|
+
},
|
355
|
+
:open => " <dd>", :close => "</dd>\n",
|
356
|
+
},
|
357
|
+
:table => {
|
358
|
+
:curpat => '(?= *\|.)',
|
359
|
+
:fwpat => '\n(?= *\|.)',
|
360
|
+
:stops => '\n(?= *[^\|])',
|
361
|
+
:contains => ['tr'],
|
362
|
+
:hint => ['|', ' '],
|
363
|
+
:open => "<table>\n", :close => "</table>\n\n",
|
364
|
+
},
|
365
|
+
:tr => {
|
366
|
+
:curpat => '(?= *\|)',
|
367
|
+
:stops => '\n',
|
368
|
+
:contains => ['td', 'th'],
|
369
|
+
:hint => ['|', ' '],
|
370
|
+
:filter => Proc.new {|s|
|
371
|
+
s.sub!(/^ */, '')
|
372
|
+
s.sub!(/\| *$/, '')
|
373
|
+
s
|
374
|
+
},
|
375
|
+
:open => " <tr>\n", :close => " </tr>\n",
|
376
|
+
},
|
377
|
+
:td => {
|
378
|
+
:curpat => '(?=\|[^=])',
|
379
|
+
# this gnarly regex fixes ambiguous '|' for links/imgs/nowiki in tables
|
380
|
+
:stops => '[^~](?=\|(?!(?:[^\[]*\]\])|(?:[^\{]*\}\})))',
|
381
|
+
:contains => ALL_INLINE,
|
382
|
+
:hint => ['|'],
|
383
|
+
:filter => Proc.new {|s|
|
384
|
+
s.sub!(/^ *\| */, '')
|
385
|
+
s.sub!(/\s*$/m, '')
|
386
|
+
s
|
387
|
+
},
|
388
|
+
:open => " <td>", :close => "</td>\n",
|
389
|
+
},
|
390
|
+
:th => {
|
391
|
+
:curpat => '(?=\|=)',
|
392
|
+
# this gnarly regex fixes ambiguous '|' for links/imgs/nowiki in tables
|
393
|
+
:stops => '[^~](?=\|(?!(?:[^\[]*\]\])|(?:[^\{]*\}\})))',
|
394
|
+
:contains => ALL_INLINE,
|
395
|
+
:hint => ['|'],
|
396
|
+
:filter => Proc.new {|s|
|
397
|
+
s.sub!(/^ *\|= */, '')
|
398
|
+
s.sub!(/\s*$/m, '')
|
399
|
+
s
|
400
|
+
},
|
401
|
+
:open => " <th>", :close => "</th>\n",
|
402
|
+
},
|
403
|
+
:ul => {
|
404
|
+
:curpat => '(?=(?:`| *)\*[^*])',
|
405
|
+
:fwpat => '(?=\n(?:`| *)\*[^*])',
|
406
|
+
:stops => ['blank', 'ip', 'h', 'nowiki', 'li', 'table', 'hr', 'dl'],
|
407
|
+
:contains => ['ul', 'ol', 'li'],
|
408
|
+
:hint => ['*', ' '],
|
409
|
+
:filter => Proc.new {|s|
|
410
|
+
s = strip_list(s)
|
411
|
+
s
|
412
|
+
},
|
413
|
+
:open => "<ul>\n", :close => "</ul>\n",
|
414
|
+
},
|
415
|
+
:ol => {
|
416
|
+
:curpat => '(?=(?:`| *)\#[^#])',
|
417
|
+
:fwpat => '(?=\n(?:`| *)\#[^#])',
|
418
|
+
:stops => ['blank', 'ip', 'h', 'nowiki', 'li', 'table', 'hr', 'dl'],
|
419
|
+
:contains => ['ul', 'ol', 'li'],
|
420
|
+
:hint => ['#', ' '],
|
421
|
+
:filter => Proc.new {|s|
|
422
|
+
s = strip_list(s)
|
423
|
+
s
|
424
|
+
},
|
425
|
+
:open => "<ol>\n", :close => "</ol>\n",
|
426
|
+
},
|
427
|
+
:li => {
|
428
|
+
:curpat => '(?=`[^*#])',
|
429
|
+
:fwpat => '\n(?=`[^*#])',
|
430
|
+
:stops => '\n(?=`)',
|
431
|
+
:hint => ['`'],
|
432
|
+
:filter => Proc.new {|s|
|
433
|
+
s.sub!(/` */, '')
|
434
|
+
s.chomp!
|
435
|
+
s
|
436
|
+
},
|
437
|
+
:contains => ALL_INLINE,
|
438
|
+
:open => " <li>", :close => "</li>\n",
|
439
|
+
},
|
440
|
+
:nowiki => {
|
441
|
+
:curpat => '(?=\{\{\{ *\n)',
|
442
|
+
:fwpat => '\n(?=\{\{\{ *\n)',
|
443
|
+
:stops => "\n\\}\\}\\} *#{EOL}",
|
444
|
+
:hint => ['{'],
|
445
|
+
:filter => Proc.new {|s|
|
446
|
+
s[0,3] = ''
|
447
|
+
s.sub!(/\}{3}\s*$/, '')
|
448
|
+
s.gsub!(/&/, '&')
|
449
|
+
s.gsub!(/</, '<')
|
450
|
+
s.gsub!(/>/, '>')
|
451
|
+
s
|
452
|
+
},
|
453
|
+
:open => "<pre>", :close => "</pre>\n\n",
|
454
|
+
},
|
455
|
+
:hr => {
|
456
|
+
:curpat => "(?= *-{4,} *#{EOL})",
|
457
|
+
:fwpat => "\n(?= *-{4,} *#{EOL})",
|
458
|
+
:hint => ['-', ' '],
|
459
|
+
:stops => EOL,
|
460
|
+
:open => "<hr />\n\n", :close => "",
|
461
|
+
:filter => Proc.new { "" } # ----- into the bit bucket
|
462
|
+
},
|
463
|
+
:h => { :curpat => '(?=(?:^|\n) *=)' }, # matches any heading
|
464
|
+
:h1 => {
|
465
|
+
:curpat => '(?= *=[^=])',
|
466
|
+
:hint => ['=', ' '],
|
467
|
+
:stops => '\n',
|
468
|
+
:contains => ALL_INLINE,
|
469
|
+
:open => "<h1>", :close => "</h1>\n\n",
|
470
|
+
:filter => Proc.new {|s|
|
471
|
+
s = strip_leading_and_trailing_eq_and_whitespace(s)
|
472
|
+
s
|
473
|
+
},
|
474
|
+
},
|
475
|
+
:h2 => {
|
476
|
+
:curpat => '(?= *={2}[^=])',
|
477
|
+
:hint => ['=', ' '],
|
478
|
+
:stops => '\n',
|
479
|
+
:contains => ALL_INLINE,
|
480
|
+
:open => "<h2>", :close => "</h2>\n\n",
|
481
|
+
:filter => Proc.new {|s|
|
482
|
+
s = strip_leading_and_trailing_eq_and_whitespace(s)
|
483
|
+
s
|
484
|
+
},
|
485
|
+
},
|
486
|
+
:h3 => {
|
487
|
+
:curpat => '(?= *={3}[^=])',
|
488
|
+
:hint => ['=', ' '],
|
489
|
+
:stops => '\n',
|
490
|
+
:contains => ALL_INLINE,
|
491
|
+
:open => "<h3>", :close => "</h3>\n\n",
|
492
|
+
:filter => Proc.new {|s|
|
493
|
+
s = strip_leading_and_trailing_eq_and_whitespace(s)
|
494
|
+
s
|
495
|
+
},
|
496
|
+
},
|
497
|
+
:h4 => {
|
498
|
+
:curpat => '(?= *={4}[^=])',
|
499
|
+
:hint => ['=', ' '],
|
500
|
+
:stops => '\n',
|
501
|
+
:contains => ALL_INLINE,
|
502
|
+
:open => "<h4>", :close => "</h4>\n\n",
|
503
|
+
:filter => Proc.new {|s|
|
504
|
+
s = strip_leading_and_trailing_eq_and_whitespace(s)
|
505
|
+
s
|
506
|
+
},
|
507
|
+
},
|
508
|
+
:h5 => {
|
509
|
+
:curpat => '(?= *={5}[^=])',
|
510
|
+
:hint => ['=', ' '],
|
511
|
+
:stops => '\n',
|
512
|
+
:contains => ALL_INLINE,
|
513
|
+
:open => "<h5>", :close => "</h5>\n\n",
|
514
|
+
:filter => Proc.new {|s|
|
515
|
+
s = strip_leading_and_trailing_eq_and_whitespace(s)
|
516
|
+
s
|
517
|
+
},
|
518
|
+
},
|
519
|
+
:h6 => {
|
520
|
+
:curpat => '(?= *={6,})',
|
521
|
+
:hint => ['=', ' '],
|
522
|
+
:stops => '\n',
|
523
|
+
:contains => ALL_INLINE,
|
524
|
+
:open => "<h6>", :close => "</h6>\n\n",
|
525
|
+
:filter => Proc.new {|s|
|
526
|
+
s = strip_leading_and_trailing_eq_and_whitespace(s)
|
527
|
+
s
|
528
|
+
},
|
529
|
+
},
|
530
|
+
:plain => {
|
531
|
+
:curpat => '(?=[^*/_,^\\{\[<|])',
|
532
|
+
:stops => INLINE,
|
533
|
+
:hint => PLAINCHARS,
|
534
|
+
:open => '', :close => ''
|
535
|
+
},
|
536
|
+
:any => { # catch-all
|
537
|
+
:curpat => '(?=.)',
|
538
|
+
:stops => INLINE,
|
539
|
+
:open => '', :close => ''
|
540
|
+
},
|
541
|
+
:br => {
|
542
|
+
:curpat => '(?=\\\\\\\\)',
|
543
|
+
:stops => '\\\\\\\\',
|
544
|
+
:hint => ['\\'],
|
545
|
+
:filter => Proc.new { "" },
|
546
|
+
:open => '<br />', :close => '',
|
547
|
+
},
|
548
|
+
:esc => {
|
549
|
+
:curpat => '(?=~[\S])',
|
550
|
+
:stops => '~.',
|
551
|
+
:hint => ['~'],
|
552
|
+
:filter => Proc.new {|s|
|
553
|
+
s.sub!(/^./m, '')
|
554
|
+
s
|
555
|
+
},
|
556
|
+
:open => '', :close => '',
|
557
|
+
},
|
558
|
+
:inowiki => {
|
559
|
+
:curpat => '(?=\{{3}.*?\}*\}{3})',
|
560
|
+
:stops => '.*?\}*\}{3}',
|
561
|
+
:hint => ['{'],
|
562
|
+
:filter => Proc.new {|s|
|
563
|
+
s[0,3] = ''
|
564
|
+
s.sub!(/\}{3}\s*$/, '')
|
565
|
+
s.gsub!(/&/, '&')
|
566
|
+
s.gsub!(/</, '<')
|
567
|
+
s.gsub!(/>/, '>')
|
568
|
+
s
|
569
|
+
},
|
570
|
+
:open => "<tt>", :close => "</tt>",
|
571
|
+
},
|
572
|
+
:plug => {
|
573
|
+
:curpat => '(?=\<{3}.*?\>*\>{3})',
|
574
|
+
:stops => '.*?\>*\>{3}',
|
575
|
+
:hint => ['<'],
|
576
|
+
:filter => Proc.new {|s|
|
577
|
+
s[0,3] = ''
|
578
|
+
s.sub!(/\>{3}$/, '')
|
579
|
+
if @plugin_function
|
580
|
+
s = @plugin_function.call(s)
|
581
|
+
else
|
582
|
+
s = "<<<#{s}>>>"
|
583
|
+
end
|
584
|
+
s
|
585
|
+
},
|
586
|
+
:open => "", :close => "",
|
587
|
+
},
|
588
|
+
:plug2 => {
|
589
|
+
:curpat => '(?=\<{2}.*?\>*\>{2})',
|
590
|
+
:stops => '.*?\>*\>{2}',
|
591
|
+
:hint => ['<'],
|
592
|
+
:filter => Proc.new {|s|
|
593
|
+
s[0,2] = ''
|
594
|
+
s.sub!(/\>{2}$/, '')
|
595
|
+
if @plugin_function
|
596
|
+
s = @plugin_function.call(s)
|
597
|
+
else
|
598
|
+
s = "<<#{s}>>"
|
599
|
+
end
|
600
|
+
s
|
601
|
+
},
|
602
|
+
:open => "", :close => "",
|
603
|
+
},
|
604
|
+
:ilink => {
|
605
|
+
:curpat => '(?=(?:https?|ftp):\/\/)',
|
606
|
+
# This following is the [:punct:] character class with the / and ? removed
|
607
|
+
# so that URLs like http://www.somesite.com/ will match the trailing
|
608
|
+
# slash. URLs with a trailing ? will also work. Trailing ? is sometimes
|
609
|
+
# used to ensure that browsers don't cache the page.
|
610
|
+
:stops => '(?=[!"#$%&\'()*+,-.:;<=>@\[\\]^_`{|}~]?(?:\s|$))',
|
611
|
+
:hint => ['h', 'f'],
|
612
|
+
:filter => Proc.new {|s|
|
613
|
+
s.sub!(/^\s*/, '')
|
614
|
+
s.sub!(/\s*$/, '')
|
615
|
+
if @barelink_function
|
616
|
+
s = @barelink_function.call(s)
|
617
|
+
end
|
618
|
+
s = "href=\"#{s}\">#{s}"
|
619
|
+
s
|
620
|
+
},
|
621
|
+
:open => "<a ", :close=> "</a>",
|
622
|
+
},
|
623
|
+
:link => {
|
624
|
+
:curpat => '(?=\[\[[^\n]+?\]\])',
|
625
|
+
:stops => '\]\]',
|
626
|
+
:hint => ['['],
|
627
|
+
:contains => ['href', 'atext'],
|
628
|
+
:filter => Proc.new {|s|
|
629
|
+
s[0,2] = ''
|
630
|
+
s[-2,2] = ''
|
631
|
+
s += "|#{s}" if ! s.index(/\|/) # text = url unless given
|
632
|
+
s
|
633
|
+
},
|
634
|
+
:open => "<a ", :close => "</a>",
|
635
|
+
},
|
636
|
+
:href => {
|
637
|
+
:curpat => '(?=[^\|])',
|
638
|
+
:stops => '(?=\|)',
|
639
|
+
:filter => Proc.new {|s|
|
640
|
+
s.sub!(/^\s*/, '')
|
641
|
+
s.sub!(/\s*$/, '')
|
642
|
+
if @link_function
|
643
|
+
s = @link_function.call(s)
|
644
|
+
end
|
645
|
+
s
|
646
|
+
},
|
647
|
+
:open => 'href="', :close => '">',
|
648
|
+
},
|
649
|
+
:atext => {
|
650
|
+
:curpat => '(?=\|)',
|
651
|
+
:stops => '\n',
|
652
|
+
:hint => ['|'],
|
653
|
+
:contains => ALL_INLINE,
|
654
|
+
:filter => Proc.new {|s|
|
655
|
+
s.sub!(/^\|\s*/, '')
|
656
|
+
s.sub!(/\s*$/, '')
|
657
|
+
s
|
658
|
+
},
|
659
|
+
:open => '', :close => '',
|
660
|
+
},
|
661
|
+
:img => {
|
662
|
+
:curpat => '(?=\{\{[^\{][^\n]*?\}\})',
|
663
|
+
:stops => '\}\}',
|
664
|
+
:hint => ['{'],
|
665
|
+
:contains => ['imgsrc', 'imgalt'],
|
666
|
+
:filter => Proc.new {|s|
|
667
|
+
s[0,2] = ''
|
668
|
+
s.sub!(/\}\}$/, '')
|
669
|
+
s
|
670
|
+
},
|
671
|
+
:open => "<img ", :close => " />",
|
672
|
+
},
|
673
|
+
:imgalt => {
|
674
|
+
:curpat => '(?=\|)',
|
675
|
+
:stops => '\n',
|
676
|
+
:hint => ['|'],
|
677
|
+
:filter => Proc.new {|s|
|
678
|
+
s.sub!(/^\|\s*/, '')
|
679
|
+
s.sub!(/\s*$/, '')
|
680
|
+
s
|
681
|
+
},
|
682
|
+
:open => ' alt="', :close => '"',
|
683
|
+
},
|
684
|
+
:imgsrc => {
|
685
|
+
:curpat => '(?=[^\|])',
|
686
|
+
:stops => '(?=\|)',
|
687
|
+
:filter => Proc.new {|s|
|
688
|
+
s.sub!(/^\|\s*/, '')
|
689
|
+
s.sub!(/\s*$/, '')
|
690
|
+
if @img_function
|
691
|
+
s = @img_function.call(s)
|
692
|
+
end
|
693
|
+
s
|
694
|
+
},
|
695
|
+
:open => 'src="', :close => '"',
|
696
|
+
},
|
697
|
+
:strong => {
|
698
|
+
:curpat => '(?=\*\*)',
|
699
|
+
:stops => '\*\*.*?\*\*',
|
700
|
+
:hint => ['*'],
|
701
|
+
:contains => ALL_INLINE,
|
702
|
+
:filter => Proc.new {|s|
|
703
|
+
s[0,2] = ''
|
704
|
+
s.sub!(/\*\*$/, '')
|
705
|
+
s
|
706
|
+
},
|
707
|
+
:open => "<strong>", :close => "</strong>",
|
708
|
+
},
|
709
|
+
:em => {
|
710
|
+
# This could use a negative lookback assertion to let you know whether
|
711
|
+
# it's part of a URL or not. That would be helpful if the URL had been
|
712
|
+
# escaped. Currently, it will just become italic after the // since
|
713
|
+
# it didn't process the URL.
|
714
|
+
:curpat => '(?=\/\/)',
|
715
|
+
# Removed a negative lookback assertion (?<!:) from the Perl version
|
716
|
+
# and replaced it with [^:] Not sure of the consequences, however, as
|
717
|
+
# of this version, Ruby does not have negative lookback assertions, so
|
718
|
+
# I had to do it.
|
719
|
+
:stops => '\/\/.*?[^:]\/\/',
|
720
|
+
:hint => ['/'],
|
721
|
+
:contains => ALL_INLINE,
|
722
|
+
:filter => Proc.new {|s|
|
723
|
+
s[0,2] = ''
|
724
|
+
s.sub!(/\/\/$/, '')
|
725
|
+
s
|
726
|
+
},
|
727
|
+
:open => "<em>", :close => "</em>",
|
728
|
+
},
|
729
|
+
:mono => {
|
730
|
+
:curpat => '(?=\#\#)',
|
731
|
+
:stops => '\#\#.*?\#\#',
|
732
|
+
:hint => ['#'],
|
733
|
+
:contains => ALL_INLINE,
|
734
|
+
:filter => Proc.new {|s|
|
735
|
+
s[0,2] = ''
|
736
|
+
s.sub!(/\#\#$/, '')
|
737
|
+
s
|
738
|
+
},
|
739
|
+
:open => "<tt>", :close => "</tt>",
|
740
|
+
},
|
741
|
+
:sub => {
|
742
|
+
:curpat => '(?=,,)',
|
743
|
+
:stops => ',,.*?,,',
|
744
|
+
:hint => [','],
|
745
|
+
:contains => ALL_INLINE,
|
746
|
+
:filter => Proc.new {|s|
|
747
|
+
s[0,2] = ''
|
748
|
+
s.sub!(/\,\,$/, '')
|
749
|
+
s
|
750
|
+
},
|
751
|
+
:open => "<sub>", :close => "</sub>",
|
752
|
+
},
|
753
|
+
:sup => {
|
754
|
+
:curpat => '(?=\^\^)',
|
755
|
+
:stops => '\^\^.*?\^\^',
|
756
|
+
:hint => ['^'],
|
757
|
+
:contains => ALL_INLINE,
|
758
|
+
:filter => Proc.new {|s|
|
759
|
+
s[0,2] = ''
|
760
|
+
s.sub!(/\^\^$/, '')
|
761
|
+
s
|
762
|
+
},
|
763
|
+
:open => "<sup>", :close => "</sup>",
|
764
|
+
},
|
765
|
+
:u => {
|
766
|
+
:curpat => '(?=__)',
|
767
|
+
:stops => '__.*?__',
|
768
|
+
:hint => ['_'],
|
769
|
+
:contains => ALL_INLINE,
|
770
|
+
:filter => Proc.new {|s|
|
771
|
+
s[0,2] = ''
|
772
|
+
s.sub!(/__$/, '')
|
773
|
+
s
|
774
|
+
},
|
775
|
+
:open => "<u>", :close => "</u>",
|
776
|
+
},
|
777
|
+
:amp => {
|
778
|
+
:curpat => '(?=\&(?!\w+\;))',
|
779
|
+
:stops => '.',
|
780
|
+
:hint => ['&'],
|
781
|
+
:filter => Proc.new { "&" },
|
782
|
+
:open => "", :close => "",
|
783
|
+
},
|
784
|
+
:tm => {
|
785
|
+
:curpat => '(?=\(TM\))',
|
786
|
+
:stops => '\(TM\)',
|
787
|
+
:hint => ['('],
|
788
|
+
:filter => Proc.new { "™" },
|
789
|
+
:open => "", :close => "",
|
790
|
+
},
|
791
|
+
:reg => {
|
792
|
+
:curpat => '(?=\(R\))',
|
793
|
+
:stops => '\(R\)',
|
794
|
+
:hint => ['('],
|
795
|
+
:filter => Proc.new { "®" },
|
796
|
+
:open => "", :close => "",
|
797
|
+
},
|
798
|
+
:copy => {
|
799
|
+
:curpat => '(?=\(C\))',
|
800
|
+
:stops => '\(C\)',
|
801
|
+
:hint => ['('],
|
802
|
+
:filter => Proc.new { "©" },
|
803
|
+
:open => "", :close => "",
|
804
|
+
},
|
805
|
+
:ndash => {
|
806
|
+
:curpat => '(?=--)',
|
807
|
+
:stops => '--',
|
808
|
+
:hint => ['-'],
|
809
|
+
:filter => Proc.new { "–" },
|
810
|
+
:open => "", :close => "",
|
811
|
+
},
|
812
|
+
:ellipsis => {
|
813
|
+
:curpat => '(?=\.\.\.)',
|
814
|
+
:stops => '\.\.\.',
|
815
|
+
:hint => ['.'],
|
816
|
+
:filter => Proc.new { "…" },
|
817
|
+
:open => "", :close => "",
|
818
|
+
},
|
819
|
+
}
|
820
|
+
|
821
|
+
# Removes the heading markers from a heading chunk: leading whitespace and
# "=" signs, then trailing "=" signs and whitespace. The string is modified
# in place and returned.
def self.strip_leading_and_trailing_eq_and_whitespace(s)
  [/^\s*=*\s*/, /\s*=*\s*$/].each { |edge| s.sub!(edge, '') }
  s
end
|
826
|
+
|
827
|
+
# Removes one level of list nesting from a list chunk: on the first line and
# on every following line, the leading run of backticks or spaces plus one
# "*" or "#" is replaced by a single backtick. The string is modified in
# place and returned.
def self.strip_list(s)
  marker = '(?:`*| *)[*#]'
  s.sub!(Regexp.new(marker), '`')
  s.gsub!(Regexp.new("\\n#{marker}", Regexp::MULTILINE), "\n`")
  s
end
|
832
|
+
|
833
|
+
# Runs +str+ through the :filter proc of the chunk named +chunk+ and returns
# the proc's result.
def self.filter_string_x_with_chunk_filter_y(str, chunk)
  chunk_filter = @@chunks_hash[chunk][:filter]
  chunk_filter.call(str)
end
|
836
|
+
|
837
|
+
# Converts +tref+, the text of a chunk of type +chunk+, into markup by
# repeatedly identifying the sub-chunk that starts at the current position,
# cutting it out, filtering it, and recursing into it when it can contain
# further chunks. Returns the generated markup as one string.
def self.parse(tref, chunk)
  html = []
  cursor = 0            # current scan position in tref
  chunk_start = 0       # where the sub-chunk being cut out begins
  current = nil         # type of the sub-chunk found at cursor, if any
  already_tried = false # becomes true after the first pass without a sub-chunk

  loop do
    if current # we've determined what type of sub-chunk this is
      definition = @@chunks_hash[current]

      # The sub-chunk ends where its delimiter pattern stops matching, or at
      # the end of the text when the delimiter is not found.
      cursor = tref.index(definition[:delim], cursor) ? Regexp.last_match.end(0) : tref.length

      html << definition[:open]

      text = tref[chunk_start, cursor - chunk_start] # grab the chunk
      text = definition[:filter].call(text) if definition.has_key?(:filter)

      chunk_start = cursor # the next sub-chunk begins where this one ends

      # Recurse when the sub-chunk can contain other chunks; otherwise emit
      # its text as is.
      html << (text && definition.has_key?(:contains) ? parse(text, current) : text)

      html << definition[:close]
    elsif already_tried
      # No sub-chunk matches at this position, so the loop cannot advance.
      # The nested list test leaves a dangling newline; that one case is
      # accepted silently rather than fixed in the grammar. Anything else is
      # reported.
      dangling_newline = cursor == tref.length - 1 && tref[cursor..tref.length] == "\n"
      $stderr.puts "ERROR: endless loop detected" unless dangling_newline
      break
    else
      already_tried = true
    end

    break if cursor == tref.length # we've eaten the whole string
    current = get_sub_chunk_for(tref, chunk, cursor) # more string to come
  end

  html.join
end
|
898
|
+
|
899
|
+
# Identifies which chunk type starts at position +pos+ of +tref+, considering
# only the types that +chunk+ may contain. Returns the chunk name as a
# symbol, or nil when nothing matches.
def self.get_sub_chunk_for(tref, chunk, pos)
  definition = @@chunks_hash[chunk]

  # First try the chunk types hinted at by the character at pos.
  first_char = tref[pos, 1]
  hinted = definition[:calculated_hint_array_for][first_char].to_a.find do |candidate|
    tref.index(@@chunks_hash[candidate][:curpatcmp], pos)
  end
  return hinted if hinted

  # The hint didn't help. Check all the chunk types which this chunk contains.
  contained = definition[:contains].to_a.find do |candidate|
    tref.index(@@chunks_hash[candidate.to_sym][:curpatcmp], pos)
  end

  contained && contained.to_sym
end
|
919
|
+
|
920
|
+
# compile a regex that matches any of the patterns that interrupt the
|
921
|
+
# current chunk.
|
922
|
+
def self.delim(chunk)
  stops = @@chunks_hash[chunk][:stops]

  # A non-Array :stops value is already a pattern; return it unchanged.
  return stops unless stops.is_a?(Array)

  # An Array names other chunks: the current chunk ends wherever one of them
  # begins. Use each one's forward pattern when it has one, otherwise its
  # current-position pattern, and join them as alternatives.
  stops.map { |name|
    stop_chunk = @@chunks_hash[name.to_sym]
    stop_chunk.has_key?(:fwpat) ? stop_chunk[:fwpat] : stop_chunk[:curpat]
  }.join("|")
end
|
940
|
+
|
941
|
+
# one-time optimization of the grammar - speeds the parser up a ton
|
942
|
+
def self.init
  return if @is_initialized

  @is_initialized = true

  @@chunks_hash.keys.each do |name|
    definition = @@chunks_hash[name]

    # Precompile the pattern that recognises this chunk at the current
    # position; \G anchors the match to the offset passed to String#index.
    if definition.has_key?(:curpat)
      definition[:curpatcmp] = Regexp.compile("\\G#{definition[:curpat]}", Regexp::MULTILINE)
    end

    # Precompile the pattern that marks where this chunk ends.
    if definition.has_key?(:stops)
      definition[:delim] = Regexp.compile(delim(name), Regexp::MULTILINE)
    end

    next unless definition.has_key?(:contains)

    # Map each hint character to the contained chunk types that list it, so
    # get_sub_chunk_for can try the likely candidates first.
    hints = {}
    definition[:calculated_hint_array_for] = hints
    definition[:contains].each do |contained|
      contained = contained.to_sym
      Array(@@chunks_hash[contained][:hint]).each do |hint_char|
        (hints[hint_char] ||= []) << contained
      end
    end
  end
end
|
972
|
+
|
973
|
+
end
|