kron4eg-wikicreole 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,973 @@
1
+ # WikiCreole implements the Wiki Creole markup language,
2
+ # version 1.0, as described at http://www.wikicreole.org. It
3
+ # reads Creole 1.0 markup and returns XHTML.
4
+ #
5
+ # Author:: Gordon McCreight (mailto:wikicreole.to.gordon@mccreight.com)
6
+ # Copyright:: Copyright (c) 2008 Gordon McCreight
7
+ # License:: Distributes under the same terms as Ruby (see the LICENSE file)
8
+ # Version:: 0.1.3
9
+ # Date:: 2009-02-05
10
+ #
11
+ # == Synopsis
12
+ # Most likely you'll just want to do:
13
+ # require 'rubygems'
14
+ # require 'wiki_creole'
15
+ # xhtml = WikiCreole.creole_parse(wiki_creole_markup)
16
+ # If you want to override the default behaviors, make sure to look at the other
17
+ # public methods.
18
+ #
19
+ # == Official Markup
20
+ #
21
+ # Here is a summary of the official Creole 1.0 markup
22
+ # elements. See http://www.wikicreole.org for the full
23
+ # details.
24
+ #
25
+ # Headings:
26
+ # = heading 1 -> <h1>heading 1</h1>
27
+ # == heading 2 -> <h2>heading 2</h2>
28
+ # ...
29
+ # ====== heading 6 -> <h6>heading 6</h6>
30
+ #
31
+ # Various inline markup:
32
+ # ** bold ** -> <strong> bold </strong>
33
+ # // italics // -> <em> italics </em>
34
+ # **// both //** -> <strong><em> both </em></strong>
35
+ # [[ link ]] -> <a href="link">link</a>
36
+ # [[ link | text ]] -> <a href="link">text</a>
37
+ # http://cpan.org -> <a href="http://cpan.org">http://cpan.org</a>
38
+ # line \\ break -> line <br /> break
39
+ # {{img.jpg|alt}} -> <img src="img.jpg" alt="alt" />
40
+ #
41
+ # Lists:
42
+ # * unordered list <ul><li>unordered list</li>
43
+ # * second item <li>second item</li>
44
+ # ## nested ordered -> <ol><li>nested ordered</li>
45
+ # *** uber-nested <ul><li>uber-nested</li></ul>
46
+ # * back to level 1 </ol><li>back to level 1</li></ul>
47
+ #
48
+ # Tables:
49
+ # |= h1 |= h2 -> <table><tr><th>h1</th><th>h2</th></tr>
50
+ # | c1 | c2 <tr><td>c1</td><td>c2</td></tr></table>
51
+ #
52
+ # Nowiki (Preformatted):
53
+ # {{{ <pre>
54
+ # ** not bold ** ** not bold **
55
+ # escaped HTML: -> escaped HTML:
56
+ # <i> test </i> &lt;i&gt; test &lt;/i&gt;
57
+ # }}} </pre>
58
+ #
59
+ # {{{ inline\\also }}} -> <tt>inline\\also</tt>
60
+ #
61
+ # Escape Character:
62
+ # ~** not bold ** -> ** not bold **
63
+ # tilde: ~~ -> tilde: ~
64
+ #
65
+ # Paragraphs are separated by other blocks and blank lines.
66
+ # Inline markup can usually be combined, overlapped, etc. List
67
+ # items and plugin text can span lines.
68
+ #
69
+ # == Extended Markup
70
+ #
71
+ # In addition to the official markup, WikiCreole also supports
72
+ # the following markup:
73
+ #
74
+ # Plugins:
75
+ # << plugin >> -> whatever you want (see WikiCreole.creole_plugin)
76
+ # <<< plugin >>> -> whatever you want (see WikiCreole.creole_plugin)
77
+ # Triple-bracket syntax has priority, in order to allow you to embed
78
+ # double-brackets in plugins, such as to embed Perl code.
79
+ #
80
+ # Inline:
81
+ # ## monospace ## -> <tt> monospace </tt>
82
+ # ^^ superscript ^^ -> <sup> superscript </sup>
83
+ # ,, subscript ,, -> <sub> subscript </sub>
84
+ # __ underline __ -> <u> underline </u>
85
+ # (TM) -> &trade;
86
+ # (R) -> &reg;
87
+ # (C) -> &copy;
88
+ # ... -> &hellip;
89
+ # -- -> &ndash;
90
+ #
91
+ # Indented Paragraphs:
92
+ # :this -> <div style="margin-left:2em"><p>this
93
+ # is indented is indented</p>
94
+ # :: more indented <div style="margin-left:2em"><p> more
95
+ # indented</div></div>
96
+ #
97
+ # Definition Lists:
98
+ # ; Title -> <dl><dt>Title</dt>
99
+ # : item 1 : item 2 <dd>item 1</dd><dd>item 2</dd>
100
+ # ; Title 2 : item2a <dt>Title 2</dt><dd>item 2a</dd></dl>
101
+ #
102
+ # == Acknowledgements
103
+ # Most of this code is ported from Jason Burnett's excellent Perl-based
104
+ # converter which can be found here:
105
+ # http://search.cpan.org/~jburnett/Text-WikiCreole/
106
+ # He, in turn, acknowledges the Document::Parser perl module.
107
+ #
108
+ # Also, some of the tests are taken from Lars Christensen's implementation of
109
+ # the Creole parser. You can find his code at:
110
+ # http://github.com/larsch/creole/tree/master
111
+ #
112
+ # Other tests come from the wikicreole website itself, here:
113
+ # http://www.wikicreole.org/
114
+
115
class WikiCreole

  # Reads Creole 1.0 markup and returns XHTML.
  #
  #   xhtml = WikiCreole.creole_parse(wiki_creole_markup)
  #
  # Returns an empty string when +s+ is not a String or is empty.
  def self.creole_parse(s)
    return "" unless String === s
    return "" if s.empty?

    init
    parse(s, :top)
  end

  # Creole 1.0 supports two plugin syntaxes: << plugin content >> and
  # <<< plugin content >>>
  #
  # Write a function that receives the text between the <<>>
  # delimiters (not including the delimiters) and
  # returns the text to be displayed.  For example, here is a
  # simple plugin that converts plugin text to uppercase:
  #
  #   WikiCreole.creole_plugin {|s| s.upcase }
  #
  # If you do not register a plugin function, plugin markup will be left
  # as is, including the surrounding << >>.
  def self.creole_plugin(&blk)
    @plugin_function = blk
  end

  # You may wish to customize [[ links ]], such as to prefix a hostname,
  # port, etc.
  #
  # Write a function, similar to the plugin function, which receives the
  # URL part of the link (with leading and trailing whitespace stripped)
  # and returns the customized link.  For example, to prepend
  # http://my.domain/
  # to pagename:
  #
  #   WikiCreole.creole_link {|s| "http://my.domain/#{s}" }
  def self.creole_link(&blk)
    @link_function = blk
  end

  # Same purpose as creole_link, but for "bare" link markup.  Bare links are
  # the links which are in the text but not surrounded by brackets.
  #
  #   WikiCreole.creole_barelink {|s| "#{s}.html" }
  def self.creole_barelink(&blk)
    @barelink_function = blk
  end

  # Same purpose as creole_link, but for image URLs.
  #
  #   WikiCreole.creole_img {|s| "http://my.domain/#{s}" }
  def self.creole_img(&blk)
    @img_function = blk
  end

  # If you want complete control over links, rather than just modifying
  # the URL, register your link markup function with WikiCreole.creole_link()
  # as above and then call creole_customlinks().  Now your function will receive
  # the entire link markup chunk, such as <tt>[[ some_wiki_page | page description ]]</tt>
  # and must return HTML.
  #
  # This has no effect on "bare" link markup, such as
  # http://cpan.org
  def self.creole_customlinks
    @@chunks_hash[:href][:open] = ""
    @@chunks_hash[:href][:close] = ""
    @@chunks_hash[:link][:open] = ""
    @@chunks_hash[:link][:close] = ""
    # Without :contains the parser hands the raw chunk to the filter only.
    @@chunks_hash[:link].delete(:contains)
    @@chunks_hash[:link][:filter] = Proc.new {|s|
      s = @link_function.call(s) if @link_function
      s
    }
  end

  # Same purpose as creole_customlinks, but for "bare" link markup.
  def self.creole_custombarelinks
    @@chunks_hash[:ilink][:open] = ""
    @@chunks_hash[:ilink][:close] = ""
    @@chunks_hash[:ilink][:filter] = Proc.new {|s|
      s = @barelink_function.call(s) if @barelink_function
      s
    }
  end

  # Similar to creole_customlinks, but for images.
  def self.creole_customimgs
    @@chunks_hash[:img][:open] = ""
    @@chunks_hash[:img][:close] = ""
    @@chunks_hash[:img].delete(:contains)
    @@chunks_hash[:img][:filter] = Proc.new {|s|
      s = @img_function.call(s) if @img_function
      s
    }
  end

  # You may wish to customize the opening and/or closing tags
  # for the various bits of Creole markup.  For example, to
  # assign a CSS class to list items:
  #   WikiCreole.creole_tag(:li, :open, "<li class=myclass>")
  #
  # The tags that may be of interest are:
  #
  #   br          dd          dl
  #   dt          em          h1
  #   h2          h3          h4
  #   h5          h6          hr
  #   ilink       img         inowiki
  #   ip          li          link
  #   mono        nowiki      ol
  #   p           strong      sub
  #   sup         table       td
  #   th          tr          u
  #   ul
  #
  # Those should be self-explanatory, except for inowiki (inline nowiki),
  # ilink (bare links, e.g.
  #   http://www.cpan.org
  # ) and ip (indented paragraph).
  #
  # +tag+ and +type+ may each be given as a Symbol or a String.  Unknown
  # tags and types other than :open / :close are ignored (returns nil).
  def self.creole_tag(tag, type, text="")
    type = type.to_sym
    return unless [:open, :close].include?(type)
    # Grammar keys are symbols; accept "li" as well as :li.
    tag = tag.to_sym if tag.respond_to?(:to_sym)
    return unless @@chunks_hash.key?(tag)
    @@chunks_hash[tag][type] = text
  end

  # See all current tags:
  #   puts WikiCreole.creole_tags()
  #
  # Returns one line per chunk whose opening text contains a "<", sorted by
  # chunk name, with newlines in the tags shown as a literal \n.
  def self.creole_tags
    names = @@chunks_hash.keys.map {|k| k.to_s }.sort
    lines = names.map do |name|
      spec = @@chunks_hash[name.to_sym]
      o = spec[:open] || ""
      c = spec[:close] || ""
      next if o !~ /</m
      o, c = [o, c].map {|x| x.gsub(/\n/m, "\\n") }
      "#{name}: open(#{o}) close(#{c})\n"
    end
    lines.compact.join
  end

  # NOTE(review): `private` does not apply to `def self.` methods or to
  # constants, so everything below is technically still public.  It is kept
  # as a marker of intent; switching to private_class_method would change the
  # public surface and is deliberately not done here.
  private

  # characters that may indicate inline wiki markup
  SPECIALCHARS = ['^', '\\', '*', '/', '_', ',', '{', '[',
                  '<', '~', '|', "\n", '#', ':', ';', '(', '-', '.'].freeze

  # plain characters
  # build an array of "plain content" characters by subtracting SPECIALCHARS
  # from ascii printable (ascii 32 to 126)
  PLAINCHARS = (32..126).map {|c| c.chr }.reject {|c| SPECIALCHARS.include?(c) }.freeze

  # non-plain text inline widgets
  INLINE = %w{strong em br esc img link ilink inowiki
              sub sup mono u plug plug2 tm reg copy ndash ellipsis amp}.freeze

  ALL_INLINE = [INLINE, 'plain', 'any'].flatten.freeze # including plain text

  BLOCKS = %w{h1 h2 h3 hr nowiki h4 h5 h6 ul ol table p ip dl plug plug2 blank}.freeze

  # handy - used several times in the chunk grammar
  EOL = '(?:\n|$)'.freeze # end of line (or string)

  # User-registered callbacks (class-level instance variables).
  @plugin_function = nil
  @barelink_function = nil
  @link_function = nil
  @img_function = nil

  @is_initialized = false

  # The grammar.  Each chunk type may define:
  #   :curpat   - pattern (as a string) that must match at the current position
  #   :fwpat    - pattern used when this chunk terminates another chunk
  #   :stops    - pattern, or list of chunk names, that ends this chunk
  #   :hint     - first characters that suggest this chunk type
  #   :contains - chunk types that may appear inside this chunk
  #   :filter   - Proc applied to the raw chunk text before output/recursion
  #   :open / :close - text emitted before / after the chunk
  # init adds :curpatcmp, :delim and :calculated_hint_array_for.
  @@chunks_hash = {
    :top => {
      :contains => BLOCKS,
    },
    :blank => {
      :curpat => "(?= *#{EOL})",
      :fwpat => "(?=(?:^|\n) *#{EOL})",
      :stops => '(?=\S)',
      :hint => ["\n"],
      :filter => Proc.new { "" }, # whitespace into the bit bucket
      :open => "", :close => "",
    },
    :p => {
      :curpat => '(?=.)',
      :stops => ['blank', 'ip', 'h', 'hr', 'nowiki', 'ul', 'ol', 'dl', 'table'],
      :hint => PLAINCHARS,
      :contains => ALL_INLINE,
      :filter => Proc.new {|s| s.chomp },
      :open => "<p>", :close => "</p>\n\n",
    },
    :ip => {
      :curpat => '(?=:)',
      :fwpat => '\n(?=:)',
      :stops => ['blank', 'h', 'hr', 'nowiki', 'ul', 'ol', 'dl', 'table'],
      :hint => [':'],
      :contains => ['p', 'ip'],
      :filter => Proc.new {|s|
        s.sub!(/:/, '')
        s.sub!(/\n:/m, "\n")
        s
      },
      :open => "<div style=\"margin-left: 2em\">", :close => "</div>\n",
    },
    :dl => {
      :curpat => '(?=;)',
      :fwpat => '\n(?=;)',
      :stops => ['blank', 'h', 'hr', 'nowiki', 'ul', 'ol', 'table'],
      :hint => [';'],
      :contains => ['dt', 'dd'],
      :open => "<dl>\n", :close => "</dl>\n",
    },
    :dt => {
      :curpat => '(?=;)',
      :fwpat => '\n(?=;)',
      :stops => '(?=:|\n)',
      :hint => [';'],
      :contains => ALL_INLINE,
      :filter => Proc.new {|s|
        s.sub!(/^;\s*/, '')
        s
      },
      :open => "  <dt>", :close => "</dt>\n",
    },
    :dd => {
      :curpat => '(?=\n|:)',
      :fwpat => '(?:\n|:)',
      :stops => '.(?=:)|\n(?=;)',
      :hint => [':', "\n"],
      :contains => ALL_INLINE,
      :filter => Proc.new {|s|
        s.sub!(/(?:\n|:)\s*/m, '')
        s.sub!(/\s*$/m, '')
        s
      },
      :open => "    <dd>", :close => "</dd>\n",
    },
    :table => {
      :curpat => '(?= *\|.)',
      :fwpat => '\n(?= *\|.)',
      :stops => '\n(?= *[^\|])',
      :contains => ['tr'],
      :hint => ['|', ' '],
      :open => "<table>\n", :close => "</table>\n\n",
    },
    :tr => {
      :curpat => '(?= *\|)',
      :stops => '\n',
      :contains => ['td', 'th'],
      :hint => ['|', ' '],
      :filter => Proc.new {|s|
        s.sub!(/^ */, '')
        s.sub!(/\| *$/, '')
        s
      },
      :open => "    <tr>\n", :close => "    </tr>\n",
    },
    :td => {
      :curpat => '(?=\|[^=])',
      # this gnarly regex fixes ambiguous '|' for links/imgs/nowiki in tables
      :stops => '[^~](?=\|(?!(?:[^\[]*\]\])|(?:[^\{]*\}\})))',
      :contains => ALL_INLINE,
      :hint => ['|'],
      :filter => Proc.new {|s|
        s.sub!(/^ *\| */, '')
        s.sub!(/\s*$/m, '')
        s
      },
      :open => "        <td>", :close => "</td>\n",
    },
    :th => {
      :curpat => '(?=\|=)',
      # this gnarly regex fixes ambiguous '|' for links/imgs/nowiki in tables
      :stops => '[^~](?=\|(?!(?:[^\[]*\]\])|(?:[^\{]*\}\})))',
      :contains => ALL_INLINE,
      :hint => ['|'],
      :filter => Proc.new {|s|
        s.sub!(/^ *\|= */, '')
        s.sub!(/\s*$/m, '')
        s
      },
      :open => "        <th>", :close => "</th>\n",
    },
    :ul => {
      :curpat => '(?=(?:`| *)\*[^*])',
      :fwpat => '(?=\n(?:`| *)\*[^*])',
      :stops => ['blank', 'ip', 'h', 'nowiki', 'li', 'table', 'hr', 'dl'],
      :contains => ['ul', 'ol', 'li'],
      :hint => ['*', ' '],
      :filter => Proc.new {|s| strip_list(s) },
      :open => "<ul>\n", :close => "</ul>\n",
    },
    :ol => {
      :curpat => '(?=(?:`| *)\#[^#])',
      :fwpat => '(?=\n(?:`| *)\#[^#])',
      :stops => ['blank', 'ip', 'h', 'nowiki', 'li', 'table', 'hr', 'dl'],
      :contains => ['ul', 'ol', 'li'],
      :hint => ['#', ' '],
      :filter => Proc.new {|s| strip_list(s) },
      :open => "<ol>\n", :close => "</ol>\n",
    },
    :li => {
      :curpat => '(?=`[^*#])',
      :fwpat => '\n(?=`[^*#])',
      :stops => '\n(?=`)',
      :hint => ['`'],
      :filter => Proc.new {|s|
        s.sub!(/` */, '')
        s.chomp!
        s
      },
      :contains => ALL_INLINE,
      :open => "    <li>", :close => "</li>\n",
    },
    :nowiki => {
      :curpat => '(?=\{\{\{ *\n)',
      :fwpat => '\n(?=\{\{\{ *\n)',
      :stops => "\n\\}\\}\\} *#{EOL}",
      :hint => ['{'],
      :filter => Proc.new {|s|
        s[0,3] = ''
        # Anchor with \z, not $: Ruby's $ matches at every line end, which
        # would strip a "}}}" that merely ends a line of the preformatted
        # content instead of the real terminator.
        s.sub!(/\}{3}\s*\z/, '')
        s.gsub!(/&/, '&amp;')
        s.gsub!(/</, '&lt;')
        s.gsub!(/>/, '&gt;')
        s
      },
      :open => "<pre>", :close => "</pre>\n\n",
    },
    :hr => {
      :curpat => "(?= *-{4,} *#{EOL})",
      :fwpat => "\n(?= *-{4,} *#{EOL})",
      :hint => ['-', ' '],
      :stops => EOL,
      :open => "<hr />\n\n", :close => "",
      :filter => Proc.new { "" } # ----- into the bit bucket
    },
    :h => { :curpat => '(?=(?:^|\n) *=)' }, # matches any heading
    :h1 => {
      :curpat => '(?= *=[^=])',
      :hint => ['=', ' '],
      :stops => '\n',
      :contains => ALL_INLINE,
      :open => "<h1>", :close => "</h1>\n\n",
      :filter => Proc.new {|s| strip_leading_and_trailing_eq_and_whitespace(s) },
    },
    :h2 => {
      :curpat => '(?= *={2}[^=])',
      :hint => ['=', ' '],
      :stops => '\n',
      :contains => ALL_INLINE,
      :open => "<h2>", :close => "</h2>\n\n",
      :filter => Proc.new {|s| strip_leading_and_trailing_eq_and_whitespace(s) },
    },
    :h3 => {
      :curpat => '(?= *={3}[^=])',
      :hint => ['=', ' '],
      :stops => '\n',
      :contains => ALL_INLINE,
      :open => "<h3>", :close => "</h3>\n\n",
      :filter => Proc.new {|s| strip_leading_and_trailing_eq_and_whitespace(s) },
    },
    :h4 => {
      :curpat => '(?= *={4}[^=])',
      :hint => ['=', ' '],
      :stops => '\n',
      :contains => ALL_INLINE,
      :open => "<h4>", :close => "</h4>\n\n",
      :filter => Proc.new {|s| strip_leading_and_trailing_eq_and_whitespace(s) },
    },
    :h5 => {
      :curpat => '(?= *={5}[^=])',
      :hint => ['=', ' '],
      :stops => '\n',
      :contains => ALL_INLINE,
      :open => "<h5>", :close => "</h5>\n\n",
      :filter => Proc.new {|s| strip_leading_and_trailing_eq_and_whitespace(s) },
    },
    :h6 => {
      :curpat => '(?= *={6,})',
      :hint => ['=', ' '],
      :stops => '\n',
      :contains => ALL_INLINE,
      :open => "<h6>", :close => "</h6>\n\n",
      :filter => Proc.new {|s| strip_leading_and_trailing_eq_and_whitespace(s) },
    },
    :plain => {
      :curpat => '(?=[^*/_,^\\{\[<|])',
      :stops => INLINE,
      :hint => PLAINCHARS,
      :open => '', :close => ''
    },
    :any => { # catch-all
      :curpat => '(?=.)',
      :stops => INLINE,
      :open => '', :close => ''
    },
    :br => {
      :curpat => '(?=\\\\\\\\)',
      :stops => '\\\\\\\\',
      :hint => ['\\'],
      :filter => Proc.new { "" },
      :open => '<br />', :close => '',
    },
    :esc => {
      :curpat => '(?=~[\S])',
      :stops => '~.',
      :hint => ['~'],
      :filter => Proc.new {|s|
        s.sub!(/^./m, '')
        s
      },
      :open => '', :close => '',
    },
    :inowiki => {
      :curpat => '(?=\{{3}.*?\}*\}{3})',
      :stops => '.*?\}*\}{3}',
      :hint => ['{'],
      :filter => Proc.new {|s|
        s[0,3] = ''
        # \z rather than $ so only the terminating braces are removed.
        s.sub!(/\}{3}\s*\z/, '')
        s.gsub!(/&/, '&amp;')
        s.gsub!(/</, '&lt;')
        s.gsub!(/>/, '&gt;')
        s
      },
      :open => "<tt>", :close => "</tt>",
    },
    :plug => {
      :curpat => '(?=\<{3}.*?\>*\>{3})',
      :stops => '.*?\>*\>{3}',
      :hint => ['<'],
      :filter => Proc.new {|s|
        s[0,3] = ''
        s.sub!(/\>{3}$/, '')
        if @plugin_function
          s = @plugin_function.call(s)
        else
          s = "<<<#{s}>>>"
        end
        s
      },
      :open => "", :close => "",
    },
    :plug2 => {
      :curpat => '(?=\<{2}.*?\>*\>{2})',
      :stops => '.*?\>*\>{2}',
      :hint => ['<'],
      :filter => Proc.new {|s|
        s[0,2] = ''
        s.sub!(/\>{2}$/, '')
        if @plugin_function
          s = @plugin_function.call(s)
        else
          s = "<<#{s}>>"
        end
        s
      },
      :open => "", :close => "",
    },
    :ilink => {
      :curpat => '(?=(?:https?|ftp):\/\/)',
      # This following is the [:punct:] character class with the / and ? removed
      # so that URLs like http://www.somesite.com/ will match the trailing
      # slash.  URLs with a trailing ? will also work.  Trailing ? is sometimes
      # used to ensure that browsers don't cache the page.
      :stops => '(?=[!"#$%&\'()*+,-.:;<=>@\[\\]^_`{|}~]?(?:\s|$))',
      :hint => ['h', 'f'],
      :filter => Proc.new {|s|
        s.sub!(/^\s*/, '')
        s.sub!(/\s*$/, '')
        if @barelink_function
          s = @barelink_function.call(s)
        end
        s = "href=\"#{s}\">#{s}"
        s
      },
      :open => "<a ", :close=> "</a>",
    },
    :link => {
      :curpat => '(?=\[\[[^\n]+?\]\])',
      :stops => '\]\]',
      :hint => ['['],
      :contains => ['href', 'atext'],
      :filter => Proc.new {|s|
        s[0,2] = ''
        s[-2,2] = ''
        s += "|#{s}" if ! s.index(/\|/) # text = url unless given
        s
      },
      :open => "<a ", :close => "</a>",
    },
    :href => {
      :curpat => '(?=[^\|])',
      :stops => '(?=\|)',
      :filter => Proc.new {|s|
        s.sub!(/^\s*/, '')
        s.sub!(/\s*$/, '')
        if @link_function
          s = @link_function.call(s)
        end
        s
      },
      :open => 'href="', :close => '">',
    },
    :atext => {
      :curpat => '(?=\|)',
      :stops => '\n',
      :hint => ['|'],
      :contains => ALL_INLINE,
      :filter => Proc.new {|s|
        s.sub!(/^\|\s*/, '')
        s.sub!(/\s*$/, '')
        s
      },
      :open => '', :close => '',
    },
    :img => {
      :curpat => '(?=\{\{[^\{][^\n]*?\}\})',
      :stops => '\}\}',
      :hint => ['{'],
      :contains => ['imgsrc', 'imgalt'],
      :filter => Proc.new {|s|
        s[0,2] = ''
        s.sub!(/\}\}$/, '')
        s
      },
      :open => "<img ", :close => " />",
    },
    :imgalt => {
      :curpat => '(?=\|)',
      :stops => '\n',
      :hint => ['|'],
      :filter => Proc.new {|s|
        s.sub!(/^\|\s*/, '')
        s.sub!(/\s*$/, '')
        s
      },
      :open => ' alt="', :close => '"',
    },
    :imgsrc => {
      :curpat => '(?=[^\|])',
      :stops => '(?=\|)',
      :filter => Proc.new {|s|
        s.sub!(/^\|\s*/, '')
        s.sub!(/\s*$/, '')
        if @img_function
          s = @img_function.call(s)
        end
        s
      },
      :open => 'src="', :close => '"',
    },
    :strong => {
      :curpat => '(?=\*\*)',
      :stops => '\*\*.*?\*\*',
      :hint => ['*'],
      :contains => ALL_INLINE,
      :filter => Proc.new {|s|
        s[0,2] = ''
        s.sub!(/\*\*$/, '')
        s
      },
      :open => "<strong>", :close => "</strong>",
    },
    :em => {
      # This could use a negative lookback assertion to let you know whether
      # it's part of a URL or not.  That would be helpful if the URL had been
      # escaped.  Currently, it will just become italic after the // since
      # it didn't process the URL.
      :curpat => '(?=\/\/)',
      # Removed a negative lookback assertion (?<!:) from the Perl version
      # and replaced it with [^:]  Not sure of the consequences, however, as
      # of this version, Ruby does not have negative lookback assertions, so
      # I had to do it.
      :stops => '\/\/.*?[^:]\/\/',
      :hint => ['/'],
      :contains => ALL_INLINE,
      :filter => Proc.new {|s|
        s[0,2] = ''
        s.sub!(/\/\/$/, '')
        s
      },
      :open => "<em>", :close => "</em>",
    },
    :mono => {
      :curpat => '(?=\#\#)',
      :stops => '\#\#.*?\#\#',
      :hint => ['#'],
      :contains => ALL_INLINE,
      :filter => Proc.new {|s|
        s[0,2] = ''
        s.sub!(/\#\#$/, '')
        s
      },
      :open => "<tt>", :close => "</tt>",
    },
    :sub => {
      :curpat => '(?=,,)',
      :stops => ',,.*?,,',
      :hint => [','],
      :contains => ALL_INLINE,
      :filter => Proc.new {|s|
        s[0,2] = ''
        s.sub!(/\,\,$/, '')
        s
      },
      :open => "<sub>", :close => "</sub>",
    },
    :sup => {
      :curpat => '(?=\^\^)',
      :stops => '\^\^.*?\^\^',
      :hint => ['^'],
      :contains => ALL_INLINE,
      :filter => Proc.new {|s|
        s[0,2] = ''
        s.sub!(/\^\^$/, '')
        s
      },
      :open => "<sup>", :close => "</sup>",
    },
    :u => {
      :curpat => '(?=__)',
      :stops => '__.*?__',
      :hint => ['_'],
      :contains => ALL_INLINE,
      :filter => Proc.new {|s|
        s[0,2] = ''
        s.sub!(/__$/, '')
        s
      },
      :open => "<u>", :close => "</u>",
    },
    :amp => {
      :curpat => '(?=\&(?!\w+\;))',
      :stops => '.',
      :hint => ['&'],
      :filter => Proc.new { "&amp;" },
      :open => "", :close => "",
    },
    :tm => {
      :curpat => '(?=\(TM\))',
      :stops => '\(TM\)',
      :hint => ['('],
      :filter => Proc.new { "&trade;" },
      :open => "", :close => "",
    },
    :reg => {
      :curpat => '(?=\(R\))',
      :stops => '\(R\)',
      :hint => ['('],
      :filter => Proc.new { "&reg;" },
      :open => "", :close => "",
    },
    :copy => {
      :curpat => '(?=\(C\))',
      :stops => '\(C\)',
      :hint => ['('],
      :filter => Proc.new { "&copy;" },
      :open => "", :close => "",
    },
    :ndash => {
      :curpat => '(?=--)',
      :stops => '--',
      :hint => ['-'],
      :filter => Proc.new { "&ndash;" },
      :open => "", :close => "",
    },
    :ellipsis => {
      :curpat => '(?=\.\.\.)',
      :stops => '\.\.\.',
      :hint => ['.'],
      :filter => Proc.new { "&hellip;" },
      :open => "", :close => "",
    },
  }

  # Strips the leading "=" run, the optional trailing "=" run and the
  # surrounding whitespace from a heading line.  Mutates and returns +s+.
  def self.strip_leading_and_trailing_eq_and_whitespace(s)
    s.sub!(/^\s*=*\s*/, '')
    s.sub!(/\s*=*\s*$/, '')
    s
  end

  # Replaces the first list marker (* or #) on every line with a backtick,
  # peeling off one nesting level.  The backtick is the internal marker the
  # :li / :ul / :ol patterns look for.  Mutates and returns +s+.
  def self.strip_list(s)
    s.sub!(/(?:`*| *)[*#]/, '`')
    s.gsub!(/\n(?:`*| *)[*#]/m, "\n`")
    s
  end

  # Runs +str+ through the :filter Proc registered for +chunk+.
  def self.filter_string_x_with_chunk_filter_y(str, chunk)
    @@chunks_hash[chunk][:filter].call(str)
  end

  # Recursively converts +tref+ to HTML, treating it as the content of a
  # chunk of type +chunk+ (a Symbol key of the grammar).  Returns a String.
  def self.parse(tref, chunk)
    html = []
    pos = 0            # current scan position in tref
    last_pos = 0       # where the sub-chunk being consumed started
    sub_chunk = nil    # type of the sub-chunk found at pos, if any
    first_try = true

    loop do
      if sub_chunk # we've determined what type of sub_chunk this is
        spec = @@chunks_hash[sub_chunk]

        # The sub-chunk runs to the end of its delimiter match (precompiled
        # in init), or to the end of the text when nothing terminates it.
        pos = tref.index(spec[:delim], pos) ? Regexp.last_match.end(0) : tref.length

        html << spec[:open]

        t = tref[last_pos, pos - last_pos]                  # grab the chunk
        t = spec[:filter].call(t) if spec.key?(:filter)     # filter it, if applicable

        last_pos = pos # remember where this chunk ends (where next begins)

        # Recurse when the chunk can contain other chunks, otherwise emit as is.
        html << (t && spec.key?(:contains) ? parse(t, sub_chunk) : t)

        html << spec[:close]
      elsif first_try
        first_try = false
      else
        # No sub-chunk matched on a later pass.  The nested list test leaves
        # a dangling newline, which is tolerated silently; anything else
        # means the grammar is stuck.  If anyone finds the issue in the
        # grammar and fixes it, this hack can be removed.
        dangling_newline = pos == tref.length - 1 && tref[pos..tref.length] == "\n"
        $stderr.puts "ERROR: endless loop detected" unless dangling_newline
        break
      end

      break if pos == tref.length # we've eaten the whole string
      sub_chunk = get_sub_chunk_for(tref, chunk, pos) # more string to come
    end

    html.join
  end

  # Returns the type (Symbol) of the sub-chunk of +chunk+ that starts at
  # +pos+ in +tref+, or nil when none of the contained chunk types match.
  def self.get_sub_chunk_for(tref, chunk, pos)
    parent = @@chunks_hash[chunk]
    first_char = tref[pos, 1] # get a hint about the next chunk

    # Try the chunk types hinted at by the first character first.
    hinted = parent[:calculated_hint_array_for][first_char].to_a.find {|candidate|
      tref.index(@@chunks_hash[candidate][:curpatcmp], pos)
    }
    return hinted if hinted

    # the hint didn't help. Check all the chunk types which this chunk contains
    contained = parent[:contains].to_a.find {|candidate|
      tref.index(@@chunks_hash[candidate.to_sym][:curpatcmp], pos)
    }
    contained && contained.to_sym
  end

  # Builds the pattern source (a String) matching any of the patterns that
  # interrupt +chunk+.  When :stops is a list of chunk names, each name
  # contributes its :fwpat if it has one, else its :curpat.
  def self.delim(chunk)
    stops = @@chunks_hash[chunk][:stops]
    return stops unless Array === stops

    stops.map {|stop|
      stop_spec = @@chunks_hash[stop.to_sym]
      stop_spec.key?(:fwpat) ? stop_spec[:fwpat] : stop_spec[:curpat]
    }.join("|")
  end

  # one-time optimization of the grammar - speeds the parser up a ton
  def self.init
    return if @is_initialized

    # precompile a bunch of regexes
    @@chunks_hash.keys.each do |k|
      c = @@chunks_hash[k]

      if c.key?(:curpat)
        # \G pins the match to the position handed to String#index.
        c[:curpatcmp] = Regexp.compile('\G' + c[:curpat], Regexp::MULTILINE)
      end

      if c.key?(:stops)
        c[:delim] = Regexp.compile(delim(k), Regexp::MULTILINE)
      end

      next unless c.key?(:contains)

      # store hints about each chunk to speed id: first char => chunk types
      hints = {}
      c[:contains].each do |ct|
        ct = ct.to_sym
        (@@chunks_hash[ct][:hint] || []).each do |hint|
          (hints[hint] ||= []) << ct
        end
      end
      c[:calculated_hint_array_for] = hints
    end

    # Mark ready only after everything compiled, so a failure above is not
    # remembered as a successful initialization.
    @is_initialized = true
  end

end