gmccreight-WikiCreole 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,973 @@
1
+ # WikiCreole implements the Wiki Creole markup language,
2
+ # version 1.0, as described at http://www.wikicreole.org. It
3
+ # reads Creole 1.0 markup and returns XHTML.
4
+ #
5
+ # Author:: Gordon McCreight (mailto:wikicreole.to.gordon@mccreight.com)
6
+ # Copyright:: Copyright (c) 2008 Gordon McCreight
7
+ # License:: Distributes under the same terms as Ruby (see the LICENSE file)
8
+ # Version:: 0.1.4
9
+ # Date:: 2009-08-11
10
+ #
11
+ # == Synopsis
12
+ # Most likely you'll just want to do:
13
+ # require 'rubygems'
14
+ # require 'wiki_creole'
15
+ # xhtml = WikiCreole.creole_parse(wiki_creole_markup)
16
+ # If you want to override the default behaviors, make sure to look at the other
17
+ # public methods.
18
+ #
19
+ # == Official Markup
20
+ #
21
+ # Here is a summary of the official Creole 1.0 markup
22
+ # elements. See http://www.wikicreole.org for the full
23
+ # details.
24
+ #
25
+ # Headings:
26
+ # = heading 1 -> <h1>heading 1</h1>
27
+ # == heading 2 -> <h2>heading 2</h2>
28
+ # ...
29
+ # ====== heading 6 -> <h6>heading 6</h6>
30
+ #
31
+ # Various inline markup:
32
+ # ** bold ** -> <strong> bold </strong>
33
+ # // italics // -> <em> italics </em>
34
+ # **// both //** -> <strong><em> both </em></strong>
35
+ # [[ link ]] -> <a href="link">link</a>
36
+ # [[ link | text ]] -> <a href="link">text</a>
37
+ # http://cpan.org -> <a href="http://cpan.org">http://cpan.org</a>
38
+ # line \\ break -> line <br /> break
39
+ # {{img.jpg|alt}} -> <img src="img.jpg" alt="alt" />
40
+ #
41
+ # Lists:
42
+ # * unordered list <ul><li>unordered list</li>
43
+ # * second item <li>second item</li>
44
+ # ## nested ordered -> <ol><li>nested ordered</li>
45
+ # *** uber-nested <ul><li>uber-nested</li></ul>
46
+ # * back to level 1 </ol><li>back to level 1</li></ul>
47
+ #
48
+ # Tables:
49
+ # |= h1 |= h2 -> <table><tr><th>h1</th><th>h2</th></tr>
50
+ # | c1 | c2 <tr><td>c1</td><td>c2</td></tr></table>
51
+ #
52
+ # Nowiki (Preformatted):
53
+ # {{{ <pre>
54
+ # ** not bold ** ** not bold **
55
+ # escaped HTML: -> escaped HTML:
56
+ # <i> test </i> &lt;i&gt; test &lt;/i&gt;
57
+ # }}} </pre>
58
+ #
59
+ # {{{ inline\\also }}} -> <tt>inline\\also</tt>
60
+ #
61
+ # Escape Character:
62
+ # ~** not bold ** -> ** not bold **
63
+ # tilde: ~~ -> tilde: ~
64
+ #
65
+ # Paragraphs are separated by other blocks and blank lines.
66
+ # Inline markup can usually be combined, overlapped, etc. List
67
+ # items and plugin text can span lines.
68
+ #
69
+ # == Extended Markup
70
+ #
71
+ # In addition to OFFICIAL MARKUP, Text::WikiCreole also supports
72
+ # the following markup:
73
+ #
74
+ # Plugins:
75
+ # << plugin >> -> whatever you want (see WikiCreole.creole_plugin)
76
+ # <<< plugin >>> -> whatever you want (see WikiCreole.creole_plugin)
77
+ # Triple-bracket syntax has priority, in order to allow you to embed
78
+ # double-brackets in plugins, such as to embed Perl code.
79
+ #
80
+ # Inline:
81
+ # ## monospace ## -> <tt> monospace </tt>
82
+ # ^^ superscript ^^ -> <sup> superscript </sup>
83
+ # ,, subscript ,, -> <sub> subscript </sub>
84
+ # __ underline __ -> <u> underline </u>
85
+ # (TM) -> &trade;
86
+ # (R) -> &reg;
87
+ # (C) -> &copy;
88
+ # ... -> &hellip;
89
+ # -- -> &ndash;
90
+ #
91
+ # Indented Paragraphs:
92
+ # :this -> <div style="margin-left:2em"><p>this
93
+ # is indented is indented</p>
94
+ # :: more indented <div style="margin-left:2em"><p> more
95
+ # indented</div></div>
96
+ #
97
+ # Definition Lists:
98
+ # ; Title -> <dl><dt>Title</dt>
99
+ # : item 1 : item 2 <dd>item 1</dd><dd>item 2</dd>
100
+ # ; Title 2 : item2a <dt>Title 2</dt><dd>item 2a</dd></dl>
101
+ #
102
+ # == Acknowledgements
103
+ # Most of this code is ported from Jason Burnett's excellent Perl-based
104
+ # converter which can be found here:
105
+ # http://search.cpan.org/~jburnett/Text-WikiCreole/
106
+ # He, in turn, acknowledges the Document::Parser perl module.
107
+ #
108
+ # Also, some of the tests are taken from Lars Christensen's implementation of
109
+ # the Creole parser. You can find his code at:
110
+ # http://github.com/larsch/creole/tree/master
111
+ #
112
+ # Other tests come from the wikicreole website itself, here:
113
+ # http://www.wikicreole.org/
114
+
115
+ class WikiCreole
116
+
117
+ # Reads Creole 1.0 markup and returns XHTML.
118
+ #
119
+ # xhtml = WikiCreole.creole_parse(wiki_creole_markup)
120
def self.creole_parse(s)
  # Anything that is not a non-empty String renders as the empty string.
  has_markup = s.is_a?(String) && !s.empty?
  return "" unless has_markup

  init # one-time grammar precompilation; a no-op on later calls
  parse(s, :top)
end
127
+
128
+ # Creole 1.0 supports two plugin syntaxes: << plugin content >> and
129
+ # <<< plugin content >>>
130
+ #
131
+ # Write a function that receives the text between the <<>>
132
+ # delimiters (not including the delimiters) and
133
+ # returns the text to be displayed. For example, here is a
134
+ # simple plugin that converts plugin text to uppercase:
135
+ #
136
+ # WikiCreole.creole_plugin {|s| s.upcase }
137
+ #
138
+ # If you do not register a plugin function, plugin markup will be left
139
+ # as is, including the surrounding << >>.
140
def self.creole_plugin(&blk)
  # Remember the handler used by the :plug / :plug2 filters. Calling this
  # without a block stores nil, so plugin markup is emitted unchanged again.
  @plugin_function = blk
end
143
+
144
+ # You may wish to customize [[ links ]], such as to prefix a hostname,
145
+ # port, etc.
146
+ #
147
+ # Write a function, similar to the plugin function, which receives the
148
+ # URL part of the link (with leading and trailing whitespace stripped)
149
+ # and returns the customized link. For example, to prepend
150
+ # http://my.domain/
151
+ # to pagename:
152
+ #
153
+ # WikiCreole.creole_link {|s| "http://my.domain/#{s}" }
154
def self.creole_link(&blk)
  # Remember the URL rewriter consulted by the :href filter (nil when no
  # block is given, which leaves link URLs untouched).
  @link_function = blk
end
157
+
158
+ # Same purpose as creole_link, but for "bare" link markup. Bare links are
159
+ # the links which are in the text but not surrounded by brackets.
160
+ #
161
+ # WikiCreole.creole_barelink {|s| "#{s}.html" }
162
def self.creole_barelink(&blk)
  # Remember the URL rewriter consulted by the :ilink filter (nil when no
  # block is given, which leaves bare link URLs untouched).
  @barelink_function = blk
end
165
+
166
+ # Same purpose as creole_link, but for image URLs.
167
+ #
168
+ # WikiCreole.creole_img {|s| "http://my.domain/#{s}" }
169
def self.creole_img(&blk)
  # Remember the URL rewriter consulted by the :imgsrc filter (nil when no
  # block is given, which leaves image URLs untouched).
  @img_function = blk
end
172
+
173
+ # If you want complete control over links, rather than just modifying
174
+ # the URL, register your link markup function with WikiCreole.creole_link()
175
+ # as above and then call creole_customlinks(). Now your function will receive
176
+ # the entire link markup chunk, such as <tt>[[ some_wiki_page | page description ]]</tt>
177
+ # and must return HTML.
178
+ #
179
+ # This has no effect on "bare" link markup, such as
180
+ # http://cpan.org
181
def self.creole_customlinks
  href = @@chunks_hash[:href]
  link = @@chunks_hash[:link]

  # Blank out the generated <a href="..."> wrapper: the callback now
  # produces the complete HTML for the link.
  [href, link].each do |entry|
    entry[:open] = ""
    entry[:close] = ""
  end

  # Without :contains the parser no longer splits the markup into
  # href/atext; the whole [[ ... ]] chunk goes to the filter below.
  link.delete(:contains)
  link[:filter] = Proc.new { |s| @link_function ? @link_function.call(s) : s }
end
192
+
193
+ # Same purpose as creole_customlinks, but for "bare" link markup.
194
def self.creole_custombarelinks
  bare_link = @@chunks_hash[:ilink]

  # Drop the generated <a ...></a> wrapper and hand the raw URL text to the
  # creole_barelink callback, which must return the final HTML.
  bare_link[:open] = ""
  bare_link[:close] = ""
  bare_link[:filter] = Proc.new { |s| @barelink_function ? @barelink_function.call(s) : s }
end
202
+
203
+ # Similar to creole_customlinks, but for images.
204
def self.creole_customimgs
  image = @@chunks_hash[:img]

  # Drop the generated <img ... /> wrapper; the callback produces the HTML.
  image[:open] = ""
  image[:close] = ""

  # Without :contains the parser no longer splits the markup into
  # imgsrc/imgalt; the whole {{ ... }} chunk goes to the filter below.
  image.delete(:contains)
  image[:filter] = Proc.new { |s| @img_function ? @img_function.call(s) : s }
end
213
+
214
+ # You may wish to customize the opening and/or closing tags
215
+ # for the various bits of Creole markup. For example, to
216
+ # assign a CSS class to list items:
217
+ # WikiCreole.creole_tag(:li, :open, "<li class=myclass>")
218
+ #
219
+ # The tags that may be of interest are:
220
+ #
221
+ # br dd dl
222
+ # dt em h1
223
+ # h2 h3 h4
224
+ # h5 h6 hr
225
+ # ilink img inowiki
226
+ # ip li link
227
+ # mono nowiki ol
228
+ # p strong sub
229
+ # sup table td
230
+ # th tr u
231
+ # ul
232
+ #
233
+ # Those should be self-explanatory, except for inowiki (inline nowiki),
234
+ # ilink (bare links, e.g.
235
+ # http://www.cpan.org
236
+ # ) and ip (indented paragraph).
237
def self.creole_tag(tag, type, text="")
  # Grammar keys are Symbols. +type+ was already normalised with to_sym, so
  # do the same for +tag+: creole_tag("li", "open", ...) used to be ignored
  # silently because "li" is not a key of the grammar table.
  type = type.to_sym
  tag = tag.to_sym if tag.respond_to?(:to_sym)

  # Unknown tag names and types other than :open/:close are ignored (nil).
  return unless [:open, :close].include?(type)
  return unless @@chunks_hash.has_key?(tag)

  @@chunks_hash[tag][type] = text
end
243
+
244
+ # See all current tags:
245
+ # puts WikiCreole.creole_tags()
246
+ #
247
def self.creole_tags
  # Newlines inside tag text are shown as a literal \n in the listing.
  show_newlines = lambda { |text| text.gsub(/\n/m, "\\n") }

  names = @@chunks_hash.keys.sort_by { |name| name.to_s }

  listing = names.map do |name|
    entry = @@chunks_hash[name]
    opening = entry[:open] || ""
    next unless opening =~ /</m # only chunks whose open text contains markup

    closing = entry[:close] || ""
    "#{name}: open(#{show_newlines.call(opening)}) close(#{show_newlines.call(closing)})\n"
  end

  listing.compact.join
end
261
+
262
+ private
263
+
264
+ # characters that may indicate inline wiki markup
265
+ SPECIALCHARS = ['^', '\\', '*', '/', '_', ',', '{', '[',
266
+ '<', '~', '|', "\n", '#', ':', ';', '(', '-', '.']
267
+
268
+ # plain characters
269
+ # build an array of "plain content" characters by subtracting SPECIALCHARS
270
+ # from ascii printable (ascii 32 to 126)
271
+ PLAINCHARS = (32..126).map{|c| c.chr}.reject{|c| SPECIALCHARS.index(c)}
272
+
273
+ # non-plain text inline widgets
274
+ INLINE = %w{strong em br esc img link ilink inowiki
275
+ sub sup mono u plug plug2 tm reg copy ndash ellipsis amp}
276
+
277
+ ALL_INLINE = [INLINE, 'plain', 'any'].flatten # including plain text
278
+
279
+ BLOCKS = %w{h1 h2 h3 hr nowiki h4 h5 h6 ul ol table p ip dl plug plug2 blank}
280
+
281
+ # handy - used several times in @@chunks_hash
282
+ EOL = '(?:\n|$)'.freeze # end of line (or string)
283
+
284
+ @plugin_function = nil
285
+ @barelink_function = nil
286
+ @link_function = nil
287
+ @img_function = nil
288
+
289
+ @is_initialized = false
290
+
291
+ @@chunks_hash = {
292
+ :top => {
293
+ :contains => BLOCKS,
294
+ },
295
+ :blank => {
296
+ :curpat => "(?= *#{EOL})",
297
+ :fwpat => "(?=(?:^|\n) *#{EOL})",
298
+ :stops => '(?=\S)',
299
+ :hint => ["\n"],
300
+ :filter => Proc.new { "" }, # whitespace into the bit bucket
301
+ :open => "", :close => "",
302
+ },
303
+ :p => {
304
+ :curpat => '(?=.)',
305
+ :stops => ['blank', 'ip', 'h', 'hr', 'nowiki', 'ul', 'ol', 'dl', 'table'],
306
+ :hint => PLAINCHARS,
307
+ :contains => ALL_INLINE,
308
+ :filter => Proc.new {|s| s.chomp },
309
+ :open => "<p>", :close => "</p>\n\n",
310
+ },
311
+ :ip => {
312
+ :curpat => '(?=:)',
313
+ :fwpat => '\n(?=:)',
314
+ :stops => ['blank', 'h', 'hr', 'nowiki', 'ul', 'ol', 'dl', 'table'],
315
+ :hint => [':'],
316
+ :contains => ['p', 'ip'],
317
+ :filter => Proc.new {|s|
318
+ s.sub!(/:/, '')
319
+ s.sub!(/\n:/m, "\n")
320
+ s
321
+ },
322
+ :open => "<div style=\"margin-left: 2em\">", :close => "</div>\n",
323
+ },
324
+ :dl => {
325
+ :curpat => '(?=;)',
326
+ :fwpat => '\n(?=;)',
327
+ :stops => ['blank', 'h', 'hr', 'nowiki', 'ul', 'ol', 'table'],
328
+ :hint => [';'],
329
+ :contains => ['dt', 'dd'],
330
+ :open => "<dl>\n", :close => "</dl>\n",
331
+ },
332
+ :dt => {
333
+ :curpat => '(?=;)',
334
+ :fwpat => '\n(?=;)',
335
+ :stops => '(?=:|\n)',
336
+ :hint => [';'],
337
+ :contains => ALL_INLINE,
338
+ :filter => Proc.new {|s|
339
+ s.sub!(/^;\s*/, '')
340
+ s
341
+ },
342
+ :open => " <dt>", :close => "</dt>\n",
343
+ },
344
+ :dd => {
345
+ :curpat => '(?=\n|:)',
346
+ :fwpat => '(?:\n|:)',
347
+ :stops => '.(?=:)|\n(?=;)',
348
+ :hint => [':', "\n"],
349
+ :contains => ALL_INLINE,
350
+ :filter => Proc.new {|s|
351
+ s.sub!(/(?:\n|:)\s*/m, '')
352
+ s.sub!(/\s*$/m, '')
353
+ s
354
+ },
355
+ :open => " <dd>", :close => "</dd>\n",
356
+ },
357
+ :table => {
358
+ :curpat => '(?= *\|.)',
359
+ :fwpat => '\n(?= *\|.)',
360
+ :stops => '\n(?= *[^\|])',
361
+ :contains => ['tr'],
362
+ :hint => ['|', ' '],
363
+ :open => "<table>\n", :close => "</table>\n\n",
364
+ },
365
+ :tr => {
366
+ :curpat => '(?= *\|)',
367
+ :stops => '\n',
368
+ :contains => ['td', 'th'],
369
+ :hint => ['|', ' '],
370
+ :filter => Proc.new {|s|
371
+ s.sub!(/^ */, '')
372
+ s.sub!(/\| *$/, '')
373
+ s
374
+ },
375
+ :open => " <tr>\n", :close => " </tr>\n",
376
+ },
377
+ :td => {
378
+ :curpat => '(?=\|[^=])',
379
+ # this gnarly regex fixes ambiguous '|' for links/imgs/nowiki in tables
380
+ :stops => '[^~](?=\|(?!(?:[^\[]*\]\])|(?:[^\{]*\}\})))',
381
+ :contains => ALL_INLINE,
382
+ :hint => ['|'],
383
+ :filter => Proc.new {|s|
384
+ s.sub!(/^ *\| */, '')
385
+ s.sub!(/\s*$/m, '')
386
+ s
387
+ },
388
+ :open => " <td>", :close => "</td>\n",
389
+ },
390
+ :th => {
391
+ :curpat => '(?=\|=)',
392
+ # this gnarly regex fixes ambiguous '|' for links/imgs/nowiki in tables
393
+ :stops => '[^~](?=\|(?!(?:[^\[]*\]\])|(?:[^\{]*\}\})))',
394
+ :contains => ALL_INLINE,
395
+ :hint => ['|'],
396
+ :filter => Proc.new {|s|
397
+ s.sub!(/^ *\|= */, '')
398
+ s.sub!(/\s*$/m, '')
399
+ s
400
+ },
401
+ :open => " <th>", :close => "</th>\n",
402
+ },
403
+ :ul => {
404
+ :curpat => '(?=(?:`| *)\*[^*])',
405
+ :fwpat => '(?=\n(?:`| *)\*[^*])',
406
+ :stops => ['blank', 'ip', 'h', 'nowiki', 'li', 'table', 'hr', 'dl'],
407
+ :contains => ['ul', 'ol', 'li'],
408
+ :hint => ['*', ' '],
409
+ :filter => Proc.new {|s|
410
+ s = strip_list(s)
411
+ s
412
+ },
413
+ :open => "<ul>\n", :close => "</ul>\n",
414
+ },
415
+ :ol => {
416
+ :curpat => '(?=(?:`| *)\#[^#])',
417
+ :fwpat => '(?=\n(?:`| *)\#[^#])',
418
+ :stops => ['blank', 'ip', 'h', 'nowiki', 'li', 'table', 'hr', 'dl'],
419
+ :contains => ['ul', 'ol', 'li'],
420
+ :hint => ['#', ' '],
421
+ :filter => Proc.new {|s|
422
+ s = strip_list(s)
423
+ s
424
+ },
425
+ :open => "<ol>\n", :close => "</ol>\n",
426
+ },
427
+ :li => {
428
+ :curpat => '(?=`[^*#])',
429
+ :fwpat => '\n(?=`[^*#])',
430
+ :stops => '\n(?=`)',
431
+ :hint => ['`'],
432
+ :filter => Proc.new {|s|
433
+ s.sub!(/` */, '')
434
+ s.chomp!
435
+ s
436
+ },
437
+ :contains => ALL_INLINE,
438
+ :open => " <li>", :close => "</li>\n",
439
+ },
440
+ :nowiki => {
441
+ :curpat => '(?=\{\{\{ *\n)',
442
+ :fwpat => '\n(?=\{\{\{ *\n)',
443
+ :stops => "\n\\}\\}\\} *#{EOL}",
444
+ :hint => ['{'],
445
+ :filter => Proc.new {|s|
446
+ s[0,3] = ''
447
+ s.sub!(/\}{3}\s*$/, '')
448
+ s.gsub!(/&/, '&amp;')
449
+ s.gsub!(/</, '&lt;')
450
+ s.gsub!(/>/, '&gt;')
451
+ s
452
+ },
453
+ :open => "<pre>", :close => "</pre>\n\n",
454
+ },
455
+ :hr => {
456
+ :curpat => "(?= *-{4,} *#{EOL})",
457
+ :fwpat => "\n(?= *-{4,} *#{EOL})",
458
+ :hint => ['-', ' '],
459
+ :stops => EOL,
460
+ :open => "<hr />\n\n", :close => "",
461
+ :filter => Proc.new { "" } # ----- into the bit bucket
462
+ },
463
+ :h => { :curpat => '(?=(?:^|\n) *=)' }, # matches any heading
464
+ :h1 => {
465
+ :curpat => '(?= *=[^=])',
466
+ :hint => ['=', ' '],
467
+ :stops => '\n',
468
+ :contains => ALL_INLINE,
469
+ :open => "<h1>", :close => "</h1>\n\n",
470
+ :filter => Proc.new {|s|
471
+ s = strip_leading_and_trailing_eq_and_whitespace(s)
472
+ s
473
+ },
474
+ },
475
+ :h2 => {
476
+ :curpat => '(?= *={2}[^=])',
477
+ :hint => ['=', ' '],
478
+ :stops => '\n',
479
+ :contains => ALL_INLINE,
480
+ :open => "<h2>", :close => "</h2>\n\n",
481
+ :filter => Proc.new {|s|
482
+ s = strip_leading_and_trailing_eq_and_whitespace(s)
483
+ s
484
+ },
485
+ },
486
+ :h3 => {
487
+ :curpat => '(?= *={3}[^=])',
488
+ :hint => ['=', ' '],
489
+ :stops => '\n',
490
+ :contains => ALL_INLINE,
491
+ :open => "<h3>", :close => "</h3>\n\n",
492
+ :filter => Proc.new {|s|
493
+ s = strip_leading_and_trailing_eq_and_whitespace(s)
494
+ s
495
+ },
496
+ },
497
+ :h4 => {
498
+ :curpat => '(?= *={4}[^=])',
499
+ :hint => ['=', ' '],
500
+ :stops => '\n',
501
+ :contains => ALL_INLINE,
502
+ :open => "<h4>", :close => "</h4>\n\n",
503
+ :filter => Proc.new {|s|
504
+ s = strip_leading_and_trailing_eq_and_whitespace(s)
505
+ s
506
+ },
507
+ },
508
+ :h5 => {
509
+ :curpat => '(?= *={5}[^=])',
510
+ :hint => ['=', ' '],
511
+ :stops => '\n',
512
+ :contains => ALL_INLINE,
513
+ :open => "<h5>", :close => "</h5>\n\n",
514
+ :filter => Proc.new {|s|
515
+ s = strip_leading_and_trailing_eq_and_whitespace(s)
516
+ s
517
+ },
518
+ },
519
+ :h6 => {
520
+ :curpat => '(?= *={6,})',
521
+ :hint => ['=', ' '],
522
+ :stops => '\n',
523
+ :contains => ALL_INLINE,
524
+ :open => "<h6>", :close => "</h6>\n\n",
525
+ :filter => Proc.new {|s|
526
+ s = strip_leading_and_trailing_eq_and_whitespace(s)
527
+ s
528
+ },
529
+ },
530
+ :plain => {
531
+ :curpat => '(?=[^*/_,^\\{\[<|])',
532
+ :stops => INLINE,
533
+ :hint => PLAINCHARS,
534
+ :open => '', :close => ''
535
+ },
536
+ :any => { # catch-all
537
+ :curpat => '(?=.)',
538
+ :stops => INLINE,
539
+ :open => '', :close => ''
540
+ },
541
+ :br => {
542
+ :curpat => '(?=\\\\\\\\)',
543
+ :stops => '\\\\\\\\',
544
+ :hint => ['\\'],
545
+ :filter => Proc.new { "" },
546
+ :open => '<br />', :close => '',
547
+ },
548
+ :esc => {
549
+ :curpat => '(?=~[\S])',
550
+ :stops => '~.',
551
+ :hint => ['~'],
552
+ :filter => Proc.new {|s|
553
+ s.sub!(/^./m, '')
554
+ s
555
+ },
556
+ :open => '', :close => '',
557
+ },
558
+ :inowiki => {
559
+ :curpat => '(?=\{{3}.*?\}*\}{3})',
560
+ :stops => '.*?\}*\}{3}',
561
+ :hint => ['{'],
562
+ :filter => Proc.new {|s|
563
+ s[0,3] = ''
564
+ s.sub!(/\}{3}\s*$/, '')
565
+ s.gsub!(/&/, '&amp;')
566
+ s.gsub!(/</, '&lt;')
567
+ s.gsub!(/>/, '&gt;')
568
+ s
569
+ },
570
+ :open => "<tt>", :close => "</tt>",
571
+ },
572
+ :plug => {
573
+ :curpat => '(?=\<{3}.*?\>*\>{3})',
574
+ :stops => '.*?\>*\>{3}',
575
+ :hint => ['<'],
576
+ :filter => Proc.new {|s|
577
+ s[0,3] = ''
578
+ s.sub!(/\>{3}$/, '')
579
+ if @plugin_function
580
+ s = @plugin_function.call(s)
581
+ else
582
+ s = "<<<#{s}>>>"
583
+ end
584
+ s
585
+ },
586
+ :open => "", :close => "",
587
+ },
588
+ :plug2 => {
589
+ :curpat => '(?=\<{2}.*?\>*\>{2})',
590
+ :stops => '.*?\>*\>{2}',
591
+ :hint => ['<'],
592
+ :filter => Proc.new {|s|
593
+ s[0,2] = ''
594
+ s.sub!(/\>{2}$/, '')
595
+ if @plugin_function
596
+ s = @plugin_function.call(s)
597
+ else
598
+ s = "<<#{s}>>"
599
+ end
600
+ s
601
+ },
602
+ :open => "", :close => "",
603
+ },
604
+ :ilink => {
605
+ :curpat => '(?=(?:https?|ftp):\/\/)',
606
+ # This following is the [:punct:] character class with the / and ? removed
607
+ # so that URLs like http://www.somesite.com/ will match the trailing
608
+ # slash. URLs with a trailing ? will also work. Trailing ? is sometimes
609
+ # used to ensure that browsers don't cache the page.
610
+ :stops => '(?=[!"#$%&\'()*+,-.:;<=>@\[\\]^_`{|}~]?(?:\s|$))',
611
+ :hint => ['h', 'f'],
612
+ :filter => Proc.new {|s|
613
+ s.sub!(/^\s*/, '')
614
+ s.sub!(/\s*$/, '')
615
+ if @barelink_function
616
+ s = @barelink_function.call(s)
617
+ end
618
+ s = "href=\"#{s}\">#{s}"
619
+ s
620
+ },
621
+ :open => "<a ", :close=> "</a>",
622
+ },
623
+ :link => {
624
+ :curpat => '(?=\[\[[^\n]+?\]\])',
625
+ :stops => '\]\]',
626
+ :hint => ['['],
627
+ :contains => ['href', 'atext'],
628
+ :filter => Proc.new {|s|
629
+ s[0,2] = ''
630
+ s[-2,2] = ''
631
+ s += "|#{s}" if ! s.index(/\|/) # text = url unless given
632
+ s
633
+ },
634
+ :open => "<a ", :close => "</a>",
635
+ },
636
+ :href => {
637
+ :curpat => '(?=[^\|])',
638
+ :stops => '(?=\|)',
639
+ :filter => Proc.new {|s|
640
+ s.sub!(/^\s*/, '')
641
+ s.sub!(/\s*$/, '')
642
+ if @link_function
643
+ s = @link_function.call(s)
644
+ end
645
+ s
646
+ },
647
+ :open => 'href="', :close => '">',
648
+ },
649
+ :atext => {
650
+ :curpat => '(?=\|)',
651
+ :stops => '\n',
652
+ :hint => ['|'],
653
+ :contains => ALL_INLINE,
654
+ :filter => Proc.new {|s|
655
+ s.sub!(/^\|\s*/, '')
656
+ s.sub!(/\s*$/, '')
657
+ s
658
+ },
659
+ :open => '', :close => '',
660
+ },
661
+ :img => {
662
+ :curpat => '(?=\{\{[^\{][^\n]*?\}\})',
663
+ :stops => '\}\}',
664
+ :hint => ['{'],
665
+ :contains => ['imgsrc', 'imgalt'],
666
+ :filter => Proc.new {|s|
667
+ s[0,2] = ''
668
+ s.sub!(/\}\}$/, '')
669
+ s
670
+ },
671
+ :open => "<img ", :close => " />",
672
+ },
673
+ :imgalt => {
674
+ :curpat => '(?=\|)',
675
+ :stops => '\n',
676
+ :hint => ['|'],
677
+ :filter => Proc.new {|s|
678
+ s.sub!(/^\|\s*/, '')
679
+ s.sub!(/\s*$/, '')
680
+ s
681
+ },
682
+ :open => ' alt="', :close => '"',
683
+ },
684
+ :imgsrc => {
685
+ :curpat => '(?=[^\|])',
686
+ :stops => '(?=\|)',
687
+ :filter => Proc.new {|s|
688
+ s.sub!(/^\|\s*/, '')
689
+ s.sub!(/\s*$/, '')
690
+ if @img_function
691
+ s = @img_function.call(s)
692
+ end
693
+ s
694
+ },
695
+ :open => 'src="', :close => '"',
696
+ },
697
+ :strong => {
698
+ :curpat => '(?=\*\*)',
699
+ :stops => '\*\*.*?\*\*',
700
+ :hint => ['*'],
701
+ :contains => ALL_INLINE,
702
+ :filter => Proc.new {|s|
703
+ s[0,2] = ''
704
+ s.sub!(/\*\*$/, '')
705
+ s
706
+ },
707
+ :open => "<strong>", :close => "</strong>",
708
+ },
709
+ :em => {
710
+ # This could use a negative lookback assertion to let you know whether
711
+ # it's part of a URL or not. That would be helpful if the URL had been
712
+ # escaped. Currently, it will just become italic after the // since
713
+ # it didn't process the URL.
714
+ :curpat => '(?=\/\/)',
715
+ # Removed a negative lookback assertion (?<!:) from the Perl version
716
+ # and replaced it with [^:] Not sure of the consequences, however, as
717
+ # of this version, Ruby does not have negative lookback assertions, so
718
+ # I had to do it.
719
+ :stops => '\/\/.*?[^:]\/\/',
720
+ :hint => ['/'],
721
+ :contains => ALL_INLINE,
722
+ :filter => Proc.new {|s|
723
+ s[0,2] = ''
724
+ s.sub!(/\/\/$/, '')
725
+ s
726
+ },
727
+ :open => "<em>", :close => "</em>",
728
+ },
729
+ :mono => {
730
+ :curpat => '(?=\#\#)',
731
+ :stops => '\#\#.*?\#\#',
732
+ :hint => ['#'],
733
+ :contains => ALL_INLINE,
734
+ :filter => Proc.new {|s|
735
+ s[0,2] = ''
736
+ s.sub!(/\#\#$/, '')
737
+ s
738
+ },
739
+ :open => "<tt>", :close => "</tt>",
740
+ },
741
+ :sub => {
742
+ :curpat => '(?=,,)',
743
+ :stops => ',,.*?,,',
744
+ :hint => [','],
745
+ :contains => ALL_INLINE,
746
+ :filter => Proc.new {|s|
747
+ s[0,2] = ''
748
+ s.sub!(/\,\,$/, '')
749
+ s
750
+ },
751
+ :open => "<sub>", :close => "</sub>",
752
+ },
753
+ :sup => {
754
+ :curpat => '(?=\^\^)',
755
+ :stops => '\^\^.*?\^\^',
756
+ :hint => ['^'],
757
+ :contains => ALL_INLINE,
758
+ :filter => Proc.new {|s|
759
+ s[0,2] = ''
760
+ s.sub!(/\^\^$/, '')
761
+ s
762
+ },
763
+ :open => "<sup>", :close => "</sup>",
764
+ },
765
+ :u => {
766
+ :curpat => '(?=__)',
767
+ :stops => '__.*?__',
768
+ :hint => ['_'],
769
+ :contains => ALL_INLINE,
770
+ :filter => Proc.new {|s|
771
+ s[0,2] = ''
772
+ s.sub!(/__$/, '')
773
+ s
774
+ },
775
+ :open => "<u>", :close => "</u>",
776
+ },
777
+ :amp => {
778
+ :curpat => '(?=\&(?!\w+\;))',
779
+ :stops => '.',
780
+ :hint => ['&'],
781
+ :filter => Proc.new { "&amp;" },
782
+ :open => "", :close => "",
783
+ },
784
+ :tm => {
785
+ :curpat => '(?=\(TM\))',
786
+ :stops => '\(TM\)',
787
+ :hint => ['('],
788
+ :filter => Proc.new { "&trade;" },
789
+ :open => "", :close => "",
790
+ },
791
+ :reg => {
792
+ :curpat => '(?=\(R\))',
793
+ :stops => '\(R\)',
794
+ :hint => ['('],
795
+ :filter => Proc.new { "&reg;" },
796
+ :open => "", :close => "",
797
+ },
798
+ :copy => {
799
+ :curpat => '(?=\(C\))',
800
+ :stops => '\(C\)',
801
+ :hint => ['('],
802
+ :filter => Proc.new { "&copy;" },
803
+ :open => "", :close => "",
804
+ },
805
+ :ndash => {
806
+ :curpat => '(?=--)',
807
+ :stops => '--',
808
+ :hint => ['-'],
809
+ :filter => Proc.new { "&ndash;" },
810
+ :open => "", :close => "",
811
+ },
812
+ :ellipsis => {
813
+ :curpat => '(?=\.\.\.)',
814
+ :stops => '\.\.\.',
815
+ :hint => ['.'],
816
+ :filter => Proc.new { "&hellip;" },
817
+ :open => "", :close => "",
818
+ },
819
+ }
820
+
821
# Heading filter: removes the run of '=' (and the whitespace around it) at
# the start of +s+ and again at the end. +s+ is modified in place and returned.
def self.strip_leading_and_trailing_eq_and_whitespace(s)
  [/^\s*=*\s*/, /\s*=*\s*$/].each { |edge| s.sub!(edge, '') }
  s
end
826
+
827
# List filter: peels one nesting level off a list block by replacing the
# first list marker of the text, and the first marker after every newline
# (together with any backticks or spaces in front of it), with a single
# backtick. +s+ is modified in place and returned.
def self.strip_list(s)
  s.sub!(/(?:`*| *)[*#]/) { '`' }
  s.gsub!(/\n(?:`*| *)[*#]/m) { "\n`" }
  s
end
832
+
833
# Runs +str+ through the :filter proc registered for grammar chunk +chunk+
# and returns whatever the filter returns.
def self.filter_string_x_with_chunk_filter_y(str, chunk)
  chunk_filter = @@chunks_hash[chunk][:filter]
  chunk_filter.call(str)
end
836
+
837
# Renders +tref+ as the contents of grammar chunk +chunk+ and returns the
# generated markup as a String.
#
# The text is consumed left to right. get_sub_chunk_for picks the sub-chunk
# that starts at the current position, that sub-chunk's precompiled :delim
# pattern decides where it ends, and its text is passed through the optional
# :filter. The result is emitted between the sub-chunk's :open and :close
# strings, recursing first when the sub-chunk itself :contains other chunks.
def self.parse(tref, chunk)
  sub_chunk = nil
  pos = 0
  last_pos = 0
  html = []
  first_try = true

  loop do
    if sub_chunk # we've determined what type of sub_chunk this is
      grammar = @@chunks_hash[sub_chunk]

      pos = end_of_sub_chunk(tref, grammar[:delim], pos)

      html << grammar[:open]

      t = tref[last_pos, pos - last_pos] # grab the chunk
      t = grammar[:filter].call(t) if grammar.has_key?(:filter)

      last_pos = pos # remember where this chunk ends (where next begins)

      # recurse if it contains other chunks, otherwise print it as is
      html << (t && grammar.has_key?(:contains) ? parse(t, sub_chunk) : t)

      html << grammar[:close]
    elsif first_try
      first_try = false
    else
      # Nothing in the grammar matches at pos, so stop. The nested list test
      # leaves a single dangling newline behind, which is dropped silently;
      # anything else is reported. Fixing the grammar would make this
      # special case unnecessary.
      unless pos == tref.length - 1 && tref[pos..tref.length] == "\n"
        $stderr.puts "ERROR: endless loop detected"
      end
      break
    end

    break if pos == tref.length # we've eaten the whole string
    sub_chunk = get_sub_chunk_for(tref, chunk, pos) # more string to come
  end

  html.join
end

# Returns the index just past the sub-chunk that starts at +pos+, as decided
# by the first match of +delim+ at or after +pos+ (the end of +tref+ when
# there is no match).
#
# A zero-width match located exactly at +pos+ would leave the position
# unchanged and make parse emit empty chunks forever. For example, a
# whitespace-only line at a line start is recognised as :p, whose "blank"
# stop pattern matches right there. In that case the search is repeated one
# character further on, so every sub-chunk consumes at least one character.
def self.end_of_sub_chunk(tref, delim, pos)
  stop = tref.index(delim, pos) && Regexp.last_match.end(0)
  stop = tref.index(delim, pos + 1) && Regexp.last_match.end(0) if stop == pos
  stop || tref.length
end
898
+
899
# Returns the name (a Symbol) of the sub-chunk of +chunk+ that starts at
# index +pos+ of +tref+, or nil when none of them matches there.
def self.get_sub_chunk_for(tref, chunk, pos)
  grammar = @@chunks_hash[chunk]
  starts_here = lambda { |name| tref.index(@@chunks_hash[name][:curpatcmp], pos) }

  # Fast path: only try the chunks hinted at by the character at pos.
  hinted = grammar[:calculated_hint_array_for][tref[pos, 1]].to_a
  by_hint = hinted.find { |name| starts_here.call(name) }
  return by_hint if by_hint

  # The hint didn't help; check every chunk type this chunk may contain.
  fallback = grammar[:contains].to_a.find { |name| starts_here.call(name.to_sym) }
  fallback && fallback.to_sym
end
919
+
920
+ # compile a regex that matches any of the patterns that interrupt the
921
+ # current chunk.
922
def self.delim(chunk)
  stops = @@chunks_hash[chunk][:stops]

  # A String is already a ready-made pattern source.
  return stops unless Array === stops

  # An Array names other chunks: the current chunk ends where any of them
  # begins, using the forward-looking :fwpat when a chunk defines one and
  # its :curpat otherwise.
  alternatives = stops.map do |name|
    stopper = @@chunks_hash[name.to_sym]
    pattern = stopper.has_key?(:fwpat) ? stopper[:fwpat] : stopper[:curpat]
    pattern + "|"
  end

  alternatives.join.chop # drop the trailing "|"
end
940
+
941
+ # one-time optimization of the grammar - speeds the parser up a ton
942
def self.init
  return if @is_initialized

  @is_initialized = true

  @@chunks_hash.keys.each do |name|
    entry = @@chunks_hash[name]

    # \G anchors the "does this chunk start here?" test at the search position.
    if entry.has_key?(:curpat)
      entry[:curpatcmp] = Regexp.new('\G' + entry[:curpat], Regexp::MULTILINE)
    end

    # Pattern that marks where this chunk ends.
    if entry.has_key?(:stops)
      entry[:delim] = Regexp.new(delim(name), Regexp::MULTILINE)
    end

    next unless entry.has_key?(:contains)

    # Map each hint character to the contained chunks that list it in :hint,
    # in :contains order, so the parser can try likely candidates first.
    hints = entry[:calculated_hint_array_for] = {}
    entry[:contains].each do |child|
      child = child.to_sym
      (@@chunks_hash[child][:hint] || []).each do |first_char|
        (hints[first_char] ||= []) << child
      end
    end
  end
end
972
+
973
+ end