merb_comatose 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/lib/redcloth.rb ADDED
@@ -0,0 +1,1129 @@
1
+ # vim:ts=4:sw=4:
2
+ # = RedCloth - Textile and Markdown Hybrid for Ruby
3
+ #
4
+ # Homepage:: http://whytheluckystiff.net/ruby/redcloth/
5
+ # Author:: why the lucky stiff (http://whytheluckystiff.net/)
6
+ # Copyright:: (cc) 2004 why the lucky stiff (and his puppet organizations.)
7
+ # License:: BSD
8
+ #
9
+ # (see http://hobix.com/textile/ for a Textile Reference.)
10
+ #
11
+ # Based on (and also inspired by) both:
12
+ #
13
+ # PyTextile: http://diveintomark.org/projects/textile/textile.py.txt
14
+ # Textism for PHP: http://www.textism.com/tools/textile/
15
+ #
16
+ #
17
+
18
+ # = RedCloth
19
+ #
20
+ # RedCloth is a Ruby library for converting Textile and/or Markdown
21
+ # into HTML. You can use either format, intermingled or separately.
22
+ # You can also extend RedCloth to honor your own custom text stylings.
23
+ #
24
+ # RedCloth users are encouraged to use Textile if they are generating
25
+ # HTML and to use Markdown if others will be viewing the plain text.
26
+ #
27
+ # == What is Textile?
28
+ #
29
+ # Textile is a simple formatting style for text
30
+ # documents, loosely based on some HTML conventions.
31
+ #
32
+ # == Sample Textile Text
33
+ #
34
+ # h2. This is a title
35
+ #
36
+ # h3. This is a subhead
37
+ #
38
+ # This is a bit of paragraph.
39
+ #
40
+ # bq. This is a blockquote.
41
+ #
42
+ # = Writing Textile
43
+ #
44
+ # A Textile document consists of paragraphs. Paragraphs
45
+ # can be specially formatted by adding a small instruction
46
+ # to the beginning of the paragraph.
47
+ #
48
+ # h[n]. Header of size [n].
49
+ # bq. Blockquote.
50
+ # # Numeric list.
51
+ # * Bulleted list.
52
+ #
53
+ # == Quick Phrase Modifiers
54
+ #
55
+ # Quick phrase modifiers are also included, to allow formatting
56
+ # of small portions of text within a paragraph.
57
+ #
58
+ # \_emphasis\_
59
+ # \_\_italicized\_\_
60
+ # \*strong\*
61
+ # \*\*bold\*\*
62
+ # ??citation??
63
+ # -deleted text-
64
+ # +inserted text+
65
+ # ^superscript^
66
+ # ~subscript~
67
+ # @code@
68
+ # %(classname)span%
69
+ #
70
+ # ==notextile== (leave text alone)
71
+ #
72
+ # == Links
73
+ #
74
+ # To make a hypertext link, put the link text in "quotation
75
+ # marks" followed immediately by a colon and the URL of the link.
76
+ #
77
+ # Optional: text in (parentheses) following the link text,
78
+ # but before the closing quotation mark, will become a Title
79
+ # attribute for the link, visible as a tool tip when a cursor is above it.
80
+ #
81
+ # Example:
82
+ #
83
+ # "This is a link (This is a title) ":http://www.textism.com
84
+ #
85
+ # Will become:
86
+ #
87
+ # <a href="http://www.textism.com" title="This is a title">This is a link</a>
88
+ #
89
+ # == Images
90
+ #
91
+ # To insert an image, put the URL for the image inside exclamation marks.
92
+ #
93
+ # Optional: text that immediately follows the URL in (parentheses) will
94
+ # be used as the Alt text for the image. Images on the web should always
95
+ # have descriptive Alt text for the benefit of readers using non-graphical
96
+ # browsers.
97
+ #
98
+ # Optional: place a colon followed by a URL immediately after the
99
+ # closing ! to make the image into a link.
100
+ #
101
+ # Example:
102
+ #
103
+ # !http://www.textism.com/common/textist.gif(Textist)!
104
+ #
105
+ # Will become:
106
+ #
107
+ # <img src="http://www.textism.com/common/textist.gif" alt="Textist" />
108
+ #
109
+ # With a link:
110
+ #
111
+ # !/common/textist.gif(Textist)!:http://textism.com
112
+ #
113
+ # Will become:
114
+ #
115
+ # <a href="http://textism.com"><img src="/common/textist.gif" alt="Textist" /></a>
116
+ #
117
+ # == Defining Acronyms
118
+ #
119
+ # HTML allows authors to define acronyms via the <acronym> tag. The definition appears as a
120
+ # tool tip when a cursor hovers over the acronym. A crucial aid to clear writing,
121
+ # this should be used at least once for each acronym in documents where they appear.
122
+ #
123
+ # To quickly define an acronym in Textile, place the full text in (parentheses)
124
+ # immediately following the acronym.
125
+ #
126
+ # Example:
127
+ #
128
+ # ACLU(American Civil Liberties Union)
129
+ #
130
+ # Will become:
131
+ #
132
+ # <acronym title="American Civil Liberties Union">ACLU</acronym>
133
+ #
134
+ # == Adding Tables
135
+ #
136
+ # In Textile, simple tables can be added by separating each column by
137
+ # a pipe.
138
+ #
139
+ # |a|simple|table|row|
140
+ # |And|Another|table|row|
141
+ #
142
+ # Attributes are defined by style definitions in parentheses.
143
+ #
144
+ # table(border:1px solid black).
145
+ # (background:#ddd;color:red). |{}| | | |
146
+ #
147
+ # == Using RedCloth
148
+ #
149
+ # RedCloth is simply an extension of the String class, which can handle
150
+ # Textile formatting. Use it like a String and output HTML with its
151
+ # RedCloth#to_html method.
152
+ #
153
+ # doc = RedCloth.new "
154
+ #
155
+ # h2. Test document
156
+ #
157
+ # Just a simple test."
158
+ #
159
+ # puts doc.to_html
160
+ #
161
+ # By default, RedCloth uses both Textile and Markdown formatting, with
162
+ # Textile formatting taking precedence. If you want to turn off Markdown
163
+ # formatting, to boost speed and limit the processor:
164
+ #
165
+ # doc.to_html( :textile )
166
+
167
+ class RedCloth < String
168
+
169
+ VERSION = '3.0.4'
170
+ DEFAULT_RULES = [:textile, :markdown]
171
+
172
+ #
173
+ # Two accessors for setting security restrictions.
174
+ #
175
+ # This is a nice thing if you're using RedCloth for
176
+ # formatting in public places (e.g. Wikis) where you
177
+ # don't want users to abuse HTML for bad things.
178
+ #
179
+ # If +:filter_html+ is set, HTML which wasn't
180
+ # created by the Textile processor will be escaped.
181
+ #
182
+ # If +:filter_styles+ is set, it will also disable
183
+ # the style markup specifier. ('{color: red}')
184
+ #
185
+ attr_accessor :filter_html, :filter_styles
186
+
187
+ #
188
+ # Accessor for toggling hard breaks.
189
+ #
190
+ # If +:hard_breaks+ is set, single newlines will
191
+ # be converted to HTML break tags. This is the
192
+ # default behavior for traditional RedCloth.
193
+ #
194
+ attr_accessor :hard_breaks
195
+
196
+ # Accessor for toggling lite mode.
197
+ #
198
+ # In lite mode, block-level rules are ignored. This means
199
+ # that tables, paragraphs, lists, and such aren't available.
200
+ # Only the inline markup for bold, italics, entities and so on.
201
+ #
202
+ # r = RedCloth.new( "And then? She *fell*!", [:lite_mode] )
203
+ # r.to_html
204
+ # #=> "And then? She <strong>fell</strong>!"
205
+ #
206
+ attr_accessor :lite_mode
207
+
208
+ #
209
+ # Accessor for toggling span caps.
210
+ #
211
+ # Textile places `span' tags around capitalized
212
+ # words by default, but this wreaks havoc on Wikis.
213
+ # If +:no_span_caps+ is set, this will be
214
+ # suppressed.
215
+ #
216
+ attr_accessor :no_span_caps
217
+
218
+ #
219
+ # Establishes the markup precedence. Available rules include:
220
+ #
221
+ # == Textile Rules
222
+ #
223
+ # The following textile rules can be set individually. Or add the complete
224
+ # set of rules with the single :textile rule, which supplies the rule set in
225
+ # the following precedence:
226
+ #
227
+ # refs_textile:: Textile references (i.e. [hobix]http://hobix.com/)
228
+ # block_textile_table:: Textile table block structures
229
+ # block_textile_lists:: Textile list structures
230
+ # block_textile_prefix:: Textile blocks with prefixes (i.e. bq., h2., etc.)
231
+ # inline_textile_image:: Textile inline images
232
+ # inline_textile_link:: Textile inline links
233
+ # inline_textile_span:: Textile inline spans
234
+ # glyphs_textile:: Textile entities (such as em-dashes and smart quotes)
235
+ #
236
+ # == Markdown
237
+ #
238
+ # refs_markdown:: Markdown references (for example: [hobix]: http://hobix.com/)
239
+ # block_markdown_setext:: Markdown setext headers
240
+ # block_markdown_atx:: Markdown atx headers
241
+ # block_markdown_rule:: Markdown horizontal rules
242
+ # block_markdown_bq:: Markdown blockquotes
243
+ # block_markdown_lists:: Markdown lists
244
+ # inline_markdown_link:: Markdown links
245
+ attr_accessor :rules
246
+
247
+ # Returns a new RedCloth object, based on _string_ and
248
+ # enforcing all the included _restrictions_.
249
+ #
250
+ # r = RedCloth.new( "h1. A <b>bold</b> man", [:filter_html] )
251
+ # r.to_html
252
+ # #=>"<h1>A &lt;b&gt;bold&lt;/b&gt; man</h1>"
253
+ #
254
+ def initialize( string, restrictions = [] )
255
+ restrictions.each { |r| method( "#{ r }=" ).call( true ) }
256
+ super( string )
257
+ end
258
+
259
+ #
260
+ # Generates HTML from the Textile contents.
261
+ #
262
+ # r = RedCloth.new( "And then? She *fell*!" )
263
+ # r.to_html( true )
264
+ # #=>"And then? She <strong>fell</strong>!"
265
+ #
266
+ def to_html( *rules )
267
+ rules = DEFAULT_RULES if rules.empty?
268
+ # make our working copy
269
+ text = self.dup
270
+
271
+ @urlrefs = {}
272
+ @shelf = []
273
+ textile_rules = [:refs_textile, :block_textile_table, :block_textile_lists,
274
+ :block_textile_prefix, :inline_textile_image, :inline_textile_link,
275
+ :inline_textile_code, :inline_textile_span, :glyphs_textile]
276
+ markdown_rules = [:refs_markdown, :block_markdown_setext, :block_markdown_atx, :block_markdown_rule,
277
+ :block_markdown_bq, :block_markdown_lists,
278
+ :inline_markdown_reflink, :inline_markdown_link]
279
+ @rules = rules.collect do |rule|
280
+ case rule
281
+ when :markdown
282
+ markdown_rules
283
+ when :textile
284
+ textile_rules
285
+ else
286
+ rule
287
+ end
288
+ end.flatten
289
+
290
+ # standard clean up
291
+ incoming_entities text
292
+ clean_white_space text
293
+
294
+ # start processor
295
+ @pre_list = []
296
+ rip_offtags text
297
+ no_textile text
298
+ hard_break text
299
+ unless @lite_mode
300
+ refs text
301
+ blocks text
302
+ end
303
+ inline text
304
+ smooth_offtags text
305
+
306
+ retrieve text
307
+
308
+ text.gsub!( /<\/?notextile>/, '' )
309
+ text.gsub!( /x%x%/, '&#38;' )
310
+ clean_html text if filter_html
311
+ text.strip!
312
+ text
313
+
314
+ end
315
+
316
+ #######
317
+ private
318
+ #######
319
#
# Mapping of 8-bit ASCII codes to HTML numerical entity equivalents.
# (from PyTextile)
#
# Each element is a two-item array: the single-byte character for a code
# in 128..159, followed by its numeric character reference (for example
# "&#8364;"), or by an empty string when the table gives 0 for that code.
#
TEXTILE_TAGS = [
  [128, 8364], [129, 0], [130, 8218], [131, 402], [132, 8222], [133, 8230],
  [134, 8224], [135, 8225], [136, 710], [137, 8240], [138, 352], [139, 8249],
  [140, 338], [141, 0], [142, 0], [143, 0], [144, 0], [145, 8216], [146, 8217],
  [147, 8220], [148, 8221], [149, 8226], [150, 8211], [151, 8212], [152, 732],
  [153, 8482], [154, 353], [155, 8250], [156, 339], [157, 0], [158, 0], [159, 376]
].map do |byte, entity|
  # The doubled '#' is deliberate: the first is the literal '#' of the
  # character reference, the second opens the interpolation. A single '#'
  # would be swallowed by the interpolation and produce "&8364;".
  [byte.chr, entity.zero? ? '' : "&##{ entity };"]
end
334
+
335
+ #
336
+ # Regular expressions to convert to HTML.
337
+ #
338
+ A_HLGN = /(?:(?:<>|<|>|\=|[()]+)+)/
339
+ A_VLGN = /[\-^~]/
340
+ C_CLAS = '(?:\([^)]+\))'
341
+ C_LNGE = '(?:\[[^\]]+\])'
342
+ C_STYL = '(?:\{[^}]+\})'
343
+ S_CSPN = '(?:\\\\\d+)'
344
+ S_RSPN = '(?:/\d+)'
345
+ A = "(?:#{A_HLGN}?#{A_VLGN}?|#{A_VLGN}?#{A_HLGN}?)"
346
+ S = "(?:#{S_CSPN}?#{S_RSPN}|#{S_RSPN}?#{S_CSPN}?)"
347
+ C = "(?:#{C_CLAS}?#{C_STYL}?#{C_LNGE}?|#{C_STYL}?#{C_LNGE}?#{C_CLAS}?|#{C_LNGE}?#{C_STYL}?#{C_CLAS}?)"
348
+ # PUNCT = Regexp::quote( '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~' )
349
+ PUNCT = Regexp::quote( '!"#$%&\'*+,-./:;=?@\\^_`|~' )
350
+ PUNCT_NOQ = Regexp::quote( '!"#$&\',./:;=?@\\`|' )
351
+ PUNCT_Q = Regexp::quote( '*-_+^~%' )
352
+ HYPERLINK = '(\S+?)([^\w\s/;=\?]*?)(?=\s|<|$)'
353
+
354
# Text markup tags, don't conflict with block tags.
#
# A block whose first tag is NOT in this list is treated by #blocks as
# complex HTML and passed through untouched. Each tag is listed once.
SIMPLE_HTML_TAGS = [
  'tt', 'b', 'i', 'big', 'small', 'em', 'strong', 'dfn', 'code',
  'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', 'a', 'img', 'br',
  'map', 'q', 'sub', 'sup', 'span', 'bdo'
]
360
+
361
+ QTAGS = [
362
+ ['**', 'b'],
363
+ ['*', 'strong'],
364
+ ['??', 'cite', :limit],
365
+ ['-', 'del', :limit],
366
+ ['__', 'i'],
367
+ ['_', 'em', :limit],
368
+ ['%', 'span', :limit],
369
+ ['+', 'ins', :limit],
370
+ ['^', 'sup'],
371
+ ['~', 'sub']
372
+ ]
373
+ QTAGS.collect! do |rc, ht, rtype|
374
+ rcq = Regexp::quote rc
375
+ re =
376
+ case rtype
377
+ when :limit
378
+ /(\W)
379
+ (#{rcq})
380
+ (#{C})
381
+ (?::(\S+?))?
382
+ (\S.*?\S|\S)
383
+ #{rcq}
384
+ (?=\W)/x
385
+ else
386
+ /(#{rcq})
387
+ (#{C})
388
+ (?::(\S+))?
389
+ (\S.*?\S|\S)
390
+ #{rcq}/xm
391
+ end
392
+ [rc, ht, re, rtype]
393
+ end
394
+
395
+ # Elements to handle
396
+ GLYPHS = [
397
+ # [ /([^\s\[{(>])?\'([dmst]\b|ll\b|ve\b|\s|:|$)/, '\1&#8217;\2' ], # single closing
398
+ [ /([^\s\[{(>#{PUNCT_Q}][#{PUNCT_Q}]*)\'/, '\1&#8217;' ], # single closing
399
+ [ /\'(?=[#{PUNCT_Q}]*(s\b|[\s#{PUNCT_NOQ}]))/, '&#8217;' ], # single closing
400
+ [ /\'/, '&#8216;' ], # single opening
401
+ [ /</, '&lt;' ], # less-than
402
+ [ />/, '&gt;' ], # greater-than
403
+ # [ /([^\s\[{(])?"(\s|:|$)/, '\1&#8221;\2' ], # double closing
404
+ [ /([^\s\[{(>#{PUNCT_Q}][#{PUNCT_Q}]*)"/, '\1&#8221;' ], # double closing
405
+ [ /"(?=[#{PUNCT_Q}]*[\s#{PUNCT_NOQ}])/, '&#8221;' ], # double closing
406
+ [ /"/, '&#8220;' ], # double opening
407
+ [ /\b( )?\.{3}/, '\1&#8230;' ], # ellipsis
408
+ [ /\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])/, '<acronym title="\2">\1</acronym>' ], # 3+ uppercase acronym
409
+ [ /(^|[^"][>\s])([A-Z][A-Z0-9 ]+[A-Z0-9])([^<A-Za-z0-9]|$)/, '\1<span class="caps">\2</span>\3', :no_span_caps ], # 3+ uppercase caps
410
+ [ /(\.\s)?\s?--\s?/, '\1&#8212;' ], # em dash
411
+ [ /\s->\s/, ' &rarr; ' ], # right arrow
412
+ [ /\s-\s/, ' &#8211; ' ], # en dash
413
+ [ /(\d+) ?x ?(\d+)/, '\1&#215;\2' ], # dimension sign
414
+ [ /\b ?[(\[]TM[\])]/i, '&#8482;' ], # trademark
415
+ [ /\b ?[(\[]R[\])]/i, '&#174;' ], # registered
416
+ [ /\b ?[(\[]C[\])]/i, '&#169;' ] # copyright
417
+ ]
418
+
419
+ H_ALGN_VALS = {
420
+ '<' => 'left',
421
+ '=' => 'center',
422
+ '>' => 'right',
423
+ '<>' => 'justify'
424
+ }
425
+
426
+ V_ALGN_VALS = {
427
+ '^' => 'top',
428
+ '-' => 'middle',
429
+ '~' => 'bottom'
430
+ }
431
+
432
#
# Flexible HTML escaping
#
# Escapes +str+ IN PLACE and returns it.
#
# '&', '<' and '>' are always escaped. Quote handling depends on +mode+:
#
# :NoQuotes:: double and single quotes are left alone
# :Quotes::   double quotes become &quot; and single quotes become &#039;
# other::     only double quotes are escaped
#
# '&' is escaped first so the ampersands introduced by the later
# substitutions are not escaped a second time.
#
def htmlesc( str, mode )
  str.gsub!( '&', '&amp;' )
  str.gsub!( '"', '&quot;' ) if mode != :NoQuotes
  str.gsub!( "'", '&#039;' ) if mode == :Quotes
  str.gsub!( '<', '&lt;' )
  str.gsub!( '>', '&gt;' )
  # gsub! returns nil when nothing matched; return the string itself so the
  # result does not depend on whether the text happened to contain a '>'.
  str
end
442
+
443
+ # Search and replace for Textile glyphs (quotes, dashes, other symbols)
444
+ def pgl( text )
445
+ GLYPHS.each do |re, resub, tog|
446
+ next if tog and method( tog ).call
447
+ text.gsub! re, resub
448
+ end
449
+ end
450
+
451
+ # Parses Textile attribute lists and builds an HTML attribute string
452
+ def pba( text_in, element = "" )
453
+
454
+ return '' unless text_in
455
+
456
+ style = []
457
+ text = text_in.dup
458
+ if element == 'td'
459
+ colspan = $1 if text =~ /\\(\d+)/
460
+ rowspan = $1 if text =~ /\/(\d+)/
461
+ style << "vertical-align:#{ v_align( $& ) };" if text =~ A_VLGN
462
+ end
463
+
464
+ style << "#{ $1 };" if not filter_styles and
465
+ text.sub!( /\{([^}]*)\}/, '' )
466
+
467
+ lang = $1 if
468
+ text.sub!( /\[([^)]+?)\]/, '' )
469
+
470
+ cls = $1 if
471
+ text.sub!( /\(([^()]+?)\)/, '' )
472
+
473
+ style << "padding-left:#{ $1.length }em;" if
474
+ text.sub!( /([(]+)/, '' )
475
+
476
+ style << "padding-right:#{ $1.length }em;" if text.sub!( /([)]+)/, '' )
477
+
478
+ style << "text-align:#{ h_align( $& ) };" if text =~ A_HLGN
479
+
480
+ cls, id = $1, $2 if cls =~ /^(.*?)#(.*)$/
481
+
482
+ atts = ''
483
+ atts << " style=\"#{ style.join }\"" unless style.empty?
484
+ atts << " class=\"#{ cls }\"" unless cls.to_s.empty?
485
+ atts << " lang=\"#{ lang }\"" if lang
486
+ atts << " id=\"#{ id }\"" if id
487
+ atts << " colspan=\"#{ colspan }\"" if colspan
488
+ atts << " rowspan=\"#{ rowspan }\"" if rowspan
489
+
490
+ atts
491
+ end
492
+
493
+ TABLE_RE = /^(?:table(_?#{S}#{A}#{C})\. ?\n)?^(#{A}#{C}\.? ?\|.*?\|)(\n\n|\Z)/m
494
+
495
+ # Parses a Textile table block, building HTML from the result.
496
+ def block_textile_table( text )
497
+ text.gsub!( TABLE_RE ) do |matches|
498
+
499
+ tatts, fullrow = $~[1..2]
500
+ tatts = pba( tatts, 'table' )
501
+ tatts = shelve( tatts ) if tatts
502
+ rows = []
503
+
504
+ fullrow.
505
+ split( /\|$/m ).
506
+ delete_if { |x| x.empty? }.
507
+ each do |row|
508
+
509
+ ratts, row = pba( $1, 'tr' ), $2 if row =~ /^(#{A}#{C}\. )(.*)/m
510
+
511
+ cells = []
512
+ row.split( '|' ).each do |cell|
513
+ ctyp = 'd'
514
+ ctyp = 'h' if cell =~ /^_/
515
+
516
+ catts = ''
517
+ catts, cell = pba( $1, 'td' ), $2 if cell =~ /^(_?#{S}#{A}#{C}\. ?)(.*)/
518
+
519
+ unless cell.strip.empty?
520
+ catts = shelve( catts ) if catts
521
+ cells << "\t\t\t<t#{ ctyp }#{ catts }>#{ cell }</t#{ ctyp }>"
522
+ end
523
+ end
524
+ ratts = shelve( ratts ) if ratts
525
+ rows << "\t\t<tr#{ ratts }>\n#{ cells.join( "\n" ) }\n\t\t</tr>"
526
+ end
527
+ "\t<table#{ tatts }>\n#{ rows.join( "\n" ) }\n\t</table>\n\n"
528
+ end
529
+ end
530
+
531
+ LISTS_RE = /^([#*]+?#{C} .*?)$(?![^#*])/m
532
+ LISTS_CONTENT_RE = /^([#*]+)(#{A}#{C}) (.*)$/m
533
+
534
+ # Parses Textile lists and generates HTML
535
+ def block_textile_lists( text )
536
+ text.gsub!( LISTS_RE ) do |match|
537
+ lines = match.split( /\n/ )
538
+ last_line = -1
539
+ depth = []
540
+ lines.each_with_index do |line, line_id|
541
+ if line =~ LISTS_CONTENT_RE
542
+ tl,atts,content = $~[1..3]
543
+ if depth.last
544
+ if depth.last.length > tl.length
545
+ (depth.length - 1).downto(0) do |i|
546
+ break if depth[i].length == tl.length
547
+ lines[line_id - 1] << "</li>\n\t</#{ lT( depth[i] ) }l>\n\t"
548
+ depth.pop
549
+ end
550
+ end
551
+ if depth.last and depth.last.length == tl.length
552
+ lines[line_id - 1] << '</li>'
553
+ end
554
+ end
555
+ unless depth.last == tl
556
+ depth << tl
557
+ atts = pba( atts )
558
+ atts = shelve( atts ) if atts
559
+ lines[line_id] = "\t<#{ lT(tl) }l#{ atts }>\n\t<li>#{ content }"
560
+ else
561
+ lines[line_id] = "\t\t<li>#{ content }"
562
+ end
563
+ last_line = line_id
564
+
565
+ else
566
+ last_line = line_id
567
+ end
568
+ if line_id - last_line > 1 or line_id == lines.length - 1
569
+ depth.delete_if do |v|
570
+ lines[last_line] << "</li>\n\t</#{ lT( v ) }l>"
571
+ end
572
+ end
573
+ end
574
+ lines.join( "\n" )
575
+ end
576
+ end
577
+
578
+ CODE_RE = /(\W)
579
+ @
580
+ (?:\|(\w+?)\|)?
581
+ (.+?)
582
+ @
583
+ (?=\W)/x
584
+
585
+ def inline_textile_code( text )
586
+ text.gsub!( CODE_RE ) do |m|
587
+ before,lang,code,after = $~[1..4]
588
+ lang = " lang=\"#{ lang }\"" if lang
589
+ rip_offtags( "#{ before }<code#{ lang }>#{ code }</code>#{ after }" )
590
+ end
591
+ end
592
+
593
# Picks the list-type letter for a Textile list marker: 'o' (as in <ol>)
# when the marker ends in '#', otherwise 'u' (as in <ul>).
def lT( text )
  return 'o' if text =~ /\#$/
  'u'
end
596
+
597
+ def hard_break( text )
598
+ text.gsub!( /(.)\n(?!\Z| *([#*=]+(\s|$)|[{|]))/, "\\1<br />" ) if hard_breaks
599
+ end
600
+
601
+ BLOCKS_GROUP_RE = /\n{2,}(?! )/m
602
+
603
+ def blocks( text, deep_code = false )
604
+ text.replace( text.split( BLOCKS_GROUP_RE ).collect do |blk|
605
+ plain = blk !~ /\A[#*> ]/
606
+
607
+ # skip blocks that are complex HTML
608
+ if blk =~ /^<\/?(\w+).*>/ and not SIMPLE_HTML_TAGS.include? $1
609
+ blk
610
+ else
611
+ # search for indentation levels
612
+ blk.strip!
613
+ if blk.empty?
614
+ blk
615
+ else
616
+ code_blk = nil
617
+ blk.gsub!( /((?:\n(?:\n^ +[^\n]*)+)+)/m ) do |iblk|
618
+ flush_left iblk
619
+ blocks iblk, plain
620
+ iblk.gsub( /^(\S)/, "\t\\1" )
621
+ if plain
622
+ code_blk = iblk; ""
623
+ else
624
+ iblk
625
+ end
626
+ end
627
+
628
+ block_applied = 0
629
+ @rules.each do |rule_name|
630
+ block_applied += 1 if ( rule_name.to_s.match /^block_/ and method( rule_name ).call( blk ) )
631
+ end
632
+ if block_applied.zero?
633
+ if deep_code
634
+ blk = "\t<pre><code>#{ blk }</code></pre>"
635
+ else
636
+ blk = "\t<p>#{ blk }</p>"
637
+ end
638
+ end
639
+ # hard_break blk
640
+ blk + "\n#{ code_blk }"
641
+ end
642
+ end
643
+
644
+ end.join( "\n\n" ) )
645
+ end
646
+
647
+ def textile_bq( tag, atts, cite, content )
648
+ cite, cite_title = check_refs( cite )
649
+ cite = " cite=\"#{ cite }\"" if cite
650
+ atts = shelve( atts ) if atts
651
+ "\t<blockquote#{ cite }>\n\t\t<p#{ atts }>#{ content }</p>\n\t</blockquote>"
652
+ end
653
+
654
# Builds a tab-indented element named +tag+ around +content+. Any attribute
# string in +atts+ is stored through #shelve and its placeholder is put in
# the opening tag. +cite+ is accepted only so that all prefix handlers share
# one signature; it is not used here.
def textile_p( tag, atts, cite, content )
  atts &&= shelve( atts )
  opening = "<#{ tag }#{ atts }>"
  closing = "</#{ tag }>"
  "\t#{ opening }#{ content }#{ closing }"
end
658
+
659
+ alias textile_h1 textile_p
660
+ alias textile_h2 textile_p
661
+ alias textile_h3 textile_p
662
+ alias textile_h4 textile_p
663
+ alias textile_h5 textile_p
664
+ alias textile_h6 textile_p
665
+
666
+ def textile_fn_( tag, num, atts, cite, content )
667
+ atts << " id=\"fn#{ num }\""
668
+ content = "<sup>#{ num }</sup> #{ content }"
669
+ atts = shelve( atts ) if atts
670
+ "\t<p#{ atts }>#{ content }</p>"
671
+ end
672
+
673
+ BLOCK_RE = /^(([a-z]+)(\d*))(#{A}#{C})\.(?::(\S+))? (.*)$/m
674
+
675
# Handles a block that carries a Textile prefix such as "p.", "bq.", "h2."
# or "fn1.".
#
# When +text+ matches BLOCK_RE, the prefix selects a handler method:
# "textile_<tag>" (e.g. textile_h2) if one exists, otherwise
# "textile_<letters>_" (e.g. textile_fn_ for "fn1"), which additionally
# receives the numeric suffix. The matched portion of +text+ is replaced,
# in place, by whatever the handler returns.
#
# Returns +text+ when a handler was applied and nil otherwise, which is
# how the caller counts applied block rules.
def block_textile_prefix( text )
  return unless text =~ BLOCK_RE

  matched = $&
  tag, tagpre, num, atts, cite, content = $~[1..6]
  atts = pba( atts )

  # pass to prefix handler
  html =
    if respond_to?( "textile_#{ tag }", true )
      method( "textile_#{ tag }" ).call( tag, atts, cite, content )
    elsif respond_to?( "textile_#{ tagpre }_", true )
      method( "textile_#{ tagpre }_" ).call( tagpre, num, atts, cite, content )
    end
  return unless html

  # Block form: a replacement *string* would have its backslash sequences
  # (\1, \0, \\ ...) interpreted by gsub!, corrupting any user content that
  # contains a backslash.
  text.gsub!( matched ) { html }
end
688
+
689
+ SETEXT_RE = /\A(.+?)\n([=-])[=-]* *$/m
690
+ def block_markdown_setext( text )
691
+ if text =~ SETEXT_RE
692
+ tag = if $2 == "="; "h1"; else; "h2"; end
693
+ blk, cont = "<#{ tag }>#{ $1 }</#{ tag }>", $'
694
+ blocks cont
695
+ text.replace( blk + cont )
696
+ end
697
+ end
698
+
699
+ ATX_RE = /\A(\#{1,6}) # $1 = string of #'s
700
+ [ ]*
701
+ (.+?) # $2 = Header text
702
+ [ ]*
703
+ \#* # optional closing #'s (not counted)
704
+ $/x
705
+ def block_markdown_atx( text )
706
+ if text =~ ATX_RE
707
+ tag = "h#{ $1.length }"
708
+ blk, cont = "<#{ tag }>#{ $2 }</#{ tag }>\n\n", $'
709
+ blocks cont
710
+ text.replace( blk + cont )
711
+ end
712
+ end
713
+
714
+ MARKDOWN_BQ_RE = /\A(^ *> ?.+$(.+\n)*\n*)+/m
715
+
716
+ def block_markdown_bq( text )
717
+ text.gsub!( MARKDOWN_BQ_RE ) do |blk|
718
+ blk.gsub!( /^ *> ?/, '' )
719
+ flush_left blk
720
+ blocks blk
721
+ blk.gsub!( /^(\S)/, "\t\\1" )
722
+ "<blockquote>\n#{ blk }\n</blockquote>\n\n"
723
+ end
724
+ end
725
+
726
+ MARKDOWN_RULE_RE = /^(#{
727
+ ['*', '-', '_'].collect { |ch| '( ?' + Regexp::quote( ch ) + ' ?){3,}' }.join( '|' )
728
+ })$/
729
+
730
# Replaces, in place, every line matched by MARKDOWN_RULE_RE with an
# "<hr />" tag. Returns +text+ if anything was replaced, nil otherwise.
def block_markdown_rule( text )
  text.gsub!( MARKDOWN_RULE_RE, "<hr />" )
end
735
+
736
+ def block_markdown_lists( text )
737
+ end
738
+
739
+ def inline_textile_span( text )
740
+ QTAGS.each do |qtag_rc, ht, qtag_re, rtype|
741
+ text.gsub!( qtag_re ) do |m|
742
+
743
+ case rtype
744
+ when :limit
745
+ sta,qtag,atts,cite,content = $~[1..5]
746
+ else
747
+ qtag,atts,cite,content = $~[1..4]
748
+ sta = ''
749
+ end
750
+ atts = pba( atts )
751
+ atts << " cite=\"#{ cite }\"" if cite
752
+ atts = shelve( atts ) if atts
753
+
754
+ "#{ sta }<#{ ht }#{ atts }>#{ content }</#{ ht }>"
755
+
756
+ end
757
+ end
758
+ end
759
+
760
+ LINK_RE = /
761
+ ([\s\[{(]|[#{PUNCT}])? # $pre
762
+ " # start
763
+ (#{C}) # $atts
764
+ ([^"]+?) # $text
765
+ \s?
766
+ (?:\(([^)]+?)\)(?="))? # $title
767
+ ":
768
+ (\S+?) # $url
769
+ (\/)? # $slash
770
+ ([^\w\/;]*?) # $post
771
+ (?=<|\s|$)
772
+ /x
773
+
774
+ def inline_textile_link( text )
775
+ text.gsub!( LINK_RE ) do |m|
776
+ pre,atts,text,title,url,slash,post = $~[1..7]
777
+
778
+ url, url_title = check_refs( url )
779
+ title ||= url_title
780
+
781
+ atts = pba( atts )
782
+ atts = " href=\"#{ url }#{ slash }\"#{ atts }"
783
+ atts << " title=\"#{ title }\"" if title
784
+ atts = shelve( atts ) if atts
785
+
786
+ "#{ pre }<a#{ atts }>#{ text }</a>#{ post }"
787
+ end
788
+ end
789
+
790
+ MARKDOWN_REFLINK_RE = /
791
+ \[([^\[\]]+)\] # $text
792
+ [ ]? # opt. space
793
+ (?:\n[ ]*)? # one optional newline followed by spaces
794
+ \[(.*?)\] # $id
795
+ /x
796
+
797
# Converts Markdown reference-style links ("[label][id]" or "[label][]")
# into anchors, in place.
#
# The reference is looked up through #check_refs using the id, or the label
# itself when the id is empty. The attribute string is stored through
# #shelve so later rules do not touch it.
#
# Returns +text+ if any link was converted, nil otherwise.
def inline_markdown_reflink( text )
  text.gsub!( MARKDOWN_REFLINK_RE ) do
    # Use a distinct local name: assigning to +text+ here would overwrite
    # the method's own argument from inside the block.
    label, id = $~[1..2]

    url, title = check_refs( id.empty? ? label : id )

    atts = " href=\"#{ url }\""
    atts << " title=\"#{ title }\"" if title

    "<a#{ shelve( atts ) }>#{ label }</a>"
  end
end
814
+
815
+ MARKDOWN_LINK_RE = /
816
+ \[([^\[\]]+)\] # $text
817
+ \( # open paren
818
+ [ \t]* # opt space
819
+ <?(.+?)>? # $href
820
+ [ \t]* # opt space
821
+ (?: # whole title
822
+ (['"]) # $quote
823
+ (.*?) # $title
824
+ \3 # matching quote
825
+ )? # title is optional
826
+ \)
827
+ /x
828
+
829
# Converts inline Markdown links ("[label](url "title")") into anchors,
# in place. The attribute string is stored through #shelve so later rules
# do not touch it.
#
# Returns +text+ if any link was converted, nil otherwise.
def inline_markdown_link( text )
  text.gsub!( MARKDOWN_LINK_RE ) do
    # Use a distinct local name: assigning to +text+ here would overwrite
    # the method's own argument from inside the block. The third capture
    # (the quote character) is only needed by the regexp itself.
    label, url, _quote, title = $~[1..4]

    atts = " href=\"#{ url }\""
    atts << " title=\"#{ title }\"" if title

    "<a#{ shelve( atts ) }>#{ label }</a>"
  end
end
840
+
841
+ TEXTILE_REFS_RE = /(^ *)\[([^\n]+?)\](#{HYPERLINK})(?=\s|$)/
842
+ MARKDOWN_REFS_RE = /(^ *)\[([^\n]+?)\]:\s+<?(#{HYPERLINK})>?(?:\s+"((?:[^"]|\\")+)")?(?=\s|$)/m
843
+
844
+ def refs( text )
845
+ @rules.each do |rule_name|
846
+ method( rule_name ).call( text ) if rule_name.to_s.match /^refs_/
847
+ end
848
+ end
849
+
850
+ def refs_textile( text )
851
+ text.gsub!( TEXTILE_REFS_RE ) do |m|
852
+ flag, url = $~[2..3]
853
+ @urlrefs[flag.downcase] = [url, nil]
854
+ nil
855
+ end
856
+ end
857
+
858
+ def refs_markdown( text )
859
+ text.gsub!( MARKDOWN_REFS_RE ) do |m|
860
+ flag, url = $~[2..3]
861
+ title = $~[6]
862
+ @urlrefs[flag.downcase] = [url, title]
863
+ nil
864
+ end
865
+ end
866
+
867
# Looks +text+ up (case-insensitively) in the references collected in
# @urlrefs. Returns the stored [url, title] pair, or [text, nil] when
# +text+ is nil or is not a known reference.
def check_refs( text )
  known = text ? @urlrefs[text.downcase] : nil
  known || [text, nil]
end
871
+
872
+ IMAGE_RE = /
873
+ (<p>|.|^) # start of line?
874
+ \! # opening
875
+ (\<|\=|\>)? # optional alignment atts
876
+ (#{C}) # optional style,class atts
877
+ (?:\. )? # optional dot-space
878
+ ([^\s(!]+?) # presume this is the src
879
+ \s? # optional space
880
+ (?:\(((?:[^\(\)]|\([^\)]+\))+?)\))? # optional title
881
+ \! # closing
882
+ (?::#{ HYPERLINK })? # optional href
883
+ /x
884
+
885
+ def inline_textile_image( text )
886
+ text.gsub!( IMAGE_RE ) do |m|
887
+ stln,algn,atts,url,title,href,href_a1,href_a2 = $~[1..8]
888
+ atts = pba( atts )
889
+ atts = " src=\"#{ url }\"#{ atts }"
890
+ atts << " title=\"#{ title }\"" if title
891
+ atts << " alt=\"#{ title }\""
892
+ # size = @getimagesize($url);
893
+ # if($size) $atts.= " $size[3]";
894
+
895
+ href, alt_title = check_refs( href ) if href
896
+ url, url_title = check_refs( url )
897
+
898
+ out = ''
899
+ out << "<a#{ shelve( " href=\"#{ href }\"" ) }>" if href
900
+ out << "<img#{ shelve( atts ) } />"
901
+ out << "</a>#{ href_a1 }#{ href_a2 }" if href
902
+
903
+ if algn
904
+ algn = h_align( algn )
905
+ if stln == "<p>"
906
+ out = "<p style=\"float:#{ algn }\">#{ out }"
907
+ else
908
+ out = "#{ stln }<div style=\"float:#{ algn }\">#{ out }</div>"
909
+ end
910
+ else
911
+ out = stln + out
912
+ end
913
+
914
+ out
915
+ end
916
+ end
917
+
918
# Stores +val+ at the end of @shelf and returns the placeholder that stands
# in for it: " :redsh#N:" where N is the 1-based position of the value.
def shelve( val )
  @shelf.push( val )
  slot = @shelf.length
  " :redsh##{ slot }:"
end
922
+
923
# Puts shelved fragments back: every " :redsh#N:" placeholder in +text+ is
# replaced, in place, by the N-th entry of @shelf (1-based).
def retrieve( text )
  @shelf.each_with_index do |fragment, idx|
    # Block form: passing +fragment+ as a replacement *string* would make
    # gsub! interpret backslash sequences inside it (\1, \0, \\ ...), so a
    # shelved attribute value containing a backslash would be altered.
    text.gsub!( " :redsh##{ idx + 1 }:" ) { fragment }
  end
end
928
+
929
# Swaps every bare ampersand in +text+ (in place) for the dummy marker
# "x%x%". An ampersand that starts something shaped like an HTML entity --
# '#', letters or digits followed by a semicolon -- is skipped by the
# negative lookahead and left untouched.
def incoming_entities( text )
  text.gsub!( /&(?![#a-z0-9]+;)/i ) { "x%x%" }
end
936
+
937
# Wraps text enclosed in "== ... ==" with <notextile> tags, in place.
#
# The first pass handles markers that are preceded by whitespace or a line
# start and keeps that surrounding whitespace. The second pass (multi-line)
# handles a marker at the start of a line whose content the first pass
# could not match, e.g. content that spans lines and contains '='.
def no_textile( text )
  text.gsub!( /(^|\s)==([^=]+.*?)==(\s|$)?/,
    '\1<notextile>\2</notextile>\3' )
  # This pattern has a single capture group, so the content is \1. Reusing
  # the three-group replacement of the first pass put the content *before*
  # an empty <notextile></notextile> pair.
  text.gsub!( /^ *==([^=]+.*?)==/m,
    '<notextile>\1</notextile>' )
end
943
+
944
+ def clean_white_space( text )
945
+ # normalize line breaks
946
+ text.gsub!( /\r\n/, "\n" )
947
+ text.gsub!( /\r/, "\n" )
948
+ text.gsub!( /\t/, ' ' )
949
+ text.gsub!( /^ +$/, '' )
950
+ text.gsub!( /\n{3,}/, "\n\n" )
951
+ text.gsub!( /"$/, "\" " )
952
+
953
+ # if entire document is indented, flush
954
+ # to the left side
955
+ flush_left text
956
+ end
957
+
958
# Shifts +text+ to the left margin, in place.
#
# The amount removed is the smallest indentation found on any line that
# consists of spaces followed by a non-whitespace character; that many
# leading spaces are stripped from every line that has them. Nothing
# happens when no line starts with a space or when some such line is
# already at column 0.
def flush_left( text )
  return unless text =~ /^ /
  # Without at least one "spaces then non-space" line the search below can
  # never succeed (whitespace-only text, or a space followed by a tab), and
  # the loop would spin forever.
  return unless text =~ /^ *\S/

  indt = 0
  indt += 1 while text !~ /^ {#{indt}}\S/
  text.gsub!( /^ {#{indt}}/, '' ) if indt.nonzero?
end
969
+
970
# Turns footnote markers such as "word[1]" into superscripted links to the
# anchor "#fn1", in place. The marker must directly follow a word
# character; one trailing whitespace character, if present, is kept after
# the generated markup.
def footnote_ref( text )
  text.gsub!( /\b\[([0-9]+?)\](\s)?/ ) do
    num, trailing = $1, $2
    "<sup><a href=\"#fn#{ num }\">#{ num }</a></sup>#{ trailing }"
  end
end
974
+
975
+ OFFTAGS = /(code|pre|kbd|notextile)/
976
+ OFFTAG_MATCH = /(?:(<\/#{ OFFTAGS }>)|(<#{ OFFTAGS }[^>]*>))(.*?)(?=<\/?#{ OFFTAGS }|\Z)/mi
977
+ OFFTAG_OPEN = /<#{ OFFTAGS }/
978
+ OFFTAG_CLOSE = /<\/?#{ OFFTAGS }/
979
+ HASTAG_MATCH = /(<\/?\w[^\n]*?>)/m
980
+ ALLTAG_MATCH = /(<\/?\w[^\n]*?>)|.*?(?=<\/?\w[^\n]*?>|$)/m
981
+
982
+ def glyphs_textile( text, level = 0 )
983
+ if text !~ HASTAG_MATCH
984
+ pgl text
985
+ footnote_ref text
986
+ else
987
+ codepre = 0
988
+ text.gsub!( ALLTAG_MATCH ) do |line|
989
+ ## matches are off if we're between <code>, <pre> etc.
990
+ if $1
991
+ if line =~ OFFTAG_OPEN
992
+ codepre += 1
993
+ elsif line =~ OFFTAG_CLOSE
994
+ codepre -= 1
995
+ codepre = 0 if codepre < 0
996
+ end
997
+ elsif codepre.zero?
998
+ glyphs_textile( line, level + 1 )
999
+ else
1000
+ htmlesc( line, :NoQuotes )
1001
+ end
1002
+ # p [level, codepre, line]
1003
+
1004
+ line
1005
+ end
1006
+ end
1007
+ end
1008
+
1009
+ def rip_offtags( text )
1010
+ if text =~ /<.*>/
1011
+ ## strip and encode <pre> content
1012
+ codepre, used_offtags = 0, {}
1013
+ text.gsub!( OFFTAG_MATCH ) do |line|
1014
+ if $3
1015
+ offtag, aftertag = $4, $5
1016
+ codepre += 1
1017
+ used_offtags[offtag] = true
1018
+ if codepre - used_offtags.length > 0
1019
+ htmlesc( line, :NoQuotes ) unless used_offtags['notextile']
1020
+ @pre_list.last << line
1021
+ line = ""
1022
+ else
1023
+ htmlesc( aftertag, :NoQuotes ) if aftertag and not used_offtags['notextile']
1024
+ line = "<redpre##{ @pre_list.length }>"
1025
+ @pre_list << "#{ $3 }#{ aftertag }"
1026
+ end
1027
+ elsif $1 and codepre > 0
1028
+ if codepre - used_offtags.length > 0
1029
+ htmlesc( line, :NoQuotes ) unless used_offtags['notextile']
1030
+ @pre_list.last << line
1031
+ line = ""
1032
+ end
1033
+ codepre -= 1 unless codepre.zero?
1034
+ used_offtags = {} if codepre.zero?
1035
+ end
1036
+ line
1037
+ end
1038
+ end
1039
+ text
1040
+ end
1041
+
1042
# Restores the content that was set aside in @pre_list: each "<redpre#N>"
# marker in +text+ is replaced, in place, by @pre_list[N] (0-based).
# Does nothing and returns nil when @pre_list is empty.
def smooth_offtags( text )
  return if @pre_list.empty?

  ## replace <pre> content
  text.gsub!( /<redpre#(\d+)>/ ) do
    slot = $1.to_i
    @pre_list[slot]
  end
end
1048
+
1049
+ def inline( text )
1050
+ [/^inline_/, /^glyphs_/].each do |meth_re|
1051
+ @rules.each do |rule_name|
1052
+ method( rule_name ).call( text ) if rule_name.to_s.match( meth_re )
1053
+ end
1054
+ end
1055
+ end
1056
+
1057
+ def h_align( text )
1058
+ H_ALGN_VALS[text]
1059
+ end
1060
+
1061
+ def v_align( text )
1062
+ V_ALGN_VALS[text]
1063
+ end
1064
+
1065
# Builds the markup for a help link that opens the Textile reference on
# hobix.com in a popup window of +windowW+ by +windowH+.
#
# name::    link text
# windowW:: popup width (anything that converts with to_s)
# windowH:: popup height (anything that converts with to_s)
# helpvar:: anchor on the reference page. The previous body read an
#           undefined +helpvar+ and therefore always raised NameError.
#           NOTE(review): defaulting the anchor to +name+ is an assumption --
#           confirm the intended anchor names.
#
# Interpolation is used instead of String#+ so numeric sizes do not raise
# TypeError.
def textile_popup_help( name, windowW, windowH, helpvar = name )
  %Q( <a target="_blank" href="http://hobix.com/textile/##{ helpvar }" ) +
    %Q(onclick="window.open\(this.href, 'popupwindow', ) +
    %Q('width=#{ windowW },height=#{ windowH },scrollbars,resizable'\); return false;">) +
    "#{ name }</a><br />"
end
1068
+
1069
+ # HTML cleansing stuff
1070
+ BASIC_TAGS = {
1071
+ 'a' => ['href', 'title'],
1072
+ 'img' => ['src', 'alt', 'title'],
1073
+ 'br' => [],
1074
+ 'i' => nil,
1075
+ 'u' => nil,
1076
+ 'b' => nil,
1077
+ 'pre' => nil,
1078
+ 'kbd' => nil,
1079
+ 'code' => ['lang'],
1080
+ 'cite' => nil,
1081
+ 'strong' => nil,
1082
+ 'em' => nil,
1083
+ 'ins' => nil,
1084
+ 'sup' => nil,
1085
+ 'sub' => nil,
1086
+ 'del' => nil,
1087
+ 'table' => nil,
1088
+ 'tr' => nil,
1089
+ 'td' => ['colspan', 'rowspan'],
1090
+ 'th' => nil,
1091
+ 'ol' => nil,
1092
+ 'ul' => nil,
1093
+ 'li' => nil,
1094
+ 'p' => nil,
1095
+ 'h1' => nil,
1096
+ 'h2' => nil,
1097
+ 'h3' => nil,
1098
+ 'h4' => nil,
1099
+ 'h5' => nil,
1100
+ 'h6' => nil,
1101
+ 'blockquote' => ['cite']
1102
+ }
1103
+
1104
# Reduces the HTML in +text+ (in place) to a whitelist.
#
# text:: the markup to clean
# tags:: Hash of allowed tag name => Array of allowed attribute names
#        (nil or [] means "no attributes allowed")
#
# Opening CDATA markers are removed. Every tag whose (lower-cased) name is
# not a key of +tags+ is replaced by a single space. Allowed tags are
# rebuilt from scratch with only their allowed attributes; attribute values
# may be double-quoted, single-quoted or unquoted in the input and are
# always emitted double-quoted.
#
# A +src+ value that starts with a protocol other than one beginning with
# "http" is dropped.
# NOTE(review): only +src+ is protocol-checked; +href+ values (including
# javascript: URLs) pass through unchanged -- confirm whether that is
# intended.
#
# Returns +text+ if anything was replaced, nil otherwise.
def clean_html( text, tags = BASIC_TAGS )
  text.gsub!( /<!\[CDATA\[/, '' )
  text.gsub!( /<(\/*)(\w+)([^>]*)>/ ) do
    raw = $~
    tag = raw[2].downcase
    if tags.has_key? tag
      pcs = [tag]
      ( tags[tag] || [] ).each do |prop|
        ['"', "'", ''].each do |q|
          q2 = ( q != '' ? q : '\s' )
          next unless raw[3] =~ /#{prop}\s*=\s*#{q}([^#{q2}]+)#{q}/i
          # Keep the value in a local: the protocol test below resets $~, so
          # reading $1 afterwards gave nil and raised NoMethodError.
          attrv = $1
          # Stop looking at this attribute once it is rejected. Moving on to
          # the next quoting style would re-match the same value with its
          # quotes included and let it through.
          break if prop == 'src' and attrv =~ %r{^(?!http)\w+:}
          # A double quote inside the value must become an entity; a
          # backslash does not escape anything in HTML.
          pcs << "#{prop}=\"#{ attrv.gsub( '"', '&quot;' ) }\""
          break
        end
      end
      "<#{raw[1]}#{pcs.join " "}>"
    else
      " "
    end
  end
end
1128
+ end
1129
+