junebug 0.0.14 → 0.0.15

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,1006 @@
1
require 'digest/md5'
require 'md5'
2
+
3
+ unless defined? RedCloth
4
+ $:.unshift(File.dirname(__FILE__))
5
+ require 'base'
6
+ end
7
+
8
+ class RedCloth < String
9
+
10
+ DEFAULT_RULES << :docbook
11
+
12
+ # == Docbook Rules
13
+ #
14
+ # The following docbook rules can be set individually. Or add the complete
15
+ # set of rules with the single :docbook rule, which supplies the rule set in
16
+ # the following precedence:
17
+ #
18
+ # refs_docbook:: Docbook references (i.e. [hobix]http://hobix.com/)
19
+ # block_docbook_table:: Docbook table block structures
20
+ # block_docbook_lists:: Docbook list structures
21
+ # block_docbook_prefix:: Docbook blocks with prefixes (i.e. bq., h2., etc.)
22
+ # inline_docbook_image:: Docbook inline images
23
+ # inline_docbook_link:: Docbook inline links
24
+ # inline_docbook_wiki_words:: Docbook inline referring links
25
+ # inline_docbook_wiki_links:: Docbook inline referring links
26
+ # inline_docbook_span:: Docbook inline spans
27
+ # inline_docbook_glyphs:: Docbook entities (such as em-dashes and smart quotes)
28
+
29
+ # Elements to handle
30
+ DOCBOOK_GLYPHS = [
31
+ [ /([^\s\[{(>])\'/, '\1&#8217;' ], # single closing
32
+ [ /\'(?=\s|s\b|[#{PUNCT}])/, '&#8217;' ], # single closing
33
+ [ /\'/, '&#8216;' ], # single opening
34
+ # [ /([^\s\[{(])?"(\s|:|$)/, '\1&#8221;\2' ], # double closing
35
+ [ /([^\s\[{(>])"/, '\1&#8221;' ], # double closing
36
+ [ /"(?=\s|[#{PUNCT}])/, '&#8221;' ], # double closing
37
+ [ /"/, '&#8220;' ], # double opening
38
+ [ /\b( )?\.{3}/, '\1&#8230;' ], # ellipsis
39
+ [ /(\.\s)?\s?--\s?/, '\1&#8212;' ], # em dash
40
+ [ /\s->\s/, ' &rarr; ' ], # right arrow
41
+ [ /\s-\s/, ' &#8211; ' ], # en dash
42
+ [ /(\d+) ?x ?(\d+)/, '\1&#215;\2' ], # dimension sign
43
+ [ /\b ?[(\[]TM[\])]/i, '&#8482;' ], # trademark
44
+ [ /\b ?[(\[]R[\])]/i, '&#174;' ], # registered
45
+ [ /\b ?[(\[]C[\])]/i, '&#169;' ] # copyright
46
+ ]
47
+
48
#
# Generates Docbook XML from the Textile contents.
#
#   r = RedCloth.new( "And then? She *fell*!" )
#   r.to_docbook
#     #=>"And then? She <emphasis role=\"strong\">fell</emphasis>!"
#
# +rules+ picks the rule set to apply; DEFAULT_RULES is used when none are
# given, and the :docbook rule is expanded into DOCBOOK_RULES.
#
def to_docbook( *rules )
  # bookkeeping that is rebuilt for every conversion
  @stack = []
  @ids = []
  @references = []
  @automatic_content_ids = []

  rules = DEFAULT_RULES if rules.empty?
  # the receiver is never modified; all passes work on this copy
  text = dup

  @urlrefs = {}
  @shelf = []
  @rules = rules.map { |rule| :docbook == rule ? DOCBOOK_RULES : rule }.flatten

  # standard clean up
  incoming_entities text
  clean_white_space text

  # start processor
  @pre_list = []
  pre_process_docbook text

  no_docbook text
  docbook_rip_offtags text
  docbook_hard_break text

  refs text
  docbook_blocks text
  inline text

  smooth_offtags text
  retrieve text

  post_process_docbook text
  clean_html text if filter_html
  text.strip!

  # close whatever sections (innermost first) and chapter are still open
  text << "\n"
  @stack.size.downto( 1 ) { |level| text << "</sect#{level}>\n" }
  text << "</chapter>" if @chapter

  # every referenced id that was never defined gets a stub section
  pending = @references - @ids
  unless pending.empty?
    text << %{<chapter label="86" id="chapter-86"><title>To Come</title>}
    pending.each do |name|
      heading = name.split('-').map { |t| t.capitalize }.join(' ')
      text << %!<sect1 id="#{name}"><title>#{heading}</title><remark>TK</remark></sect1>\n!
    end
    text << "</chapter>"
  end

  text
end
112
+
113
+ #######
114
+ private
115
+ #######
116
+
117
+ # Elements to handle
118
+ # GLYPHS << [ /\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])/, '<acronym title="\2">\1</acronym>' ] # 3+ uppercase acronym
119
+ # GLYPHS << [ /(^|[^"][>\s])([A-Z][A-Z0-9 ]{2,})([^<a-z0-9]|$)/, '\1<span class="caps">\2</span>\3', :no_span_caps ] # 3+ uppercase caps
120
+
121
# Element names that docbook_blk still treats as ordinary block content;
# a block opening with any other tag is passed through untouched.
SIMPLE_DOCBOOK_TAGS = %w[
  para title remark blockquote itemizedlist orderedlist variablelist programlisting screen
  literallayout figure example abbrev accel acronym action application citation
  citetitle classname classref command computeroutput email emphasis envar filename
  firstterm foreignphrase footnoteref graphic function guibutton guimenu guimenuitem keycap
  keysym lineannotation literal option optional parameter prompt quote replaceable
  returnvalue sgmltag structfield structname subscript superscript symbol systemitem
  type userinput wordasword xref
]
130
+
131
# Textile span markers and the Docbook element (plus attributes) each yields.
# Entries flagged :limit only match when delimited by non-word characters.
DOCBOOK_TAGS = [
  ['**', 'emphasis role="strong"'],
  ['__', 'emphasis'],
  ['*', 'emphasis role="strong"', :limit],
  ['_', 'emphasis', :limit],
  ['??', 'citation', :limit],
  ['^', 'superscript', :limit],
  ['~', 'subscript', :limit],
  ['%', 'para', :limit],
  ['@', 'literal', :limit],
]
# Expand each entry in place to
#   [marker, element, regexp, mode, escaped_regexp]
# where escaped_regexp matches the same span with @@escape_keyword wrapped
# around the markers.
DOCBOOK_TAGS.map! do |marker, element, mode|
  quoted = Regexp.quote( marker )
  if :limit == mode
    plain_re =
      /(\W)
      (#{quoted})
      (#{C})
      (?::(\S+?))?
      (\S.*?\S|\S)
      #{quoted}
      (?=\W)/x
    guarded_re =
      /(\W)
      (#{@@escape_keyword}#{quoted})
      (#{C})
      (?::(\S+?))?
      (\S.*?\S|\S)
      #{quoted}#{@@escape_keyword}
      (?=\W)/x
  else
    plain_re =
      /(#{quoted})
      (#{C})
      (?::(\S+))?
      (\S.*?\S|\S)
      #{quoted}/xm
    guarded_re =
      /(#{@@escape_keyword}#{quoted})
      (#{C})
      (?::(\S+))?
      (\S.*?\S|\S)
      #{quoted}#{@@escape_keyword}/xm
  end
  [marker, element, plain_re, mode, guarded_re]
end
180
+
181
# Rewrites author shorthand in +text+ (in place) before block parsing:
# dt/dd divs, "w. ... .w"-style wrappers, bq wrappers, <br>/<em> tags,
# backtick code and <pre>/<code> blocks.
#
# Also (re)defines docbook_w, docbook_n, ... on the class; each simply
# delegates to docbook_p with the long role name.
#
# Returns the result of the final gsub! (nil when it changed nothing).
def pre_process_docbook(text)

  # Prepare dt and dd the way they should be
  text.gsub!( /div\((d[dt])\)\.(.*?)div\(\1\)\./m ) do
    "p(#{$1}). #{$2.gsub("\n", LB)}"
  end
  text.gsub!( /p\(dt\)\.(.*?)p\(dd\)\.(.*?)$/m ) do
    dt, dd = $~[1..2]
    "- #{dt.gsub(LB,"\n").strip} := #{dd.gsub(LB,"\n").strip} =:"
  end

  # Prepare superscripts and subscripts
  text.gsub!( /(\w)(\^[0-9,]+\^)/, '\1 \2' )
  text.gsub!( /(\w)(\~[0-9,]+\~)/, '\1 \2' )

  {'w' => 'warning', 'n' => 'note', 'c' => 'comment', 'pro' => 'production', 'dt' => 'dt', 'dd' => 'dd'}.each do |char, word|
    parts = text.split(/^\s*#{char}\./)
    # "".split returns [], which used to blow up on parts.first + "\n"
    unless parts.empty?
      rewritten = parts[1..-1].map do |part|
        wrapped =
          if part =~ /\.#{char}\s*$/
            "div(#{word}).\n" + part.sub(/\.#{char}\s*$/, "\ndiv(#{word}). \n")
          else
            # no closing ".x" marker: put the prefix back untouched
            "#{char}.#{part}"
          end
        wrapped + "\n"
      end
      text.replace(parts.first + "\n" + rewritten.join("\n"))
    end

    self.class.class_eval %!
      def docbook_#{char}(tag, atts, cite, content)
        docbook_p('p', #{word.inspect}, cite, content)
      end
    !
  end

  {'bq' => 'blockquote'}.each do |char, word|
    parts = text.split(/^\s*#{char}\./)
    next if parts.empty?
    rewritten = parts[1..-1].map do |part|
      if part =~ /\.#{char}\s*$/
        "div(#{word}).\n\n<para>" + part.sub(/\.#{char}\s*$/, "</para>\n\ndiv(#{word}). ")
      else
        "#{char}.#{part}"
      end
    end
    text.replace(parts.first + "\n" + rewritten.join("\n"))
  end

  text.gsub!(/<br.*?>/i, "&#x00A;")
  text.gsub!(/<\/?em.*?>/i, "__")

  text.gsub!( BACKTICK_CODE_RE ) do
    before, _lang, code, after = $~[1..4]
    docbook_rip_offtags( "#{ before }<programlisting>#{ code.gsub(/\\\`\`\`/,'```') }</programlisting>#{ after }" )
  end
  text.gsub! %r{<pre>\s*(<code>)?}i, '<para><programlisting>'
  text.gsub! %r{(</code>)?\s*</pre>}i, '</programlisting></para>'
  text.gsub! %r{<(/?)code>}i, '<\1programlisting>'

end
236
+
237
# Final clean-up pass over the generated Docbook in +text+ (modified in place):
# restores line-break placeholders, collapses redundant <para> nesting,
# turns numeric <ulink>s into <link linkend>, inlines footnotes at their first
# "[n]" reference and renders spans written with the escaped marker form.
def post_process_docbook( text )
  text.sub!( "</chapter>\n\n", "" )
  text.gsub!( LB, "\n" )
  text.gsub!( NB, "" )
  text << "</#{@div_atts}>" if @div_atts
  text.gsub!(%r{<(#{DOCBOOK_PARAS.join("|")})([^>]*)>\s*<para>(.*?)</para>\s*</\1>}mi) do
    t, c, inner = $~[1..3]
    "<#{t}#{c}>" << inner.gsub(/<para>/, "<#{t}#{c}>").gsub(/<\/para>/, "</#{t}>") << "</#{t}>"
  end
  text.gsub! %r{<para[^>]*>\s*<para([^>]*)>}i,'<para\1>' # clean multiple paragraphs in a row just in case
  text.gsub! %r{</para>\s*</para>}i,'</para>' # clean multiple paragraphs in a row just in case
  text.gsub! %r{<para[^>]*>\s*</para>\s*}i, '' # clean empty paras
  text.gsub! %r{<(/?)sup>}i, '<\1superscript>'
  text.gsub! %r{<(/?)sub>}i, '<\1subscript>'
  text.gsub! %r{</?nodocbook>}, ''
  text.gsub! %r{x%x%}, '&#38;'

  # Collect the ids first: scan yields one-element arrays (interpolating the
  # array itself breaks the pattern on Ruby >= 1.9) and the string must not be
  # modified while it is being scanned.
  text.scan( /id="id([0-9]+)"/i ).flatten.uniq.each do |number|
    text.gsub!( /<ulink url="#{number}">(.*?)<\/ulink>/, %{<link linkend="id#{number}">\\1</link>} )
  end

  text.gsub!( %r{<programlisting>\n}, "<programlisting>" )
  text.gsub!( %r{\n</programlisting>}, "</programlisting>\n" )

  # "[n]" references are only inlined in strictly ascending order 1, 2, 3...
  i = 1
  text.gsub!(/\[\d+\]/) do |ref|
    id = ref[/\d+/].to_i
    if id == i
      i += 1
      if text =~ /<footnote id="fn#{id}">(.*?)<\/footnote>/
        "<footnote id=\"footnote#{id}\">#{$1}</footnote>"
      else
        ref
      end
    else
      ref
    end
  end

  text.gsub!(/<footnote id="fn\d+">(.*?)<\/footnote>/, '')

  DOCBOOK_TAGS.each do |qtag_rc, ht, qtag_re, rtype, escaped_re|
    text.gsub!( escaped_re ) do
      if rtype == :limit
        sta, _qtag, atts, cite, content = $~[1..5]
      else
        _qtag, atts, cite, content = $~[1..4]
        sta = ''
      end

      # keep the per-match element separate from +ht+, otherwise one
      # role-overridden match changes the default for all later matches
      element, atts = docbook_sanitize_para atts, content, ht
      atts = docbook_pba( atts )
      wraps = ['note', 'blockquote'].include?( element )

      "#{ sta }<#{ element }#{ atts }>#{ '<para>' if wraps }#{ cite }#{ content }#{ '</para>' if wraps }</#{ element.gsub(/^([^\s]+).*/,'\1') }>"
    end
  end
end
298
+
299
# Parses a Docbook table block, building XML from the result.
#
# Every TABLE_RE match in +text+ is replaced in place by a <table> (when a
# caption was captured) or an <informaltable>. Rows whose cells start with
# "_" go into their own <thead>; the remaining rows share one <tbody>.
# The cols attribute reflects the cell count of the last row processed.
def block_docbook_table( text )
  text.gsub!( TABLE_RE ) do

    caption, id, tatts, fullrow = $~[1..4]
    tatts = docbook_pba( tatts, caption ? 'table' : 'informaltable' )
    tatts = shelve( tatts ) if tatts
    rows = []

    found_first = false
    cols = 0
    raw_rows = fullrow.split( /\|$/m ).delete_if { |row| row.empty? }
    raw_rows.each_with_index do |row, row_index|

      ratts, row = docbook_pba( $1, 'row' ), $2 if row =~ /^(#{A}#{C}\. )(.*)/m
      row << " "

      cells = []
      head = 'tbody'
      pieces = row.split( '|' )
      cols = pieces.size - 1
      pieces.each_with_index do |cell, i|
        next if i == 0
        head = 'thead' if cell =~ /^_/

        catts = ''
        catts, cell = docbook_pba( $1, 'entry' ), $2 if cell =~ /^(_?#{S}#{A}#{C}\. ?)(.*)/

        catts = shelve( catts ) if catts
        body =
          if cell.strip.empty?
            "&nbsp;"
          elsif cols != i
            cell
          else
            # last cell: drop the padding blank appended to the row above
            cell[0...cell.length-1]
          end
        cells << "<entry#{ catts }>#{ body }</entry>"
      end
      ratts = shelve( ratts ) if ratts
      if head == 'tbody'
        if !found_first
          found_first = true
          rows << "<#{ head }>"
        end
      else
        rows << "<#{ head }>"
      end
      rows << "<row#{ ratts }>\n#{ cells.join( "\n" ) }\n</row>"
      # Compare positions, not contents: once a row-attribute prefix has been
      # stripped, +row+ no longer equals raw_rows.last and <tbody> stayed open.
      rows << "</#{ head }>" if head != 'tbody' || row_index == raw_rows.size - 1
    end
    title = "<title>#{ caption }</title>\n" if caption

    if id
      @ids << "id#{id}"
      id = " id=\"#{ "id#{id}" }\""
    end

    %{<#{ caption ? nil : 'informal' }table#{ id }#{ tatts }>\n#{title}<tgroup cols="#{cols}">\n#{ rows.join( "\n" ) }\n</tgroup>\n</#{ caption ? nil : 'informal' }table>\n\n}
  end
end
352
+
353
# Parses Docbook lists and generates Docbook XML.
#
# Each LISTS_RE match in +text+ is rewritten in place into nested
# <orderedlist>/<itemizedlist> markup (the list type comes from lD).
# Returns true when +text+ was changed, false otherwise.
def block_docbook_lists( text )
  orig_text = text.dup
  text.gsub!( LISTS_RE ) do |match|
    lines = match.split( /\n/ )
    depth = []
    lines.each_with_index do |line, line_id|
      if line =~ LISTS_CONTENT_RE
        tl, _continuation, atts, content = $~[1..4]
        if depth.last
          if depth.last.length > tl.length
            # unwind the deeper lists until we are back at this item's level
            (depth.length - 1).downto(0) do |i|
              break if depth[i].length == tl.length
              lines[line_id - 1] << "</para></listitem>\n</#{ lD( depth[i] ) }>\n"
              depth.pop
            end
          end
          # depth may be empty here when the block started at a deeper level
          if depth.last && depth.last.length == tl.length
            lines[line_id - 1] << "</para></listitem>"
          end
        end
        escaped = content.gsub("<","&lt;").gsub(">","&gt;")
        if depth.last == tl
          lines[line_id] = "<listitem><para>#{ escaped }"
        else
          depth << tl
          atts = docbook_pba( atts )
          atts = shelve( atts ) if atts
          lines[line_id] = "<#{ lD( tl ) }#{ atts }>\n<listitem><para>#{ escaped }"
        end
      end
      if line_id == lines.length - 1
        # close innermost first so mixed ordered/itemized nesting stays well-formed
        depth.reverse_each do |v|
          lines[line_id] << "</para></listitem>\n</#{ lD( v ) }>"
        end
        depth.clear
      end
    end
    lines.join( "\n" )
  end
  text != orig_text
end
400
+
401
# Parses underscore-prefixed list lines and generates Docbook <simplelist> XML.
#
# Works on every LISTS_RE match in +text+ (modified in place); the number of
# leading underscores is the nesting level. Returns true when +text+ changed.
def block_docbook_simple_lists( text )
  before = text.dup
  text.gsub!( LISTS_RE ) do |match|
    lines = match.split( /\n/ )
    final_index = lines.length - 1
    open_levels = []
    lines.each_with_index do |line, idx|
      if line =~ /^([_]+)(#{A}#{C}) (.*)$/m
        marker, atts, content = $~[1..4]
        if open_levels.last
          if open_levels.last.length > marker.length
            # step back out of the deeper lists
            (open_levels.length - 1).downto(0) do |i|
              break if open_levels[i].length == marker.length
              lines[idx - 1] << "</member>\n</simplelist>\n"
              open_levels.pop
            end
          end
          if open_levels.last.length == marker.length
            lines[idx - 1] << "</member>"
          end
        end
        member = "<member>#{ content.gsub("<","&lt;").gsub(">","&gt;") }"
        if open_levels.last == marker
          lines[idx] = member
        else
          open_levels << marker
          atts = docbook_pba( atts )
          atts = shelve( atts ) if atts
          lines[idx] = "<simplelist#{ atts }>\n#{ member }"
        end
      end
      # on the last line, close every list that is still open
      if idx == final_index
        open_levels.size.times { lines[idx] << "</member>\n</simplelist>" }
        open_levels.clear
      end
    end
    lines.join( "\n" )
  end
  text != before
end
447
+
448
# Parses docbook definition lists and generates Docbook <variablelist> XML.
#
# First wraps each "- term := definition =:" body in <para> elements (one per
# non-blank line), then rewrites every DEFS_RE match in +text+ in place.
# Returns the result of the final gsub! (nil when nothing matched).
def block_docbook_defs( text )
  text.gsub!(/^-\s+(.*?):=(.*?)=:\s*$/m) do
    term, body = $~[1..2]
    paras = body.split(/\n/).map { |w| w.strip }.reject { |w| w.empty? }
    "- #{term.strip} := <para>" + paras.join("</para><para>") + "</para>"
  end

  text.gsub!( DEFS_RE ) do |match|
    lines = match.split( /\n/ )
    lines.each_with_index do |line, line_id|
      if line =~ DEFS_CONTENT_RE
        _dl, _continuation, dt, dd = $~[1..4]

        # No attributes are captured for definition entries; the former
        # pba/shelve calls on an unset local only shelved an unused value.
        opening = line_id == 0 ? "<variablelist>" : ""
        lines[line_id] = "#{ opening }\n\t<varlistentry><term>#{ dt.strip }</term>\n\t<listitem><para>#{ dd.strip }</para></listitem></varlistentry>"
      end

      if line_id == lines.length - 1
        lines[-1] << "\n</variablelist>"
      end
    end
    lines.join( "\n" )
  end
end
474
+
475
# Replaces each CODE_RE match in +text+ with a <literal> element. The code is
# unescaped ("\@" / "\@@" become "@"), HTML-escaped and shelved so later
# inline rules leave it alone.
def inline_docbook_code( text )
  text.gsub!( CODE_RE ) do
    found = $~
    lead, trail = found[1], found[4]
    snippet = found[3].gsub(/\\@@?/,'@')
    htmlesc snippet, :NoQuotes
    docbook_rip_offtags( "#{ lead }<literal>#{ shelve snippet }</literal>#{ trail }" )
  end
end
483
+
484
# Maps a list marker to its Docbook list element: markers ending in "#"
# are ordered lists, everything else is an itemized list.
def lD( text )
  return 'orderedlist' if text =~ /\#$/
  'itemizedlist'
end
487
+
488
# When hard_breaks is enabled, turns single newlines inside running text into
# <sbr /> (newlines followed by list/table markers, whitespace or a line end
# are left alone). Returns nil when disabled or when nothing changed.
def docbook_hard_break( text )
  return unless hard_breaks
  text.gsub!( /(.)\n(?! *[#*\s|]|$)/, "\\1<sbr />" )
end
491
+
492
# Renders a "bq." block as a <blockquote> with one <para>. The cite value is
# resolved through check_refs and, when present, emitted as citetitle.
# +atts+ is shelved but does not appear in the output.
def docbook_bq( tag, atts, cite, content )
  cite, _cite_title = check_refs( cite )
  citation = cite ? " citetitle=\"#{ cite }\"" : cite
  shelve( atts ) if atts
  "<blockquote#{ citation }>\n<para>#{ content }</para>\n</blockquote>"
end
498
+
499
+ DOCBOOK_DIVS = ['note', 'blockquote', 'warning']
500
# Renders a paragraph-like block. docbook_sanitize_para picks the element
# from the attributes; elements listed in DOCBOOK_DIVS get an inner <para>.
def docbook_p( tag, atts, cite, content )
  element, raw_atts = docbook_sanitize_para atts, content
  attributes = docbook_pba( raw_atts )
  attributes << " citetitle=\"#{ cite }\"" if cite
  attributes = shelve( attributes ) if attributes

  if DOCBOOK_DIVS.include? element
    inner_open, inner_close = '<para>', '</para>'
  else
    inner_open, inner_close = nil, nil
  end
  # the closing tag is the element name without any attributes it carries
  closing = element.gsub(/^([^\s]+).*/,'\1')

  "<#{ element }#{ attributes }>#{ inner_open }#{ content }#{ inner_close }</#{ closing }>"
end
508
+
509
# Renders a div-style block. The content is wrapped in an extra <para>
# when +extra_para+ is set or when the resolved element is itself 'para'.
def docbook_div( tag, atts, cite, content, extra_para = true )
  element, attributes = docbook_sanitize_para atts, content
  if extra_para || element == 'para'
    opener, closer = "\n<para>", "</para>\n"
  else
    opener, closer = "", ""
  end
  "<#{ element }#{ attributes }>#{ opener }#{ content }#{ closer }</#{ element.gsub(/^([^\s]+).*/,'\1') }>\n"
end
514
+
515
# Builds an id for the section currently on top of @stack: "S" followed by
# the MD5 hex digest of the '-'-joined stack titles. When that id was
# already handed out, a counter (1, 2, ...) is appended to the digested
# string until the id is new. The id is recorded in @automatic_content_ids.
#
# Uses Digest::MD5 directly; the top-level MD5 class came from the 'md5'
# library, which no longer exists on Ruby 1.9 and later.
def automatic_content_id
  base = @stack.map { |title| title.sub(/^\s*\{\{(.+)\}\}.+/,'\1').strip }.join('-')
  attempt = 0
  new_id = nil
  loop do
    suffix = attempt.zero? ? '' : attempt.to_s
    new_id = "S" + Digest::MD5.hexdigest( base + suffix )
    attempt += 1
    break unless @automatic_content_ids.include?( new_id )
  end
  @automatic_content_ids.push( new_id )
  new_id
end
525
+
526
# Defines docbook_h1 .. docbook_h4.
#
# docbook_hN closes every open section at level N or deeper, pushes the new
# heading onto @stack and returns the closing tags followed by the opening
# <sectN id="..."> (plus an optional role) and its <title>.
(1..4).each do |depth|
  class_eval %Q{
    def docbook_h#{depth}( tag, atts, cite, content )
      content_id, role = sanitize_content(content)
      atts = shelve( atts ) if atts

      markup = ''
      while @stack.size >= #{depth}
        markup << '</sect' << @stack.size.to_s << ">\\n"
        @stack.pop
      end
      @stack.push sanitized_id_for(content)

      section_id = content_id.nil? ? automatic_content_id : sanitized_id_for(content_id)
      markup << '<sect#{depth} id="' << section_id << '"'
      markup << ' role="' << role << '"' if role
      markup << '><title>' << content.sub(/^\\s*\\{\\{.+\\}\\}(.+)/,'\\1').strip << '</title>'
    end
  }
end
559
+
560
# Handle things like:
#   ch. 1. Some Title id. 123
#
# Closes all open sections, emits "</chapter>" for the previous chapter and
# opens a new <chapter>. The text before the first period is the label, the
# rest is the title (which may itself contain periods). The chapter id is the
# sanitized "id." marker when one was given, otherwise the title.
def docbook_ch( tag, atts, cite, content )
  content_id, _role = sanitize_content(content)

  # limit 2: only the first period separates label and title
  label, title = content.split('.', 2).map { |c| c.strip }

  string = ""
  # Close off the sections in order to end the chapter cleanly
  @stack.size.downto( 1 ) { |level| string << "</sect#{level}>" }
  @stack = []

  string << "</chapter>\n\n"
  @chapter = true # let the instance know that a chapter has started
  string << '<chapter label="'
  string << label.to_s
  string << '" id="'
  # to_s: a heading without a label part leaves +title+ nil
  string << (content_id.nil? ? title.to_s : sanitized_id_for(content_id))
  string << '"><title>'
  string << title.to_s
  string << '</title>'

  string
end
584
+
585
# Renders footnote +num+ as <footnote id="fnN"> with a single <para>.
# +atts+ may be nil; the caller's string is no longer modified in place.
def docbook_fn_( tag, num, atts, cite, content )
  atts = shelve( "#{ atts } id=\"fn#{ num }\"" )
  "<footnote#{atts}><para>#{ content }</para></footnote>"
end
590
+
591
# Dispatches a prefixed block ("h2.", "bq.", "fn1.", ...) to its handler.
#
# When +text+ matches BLOCK_RE, looks for docbook_<tag> and then for
# docbook_<tagpre>_ (the numbered form) and substitutes the handler's output
# for the matched block. Returns +text+ when a handler ran, nil otherwise.
def block_docbook_prefix( text )
  return nil unless text =~ BLOCK_RE

  matched = $&
  tag, tagpre, num, atts, cite, content = $~[1..6]
  atts = docbook_pba( atts )

  # pass to prefix handler
  if respond_to? "docbook_#{ tag }", true
    replacement = send( "docbook_#{ tag }", tag, atts, cite, content )
  elsif respond_to? "docbook_#{ tagpre }_", true
    replacement = send( "docbook_#{ tagpre }_", tagpre, num, atts, cite, content )
  else
    return nil
  end

  # Block form: a replacement *string* would have its backslash sequences
  # (\\, \1, \0 ...) interpreted by gsub! and mangle the handler's output.
  text.gsub!( matched ) { replacement }
end
604
+
605
# Converts inline span markers (*strong*, _emphasis_, @literal@, ...) in
# +text+ into the Docbook elements listed in DOCBOOK_TAGS. A span's
# attributes may override the element through docbook_sanitize_para;
# note and blockquote elements get an inner <para>.
def inline_docbook_span( text )
  DOCBOOK_TAGS.each do |qtag_rc, ht, qtag_re, rtype, escaped_re|
    text.gsub!( qtag_re ) do

      if rtype == :limit
        sta, _qtag, atts, cite, content = $~[1..5]
      else
        _qtag, atts, cite, content = $~[1..4]
        sta = ''
      end

      # Resolve into a per-match local: assigning to +ht+ here made one
      # overridden span change the element of every later span of this marker.
      element, atts = docbook_sanitize_para atts, content, ht

      atts = docbook_pba( atts )
      atts << " citetitle=\"#{ cite }\"" if cite
      atts = shelve( atts ) if atts

      wraps = ['note', 'blockquote'].include?( element )

      "#{ sta }<#{ element }#{ atts }>#{ '<para>' if wraps }#{ content }#{ '</para>' if wraps }</#{ element.gsub(/^([^\s]+).*/,'\1') }>"

    end
  end
end
633
+
634
# Returns the position of +name+ in the concatenation of the second element
# of every BOOK entry, or nil when it is not listed. The concatenated list is
# built once and cached in @book.
def docbook_lookup_hack(name)
  unless @book
    entries = []
    BOOK.each { |chapter| entries = entries + chapter[1] }
    @book = entries
  end
  @book.index( name )
end
638
+
639
# Replaces each LINK_RE match in +text+ with a <ulink>. The target is
# resolved through check_refs and double quotes in it are escaped; link
# attributes are shelved but not emitted.
def inline_docbook_link( text )
  text.gsub!( LINK_RE ) do
    pre, atts, label, _title, url, slash, post = $~[1..7]

    url, _url_title = check_refs( url )
    shelve( atts ) if atts

    target = url.to_s.gsub('"','&quot;') + slash.to_s.gsub('"','&quot;')
    "#{ pre }<ulink url=\"#{ target }\">#{ label }</ulink>#{ post }"
  end
end
651
+
652
+ DOCBOOK_REFS_RE = /(^ *)\[([^\[\n]+?)\](#{HYPERLINK})(?=\s|$)/
653
+
654
# Collects "[name]url" reference definitions: each one is stored in @urlrefs
# under its lower-cased name and removed from +text+.
def refs_docbook( text )
  text.gsub!( DOCBOOK_REFS_RE ) do
    found = $~
    @urlrefs[found[2].downcase] = [found[3], nil]
    ''
  end
end
661
+
662
# Replaces each IMAGE_RE match in +text+ with a <graphic>, wrapped in a
# <figure> with a <title> when the image has a non-empty title.
#
# The image URL is resolved through check_refs *before* fileref is built, so
# a reference alias ends up as its real target rather than the alias name.
def inline_docbook_image( text )
  text.gsub!( IMAGE_RE ) do
    stln, _algn, atts, url, title = $~[1..5]

    url, _url_title = check_refs( url )
    atts = " fileref=\"#{ url }\"#{ docbook_pba( atts ) }"

    titled = title && !title.empty?
    out = stln.to_s.dup # the leading capture may be nil
    out << "<figure><title>#{title}</title>\n" if titled
    out << "<graphic#{ shelve( atts ) } />\n"
    out << "</figure>" if titled

    out
  end
end
679
+
680
# Turns all urls into <ulink> elements.
# Taken from ActionPack's ActionView
def inline_docbook_autolink_urls(text)
  text.gsub!(AUTO_LINK_RE) do
    whole, lead, scheme, rest, trail = $&, $1, $2, $3, $5
    # don't replace URL's that are already linked
    next whole if lead =~ /<a\s/i

    target = (scheme == "www.") ? "http://www." : scheme
    %(#{lead}<ulink url="#{target}#{rest}">#{scheme}#{rest}</ulink>#{trail})
  end
end
692
+
693
# Wraps every email address found in +text+ in an <email> element.
# Returns nil when no address was found.
def inline_docbook_autolink_emails(text)
  text.gsub!(/([\w\.!#\$%\-+.]+@[A-Za-z0-9\-]+(\.[A-Za-z0-9\-]+)+)/) do
    "<email>#{ $1 }</email>"
  end
end
697
+
698
# Marks "==raw==" spans so later passes leave them alone: they become
# <nodocbook>raw</nodocbook>. A backslash-escaped pair ("\==raw\==") is
# emitted as a literal "==raw==" instead.
def no_docbook( text )
  # spans preceded by whitespace or a line start
  text.gsub!( /(^|\s)(\\?)==([^=]+.*?)\2==(\s|$)?/ ) do
    lead, escape, body, tail = $1, $2, $3, $4
    if escape.empty?
      "#{lead}<nodocbook>#{body}</nodocbook>#{tail}"
    else
      "#{lead}==#{body}==#{tail}"
    end
  end
  # spans opening a line, possibly running over several lines
  text.gsub!( /^ *(\\?)==([^=]+.*?)\1==/m ) do
    escape, body = $1, $2
    if escape.empty?
      "<nodocbook>#{body}</nodocbook>"
    else
      "==#{body}=="
    end
  end
end
706
+
707
# Applies the glyph substitutions (docbook_pgl) to the text between tags.
# Text without any tag is handled directly; otherwise each ALLTAG_MATCH
# piece is visited and pieces inside off-tags are only HTML-escaped.
def inline_docbook_glyphs( text, level = 0 )
  return docbook_pgl( text ) if text !~ HASTAG_MATCH

  open_offtags = 0
  text.gsub!( ALLTAG_MATCH ) do |line|
    is_tag = $1
    ## matches are off if we're between <code>, <pre> etc.
    if is_tag
      if line =~ OFFTAG_OPEN
        open_offtags += 1
      elsif line =~ OFFTAG_CLOSE
        open_offtags = [open_offtags - 1, 0].max
      end
    elsif open_offtags.zero?
      inline_docbook_glyphs( line, level + 1 )
    else
      htmlesc( line, :NoQuotes )
    end

    line
  end
end
732
+
733
+ DOCBOOK_OFFTAGS = /(nodocbook|programlisting)/i
734
+ DOCBOOK_OFFTAG_MATCH = /(?:(<\/#{ DOCBOOK_OFFTAGS }>)|(<#{ DOCBOOK_OFFTAGS }[^>]*>))(.*?)(?=<\/?#{ DOCBOOK_OFFTAGS }|\Z)/mi
735
+ DOCBOOK_OFFTAG_OPEN = /<#{ DOCBOOK_OFFTAGS }/
736
+ DOCBOOK_OFFTAG_CLOSE = /<\/?#{ DOCBOOK_OFFTAGS }/
737
+
738
# Pulls the content of off-tags (DOCBOOK_OFFTAGS) out of +text+, stores it in
# @pre_list and leaves a "<redpre#N>" placeholder behind. Stored content is
# HTML-escaped unless a nodocbook tag is among the open off-tags.
# Returns +text+.
def docbook_rip_offtags( text )
  return text unless text =~ /<.*>/

  ## strip and encode <pre> content
  depth, seen = 0, {}
  text.gsub!( DOCBOOK_OFFTAG_MATCH ) do |line|
    closing, opening, tag_name, trailing = $1, $3, $4, $5
    if opening
      depth += 1
      seen[tag_name] = true
      if depth - seen.length > 0
        # nested inside an off-tag that is already open: append to its entry
        htmlesc( line, :NoQuotes ) unless seen['nodocbook']
        @pre_list.last << line
        line = ""
      else
        htmlesc( trailing, :NoQuotes ) if trailing and not seen['nodocbook']
        line = "<redpre##{ @pre_list.length }>"
        @pre_list << "#{ opening }#{ trailing }"
      end
    elsif closing and depth > 0
      if depth - seen.length > 0
        htmlesc( line, :NoQuotes ) unless seen['nodocbook']
        @pre_list.last << line
        line = ""
      end
      depth -= 1 unless depth.zero?
      seen = {} if depth.zero?
    end
    line
  end
  text
end
770
+
771
+ # In order of appearance: Latin, Greek, Cyrillic, Armenian
772
+ I18N_HIGHER_CASE_LETTERS =
773
+ "√Ä√?√Ç√É√Ñ√փăуÇ√Ü√áƒÜƒåƒàƒäƒéƒ?√à√â√ä√ãƒíƒòƒöƒîƒñƒúƒûƒ†ƒ¢ƒ§ƒ¶√å√?√é√?ƒ™ƒ®ƒ¨ƒÆƒ∞ƒ≤ƒ¥ƒ∂≈?ƒΩƒπƒªƒø√ë≈É≈á≈Ö≈ä√í√ì√î√ï√ñ√ò≈å≈?≈é≈í≈î≈ò≈ñ≈ö≈†≈û≈ú»ò≈§≈¢≈¶»ö√ô√ö√õ√ú≈™≈Æ≈∞≈¨≈®≈≤≈¥√?≈∂≈∏≈π≈Ω≈ª" +
774
+ "ŒëŒíŒìŒîŒïŒñŒóŒòŒôŒöŒõŒúŒ?ŒûŒüŒ†Œ°Œ£Œ§Œ•Œ¶ŒßŒ®Œ©" +
775
+ "ŒÜŒàŒâŒäŒåŒéŒ?—†—¢—§—¶—®—™—¨—Æ—∞—≤—¥—∂—∏—∫—º—æ“Ä“ä“å“é“?“í“î“ñ“ò“ö“ú“û“†“¢“§“¶“®“™“¨“Æ“∞“≤“¥“∂“∏“∫“º“æ”?”ɔ֔á”â”ã”?”?”í”î”ñ”ò”ö”ú”û”†”¢”§”¶”®”™”¨”Æ”∞”≤”¥”∏–ñ" +
776
+ "‘±‘≤‘≥‘¥‘µ‘∂‘∑‘∏‘π‘∫‘ª‘º‘Ω‘æ‘ø’Ä’?’Ç’É’Ñ’Ö’Ü’á’à’â’ä’ã’å’?’?’?’ë’í’ì’î’ï’ñ"
777
+
778
+ I18N_LOWER_CASE_LETTERS =
779
+ "√†√°√¢√£√§√•ƒ?ƒÖƒÉ√¶√߃áƒ?ƒâƒãƒ?ƒë√®√©√™√´ƒìƒôƒõƒïƒó∆íƒ?ƒüƒ°ƒ£ƒ•ƒß√¨√≠√Æ√؃´ƒ©ƒ≠ƒØƒ±ƒ≥ƒµƒ∑ƒ∏≈ǃæƒ∫ƒº≈Ä√±≈Ñ≈à≈Ü≈â≈ã√≤√≥√¥√µ√∂√∏≈?≈ë≈?≈ì≈ï≈ô≈ó≈õ≈°≈ü≈?»ô≈•≈£≈ß»õ√π√∫√ª√º≈´≈Ø≈±≈≠≈©≈≥≈µ√Ω√ø≈∑≈æ≈º≈∫√û√æ√ü≈ø√?√∞" +
780
+ "Œ¨Œ≠ŒÆŒØŒ∞Œ±Œ≤Œ≥Œ¥ŒµŒ∂Œ∑Œ∏ŒπŒ∫ŒªŒºŒΩŒæŒøœÄœ?œÇœÉœÑœÖœÜœáœàœâœäœãœåœ?œéŒ?" +
781
+ "–∞–±–≤–≥–¥–µ–∂–∑–∏–π–∫–ª–º–Ω–æ–ø—Ä—?—Ç—É—Ñ—Ö—Ü—á—à—â—ä—ã—å—?—é—?—?—ë—í—ì—î—ï—ñ—ó—ò—ô—õ—ú—?—û—ü—°—£—•—ß—©—´—≠—Ø—±—≥—µ—∑—π—ª—Ω—ø“?“ã“?“?“ë“ì“ï“ó“ô“õ“?“ü“°“£“•“ß“©“´“≠“Ø“±“≥“µ“∑“𓪓ٓø”Ĕǔєܔà”ä”å”é”ë”ì”ï”ó”ô”õ”?”ü”°”£”•”ß”©”´”≠”Ø”±”≥”µ”π" +
782
+ "’°’¢’£’§’•’¶’ß’®’©’™’´’¨’≠’Æ’Ø’∞’±’≤’≥’¥’µ’∂’∑’∏’π’∫’ª’º’Ω’æ’ø÷Ä÷?÷Ç÷É÷Ñ÷Ö÷Ü÷á"
783
+
784
+ WIKI_WORD_PATTERN = '[A-Z' + I18N_HIGHER_CASE_LETTERS + '][a-z' + I18N_LOWER_CASE_LETTERS + ']+[A-Z' + I18N_HIGHER_CASE_LETTERS + ']\w+'
785
+ CAMEL_CASED_WORD_BORDER = /([a-z#{I18N_LOWER_CASE_LETTERS}])([A-Z#{I18N_HIGHER_CASE_LETTERS}])/u
786
+
787
+ WIKI_WORD = Regexp.new('(":)?(\\\\)?(' + WIKI_WORD_PATTERN + ')\b', 0, "utf-8")
788
+
789
+ WIKI_LINK = /(":)?\[\[([^\]]+)\]\]/
790
+
791
# Turns WikiWords in +text+ into <xref> elements pointing at the sanitized
# page name. A backslash-escaped WikiWord is emitted as plain text, and a
# WikiWord that is the tail of a URL (the text just before it, without
# whitespace, starts with http:// or https://) is left alone.
#
# The former test `suffix !=~ /re/` parsed as `suffix != (~/re/)`, i.e. it
# matched the pattern against $_ and could never inspect the preceding text.
def inline_docbook_wiki_words( text )
  text.gsub!( WIKI_WORD ) do
    found = $~
    textile_link_suffix, escape, page_name = found[1..3]
    inside_url = found.pre_match =~ %r{https?://[^\s]*\z}
    if escape.nil? && !inside_url
      "#{textile_link_suffix}<xref linkend=\"#{ sanitized_reference_for page_name }\"></xref>"
    else
      "#{textile_link_suffix}#{page_name}"
    end
  end
end
801
+
802
# Turns "[[123]]" style links in +text+ into <xref> elements whose linkend
# is the sanitized form of "id123". A leading '":' is kept in front.
def inline_docbook_wiki_links( text )
  text.gsub!( WIKI_LINK ) do
    prefix, content_id = $1, $2
    target = sanitized_reference_for( "id#{content_id}" )
    "#{prefix}<xref linkend=\"#{ target }\"></xref>"
  end
end
808
+
809
# Search and replace for glyphs (quotes, dashes, other symbols).
# A DOCBOOK_GLYPHS entry may name a toggle method as its third element;
# the entry is skipped when that method returns a true value.
def docbook_pgl( text )
  DOCBOOK_GLYPHS.each do |pattern, replacement, toggle|
    disabled = toggle && method( toggle ).call
    text.gsub!( pattern, replacement ) unless disabled
  end
end
816
+
817
# Parses a Textile attribute list and builds a Docbook attribute string.
#
# "(class#id)" becomes role/id attributes; colspan/rowspan are only read for
# the 'td' element. "{style}" (unless filter_styles) and "[lang]" parts are
# stripped from the working copy but not emitted. Returns '' for nil input.
def docbook_pba( text_in, element = "" )
  return '' unless text_in

  text = text_in.dup
  colspan = rowspan = nil
  if element == 'td'
    colspan = $1 if text =~ /\\(\d+)/
    rowspan = $1 if text =~ /\/(\d+)/
  end

  # strip the parts that are recognised but produce no Docbook attribute
  text.sub!( /\{([^}]*)\}/, '' ) unless filter_styles
  text.sub!( /\[([^)]+?)\]/, '' )

  cls = text.sub!( /\(([^()]+?)\)/, '' ) ? $1 : nil
  id = nil
  if cls =~ /^(.*?)#(.*)$/
    cls, id = $1, $2
  end

  atts = ''
  atts << " role=\"#{ cls }\"" unless cls.to_s.empty?
  [['id', id], ['colspan', colspan], ['rowspan', rowspan]].each do |name, value|
    atts << " #{ name }=\"#{ value }\"" if value
  end
  atts
end
848
+
849
# Extracts the trailing " role. NAME" and " id. NUMBER" markers from +text+.
#
# +text+ is truncated in place to what precedes the first marker. Returns
# [content_id, role], where content_id is "id<NUMBER>" or nil and role is
# NAME or nil. The markers may come in either order; previously an id that
# followed the role marker was cut off together with it and lost.
def sanitize_content( text="" )
  original = text.dup

  role = nil
  if (found = /(.*?) role\. (\w+)/.match( text ))
    role = found[2]
    text.replace found[1]
  end

  number = nil
  if (found = /(.*?) id\. ([0-9]+)/.match( text ))
    number = found[2]
    text.replace found[1]
  elsif (found = / id\. ([0-9]+)/.match( original ))
    # the id marker sat behind the role marker that was just cut away
    number = found[1]
  end

  content_id = number ? "id#{number}" : nil
  return content_id, role
end
856
+
857
# Derives an id from +text+: camel-case borders are split, the result is
# lower-cased, whitespace becomes "-", characters other than letters, digits,
# "-", "{" and "}" are dropped and leading non-word characters (except "{")
# are stripped. The id is remembered in @ids (once) and returned.
def sanitized_id_for( text )
  spaced = text.gsub(CAMEL_CASED_WORD_BORDER, '\1 \2')
  dashed = spaced.downcase.gsub(/\s/,'-')
  word = dashed.gsub(/[^A-Za-z0-9\-\{\}]/,'').sub(/^[^\w\{]*/, '')
  @ids << word unless @ids.include? word
  word
end
862
+
863
# Derives a reference target from +text+ with the same normalisation that is
# used for ids (split camel case, lower-case, whitespace to "-", drop other
# characters, strip leading non-word characters). The target is remembered in
# @references (once) and returned.
def sanitized_reference_for( text )
  spaced = text.gsub(CAMEL_CASED_WORD_BORDER, '\1 \2')
  dashed = spaced.downcase.gsub(/\s/,'-')
  word = dashed.gsub(/[^A-Za-z0-9\-\{\}]/,'').sub(/^[^\w\{]*/, '')
  @references << word unless @references.include? word
  word
end
868
+
869
+ DOCBOOK_PARAS = ['para', 'remark', 'tip', 'important']
870
# Block-level pass over +text+ (modified in place).
#
# "div(class)." sections are rendered through docbook_div and separated with
# BLOCK_GROUP_SPLITTER; the text is then split at that marker and every chunk
# (or the body of a chunk that is one recognised wrapper element) is run
# through docbook_block_groups.
def docbook_blocks( text, deep_code = false )
  @current_class ||= nil

  # Renders one div section; a section repeating the current class, or one
  # with an empty body, is passed through without a wrapper.
  render_div = lambda do |klass, body|
    role = (@current_class == klass) ? nil : %{ role=#{klass.inspect}}
    @current_class = klass
    inner = (body.strip.empty? || role.nil?) ? body : docbook_div('div', role, nil, "\n\n#{body.strip}\n\n", false)
    BLOCK_GROUP_SPLITTER + inner
  end

  # Find all occurrences of div(class). and process them as blocks
  text.gsub!( /^div\((.*?)\)\.\s*(.*?)(?=div\([^\)]+\)\.\s*)/m ) { render_div.call( $1, $2 ) }

  # Take care of the very last div
  text.sub!( /div\((.*?)\)\.\s*(.*)/m ) { render_div.call( $1, $2 ) }

  # Handle the text now that the placeholders for divs are set, splitting at BLOCK_GROUP_SPLITTER
  wrapper = %r{(<(#{(DOCBOOK_PARAS+DOCBOOK_DIVS).join("|")}).*?>)\s*(<para[^>]*>)?\s*(.*?)\s*(</para>)?\s*(</\2>)}m
  rendered = text.strip.split(BLOCK_GROUP_SPLITTER.strip).map do |chunk|
    trimmed = chunk.strip
    found = wrapper.match( trimmed )
    tag, tag_name, para, body, end_para, end_tag = found[1..6] if found

    # only unwrap when the chunk actually starts with the wrapper tag
    unless tag && trimmed.split[0][/<.*?>/] == tag
      next docbook_block_groups(chunk, deep_code)
    end

    para_like = DOCBOOK_PARAS.include?( tag_name )
    if para_like
      tag = "#{para}#{tag}"
      end_tag = "#{end_para}#{end_tag}"
    end
    body = docbook_block_groups(body, deep_code)
    body = "\n"+body.strip+"\n" unless para_like

    tag + body + end_tag + "\n"
  end
  text.replace(rendered.join)
end
905
+
906
# Splits +text+ at BLOCKS_GROUP_RE, runs every piece through docbook_blk and
# replaces +text+ with the results joined by newlines. Returns +text+.
def docbook_block_groups( text, deep_code = false )
  rendered = text.split( BLOCKS_GROUP_RE ).map { |blk| docbook_blk( blk, deep_code ) }
  text.replace( rendered.join( "\n" ) )
end
909
+
910
# Renders a single block.
#
# Shelf placeholders and blocks opening with a tag outside
# SIMPLE_DOCBOOK_TAGS are returned untouched. Otherwise indented sub-blocks
# are processed recursively, the block_* rules in @rules are tried, and a
# block no rule claimed is wrapped in <para> (or <para><programlisting> when
# +deep_code+ is set).
def docbook_blk( text, deep_code = false )
  return text if text =~ /<[0-9]+>/

  plain = text !~ /\A[#*> ]/

  # skip blocks that are complex HTML
  return text if text =~ /^<\/?(\w+).*>/ && !SIMPLE_DOCBOOK_TAGS.include?( $1 )

  # search for indentation levels
  text.strip!
  return text if text.empty?

  code_blk = nil
  text.gsub!( /((?:\n(?:\n^ +[^\n]*)+)+)/m ) do |iblk|
    flush_left iblk
    docbook_blocks iblk, plain
    if plain
      # indented text under a plain block is moved behind the block
      code_blk = iblk
      ""
    else
      iblk
    end
  end

  applied = @rules.select do |rule_name|
    rule_name.to_s.match( /^block_/ ) && method( rule_name ).call( text )
  end
  if applied.empty?
    text =
      if deep_code
        "<para><programlisting>#{ text }</programlisting></para>" # unless text =~ /list>/
      else
        "<para>#{text}</para>\n"
      end
  end
  # hard_break text
  text << "\n#{ code_blk }"
  text
end
953
+
954
# Picks the Docbook element for a block or span from its attribute string.
#
# The first keyword found in +atts+ (checked in the order listed below) wins.
# Returns [element, attributes]; both are handed back unchanged when no
# keyword matches. The xref case rewrites +content+ in place.
def docbook_sanitize_para(atts, content, ht = "para")
  # keywords that only swap the element and drop the attributes
  renames = [
    [/comment/,    "remark"],
    [/preface/,    "preface"],
    [/blockquote/, "blockquote"],
    [/warning/,    "warning"],
    [/note/,       "note"],
    [/tip/,        "tip"],
    [/important/,  "important"],
    [/filename/,   "filename"],
    [/production/, "remark"]
  ]
  hit = renames.find { |pattern, _element| pattern === atts }
  return hit[1], nil if hit

  if /xref/ === atts
    if content =~ /^(.*)\[Hack \#(.*)\]$/
      name = $2
      ht = %Q{link linkend="#{sanitized_reference_for name}"}
      content.gsub!( /^(.*)\s\[Hack \#(.*)\]$/, '\1' )
    else
      ht = %Q{xref linkend="#{sanitized_reference_for content}"}
      content.replace ''
    end
    atts = nil
  elsif /synopsis/ === atts
    ht, atts = "para", %{ role="hack synopsis"}
  elsif /author/ === atts
    ht, atts = "para", %{ role="hacks-contributor"}
  elsif /technical/ === atts
    ht, atts = "command", nil
  end

  return ht, atts
end
1005
+
1006
+ end