RubyGems - docubot - Versions diffs - 0.2.2 → 0.3 - Mend

docubot 0.2.2 → 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

data/bin/docubot +4 -0
data/lib/docubot.rb +5 -1
data/lib/docubot/bundle.rb +10 -5
data/lib/docubot/index.rb +5 -4
data/lib/docubot/page.rb +25 -4
data/lib/docubot/templates/index.haml +20 -13
data/lib/docubot/templates/toc.haml +2 -0
data/lib/docubot/writers/chm.rb +5 -1
data/lib/docubot/writers/html.rb +3 -0
data/test/site1_html/A Slight Change of Heart/1 Ze First Page in Ze Section.html +30 -0
data/test/site1_html/A Slight Change of Heart/2 Another Page in the Section.html +30 -0
data/test/site1_html/A Slight Change of Heart/3_more_crap.html +42 -0
data/test/site1_html/A Slight Change of Heart/3_more_crap.html#frist-post +29 -0
data/test/site1_html/A Slight Change of Heart/3_more_crap.html#moar +29 -0
data/test/site1_html/A Slight Change of Heart/index.html +28 -0
data/test/site1_html/_index.html +17 -0
data/test/site1_html/_toc.html +73 -0
data/test/site1_html/appendices/gkheadftw.html +32 -0
data/test/site1_html/appendices/index.html +28 -0
data/test/site1_html/common.css +108 -0
data/test/site1_html/glossary-terms.js +1 -0
data/test/site1_html/headers.html +42 -0
data/test/site1_html/preamble.html +36 -0
data/test/site1_html/raw.html +33 -0
data/test/site1_html/toc.css +0 -0
data/test/site1_html/toc.js +0 -0
metadata +38 -28
data/lib/docubot/shells/nvphysx/0_License.md +0 -3
data/lib/docubot/shells/nvphysx/1_Getting_Started.haml +0 -51
data/lib/docubot/shells/nvphysx/Appendix/Glossary.md +0 -7
data/lib/docubot/shells/nvphysx/_glossary/APEX.md +0 -1
data/lib/docubot/shells/nvphysx/_glossary/NVIDIA.md +0 -1
data/lib/docubot/shells/nvphysx/_glossary/PhysX.textile +0 -3
data/lib/docubot/shells/nvphysx/_static/NVBadge_3D.png +0 -0
data/lib/docubot/shells/nvphysx/_static/PhysXbyNV_Black.png +0 -0
data/lib/docubot/shells/nvphysx/_templates/_root/bg_green_bar_revised.gif +0 -0
data/lib/docubot/shells/nvphysx/_templates/_root/close.png +0 -0
data/lib/docubot/shells/nvphysx/_templates/_root/common.css +0 -264
data/lib/docubot/shells/nvphysx/_templates/_root/glossary.css +0 -4
data/lib/docubot/shells/nvphysx/_templates/_root/glossary.js +0 -24
data/lib/docubot/shells/nvphysx/_templates/_root/nvdevtools.js +0 -31
data/lib/docubot/shells/nvphysx/_templates/_root/nvidia-logo.gif +0 -0
data/lib/docubot/shells/nvphysx/_templates/_root/right-sidebar.png +0 -0
data/lib/docubot/shells/nvphysx/_templates/top.haml +0 -28
data/lib/docubot/shells/nvphysx/index.txt +0 -5

data/bin/docubot CHANGED

@@ -99,7 +99,11 @@ if ARGS[:create]
 		end
 	end
 else
+	start = Time.now
 	bundle = DocuBot::Bundle.new( ARGS[:directory] )
+	lap = Time.now
+	puts "%.2fs to prepare the bundle..." % (lap-start)
 	bundle.write( ARGS[:writer], ARGS[:output] )
+	puts "%.2fs to write everything." % (Time.now-lap)
 end

data/lib/docubot.rb CHANGED

@@ -20,12 +20,16 @@ module FileUtils
 end
 module DocuBot
-	VERSION = '0.2.2'
+	VERSION = '0.3'
 	DIR     = File.expand_path( File.dirname( __FILE__ ) )
 	TEMPLATE_DIR = DIR / 'docubot/templates'
 	SHELL_DIR    = DIR / 'docubot/shells'
 	Dir.chdir( SHELL_DIR ){ SHELLS = Dir['*'] }
+	def self.id_from_text( text )
+		text.strip.gsub(/[^\w.:-]+/,'-').gsub(/^-|-$/,'')
+	end
 end
 require 'docubot/snippet'

data/lib/docubot/bundle.rb CHANGED

@@ -34,11 +34,16 @@ class DocuBot::Bundle
 					# TODO: Move this bloat elsewhere.
 					if page.toc?
-						html = page.to_html
-						page.toc.scan /[a-z][\w.:-]*/i do |id|
-							# TODO: Maybe a lightweight HTML parser would be faster here? (Certainly more robust.)
-							if title = html[/\b(?:id|ID) *= *['"]#{id}['"][^>]*>([^<]+)/,1]
-								page << DocuBot::SubLink.new( page, title.strip, id )
+						ndoc = page.nokodoc
+						toc = page.toc
+						ids = if toc[','] # Comma-delimited toc interpreted as generated ids
+							toc.split(/,\s*/).map{ |title| DocuBot.id_from_text(title) }
+						else
+							toc.scan /[a-z][\w.:-]*/i
+						end
+ 						ids.each do |id|
+							if ele = ndoc.at_css("##{id}")
+								page << DocuBot::SubLink.new( page, ele.inner_text, id )
 							else
 								warn "Could not find requested toc anchor '##{id}' on #{page.html_path}"
 							end

data/lib/docubot/index.rb CHANGED

@@ -29,14 +29,15 @@ class DocuBot::Index
 	def process_page( page )
 		page.keywords.split(/,\s*/).each{ |key| add( key, page ) } if page.keywords?
-		html = page.to_html
+		# FIXME: This is substantially slower (but way more correct) than regexp only.
 		unless page['no-index'] && page['no-index'].include?( 'headings' )
-			#TODO: Fix the regex to use a backreference to ensure the correct closing tag, once 1.8x support is not necessary
-			html.scan( %r{<h[1-6][^>]*>(.+?)</h[1-6]>}im ){ |captures| add( captures.first, page ) }
+			%w[h1 h2 h3 h4 h5 h6].each do |hn|
+				page.nokodoc.css(hn).each{ |head| add( head.inner_text, page ) }
+			end
 		end
 		unless page['no-index'] && page['no-index'].include?( 'definitions' )
-			html.scan( %r{<dt[^>]*>(.+?)</dt>}im ){ |captures| add captures.first, page }
+			page.nokodoc.css("dt").each{ |defn| add( defn.inner_text, page ) }
 		end
 	end

data/lib/docubot/page.rb CHANGED

@@ -1,7 +1,10 @@
 # encoding: UTF-8
 require 'yaml'
+require 'nokogiri'
 class DocuBot::Page
 	META_SEPARATOR = /^\+\+\+\s*$/ # Sort of like +++ATH0
+	AUTO_ID_ELEMENTS = %w[ h1 h2 h3 h4 h5 h6 legend caption dt ].join(',')
 	attr_reader :pages, :type, :folder, :file, :meta
 	attr_accessor :parent, :bundle
@@ -32,7 +35,9 @@ class DocuBot::Page
 			end
 			# Raw markup, untransformed
-			@raw = parts.last
+			if @raw = parts.last && parts.last.strip
+				@raw = @raw
+			end
 		end
 	end
 	def []( key )
@@ -89,8 +94,9 @@ class DocuBot::Page
 	def to_html
 		return @cached_html if @cached_html
-		contents = if @raw
+		contents = if @raw && !@raw.empty?
 			# Directories with no index.* file will not have any @raw
+			# TODO: Swap the order of these once we're sure that all converters will pass raw HTML through.
 			html = DocuBot::convert_to_html( self, @raw, @type )
 			DocuBot::process_snippets( self, html )
 		end
@@ -104,14 +110,29 @@ class DocuBot::Page
 		haml = master_templates / "#{template}.haml" unless File.exists?( haml )
 		haml = master_templates / "page.haml"        unless File.exists?( haml )
 		haml = Haml::Engine.new( IO.read( haml ), DocuBot::Writer::HAML_OPTIONS )
-		contents = haml.render( Object.new, :contents=>contents, :page=>self, :global=>@bundle.toc, :root=>root )
+		html = haml.render( Object.new, :contents=>contents, :page=>self, :global=>@bundle.toc, :root=>root )
+		# Add IDs to elements, only if a toc entry might reference one.
+		if @raw && @meta['toc'] && @meta['toc'][',']
+			nokodoc( html ).css( AUTO_ID_ELEMENTS ).each do |node|
+				next if node.has_attribute?('id')
+				node['id'] = DocuBot.id_from_text(node.inner_text)
+			end
+			html = @nokodoc.at_css('body').children.to_html
+		end
-		@cached_html = contents
+		@cached_html = html
 	end
 	def to_html!
 		@cached_html=nil
 		to_html
 	end
+	def nokodoc( html=nil )
+		@nokodoc ||= Nokogiri::HTML(html || to_html)
+	end
+	def nokodoc!
+		@nokodoc = Nokogiri::HTML(to_html!)
+	end
 end
 class DocuBot::SubLink

data/lib/docubot/templates/index.haml CHANGED

@@ -1,14 +1,21 @@
--# TODO: Breakdown by letter.
-%ul#index
-	- global.index.entries.sort_by{ |k,p| k.downcase }.each do |keyword,pages|
-		- if pages.length == 1
-			%li
-				%a{ :href => pages.first.html_path }= keyword
-		- else
-			%li
-				= keyword
-				%ul
-					- pages.each do |page|
-						%li
-							%a{ :href => page.html_path }= page.title
+!!! Strict
+%html
+	%head
+		%meta(http-equiv='Content-Type' content='text/html; charset=utf-8')
+		%title= global.title + " Index"
+		%link{:rel=>'stylesheet', :type=>'text/css', :href=>"#{root}common.css", :media=>'all'}
+	%body
+		-# TODO: Breakdown by letter.
+		%ul#index
+			- global.index.entries.sort_by{ |k,p| k.downcase }.each do |keyword,pages|
+				- if pages.length == 1
+					%li
+						%a{ :href => pages.first.html_path }= keyword
+				- else
+					%li
+						= keyword
+						%ul
+							- pages.each do |page|
+								%li
+									%a{ :href => page.html_path }= page.title

data/lib/docubot/templates/toc.haml CHANGED

@@ -1,7 +1,9 @@
 !!! Strict
 %html
 	%head
+		%meta(http-equiv='Content-Type' content='text/html; charset=utf-8')
 		%title= global.title
+		%link{:rel=>'stylesheet', :type=>'text/css', :href=>"#{root}common.css", :media=>'all'}
 		%link{:rel=>'stylesheet', :type=>'text/css', :href=>"#{root}toc.css", :media=>'all'}
 		%script{:type=>'text/javascript', :src=>"#{root}toc.js"}
 	%body

data/lib/docubot/writers/chm.rb CHANGED

@@ -8,11 +8,14 @@ class DocuBot::CHMWriter < DocuBot::HTMLWriter
 	def write( destination=nil )
 		super( nil )
+		lap = Time.now
 		@chm_path = destination || "#{@bundle.source}.chm"
 		@toc = @bundle.toc
 		write_hhc
 		write_hhk
 		write_hhp
+		puts "...%.2fs to write the CHM support files" % ((lap=Time.now)-lap)
 		# This will fail if a handle is open to it on Windows
 		begin
 			FileUtils.rm( @chm_path ) if File.exists?( @chm_path )
@@ -22,12 +25,13 @@ class DocuBot::CHMWriter < DocuBot::HTMLWriter
 				process.Terminate if process.CommandLine.include? @chm_path.gsub('/','\\')
 			end
 		end
-		# TODO: output timing and progress results
 		`hhc.exe "#{FileUtils.win_path @hhp}"`.gsub( /[\r\n]+/, "\n" )
+		puts "...%.2fs to create the CHM" % ((lap=Time.now)-lap)
 		# Clean out the intermediary files
 		FileUtils.rm( [ @hhc, @hhp, @hhk ] )
 		FileUtils.rm_r( @html_path )
+		puts "...%.2fs to clean up temporary files" % ((lap=Time.now)-lap)
 		# Spin a new thread so it doesn't hold up the Ruby process, but sleep long enough for it to get going.
 		Thread.new{ `hh.exe "#{FileUtils.win_path @chm_path}"` }

data/lib/docubot/writers/html.rb CHANGED

@@ -3,6 +3,8 @@ class DocuBot::HTMLWriter < DocuBot::Writer
 	# Specify nil for destination to place "<source>_html" next to the source.
 	def write( destination=nil )
+		start = Time.now
 		source = @bundle.source
 		@html_path = destination || File.dirname(source)/"#{File.basename source}_html"
 		FileUtils.rm_rf(@html_path) if File.exists?(@html_path)
@@ -54,6 +56,7 @@ class DocuBot::HTMLWriter < DocuBot::Writer
 			File.open( 'glossary-terms.js', 'w' ){ |f| f << @bundle.glossary.to_js }
 		end
+		puts "...%.2fs to write the HTML" % (Time.now - start)
 	end
 end

data/test/site1_html/A Slight Change of Heart/1 Ze First Page in Ze Section.html ADDED

@@ -0,0 +1,30 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<meta content='text/html; charset=utf-8' http-equiv='Content-Type'>
+<link href='../common.css' media='all' rel='stylesheet' type='text/css'>
+<title>Ze First Page in Ze Section</title>
+</head>
+<body>
+<div id='content'>
+<div id='pagetop'>
+<div id='breadcrumb'>
+Table of Contents
+<span class='sep'>&gt;</span>
+<a href='../A Slight Change of Heart/index.html'>A Slight Change of Heart</a>
+<span class='sep'>&gt;</span>
+Ze First Page in Ze Section
+</div>
+<h1 id='title'>Ze First Page in Ze Section</h1>
+</div>
+<div id='pagebody'>
+<div id='mainbody'><p>The title of this should be "Ze First Page in Ze Section"</p>
+</div>
+<div id='pagefooter'>
+Copyright ©2010.
+All Rights Reserved.
+</div>
+</div>
+</div>
+</body>
+</html>

data/test/site1_html/A Slight Change of Heart/2 Another Page in the Section.html ADDED

@@ -0,0 +1,30 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<meta content='text/html; charset=utf-8' http-equiv='Content-Type'>
+<link href='../common.css' media='all' rel='stylesheet' type='text/css'>
+<title>Another Page in the Section</title>
+</head>
+<body>
+<div id='content'>
+<div id='pagetop'>
+<div id='breadcrumb'>
+Table of Contents
+<span class='sep'>&gt;</span>
+<a href='../A Slight Change of Heart/index.html'>A Slight Change of Heart</a>
+<span class='sep'>&gt;</span>
+Another Page in the Section
+</div>
+<h1 id='title'>Another Page in the Section</h1>
+</div>
+<div id='pagebody'>
+<div id='mainbody'><p>The title of this page should be "Another Page in the Section".</p>
+</div>
+<div id='pagefooter'>
+Copyright ©2010.
+All Rights Reserved.
+</div>
+</div>
+</div>
+</body>
+</html>

data/test/site1_html/A Slight Change of Heart/3_more_crap.html ADDED

@@ -0,0 +1,42 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<meta content='text/html; charset=utf-8' http-equiv='Content-Type'>
+<link href='../common.css' media='all' rel='stylesheet' type='text/css'>
+<title>Additional Helpful Information</title>
+</head>
+<body>
+<div id='content'>
+<div id='pagetop'>
+<div id='breadcrumb'>
+Table of Contents
+<span class='sep'>&gt;</span>
+<a href='../A Slight Change of Heart/index.html'>A Slight Change of Heart</a>
+<span class='sep'>&gt;</span>
+Additional Helpful Information
+</div>
+<h1 id='title'>Additional Helpful Information</h1>
+<div id='author'>Gavin Kistner</div>
+</div>
+<div id='pagebody'>
+<div id='mainbody'><h2 id='frist-post'>Title Verification</h2>
+<p>The title of this page should be "Additional Helpful Information", not "More Crap".</p>
+<h2 id='moar'>TOC Verification</h2>
+<p>The TOC for this page should have sub-entries for the referenced headings.</p>
+<ul>
+<li>The links should be named for the headings.</li>
+<li>The links should link to the headings.</li>
+</ul>
+<h2 id='dumb'>Exclusivity</h2>
+<p>The TOC should not, however, have this last heading.</p>
+</div>
+<div id='pagefooter'>
+Copyright ©2010.
+All Rights Reserved.
+</div>
+</div>
+</div>
+</body>
+</html>

data/test/site1_html/A Slight Change of Heart/3_more_crap.html#frist-post ADDED

@@ -0,0 +1,29 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<meta content='text/html; charset=utf-8' http-equiv='Content-Type'>
+<link href='../common.css' media='all' rel='stylesheet' type='text/css'>
+<title>Title Verification</title>
+</head>
+<body>
+<div id='content'>
+<div id='pagetop'>
+<div id='breadcrumb'>
+Table of Contents
+<span class='sep'>&gt;</span>
+<a href='../A Slight Change of Heart/index.html'>A Slight Change of Heart</a>
+<span class='sep'>&gt;</span>
+Title Verification
+</div>
+<h1 id='title'>Title Verification</h1>
+</div>
+<div id='pagebody'>
+<div id='mainbody'></div>
+<div id='pagefooter'>
+Copyright ©2010.
+All Rights Reserved.
+</div>
+</div>
+</div>
+</body>
+</html>

data/test/site1_html/A Slight Change of Heart/3_more_crap.html#moar ADDED

@@ -0,0 +1,29 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<meta content='text/html; charset=utf-8' http-equiv='Content-Type'>
+<link href='../common.css' media='all' rel='stylesheet' type='text/css'>
+<title>TOC Verification</title>
+</head>
+<body>
+<div id='content'>
+<div id='pagetop'>
+<div id='breadcrumb'>
+Table of Contents
+<span class='sep'>&gt;</span>
+<a href='../A Slight Change of Heart/index.html'>A Slight Change of Heart</a>
+<span class='sep'>&gt;</span>
+TOC Verification
+</div>
+<h1 id='title'>TOC Verification</h1>
+</div>
+<div id='pagebody'>
+<div id='mainbody'></div>
+<div id='pagefooter'>
+Copyright ©2010.
+All Rights Reserved.
+</div>
+</div>
+</div>
+</body>
+</html>

data/test/site1_html/A Slight Change of Heart/index.html ADDED

@@ -0,0 +1,28 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<meta content='text/html; charset=utf-8' http-equiv='Content-Type'>
+<link href='../common.css' media='all' rel='stylesheet' type='text/css'>
+<title>A Slight Change of Heart</title>
+</head>
+<body>
+<div id='content'>
+<div id='pagetop'>
+<div id='breadcrumb'>
+Table of Contents
+<span class='sep'>&gt;</span>
+A Slight Change of Heart
+</div>
+<h1 id='title'>A Slight Change of Heart</h1>
+</div>
+<div id='pagebody'>
+<div id='mainbody'>
+</div>
+<div id='pagefooter'>
+Copyright ©2010.
+All Rights Reserved.
+</div>
+</div>
+</div>
+</body>
+</html>

data/test/site1_html/_index.html ADDED

@@ -0,0 +1,17 @@
+<ul id='index'>
+<li>
+<a href='headers.html'>Delivery Options</a>
+</li>
+<li>
+<a href='A Slight Change of Heart/3_more_crap.html'>Exclusivity</a>
+</li>
+<li>
+<a href='headers.html'>I like Chicken, I like Liver</a>
+</li>
+<li>
+<a href='A Slight Change of Heart/3_more_crap.html'>Title Verification</a>
+</li>
+<li>
+<a href='A Slight Change of Heart/3_more_crap.html'>TOC Verification</a>
+</li>
+</ul>