RubyGems - docter - Versions diffs - 1.0.0 - Mend

docter 1.0.0

Files changed (12) hide show

data/lib/docter/common.rb ADDED Viewed

@@ -0,0 +1,303 @@
+module Docter
+  module HTML
+    module_function
+    # :call-seq:
+    #   inner_text_from(html) => text
+    #
+    # Returns the inner text from some HTML text, effectively stripping the tags, normalizing whitespaces
+    # and stripping leading/trailing spaces.
+    def inner_text_from(html)
+      html.gsub(/<(\w*).*?>(.*?)<\/\1\s*>/m, "\\2").strip.gsub(/\s+/m, " ")
+    end
+    def regexp_element(name)
+      Regexp.new("<(#{name})\\s*(.*?)>(.*?)<\\/\\1\\s*>", Regexp::MULTILINE + Regexp::IGNORECASE)
+    end
+    def regexp_attribute(name)
+      Regexp.new("(#{name})=([\"'])(.*?)\\2", Regexp::MULTILINE + Regexp::IGNORECASE)
+    end
+  end
+  # Base class for resources like pages, templates, ToC, and anything else that you can create dynamically,
+  # or load from a file. It's the second usage that's more interesting: when coming from a file, the resource
+  # is created lazily, and you can detect when it is modified and reload it.
+  #
+  # A class that inherits from Resource must: a) call #load before using any value obtained from the resource
+  # (e.g page title), and b) implement one or more create_from_[format] methods for each content format it
+  # supports (e.g. create_from_textile).
+  module Resource
+    # Maps various filename extensions to the appropriate format. You only need to use this when the filename
+    # extension is not the same as the format, e.g. map ".txt" to :plain, but not necessary to map ".textile".
+    EXTENSIONS = { ""=>:plain, ".txt"=>:plain, ".text"=>:plain, ".thtml"=>:textile, ".mhtml"=>:markdown }
+    class << self
+      # :call-seq:
+      #   format_from(filename) => symbol
+      #
+      # Returns the format based on the filename. Basically uses the filename extension, possibly mapped
+      # using EXTENSIONS, and returns :plain if the filename has no extension.
+      def format_from(filename)
+        ext = File.extname(filename)
+        EXTENSIONS[ext] || ext[1..-1].to_sym
+      end
+    end
+    module Reloadable
+      # The filename, if this resource comes from a file, otherwise nil.
+      attr_reader :filename
+      # :call-seq:
+      #   modified() => time
+      #
+      # Returns the date/time this resource was last modified. If the resource comes from a file,
+      # the timestamp of the file, otherwise the when the resource was created.
+      def modified()
+        @filename ? File.stat(@filename).mtime : @modified
+      end
+      # :call-seq:
+      #   modified?() => boolean
+      #
+      # Returns true if the resource was modified since it was lase (re)loaded. Only applies to resources
+      # created from a file, all other resources return false.
+      def modified?()
+        @filename ? File.stat(@filename).mtime > @modified : false
+      end
+      # :call-seq:
+      #   reload()
+      #
+      # Reloads the resource. Only applies to resources created from a file, otherwise acts like load.
+      # You can safely call it for all resources, for example:
+      #   page.reload if page.modified?
+      def reload()
+        @loaded = false if @filename
+        load
+      end
+      def to_s() #:nodoc:
+        @filename || super
+      end
+    protected
+      # See Base::new.
+      def init_from(*args, &block)
+        options = Hash === args.last ? args.pop : {}
+        case args.first
+        when String
+          @filename = args.shift
+          raise ArgumentError, "Expecting file name and options, found too may arguments." unless args.empty?
+          # We'll load the file later, but we need to known the mtime in case someone calls modified?/reload first.
+          @modified = File.stat(@filename).mtime
+          @load_using = lambda do
+            puts "Loading #{filename}"
+            # We need to know when the file we're about to read was last modified, but only keep the new mtime
+            # if we managed to read it. We're avoiding race conditions with a user editing this file.
+            modified = File.stat(@filename).mtime
+            create Resource.format_from(@filename), File.read(@filename), options
+            @modified = modified
+          end
+        when Symbol
+          @modified = Time.now # Best guess
+          format, content = args.shift, args.shift
+          raise ArgumentError, "Expecting format (as symbol) followed by content (string), found too many arguments." unless args.empty?
+          @load_using = lambda { create format, content, options }
+        else
+          if args.empty? && block
+            @modified = Time.now # Best guess
+            @load_using = lambda { block.call options }
+          else
+            raise ArgumentError, "Expecting file name, or (format, content), not sure what to do with these arguments."
+          end
+        end
+      end
+      # :call-seq:
+      #   load()
+      #
+      # Loads the resource. Call this method before anything that depends on the content of the resource,
+      # for example:
+      #   def title()
+      #     load
+      #     @title # Created by load
+      #   end
+      def load()
+        unless @loaded
+          @load_using.call
+          @loaded = true
+        end
+      end
+      # :call-seq:
+      #   create(format, content, options)
+      #
+      # Creates the resource using the specified format, content and options passed during construction.
+      #
+      # This method may be called multiple times, specifically each time the resource is loaded. Override,
+      # if you need to perform any clean up to assure propert creation, etc. Otherwise, just let it delegate
+      # to a create_from_[format] method, such as create_from_textile.
+      def create(format, content, options)
+        method = "create_from_#{format}"
+        if respond_to?(method)
+          send method, content, options
+        else
+          raise ArgumentError, "Don't know how to create #{self} from :#{format}."
+        end
+      end
+      # :call-seq:
+      #   erb(content, binding?) => content
+      #
+      # Passes the content through ERB processing. Nothing fancy, but allows you to run filters,
+      # include files, generate timestamps, calculate sales tax, etc.
+      def erb_this(content, binding = nil)
+        ERB.new(content).result(binding)
+      end
+    end
+    class Base
+      include Reloadable, HTML
+      # :call-seq:
+      #   new(filename, options?)
+      #   new(format, content, options?)
+      #   new(options?) { |options| ... }
+      #
+      # The first form loads this resource from the specified file. Decides on the format based on the filename.
+      # You can then detect modification and reload as necessary, for example:
+      #   page.reload if page.modified?
+      #
+      # The second form creates this resource from content in the specified format. This one you cannot reload.
+      # For example:
+      #   Page.new(:plain, "HAI")
+      #
+      # The third form creates this resource by calling the block with the supplied options.
+      def initialize(*args, &block)
+        init_from *args, &block
+      end
+    end
+  end
+  # Table of contents.
+  #
+  # A ToC is an array of entries, each entry providing a link to and a title, and may itself be a ToC.
+  #
+  # Supports the Enumerable methods for operating on the entries, in addition to the methods each,
+  # first/last, size, empty? and index/include?. Use #add to create new entries.
+  #
+  # Use #to_html to transform to an HTML ordered list.
+  class ToC
+    include Enumerable
+    # Array of entries.
+    attr_reader :entries
+    # Create new ToC with no entries.
+    def initialize()
+      @entries = []
+    end
+    ARRAY_METHODS = ["each", "first", "last", "size", "empty?", "include?", "index", "[]"]
+    (Enumerable.instance_methods + ARRAY_METHODS - ["entries"]).each do |method|
+      class_eval "def #{method}(*args, &block) ; entries.send(:#{method}, *args, &block) ; end", __FILE__, __LINE__
+    end
+    # :call-seq:
+    #   add(url, title) => entry
+    #   add(entry) => entry
+    #
+    # Adds (and returns) a new entry. The first form creates an entry with a link (must be a valid URL,
+    # use CGI.escape if necessary) and HTML-encoded title. The second form adds an existing entry,
+    # for example to a page.
+    def add(*args)
+      if ToCEntry === args.first
+        entry = args.shift
+        raise ArgumentError, "Can only accept a ToCEntry argument." unless args.empty?
+      else
+        entry = ToCEntry.new(*args)
+      end
+      entries << entry
+      entry
+    end
+    # :call-seq:
+    #   to_html(options) => html
+    #
+    # Transforms this ToC into an HTML ordered list (OL) by calling to_html on each ToC entry.
+    #
+    # You can use the following options:
+    # * :nested -- For entries that are also ToC, expands them as well. You can specify how many
+    #   levels (e.g. 1 to expand only once), or true to expand all levels.
+    # * :class -- Class to apply to the OL element.
+    #
+    # The +options+ argument can take the form of a Hash, list of symbols or both. Symbols are
+    # treated as +true+ for example:
+    #   to_html(:nested, :class=>"toc")
+    # Is the same as:
+    #   to_html(:nested=>true, :class=>"toc")
+    def to_html(*args)
+      options = Hash === args.last ? args.pop.clone : {}
+      args.each { |arg| options[arg.to_sym] = true }
+      cls = %{ class="#{options[:class]}"} if options[:class]
+      %{<ol #{cls}>#{map { |entry| entry.to_html(options) }}</ol>}
+    end
+  end
+  # Table of contents entry.
+  class ToCEntry < ToC
+    # The URL for this entry.
+    attr_reader :url
+    # The title of this entry.
+    attr_reader :title
+    # :call-seq:
+    #   new(url, title)
+    #
+    # URL links to the ToC entry, and must be a valid URL (use CGI.escape is necessary). The title must
+    # be HTML-encoded (use CGI.escapeHTML if necessary).
+    def initialize(url, title)
+      super()
+      @url, @title = url, title
+    end
+    # :call-seq:
+    #   to_html(nested?) => html
+    #
+    # Transforms this ToC entry into an HTML list item (LI). Depending on the nested argument,
+    # can also expand nested ToC.
+    def to_html(*args)
+      options = Hash === args.last ? args.pop.clone : {}
+      args.each { |arg| options[arg.to_sym] = true }
+      if options[:nested] && !empty?
+        nested = options[:nested].respond_to?(:to_i) && options[:nested].to_i > 0 ?
+          super(options.merge(:nested=>options[:nested] - 1)) : super(options)
+      end
+      %{<li><a href="#{url}">#{title}</a>#{nested}</li>}
+    end
+  end
+end

data/lib/docter/page.rb ADDED Viewed

@@ -0,0 +1,288 @@
+module Docter
+  # A single documentation page. Has title, content and ToC.
+  #
+  # The content is HTML without the H1 header or HEAD element, ripe for including inside the template.
+  # The title is HTML-encoded text, the ToC is created from H2/H3 elements.
+  #
+  # The content is transformed in three stages:
+  # # Transform from the original format (e.g. Textile, plain text) to HTML.
+  # # Parse the HTML to extract the body, title and ToC. The content comes from the body, less any
+  #   H1 element used for the title.
+  # # Apply filters each time the content is retrieved (from #content).
+  #
+  # Supported input formats include:
+  # * :plain -- Plain text, rendered as pre-formatted (pre).
+  # * :html -- The HTML body is extracted as the content, see below for ERB, title and ToC.
+  # * :textile -- Converted to HTML using RedCloth. See below for ERB, code blocks, title and ToC.
+  # * :markdown -- Converted to HTML using RedCloth. See below for ERB, code blocks, title and ToC.
+  #
+  # *ERB* To support dynamic content some formats are run through ERB first. You can use ERB to construct
+  # HTML, Textile, Markdown or content in any format the page is using. This happens before the content
+  # is converted to HTML.
+  #
+  # *Code blocks* Textile and Markdown support code blocks with syntax highlighting. To create a code block:
+  #   {{{!lang
+  #     ...
+  #   }}}
+  # You can use !lang to specify a language for syntax highlighting, e.g. !ruby, !sql, !sh. See Syntax
+  # for more information. The language is optional, code blocks without it are treated as plain text.
+  # You can also use syntax highlighting from HTML by specifying the class attribute on the pre element.
+  #
+  # *Title* The recommended way to specify the page title is using an H1 header. Only one H1 header is allowed,
+  # and that element is removed from the content. Alternatively, you can also use the TITLE element, if both
+  # TITLE and H1 are used, they must match.
+  #
+  # If none of these options are available (e.g. for :plain) the title comes from the filename, treating
+  # underscore as space and capitalizing the first letter, e.g. change_log.txt becomes "Change log".
+  #
+  # *ToC* The table of contents is constructed from H2 and H3 headers. H2 headers provide the top-level sections,
+  # and H3 headers are nested inside H2 headers.
+  #
+  # The ToC links to each section based on the ID attribute of the header. If the header lacks an ID attribute,
+  # one is created using the header title, for example:
+  #   h2. Getting Started
+  # becomes:
+  #   <h2 id="getting_started">Getting Started</h2>
+  # You can rely on these IDs to link inside the page and across pages.
+  #
+  # *Filters* Runs the default chain of filters, or those specified by the :filters option. See Filter
+  # for more information. Filters are typically used to do post-processing on the HTML, e.g. syntax highlighting,
+  # URL rewriting.
+  class Page < Resource::Base
+    # ToC entry for a page. Obtains title and URL from the page, includes entries from the page
+    # ToC and can return the page itself.
+    class ToCEntryForPage < ToCEntry #:nodoc:
+      def initialize(page)
+        @page = page
+      end
+      def title()
+        @page.title
+      end
+      def url()
+        @page.path
+      end
+      def entries()
+        @page.toc.entries
+      end
+    end
+    # :call-seq:
+    #   title() => string
+    #
+    # Returns the page title.
+    def title()
+      load
+      @title
+    end
+    def title=(title) # NOTE(review): parse also assigns @title, so a load that happens after this call may replace the value -- confirm.
+      @title = title
+    end
+    # :call-seq:
+    #   content() => string
+    #
+    # Returns the page content (HTML), passed through Filter.process on each call.
+    def content()
+      load
+      Filter.process(@content)
+    end
+    # :call-seq:
+    #   toc() => ToC
+    #
+    # Returns the table of contents.
+    def toc()
+      load
+      @toc
+    end
+    # :call-seq:
+    #   path() => filename
+    #
+    # Returns the path for this page. You can use this to link to the page from any other page.
+    #
+    # For example, if the page name is "intro.textile" the path will be "intro.html".
+    def path()
+      @path ||= File.basename(@filename).downcase.ext(".html") # String#ext is not core Ruby; presumably Rake's extension -- confirm it is loaded.
+    end
+    # :call-seq:
+    #   id() => string
+    #
+    # Returns fragment identifier for this page: the title, downcased, with whitespace runs replaced by underscores.
+    def id()
+      @id ||= title.gsub(/\s+/, "_").downcase
+    end
+    def entries() #:nodoc:
+      toc.entries
+    end
+    # :call-seq:
+    #   toc_entry() => ToCEntry
+    #
+    # Returns a ToC entry for this page, created once and reused. The entry takes its title, URL (#path)
+    # and nested entries from this page.
+    def toc_entry()
+      @toc_entry ||= ToCEntryForPage.new(self)
+    end
+  protected
+    def create_from_html(html, options)
+      parse(erb_this(html), options)
+    end
+    def create_from_plain(text, options)
+      parse(%{<pre class="text">#{CGI.escapeHTML(text)}</pre>}, options)
+    end
+    def create_from_textile(textile, options)
+      parse(use_redcloth(:textile, textile, options), options)
+    end
+    def create_from_markdown(markdown, options)
+      parse(use_redcloth(:markdown, markdown, options), options)
+    end
+  private
+    if defined?(::RedCloth)
+      # :call-seq:
+      #   use_redcloth(format, text, options)
+      #
+      # Format may be :textile or :markdown. Runs erb_this on the text first to apply ERB code,
+      # processes code sections ({{{ ... }}}), and converts the Textile/Markdown text to HTML.
+      def use_redcloth(format, text, options)
+        text = erb_this(text)
+        # Process {{{ ... }}} code sections into pre tags.
+        text = text.gsub(/^\{\{\{([^\n]*)\n(.*?)\n\}\}\}/m) do
+          code, spec = $2, $1.scan(/^!(.*?)$/).to_s.strip
+          %{<notextile><pre class="#{spec.split(",").join(" ")}">#{CGI.escapeHTML(code)}</pre></notextile>}
+        end
+        # Create the HTML.
+        RedCloth.new(text, [:no_span_caps]).to_html(format)
+      end
+    else
+      def use_redcloth(format, text, options)
+        fail "You need to install RedCloth first:\n  gem install RedCloth"
+      end
+    end
+    # :call-seq:
+    #   parse(html, options)
+    #
+    # Parses HTML into the content, title and ToC. This method can take an HTML document and will extract
+    # its body. It can deduce the title from the H1 element, TITLE element or :title option, or filename.
+    def parse(html, options)
+      # Get the body (in most cases it's just the page). Make sure when we wreck havoc on the HTML,
+      # we're not changing any content passed to us.
+      body = html[regexp_element("body")] ? $2 : html.clone
+      # The correct structure is to use H1 for the document title (but TITLE element will also work).
+      # If both are used, they must both match. Two or more H1 is a sign you're using H1 instead of H2.
+      title = html.scan(regexp_element("title|h1")).map{ |parts| inner_text_from(parts.last) }.uniq
+      raise ArgumentError, "A page can only have one title, you can use the H1 element (preferred) or TITLE element, or both if they're the same. If you want to title sections, please use the H2 element" if title.size > 1
+      # Lacking that, we need to derive the title somehow.
+      title = title.first || options[:title] || (filename && filename.pathmap("%n").gsub("_", " ").capitalize) || "Untitled"
+      # Get rid of H1 header.
+      body.gsub!(regexp_element("h1"), "")
+      # Catalog all the major sections, based on the H2/H3 headers.
+      toc = ToC.new
+      body.gsub!(regexp_element("h[23]")) do |header|
+        tag, attributes, text = $1.downcase, $2.to_s, inner_text_from($3)
+        # Make sure all H2/H3 headers have a usable ID, create once if necessary.
+        id = CGI.unescape($3) if attributes[regexp_attribute("id")]
+        if id.blank?
+          id = CGI.unescapeHTML(text.downcase.gsub(" ", "_"))
+          header = %{<#{tag} #{attributes} id="#{id}">#{text}</#{tag}>}
+        end
+        if tag == "h2"
+          toc.add "##{id}", text
+        else
+          fail ArgumentError, "H3 section found without any H2 section." unless toc.last
+          toc.last.add "##{id}", text
+        end
+        header
+      end
+      @content, @title, @toc = body, title, toc
+    end
+  end
+  # Filters are used to process HTML before rendering, e.g to apply syntax highlighting, URL rewriting.
+  # To add a new filter:
+  #   filter_for(:upcase) { |html| html.upcase }
+  module Filter
+    class << self
+      # :call-seq:
+      #   list() => names
+      #
+      # Return the names of all defined filters.
+      def list()
+        @filters.keys
+      end
+      # :call-seq:
+      #   filter_for(name) { |html| ... }
+      #
+      # Defines a filter for +name+ using a block that will transform the HTML.
+      def filter_for(name, &block)
+        @filters[name.to_sym] = block
+        self
+      end
+      # :call-seq:
+      #   process(html) => html
+      #   process(html, *name) => html
+      #
+      # Process the HTML using the available filters and returns the resulting HTML.
+      # The second form uses only the selected filters.
+      def process(html, *using)
+        using = using.flatten.compact
+        (using.empty? ? @filters.values : @filters.values_at(*using)).
+          inject(html) { |html, filter| filter.call(html) }
+      end
+    end
+    @filters = {}
+  end
+  class << self
+    # :call-seq:
+    #   filter_for(name) { |html| ... }
+    #
+    # Defines a filter for +name+ using a block that will transform the HTML.
+    def filter_for(name, &block)
+      Filter.filter_for(name, &block)
+    end
+    # :call-seq:
+    #   page(filename, options?)
+    #   page(format, content, options?)
+    #
+    # The first form loads the page from the specified filename. The second creates the page from
+    # the content string based on the specified format.
+    def page(*args)
+      Page.new(*args)
+    end
+  end
+end

data/lib/docter/rake.rb ADDED Viewed

@@ -0,0 +1,25 @@
+module Docter
+  module Rake
+    class << self
+      def generate(target, collection, template, *args)
+        options = Hash === args.last ? args.pop.clone : {}
+        args.each { |arg| options[arg.to_sym] = true }
+        file target=>collection.dependencies + template.dependencies do |task|
+          collection.generate template, task.name, options[:one_page] ? :one_page : :all, options
+        end
+      end
+      def serve(task_name, collection, template, *args)
+        options = Hash === args.last ? args.pop.clone : {}
+        args.each { |arg| options[arg.to_sym] = true }
+        task task_name do
+          collection.serve template, options
+        end
+      end
+    end
+  end
+end