RubyGems - pagehub-markdown - Versions diffs - 0.1.0 - Mend

pagehub-markdown 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

data/lib/pagehub-markdown.rb +13 -0
data/lib/pagehub-markdown/markdown.rb +128 -0
data/lib/pagehub-markdown/mutators/date_injector.rb +18 -0
data/lib/pagehub-markdown/processors/embedder.rb +250 -0
data/lib/pagehub-markdown/processors/pagehub_options.rb +21 -0
data/lib/pagehub-markdown/processors/toc_generator.rb +98 -0
metadata +115 -0

data/lib/pagehub-markdown.rb ADDED

@@ -0,0 +1,13 @@
+# Entry point of the gem: loads the third-party renderer and highlighter,
+# then the core Markdown module, then the processors and mutators.
+require 'redcarpet'
+require 'albino'
+require 'pagehub-markdown/markdown'
+# NOTE(review): 'pagehub-markdown/processor' and 'pagehub-markdown/mutator'
+# do not appear in the file list of this gem's metadata -- confirm they are
+# shipped, otherwise these two requires raise LoadError.
+require 'pagehub-markdown/processor'
+require 'pagehub-markdown/mutator'
+# The files below call add_processor / add_mutator at load time, so they
+# must be required after 'pagehub-markdown/markdown'.
+require 'pagehub-markdown/processors/embedder'
+require 'pagehub-markdown/processors/pagehub_options'
+require 'pagehub-markdown/processors/toc_generator'
+require 'pagehub-markdown/mutators/date_injector'
+# Top-level namespace; its contents are defined in the files required above.
+module PageHub
+end

data/lib/pagehub-markdown/markdown.rb ADDED

@@ -0,0 +1,128 @@
+module PageHub
+  module Markdown
+    class << self
+      def add_processor(stage, p) # :nodoc:
+        Stages.each { |s| @@hooks[s] ||= [] }
+        unless Stages.include?(stage.to_sym)
+          raise "Invalid stage #{stage}. Allowed stages are #{Stages.join(', ')}"
+        end
+        unless p.respond_to?(:call)
+          raise "Processor must be a callable object."
+        end
+        if stage.is_a? Array
+          stage.each { |s| @@hooks[s] << p }
+        else
+          @@hooks[stage.to_sym] << p
+        end
+      end
+      def add_mutator(m) # :nodoc:
+        unless m.respond_to?(:call)
+          raise "Mutator must be a callable object."
+        end
+        @@mutators << m
+      end
+      # (re)constructs the renderer with the given options, see
+      # PageHubOptions, RendererOptions, and RendererExtensions
+      # for accepted values
+      def configure(ph_options = {}, options = {}, extensions = {})
+        @@options  = PageHubOptions.merge(ph_options)
+        @@renderer = Redcarpet::Markdown.new(
+          HTMLWithAlbino.new(RendererOptions.merge(options)),
+          RendererExtensions.merge(extensions))
+      end
+      def render!(str)
+        configure unless @@renderer
+        @@hooks[:pre_render].each { |processor| processor.call(str) }
+        # escape any JavaScript snippets
+        if @@options[:escape_scripts]
+          str.gsub!(/\<script(.*)\>/i) {
+            mutated = true
+            "&lt;script#{$1}&gt;"
+          }
+        end
+        str = @@renderer.render(str)
+        @@hooks[:post_render].each { |processor| processor.call(str) }
+        str
+      end
+      def render(str)
+        o = str.dup; render!(o); o
+      end
+      def mutate!(str)
+        mutated = false
+        @@mutators.each { |m| mutated ||= m.call(str) }
+        mutated
+      end
+    end
+    # NOTE(review): `protected` only affects instance methods defined after
+    # it; the constants and class variables below are not restricted by it.
+    protected
+    # Stages a processor can be attached to via add_processor
+    Stages      = [ :pre_render, :post_render ]
+    # stage => list of processors; the lists are created in add_processor
+    @@hooks     = { }
+    # callables run by mutate!
+    @@mutators  = [ ]
+    # effective PageHub options, replaced by configure
+    @@options   = { }
+    # Defaults for the options handled by this module itself;
+    # :escape_scripts makes render! escape <script> tags before rendering
+    PageHubOptions = {
+      escape_scripts:   true
+    }
+    # Defaults merged with the caller's options in configure and passed to
+    # the HTML renderer
+    # NOTE(review): with_toc_data presumably provides the anchors that the
+    # ToC links (#toc_N) point to -- confirm against the Redcarpet version
+    RendererOptions = {
+      filter_html:      false,
+      no_images:        false,
+      no_links:         false,
+      no_styles:        false,
+      safe_links_only:  false,
+      with_toc_data:    true,
+      hard_wrap:        false,
+      xhtml:            false
+    }
+    # Defaults merged with the caller's extensions in configure and passed
+    # to Redcarpet::Markdown
+    RendererExtensions = {
+      no_intra_emphasis:    true,
+      tables:               false,
+      fenced_code_blocks:   true,
+      autolink:             true,
+      strikethrough:        true,
+      lax_html_blocks:      false,
+      space_after_headers:  true,
+      superscript:          true
+    }
+    private
+    # a renderer that uses Albino to highlight syntax
+    class HTMLWithAlbino < Redcarpet::Render::HTML
+      def block_code(code, language)
+        begin
+          # TODO: try to figure out whether @language is valid
+          out = Albino.colorize(code, language)
+        rescue Exception => e
+          out = ""
+          # return "-- INVALID CODE BLOCK, MAKE SURE YOU'VE SURROUNDED CODE WITH ```"
+        end
+        # just render the code as plain text if the language is invalid
+        out.empty? ? block_code(code, "text") : out
+      end
+    end
+    @@renderer = nil
+  end
+end

data/lib/pagehub-markdown/mutators/date_injector.rb ADDED

@@ -0,0 +1,18 @@
+module PageHub
+module Markdown
+  add_mutator lambda { |str|
+    mutated = false
+    str.gsub!(/\[\!date(.*)\!\]/) {
+      mutated = true
+      format = $1.empty? ? "%D" : $1.strip
+      DateTime.now.strftime(format)
+    }
+    mutated
+  }
+end # Markdown module
+end # PageHub module

data/lib/pagehub-markdown/processors/embedder.rb ADDED

@@ -0,0 +1,250 @@
+require 'open-uri'
+require 'net/http'
+require 'nokogiri'
+module PageHub
+module Markdown
+  # Downloads remote textual resources from websites
+  # and allows for content extraction from HTML pages
+  # so it can be neatly embedded in another page.
+  module Embedder
+    class EmbeddingError    < RuntimeError; end
+    class InvalidSizeError  < EmbeddingError; end
+    class InvalidTypeError  < EmbeddingError; end
+    # Resources whose content-type is not specified in this
+    # list will be rejected
+    AllowedTypes = [/text\/plain/, /text\/html/, /application\/html/]
+    # Resources larger than 1 MByte will be rejected
+    MaximumLength = 1 * 1024 * 1024
+    # Resources served by any of the hosts specified in this list
+    # will be rejected
+    FilteredHosts = []
+    Timeout = 5
+    private
+    @@processors = []
+    public
+    class << self
+      # Performs a HEAD request to validate the resource, and if it
+      # passes the checks it will be downloaded and processed if
+      # any eligible Embedder::Processor is registered.
+      #
+      # Arguments:
+      # 1. raw_uri  the full raw URI of the file to be embedded
+      # 2. source   an optional identifier to specify the Processor
+      #             that should be used to post-process the content
+      # 3. args     options that can be meaningful to the Processor, if any
+      #
+      # Returns:
+      # A string containing the extracted data, or an empty one
+      def get_resource(raw_uri, source = "", args = "")
+        begin
+          uri = URI.parse(raw_uri)
+          # reject if the host is banned
+          return "" if FilteredHosts.include?(uri.host)
+          Net::HTTP.start(uri.host, uri.port) do |http|
+            http.open_timeout = Timeout
+            http.read_timeout = Timeout
+            # get the content type and length
+            ctype = ""
+            clength = 0
+            http.head(uri.path).each { |k,v|
+              # puts "#{k} => #{v}"
+              ctype = v if k == "content-type"
+              clength = v.to_i if k == "content-length"
+            }
+            raise InvalidTypeError.new ctype if !self.allowed?(ctype)
+            raise InvalidSizeError.new clength if clength > MaximumLength
+            open(raw_uri) { |f|
+              content = f.read
+              # invoke processors
+              keys = []
+              keys << source unless source.empty?
+              keys << raw_uri
+              @@processors.each { |p|
+                if p.applies_to?(keys) then
+                  content = p.process(content, raw_uri, args)
+                  break
+                end
+              }
+              return content
+            }
+          end
+        rescue EmbeddingError => e
+          # we want to escalate these errors
+          raise e
+        rescue Exception => e
+          # mask as a generic EmbeddingError
+          raise EmbeddingError.new e.message
+        end
+        ""
+      end
+      def allowed?(ctype)
+        AllowedTypes.each { |t| return true if t.match ctype }
+        false
+      end
+      def register_processor(proc)
+        @@processors ||= []
+        @@processors << proc
+      end
+    end # class << self
+    class Processor
+      # Processors apply to "keys" which can be written manually
+      # in Markdown by the user, or are found in the host portion
+      # of the resource URI
+      #
+      # IE, a Github Wiki processor would bind to the keys:
+      # "github-wiki", or/and <tt>/github.com.*\/wiki\//</tt>
+      #
+      # Manual keys are injected after the !include keyword:
+      # [!include github-wiki!](https://github.com/some-dude/wiki/Home)
+      #
+      def initialize(keys)
+        @keys = keys
+        super()
+      end
+      def process(content, uri, args = "")
+        raise NotImplementedError
+      end
+      def applies_to?(keys)
+        @keys.each { |h| keys.each { |k| return true if h.match k } }
+        false
+      end
+      # Node should be the root node that contains the embedded content,
+      # which will be stripped of all attributes and injected with new ones:
+      # 1. data-embed-uri containing the URI of the embedded resource
+      # 2. data-embed-src the name of the processor used for embedding
+      #
+      # All children nodes that have an @id attribute will have that attribute
+      # removed as well.
+      def stamp(node, uri, key)
+        node.xpath("//*[@id]").each { |node| node.remove_attribute "id" }
+        node.attributes.each_pair { |name,_| node.remove_attribute name }
+        node['data-embed-uri'] = uri
+        node['data-embed-src'] = key
+      end
+    end
+    # Extracts content from GitHub Wiki pages
+    #
+    # Bound keys:
+    # * "github-wiki"
+    # * URI("[...]github.com/[...]/wiki/[...]")
+    #
+    class GithubWikiProcessor < Processor
+      def initialize()
+        super(["github-wiki", /github.com.*\/wiki\//])
+      end
+      # Returns the content of the node <div class='markdown-body'></div>,
+      # it will also remove all id attributes of all content nodes.
+      #
+      # Supported options:
+      # 1. reduce-headings: all heading nodes (<h1> through <h5>) will be
+      # stepped one level, so h1 becomes h2, etc.
+      def process(content, uri, args = "")
+        html_doc = Nokogiri::HTML(content) do |config| config.noerror end
+        node = html_doc.xpath("//div[@class='markdown-body']").first
+        stamp(node, uri, 'github-wiki')
+        if args.include?("reduce-headings") then
+          5.downto(1) { |level|
+            node.xpath("//h#{level}").each { |heading_node|
+              heading_node.name = "h#{level+1}"
+            }
+          }
+        end
+        node
+      end
+    end
+    # Extracts content from PageHub shared documents
+    #
+    # Bound keys:
+    # * "pagehub"
+    # * URI([...]pagehub.org/[...])
+    class PageHubProcessor < Processor
+      def initialize()
+        super(["pagehub", /pagehub.org/])
+      end
+      def process(content, uri, args = "")
+        html_doc = Nokogiri::HTML(content) do |config| config.noerror end
+        node = html_doc.xpath("//div[@id='content']").first
+        node.xpath('div[@id="breadcrumbs"]').remove
+        node.xpath('div[@id="bottom"]').remove
+        stamp(node, uri, 'pagehub')
+        node
+      end
+    end
+    register_processor(GithubWikiProcessor.new)
+    register_processor(PageHubProcessor.new)
+  end # Embedder module
+  add_processor :pre_render, lambda {|str|
+    # Embed remote references, if any
+    str.gsub!(/^\B\[\!include\s?(.*)\!\]\((.*)\)/) {
+      content = ""
+      uri = $2
+      # parse the content source and args, if any
+      source = ($1 || "").split.first || ""
+      args = ($1 || "").split || []
+      args = args[1..args.length].join(' ') unless args.empty?
+      begin
+        content = Embedder.get_resource(uri, source, args)
+      rescue Embedder::InvalidSizeError => e
+        content << "**Embedding error**: the file you tried to embed is too big - #{e.message.to_i} bytes."
+        content << " (**Source**: [#{$2}](#{$2}))\n\n"
+      rescue Embedder::InvalidTypeError => e
+        content << "**Embedding error**: the file type you tried to embed (`#{e.message}`) is not supported."
+        content << " (**Source**: [#{$2}](#{$2}))\n\n"
+      rescue Embedder::EmbeddingError => e
+        content << "**Embedding error**: #{e.message}."
+        content << " (**Source**: [#{$2}](#{$2}))\n\n"
+      end
+      # content = "<div data-embedded=true>#{content.to_s.to_markdown}</div>".to_markdown
+      # content = "#{content}"
+      content
+    }
+    str
+  }
+end # Markdown module
+end # PageHub module

data/lib/pagehub-markdown/processors/pagehub_options.rb ADDED

@@ -0,0 +1,21 @@
+module PageHub
+module Markdown
+  add_processor :post_render, lambda { |str|
+    str.gsub!(/\[\!options(.*)\!\]/) {
+      opts = $1
+      out = ""
+      unless opts.empty?
+        opts = opts.split(' ').each { |opt|
+          case opt
+          when "no-title"
+            out += "<style>header h1 { display: none }</style>"
+          end
+        }
+      end
+      out
+    }
+  }
+end # Markdown module
+end # PageHub module

data/lib/pagehub-markdown/processors/toc_generator.rb ADDED

@@ -0,0 +1,98 @@
+module PageHub
+module Markdown
+  module ToC
+    # Builds a tree of headings from a given block of Markdown
+    # text, the returned list can be turned into HTML using
+    # ToC::to_html()
+    def self.from_markdown(markdown, threshold = 6)
+      self.from_content(/(#+)\s([^\n]+)/, lambda { |l, t| return l.length, t }, markdown, threshold)
+    end
+    # renders a table of content using nested <ol> list nodes
+    # from a given list of Heading objects produced by ToC::from_markdown()
+    def self.to_html(toc)
+      html = "<ol>"
+      toc.each { |heading| html << heading.to_html }
+      html << "</ol>"
+      html
+    end
+    private
+    def self.from_content(pattern, formatter, content, threshold)
+      headings  = []
+      current   = []
+      toc_index = 0
+      content.scan(pattern).each { |l, t|
+        level,title = formatter.call(l, t)
+        if level <= threshold
+          h = Heading.new(title, level, toc_index)
+          headings << h
+          current[level] = h
+          toc_index += 1 # toc_index is used for hyperlinking
+          # if there's a parent, attach this heading as a child to it
+          if current[level-1] then
+            current[level-1] << h
+          end
+        end
+      }
+      toc = []
+      headings.each { |h|
+        next if h.parent
+        toc << h
+      }
+      toc
+    end
+    class Heading
+      attr_accessor :level, :title, :children, :parent, :index
+      def initialize(title, level, index)
+        @title = title
+        @level = level
+        @index = index
+        @parent = nil
+        @children = []
+        super()
+      end
+      def <<(h)
+        @children.each { |child|
+          return if child.title == h.title
+        }
+        h.parent = self
+        @children << h
+      end
+      def to_html()
+        html = ""
+        html << "<li>"
+        html << "<a href=\"\#toc_#{index}\">" << title << "</a>"
+        if children.any? then
+          html << "<ol>"
+          children.each { |child| html << child.to_html }
+          html << "</ol>"
+        end
+        html << "</li>"
+      end
+    end
+  end
+  # register the processor
+  add_processor :pre_render, lambda { |str|
+    str.gsub!(/^\B\[\!toc(.*)\!\]/) {
+      ToC.to_html ToC.from_markdown(str, $1.empty? ? 6 : $1.strip.to_i)
+    }
+    str
+  }
+end # Markdown module
+end # PageHub module

metadata ADDED

@@ -0,0 +1,115 @@
+--- !ruby/object:Gem::Specification
+name: pagehub-markdown
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+  prerelease:
+platform: ruby
+authors:
+- Ahmad Amireh
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2012-10-03 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: redcarpet
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: 2.1.1
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: 2.1.1
+- !ruby/object:Gem::Dependency
+  name: albino
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: 1.3.3
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: 1.3.3
+- !ruby/object:Gem::Dependency
+  name: json
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: 1.7.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: 1.7.0
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: 1.5.5
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: 1.5.5
+description: A bunch of neat features added to the Markdown renderer via pure Markdown
+  syntax.
+email: ahmad@amireh.net
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/pagehub-markdown.rb
+- lib/pagehub-markdown/processors/toc_generator.rb
+- lib/pagehub-markdown/processors/pagehub_options.rb
+- lib/pagehub-markdown/processors/embedder.rb
+- lib/pagehub-markdown/markdown.rb
+- lib/pagehub-markdown/mutators/date_injector.rb
+homepage: http://github.com/amireh/pagehub-markdown
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 1.8.23
+signing_key:
+specification_version: 3
+summary: PageHub's extensions of GitHub's Redcarpet Markdown renderer.
+test_files: []