RubyGems - html-pipeline-no-charlock - Versions diffs - 0.0.6 - Mend

html-pipeline-no-charlock 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

data/.gitignore +19 -0
data/.travis.yml +13 -0
data/CHANGELOG.md +16 -0
data/Gemfile +9 -0
data/LICENSE +22 -0
data/README.md +221 -0
data/Rakefile +13 -0
data/html-pipeline-no-charlock.gemspec +25 -0
data/html-pipeline.gemspec +26 -0
data/lib/html/pipeline.rb +130 -0
data/lib/html/pipeline/@mention_filter.rb +118 -0
data/lib/html/pipeline/autolink_filter.rb +22 -0
data/lib/html/pipeline/body_content.rb +42 -0
data/lib/html/pipeline/camo_filter.rb +70 -0
data/lib/html/pipeline/email_reply_filter.rb +56 -0
data/lib/html/pipeline/emoji_filter.rb +54 -0
data/lib/html/pipeline/filter.rb +178 -0
data/lib/html/pipeline/https_filter.rb +13 -0
data/lib/html/pipeline/image_max_width_filter.rb +37 -0
data/lib/html/pipeline/markdown_filter.rb +29 -0
data/lib/html/pipeline/plain_text_input_filter.rb +11 -0
data/lib/html/pipeline/sanitization_filter.rb +105 -0
data/lib/html/pipeline/syntax_highlight_filter.rb +29 -0
data/lib/html/pipeline/text_filter.rb +14 -0
data/lib/html/pipeline/textile_filter.rb +21 -0
data/lib/html/pipeline/toc_filter.rb +28 -0
data/lib/html/pipeline/version.rb +5 -0
data/test/html/pipeline/autolink_filter_test.rb +22 -0
data/test/html/pipeline/camo_filter_test.rb +47 -0
data/test/html/pipeline/emoji_filter_test.rb +18 -0
data/test/html/pipeline/image_max_width_filter_test.rb +50 -0
data/test/html/pipeline/markdown_filter_test.rb +101 -0
data/test/html/pipeline/mention_filter_test.rb +158 -0
data/test/html/pipeline/plain_text_input_filter_test.rb +22 -0
data/test/html/pipeline/sanitization_filter_test.rb +47 -0
data/test/html/pipeline/toc_filter_test.rb +47 -0
data/test/test_helper.rb +38 -0
metadata +214 -0

data/lib/html/pipeline/@mention_filter.rb ADDED

@@ -0,0 +1,118 @@
+require 'set'
+module HTML
+  class Pipeline
+    # HTML filter that replaces @user mentions with links. Mentions within <pre>,
+    # <code>, and <a> elements are ignored. Mentions that reference users that do
+    # not exist are ignored.
+    #
+    # Context options:
+    #   :base_url - Used to construct links to user profile pages for each
+    #               mention.
+    #   :info_url - Used to link to "more info" when someone mentions @mention
+    #               or @mentioned.
+    #
+    class MentionFilter < Filter
+      # Public: Find user @mentions in text.  See
+      # MentionFilter#mention_link_filter.
+      #
+      #   MentionFilter.mentioned_logins_in(text) do |match, login, is_mentioned|
+      #     "<a href=...>#{login}</a>"
+      #   end
+      #
+      # text - String text to search.
+      #
+      # Yields the String match, the String login name, and a Boolean determining
+      # if the match = "@mention[ed]".  The yield's return replaces the match in
+      # the original text.
+      #
+      # Returns a String replaced with the return of the block.
+      def self.mentioned_logins_in(text)
+        text.gsub MentionPattern do |match|
+          login = $1
+          yield match, login, MentionLogins.include?(login.downcase)
+        end
+      end
+      # Pattern used to extract @mentions from text.
+      MentionPattern = /
+        (?:^|\W)                   # beginning of string or non-word char
+        @((?>[a-z0-9][a-z0-9-]*))  # @username
+        (?!\/)                     # without a trailing slash
+        (?=
+          \.+[ \t\W]|              # dots followed by space or non-word character
+          \.+$|                    # dots at end of line
+          [^0-9a-zA-Z_.]|          # non-word character except dot
+          $                        # end of line
+        )
+      /ix
+      # List of username logins that, when mentioned, link to the blog post
+      # about @mentions instead of triggering a real mention. Frozen so the
+      # shared list cannot be modified at runtime.
+      MentionLogins = %w(
+        mention
+        mentions
+        mentioned
+        mentioning
+      ).freeze
+      # Don't look for mentions in text nodes that are children of these elements
+      IGNORE_PARENTS = %w(pre code a).to_set
+      def call
+        doc.search('text()').each do |node|
+          content = node.to_html
+          next if !content.include?('@')
+          next if has_ancestor?(node, IGNORE_PARENTS)
+          html = mention_link_filter(content, base_url, info_url)
+          next if html == content
+          node.replace(html)
+        end
+        doc
+      end
+      # The URL to provide when someone @mentions a "mention" name, such as
+      # @mention or @mentioned, that will give them more info on mentions.
+      def info_url
+        context[:info_url] || nil
+      end
+      # Replace user @mentions in text with links to the mentioned user's
+      # profile page.
+      #
+      # text      - String text to replace @mention usernames in.
+      # base_url  - The base URL used to construct user profile URLs.
+      # info_url  - The "more info" URL used to link to more info on @mentions.
+      #             If nil we don't link @mention or @mentioned.
+      #
+      # Returns a string with @mentions replaced with links. All links have a
+      # 'user-mention' class name attached for styling.
+      def mention_link_filter(text, base_url='/', info_url=nil)
+        self.class.mentioned_logins_in(text) do |match, login, is_mentioned|
+          link =
+            if is_mentioned
+              link_to_mention_info(login, info_url)
+            else
+              link_to_mentioned_user(login)
+            end
+          link ? match.sub("@#{login}", link) : match
+        end
+      end
+      def link_to_mention_info(text, info_url=nil)
+        return "@#{text}" if info_url.nil?
+        "<a href='#{info_url}' class='user-mention'>" +
+        "@#{text}" +
+        "</a>"
+      end
+      def link_to_mentioned_user(login)
+        url = File.join(base_url, login)
+        "<a href='#{url}' class='user-mention'>" +
+        "@#{login}" +
+        "</a>"
+      end
+    end
+  end
+end

data/lib/html/pipeline/autolink_filter.rb ADDED

@@ -0,0 +1,22 @@
+require 'rinku'
+module HTML
+  class Pipeline
+    # HTML Filter for auto_linking urls in HTML.
+    #
+    # Context options:
+    #   :autolink - boolean whether to autolink urls
+    #   :flags    - additional Rinku flags. See https://github.com/vmg/rinku
+    #
+    # This filter does not write additional information to the context.
+    class AutolinkFilter < Filter
+      def call
+        return html if context[:autolink] == false
+        flags = 0
+        flags |= context[:flags] if context[:flags]
+        Rinku.auto_link(html, :urls, nil, %w[a script kbd pre code], flags)
+      end
+    end
+  end
+end

data/lib/html/pipeline/body_content.rb ADDED

@@ -0,0 +1,42 @@
+module HTML
+  class Pipeline
+    # Public: Runs a String of content through an HTML processing pipeline,
+    # providing easy access to a generated DocumentFragment.
+    class BodyContent
+      attr_reader :result
+      # Public: Initialize a BodyContent.
+      #
+      # body     - A String body.
+      # context  - A Hash of context options for the filters.
+      # pipeline - A HTML::Pipeline object with one or more Filters.
+      def initialize(body, context, pipeline)
+        @body = body
+        @context = context
+        @pipeline = pipeline
+      end
+      # Public: Gets the memoized result of the body content as it passed through
+      # the Pipeline.
+      #
+      # Returns a Hash, or something similar as defined by @pipeline.result_class.
+      def result
+        @result ||= @pipeline.call @body, @context
+      end
+      # Public: Gets the updated body from the Pipeline result.
+      #
+      # Returns a String or DocumentFragment.
+      def output
+        @output ||= result[:output]
+      end
+      # Public: Parses the output into a DocumentFragment.
+      #
+      # Returns a DocumentFragment.
+      def document
+        @document ||= HTML::Pipeline.parse output
+      end
+    end
+  end
+end

data/lib/html/pipeline/camo_filter.rb ADDED

@@ -0,0 +1,70 @@
+require 'openssl'
+module HTML
+  class Pipeline
+    # HTML Filter for replacing http image URLs with camo versions. See:
+    #
+    # https://github.com/atmos/camo
+    #
+    # All images provided in user content should be run through this
+    # filter so that http image sources do not cause mixed-content warnings
+    # in browser clients.
+    #
+    # Context options:
+    #   :asset_proxy (required) - Base URL for constructed asset proxy URLs.
+    #   :asset_proxy_secret_key (required) - The shared secret used to encode URLs.
+    #
+    # This filter does not write additional information to the context.
+    class CamoFilter < Filter
+      # Hijacks images in the markup provided, replacing them with URLs that
+      # go through the github asset proxy.
+      def call
+        doc.search("img").each do |element|
+          next if element['src'].nil?
+          src = element['src'].strip
+          src = src.sub(%r!^http://github.com!, 'https://github.com')
+          next if context[:disable_asset_proxy]
+          if src =~ /^http:/ || src =~ /^https:\/\/img.skitch.com\//
+            element['src'] = asset_proxy_url(src)
+          else
+            element['src'] = src
+          end
+        end
+        doc
+      end
+      # Implementation of validate hook.
+      # Errors should raise exceptions or use an existing validator.
+      def validate
+        needs :asset_proxy, :asset_proxy_secret_key
+      end
+      # The camouflaged URL for a given image URL.
+      def asset_proxy_url(url)
+        "#{asset_proxy_host}/#{asset_url_hash(url)}/#{hexencode(url)}"
+      end
+      # Private: calculate the HMAC digest for a image source URL.
+      def asset_url_hash(url)
+        digest = OpenSSL::Digest::Digest.new('sha1')
+        OpenSSL::HMAC.hexdigest(digest, asset_proxy_secret_key, url)
+      end
+      # Private: the hostname to use for generated asset proxied URLs.
+      def asset_proxy_host
+        context[:asset_proxy]
+      end
+      # Private: the shared secret from the :asset_proxy_secret_key context
+      # option, used as the HMAC key when signing proxied URLs.
+      def asset_proxy_secret_key
+        context[:asset_proxy_secret_key]
+      end
+      # Private: helper to hexencode a string. Each byte ends up encoded into
+      # two characters, zero padded value in the range [0-9a-f].
+      def hexencode(str)
+        str.to_enum(:each_byte).map { |byte| "%02x" % byte }.join
+      end
+    end
+  end
+end

data/lib/html/pipeline/email_reply_filter.rb ADDED

@@ -0,0 +1,56 @@
+module HTML
+  class Pipeline
+    # HTML Filter that converts email reply text into an HTML DocumentFragment.
+    # It must be used as the first filter in a pipeline.
+    #
+    # Context options:
+    #   None
+    #
+    # This filter does not write any additional information to the context hash.
+    class EmailReplyFilter < TextFilter
+      include EscapeUtils
+      EMAIL_HIDDEN_HEADER    = %(<span class="email-hidden-toggle"><a href="#">&hellip;</a></span><div class="email-hidden-reply" style="display:none">).freeze
+      EMAIL_QUOTED_HEADER    = %(<div class="email-quoted-reply">).freeze
+      EMAIL_SIGNATURE_HEADER = %(<div class="email-signature-reply">).freeze
+      EMAIL_FRAGMENT_HEADER  = %(<div class="email-fragment">).freeze
+      EMAIL_HEADER_END       = "</div>".freeze
+      # Scans an email body to determine which bits are quoted and which should
+      # be hidden. EmailReplyParser is used to split the comment into an Array
+      # of quoted or unquoted Blocks. Now, we loop through them and attempt to
+      # add <div> tags around them so we can hide the hidden blocks, and style
+      # the quoted blocks differently. Since multiple blocks may be hidden, be
+      # sure to keep the "email-hidden-reply" <div>s around "email-quoted-reply"
+      # <div> tags. Call this on each comment of a visible thread in the order
+      # that they are displayed. Note: all comments are processed so we can
+      # maintain a Set of SHAs of paragraphs. Only plaintext comments skip the
+      # markdown step.
+      #
+      # Returns the email comment HTML as a String
+      def call
+        found_hidden = nil
+        paragraphs = EmailReplyParser.read(text.dup).fragments.map do |fragment|
+          pieces = [escape_html(fragment.to_s.strip).gsub(/^\s*(>|&gt;)/, '')]
+          if fragment.quoted?
+            pieces.unshift EMAIL_QUOTED_HEADER
+            pieces << EMAIL_HEADER_END
+          elsif fragment.signature?
+            pieces.unshift EMAIL_SIGNATURE_HEADER
+            pieces << EMAIL_HEADER_END
+          else
+            pieces.unshift EMAIL_FRAGMENT_HEADER
+            pieces << EMAIL_HEADER_END
+          end
+          if fragment.hidden? && !found_hidden
+            found_hidden = true
+            pieces.unshift EMAIL_HIDDEN_HEADER
+          end
+          pieces.join
+        end
+        paragraphs << EMAIL_HEADER_END if found_hidden
+        paragraphs.join("\n")
+      end
+    end
+  end
+end

data/lib/html/pipeline/emoji_filter.rb ADDED

@@ -0,0 +1,54 @@
+require 'emoji'
+module HTML
+  class Pipeline
+    # HTML filter that replaces :emoji: with images.
+    #
+    # Context:
+    #   :asset_root (required) - base url to link to emoji sprite
+    class EmojiFilter < Filter
+      # Build a regexp that matches all valid :emoji: names.
+      EmojiPattern = /:(#{Emoji.names.map { |name| Regexp.escape(name) }.join('|')}):/
+      def call
+        doc.search('text()').each do |node|
+          content = node.to_html
+          next if !content.include?(':')
+          next if has_ancestor?(node, %w(pre code))
+          html = emoji_image_filter(content)
+          next if html == content
+          node.replace(html)
+        end
+        doc
+      end
+      # Implementation of validate hook.
+      # Errors should raise exceptions or use an existing validator.
+      def validate
+        needs :asset_root
+      end
+      # Replace :emoji: with corresponding images.
+      #
+      # text - String text to replace :emoji: in.
+      #
+      # Returns a String with :emoji: replaced with images.
+      def emoji_image_filter(text)
+        return text unless text.include?(':')
+        text.gsub EmojiPattern do |match|
+          name = $1
+          "<img class='emoji' title=':#{name}:' alt=':#{name}:' src='#{File.join(asset_root, "emoji", "#{name}.png")}' height='20' width='20' align='absmiddle' />"
+        end
+      end
+      # The base url to link emoji sprites
+      #
+      # This reader does not raise by itself; a missing :asset_root is
+      # reported as an ArgumentError by #validate (through needs).
+      # Returns the context's asset_root.
+      def asset_root
+        context[:asset_root]
+      end
+    end
+  end
+end

data/lib/html/pipeline/filter.rb ADDED

@@ -0,0 +1,178 @@
+module HTML
+  class Pipeline
+    # Base class for user content HTML filters. Each filter takes an
+    # HTML string or Nokogiri::HTML::DocumentFragment, performs
+    # modifications and/or writes information to the result hash. Filters must
+    # return a DocumentFragment (typically the same instance provided to the call
+    # method) or a String with HTML markup.
+    #
+    # Example filter that replaces all images with trollface:
+    #
+    #   class FuuuFilter < HTML::Pipeline::Filter
+    #     def call
+    #       doc.search('img').each do |img|
+    #         img['src'] = "http://paradoxdgn.com/junk/avatars/trollface.jpg"
+    #       end
+    #       doc
+    #     end
+    #   end
+    #
+    # The context Hash passes options to filters and should not be changed in
+    # place.  A Result Hash allows filters to make extracted information
+    # available to the caller and is mutable.
+    #
+    # Common context options:
+    #   :base_url   - The site's base URL
+    #   :repository - A Repository providing context for the HTML being processed
+    #
+    # Each filter may define additional options and output values. See the class
+    # docs for more info.
+    class Filter
+      class InvalidDocumentException < StandardError; end
+      def initialize(doc, context = nil, result = nil)
+        if doc.kind_of?(String)
+          @html = doc.to_str
+          @doc = nil
+        else
+          @doc = doc
+          @html = nil
+        end
+        @context = context || {}
+        @result = result || {}
+        validate
+      end
+      # Public: Returns a simple Hash used to pass extra information into filters
+      # and also to allow filters to make extracted information available to the
+      # caller.
+      attr_reader :context
+      # Public: Returns a Hash used to allow filters to pass back information
+      # to callers of the various Pipelines.  This can be used for
+      # #mentioned_users, for example.
+      attr_reader :result
+      # The Nokogiri::HTML::DocumentFragment to be manipulated. If the filter was
+      # provided a String, parse into a DocumentFragment the first time this
+      # method is called.
+      def doc
+        @doc ||= parse_html(html)
+      end
+      # The String representation of the document. If a DocumentFragment was
+      # provided to the Filter, it is serialized into a String when this method is
+      # called.
+      def html
+        raise InvalidDocumentException if @html.nil? && @doc.nil?
+        @html || doc.to_html
+      end
+      # The main filter entry point. The doc attribute is guaranteed to be a
+      # Nokogiri::HTML::DocumentFragment when invoked. Subclasses should modify
+      # this document in place or extract information and add it to the context
+      # hash.
+      def call
+        raise NotImplementedError
+      end
+      # Make sure the context has everything we need. Noop: Subclasses can override.
+      def validate
+      end
+      # The Repository object provided in the context hash, or nil when no
+      # :repository was specified.
+      #
+      # It's assumed that the repository context has already been checked
+      # for permissions
+      def repository
+        context[:repository]
+      end
+      # The User object provided in the context hash, or nil when no user
+      # was specified
+      def current_user
+        context[:current_user]
+      end
+      # Return whether the filter can access a given repo while
+      # applying a filter
+      #
+      # A repo can only be accessed if its pullable by the user who
+      # submitted the content of this filter, or if it's the same as
+      # the repository context in which the filter runs
+      def can_access_repo?(repo)
+        return false if repo.nil?
+        return true if repo == repository
+        repo.pullable_by?(current_user)
+      end
+      # The site's base URL provided in the context hash, or '/' when no
+      # base URL was specified.
+      def base_url
+        context[:base_url] || '/'
+      end
+      # Ensure the passed argument is a DocumentFragment. When a string is
+      # provided, it is parsed and returned; otherwise, the DocumentFragment is
+      # returned unmodified.
+      def parse_html(html)
+        HTML::Pipeline.parse(html)
+      end
+      # Helper method for filter subclasses used to determine if any of a node's
+      # ancestors have one of the tag names specified.
+      #
+      # node - The Node object to check.
+      # tags - An array of tag name strings to check. These should be downcase.
+      #
+      # Returns true when the node has a matching ancestor.
+      def has_ancestor?(node, tags)
+        while node = node.parent
+          if tags.include?(node.name.downcase)
+            break true
+          end
+        end
+      end
+      # Perform a filter on doc with the given context.
+      #
+      # Returns a HTML::Pipeline::DocumentFragment or a String containing HTML
+      # markup.
+      def self.call(doc, context = nil, result = nil)
+        new(doc, context, result).call
+      end
+      # Like call but guarantees that a DocumentFragment is returned, even when
+      # the last filter returns a String.
+      def self.to_document(input, context = nil)
+        html = call(input, context)
+        HTML::Pipeline::parse(html)
+      end
+      # Like call but guarantees that a string of HTML markup is returned.
+      def self.to_html(input, context = nil)
+        output = call(input, context)
+        if output.respond_to?(:to_html)
+          output.to_html
+        else
+          output.to_s
+        end
+      end
+      # Validator for required context. This will check that anything passed in
+      # contexts exists in @contexts
+      #
+      # If any errors are found an ArgumentError will be raised with a
+      # message listing all the missing contexts and the filters that
+      # require them.
+      def needs(*keys)
+        missing = keys.reject { |key| context.include? key }
+        if missing.any?
+          raise ArgumentError,
+            "Missing context keys for #{self.class.name}: #{missing.map(&:inspect).join ', '}"
+        end
+      end
+    end
+  end
+end