RubyGems - motion-html-pipeline - Versions diffs - 0.1 - Mend

motion-html-pipeline 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

data/lib/motion-html-pipeline/pipeline/disabled/markdown_filter.rb ADDED Viewed

@@ -0,0 +1,37 @@
+# MotionHTMLPipeline::Pipeline.require_dependency('commonmarker', 'MarkdownFilter')
+#
+# module MotionHTMLPipeline
+#   class Pipeline
+#     # HTML Filter that converts Markdown text into HTML and converts into a
+#     # DocumentFragment. This is different from most filters in that it can take
+#     # non-HTML input. It must be used as the first filter in a pipeline.
+#     #
+#     # Context options:
+#     #   :gfm      => false    Disable GFM line-end processing
+#     #   :commonmarker_extensions => [ :table, :strikethrough,
+#     #      :tagfilter, :autolink ] Common marker extensions to include
+#     #
+#     # This filter does not write any additional information to the context hash.
+#     class MarkdownFilter < TextFilter
+#       def initialize(text, context = nil, result = nil)
+#         super text, context, result
+#         @text = @text.delete "\r"
+#       end
+#
+#       # Convert Markdown to HTML using the best available implementation
+#       # and convert into a DocumentFragment.
+#       def call
+#         options = [:GITHUB_PRE_LANG]
+#         options << :HARDBREAKS if context[:gfm] != false
+#         options << :UNSAFE if context[:unsafe]
+#         extensions = context.fetch(
+#           :commonmarker_extensions,
+#           ['table', 'strikethrough', 'tagfilter', 'autolink']
+#         )
+#         html = CommonMarker.render_html(@text, options, extensions)
+#         html.rstrip!
+#         html
+#       end
+#     end
+#   end
+# end

data/lib/motion-html-pipeline/pipeline/disabled/plain_text_input_filter.rb ADDED Viewed

@@ -0,0 +1,13 @@
+# MotionHTMLPipeline::Pipeline.require_dependency('escape_utils', 'PlainTextInputFilter')
+#
+# module MotionHTMLPipeline
+#   class Pipeline
+#     # Simple filter for plain text input. HTML escapes the text input and wraps it
+#     # in a div.
+#     class PlainTextInputFilter < TextFilter
+#       def call
+#         "<div>#{EscapeUtils.escape_html(@text, false)}</div>"
+#       end
+#     end
+#   end
+# end

data/lib/motion-html-pipeline/pipeline/disabled/sanitization_filter.rb ADDED Viewed

@@ -0,0 +1,137 @@
+# MotionHTMLPipeline::Pipeline.require_dependency('sanitize', 'SanitizationFilter')
+#
+# module MotionHTMLPipeline
+#   class Pipeline
+#     # HTML filter with sanitization routines and whitelists. This module defines
+#     # what HTML is allowed in user provided content and fixes up issues with
+#     # unbalanced tags and whatnot.
+#     #
+#     # See the Sanitize docs for more information on the underlying library:
+#     #
+#     # https://github.com/rgrove/sanitize/#readme
+#     #
+#     # Context options:
+#     #   :whitelist      - The sanitizer whitelist configuration to use. This
+#     #                     can be one of the options constants defined in this
+#     #                     class or a custom sanitize options hash.
+#     #   :anchor_schemes - The URL schemes to allow in <a href> attributes. The
+#     #                     default set is provided in the ANCHOR_SCHEMES
+#     #                     constant in this class. If passed, this overrides any
+#     #                     schemes specified in the whitelist configuration.
+#     #
+#     # This filter does not write additional information to the context.
+#     class SanitizationFilter < Filter
+#       LISTS     = Set.new(%w[ul ol].freeze)
+#       LIST_ITEM = 'li'.freeze
+#
+#       # List of table child elements. These must be contained by a <table> element
+#       # or they are not allowed through. Otherwise they can be used to break out
+#       # of places we're using tables to contain formatted user content (like pull
+#       # request review comments).
+#       TABLE_ITEMS = Set.new(%w[tr td th].freeze)
+#       TABLE = 'table'.freeze
+#       TABLE_SECTIONS = Set.new(%w[thead tbody tfoot].freeze)
+#
+#       # These schemes are the only ones allowed in <a href> attributes by default.
+#       ANCHOR_SCHEMES = ['http', 'https', 'mailto', 'xmpp', :relative, 'github-windows', 'github-mac', 'irc', 'ircs'].freeze
+#
+#       # The main sanitization whitelist. Only these elements and attributes are
+#       # allowed through by default.
+#       WHITELIST = {
+#         elements: %w[
+#           h1 h2 h3 h4 h5 h6 h7 h8 br b i strong em a pre code img tt
+#           div ins del sup sub p ol ul table thead tbody tfoot blockquote
+#           dl dt dd kbd q samp var hr ruby rt rp li tr td th s strike summary
+#           details caption figure figcaption
+#           abbr bdo cite dfn mark small span time wbr
+#         ].freeze,
+#         remove_contents: ['script'].freeze,
+#         attributes: {
+#           'a'          => ['href'].freeze,
+#           'img'        => %w[src longdesc].freeze,
+#           'div'        => %w[itemscope itemtype].freeze,
+#           'blockquote' => ['cite'].freeze,
+#           'del'        => ['cite'].freeze,
+#           'ins'        => ['cite'].freeze,
+#           'q'          => ['cite'].freeze,
+#           all: %w[abbr accept accept-charset
+#                   accesskey action align alt
+#                   aria-describedby aria-hidden aria-label aria-labelledby
+#                   axis border cellpadding cellspacing char
+#                   charoff charset checked
+#                   clear cols colspan color
+#                   compact coords datetime dir
+#                   disabled enctype for frame
+#                   headers height hreflang
+#                   hspace ismap label lang
+#                   maxlength media method
+#                   multiple name nohref noshade
+#                   nowrap open prompt readonly rel rev
+#                   rows rowspan rules scope
+#                   selected shape size span
+#                   start summary tabindex target
+#                   title type usemap valign value
+#                   vspace width itemprop].freeze
+#         }.freeze,
+#         protocols: {
+#           'a'          => { 'href' => ANCHOR_SCHEMES }.freeze,
+#           'blockquote' => { 'cite' => ['http', 'https', :relative].freeze },
+#           'del'        => { 'cite' => ['http', 'https', :relative].freeze },
+#           'ins'        => { 'cite' => ['http', 'https', :relative].freeze },
+#           'q'          => { 'cite' => ['http', 'https', :relative].freeze },
+#           'img'        => {
+#             'src'      => ['http', 'https', :relative].freeze,
+#             'longdesc' => ['http', 'https', :relative].freeze
+#           }.freeze
+#         },
+#         transformers: [
+#           # Top-level <li> elements are removed because they can break out of
+#           # containing markup.
+#           lambda { |env|
+#             name = env[:node_name]
+#             node = env[:node]
+#             if name == LIST_ITEM && node.ancestors.none? { |n| LISTS.include?(n.name) }
+#               node.replace(node.children)
+#             end
+#           },
+#
+#           # Table child elements that are not contained by a <table> are removed.
+#           lambda { |env|
+#             name = env[:node_name]
+#             node = env[:node]
+#             if (TABLE_SECTIONS.include?(name) || TABLE_ITEMS.include?(name)) && node.ancestors.none? { |n| n.name == TABLE }
+#               node.replace(node.children)
+#             end
+#           }
+#         ].freeze
+#       }.freeze
+#
+#       # A more limited sanitization whitelist. This includes all attributes,
+#       # protocols, and transformers from WHITELIST but with a more locked down
+#       # set of allowed elements.
+#       LIMITED = WHITELIST.merge(
+#         elements: %w[b i strong em a pre code img ins del sup sub mark abbr p ol ul li]
+#       )
+#
+#       # Strip all HTML tags from the document.
+#       FULL = { elements: [] }.freeze
+#
+#       # Sanitize markup using the Sanitize library.
+#       def call
+#         Sanitize.clean_node!(doc, whitelist)
+#       end
+#
+#       # The whitelist to use when sanitizing. This can be passed in the context
+#       # hash to the filter but defaults to WHITELIST constant value above.
+#       def whitelist
+#         whitelist = context[:whitelist] || WHITELIST
+#         anchor_schemes = context[:anchor_schemes]
+#         return whitelist unless anchor_schemes
+#         whitelist = whitelist.dup
+#         whitelist[:protocols] = (whitelist[:protocols] || {}).dup
+#         whitelist[:protocols]['a'] = (whitelist[:protocols]['a'] || {}).merge('href' => anchor_schemes)
+#         whitelist
+#       end
+#     end
+#   end
+# end

data/lib/motion-html-pipeline/pipeline/disabled/syntax_highlight_filter.rb ADDED Viewed

@@ -0,0 +1,44 @@
+# MotionHTMLPipeline::Pipeline.require_dependency('rouge', 'SyntaxHighlightFilter')
+#
+# module MotionHTMLPipeline
+#   class Pipeline
+#     # HTML Filter that syntax highlights code blocks wrapped
+#     # in <pre lang="...">.
+#     class SyntaxHighlightFilter < Filter
+#       def initialize(*args)
+#         super(*args)
+#         @formatter = Rouge::Formatters::HTML.new
+#       end
+#
+#       def call
+#         doc.search('pre').each do |node|
+#           default = context[:highlight] && context[:highlight].to_s
+#           next unless lang = node['lang'] || default
+#           next unless lexer = lexer_for(lang)
+#           text = node.inner_text
+#
+#           html = highlight_with_timeout_handling(text, lang)
+#           next if html.nil?
+#
+#           node.inner_html = html
+#           klass = node['class']
+#           scope = context[:scope] || "highlight-#{lang}"
+#           klass = [klass, scope].compact.join ' '
+#
+#           node['class'] = klass
+#         end
+#         doc
+#       end
+#
+#       def highlight_with_timeout_handling(text, lang)
+#         Rouge.highlight(text, lang, @formatter)
+#       rescue Timeout::Error => _
+#         nil
+#       end
+#
+#       def lexer_for(lang)
+#         Rouge::Lexer.find(lang)
+#       end
+#     end
+#   end
+# end

data/lib/motion-html-pipeline/pipeline/disabled/toc_filter.rb ADDED Viewed

@@ -0,0 +1,67 @@
+# MotionHTMLPipeline::Pipeline.require_dependency('escape_utils', 'TableOfContentsFilter')
+#
+# module MotionHTMLPipeline
+#   class Pipeline
+#     # HTML filter that adds an 'id' attribute to all headers
+#     # in a document, so they can be accessed from a table of contents.
+#     #
+#     # Generates the Table of Contents, with links to each header.
+#     #
+#     # Examples
+#     #
+#     #  TocPipeline =
+#     #    MotionHTMLPipeline::Pipeline.new [
+#     #      MotionHTMLPipeline::Pipeline::TableOfContentsFilter
+#     #    ]
+#     #  # => #<MotionHTMLPipeline::Pipeline:0x007fc13c4528d8...>
+#     #  orig = %(<h1>Ice cube</h1><p>is not for the pop chart</p>)
+#     #  # => "<h1>Ice cube</h1><p>is not for the pop chart</p>"
+#     #  result = {}
+#     #  # => {}
+#     #  TocPipeline.call(orig, {}, result)
+#     #  # => {:toc=> ...}
+#     #  result[:toc]
+#     #  # => "<ul class=\"section-nav\">\n<li><a href=\"#ice-cube\">...</li>\n</ul>"
+#     #  result[:output].to_s
+#     #  # => "<h1>\n<a id=\"ice-cube\" class=\"anchor\" href=\"#ice-cube\">..."
+#     class TableOfContentsFilter < Filter
+#       PUNCTUATION_REGEXP = RUBY_VERSION > '1.9' ? /[^\p{Word}\- ]/u : /[^\w\- ]/
+#
+#       # The icon that will be placed next to an anchored rendered markdown header
+#       def anchor_icon
+#         context[:anchor_icon] || '<span aria-hidden="true" class="octicon octicon-link"></span>'
+#       end
+#
+#       def call
+#         result[:toc] = ''
+#
+#         headers = Hash.new(0)
+#         doc.css('h1, h2, h3, h4, h5, h6').each do |node|
+#           text = node.text
+#           id = ascii_downcase(text)
+#           id.gsub!(PUNCTUATION_REGEXP, '') # remove punctuation
+#           id.tr!(' ', '-') # replace spaces with dash
+#
+#           uniq = headers[id] > 0 ? "-#{headers[id]}" : ''
+#           headers[id] += 1
+#           if header_content = node.children.first
+#             result[:toc] << %(<li><a href="##{id}#{uniq}">#{EscapeUtils.escape_html(text)}</a></li>\n)
+#             header_content.add_previous_sibling(%(<a id="#{id}#{uniq}" class="anchor" href="##{id}#{uniq}" aria-hidden="true">#{anchor_icon}</a>))
+#           end
+#         end
+#         result[:toc] = %(<ul class="section-nav">\n#{result[:toc]}</ul>) unless result[:toc].empty?
+#         doc
+#       end
+#
+#       if RUBY_VERSION >= '2.4'
+#         def ascii_downcase(str)
+#           str.downcase(:ascii)
+#         end
+#       else
+#         def ascii_downcase(str)
+#           str.downcase
+#         end
+#       end
+#     end
+#   end
+# end

data/lib/motion-html-pipeline/pipeline/filter.rb ADDED Viewed

@@ -0,0 +1,163 @@
+module MotionHTMLPipeline
+  class Pipeline
+    # Base class for user content HTML filters. Each filter takes an
+    # HTML string or MotionHTMLPipeline::DocumentFragment, performs
+    # modifications and/or writes information to the result hash. Filters must
+    # return a DocumentFragment (typically the same instance provided to the call
+    # method) or a String with HTML markup.
+    #
+    # Example filter that replaces all images with trollface:
+    #
+    #   class FuuuFilter < MotionHTMLPipeline::Pipeline::Filter
+    #     def call
+    #       doc.search('img').each do |img|
+    #         img['src'] = "http://paradoxdgn.com/junk/avatars/trollface.jpg"
+    #       end
+    #     end
+    #   end
+    #
+    # The context Hash passes options to filters and should not be changed in
+    # place.  A Result Hash allows filters to make extracted information
+    # available to the caller and is mutable.
+    #
+    # Common context options:
+    #   :base_url   - The site's base URL
+    #   :repository - A Repository providing context for the HTML being processed
+    #
+    # Each filter may define additional options and output values. See the class
+    # docs for more info.
+    class Filter
+      class InvalidDocumentException < StandardError; end
+      def initialize(doc, context = nil, result = nil)
+        if doc.is_a?(String)
+          @html = doc.to_str
+          @doc = nil
+        else
+          @doc = doc
+          @html = nil
+        end
+        @context = context || {}
+        @result = result || {}
+        validate
+      end
+      # Public: Returns a simple Hash used to pass extra information into filters
+      # and also to allow filters to make extracted information available to the
+      # caller.
+      attr_reader :context
+      # Public: Returns a Hash used to allow filters to pass back information
+      # to callers of the various Pipelines.  This can be used for
+      # #mentioned_users, for example.
+      attr_reader :result
+      # The MotionHTMLPipeline::DocumentFragment to be manipulated. If the filter was
+      # provided a String, parse into a DocumentFragment the first time this
+      # method is called.
+      def doc
+        @doc ||= parse_html(html)
+      end
+      # The String representation of the document. If a DocumentFragment was
+      # provided to the Filter, it is serialized into a String when this method is
+      # called.
+      def html
+        raise InvalidDocumentException if @html.nil? && @doc.nil?
+        @html || doc.to_html
+      end
+      # The main filter entry point. The doc attribute is guaranteed to be a
+      # MotionHTMLPipeline::DocumentFragment when invoked. Subclasses should modify
+      # this document in place or extract information and add it to the context
+      # hash.
+      def call
+        raise NotImplementedError
+      end
+      # Make sure the context has everything we need. Noop: Subclasses can override.
+      def validate; end
+      # The Repository object provided in the context hash, or nil when no
+      # :repository was specified.
+      #
+      # It's assumed that the repository context has already been checked
+      # for permissions
+      def repository
+        context[:repository]
+      end
+      # The User object provided in the context hash, or nil when no user
+      # was specified
+      def current_user
+        context[:current_user]
+      end
+      # The site's base URL provided in the context hash, or '/' when no
+      # base URL was specified.
+      def base_url
+        context[:base_url] || '/'
+      end
+      # Ensure the passed argument is a DocumentFragment. When a string is
+      # provided, it is parsed and returned; otherwise, the DocumentFragment is
+      # returned unmodified.
+      def parse_html(html)
+        MotionHTMLPipeline::Pipeline.parse(html)
+      end
+      # Helper method for filter subclasses used to determine if any of a node's
+      # ancestors have one of the tag names specified.
+      #
+      # node - The Node object to check.
+      # tags - An array of tag name strings to check. These should be downcase.
+      #
+      # Returns true when the node has a matching ancestor.
+      def has_ancestor?(node, tags)
+        while node = node.parentNode
+          break true if tags.include?(node.name.downcase)
+        end
+      end
+      # Perform a filter on doc with the given context.
+      #
+      # Returns a MotionHTMLPipeline::Pipeline::DocumentFragment or a String containing HTML
+      # markup.
+      def self.call(doc, context = nil, result = nil)
+        new(doc, context, result).call
+      end
+      # Like call but guarantees that a DocumentFragment is returned, even when
+      # the last filter returns a String.
+      def self.to_document(input, context = nil)
+        html = call(input, context)
+        MotionHTMLPipeline::Pipeline.parse(html)
+      end
+      # Like call but guarantees that a string of HTML markup is returned.
+      def self.to_html(input, context = nil)
+        output = call(input, context)
+        if output.respond_to?(:to_html)
+          output.to_html
+        else
+          output.to_s
+        end
+      end
+      # Validator for required context. This will check that anything passed in
+      # contexts exists in @contexts
+      #
+      # If any errors are found an ArgumentError will be raised with a
+      # message listing all the missing contexts and the filters that
+      # require them.
+      def needs(*keys)
+        missing = keys.reject { |key| context.include? key }
+        if missing.any?
+          raise ArgumentError,
+                "Missing context keys for #{self.class.name}: #{missing.map(&:inspect).join ', '}"
+        end
+      end
+    end
+  end
+end