html-pipeline-plus 2.10.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,27 @@
1
+ HTML::Pipeline.require_dependency('rinku', 'AutolinkFilter')
2
+
3
+ module HTML
4
+ class Pipeline
5
# HTML Filter that auto-links the URLs found in HTML, delegating to Rinku.
#
# Context options:
#   :autolink  - set to false to skip autolinking; any other value
#                (including a missing key) leaves it enabled
#   :link_attr - HTML attributes for the links that will be generated
#   :skip_tags - HTML tags inside which autolinking will be skipped.
#                See Rinku.skip_tags
#   :flags     - additional Rinku flags. See https://github.com/vmg/rinku
#
# This filter does not write additional information to the context.
class AutolinkFilter < Filter
  # Returns the untouched html when :autolink is false, otherwise the
  # value produced by Rinku.auto_link.
  def call
    if context[:autolink] == false
      html
    else
      extra_flags = context[:flags]
      # Start from "no flags" and OR in whatever the caller supplied.
      rinku_flags = extra_flags ? 0 | extra_flags : 0

      Rinku.auto_link(html, :urls, context[:link_attr], context[:skip_tags], rinku_flags)
    end
  end
end
26
+ end
27
+ end
@@ -0,0 +1,42 @@
1
+ module HTML
2
+ class Pipeline
3
# Public: Runs a String of content through an HTML processing pipeline,
# providing easy access to a generated DocumentFragment.
class BodyContent
  # Public: Initialize a BodyContent. The pipeline is not run here; it is
  # invoked lazily the first time #result is needed.
  #
  # body     - A String body.
  # context  - A Hash of context options for the filters.
  # pipeline - A HTML::Pipeline object with one or more Filters.
  def initialize(body, context, pipeline)
    @body = body
    @context = context
    @pipeline = pipeline
  end

  # Public: Gets the memoized result of the body content as it passed through
  # the Pipeline.
  #
  # This explicit reader is the only definition of #result: a separate
  # attr_reader would be overridden by it and only produce a
  # "method redefined" warning.
  #
  # Returns a Hash, or something similar as defined by @pipeline.result_class.
  def result
    @result ||= @pipeline.call(@body, @context)
  end

  # Public: Gets the updated body from the Pipeline result.
  #
  # Returns a String or DocumentFragment.
  def output
    @output ||= result[:output]
  end

  # Public: Parses the output into a DocumentFragment.
  #
  # Returns a DocumentFragment.
  def document
    @document ||= HTML::Pipeline.parse(output)
  end
end
41
+ end
42
+ end
@@ -0,0 +1,93 @@
1
+ require 'openssl'
2
+ require 'uri'
3
+
4
+ module HTML
5
+ class Pipeline
6
# HTML Filter for replacing http image URLs with camo versions. See:
#
# https://github.com/atmos/camo
#
# All images provided in user content should be run through this
# filter so that http image sources do not cause mixed-content warnings
# in browser clients.
#
# Context options:
#   :asset_proxy (required) - Base URL for constructed asset proxy URLs.
#   :asset_proxy_secret_key (required) - The shared secret used to encode URLs.
#   :asset_proxy_whitelist - Array of host Strings or Regexps to skip
#                            src rewriting.
#   :disable_asset_proxy - When truthy, the document is returned unchanged.
#
# This filter does not write additional information to the context.
class CamoFilter < Filter
  # Hijacks images in the markup provided, replacing them with URLs that
  # go through the github asset proxy. The original source is kept in the
  # data-canonical-src attribute.
  #
  # Returns the (possibly modified) doc.
  def call
    return doc unless asset_proxy_enabled?

    doc.search('img').each do |element|
      original_src = element['src']
      next unless original_src

      begin
        uri = URI.parse(original_src)
      rescue StandardError
        # Best effort: leave images with unparseable sources alone. Only
        # StandardError is rescued so that interrupts, exits and
        # out-of-memory conditions are not silently swallowed.
        next
      end

      # Sources without a host are left as they are.
      next if uri.host.nil?
      next if asset_host_whitelisted?(uri.host)

      element['src'] = asset_proxy_url(original_src)
      element['data-canonical-src'] = original_src
    end
    doc
  end

  # Implementation of validate hook.
  # Errors should raise exceptions or use an existing validator.
  def validate
    needs :asset_proxy, :asset_proxy_secret_key
  end

  # The camouflaged URL for a given image URL:
  # "<asset proxy>/<HMAC digest of url>/<hex encoded url>".
  def asset_proxy_url(url)
    "#{asset_proxy_host}/#{asset_url_hash(url)}/#{hexencode(url)}"
  end

  # Private: calculate the HMAC (SHA1, hex) digest for an image source URL
  # using the shared secret key.
  def asset_url_hash(url)
    OpenSSL::HMAC.hexdigest('sha1', asset_proxy_secret_key, url)
  end

  # Private: Return true if asset proxy filter should be enabled, i.e.
  # unless the :disable_asset_proxy context option is truthy.
  def asset_proxy_enabled?
    !context[:disable_asset_proxy]
  end

  # Private: the host to use for generated asset proxied URLs.
  def asset_proxy_host
    context[:asset_proxy]
  end

  # Private: the shared secret used to sign proxied URLs.
  def asset_proxy_secret_key
    context[:asset_proxy_secret_key]
  end

  # Private: the configured whitelist, or an empty Array when none is set.
  def asset_proxy_whitelist
    context[:asset_proxy_whitelist] || []
  end

  # Private: Return a truthy value when the host matches any whitelist
  # entry. String entries must be equal to the host; other entries (e.g.
  # Regexps) are asked to match it.
  def asset_host_whitelisted?(host)
    asset_proxy_whitelist.any? do |test|
      test.is_a?(String) ? host == test : test.match(host)
    end
  end

  # Private: helper to hexencode a string. Each byte ends up encoded into
  # two characters, zero padded value in the range [0-9a-f].
  def hexencode(str)
    str.each_byte.map { |byte| format('%02x', byte) }.join
  end
end
92
+ end
93
+ end
@@ -0,0 +1,66 @@
1
+ HTML::Pipeline.require_dependency('escape_utils', 'EmailReplyFilter')
2
+ HTML::Pipeline.require_dependency('email_reply_parser', 'EmailReplyFilter')
3
+
4
+ module HTML
5
+ class Pipeline
6
# HTML Filter that converts email reply text into an HTML DocumentFragment.
# It must be used as the first filter in a pipeline.
#
# Context options:
#   :hide_quoted_email_addresses - when truthy, email addresses inside
#                                  quoted fragments are masked
#
# This filter does not write any additional information to the context hash.
class EmailReplyFilter < TextFilter
  include EscapeUtils

  EMAIL_HIDDEN_HEADER = %(<span class="email-hidden-toggle"><a href="#">&hellip;</a></span><div class="email-hidden-reply" style="display:none">).freeze
  EMAIL_QUOTED_HEADER = %(<div class="email-quoted-reply">).freeze
  EMAIL_SIGNATURE_HEADER = %(<div class="email-signature-reply">).freeze
  EMAIL_FRAGMENT_HEADER = %(<div class="email-fragment">).freeze
  EMAIL_HEADER_END = '</div>'.freeze
  EMAIL_REGEX = /[^@\s.][^@\s]*@\[?[a-z0-9.-]+\]?/
  HIDDEN_EMAIL_PATTERN = '***@***.***'.freeze

  # Splits the email body into fragments with EmailReplyParser and wraps
  # each escaped fragment in a <div> describing its kind (quoted, signature
  # or plain fragment). The first hidden fragment additionally opens an
  # "email-hidden-reply" <div>, which stays open around every following
  # fragment and is closed once at the very end.
  #
  # Returns the email comment HTML as a String
  def call
    found_hidden = nil
    fragments = EmailReplyParser.read(text.dup).fragments

    paragraphs = fragments.map do |fragment|
      # Escape first, then drop the leading quote markers (raw or escaped).
      body = escape_html(fragment.to_s.strip).gsub(/^\s*(>|&gt;)/, '')

      header =
        if fragment.quoted?
          body = body.gsub(EMAIL_REGEX, HIDDEN_EMAIL_PATTERN) if context[:hide_quoted_email_addresses]
          EMAIL_QUOTED_HEADER
        elsif fragment.signature?
          EMAIL_SIGNATURE_HEADER
        else
          EMAIL_FRAGMENT_HEADER
        end

      wrapped = "#{header}#{body}#{EMAIL_HEADER_END}"

      # Only the first hidden fragment opens the hidden wrapper.
      if fragment.hidden? && !found_hidden
        found_hidden = true
        wrapped = "#{EMAIL_HIDDEN_HEADER}#{wrapped}"
      end

      wrapped
    end

    paragraphs << EMAIL_HEADER_END if found_hidden
    paragraphs.join("\n")
  end
end
65
+ end
66
+ end
@@ -0,0 +1,125 @@
1
+ require 'cgi'
2
+ HTML::Pipeline.require_dependency('gemoji', 'EmojiFilter')
3
+
4
+ module HTML
5
+ class Pipeline
6
# HTML filter that replaces :emoji: with images.
#
# Context:
#   :asset_root (required) - base url to link to emoji sprite
#   :asset_path (optional) - url path to link to emoji sprite. :file_name can be used as a placeholder for the sprite file name. If no asset_path is set "emoji/:file_name" is used.
#   :ignored_ancestor_tags (optional) - Tags to stop the emojification. Nodes with a matching ancestor HTML tag will not be emojified. Defaults to pre, code, and tt tags. Extra tags are passed as an array, e.g., %w(blockquote summary).
#   :img_attrs (optional) - Attributes for generated img tag. E.g. Pass { "draggable" => true, "height" => nil } to set draggable attribute to "true" and clear height attribute of generated img tag.
class EmojiFilter < Filter
  DEFAULT_IGNORED_ANCESTOR_TAGS = %w[pre code tt].freeze

  # Walks every text node and swaps known :emoji: names for <img> tags,
  # skipping nodes nested inside one of the ignored ancestor tags.
  #
  # Returns the doc.
  def call
    doc.search('.//text()').each do |text_node|
      original = text_node.text
      # Cheap pre-check: without a colon there cannot be an :emoji:.
      next if !original.include?(':') || has_ancestor?(text_node, ignored_ancestor_tags)

      replaced = emoji_image_filter(original)
      text_node.replace(replaced) unless replaced == original
    end
    doc
  end

  # Implementation of validate hook.
  # Errors should raise exceptions or use an existing validator.
  def validate
    needs :asset_root
  end

  # Replace :emoji: with corresponding images.
  #
  # text - String text to replace :emoji: in.
  #
  # Returns a String with :emoji: replaced with images.
  def emoji_image_filter(text)
    text.gsub(emoji_pattern) { emoji_image_tag(Regexp.last_match(1)) }
  end

  # The base url to link emoji sprites
  #
  # Returns the context's asset_root.
  def asset_root
    context[:asset_root]
  end

  # The url path to link emoji sprites
  #
  # :file_name can be used in the asset_path as a placeholder for the sprite file name. If no asset_path is set in the context "emoji/:file_name" is used.
  # Returns the context's asset_path or the default path if no context asset_path is given.
  def asset_path(name)
    filename = emoji_filename(name)
    template = context[:asset_path]

    template ? template.gsub(':file_name', filename) : File.join('emoji', filename)
  end

  private

  # Build an emoji image tag
  def emoji_image_tag(name)
    require 'active_support/core_ext/hash/indifferent_access'

    overrides = (context[:img_attrs] || {}).with_indifferent_access
    attributes = default_img_attrs(name).merge!(overrides)

    rendered = attributes.map do |attr, value|
      # A nil value clears the attribute.
      next false if value.nil?

      # Callable values are invoked with the emoji name; a falsy result
      # falls back to the value itself.
      resolved = value.respond_to?(:call) && value.call(name) || value
      %(#{attr}="#{resolved}")
    end

    "<img #{rendered.reject(&:blank?).join(' ')} />"
  end

  # Default attributes for img tag
  def default_img_attrs(name)
    label = ":#{name}:"

    {
      'class' => 'emoji'.freeze,
      'title' => label,
      'alt' => ":#{name}:",
      'src' => emoji_url(name).to_s,
      'align' => 'absmiddle'.freeze,
      'width' => '20'.freeze,
      'height' => '20'.freeze
    }
  end

  # Full url of the image for the given emoji name.
  def emoji_url(name)
    File.join(asset_root, asset_path(name))
  end

  # Build a regexp that matches all valid :emoji: names. Memoized on the
  # class.
  def self.emoji_pattern
    @emoji_pattern ||= begin
      alternatives = emoji_names.map { |name| Regexp.escape(name) }.join('|')
      /:(#{alternatives}):/
    end
  end

  # Instance-level shortcut to the class-level pattern.
  def emoji_pattern
    self.class.emoji_pattern
  end

  # Sorted list of every alias of every emoji known to Emoji.
  def self.emoji_names
    Emoji.all.map(&:aliases).flatten.sort
  end

  # Image file name for the emoji registered under the given alias.
  def emoji_filename(name)
    Emoji.find_by_alias(name).image_filename
  end

  # Return ancestor tags to stop the emojification.
  #
  # @return [Array<String>] Ancestor tags.
  def ignored_ancestor_tags
    extra_tags = context[:ignored_ancestor_tags]

    extra_tags ? DEFAULT_IGNORED_ANCESTOR_TAGS | extra_tags : DEFAULT_IGNORED_ANCESTOR_TAGS
  end
end
124
+ end
125
+ end
@@ -0,0 +1,163 @@
1
+ module HTML
2
+ class Pipeline
3
# Base class for user content HTML filters. Each filter takes an
# HTML string or Nokogiri::HTML::DocumentFragment, performs
# modifications and/or writes information to the result hash. Filters must
# return a DocumentFragment (typically the same instance provided to the call
# method) or a String with HTML markup.
#
# Example filter that replaces all images with trollface:
#
#   class FuuuFilter < HTML::Pipeline::Filter
#     def call
#       doc.search('img').each do |img|
#         img['src'] = "http://paradoxdgn.com/junk/avatars/trollface.jpg"
#       end
#       doc
#     end
#   end
#
# The context Hash passes options to filters and should not be changed in
# place. A Result Hash allows filters to make extracted information
# available to the caller and is mutable.
#
# Common context options:
#   :base_url   - The site's base URL
#   :repository - A Repository providing context for the HTML being processed
#
# Each filter may define additional options and output values. See the class
# docs for more info.
class Filter
  class InvalidDocumentException < StandardError; end

  # doc     - A String of HTML or an already parsed document.
  # context - Optional Hash of options (defaults to an empty Hash).
  # result  - Optional Hash filters can write to (defaults to an empty Hash).
  #
  # Runs the #validate hook, so subclasses may raise from here.
  def initialize(doc, context = nil, result = nil)
    if doc.is_a?(String)
      @html = doc.to_str
      @doc = nil
    else
      @doc = doc
      @html = nil
    end
    @context = context || {}
    @result = result || {}
    validate
  end

  # Public: Returns a simple Hash used to pass extra information into filters
  # and also to allow filters to make extracted information available to the
  # caller.
  attr_reader :context

  # Public: Returns a Hash used to allow filters to pass back information
  # to callers of the various Pipelines. This can be used for
  # #mentioned_users, for example.
  attr_reader :result

  # The Nokogiri::HTML::DocumentFragment to be manipulated. If the filter was
  # provided a String, parse into a DocumentFragment the first time this
  # method is called.
  def doc
    @doc ||= parse_html(html)
  end

  # The String representation of the document. If a DocumentFragment was
  # provided to the Filter, it is serialized into a String when this method is
  # called.
  #
  # Raises InvalidDocumentException when the filter was given neither a
  # String nor a document.
  def html
    raise InvalidDocumentException if @html.nil? && @doc.nil?

    @html || doc.to_html
  end

  # The main filter entry point. Subclasses should modify the document in
  # place or extract information and add it to the result hash.
  #
  # Raises NotImplementedError unless overridden.
  def call
    raise NotImplementedError
  end

  # Make sure the context has everything we need. Noop: Subclasses can override.
  def validate; end

  # The Repository object provided in the context hash, or nil when no
  # :repository was specified.
  #
  # It's assumed that the repository context has already been checked
  # for permissions
  def repository
    context[:repository]
  end

  # The User object provided in the context hash, or nil when no user
  # was specified
  def current_user
    context[:current_user]
  end

  # The site's base URL provided in the context hash, or '/' when no
  # base URL was specified.
  def base_url
    context[:base_url] || '/'
  end

  # Ensure the passed argument is a DocumentFragment by handing it to
  # HTML::Pipeline.parse.
  def parse_html(html)
    HTML::Pipeline.parse(html)
  end

  # Helper method for filter subclasses used to determine if any of a node's
  # ancestors have one of the tag names specified.
  #
  # node - The Node object to check.
  # tags - An array of tag name strings to check. These should be downcase.
  #
  # Returns true when the node has a matching ancestor, false otherwise.
  def has_ancestor?(node, tags)
    # Walk up the tree until the parent chain ends (parent is nil).
    while (node = node.parent)
      return true if tags.include?(node.name.downcase)
    end
    false
  end

  # Perform a filter on doc with the given context.
  #
  # Returns a HTML::Pipeline::DocumentFragment or a String containing HTML
  # markup.
  def self.call(doc, context = nil, result = nil)
    new(doc, context, result).call
  end

  # Like call but guarantees that a DocumentFragment is returned, even when
  # the last filter returns a String.
  def self.to_document(input, context = nil)
    html = call(input, context)
    HTML::Pipeline.parse(html)
  end

  # Like call but guarantees that a string of HTML markup is returned.
  def self.to_html(input, context = nil)
    output = call(input, context)
    if output.respond_to?(:to_html)
      output.to_html
    else
      output.to_s
    end
  end

  # Validator for required context. This will check that every key passed
  # in exists in the context.
  #
  # keys - The context keys that must be present.
  #
  # Raises ArgumentError listing all the missing context keys and the
  # filter that requires them. Returns nil when nothing is missing.
  def needs(*keys)
    missing = keys.reject { |key| context.include?(key) }
    return if missing.empty?

    raise ArgumentError,
          "Missing context keys for #{self.class.name}: #{missing.map(&:inspect).join(', ')}"
  end
end
162
+ end
163
+ end
@@ -0,0 +1,27 @@
1
+ module HTML
2
+ class Pipeline
3
# HTML Filter for replacing http references to :http_url with https versions.
# Only links whose href starts with the configured url are rewritten.
#
# Context options:
#   :http_url - The HTTP url to force HTTPS. Falls back to :base_url
class HttpsFilter < Filter
  # Rewrites the scheme of every matching anchor and returns the doc.
  def call
    selector = %(a[href^="#{http_url}"])

    doc.css(selector).each do |anchor|
      anchor['href'] = anchor['href'].sub(/^http:/, 'https:')
    end

    doc
  end

  # HTTP url to replace. Falls back to :base_url
  def http_url
    context[:http_url] || context[:base_url]
  end

  # Raise error if neither :http_url nor :base_url is available.
  def validate
    return if http_url

    needs :http_url
  end
end
26
+ end
27
+ end
@@ -0,0 +1,17 @@
1
+ module HTML
2
+ class Pipeline
3
# HTML Filter that converts image urls into <img> tags.
# For example, it will convert
#   http://example.com/test.jpg
# into
#   <img src="http://example.com/test.jpg" alt=""/>.
class ImageFilter < TextFilter
  # Matches an http(s) url ending in a known image extension, optionally
  # followed by a query string. The scheme is mandatory and the url may not
  # contain whitespace, so several urls on the same line are matched
  # separately instead of being merged into a single match.
  IMAGE_URL_PATTERN = %r{https?://\S+\.(?:jpg|jpeg|bmp|gif|png)(?:\?\S+)?}i.freeze

  # Returns the text with every image url replaced by an <img> tag.
  def call
    @text.gsub(IMAGE_URL_PATTERN) do |url|
      %(<img src="#{url}" alt=""/>)
    end
  end
end
16
+ end
17
+ end
@@ -0,0 +1,35 @@
1
+ module HTML
2
+ class Pipeline
3
# This filter rewrites image tags with a max-width inline style and also wraps
# the image in an <a> tag that causes the full size image to be opened in a
# new tab.
#
# The max-width inline styles are especially useful in HTML email which
# doesn't use global stylesheets.
class ImageMaxWidthFilter < Filter
  # Returns the doc after styling (and, where needed, linking) its images.
  def call
    doc.search('img').each do |image|
      # Images that already carry a style attribute are left alone.
      next if image['style']

      # Skip sources starting with "javascript" to avoid js injection via
      # javascript: urls once the src is copied into a link.
      next if image['src'].to_s.strip =~ /\Ajavascript/i

      image['style'] = 'max-width:100%;'

      # Images already inside a link are not wrapped again.
      next if has_ancestor?(image, %w[a])

      link_image image
    end

    doc
  end

  # Replaces the element with a link to its src that opens in a new tab and
  # contains a copy of the element.
  def link_image(element)
    anchor = doc.document.create_element('a', href: element['src'], target: '_blank')
    anchor.add_child(element.dup)
    element.replace(anchor)
  end
end
34
+ end
35
+ end
@@ -0,0 +1,37 @@
1
+ HTML::Pipeline.require_dependency('commonmarker', 'MarkdownFilter')
2
+
3
+ module HTML
4
+ class Pipeline
5
# HTML Filter that converts Markdown text into HTML. This is different from
# most filters in that it can take a non-HTML as input. It must be used as
# the first filter in a pipeline.
#
# Context options:
#   :gfm => false    Do not pass the :HARDBREAKS option to CommonMarker
#   :unsafe          When truthy, pass the :UNSAFE option to CommonMarker
#   :commonmarker_extensions => [ :table, :strikethrough,
#      :tagfilter, :autolink ] Common marker extensions to include
#
# This filter does not write any additional information to the context hash.
class MarkdownFilter < TextFilter
  # Same arguments as the parent class; carriage returns are removed from
  # the text once the parent has stored it.
  def initialize(text, context = nil, result = nil)
    super(text, context, result)
    @text = @text.delete("\r")
  end

  # Convert Markdown to HTML with CommonMarker.
  #
  # Returns the rendered HTML String with trailing whitespace removed.
  def call
    render_options = [:GITHUB_PRE_LANG]
    render_options.push(:HARDBREAKS) unless context[:gfm] == false
    render_options.push(:UNSAFE) if context[:unsafe]

    default_extensions = %i[table strikethrough tagfilter autolink]
    extensions = context.fetch(:commonmarker_extensions, default_extensions)

    CommonMarker.render_html(@text, render_options, extensions).tap(&:rstrip!)
  end
end
36
+ end
37
+ end
@@ -0,0 +1,13 @@
1
+ HTML::Pipeline.require_dependency('escape_utils', 'PlainTextInputFilter')
2
+
3
+ module HTML
4
+ class Pipeline
5
# Simple filter for plain text input. HTML escapes the text input and wraps it
# in a div.
class PlainTextInputFilter < TextFilter
  # Returns the escaped text wrapped in a <div> as a String.
  def call
    escaped = EscapeUtils.escape_html(@text, false)
    "<div>#{escaped}</div>"
  end
end
12
+ end
13
+ end