html-pipeline 2.14.3 → 3.0.0.pre1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/FUNDING.yml +11 -3
- data/.github/dependabot.yml +20 -0
- data/.github/workflows/automerge.yml +34 -0
- data/.github/workflows/lint.yml +23 -0
- data/.github/workflows/tag_and_release.yml +70 -0
- data/.github/workflows/test.yml +33 -0
- data/.rubocop.yml +17 -0
- data/CHANGELOG.md +28 -2
- data/Gemfile +29 -15
- data/{LICENSE → LICENSE.txt} +2 -2
- data/README.md +209 -218
- data/Rakefile +14 -7
- data/UPGRADING.md +35 -0
- data/html-pipeline.gemspec +31 -21
- data/lib/html-pipeline.rb +3 -0
- data/lib/html_pipeline/convert_filter/markdown_filter.rb +26 -0
- data/lib/html_pipeline/convert_filter.rb +17 -0
- data/lib/html_pipeline/filter.rb +89 -0
- data/lib/{html/pipeline → html_pipeline/node_filter}/absolute_source_filter.rb +23 -21
- data/lib/{html/pipeline → html_pipeline/node_filter}/emoji_filter.rb +58 -54
- data/lib/html_pipeline/node_filter/https_filter.rb +22 -0
- data/lib/html_pipeline/node_filter/image_max_width_filter.rb +40 -0
- data/lib/{html/pipeline/@mention_filter.rb → html_pipeline/node_filter/mention_filter.rb} +55 -69
- data/lib/html_pipeline/node_filter/table_of_contents_filter.rb +68 -0
- data/lib/html_pipeline/node_filter/team_mention_filter.rb +105 -0
- data/lib/html_pipeline/node_filter.rb +31 -0
- data/lib/html_pipeline/sanitization_filter.rb +65 -0
- data/lib/{html/pipeline → html_pipeline/text_filter}/image_filter.rb +3 -3
- data/lib/{html/pipeline → html_pipeline/text_filter}/plain_text_input_filter.rb +3 -5
- data/lib/html_pipeline/text_filter.rb +21 -0
- data/lib/html_pipeline/version.rb +5 -0
- data/lib/html_pipeline.rb +252 -0
- metadata +52 -54
- data/.travis.yml +0 -43
- data/Appraisals +0 -19
- data/CONTRIBUTING.md +0 -60
- data/bin/html-pipeline +0 -78
- data/lib/html/pipeline/@team_mention_filter.rb +0 -99
- data/lib/html/pipeline/autolink_filter.rb +0 -34
- data/lib/html/pipeline/body_content.rb +0 -44
- data/lib/html/pipeline/camo_filter.rb +0 -105
- data/lib/html/pipeline/email_reply_filter.rb +0 -69
- data/lib/html/pipeline/filter.rb +0 -165
- data/lib/html/pipeline/https_filter.rb +0 -29
- data/lib/html/pipeline/image_max_width_filter.rb +0 -37
- data/lib/html/pipeline/markdown_filter.rb +0 -56
- data/lib/html/pipeline/sanitization_filter.rb +0 -144
- data/lib/html/pipeline/syntax_highlight_filter.rb +0 -50
- data/lib/html/pipeline/text_filter.rb +0 -16
- data/lib/html/pipeline/textile_filter.rb +0 -25
- data/lib/html/pipeline/toc_filter.rb +0 -69
- data/lib/html/pipeline/version.rb +0 -7
- data/lib/html/pipeline.rb +0 -210
@@ -1,144 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
HTML::Pipeline.require_dependency('sanitize', 'SanitizationFilter')
|
4
|
-
|
5
|
-
module HTML
|
6
|
-
class Pipeline
|
7
|
-
# HTML filter with sanitization routines and allowlists. This module defines
|
8
|
-
# what HTML is allowed in user provided content and fixes up issues with
|
9
|
-
# unbalanced tags and whatnot.
|
10
|
-
#
|
11
|
-
# See the Sanitize docs for more information on the underlying library:
|
12
|
-
#
|
13
|
-
# https://github.com/rgrove/sanitize/#readme
|
14
|
-
#
|
15
|
-
# Context options:
|
16
|
-
# :allowlist - The sanitizer allowlist configuration to use. This
|
17
|
-
# can be one of the options constants defined in this
|
18
|
-
# class or a custom sanitize options hash.
|
19
|
-
# :anchor_schemes - The URL schemes to allow in <a href> attributes. The
|
20
|
-
# default set is provided in the ANCHOR_SCHEMES
|
21
|
-
# constant in this class. If passed, this overrides any
|
22
|
-
# schemes specified in the allowlist configuration.
|
23
|
-
#
|
24
|
-
# This filter does not write additional information to the context.
|
25
|
-
class SanitizationFilter < Filter
|
26
|
-
LISTS = Set.new(%w[ul ol].freeze)
|
27
|
-
LIST_ITEM = 'li'.freeze
|
28
|
-
|
29
|
-
# List of table child elements. These must be contained by a <table> element
|
30
|
-
# or they are not allowed through. Otherwise they can be used to break out
|
31
|
-
# of places we're using tables to contain formatted user content (like pull
|
32
|
-
# request review comments).
|
33
|
-
TABLE_ITEMS = Set.new(%w[tr td th].freeze)
|
34
|
-
TABLE = 'table'.freeze
|
35
|
-
TABLE_SECTIONS = Set.new(%w[thead tbody tfoot].freeze)
|
36
|
-
|
37
|
-
# These schemes are the only ones allowed in <a href> attributes by default.
|
38
|
-
ANCHOR_SCHEMES = ['http', 'https', 'mailto', 'xmpp', :relative, 'github-windows', 'github-mac', 'irc', 'ircs'].freeze
|
39
|
-
|
40
|
-
# The main sanitization allowlist. Only these elements and attributes are
|
41
|
-
# allowed through by default.
|
42
|
-
ALLOWLIST = {
|
43
|
-
elements: %w[
|
44
|
-
h1 h2 h3 h4 h5 h6 h7 h8 br b i strong em a pre code img tt
|
45
|
-
div ins del sup sub p ol ul table thead tbody tfoot blockquote
|
46
|
-
dl dt dd kbd q samp var hr ruby rt rp li tr td th s strike summary
|
47
|
-
details caption figure figcaption
|
48
|
-
abbr bdo cite dfn mark small span time wbr
|
49
|
-
].freeze,
|
50
|
-
remove_contents: ['script'].freeze,
|
51
|
-
attributes: {
|
52
|
-
'a' => ['href'].freeze,
|
53
|
-
'img' => %w[src longdesc].freeze,
|
54
|
-
'div' => %w[itemscope itemtype].freeze,
|
55
|
-
'blockquote' => ['cite'].freeze,
|
56
|
-
'del' => ['cite'].freeze,
|
57
|
-
'ins' => ['cite'].freeze,
|
58
|
-
'q' => ['cite'].freeze,
|
59
|
-
all: %w[abbr accept accept-charset
|
60
|
-
accesskey action align alt
|
61
|
-
aria-describedby aria-hidden aria-label aria-labelledby
|
62
|
-
axis border cellpadding cellspacing char
|
63
|
-
charoff charset checked
|
64
|
-
clear cols colspan color
|
65
|
-
compact coords datetime dir
|
66
|
-
disabled enctype for frame
|
67
|
-
headers height hreflang
|
68
|
-
hspace ismap label lang
|
69
|
-
maxlength media method
|
70
|
-
multiple name nohref noshade
|
71
|
-
nowrap open progress prompt readonly rel rev
|
72
|
-
role rows rowspan rules scope
|
73
|
-
selected shape size span
|
74
|
-
start summary tabindex target
|
75
|
-
title type usemap valign value
|
76
|
-
vspace width itemprop].freeze
|
77
|
-
}.freeze,
|
78
|
-
protocols: {
|
79
|
-
'a' => { 'href' => ANCHOR_SCHEMES }.freeze,
|
80
|
-
'blockquote' => { 'cite' => ['http', 'https', :relative].freeze },
|
81
|
-
'del' => { 'cite' => ['http', 'https', :relative].freeze },
|
82
|
-
'ins' => { 'cite' => ['http', 'https', :relative].freeze },
|
83
|
-
'q' => { 'cite' => ['http', 'https', :relative].freeze },
|
84
|
-
'img' => {
|
85
|
-
'src' => ['http', 'https', :relative].freeze,
|
86
|
-
'longdesc' => ['http', 'https', :relative].freeze
|
87
|
-
}.freeze
|
88
|
-
},
|
89
|
-
transformers: [
|
90
|
-
# Top-level <li> elements are removed because they can break out of
|
91
|
-
# containing markup.
|
92
|
-
lambda { |env|
|
93
|
-
name = env[:node_name]
|
94
|
-
node = env[:node]
|
95
|
-
if name == LIST_ITEM && node.ancestors.none? { |n| LISTS.include?(n.name) }
|
96
|
-
node.replace(node.children)
|
97
|
-
end
|
98
|
-
},
|
99
|
-
|
100
|
-
# Table child elements that are not contained by a <table> are removed.
|
101
|
-
lambda { |env|
|
102
|
-
name = env[:node_name]
|
103
|
-
node = env[:node]
|
104
|
-
if (TABLE_SECTIONS.include?(name) || TABLE_ITEMS.include?(name)) && node.ancestors.none? { |n| n.name == TABLE }
|
105
|
-
node.replace(node.children)
|
106
|
-
end
|
107
|
-
}
|
108
|
-
].freeze
|
109
|
-
}.freeze
|
110
|
-
|
111
|
-
# A more limited sanitization allowlist. This includes all attributes,
|
112
|
-
# protocols, and transformers from ALLOWLIST but with a more locked down
|
113
|
-
# set of allowed elements.
|
114
|
-
LIMITED = ALLOWLIST.merge(
|
115
|
-
elements: %w[b i strong em a pre code img ins del sup sub mark abbr p ol ul li]
|
116
|
-
)
|
117
|
-
|
118
|
-
# Strip all HTML tags from the document.
|
119
|
-
FULL = { elements: [] }.freeze
|
120
|
-
|
121
|
-
# Sanitize markup using the Sanitize library.
|
122
|
-
# Runs the Sanitize library over the filter's document using the
# configuration returned by #allowlist.
#
# Returns whatever Sanitize.clean_node! returns.
def call
  fragment = doc
  Sanitize.clean_node!(fragment, allowlist)
end
|
125
|
-
|
126
|
-
# Deprecated alias for #allowlist. Emits a deprecation warning first and
# then returns the same configuration #allowlist returns.
def whitelist
  warn("[DEPRECATION] 'whitelist' is deprecated. Please use 'allowlist' instead.")
  allowlist
end
|
130
|
-
|
131
|
-
# The allowlist to use when sanitizing. This can be passed in the context
|
132
|
-
# hash to the filter but defaults to ALLOWLIST constant value above.
|
133
|
-
# Picks the sanitizer configuration for this run: context[:allowlist],
# then context[:whitelist], then the ALLOWLIST constant.
#
# When context[:anchor_schemes] is set, a copy of the configuration is
# returned whose protocols for <a href> are replaced by those schemes; the
# selected configuration itself is left untouched.
#
# Returns a sanitize options Hash.
def allowlist
  config = context[:allowlist] || context[:whitelist] || ALLOWLIST
  schemes = context[:anchor_schemes]
  return config unless schemes

  # Copy the outer hash and the protocols hash before changing anything so
  # shared (possibly frozen) configuration is never modified.
  customized = config.dup
  protocols = (customized[:protocols] || {}).dup
  protocols['a'] = (protocols['a'] || {}).merge('href' => schemes)
  customized[:protocols] = protocols
  customized
end
|
142
|
-
end
|
143
|
-
end
|
144
|
-
end
|
@@ -1,50 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
HTML::Pipeline.require_dependency('rouge', 'SyntaxHighlightFilter')
|
4
|
-
|
5
|
-
module HTML
|
6
|
-
class Pipeline
|
7
|
-
# HTML Filter that syntax highlights text inside code blocks.
|
8
|
-
#
|
9
|
-
# Context options:
|
10
|
-
#
|
11
|
-
# :highlight => String represents the language to pick lexer. Defaults to empty string.
|
12
|
-
# :scope => String used as the prefix of the class attribute set on the pre element after highlighting.
|
13
|
-
# Defaults to "highlight", so a highlighted css code block gets the class "highlight highlight-css".
|
14
|
-
#
|
15
|
-
# This filter does not write any additional information to the context hash.
|
16
|
-
class SyntaxHighlightFilter < Filter
|
17
|
-
# Builds the filter, forwarding every argument to the superclass, and
# creates the Rouge HTML formatter used for highlighting.
def initialize(*args)
  super
  @formatter = Rouge::Formatters::HTML.new
end
|
21
|
-
|
22
|
-
# Highlights the contents of every <pre> element in the document.
#
# The language comes from the element's `lang` attribute, falling back to
# context[:highlight]. Elements with no language, with a language no lexer
# is found for, or whose highlighting returns nil are left unchanged.
# Highlighted elements get their inner HTML replaced and their class set to
# "<scope> <scope>-<lang>", where scope is context[:scope] or "highlight".
#
# Returns the document.
def call
  # Both values depend only on the context, so look them up once.
  default = context[:highlight] && context[:highlight].to_s
  scope = context.fetch(:scope) { 'highlight' }

  doc.search('pre').each do |node|
    lang = node['lang'] || default
    next unless lang

    lexer = lexer_for(lang)
    next unless lexer

    html = highlight_with_timeout_handling(node.inner_text, lexer)
    next if html.nil?

    node.inner_html = html
    node['class'] = "#{scope} #{scope}-#{lang}"
  end
  doc
end
|
38
|
-
|
39
|
-
# Highlights text with the given lexer and the filter's formatter.
#
# Returns the highlighted markup, or nil when Rouge raises Timeout::Error.
def highlight_with_timeout_handling(text, lexer)
  begin
    Rouge.highlight(text, lexer, @formatter)
  rescue Timeout::Error
    nil
  end
end
|
44
|
-
|
45
|
-
# Looks up the Rouge lexer for a language name.
#
# Returns whatever Rouge::Lexer.find returns for lang.
def lexer_for(lang)
  Rouge::Lexer.find(lang)
end
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
@@ -1,16 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module HTML
|
4
|
-
class Pipeline
|
5
|
-
class TextFilter < Filter
|
6
|
-
attr_reader :text
|
7
|
-
|
8
|
-
# Builds a text filter.
#
# text    - the input; converted with #to_str when available, #to_s otherwise.
# context - passed through to the superclass.
# result  - passed through to the superclass.
#
# Raises TypeError when text is a DocumentFragment.
def initialize(text, context = nil, result = nil)
  raise TypeError, 'text cannot be HTML' if text.is_a?(DocumentFragment)

  # Ensure that @text is always a String.
  @text =
    if text.respond_to?(:to_str)
      text.to_str
    else
      text.to_s
    end
  super(nil, context, result)
end
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
@@ -1,25 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
HTML::Pipeline.require_dependency('redcloth', 'RedCloth')
|
4
|
-
|
5
|
-
module HTML
|
6
|
-
class Pipeline
|
7
|
-
# HTML Filter that converts Textile text into HTML and converts into a
|
8
|
-
# DocumentFragment. This is different from most filters in that it can take a
|
9
|
-
# non-HTML as input. It must be used as the first filter in a pipeline.
|
10
|
-
#
|
11
|
-
# Context options:
|
12
|
-
# :autolink => false Disable autolinking URLs
|
13
|
-
#
|
14
|
-
# This filter does not write any additional information to the context hash.
|
15
|
-
#
|
16
|
-
# NOTE This filter is provided for really old comments only. It probably
|
17
|
-
# shouldn't be used for anything new.
|
18
|
-
class TextileFilter < TextFilter
|
19
|
-
# Convert Textile to HTML and convert into a DocumentFragment.
|
20
|
-
# Renders the filter's Textile text through RedCloth.
#
# Returns the result of RedCloth#to_html.
def call
  textile = RedCloth.new(@text)
  textile.to_html
end
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
@@ -1,69 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
HTML::Pipeline.require_dependency('escape_utils', 'TableOfContentsFilter')
|
4
|
-
|
5
|
-
module HTML
|
6
|
-
class Pipeline
|
7
|
-
# HTML filter that adds an 'id' attribute to all headers
|
8
|
-
# in a document, so they can be accessed from a table of contents.
|
9
|
-
#
|
10
|
-
# Generates the Table of Contents, with links to each header.
|
11
|
-
#
|
12
|
-
# Examples
|
13
|
-
#
|
14
|
-
# TocPipeline =
|
15
|
-
# HTML::Pipeline.new [
|
16
|
-
# HTML::Pipeline::TableOfContentsFilter
|
17
|
-
# ]
|
18
|
-
# # => #<HTML::Pipeline:0x007fc13c4528d8...>
|
19
|
-
# orig = %(<h1>Ice cube</h1><p>is not for the pop chart</p>)
|
20
|
-
# # => "<h1>Ice cube</h1><p>is not for the pop chart</p>"
|
21
|
-
# result = {}
|
22
|
-
# # => {}
|
23
|
-
# TocPipeline.call(orig, {}, result)
|
24
|
-
# # => {:toc=> ...}
|
25
|
-
# result[:toc]
|
26
|
-
# # => "<ul class=\"section-nav\">\n<li><a href=\"#ice-cube\">...</li><ul>"
|
27
|
-
# result[:output].to_s
|
28
|
-
# # => "<h1>\n<a id=\"ice-cube\" class=\"anchor\" href=\"#ice-cube\">..."
|
29
|
-
class TableOfContentsFilter < Filter
|
30
|
-
PUNCTUATION_REGEXP = RUBY_VERSION > '1.9' ? /[^\p{Word}\- ]/u : /[^\w\- ]/
|
31
|
-
|
32
|
-
# The icon that will be placed next to an anchored rendered markdown header
|
33
|
-
# Markup placed inside each generated header anchor.
#
# Returns context[:anchor_icon] when it is truthy, otherwise the default
# octicon link span.
def anchor_icon
  custom = context[:anchor_icon]
  return custom if custom

  '<span aria-hidden="true" class="octicon octicon-link"></span>'
end
|
36
|
-
|
37
|
-
# Adds an anchor to every h1-h6 in the document and builds the table of
# contents markup in result[:toc].
#
# Each header id is the header text lowercased, with punctuation removed and
# spaces turned into dashes; a repeated id gets a "-<count>" suffix. Headers
# without any child node still count towards that suffix but get neither an
# anchor nor a table of contents entry.
#
# Returns the document.
def call
  result[:toc] = String.new('')
  seen = Hash.new(0)

  doc.css('h1, h2, h3, h4, h5, h6').each do |heading|
    title = heading.text
    slug = ascii_downcase(title)
    slug.gsub!(PUNCTUATION_REGEXP, '') # remove punctuation
    slug.tr!(' ', '-') # replace spaces with dash

    suffix = seen[slug] > 0 ? "-#{seen[slug]}" : ''
    seen[slug] += 1

    first_child = heading.children.first
    next unless first_child

    result[:toc] << %(<li><a href="##{slug}#{suffix}">#{CGI.escape_html(title)}</a></li>\n)
    first_child.add_previous_sibling(%(<a id="#{slug}#{suffix}" class="anchor" href="##{slug}#{suffix}" aria-hidden="true">#{anchor_icon}</a>))
  end

  # Only wrap the entries in a list when at least one header was found.
  unless result[:toc].empty?
    result[:toc] = %(<ul class="section-nav">\n#{result[:toc]}</ul>)
  end
  doc
end
|
57
|
-
|
58
|
-
# Lowercases str for use in a header id.
#
# Where String#downcase accepts case-mapping options, only ASCII letters are
# lowercased. The capability is detected from the method's arity rather than
# by comparing RUBY_VERSION strings, which compare lexicographically (for
# example "10.0" >= "2.4" is false).
if ''.method(:downcase).arity.zero?
  # String#downcase takes no options here, so use it as-is.
  def ascii_downcase(str)
    str.downcase
  end
else
  def ascii_downcase(str)
    str.downcase(:ascii)
  end
end
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
data/lib/html/pipeline.rb
DELETED
@@ -1,210 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'nokogiri'
|
4
|
-
require 'active_support/xml_mini/nokogiri' # convert Documents to hashes
|
5
|
-
|
6
|
-
module HTML
|
7
|
-
# GitHub HTML processing filters and utilities. This module includes a small
|
8
|
-
# framework for defining DOM based content filters and applying them to user
|
9
|
-
# provided content.
|
10
|
-
#
|
11
|
-
# See HTML::Pipeline::Filter for information on building filters.
|
12
|
-
#
|
13
|
-
# Construct a Pipeline for running multiple HTML filters. A pipeline is created once
|
14
|
-
# with one to many filters, and it then can be `call`ed many times over the course
|
15
|
-
# of its lifetime with input.
|
16
|
-
#
|
17
|
-
# filters - Array of Filter objects. Each must respond to call(doc,
|
18
|
-
# context) and return the modified DocumentFragment or a
|
19
|
-
# String containing HTML markup. Filters are performed in the
|
20
|
-
# order provided.
|
21
|
-
# default_context - The default context hash. Values specified here will be merged
|
22
|
-
# into values from each individual pipeline run. Can NOT be
|
23
|
-
# nil. Default: empty Hash.
|
24
|
-
# result_class - The default Class of the result object for individual
|
25
|
-
# calls. Default: Hash. Protip: Pass in a Struct to get
|
26
|
-
# some semblance of type safety.
|
27
|
-
class Pipeline
|
28
|
-
autoload :VERSION, 'html/pipeline/version'
|
29
|
-
autoload :Filter, 'html/pipeline/filter'
|
30
|
-
autoload :AbsoluteSourceFilter, 'html/pipeline/absolute_source_filter'
|
31
|
-
autoload :BodyContent, 'html/pipeline/body_content'
|
32
|
-
autoload :AutolinkFilter, 'html/pipeline/autolink_filter'
|
33
|
-
autoload :CamoFilter, 'html/pipeline/camo_filter'
|
34
|
-
autoload :EmailReplyFilter, 'html/pipeline/email_reply_filter'
|
35
|
-
autoload :EmojiFilter, 'html/pipeline/emoji_filter'
|
36
|
-
autoload :HttpsFilter, 'html/pipeline/https_filter'
|
37
|
-
autoload :ImageFilter, 'html/pipeline/image_filter'
|
38
|
-
autoload :ImageMaxWidthFilter, 'html/pipeline/image_max_width_filter'
|
39
|
-
autoload :MarkdownFilter, 'html/pipeline/markdown_filter'
|
40
|
-
autoload :MentionFilter, 'html/pipeline/@mention_filter'
|
41
|
-
autoload :TeamMentionFilter, 'html/pipeline/@team_mention_filter'
|
42
|
-
autoload :PlainTextInputFilter, 'html/pipeline/plain_text_input_filter'
|
43
|
-
autoload :SanitizationFilter, 'html/pipeline/sanitization_filter'
|
44
|
-
autoload :SyntaxHighlightFilter, 'html/pipeline/syntax_highlight_filter'
|
45
|
-
autoload :TextileFilter, 'html/pipeline/textile_filter'
|
46
|
-
autoload :TableOfContentsFilter, 'html/pipeline/toc_filter'
|
47
|
-
autoload :TextFilter, 'html/pipeline/text_filter'
|
48
|
-
|
49
|
-
class MissingDependencyError < RuntimeError; end
|
50
|
-
# Requires an optional dependency on behalf of a filter.
#
# name     - the library to require.
# requirer - name of the filter that needs it, used in the error message.
#
# Returns the result of `require`.
# Raises MissingDependencyError when the library cannot be loaded.
def self.require_dependency(name, requirer)
  begin
    require name
  rescue LoadError => e
    details = "Missing dependency '#{name}' for #{requirer}. See README.md for details.\n#{e.class.name}: #{e}"
    raise MissingDependencyError, details
  end
end
|
56
|
-
|
57
|
-
# Our DOM implementation.
|
58
|
-
DocumentFragment = Nokogiri::HTML::DocumentFragment
|
59
|
-
|
60
|
-
# Parse a String into a DocumentFragment object. When a DocumentFragment is
|
61
|
-
# provided, return it verbatim.
|
62
|
-
# Turns a String of HTML into a DocumentFragment.
#
# document_or_html - a String, nil/false (treated as an empty String), or
#                    any other object, which is handed back unchanged.
#
# Returns the parsed DocumentFragment, or the non-String argument itself.
def self.parse(document_or_html)
  source = document_or_html || ''
  return source unless source.is_a?(String)

  DocumentFragment.parse(source)
end
|
70
|
-
|
71
|
-
# Public: Returns an Array of Filter objects for this Pipeline.
|
72
|
-
attr_reader :filters
|
73
|
-
|
74
|
-
# Public: Instrumentation service for the pipeline.
|
75
|
-
# Set an ActiveSupport::Notifications compatible object to enable.
|
76
|
-
attr_accessor :instrumentation_service
|
77
|
-
|
78
|
-
# Public: String name for this Pipeline. Defaults to Class name.
|
79
|
-
attr_writer :instrumentation_name
|
80
|
-
# Name reported for this pipeline in instrumentation payloads.
#
# Returns the assigned name, even when that is nil; before any name has been
# assigned it is initialised to the class name.
def instrumentation_name
  @instrumentation_name = self.class.name unless defined?(@instrumentation_name)
  @instrumentation_name
end
|
84
|
-
|
85
|
-
class << self
|
86
|
-
# Public: Default instrumentation service for new pipeline objects.
|
87
|
-
attr_accessor :default_instrumentation_service
|
88
|
-
end
|
89
|
-
|
90
|
-
# Builds a pipeline.
#
# filters         - Array of filters (nested arrays are flattened).
# default_context - Hash merged under the context of every call. It is
#                   frozen in place, so the caller's Hash becomes immutable.
# result_class    - Class instantiated for results; Hash when not given.
#
# Raises ArgumentError when default_context is nil.
def initialize(filters, default_context = {}, result_class = nil)
  if default_context.nil?
    raise ArgumentError, 'default_context cannot be nil'
  end

  @result_class = result_class || Hash
  @instrumentation_service = self.class.default_instrumentation_service
  @default_context = default_context.freeze
  @filters = filters.flatten.freeze
end
|
97
|
-
|
98
|
-
# Apply all filters in the pipeline to the given HTML.
|
99
|
-
#
|
100
|
-
# html - A String containing HTML or a DocumentFragment object.
|
101
|
-
# context - The context hash passed to each filter. See the Filter docs
|
102
|
-
# for more info on possible values. This object MUST NOT be modified
|
103
|
-
# in place by filters. Use the Result for passing state back.
|
104
|
-
# result - The result Hash passed to each filter for modification. This
|
105
|
-
# is where Filters store extracted information from the content.
|
106
|
-
#
|
107
|
-
# Returns the result Hash after being filtered by this Pipeline. Contains an
|
108
|
-
# :output key with the DocumentFragment or String HTML markup based on the
|
109
|
-
# output of the last filter in the pipeline.
|
110
|
-
# Runs every filter over html, feeding each filter's return value to the
# next one.
#
# html    - input handed to the first filter.
# context - Hash merged over the default context; the merged Hash is frozen.
# result  - object the filters write to; a new instance of the configured
#           result class when not given.
#
# Returns the result object, with the last filter's output under :output.
def call(html, context = {}, result = nil)
  context = @default_context.merge(context).freeze
  result ||= @result_class.new
  payload = default_payload(filters: @filters.map(&:name), context: context, result: result)

  instrument('call_pipeline.html_pipeline', payload) do
    result[:output] = @filters.reduce(html) do |doc, filter|
      perform_filter(filter, doc, context, result)
    end
  end
  result
end
|
124
|
-
|
125
|
-
# Internal: Applies a specific filter to the supplied doc.
|
126
|
-
#
|
127
|
-
# The filter is instrumented.
|
128
|
-
#
|
129
|
-
# Returns the result of the filter.
|
130
|
-
# Runs one filter against doc inside a 'call_filter.html_pipeline'
# instrumentation event.
#
# Returns the filter's return value.
def perform_filter(filter, doc, context, result)
  payload = default_payload(filter: filter.name, context: context, result: result)
  instrument('call_filter.html_pipeline', payload) { filter.call(doc, context, result) }
end
|
137
|
-
|
138
|
-
# Like call but guarantee the value returned is a DocumentFragment.
|
139
|
-
# Pipelines may return a DocumentFragment or a String. Callers that need a
|
140
|
-
# DocumentFragment should use this method.
|
141
|
-
# Runs the pipeline and passes its :output through HTML::Pipeline.parse.
#
# Takes the same arguments as #call.
#
# Returns whatever HTML::Pipeline.parse returns for the output.
def to_document(input, context = {}, result = nil)
  outcome = call(input, context, result)
  HTML::Pipeline.parse(outcome[:output])
end
|
145
|
-
|
146
|
-
# Like call but guarantee the value returned is a string of HTML markup.
|
147
|
-
# Runs the pipeline and returns its output as a String of HTML markup.
#
# input   - passed to #call as the input.
# context - passed to #call as the context.
# result  - optional result object passed to #call, so the caller can read
#           what the filters stored in it afterwards.
#
# Returns output.to_html when the output responds to it, otherwise
# output.to_s.
def to_html(input, context = {}, result = nil)
  # Pass the caller's result object along. The argument used to be written
  # as `result = nil`, which threw the supplied object away.
  output = call(input, context, result)[:output]
  output.respond_to?(:to_html) ? output.to_html : output.to_s
end
|
156
|
-
|
157
|
-
# Public: setup instrumentation for this pipeline.
|
158
|
-
#
|
159
|
-
# Returns nothing.
|
160
|
-
# Sets the instrumentation name and service for this pipeline.
#
# name    - assigned as the instrumentation name, nil included.
# service - instrumentation service; the class-level default when not given.
#
# Returns the service that was assigned.
def setup_instrumentation(name = nil, service = nil)
  self.instrumentation_name = name
  chosen = service || self.class.default_instrumentation_service
  self.instrumentation_service = chosen
end
|
165
|
-
|
166
|
-
# Internal: if the `instrumentation_service` object is set, instruments the
|
167
|
-
# block, otherwise the block is run without instrumentation.
|
168
|
-
#
|
169
|
-
# Returns the result of the provided block.
|
170
|
-
# Runs the given block, through the instrumentation service when one is set.
#
# event   - name of the event passed to the service.
# payload - Hash for the event; #default_payload is used when nil.
#
# Without a service the block is called directly with the payload. With a
# service, the block receives whatever payload the service yields.
#
# Returns the result of the provided block when no service is set,
# otherwise whatever the service's #instrument returns.
def instrument(event, payload = nil)
  payload ||= default_payload
  return yield(payload) unless instrumentation_service

  # The block parameter gets its own name so it no longer shadows `payload`.
  instrumentation_service.instrument(event, payload) do |event_payload|
    yield event_payload
  end
end
|
177
|
-
|
178
|
-
# Internal: Default payload for instrumentation.
|
179
|
-
#
|
180
|
-
# Accepts a Hash of additional payload data to be merged.
|
181
|
-
#
|
182
|
-
# Returns a Hash.
|
183
|
-
# Builds an instrumentation payload.
#
# payload - Hash merged over the base payload; its keys win on conflict.
#
# Returns a new Hash containing :pipeline (the instrumentation name) plus
# the given entries.
def default_payload(payload = {})
  base = { pipeline: instrumentation_name }
  base.merge(payload)
end
|
186
|
-
end
|
187
|
-
end
|
188
|
-
|
189
|
-
# XXX nokogiri monkey patches for 1.8
|
190
|
-
# Nokogiri patch that is applied only when String lacks #force_encoding;
# on any Ruby whose String has that method the body below is skipped.
unless ''.respond_to?(:force_encoding)
  class Nokogiri::XML::Node
    # Work around utf-8 encoded data being converted to a different encoding
    # when text nodes are replaced: a String replacement is parsed into
    # nodes first, and those nodes are handed to the original #replace.
    def replace_with_encoding_fix(replacement)
      if replacement.respond_to?(:to_str)
        wrapper = document.fragment("<div>#{replacement}</div>")
        replacement = wrapper.children.first.children
      end
      replace_without_encoding_fix(replacement)
    end

    alias_method :replace_without_encoding_fix, :replace
    alias_method :replace, :replace_with_encoding_fix

    # Replaces this node like #replace, but returns the receiver.
    def swap(replacement)
      replace(replacement)
      self
    end
  end
end
|