html-pipeline-plus 2.10.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +20 -0
- data/.travis.yml +34 -0
- data/Appraisals +13 -0
- data/CHANGELOG.md +221 -0
- data/CONTRIBUTING.md +60 -0
- data/Gemfile +23 -0
- data/LICENSE +22 -0
- data/README.md +370 -0
- data/Rakefile +15 -0
- data/bin/html-pipeline-plus +78 -0
- data/html-pipeline-plus.gemspec +28 -0
- data/lib/html/pipeline-plus/@mention_filter.rb +138 -0
- data/lib/html/pipeline-plus/absolute_source_filter.rb +45 -0
- data/lib/html/pipeline-plus/autolink_filter.rb +27 -0
- data/lib/html/pipeline-plus/body_content.rb +42 -0
- data/lib/html/pipeline-plus/camo_filter.rb +93 -0
- data/lib/html/pipeline-plus/email_reply_filter.rb +66 -0
- data/lib/html/pipeline-plus/emoji_filter.rb +125 -0
- data/lib/html/pipeline-plus/filter.rb +163 -0
- data/lib/html/pipeline-plus/https_filter.rb +27 -0
- data/lib/html/pipeline-plus/image_filter.rb +17 -0
- data/lib/html/pipeline-plus/image_max_width_filter.rb +35 -0
- data/lib/html/pipeline-plus/markdown_filter.rb +37 -0
- data/lib/html/pipeline-plus/plain_text_input_filter.rb +13 -0
- data/lib/html/pipeline-plus/sanitization_filter.rb +137 -0
- data/lib/html/pipeline-plus/syntax_highlight_filter.rb +44 -0
- data/lib/html/pipeline-plus/text_filter.rb +14 -0
- data/lib/html/pipeline-plus/textile_filter.rb +23 -0
- data/lib/html/pipeline-plus/toc_filter.rb +67 -0
- data/lib/html/pipeline-plus/version.rb +5 -0
- data/lib/html/pipeline-plus.rb +207 -0
- data/test.txt +13 -0
- metadata +115 -0
@@ -0,0 +1,137 @@
|
|
1
|
+
HTML::Pipeline.require_dependency('sanitize', 'SanitizationFilter')
|
2
|
+
|
3
|
+
module HTML
|
4
|
+
class Pipeline
|
5
|
+
# HTML filter with sanitization routines and whitelists. This module defines
|
6
|
+
# what HTML is allowed in user provided content and fixes up issues with
|
7
|
+
# unbalanced tags and whatnot.
|
8
|
+
#
|
9
|
+
# See the Sanitize docs for more information on the underlying library:
|
10
|
+
#
|
11
|
+
# https://github.com/rgrove/sanitize/#readme
|
12
|
+
#
|
13
|
+
# Context options:
|
14
|
+
# :whitelist - The sanitizer whitelist configuration to use. This
|
15
|
+
# can be one of the options constants defined in this
|
16
|
+
# class or a custom sanitize options hash.
|
17
|
+
# :anchor_schemes - The URL schemes to allow in <a href> attributes. The
|
18
|
+
# default set is provided in the ANCHOR_SCHEMES
|
19
|
+
# constant in this class. If passed, this overrides any
|
20
|
+
# schemes specified in the whitelist configuration.
|
21
|
+
#
|
22
|
+
# This filter does not write additional information to the context.
|
23
|
+
class SanitizationFilter < Filter
|
24
|
+
# Names of the list containers an <li> must live inside to be kept.
LISTS = Set.new(%w[ul ol].freeze)
LIST_ITEM = 'li'.freeze

# Table child elements. They are only kept when a <table> ancestor exists;
# otherwise they could be used to break out of places where tables wrap
# formatted user content (like pull request review comments).
TABLE_ITEMS = Set.new(%w[tr td th].freeze)
TABLE = 'table'.freeze
TABLE_SECTIONS = Set.new(%w[thead tbody tfoot].freeze)

# The only URL schemes allowed in <a href> attributes by default.
ANCHOR_SCHEMES = ['http', 'https', 'mailto', 'xmpp', :relative, 'github-windows', 'github-mac', 'irc', 'ircs'].freeze

# The main sanitization whitelist: the elements, attributes and protocols
# that are let through by default, plus the transformers that unwrap
# misplaced list and table children.
WHITELIST = {
  elements: %w[
    h1 h2 h3 h4 h5 h6 h7 h8 br b i strong em a pre code img tt
    div ins del sup sub p ol ul table thead tbody tfoot blockquote
    dl dt dd kbd q samp var hr ruby rt rp li tr td th s strike summary
    details caption figure figcaption
    abbr bdo cite dfn mark small span time wbr
  ].freeze,
  remove_contents: ['script'].freeze,
  attributes: {
    'a' => ['href'].freeze,
    'img' => %w[src longdesc].freeze,
    'div' => %w[itemscope itemtype].freeze,
    'blockquote' => ['cite'].freeze,
    'del' => ['cite'].freeze,
    'ins' => ['cite'].freeze,
    'q' => ['cite'].freeze,
    all: %w[
      abbr accept accept-charset
      accesskey action align alt
      aria-describedby aria-hidden aria-label aria-labelledby
      axis border cellpadding cellspacing char
      charoff charset checked
      clear cols colspan color
      compact coords datetime dir
      disabled enctype for frame
      headers height hreflang
      hspace ismap label lang
      maxlength media method
      multiple name nohref noshade
      nowrap open prompt readonly rel rev
      rows rowspan rules scope
      selected shape size span
      start summary tabindex target
      title type usemap valign value
      vspace width itemprop
    ].freeze
  }.freeze,
  protocols: {
    'a' => { 'href' => ANCHOR_SCHEMES }.freeze,
    'blockquote' => { 'cite' => ['http', 'https', :relative].freeze },
    'del' => { 'cite' => ['http', 'https', :relative].freeze },
    'ins' => { 'cite' => ['http', 'https', :relative].freeze },
    'q' => { 'cite' => ['http', 'https', :relative].freeze },
    'img' => {
      'src' => ['http', 'https', :relative].freeze,
      'longdesc' => ['http', 'https', :relative].freeze
    }.freeze
  },
  transformers: [
    # An <li> with no <ul>/<ol> ancestor is replaced by its children,
    # because a top-level <li> can break out of containing markup.
    lambda { |env|
      tag = env[:node_name]
      element = env[:node]
      element.replace(element.children) if tag == LIST_ITEM && element.ancestors.none? { |parent| LISTS.include?(parent.name) }
    },

    # Table sections and cells with no <table> ancestor are replaced by
    # their children.
    lambda { |env|
      tag = env[:node_name]
      element = env[:node]
      table_child = TABLE_SECTIONS.include?(tag) || TABLE_ITEMS.include?(tag)
      element.replace(element.children) if table_child && element.ancestors.none? { |parent| parent.name == TABLE }
    }
  ].freeze
}.freeze

# A more locked-down whitelist: same attributes, protocols and transformers
# as WHITELIST, but with a much smaller set of allowed elements.
LIMITED = WHITELIST.merge(
  elements: %w[b i strong em a pre code img ins del sup sub mark abbr p ol ul li]
)

# Allows no elements at all, i.e. strips every HTML tag from the document.
FULL = { elements: [] }.freeze
|
118
|
+
|
119
|
+
# Runs the Sanitize library over the document node, using the configuration
# returned by #whitelist.
def call
  Sanitize.clean_node!(doc, whitelist)
end

# The sanitizer configuration for this run.
#
# Uses context[:whitelist] when given and the WHITELIST constant otherwise.
# When context[:anchor_schemes] is set, a copy of that configuration is
# returned whose <a href> protocol list is replaced by those schemes; the
# source configuration itself is not modified.
def whitelist
  base = context[:whitelist] || WHITELIST
  schemes = context[:anchor_schemes]
  return base unless schemes

  # Copy the outer hash and the protocols hash before writing into them, so
  # the (possibly shared or frozen) source configuration stays untouched.
  config = base.dup
  protocols = (config[:protocols] || {}).dup
  protocols['a'] = (protocols['a'] || {}).merge('href' => schemes)
  config[:protocols] = protocols
  config
end
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
HTML::Pipeline.require_dependency('rouge', 'SyntaxHighlightFilter')
|
2
|
+
|
3
|
+
module HTML
|
4
|
+
class Pipeline
|
5
|
+
# HTML filter that syntax highlights the contents of <pre> elements with
# Rouge. The language comes from the element's `lang` attribute
# (<pre lang="...">) or, failing that, from context[:highlight].
#
# Context options read by this filter:
#   :highlight - fallback language name for <pre> elements without `lang`
#   :scope     - CSS class added to highlighted elements; defaults to
#                "highlight-<lang>"
class SyntaxHighlightFilter < Filter
  def initialize(*args)
    super(*args)
    @formatter = Rouge::Formatters::HTML.new
  end

  # Replaces the inner HTML of every <pre> whose language has a Rouge lexer
  # with highlighted markup and appends the scope class to its `class`
  # attribute. Elements without a language, with an unknown language, or
  # whose highlighting timed out are left untouched.
  #
  # Returns the document.
  def call
    doc.search('pre').each do |pre|
      fallback = context[:highlight] && context[:highlight].to_s
      lang = pre['lang'] || fallback
      next unless lang
      next unless lexer_for(lang)

      html = highlight_with_timeout_handling(pre.inner_text, lang)
      next if html.nil?

      pre.inner_html = html
      scope = context[:scope] || "highlight-#{lang}"
      pre['class'] = [pre['class'], scope].compact.join(' ')
    end
    doc
  end

  # Highlights text for the given language with the shared HTML formatter.
  #
  # Returns the highlighted HTML, or nil when a Timeout::Error is raised.
  # NOTE(review): nothing visible here requires 'timeout'; presumably it is
  # loaded elsewhere - confirm.
  def highlight_with_timeout_handling(text, lang)
    begin
      Rouge.highlight(text, lang, @formatter)
    rescue Timeout::Error
      nil
    end
  end

  # Looks up the Rouge lexer for a language name; a falsy result makes #call
  # skip the element.
  def lexer_for(lang)
    Rouge::Lexer.find(lang)
  end
end
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module HTML
|
2
|
+
class Pipeline
|
3
|
+
# Base class for filters whose input is plain text rather than HTML. The
# input is kept as a String in #text and nil is handed to Filter as the
# document.
class TextFilter < Filter
  attr_reader :text

  # text    - The input; anything but a DocumentFragment. Converted with
  #           to_str when available, otherwise with to_s.
  # context - Passed through to Filter unchanged.
  # result  - Passed through to Filter unchanged.
  #
  # Raises TypeError when text is a DocumentFragment.
  def initialize(text, context = nil, result = nil)
    raise TypeError, 'text cannot be HTML' if text.is_a?(DocumentFragment)

    # Ensure that this is always a string
    @text =
      if text.respond_to?(:to_str)
        text.to_str
      else
        text.to_s
      end
    super(nil, context, result)
  end
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# The second argument is only used in the "Missing dependency ... for <requirer>"
# error message, so name the filter that needs the gem (as the other filter
# files do) rather than the library itself.
HTML::Pipeline.require_dependency('redcloth', 'TextileFilter')
|
2
|
+
|
3
|
+
module HTML
|
4
|
+
class Pipeline
|
5
|
+
# HTML Filter that converts Textile text into HTML and converts into a
|
6
|
+
# DocumentFragment. This is different from most filters in that it can take a
|
7
|
+
# non-HTML as input. It must be used as the first filter in a pipeline.
|
8
|
+
#
|
9
|
+
# Context options:
|
10
|
+
# :autolink => false Disable autolinking URLs
|
11
|
+
#
|
12
|
+
# This filter does not write any additional information to the context hash.
|
13
|
+
#
|
14
|
+
# NOTE This filter is provided for really old comments only. It probably
|
15
|
+
# shouldn't be used for anything new.
|
16
|
+
class TextileFilter < TextFilter
  # Renders the Textile source held in @text with RedCloth.
  #
  # Returns the value of RedCloth's to_html for that text.
  def call
    textile = RedCloth.new(@text)
    textile.to_html
  end
end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
HTML::Pipeline.require_dependency('escape_utils', 'TableOfContentsFilter')
|
2
|
+
|
3
|
+
module HTML
|
4
|
+
class Pipeline
|
5
|
+
# HTML filter that adds an 'id' attribute to all headers
|
6
|
+
# in a document, so they can be accessed from a table of contents.
|
7
|
+
#
|
8
|
+
# Generates the Table of Contents, with links to each header.
|
9
|
+
#
|
10
|
+
# Examples
|
11
|
+
#
|
12
|
+
# TocPipeline =
|
13
|
+
# HTML::Pipeline.new [
|
14
|
+
# HTML::Pipeline::TableOfContentsFilter
|
15
|
+
# ]
|
16
|
+
# # => #<HTML::Pipeline:0x007fc13c4528d8...>
|
17
|
+
# orig = %(<h1>Ice cube</h1><p>is not for the pop chart</p>)
|
18
|
+
# # => "<h1>Ice cube</h1><p>is not for the pop chart</p>"
|
19
|
+
# result = {}
|
20
|
+
# # => {}
|
21
|
+
# TocPipeline.call(orig, {}, result)
|
22
|
+
# # => {:toc=> ...}
|
23
|
+
# result[:toc]
|
24
|
+
# # => "<ul class=\"section-nav\">\n<li><a href=\"#ice-cube\">...</li><ul>"
|
25
|
+
# result[:output].to_s
|
26
|
+
# # => "<h1>\n<a id=\"ice-cube\" class=\"anchor\" href=\"#ice-cube\">..."
|
27
|
+
class TableOfContentsFilter < Filter
  # Matches every character that is not a word character, a hyphen or a
  # space; such characters are dropped when building header ids.
  PUNCTUATION_REGEXP = RUBY_VERSION > '1.9' ? /[^\p{Word}\- ]/u : /[^\w\- ]/

  # The markup placed inside each generated header anchor. Taken from
  # context[:anchor_icon] when present, otherwise an octicon link <span>.
  def anchor_icon
    context[:anchor_icon] || '<span aria-hidden="true" class="octicon octicon-link"></span>'
  end

  # Gives every h1-h6 that has at least one child node an anchor link and
  # collects a matching list entry in result[:toc].
  #
  # Ids are the ASCII-downcased header text with punctuation removed and
  # spaces turned into dashes; a repeated id gets "-1", "-2", ... appended.
  # result[:toc] is left as an empty String when no entry was produced.
  #
  # Returns the document.
  def call
    result[:toc] = ''
    seen = Hash.new(0)

    doc.css('h1, h2, h3, h4, h5, h6').each do |heading|
      title = heading.text
      slug = ascii_downcase(title)
      slug.gsub!(PUNCTUATION_REGEXP, '') # remove punctuation
      slug.tr!(' ', '-') # replace spaces with dash

      # The counter is bumped for every header, even one that ends up
      # without an anchor because it has no children.
      suffix = seen[slug] > 0 ? "-#{seen[slug]}" : ''
      seen[slug] += 1

      first_child = heading.children.first
      next unless first_child

      result[:toc] << %(<li><a href="##{slug}#{suffix}">#{EscapeUtils.escape_html(title)}</a></li>\n)
      first_child.add_previous_sibling(%(<a id="#{slug}#{suffix}" class="anchor" href="##{slug}#{suffix}" aria-hidden="true">#{anchor_icon}</a>))
    end

    result[:toc] = %(<ul class="section-nav">\n#{result[:toc]}</ul>) unless result[:toc].empty?
    doc
  end

  # Downcases a string for use in an id. From Ruby 2.4 on the :ascii option
  # is passed so only ASCII letters are affected; older Rubies use plain
  # String#downcase.
  if RUBY_VERSION >= '2.4'
    def ascii_downcase(str)
      str.downcase(:ascii)
    end
  else
    def ascii_downcase(str)
      str.downcase
    end
  end
end
|
66
|
+
end
|
67
|
+
end
|
@@ -0,0 +1,207 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'active_support/xml_mini/nokogiri' # convert Documents to hashes
|
3
|
+
|
4
|
+
module HTML
|
5
|
+
# GitHub HTML processing filters and utilities. This module includes a small
|
6
|
+
# framework for defining DOM based content filters and applying them to user
|
7
|
+
# provided content.
|
8
|
+
#
|
9
|
+
# See HTML::Pipeline::Filter for information on building filters.
|
10
|
+
#
|
11
|
+
# Construct a Pipeline for running multiple HTML filters. A pipeline is created once
|
12
|
+
# with one to many filters, and it then can be `call`ed many times over the course
|
13
|
+
# of its lifetime with input.
|
14
|
+
#
|
15
|
+
# filters - Array of Filter objects. Each must respond to call(doc,
|
16
|
+
# context) and return the modified DocumentFragment or a
|
17
|
+
# String containing HTML markup. Filters are performed in the
|
18
|
+
# order provided.
|
19
|
+
# default_context - The default context hash. Values specified here will be merged
|
20
|
+
# into values from the each individual pipeline run. Can NOT be
|
21
|
+
# nil. Default: empty Hash.
|
22
|
+
# result_class - The default Class of the result object for individual
|
23
|
+
# calls. Default: Hash. Protip: Pass in a Struct to get
|
24
|
+
# some semblance of type safety.
|
25
|
+
class Pipeline
|
26
|
+
# Lazily load the version constant and the bundled filters on first use.
# This gem ships its files under lib/html/pipeline-plus/, so the autoload
# paths must use that directory; 'html/pipeline/...' does not exist in this
# gem and would raise LoadError on first reference.
autoload :VERSION,               'html/pipeline-plus/version'
autoload :Filter,                'html/pipeline-plus/filter'
autoload :AbsoluteSourceFilter,  'html/pipeline-plus/absolute_source_filter'
autoload :BodyContent,           'html/pipeline-plus/body_content'
autoload :AutolinkFilter,        'html/pipeline-plus/autolink_filter'
autoload :CamoFilter,            'html/pipeline-plus/camo_filter'
autoload :EmailReplyFilter,      'html/pipeline-plus/email_reply_filter'
autoload :EmojiFilter,           'html/pipeline-plus/emoji_filter'
autoload :HttpsFilter,           'html/pipeline-plus/https_filter'
autoload :ImageFilter,           'html/pipeline-plus/image_filter'
autoload :ImageMaxWidthFilter,   'html/pipeline-plus/image_max_width_filter'
autoload :MarkdownFilter,        'html/pipeline-plus/markdown_filter'
autoload :MentionFilter,         'html/pipeline-plus/@mention_filter'
autoload :PlainTextInputFilter,  'html/pipeline-plus/plain_text_input_filter'
autoload :SanitizationFilter,    'html/pipeline-plus/sanitization_filter'
autoload :SyntaxHighlightFilter, 'html/pipeline-plus/syntax_highlight_filter'
autoload :TextileFilter,         'html/pipeline-plus/textile_filter'
autoload :TableOfContentsFilter, 'html/pipeline-plus/toc_filter'
autoload :TextFilter,            'html/pipeline-plus/text_filter'
|
45
|
+
|
46
|
+
class MissingDependencyError < RuntimeError; end
|
47
|
+
def self.require_dependency(name, requirer)
|
48
|
+
require name
|
49
|
+
rescue LoadError => e
|
50
|
+
raise MissingDependencyError,
|
51
|
+
"Missing dependency '#{name}' for #{requirer}. See README.md for details.\n#{e.class.name}: #{e}"
|
52
|
+
end
|
53
|
+
|
54
|
+
# Our DOM implementation.
|
55
|
+
DocumentFragment = Nokogiri::HTML::DocumentFragment
|
56
|
+
|
57
|
+
# Turns a String of HTML into a DocumentFragment.
#
# document_or_html - A String, an already parsed object, or nil/false
#                    (treated as the empty String).
#
# Returns the parsed DocumentFragment for String input; any non-String
# value is returned as-is.
def self.parse(document_or_html)
  source = document_or_html || ''
  return source unless source.is_a?(String)

  DocumentFragment.parse(source)
end
|
67
|
+
|
68
|
+
# Public: Returns an Array of Filter objects for this Pipeline.
|
69
|
+
attr_reader :filters
|
70
|
+
|
71
|
+
# Public: Instrumentation service for the pipeline.
|
72
|
+
# Set an ActiveSupport::Notifications compatible object to enable.
|
73
|
+
attr_accessor :instrumentation_service
|
74
|
+
|
75
|
+
# Public: String name for this Pipeline. Defaults to Class name.
|
76
|
+
attr_writer :instrumentation_name
|
77
|
+
# Reader for the instrumentation name. The first read without a prior
# assignment stores and returns the class name; once the variable has been
# assigned (even to nil) the stored value is returned.
def instrumentation_name
  @instrumentation_name = self.class.name unless defined?(@instrumentation_name)
  @instrumentation_name
end
|
81
|
+
|
82
|
+
# Public: Class-level reader and writer for the instrumentation service
# that #initialize and #setup_instrumentation fall back to.
class << self
  attr_accessor :default_instrumentation_service
end
|
86
|
+
|
87
|
+
# Builds a pipeline.
#
# filters         - Array of filters, possibly nested; stored flattened and
#                   frozen.
# default_context - Hash merged under the context of every #call. Note that
#                   the object passed in is itself frozen.
# result_class    - Class instantiated for the result when #call is given
#                   none. Default: Hash.
#
# Raises ArgumentError when default_context is nil.
def initialize(filters, default_context = {}, result_class = nil)
  raise ArgumentError, 'default_context cannot be nil' if default_context.nil?

  @instrumentation_service = self.class.default_instrumentation_service
  @result_class = result_class || Hash
  @default_context = default_context.freeze
  @filters = filters.flatten.freeze
end
|
94
|
+
|
95
|
+
# Runs the input through every filter, in order.
#
# html    - A String containing HTML or a DocumentFragment object.
# context - Hash of per-call options. It is merged over the default context
#           and frozen before being handed to the filters, so filters MUST
#           NOT modify it in place; use the result for passing state back.
# result  - The object filters store extracted information in. A new
#           instance of the result class is created when omitted.
#
# The whole run is instrumented as 'call_pipeline.html_pipeline'.
#
# Returns the result object; its :output entry holds whatever the last
# filter returned (or the input itself when there are no filters).
def call(html, context = {}, result = nil)
  context = @default_context.merge(context).freeze
  result ||= @result_class.new
  payload = default_payload(filters: @filters.map(&:name), context: context, result: result)

  instrument('call_pipeline.html_pipeline', payload) do
    result[:output] = @filters.reduce(html) do |doc, filter|
      perform_filter(filter, doc, context, result)
    end
  end
  result
end
|
121
|
+
|
122
|
+
# Internal: Runs one filter against the supplied doc, instrumented as
# 'call_filter.html_pipeline' with the filter's name, the context and the
# result in the payload.
#
# Returns the result of the filter.
def perform_filter(filter, doc, context, result)
  details = default_payload(filter: filter.name, context: context, result: result)
  instrument('call_filter.html_pipeline', details) { filter.call(doc, context, result) }
end
|
134
|
+
|
135
|
+
# Like #call, but hands the pipeline's :output to HTML::Pipeline.parse and
# returns that instead of the result object. Use it when a DocumentFragment
# is needed, since a pipeline may end with either a DocumentFragment or a
# String.
def to_document(input, context = {}, result = nil)
  output = call(input, context, result)[:output]
  HTML::Pipeline.parse(output)
end
|
142
|
+
|
143
|
+
# Like #call, but returns the output as a String of HTML markup.
#
# input   - A String containing HTML or a DocumentFragment object.
# context - Hash of per-call options, as for #call.
# result  - Optional result object. It is forwarded to #call, so filters
#           write their extracted data into the caller's object. (This used
#           to be silently replaced by nil, leaving the caller's object
#           empty.)
#
# Returns output.to_html when the pipeline output responds to to_html,
# otherwise output.to_s.
def to_html(input, context = {}, result = nil)
  output = call(input, context, result)[:output]
  output.respond_to?(:to_html) ? output.to_html : output.to_s
end
|
153
|
+
|
154
|
+
# Public: Sets the instrumentation name and service for this pipeline.
#
# name    - Assigned to instrumentation_name as given (nil included).
# service - The instrumentation service; when nil/false, the class-level
#           default_instrumentation_service is used instead.
#
# Returns the service that was assigned.
def setup_instrumentation(name = nil, service = nil)
  self.instrumentation_name = name
  self.instrumentation_service = service || self.class.default_instrumentation_service
end
|
162
|
+
|
163
|
+
# Internal: Runs the block, going through instrumentation_service when one
# is set and yielding directly otherwise.
#
# event   - The event name passed to the service.
# payload - The payload Hash; default_payload is used when nil/false.
#
# The block receives the payload (as yielded by the service when one is in
# use).
#
# Returns the result of the provided block when no service is set, and
# whatever the service's #instrument returns otherwise.
def instrument(event, payload = nil)
  payload ||= default_payload
  if instrumentation_service
    instrumentation_service.instrument(event, payload) { |yielded| yield yielded }
  else
    yield(payload)
  end
end
|
174
|
+
|
175
|
+
# Internal: Builds an instrumentation payload.
#
# payload - Hash of additional data; its entries are merged over the
#           :pipeline entry (the instrumentation name), so a :pipeline key
#           in it wins. The argument itself is not modified.
#
# Returns a new Hash.
def default_payload(payload = {})
  base = { pipeline: instrumentation_name }
  base.merge(payload)
end
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
186
|
+
# XXX Nokogiri monkey patches for Ruby 1.8. They are only applied when
# String lacks #force_encoding; everywhere else this block is skipped.
unless ''.respond_to?(:force_encoding)
  class Nokogiri::XML::Node
    # Keep the stock implementation reachable under a second name.
    alias replace_without_encoding_fix replace

    # Works around utf-8 encoded data being erroneously converted to another
    # encoding when replacing text nodes (see 'utf-8 output 2' in
    # user_content_test.rb for details): string-like replacements are first
    # parsed into nodes inside a throwaway <div>, and those nodes are handed
    # to the original #replace.
    def replace_with_encoding_fix(replacement)
      replacement = document.fragment("<div>#{replacement}</div>").children.first.children if replacement.respond_to?(:to_str)
      replace_without_encoding_fix(replacement)
    end

    alias replace replace_with_encoding_fix

    # Replaces this node like #replace, but returns the receiver.
    def swap(replacement)
      replace(replacement)
      self
    end
  end
end
|
data/test.txt
ADDED
metadata
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: html-pipeline-plus
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 2.10.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ryan Tomayko
|
8
|
+
- Jerry Cheung
|
9
|
+
- Garen J. Torikian
|
10
|
+
- shines77
|
11
|
+
autorequire:
|
12
|
+
bindir: bin
|
13
|
+
cert_chain: []
|
14
|
+
date: 2019-02-18 00:00:00.000000000 Z
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: activesupport
|
18
|
+
requirement: !ruby/object:Gem::Requirement
|
19
|
+
requirements:
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '2'
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '2'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: nokogiri
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
requirements:
|
34
|
+
- - ">="
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '1.4'
|
37
|
+
type: :runtime
|
38
|
+
prerelease: false
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '1.4'
|
44
|
+
description: GitHub HTML processing filters and utilities
|
45
|
+
email:
|
46
|
+
- ryan@github.com
|
47
|
+
- jerry@github.com
|
48
|
+
- gjtorikian@gmail.com
|
49
|
+
- gz_shines@msn.com
|
50
|
+
executables: []
|
51
|
+
extensions: []
|
52
|
+
extra_rdoc_files: []
|
53
|
+
files:
|
54
|
+
- ".gitignore"
|
55
|
+
- ".travis.yml"
|
56
|
+
- Appraisals
|
57
|
+
- CHANGELOG.md
|
58
|
+
- CONTRIBUTING.md
|
59
|
+
- Gemfile
|
60
|
+
- LICENSE
|
61
|
+
- README.md
|
62
|
+
- Rakefile
|
63
|
+
- bin/html-pipeline-plus
|
64
|
+
- html-pipeline-plus.gemspec
|
65
|
+
- lib/html/pipeline-plus.rb
|
66
|
+
- lib/html/pipeline-plus/@mention_filter.rb
|
67
|
+
- lib/html/pipeline-plus/absolute_source_filter.rb
|
68
|
+
- lib/html/pipeline-plus/autolink_filter.rb
|
69
|
+
- lib/html/pipeline-plus/body_content.rb
|
70
|
+
- lib/html/pipeline-plus/camo_filter.rb
|
71
|
+
- lib/html/pipeline-plus/email_reply_filter.rb
|
72
|
+
- lib/html/pipeline-plus/emoji_filter.rb
|
73
|
+
- lib/html/pipeline-plus/filter.rb
|
74
|
+
- lib/html/pipeline-plus/https_filter.rb
|
75
|
+
- lib/html/pipeline-plus/image_filter.rb
|
76
|
+
- lib/html/pipeline-plus/image_max_width_filter.rb
|
77
|
+
- lib/html/pipeline-plus/markdown_filter.rb
|
78
|
+
- lib/html/pipeline-plus/plain_text_input_filter.rb
|
79
|
+
- lib/html/pipeline-plus/sanitization_filter.rb
|
80
|
+
- lib/html/pipeline-plus/syntax_highlight_filter.rb
|
81
|
+
- lib/html/pipeline-plus/text_filter.rb
|
82
|
+
- lib/html/pipeline-plus/textile_filter.rb
|
83
|
+
- lib/html/pipeline-plus/toc_filter.rb
|
84
|
+
- lib/html/pipeline-plus/version.rb
|
85
|
+
- test.txt
|
86
|
+
homepage: https://github.com/shines77/html-pipeline-plus/
|
87
|
+
licenses:
|
88
|
+
- MIT
|
89
|
+
metadata: {}
|
90
|
+
post_install_message: |
|
91
|
+
-------------------------------------------------
|
92
|
+
Thank you for installing html-pipeline-plus!
|
93
|
+
You must bundle Filter gem dependencies.
|
94
|
+
See html-pipeline-plus README.md for more details.
|
95
|
+
https://github.com/shines77/html-pipeline-plus#dependencies
|
96
|
+
-------------------------------------------------
|
97
|
+
rdoc_options: []
|
98
|
+
require_paths:
|
99
|
+
- lib
|
100
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - ">="
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: '0'
|
105
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - ">="
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
requirements: []
|
111
|
+
rubygems_version: 3.0.2
|
112
|
+
signing_key:
|
113
|
+
specification_version: 4
|
114
|
+
summary: Helpers for processing content through a chain of filters
|
115
|
+
test_files: []
|