html-pipeline 2.14.3 → 3.0.0.pre1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/.github/FUNDING.yml +11 -3
  3. data/.github/dependabot.yml +20 -0
  4. data/.github/workflows/automerge.yml +34 -0
  5. data/.github/workflows/lint.yml +23 -0
  6. data/.github/workflows/tag_and_release.yml +70 -0
  7. data/.github/workflows/test.yml +33 -0
  8. data/.rubocop.yml +17 -0
  9. data/CHANGELOG.md +28 -2
  10. data/Gemfile +29 -15
  11. data/{LICENSE → LICENSE.txt} +2 -2
  12. data/README.md +209 -218
  13. data/Rakefile +14 -7
  14. data/UPGRADING.md +35 -0
  15. data/html-pipeline.gemspec +31 -21
  16. data/lib/html-pipeline.rb +3 -0
  17. data/lib/html_pipeline/convert_filter/markdown_filter.rb +26 -0
  18. data/lib/html_pipeline/convert_filter.rb +17 -0
  19. data/lib/html_pipeline/filter.rb +89 -0
  20. data/lib/{html/pipeline → html_pipeline/node_filter}/absolute_source_filter.rb +23 -21
  21. data/lib/{html/pipeline → html_pipeline/node_filter}/emoji_filter.rb +58 -54
  22. data/lib/html_pipeline/node_filter/https_filter.rb +22 -0
  23. data/lib/html_pipeline/node_filter/image_max_width_filter.rb +40 -0
  24. data/lib/{html/pipeline/@mention_filter.rb → html_pipeline/node_filter/mention_filter.rb} +55 -69
  25. data/lib/html_pipeline/node_filter/table_of_contents_filter.rb +68 -0
  26. data/lib/html_pipeline/node_filter/team_mention_filter.rb +105 -0
  27. data/lib/html_pipeline/node_filter.rb +31 -0
  28. data/lib/html_pipeline/sanitization_filter.rb +65 -0
  29. data/lib/{html/pipeline → html_pipeline/text_filter}/image_filter.rb +3 -3
  30. data/lib/{html/pipeline → html_pipeline/text_filter}/plain_text_input_filter.rb +3 -5
  31. data/lib/html_pipeline/text_filter.rb +21 -0
  32. data/lib/html_pipeline/version.rb +5 -0
  33. data/lib/html_pipeline.rb +252 -0
  34. metadata +52 -54
  35. data/.travis.yml +0 -43
  36. data/Appraisals +0 -19
  37. data/CONTRIBUTING.md +0 -60
  38. data/bin/html-pipeline +0 -78
  39. data/lib/html/pipeline/@team_mention_filter.rb +0 -99
  40. data/lib/html/pipeline/autolink_filter.rb +0 -34
  41. data/lib/html/pipeline/body_content.rb +0 -44
  42. data/lib/html/pipeline/camo_filter.rb +0 -105
  43. data/lib/html/pipeline/email_reply_filter.rb +0 -69
  44. data/lib/html/pipeline/filter.rb +0 -165
  45. data/lib/html/pipeline/https_filter.rb +0 -29
  46. data/lib/html/pipeline/image_max_width_filter.rb +0 -37
  47. data/lib/html/pipeline/markdown_filter.rb +0 -56
  48. data/lib/html/pipeline/sanitization_filter.rb +0 -144
  49. data/lib/html/pipeline/syntax_highlight_filter.rb +0 -50
  50. data/lib/html/pipeline/text_filter.rb +0 -16
  51. data/lib/html/pipeline/textile_filter.rb +0 -25
  52. data/lib/html/pipeline/toc_filter.rb +0 -69
  53. data/lib/html/pipeline/version.rb +0 -7
  54. data/lib/html/pipeline.rb +0 -210
@@ -1,29 +1,39 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require File.expand_path('../lib/html/pipeline/version', __FILE__)
3
+ $LOAD_PATH.push(File.expand_path("lib", __dir__))
4
+ require "html_pipeline/version"
4
5
 
5
6
  Gem::Specification.new do |gem|
6
- gem.name = 'html-pipeline'
7
- gem.version = HTML::Pipeline::VERSION
8
- gem.license = 'MIT'
9
- gem.authors = ['Ryan Tomayko', 'Jerry Cheung', 'Garen J. Torikian']
10
- gem.email = ['ryan@github.com', 'jerry@github.com', 'gjtorikian@gmail.com']
11
- gem.description = 'GitHub HTML processing filters and utilities'
12
- gem.summary = 'Helpers for processing content through a chain of filters'
13
- gem.homepage = 'https://github.com/jch/html-pipeline'
7
+ gem.name = "html-pipeline"
8
+ gem.version = HTMLPipeline::VERSION
9
+ gem.license = "MIT"
10
+ gem.authors = ["Garen J. Torikian"]
11
+ gem.email = ["gjtorikian@gmail.com"]
12
+ gem.description = "HTML processing filters and utilities"
13
+ gem.summary = "Helpers for processing content through a chain of filters"
14
+ gem.homepage = "https://github.com/gjtorikian/html-pipeline"
14
15
 
15
- gem.files = `git ls-files -z`.split("\x0").reject { |f| f =~ %r{^(test|gemfiles|script)/} }
16
- gem.require_paths = ['lib']
16
+ gem.files = %x(git ls-files -z).split("\x0").reject { |f| f =~ %r{^(test|gemfiles|script)/} }
17
+ gem.require_paths = ["lib"]
17
18
 
18
- gem.add_dependency 'activesupport', '>= 2'
19
- gem.add_dependency 'nokogiri', '>= 1.4'
19
+ gem.required_ruby_version = "~> 3.1"
20
+ # https://github.com/rubygems/rubygems/pull/5852#issuecomment-1231118509
21
+ gem.required_rubygems_version = ">= 3.3.22"
20
22
 
21
- gem.post_install_message = <<msg
22
- -------------------------------------------------
23
- Thank you for installing html-pipeline!
24
- You must bundle Filter gem dependencies.
25
- See html-pipeline README.md for more details.
26
- https://github.com/jch/html-pipeline#dependencies
27
- -------------------------------------------------
28
- msg
23
+ gem.metadata = {
24
+ "funding_uri" => "https://github.com/sponsors/gjtorikian/",
25
+ "rubygems_mfa_required" => "true",
26
+ }
27
+
28
+ gem.add_dependency("selma", "~> 0.0.1")
29
+ gem.add_dependency("zeitwerk", "~> 2.5")
30
+
31
+ gem.post_install_message = <<~MSG
32
+ -------------------------------------------------
33
+ Thank you for installing html-pipeline!
34
+ You must bundle filter gem dependencies.
35
+ See the html-pipeline README.md for more details:
36
+ https://github.com/gjtorikian/html-pipeline#dependencies
37
+ -------------------------------------------------
38
+ MSG
29
39
  end
@@ -0,0 +1,3 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "html_pipeline"
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ HTMLPipeline.require_dependency("commonmarker", "MarkdownFilter")
4
+
5
+ class HTMLPipeline
6
+ class ConvertFilter
7
+ # HTML Filter that converts Markdown text into HTML.
8
+ #
9
+ # Context options:
10
+ # :markdown[:parse] => Commonmarker parse options
11
+ # :markdown[:render] => Commonmarker render options
12
+ # :markdown[:extensions] => Commonmarker extensions options
13
+ class MarkdownFilter < ConvertFilter
14
+ def initialize(context: {}, result: {})
15
+ super(context: context, result: result)
16
+ end
17
+
18
+ # Convert Commonmark to HTML using the best available implementation.
19
+ def call(text)
20
+ options = @context.fetch(:markdown, {})
21
+ plugins = options.fetch(:plugins, {})
22
+ Commonmarker.to_html(text, options: options, plugins: plugins).rstrip!
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ class HTMLPipeline
4
+ class ConvertFilter < Filter
5
+ attr_reader :text, :html
6
+
7
+ def initialize(context: {}, result: {})
8
+ super(context: context, result: result)
9
+ end
10
+
11
+ class << self
12
+ def call(text, context: {}, result: {})
13
+ new(context: context, result: result).call(text)
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
+ class HTMLPipeline
4
+ # Base class for user content HTML filters. Each filter takes an
5
+ # HTML string, performs modifications on it, and/or writes information to a result hash.
6
+ # Filters must return a String with HTML markup.
7
+ #
8
+ # The `context` Hash passes options to filters and should not be changed in
9
+ # place. A `result` Hash allows filters to make extracted information
10
+ # available to the caller, and is mutable.
11
+ #
12
+ # Common context options:
13
+ # :base_url - The site's base URL
14
+ # :repository - A Repository providing context for the HTML being processed
15
+ #
16
+ # Each filter may define additional options and output values. See the class
17
+ # docs for more info.
18
+ class Filter
19
+ class InvalidDocumentException < StandardError; end
20
+
21
+ def initialize(context: {}, result: {})
22
+ @context = context
23
+ @result = result
24
+ validate
25
+ end
26
+
27
+ # Public: Returns a simple Hash used to pass extra information into filters
28
+ # and also to allow filters to make extracted information available to the
29
+ # caller.
30
+ attr_reader :context
31
+
32
+ # Public: Returns a Hash used to allow filters to pass back information
33
+ # to callers of the various Pipelines. This can be used for
34
+ # #mentioned_users, for example.
35
+ attr_reader :result
36
+
37
+ # The main filter entry point. The doc attribute is guaranteed to be a
38
+ # string when invoked. Subclasses should modify
39
+ # this text in place or extract information and add it to the result
40
+ # hash.
41
+ def call
42
+ raise NoMethodError
43
+ end
44
+
45
+ class << self
46
+ # Perform a filter on doc with the given context.
47
+ #
48
+ # Returns a String comprised of HTML markup.
49
+ def call(input, context: {})
50
+ raise NoMethodError
51
+ end
52
+ end
53
+ # Make sure the context has everything we need. Noop: Subclasses can override.
54
+ def validate; end
55
+
56
+ # The site's base URL provided in the context hash, or '/' when no
57
+ # base URL was specified.
58
+ def base_url
59
+ context[:base_url] || "/"
60
+ end
61
+
62
+ # Helper method for filter subclasses used to determine if any of a node's
63
+ # ancestors have one of the tag names specified.
64
+ #
65
+ # element - The element to check.
66
+ # ancestor - A tag name string to look for among the element's ancestors. This should be downcase.
67
+ #
68
+ # Returns true when the node has a matching ancestor.
69
+ def has_ancestor?(element, ancestor)
70
+ ancestors = element.ancestors
71
+ ancestors.include?(ancestor)
72
+ end
73
+
74
+ # Validator for required context. This will check that anything passed in
75
+ # keys exists in the context hash.
76
+ #
77
+ # If any errors are found an ArgumentError will be raised with a
78
+ # message listing all the missing contexts and the filters that
79
+ # require them.
80
+ def needs(*keys)
81
+ missing = keys.reject { |key| context.include?(key) }
82
+
83
+ return unless missing.any?
84
+
85
+ raise ArgumentError,
86
+ "Missing context keys for #{self.class.name}: #{missing.map(&:inspect).join(", ")}"
87
+ end
88
+ end
89
+ end
@@ -1,10 +1,16 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'uri'
3
+ require "uri"
4
+
5
+ class HTMLPipeline
6
+ class NodeFilter
7
+ class AbsoluteSourceFilter < NodeFilter
8
+ SELECTOR = Selma::Selector.new(match_element: "img")
9
+
10
+ def selector
11
+ SELECTOR
12
+ end
4
13
 
5
- module HTML
6
- class Pipeline
7
- class AbsoluteSourceFilter < Filter
8
14
  # HTML Filter for replacing relative and root relative image URLs with
9
15
  # fully qualified URLs
10
16
  #
@@ -18,24 +24,20 @@ module HTML
18
24
  #
19
25
  # This filter does not write additional information to the context.
20
26
  # This filter would need to be run before CamoFilter.
21
- def call
22
- doc.search('img').each do |element|
23
- next if element['src'].nil? || element['src'].empty?
24
- src = element['src'].strip
25
- next if src.start_with? 'http'
26
- base = if src.start_with? '/'
27
- image_base_url
28
- else
29
- image_subpage_url
30
- end
31
-
32
- begin
33
- element['src'] = URI.join(base, src).to_s
34
- rescue Exception
35
- next
36
- end
27
+ def handle_element(element)
28
+ src = element["src"]
29
+ return if src.nil? || src.empty?
30
+
31
+ src = src.strip
32
+ return if src.start_with?("http")
33
+
34
+ base = if src.start_with?("/")
35
+ image_base_url
36
+ else
37
+ image_subpage_url
37
38
  end
38
- doc
39
+
40
+ element["src"] = URI.join(base, src).to_s
39
41
  end
40
42
 
41
43
  # Private: the base url you want to use
@@ -1,10 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'cgi'
4
- HTML::Pipeline.require_dependency('gemoji', 'EmojiFilter')
3
+ require "cgi"
4
+ HTMLPipeline.require_dependencies(["gemoji", "gemojione"], "EmojiFilter")
5
5
 
6
- module HTML
7
- class Pipeline
6
+ class HTMLPipeline
7
+ class NodeFilter
8
8
  # HTML filter that replaces :emoji: with images.
9
9
  #
10
10
  # Context:
@@ -12,25 +12,28 @@ module HTML
12
12
  # :asset_path (optional) - url path to link to emoji sprite. :file_name can be used as a placeholder for the sprite file name. If no asset_path is set "emoji/:file_name" is used.
13
13
  # :ignored_ancestor_tags (optional) - Tags to stop the emojification. Nodes with a matching ancestor HTML tag will not be emojified. Defaults to pre, code, and tt tags. Pass extra tags as an array, e.g., %w(blockquote summary).
14
14
  # :img_attrs (optional) - Attributes for generated img tag. E.g. Pass { "draggable" => true, "height" => nil } to set draggable attribute to "true" and clear height attribute of generated img tag.
15
- class EmojiFilter < Filter
16
- DEFAULT_IGNORED_ANCESTOR_TAGS = %w[pre code tt].freeze
17
-
18
- def call
19
- doc.search('.//text()').each do |node|
20
- content = node.text
21
- next unless content.include?(':')
22
- next if has_ancestor?(node, ignored_ancestor_tags)
23
- html = emoji_image_filter(content)
24
- next if html == content
25
- node.replace(html)
26
- end
27
- doc
15
+ class EmojiFilter < NodeFilter
16
+ DEFAULT_IGNORED_ANCESTOR_TAGS = ["pre", "code", "tt"].freeze
17
+
18
+ # Build a regexp that matches all valid :emoji: names.
19
+ def after_initialize
20
+ @emoji_pattern ||= /:(#{emoji_names.map { |name| Regexp.escape(name) }.join('|')}):/
21
+ end
22
+
23
+ def selector
24
+ Selma::Selector.new(match_text_within: "*", ignore_text_within: ignored_ancestor_tags)
25
+ end
26
+
27
+ def handle_text_chunk(text)
28
+ return unless text.to_s.include?(":")
29
+
30
+ text.replace(emoji_image_filter(text.to_s), as: :html)
28
31
  end
29
32
 
30
33
  # Implementation of validate hook.
31
34
  # Errors should raise exceptions or use an existing validator.
32
35
  def validate
33
- needs :asset_root
36
+ needs(:asset_root)
34
37
  end
35
38
 
36
39
  # Replace :emoji: with corresponding images.
@@ -39,7 +42,7 @@ module HTML
39
42
  #
40
43
  # Returns a String with :emoji: replaced with images.
41
44
  def emoji_image_filter(text)
42
- text.gsub(emoji_pattern) do |_match|
45
+ text.gsub(@emoji_pattern) do
43
46
  emoji_image_tag(Regexp.last_match(1))
44
47
  end
45
48
  end
@@ -58,64 +61,65 @@ module HTML
58
61
  # Returns the context's asset_path or the default path if no context asset_path is given.
59
62
  def asset_path(name)
60
63
  if context[:asset_path]
61
- context[:asset_path].gsub(':file_name', emoji_filename(name))
64
+ context[:asset_path].gsub(":file_name", emoji_filename(name))
62
65
  else
63
- File.join('emoji', emoji_filename(name))
66
+ File.join("emoji", emoji_filename(name))
64
67
  end
65
68
  end
66
69
 
67
- private
68
-
69
70
  # Build an emoji image tag
70
- def emoji_image_tag(name)
71
- require 'active_support/core_ext/hash/indifferent_access'
71
+ private def emoji_image_tag(name)
72
72
  html_attrs =
73
- default_img_attrs(name)
74
- .merge!((context[:img_attrs] || {}).with_indifferent_access)
75
- .map { |attr, value| !value.nil? && %(#{attr}="#{value.respond_to?(:call) && value.call(name) || value}") }
76
- .reject(&:blank?).join(' '.freeze)
73
+ default_img_attrs(name).transform_keys(&:to_sym)
74
+ .merge!(context[:img_attrs] || {}).transform_keys(&:to_sym)
75
+ .each_with_object([]) do |(attr, value), arr|
76
+ next if value.nil?
77
+
78
+ value = value.respond_to?(:call) && value.call(name) || value
79
+ arr << %(#{attr}="#{value}")
80
+ end.compact.join(" ")
77
81
 
78
82
  "<img #{html_attrs}>"
79
83
  end
80
84
 
85
+ def emoji_names
86
+ if self.class.gemoji_loaded?
87
+ Emoji.all.map(&:aliases)
88
+ else
89
+ Gemojione::Index.new.all.map { |i| i[1]["name"] }
90
+ end.flatten.sort
91
+ end
92
+
81
93
  # Default attributes for img tag
82
- def default_img_attrs(name)
94
+ private def default_img_attrs(name)
83
95
  {
84
- 'class' => 'emoji'.freeze,
85
- 'title' => ":#{name}:",
86
- 'alt' => ":#{name}:",
87
- 'src' => emoji_url(name).to_s,
88
- 'height' => '20'.freeze,
89
- 'width' => '20'.freeze,
90
- 'align' => 'absmiddle'.freeze
96
+ "class" => "emoji",
97
+ "title" => ":#{name}:",
98
+ "alt" => ":#{name}:",
99
+ "src" => emoji_url(name).to_s,
100
+ "height" => "20",
101
+ "width" => "20",
102
+ "align" => "absmiddle",
91
103
  }
92
104
  end
93
105
 
94
- def emoji_url(name)
106
+ private def emoji_url(name)
95
107
  File.join(asset_root, asset_path(name))
96
108
  end
97
109
 
98
- # Build a regexp that matches all valid :emoji: names.
99
- def self.emoji_pattern
100
- @emoji_pattern ||= /:(#{emoji_names.map { |name| Regexp.escape(name) }.join('|')}):/
101
- end
102
-
103
- def emoji_pattern
104
- self.class.emoji_pattern
105
- end
106
-
107
- def self.emoji_names
108
- Emoji.all.map(&:aliases).flatten.sort
109
- end
110
-
111
- def emoji_filename(name)
112
- Emoji.find_by_alias(name).image_filename
110
+ private def emoji_filename(name)
111
+ if self.class.gemoji_loaded?
112
+ Emoji.find_by_alias(name).image_filename
113
+ else
114
+ # replace their asset_host with ours
115
+ Gemojione.image_url_for_name(name).sub(Gemojione.asset_host, "")
116
+ end
113
117
  end
114
118
 
115
119
  # Return ancestor tags to stop the emojification.
116
120
  #
117
121
  # @return [Array<String>] Ancestor tags.
118
- def ignored_ancestor_tags
122
+ private def ignored_ancestor_tags
119
123
  if context[:ignored_ancestor_tags]
120
124
  DEFAULT_IGNORED_ANCESTOR_TAGS | context[:ignored_ancestor_tags]
121
125
  else
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ class HTMLPipeline
4
+ class NodeFilter
5
+ # HTML Filter for replacing http references to :http_url with https versions.
6
+ # Subdomain references are not rewritten.
7
+ #
8
+ # Context options:
9
+ # :http_url - The HTTP url to force HTTPS. Falls back to :base_url
10
+ class HttpsFilter < NodeFilter
11
+ SELECTOR = Selma::Selector.new(match_element: %(a[href^="http:"]))
12
+
13
+ def selector
14
+ SELECTOR
15
+ end
16
+
17
+ def handle_element(element)
18
+ element["href"] = element["href"].sub(/^http:/, "https:")
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ class HTMLPipeline
4
+ class NodeFilter
5
+ # This filter rewrites image tags with a max-width inline style and also wraps
6
+ # the image in an <a> tag that causes the full size image to be opened in a
7
+ # new tab.
8
+ #
9
+ # The max-width inline styles are especially useful in HTML email which
10
+ # doesn't use global stylesheets.
11
+ class ImageMaxWidthFilter < NodeFilter
12
+ SELECTOR = Selma::Selector.new(match_element: "img")
13
+
14
+ def selector
15
+ SELECTOR
16
+ end
17
+
18
+ def handle_element(element)
19
+ # Skip if there's already a style attribute. Not sure how this
20
+ # would happen but we can reconsider it in the future.
21
+ return if element["style"]
22
+
23
+ # Bail out if src doesn't look like a valid http url. Trying to avoid weird
24
+ # js injection via javascript: urls.
25
+ return if /\Ajavascript/i.match?(element["src"].to_s.strip)
26
+
27
+ element["style"] = "max-width:100%;"
28
+
29
+ link_image(element) unless has_ancestor?(element, "a")
30
+ end
31
+
32
+ def link_image(element)
33
+ link_start = %(<a target="_blank" href="#{element["src"]}">)
34
+ element.before(link_start, as: :html)
35
+ link_end = "</a>"
36
+ element.after(link_end, as: :html)
37
+ end
38
+ end
39
+ end
40
+ end