html-pipeline 2.13.1 → 2.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/README.md +4 -4
 - data/lib/html/pipeline/camo_filter.rb +4 -14
 - data/lib/html/pipeline/sanitization_filter.rb +18 -23
 - data/lib/html/pipeline/version.rb +1 -1
 - metadata +4 -4
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA256:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: a897e9a165c69f5823336a5ca9cf92205c191ad6f90ae0d283be3234a9fefcc4
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: a0fb7f552a185bc2ffeb4aba3c8c50a5659b0277763ec0dcbbf28b6f4ce3bfc6
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: c90bcf04d6113d1c9d7a33a4f0dc235dc9d4e302fa1683e36e4c34e96777a307bd5b76b23bad07a02b6bdea8e5577e90e90a57f346e579bbcc6d287f4023623d
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 2ea63f52f5365e66d9742d12d28c248794ad7289bd8e68e7b963e42041c1c7ef30be936cd364ed79cb66c43da73c45e9905b90148cae41ecaab5156aa797b0f6
         
     | 
    
        data/README.md
    CHANGED
    
    | 
         @@ -33,7 +33,7 @@ And then execute: 
     | 
|
| 
       33 
33 
     | 
    
         
             
            $ bundle
         
     | 
| 
       34 
34 
     | 
    
         
             
            ```
         
     | 
| 
       35 
35 
     | 
    
         | 
| 
       36 
     | 
    
         
            -
            Or install it  
     | 
| 
      
 36 
     | 
    
         
            +
            Or install it yourself as:
         
     | 
| 
       37 
37 
     | 
    
         | 
| 
       38 
38 
     | 
    
         
             
            ```sh
         
     | 
| 
       39 
39 
     | 
    
         
             
            $ gem install html-pipeline
         
     | 
| 
         @@ -164,7 +164,7 @@ EmojiPipeline = Pipeline.new [ 
     | 
|
| 
       164 
164 
     | 
    
         
             
            * `ImageMaxWidthFilter` - link to full size image for large images
         
     | 
| 
       165 
165 
     | 
    
         
             
            * `MarkdownFilter` - convert markdown to html
         
     | 
| 
       166 
166 
     | 
    
         
             
            * `PlainTextInputFilter` - html escape text and wrap the result in a div
         
     | 
| 
       167 
     | 
    
         
            -
            * `SanitizationFilter` -  
     | 
| 
      
 167 
     | 
    
         
            +
            * `SanitizationFilter` - whitelist sanitize user markup
         
     | 
| 
       168 
168 
     | 
    
         
             
            * `SyntaxHighlightFilter` - code syntax highlighter
         
     | 
| 
       169 
169 
     | 
    
         
             
            * `TextileFilter` - convert textile to html
         
     | 
| 
       170 
170 
     | 
    
         
             
            * `TableOfContentsFilter` - anchor headings with name attributes and generate Table of Contents html unordered list linking headings
         
     | 
| 
         @@ -330,9 +330,9 @@ html_fragment = "This is outside of an html element, but <strong>this isn't. :+1 
     | 
|
| 
       330 
330 
     | 
    
         
             
            EmojiPipeline.call("<div>#{html_fragment}</div>") # <- Wrap your own html fragments to avoid escaping
         
     | 
| 
       331 
331 
     | 
    
         
             
            ```
         
     | 
| 
       332 
332 
     | 
    
         | 
| 
       333 
     | 
    
         
            -
            ### 2. How do I customize  
     | 
| 
      
 333 
     | 
    
         
            +
            ### 2. How do I customize a whitelist for `SanitizationFilter`s?
         
     | 
| 
       334 
334 
     | 
    
         | 
| 
       335 
     | 
    
         
            -
            `SanitizationFilter:: 
     | 
| 
      
 335 
     | 
    
         
            +
            `SanitizationFilter::WHITELIST` is the default whitelist used if no `:whitelist`
         
     | 
| 
       336 
336 
     | 
    
         
             
            argument is given in the context. The default is a good starting template for
         
     | 
| 
       337 
337 
     | 
    
         
             
            you to add additional elements. You can either modify the constant's value, or
         
     | 
| 
       338 
338 
     | 
    
         
             
            re-define your own constant and pass that in via the context.
         
     | 
    
        data/lib/html/pipeline/camo_filter.rb
    CHANGED
    
    | 
         @@ -16,7 +16,7 @@ module HTML 
     | 
|
| 
       16 
16 
     | 
    
         
             
                # Context options:
         
     | 
| 
       17 
17 
     | 
    
         
             
                #   :asset_proxy (required) - Base URL for constructed asset proxy URLs.
         
     | 
| 
       18 
18 
     | 
    
         
             
                #   :asset_proxy_secret_key (required) - The shared secret used to encode URLs.
         
     | 
| 
       19 
     | 
    
         
            -
                #   : 
     | 
| 
      
 19 
     | 
    
         
            +
                #   :asset_proxy_whitelist - Array of host Strings or Regexps to skip
         
     | 
| 
       20 
20 
     | 
    
         
             
                #                            src rewriting.
         
     | 
| 
       21 
21 
     | 
    
         
             
                #
         
     | 
| 
       22 
22 
     | 
    
         
             
                # This filter does not write additional information to the context.
         
     | 
| 
         @@ -37,7 +37,7 @@ module HTML 
     | 
|
| 
       37 
37 
     | 
    
         
             
                      end
         
     | 
| 
       38 
38 
     | 
    
         | 
| 
       39 
39 
     | 
    
         
             
                      next if uri.host.nil?
         
     | 
| 
       40 
     | 
    
         
            -
                      next if  
     | 
| 
      
 40 
     | 
    
         
            +
                      next if asset_host_whitelisted?(uri.host)
         
     | 
| 
       41 
41 
     | 
    
         | 
| 
       42 
42 
     | 
    
         
             
                      element['src'] = asset_proxy_url(original_src)
         
     | 
| 
       43 
43 
     | 
    
         
             
                      element['data-canonical-src'] = original_src
         
     | 
| 
         @@ -76,21 +76,11 @@ module HTML 
     | 
|
| 
       76 
76 
     | 
    
         
             
                  end
         
     | 
| 
       77 
77 
     | 
    
         | 
| 
       78 
78 
     | 
    
         
             
                  def asset_proxy_whitelist
         
     | 
| 
       79 
     | 
    
         
            -
                     
     | 
| 
       80 
     | 
    
         
            -
                    asset_proxy_allowlist
         
     | 
| 
       81 
     | 
    
         
            -
                  end
         
     | 
| 
       82 
     | 
    
         
            -
             
     | 
| 
       83 
     | 
    
         
            -
                  def asset_proxy_allowlist
         
     | 
| 
       84 
     | 
    
         
            -
                    context[:asset_proxy_allowlist] || context[:asset_proxy_whitelist] || []
         
     | 
| 
      
 79 
     | 
    
         
            +
                    context[:asset_proxy_whitelist] || []
         
     | 
| 
       85 
80 
     | 
    
         
             
                  end
         
     | 
| 
       86 
81 
     | 
    
         | 
| 
       87 
82 
     | 
    
         
             
                  def asset_host_whitelisted?(host)
         
     | 
| 
       88 
     | 
    
         
            -
                     
     | 
| 
       89 
     | 
    
         
            -
                    asset_host_allowed?(host)
         
     | 
| 
       90 
     | 
    
         
            -
                  end
         
     | 
| 
       91 
     | 
    
         
            -
             
     | 
| 
       92 
     | 
    
         
            -
                  def asset_host_allowed?(host)
         
     | 
| 
       93 
     | 
    
         
            -
                    asset_proxy_allowlist.any? do |test|
         
     | 
| 
      
 83 
     | 
    
         
            +
                    asset_proxy_whitelist.any? do |test|
         
     | 
| 
       94 
84 
     | 
    
         
             
                      test.is_a?(String) ? host == test : test.match(host)
         
     | 
| 
       95 
85 
     | 
    
         
             
                    end
         
     | 
| 
       96 
86 
     | 
    
         
             
                  end
         
     | 
    
        data/lib/html/pipeline/sanitization_filter.rb
    CHANGED
    
    | 
         @@ -4,7 +4,7 @@ HTML::Pipeline.require_dependency('sanitize', 'SanitizationFilter') 
     | 
|
| 
       4 
4 
     | 
    
         | 
| 
       5 
5 
     | 
    
         
             
            module HTML
         
     | 
| 
       6 
6 
     | 
    
         
             
              class Pipeline
         
     | 
| 
       7 
     | 
    
         
            -
                # HTML filter with sanization routines and  
     | 
| 
      
 7 
     | 
    
         
            +
                # HTML filter with sanization routines and whitelists. This module defines
         
     | 
| 
       8 
8 
     | 
    
         
             
                # what HTML is allowed in user provided content and fixes up issues with
         
     | 
| 
       9 
9 
     | 
    
         
             
                # unbalanced tags and whatnot.
         
     | 
| 
       10 
10 
     | 
    
         
             
                #
         
     | 
| 
         @@ -13,13 +13,13 @@ module HTML 
     | 
|
| 
       13 
13 
     | 
    
         
             
                # https://github.com/rgrove/sanitize/#readme
         
     | 
| 
       14 
14 
     | 
    
         
             
                #
         
     | 
| 
       15 
15 
     | 
    
         
             
                # Context options:
         
     | 
| 
       16 
     | 
    
         
            -
                #   : 
     | 
| 
      
 16 
     | 
    
         
            +
                #   :whitelist      - The sanitizer whitelist configuration to use. This
         
     | 
| 
       17 
17 
     | 
    
         
             
                #                     can be one of the options constants defined in this
         
     | 
| 
       18 
18 
     | 
    
         
             
                #                     class or a custom sanitize options hash.
         
     | 
| 
       19 
19 
     | 
    
         
             
                #   :anchor_schemes - The URL schemes to allow in <a href> attributes. The
         
     | 
| 
       20 
20 
     | 
    
         
             
                #                     default set is provided in the ANCHOR_SCHEMES
         
     | 
| 
       21 
21 
     | 
    
         
             
                #                     constant in this class. If passed, this overrides any
         
     | 
| 
       22 
     | 
    
         
            -
                #                     schemes specified in the  
     | 
| 
      
 22 
     | 
    
         
            +
                #                     schemes specified in the whitelist configuration.
         
     | 
| 
       23 
23 
     | 
    
         
             
                #
         
     | 
| 
       24 
24 
     | 
    
         
             
                # This filter does not write additional information to the context.
         
     | 
| 
       25 
25 
     | 
    
         
             
                class SanitizationFilter < Filter
         
     | 
| 
         @@ -37,9 +37,9 @@ module HTML 
     | 
|
| 
       37 
37 
     | 
    
         
             
                  # These schemes are the only ones allowed in <a href> attributes by default.
         
     | 
| 
       38 
38 
     | 
    
         
             
                  ANCHOR_SCHEMES = ['http', 'https', 'mailto', 'xmpp', :relative, 'github-windows', 'github-mac', 'irc', 'ircs'].freeze
         
     | 
| 
       39 
39 
     | 
    
         | 
| 
       40 
     | 
    
         
            -
                  # The main sanitization  
     | 
| 
      
 40 
     | 
    
         
            +
                  # The main sanitization whitelist. Only these elements and attributes are
         
     | 
| 
       41 
41 
     | 
    
         
             
                  # allowed through by default.
         
     | 
| 
       42 
     | 
    
         
            -
                   
     | 
| 
      
 42 
     | 
    
         
            +
                  WHITELIST = {
         
     | 
| 
       43 
43 
     | 
    
         
             
                    elements: %w[
         
     | 
| 
       44 
44 
     | 
    
         
             
                      h1 h2 h3 h4 h5 h6 h7 h8 br b i strong em a pre code img tt
         
     | 
| 
       45 
45 
     | 
    
         
             
                      div ins del sup sub p ol ul table thead tbody tfoot blockquote
         
     | 
| 
         @@ -68,7 +68,7 @@ module HTML 
     | 
|
| 
       68 
68 
     | 
    
         
             
                              hspace ismap label lang
         
     | 
| 
       69 
69 
     | 
    
         
             
                              maxlength media method
         
     | 
| 
       70 
70 
     | 
    
         
             
                              multiple name nohref noshade
         
     | 
| 
       71 
     | 
    
         
            -
                              nowrap open  
     | 
| 
      
 71 
     | 
    
         
            +
                              nowrap open prompt readonly rel rev
         
     | 
| 
       72 
72 
     | 
    
         
             
                              role rows rowspan rules scope
         
     | 
| 
       73 
73 
     | 
    
         
             
                              selected shape size span
         
     | 
| 
       74 
74 
     | 
    
         
             
                              start summary tabindex target
         
     | 
| 
         @@ -108,10 +108,10 @@ module HTML 
     | 
|
| 
       108 
108 
     | 
    
         
             
                    ].freeze
         
     | 
| 
       109 
109 
     | 
    
         
             
                  }.freeze
         
     | 
| 
       110 
110 
     | 
    
         | 
| 
       111 
     | 
    
         
            -
                  # A more limited sanitization  
     | 
| 
       112 
     | 
    
         
            -
                  # protocols, and transformers from  
     | 
| 
      
 111 
     | 
    
         
            +
                  # A more limited sanitization whitelist. This includes all attributes,
         
     | 
| 
      
 112 
     | 
    
         
            +
                  # protocols, and transformers from WHITELIST but with a more locked down
         
     | 
| 
       113 
113 
     | 
    
         
             
                  # set of allowed elements.
         
     | 
| 
       114 
     | 
    
         
            -
                  LIMITED =  
     | 
| 
      
 114 
     | 
    
         
            +
                  LIMITED = WHITELIST.merge(
         
     | 
| 
       115 
115 
     | 
    
         
             
                    elements: %w[b i strong em a pre code img ins del sup sub mark abbr p ol ul li]
         
     | 
| 
       116 
116 
     | 
    
         
             
                  )
         
     | 
| 
       117 
117 
     | 
    
         | 
| 
         @@ -120,24 +120,19 @@ module HTML 
     | 
|
| 
       120 
120 
     | 
    
         | 
| 
       121 
121 
     | 
    
         
             
                  # Sanitize markup using the Sanitize library.
         
     | 
| 
       122 
122 
     | 
    
         
             
                  def call
         
     | 
| 
       123 
     | 
    
         
            -
                    Sanitize.clean_node!(doc,  
     | 
| 
      
 123 
     | 
    
         
            +
                    Sanitize.clean_node!(doc, whitelist)
         
     | 
| 
       124 
124 
     | 
    
         
             
                  end
         
     | 
| 
       125 
125 
     | 
    
         | 
| 
      
 126 
     | 
    
         
            +
                  # The whitelist to use when sanitizing. This can be passed in the context
         
     | 
| 
      
 127 
     | 
    
         
            +
                  # hash to the filter but defaults to WHITELIST constant value above.
         
     | 
| 
       126 
128 
     | 
    
         
             
                  def whitelist
         
     | 
| 
       127 
     | 
    
         
            -
                     
     | 
| 
       128 
     | 
    
         
            -
                    allowlist
         
     | 
| 
       129 
     | 
    
         
            -
                  end
         
     | 
| 
       130 
     | 
    
         
            -
             
     | 
| 
       131 
     | 
    
         
            -
                  # The allowlist to use when sanitizing. This can be passed in the context
         
     | 
| 
       132 
     | 
    
         
            -
                  # hash to the filter but defaults to ALLOWLIST constant value above.
         
     | 
| 
       133 
     | 
    
         
            -
                  def allowlist
         
     | 
| 
       134 
     | 
    
         
            -
                    allowlist = context[:allowlist] || context[:whitelist] || ALLOWLIST
         
     | 
| 
      
 129 
     | 
    
         
            +
                    whitelist = context[:whitelist] || WHITELIST
         
     | 
| 
       135 
130 
     | 
    
         
             
                    anchor_schemes = context[:anchor_schemes]
         
     | 
| 
       136 
     | 
    
         
            -
                    return  
     | 
| 
       137 
     | 
    
         
            -
                     
     | 
| 
       138 
     | 
    
         
            -
                     
     | 
| 
       139 
     | 
    
         
            -
                     
     | 
| 
       140 
     | 
    
         
            -
                     
     | 
| 
      
 131 
     | 
    
         
            +
                    return whitelist unless anchor_schemes
         
     | 
| 
      
 132 
     | 
    
         
            +
                    whitelist = whitelist.dup
         
     | 
| 
      
 133 
     | 
    
         
            +
                    whitelist[:protocols] = (whitelist[:protocols] || {}).dup
         
     | 
| 
      
 134 
     | 
    
         
            +
                    whitelist[:protocols]['a'] = (whitelist[:protocols]['a'] || {}).merge('href' => anchor_schemes)
         
     | 
| 
      
 135 
     | 
    
         
            +
                    whitelist
         
     | 
| 
       141 
136 
     | 
    
         
             
                  end
         
     | 
| 
       142 
137 
     | 
    
         
             
                end
         
     | 
| 
       143 
138 
     | 
    
         
             
              end
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,16 +1,16 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: html-pipeline
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 2. 
     | 
| 
      
 4 
     | 
    
         
            +
              version: 2.14.0
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Ryan Tomayko
         
     | 
| 
       8 
8 
     | 
    
         
             
            - Jerry Cheung
         
     | 
| 
       9 
9 
     | 
    
         
             
            - Garen J. Torikian
         
     | 
| 
       10 
     | 
    
         
            -
            autorequire:
         
     | 
| 
      
 10 
     | 
    
         
            +
            autorequire: 
         
     | 
| 
       11 
11 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       12 
12 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       13 
     | 
    
         
            -
            date: 2020- 
     | 
| 
      
 13 
     | 
    
         
            +
            date: 2020-08-11 00:00:00.000000000 Z
         
     | 
| 
       14 
14 
     | 
    
         
             
            dependencies:
         
     | 
| 
       15 
15 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       16 
16 
     | 
    
         
             
              name: activesupport
         
     | 
| 
         @@ -108,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement 
     | 
|
| 
       108 
108 
     | 
    
         
             
                  version: '0'
         
     | 
| 
       109 
109 
     | 
    
         
             
            requirements: []
         
     | 
| 
       110 
110 
     | 
    
         
             
            rubygems_version: 3.1.2
         
     | 
| 
       111 
     | 
    
         
            -
            signing_key:
         
     | 
| 
      
 111 
     | 
    
         
            +
            signing_key: 
         
     | 
| 
       112 
112 
     | 
    
         
             
            specification_version: 4
         
     | 
| 
       113 
113 
     | 
    
         
             
            summary: Helpers for processing content through a chain of filters
         
     | 
| 
       114 
114 
     | 
    
         
             
            test_files: []
         
     |