govspeak 2.0.2 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +5 -0
- data/README.md +1 -0
- data/lib/govspeak.rb +1 -1
- data/lib/govspeak/html_sanitizer.rb +23 -2
- data/lib/govspeak/html_validator.rb +3 -2
- data/lib/govspeak/version.rb +1 -1
- data/test/govspeak_test.rb +48 -3
- data/test/html_sanitizer_test.rb +11 -0
- data/test/html_validator_test.rb +5 -0
- metadata +4 -4
    
        data/CHANGELOG.md
    CHANGED
    
    | @@ -1,3 +1,8 @@ | |
| 1 | 
            +
            ## 3.0.0
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            * Add an `allowed_image_hosts` option to `HtmlValidator` (and `HtmlSanitizer`)
         | 
| 4 | 
            +
            * BREAKING CHANGE: Added the `$EndLegislativeList` tag which allows line breaks in `LegislativeLists`.
         | 
| 5 | 
            +
             | 
| 1 6 | 
             
            ## 2.0.2
         | 
| 2 7 | 
             
            * Fix a bug with the HtmlValidator to do with kramdown now respecting character
         | 
| 3 8 | 
             
              encodings of input data.
         | 
    
        data/README.md
    CHANGED
    
    
    
        data/lib/govspeak.rb
    CHANGED
    
    | @@ -160,7 +160,7 @@ module Govspeak | |
| 160 160 | 
             
                  %{<div class="address"><div class="adr org fn"><p>\n#{body.sub("\n", "").gsub("\n", "<br />")}\n</p></div></div>\n}
         | 
| 161 161 | 
             
                }
         | 
| 162 162 |  | 
| 163 | 
            -
                extension("legislative list", /(?<=\A|\n\n|\r\n\r\n)^\$LegislativeList\s*$(.*?) | 
| 163 | 
            +
                extension("legislative list", /(?<=\A|\n\n|\r\n\r\n)^\$LegislativeList\s*$(.*?)\$EndLegislativeList/m) do |body|
         | 
| 164 164 | 
             
                  Govspeak::KramdownOverrides.with_kramdown_ordered_lists_disabled do
         | 
| 165 165 | 
             
                    Kramdown::Document.new(body.strip).to_html.tap do |doc|
         | 
| 166 166 | 
             
                      doc.gsub!('<ul>', '<ol>')
         | 
    
        data/lib/govspeak/html_sanitizer.rb
    CHANGED
    
    | @@ -4,12 +4,33 @@ require 'with_deep_merge' | |
| 4 4 | 
             
            class Govspeak::HtmlSanitizer
         | 
| 5 5 | 
             
              include WithDeepMerge
         | 
| 6 6 |  | 
| 7 | 
            -
               | 
| 7 | 
            +
              class ImageSourceWhitelister
         | 
| 8 | 
            +
                def initialize(allowed_image_hosts)
         | 
| 9 | 
            +
                  @allowed_image_hosts = allowed_image_hosts
         | 
| 10 | 
            +
                end
         | 
| 11 | 
            +
             | 
| 12 | 
            +
                def call(sanitize_context)
         | 
| 13 | 
            +
                  return unless sanitize_context[:node_name] == "img"
         | 
| 14 | 
            +
             | 
| 15 | 
            +
                  node = sanitize_context[:node]
         | 
| 16 | 
            +
                  image_uri = URI.parse(node['src'])
         | 
| 17 | 
            +
                  unless image_uri.relative? || @allowed_image_hosts.include?(image_uri.host)
         | 
| 18 | 
            +
                    node.unlink # the node isn't sanitary. Remove it from the document.
         | 
| 19 | 
            +
                  end
         | 
| 20 | 
            +
                end
         | 
| 21 | 
            +
              end
         | 
| 22 | 
            +
             | 
| 23 | 
            +
              def initialize(dirty_html, options = {})
         | 
| 8 24 | 
             
                @dirty_html = dirty_html
         | 
| 25 | 
            +
                @allowed_image_hosts = options[:allowed_image_hosts]
         | 
| 9 26 | 
             
              end
         | 
| 10 27 |  | 
| 11 28 | 
             
              def sanitize
         | 
| 12 | 
            -
                 | 
| 29 | 
            +
                transformers = []
         | 
| 30 | 
            +
                if @allowed_image_hosts && @allowed_image_hosts.any?
         | 
| 31 | 
            +
                  transformers << ImageSourceWhitelister.new(@allowed_image_hosts)
         | 
| 32 | 
            +
                end
         | 
| 33 | 
            +
                Sanitize.clean(@dirty_html, sanitize_config.merge(transformers: transformers))
         | 
| 13 34 | 
             
              end
         | 
| 14 35 |  | 
| 15 36 | 
             
              def sanitize_without_images
         | 
    
        data/lib/govspeak/html_validator.rb
    CHANGED
    
    | @@ -1,8 +1,9 @@ | |
| 1 1 | 
             
            class Govspeak::HtmlValidator
         | 
| 2 2 | 
             
              attr_reader :string
         | 
| 3 3 |  | 
| 4 | 
            -
              def initialize(string)
         | 
| 4 | 
            +
              def initialize(string, sanitization_options = {})
         | 
| 5 5 | 
             
                @string = string.dup.force_encoding(Encoding::UTF_8)
         | 
| 6 | 
            +
                @sanitization_options = sanitization_options
         | 
| 6 7 | 
             
              end
         | 
| 7 8 |  | 
| 8 9 | 
             
              def invalid?
         | 
| @@ -11,7 +12,7 @@ class Govspeak::HtmlValidator | |
| 11 12 |  | 
| 12 13 | 
             
              def valid?
         | 
| 13 14 | 
             
                dirty_html = govspeak_to_html
         | 
| 14 | 
            -
                clean_html = Govspeak::HtmlSanitizer.new(dirty_html).sanitize
         | 
| 15 | 
            +
                clean_html = Govspeak::HtmlSanitizer.new(dirty_html, @sanitization_options).sanitize
         | 
| 15 16 | 
             
                normalise_html(dirty_html) == normalise_html(clean_html)
         | 
| 16 17 | 
             
              end
         | 
| 17 18 |  | 
    
        data/lib/govspeak/version.rb
    CHANGED
    
    
    
        data/test/govspeak_test.rb
    CHANGED
    
    | @@ -396,6 +396,49 @@ $CTA | |
| 396 396 | 
             
                assert_text_output "unordered list step list"
         | 
| 397 397 | 
             
              end
         | 
| 398 398 |  | 
| 399 | 
            +
              test_given_govspeak "
         | 
| 400 | 
            +
                $LegislativeList
         | 
| 401 | 
            +
                * 1.0 Lorem ipsum dolor sit amet, consectetur adipiscing elit.
         | 
| 402 | 
            +
                  Fusce felis ante, lobortis non quam sit amet, tempus interdum justo.
         | 
| 403 | 
            +
             | 
| 404 | 
            +
                  Pellentesque quam enim, egestas sit amet congue sit amet, ultrices vitae arcu.
         | 
| 405 | 
            +
                  fringilla, metus dui scelerisque est.
         | 
| 406 | 
            +
             | 
| 407 | 
            +
                  * a) A list item
         | 
| 408 | 
            +
             | 
| 409 | 
            +
                  * b) Another list item
         | 
| 410 | 
            +
             | 
| 411 | 
            +
                * 1.1 Second entry
         | 
| 412 | 
            +
                  Curabitur pretium pharetra sapien, a feugiat arcu euismod eget.
         | 
| 413 | 
            +
                  Nunc luctus ornare varius. Nulla scelerisque, justo dictum dapibus
         | 
| 414 | 
            +
                $EndLegislativeList
         | 
| 415 | 
            +
              " do
         | 
| 416 | 
            +
                assert_html_output %{
         | 
| 417 | 
            +
                  <ol class="legislative-list">
         | 
| 418 | 
            +
                    <li>
         | 
| 419 | 
            +
                      <p>1.0 Lorem ipsum dolor sit amet, consectetur adipiscing elit.
         | 
| 420 | 
            +
                  Fusce felis ante, lobortis non quam sit amet, tempus interdum justo.</p>
         | 
| 421 | 
            +
             | 
| 422 | 
            +
                      <p>Pellentesque quam enim, egestas sit amet congue sit amet, ultrices vitae arcu.
         | 
| 423 | 
            +
                  fringilla, metus dui scelerisque est.</p>
         | 
| 424 | 
            +
             | 
| 425 | 
            +
                      <ol>
         | 
| 426 | 
            +
                        <li>
         | 
| 427 | 
            +
                          <p>a) A list item</p>
         | 
| 428 | 
            +
                        </li>
         | 
| 429 | 
            +
                        <li>
         | 
| 430 | 
            +
                          <p>b) Another list item</p>
         | 
| 431 | 
            +
                        </li>
         | 
| 432 | 
            +
                      </ol>
         | 
| 433 | 
            +
                    </li>
         | 
| 434 | 
            +
                    <li>
         | 
| 435 | 
            +
                      <p>1.1 Second entry
         | 
| 436 | 
            +
                  Curabitur pretium pharetra sapien, a feugiat arcu euismod eget.
         | 
| 437 | 
            +
                  Nunc luctus ornare varius. Nulla scelerisque, justo dictum dapibus</p>
         | 
| 438 | 
            +
                    </li>
         | 
| 439 | 
            +
                  </ol>}
         | 
| 440 | 
            +
              end
         | 
| 441 | 
            +
             | 
| 399 442 | 
             
              test_given_govspeak "
         | 
| 400 443 | 
             
                $LegislativeList
         | 
| 401 444 | 
             
                * 1. The quick
         | 
| @@ -403,6 +446,7 @@ $CTA | |
| 403 446 | 
             
                  * a) Jumps over
         | 
| 404 447 | 
             
                  * b) The lazy
         | 
| 405 448 | 
             
                * 3. Dog
         | 
| 449 | 
            +
                $EndLegislativeList
         | 
| 406 450 | 
             
              " do
         | 
| 407 451 | 
             
                assert_html_output %{
         | 
| 408 452 | 
             
                  <ol class="legislative-list">
         | 
| @@ -434,6 +478,7 @@ $CTA | |
| 434 478 |  | 
| 435 479 | 
             
                $LegislativeList
         | 
| 436 480 | 
             
                * 1. jumps over the lazy dog
         | 
| 481 | 
            +
                $EndLegislativeList
         | 
| 437 482 | 
             
              " do
         | 
| 438 483 | 
             
                assert_html_output %{
         | 
| 439 484 | 
             
                  <p>The quick brown fox</p>
         | 
| @@ -444,7 +489,7 @@ $CTA | |
| 444 489 | 
             
                }
         | 
| 445 490 | 
             
              end
         | 
| 446 491 |  | 
| 447 | 
            -
              test_given_govspeak "This bit of text\r\n\r\n$LegislativeList\r\n* 1. should be turned into a list" do
         | 
| 492 | 
            +
              test_given_govspeak "This bit of text\r\n\r\n$LegislativeList\r\n* 1. should be turned into a list\r\n$EndLegislativeList" do
         | 
| 448 493 | 
             
                assert_html_output %{
         | 
| 449 494 | 
             
                  <p>This bit of text</p>
         | 
| 450 495 |  | 
| @@ -534,12 +579,12 @@ $CTA | |
| 534 579 |  | 
| 535 580 | 
             
              test "can sanitize a document" do
         | 
| 536 581 | 
             
                document = Govspeak::Document.new("<script>doBadThings();</script>")
         | 
| 537 | 
            -
                assert_equal "doBadThings();", document.to_sanitized_html
         | 
| 582 | 
            +
                assert_equal "doBadThings();", document.to_sanitized_html.strip
         | 
| 538 583 | 
             
              end
         | 
| 539 584 |  | 
| 540 585 | 
             
              test "can sanitize a document without image" do
         | 
| 541 586 | 
             
                document = Govspeak::Document.new("<script>doBadThings();</script><img src='https://example.com/image.jpg'>")
         | 
| 542 | 
            -
                assert_equal "doBadThings();<p></p>", document.to_sanitized_html_without_images
         | 
| 587 | 
            +
                assert_equal "doBadThings();<p></p>", document.to_sanitized_html_without_images.gsub(/\s/, "")
         | 
| 543 588 | 
             
              end
         | 
| 544 589 |  | 
| 545 590 | 
             
              test "identifies a Govspeak document containing malicious HTML as invalid" do
         | 
    
        data/test/html_sanitizer_test.rb
    CHANGED
    
    | @@ -28,6 +28,17 @@ class HtmlSanitizerTest < Test::Unit::TestCase | |
| 28 28 | 
             
                assert_equal "Fortnum & Mason", Govspeak::HtmlSanitizer.new(html).sanitize
         | 
| 29 29 | 
             
              end
         | 
| 30 30 |  | 
| 31 | 
            +
              test "allows images on whitelisted domains" do
         | 
| 32 | 
            +
                html = "<img src='http://allowed.com/image.jgp'>"
         | 
| 33 | 
            +
                sanitized_html = Govspeak::HtmlSanitizer.new(html, allowed_image_hosts: ['allowed.com']).sanitize
         | 
| 34 | 
            +
                assert_equal "<img src=\"http://allowed.com/image.jgp\">", sanitized_html
         | 
| 35 | 
            +
              end
         | 
| 36 | 
            +
             | 
| 37 | 
            +
              test "removes images not on whitelisted domains" do
         | 
| 38 | 
            +
                html = "<img src='http://evil.com/image.jgp'>"
         | 
| 39 | 
            +
                assert_equal "", Govspeak::HtmlSanitizer.new(html, allowed_image_hosts: ['allowed.com']).sanitize
         | 
| 40 | 
            +
              end
         | 
| 41 | 
            +
             | 
| 31 42 | 
             
              test "can strip images" do
         | 
| 32 43 | 
             
                html = "<img src='http://example.com/image.jgp'>"
         | 
| 33 44 | 
             
                assert_equal "", Govspeak::HtmlSanitizer.new(html).sanitize_without_images
         | 
    
        data/test/html_validator_test.rb
    CHANGED
    
    | @@ -85,4 +85,9 @@ class HtmlValidatorTest < Test::Unit::TestCase | |
| 85 85 | 
             
              test "allow things that will end up as HTML entities" do
         | 
| 86 86 | 
             
                assert Govspeak::HtmlValidator.new("Fortnum & Mason").valid?
         | 
| 87 87 | 
             
              end
         | 
| 88 | 
            +
             | 
| 89 | 
            +
              test "optionally disallow images not on a whitelisted domain" do
         | 
| 90 | 
            +
                html = "<img src='http://evil.com/image.jgp'>"
         | 
| 91 | 
            +
                assert Govspeak::HtmlValidator.new(html, allowed_image_hosts: ['allowed.com']).invalid?
         | 
| 92 | 
            +
              end
         | 
| 88 93 | 
             
            end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: govspeak
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version:  | 
| 4 | 
            +
              version: 3.0.0
         | 
| 5 5 | 
             
              prerelease: 
         | 
| 6 6 | 
             
            platform: ruby
         | 
| 7 7 | 
             
            authors:
         | 
| @@ -10,7 +10,7 @@ authors: | |
| 10 10 | 
             
            autorequire: 
         | 
| 11 11 | 
             
            bindir: bin
         | 
| 12 12 | 
             
            cert_chain: []
         | 
| 13 | 
            -
            date: 2014-08- | 
| 13 | 
            +
            date: 2014-08-14 00:00:00.000000000 Z
         | 
| 14 14 | 
             
            dependencies:
         | 
| 15 15 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 16 16 | 
             
              name: kramdown
         | 
| @@ -184,7 +184,7 @@ required_ruby_version: !ruby/object:Gem::Requirement | |
| 184 184 | 
             
                  version: '0'
         | 
| 185 185 | 
             
                  segments:
         | 
| 186 186 | 
             
                  - 0
         | 
| 187 | 
            -
                  hash: - | 
| 187 | 
            +
                  hash: -1089488848379077838
         | 
| 188 188 | 
             
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 189 189 | 
             
              none: false
         | 
| 190 190 | 
             
              requirements:
         | 
| @@ -193,7 +193,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 193 193 | 
             
                  version: '0'
         | 
| 194 194 | 
             
                  segments:
         | 
| 195 195 | 
             
                  - 0
         | 
| 196 | 
            -
                  hash: - | 
| 196 | 
            +
                  hash: -1089488848379077838
         | 
| 197 197 | 
             
            requirements: []
         | 
| 198 198 | 
             
            rubyforge_project: 
         | 
| 199 199 | 
             
            rubygems_version: 1.8.23
         |