RubyGems - govspeak - Versions diffs - 2.0.2 → 3.0.0 - Mend

govspeak 2.0.2 → 3.0.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (10)

data/CHANGELOG.md +5 -0
data/README.md +1 -0
data/lib/govspeak.rb +1 -1
data/lib/govspeak/html_sanitizer.rb +23 -2
data/lib/govspeak/html_validator.rb +3 -2
data/lib/govspeak/version.rb +1 -1
data/test/govspeak_test.rb +48 -3
data/test/html_sanitizer_test.rb +11 -0
data/test/html_validator_test.rb +5 -0
metadata +4 -4

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,8 @@
+## 3.0.0
+* Add an `allowed_image_hosts` options to `HtmlValidator` (and `HtmlSanitizer`)
+* BREAKING CHANGE: Added the `$EndLegislativeList` tag which allows line breaks in `LegislativeLists`.
 ## 2.0.2
 * Fix a bug with the HtmlValidator to do with kramdown now respecting character
   encodings of input data.

data/README.md CHANGED Viewed

@@ -148,6 +148,7 @@ For lists where you want to specify the numbering and have multiple indent level
         * i. Item 2 b i
         * ii. Item 2 b ii
     * 3. Item 3
+    $EndLegislativeList
     (to indent, add 2 spaces)
 ## Abbreviations

data/lib/govspeak.rb CHANGED Viewed

@@ -160,7 +160,7 @@ module Govspeak
       %{<div class="address"><div class="adr org fn"><p>\n#{body.sub("\n", "").gsub("\n", "<br />")}\n</p></div></div>\n}
     }
-    extension("legislative list", /(?<=\A|\n\n|\r\n\r\n)^\$LegislativeList\s*$(.*?)(?:^\s*$|\Z)/m) do |body|
+    extension("legislative list", /(?<=\A|\n\n|\r\n\r\n)^\$LegislativeList\s*$(.*?)\$EndLegislativeList/m) do |body|
       Govspeak::KramdownOverrides.with_kramdown_ordered_lists_disabled do
         Kramdown::Document.new(body.strip).to_html.tap do |doc|
           doc.gsub!('<ul>', '<ol>')

data/lib/govspeak/html_sanitizer.rb CHANGED Viewed

@@ -4,12 +4,33 @@ require 'with_deep_merge'
 class Govspeak::HtmlSanitizer
   include WithDeepMerge
-  def initialize(dirty_html)
+  class ImageSourceWhitelister
+    def initialize(allowed_image_hosts)
+      @allowed_image_hosts = allowed_image_hosts
+    end
+    def call(sanitize_context)
+      return unless sanitize_context[:node_name] == "img"
+      node = sanitize_context[:node]
+      image_uri = URI.parse(node['src'])
+      unless image_uri.relative? || @allowed_image_hosts.include?(image_uri.host)
+        node.unlink # the node isn't sanitary. Remove it from the document.
+      end
+    end
+  end
+  def initialize(dirty_html, options = {})
     @dirty_html = dirty_html
+    @allowed_image_hosts = options[:allowed_image_hosts]
   end
   def sanitize
-    Sanitize.clean(@dirty_html, sanitize_config)
+    transformers = []
+    if @allowed_image_hosts && @allowed_image_hosts.any?
+      transformers << ImageSourceWhitelister.new(@allowed_image_hosts)
+    end
+    Sanitize.clean(@dirty_html, sanitize_config.merge(transformers: transformers))
   end
   def sanitize_without_images

data/lib/govspeak/html_validator.rb CHANGED Viewed

@@ -1,8 +1,9 @@
 class Govspeak::HtmlValidator
   attr_reader :string
-  def initialize(string)
+  def initialize(string, sanitization_options = {})
     @string = string.dup.force_encoding(Encoding::UTF_8)
+    @sanitization_options = sanitization_options
   end
   def invalid?
@@ -11,7 +12,7 @@ class Govspeak::HtmlValidator
   def valid?
     dirty_html = govspeak_to_html
-    clean_html = Govspeak::HtmlSanitizer.new(dirty_html).sanitize
+    clean_html = Govspeak::HtmlSanitizer.new(dirty_html, @sanitization_options).sanitize
     normalise_html(dirty_html) == normalise_html(clean_html)
   end

data/lib/govspeak/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Govspeak
-  VERSION = "2.0.2"
+  VERSION = "3.0.0"
 end

data/test/govspeak_test.rb CHANGED Viewed

@@ -396,6 +396,49 @@ $CTA
     assert_text_output "unordered list step list"
   end
+  test_given_govspeak "
+    $LegislativeList
+    * 1.0 Lorem ipsum dolor sit amet, consectetur adipiscing elit.
+      Fusce felis ante, lobortis non quam sit amet, tempus interdum justo.
+      Pellentesque quam enim, egestas sit amet congue sit amet, ultrices vitae arcu.
+      fringilla, metus dui scelerisque est.
+      * a) A list item
+      * b) Another list item
+    * 1.1 Second entry
+      Curabitur pretium pharetra sapien, a feugiat arcu euismod eget.
+      Nunc luctus ornare varius. Nulla scelerisque, justo dictum dapibus
+    $EndLegislativeList
+  " do
+    assert_html_output %{
+      <ol class="legislative-list">
+        <li>
+          <p>1.0 Lorem ipsum dolor sit amet, consectetur adipiscing elit.
+      Fusce felis ante, lobortis non quam sit amet, tempus interdum justo.</p>
+          <p>Pellentesque quam enim, egestas sit amet congue sit amet, ultrices vitae arcu.
+      fringilla, metus dui scelerisque est.</p>
+          <ol>
+            <li>
+              <p>a) A list item</p>
+            </li>
+            <li>
+              <p>b) Another list item</p>
+            </li>
+          </ol>
+        </li>
+        <li>
+          <p>1.1 Second entry
+      Curabitur pretium pharetra sapien, a feugiat arcu euismod eget.
+      Nunc luctus ornare varius. Nulla scelerisque, justo dictum dapibus</p>
+        </li>
+      </ol>}
+  end
   test_given_govspeak "
     $LegislativeList
     * 1. The quick
@@ -403,6 +446,7 @@ $CTA
       * a) Jumps over
       * b) The lazy
     * 3. Dog
+    $EndLegislativeList
   " do
     assert_html_output %{
       <ol class="legislative-list">
@@ -434,6 +478,7 @@ $CTA
     $LegislativeList
     * 1. jumps over the lazy dog
+    $EndLegislativeList
   " do
     assert_html_output %{
       <p>The quick brown fox</p>
@@ -444,7 +489,7 @@ $CTA
     }
   end
-  test_given_govspeak "This bit of text\r\n\r\n$LegislativeList\r\n* 1. should be turned into a list" do
+  test_given_govspeak "This bit of text\r\n\r\n$LegislativeList\r\n* 1. should be turned into a list\r\n$EndLegislativeList" do
     assert_html_output %{
       <p>This bit of text</p>
@@ -534,12 +579,12 @@ $CTA
   test "can sanitize a document" do
     document = Govspeak::Document.new("<script>doBadThings();</script>")
-    assert_equal "doBadThings();", document.to_sanitized_html
+    assert_equal "doBadThings();", document.to_sanitized_html.strip
   end
   test "can sanitize a document without image" do
     document = Govspeak::Document.new("<script>doBadThings();</script><img src='https://example.com/image.jpg'>")
-    assert_equal "doBadThings();<p></p>", document.to_sanitized_html_without_images
+    assert_equal "doBadThings();<p></p>", document.to_sanitized_html_without_images.gsub(/\s/, "")
   end
   test "identifies a Govspeak document containing malicious HTML as invalid" do

data/test/html_sanitizer_test.rb CHANGED Viewed

@@ -28,6 +28,17 @@ class HtmlSanitizerTest < Test::Unit::TestCase
     assert_equal "Fortnum &amp; Mason", Govspeak::HtmlSanitizer.new(html).sanitize
   end
+  test "allows images on whitelisted domains" do
+    html = "<img src='http://allowed.com/image.jgp'>"
+    sanitized_html = Govspeak::HtmlSanitizer.new(html, allowed_image_hosts: ['allowed.com']).sanitize
+    assert_equal "<img src=\"http://allowed.com/image.jgp\">", sanitized_html
+  end
+  test "removes images not on whitelisted domains" do
+    html = "<img src='http://evil.com/image.jgp'>"
+    assert_equal "", Govspeak::HtmlSanitizer.new(html, allowed_image_hosts: ['allowed.com']).sanitize
+  end
   test "can strip images" do
     html = "<img src='http://example.com/image.jgp'>"
     assert_equal "", Govspeak::HtmlSanitizer.new(html).sanitize_without_images

data/test/html_validator_test.rb CHANGED Viewed

@@ -85,4 +85,9 @@ class HtmlValidatorTest < Test::Unit::TestCase
   test "allow things that will end up as HTML entities" do
     assert Govspeak::HtmlValidator.new("Fortnum & Mason").valid?
   end
+  test "optionally disallow images not on a whitelisted domain" do
+    html = "<img src='http://evil.com/image.jgp'>"
+    assert Govspeak::HtmlValidator.new(html, allowed_image_hosts: ['allowed.com']).invalid?
+  end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: govspeak
 version: !ruby/object:Gem::Version
-  version: 2.0.2
+  version: 3.0.0
   prerelease:
 platform: ruby
 authors:
@@ -10,7 +10,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-08-13 00:00:00.000000000 Z
+date: 2014-08-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: kramdown
@@ -184,7 +184,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: -1322540803525731364
+      hash: -1089488848379077838
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements:
@@ -193,7 +193,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: -1322540803525731364
+      hash: -1089488848379077838
 requirements: []
 rubyforge_project:
 rubygems_version: 1.8.23