govspeak 2.0.2 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 3.0.0
2
+
3
+ * Add an `allowed_image_hosts` option to `HtmlValidator` (and `HtmlSanitizer`)
4
+ * BREAKING CHANGE: Add the `$EndLegislativeList` tag, which is now required to close a `$LegislativeList` block and allows blank lines within `LegislativeLists`.
5
+
1
6
  ## 2.0.2
2
7
  * Fix a bug with the HtmlValidator to do with kramdown now respecting character
3
8
  encodings of input data.
data/README.md CHANGED
@@ -148,6 +148,7 @@ For lists where you want to specify the numbering and have multiple indent level
148
148
  * i. Item 2 b i
149
149
  * ii. Item 2 b ii
150
150
  * 3. Item 3
151
+ $EndLegislativeList
151
152
  (to indent, add 2 spaces)
152
153
 
153
154
  ## Abbreviations
data/lib/govspeak.rb CHANGED
@@ -160,7 +160,7 @@ module Govspeak
160
160
  %{<div class="address"><div class="adr org fn"><p>\n#{body.sub("\n", "").gsub("\n", "<br />")}\n</p></div></div>\n}
161
161
  }
162
162
 
163
- extension("legislative list", /(?<=\A|\n\n|\r\n\r\n)^\$LegislativeList\s*$(.*?)(?:^\s*$|\Z)/m) do |body|
163
+ extension("legislative list", /(?<=\A|\n\n|\r\n\r\n)^\$LegislativeList\s*$(.*?)\$EndLegislativeList/m) do |body|
164
164
  Govspeak::KramdownOverrides.with_kramdown_ordered_lists_disabled do
165
165
  Kramdown::Document.new(body.strip).to_html.tap do |doc|
166
166
  doc.gsub!('<ul>', '<ol>')
@@ -4,12 +4,33 @@ require 'with_deep_merge'
4
4
  class Govspeak::HtmlSanitizer
5
5
  include WithDeepMerge
6
6
 
7
- def initialize(dirty_html)
7
+ class ImageSourceWhitelister
8
+ def initialize(allowed_image_hosts)
9
+ @allowed_image_hosts = allowed_image_hosts
10
+ end
11
+
12
+ def call(sanitize_context)
13
+ return unless sanitize_context[:node_name] == "img"
14
+
15
+ node = sanitize_context[:node]
16
+ image_uri = URI.parse(node['src'])
17
+ unless image_uri.relative? || @allowed_image_hosts.include?(image_uri.host)
18
+ node.unlink # the node isn't sanitary. Remove it from the document.
19
+ end
20
+ end
21
+ end
22
+
23
+ def initialize(dirty_html, options = {})
8
24
  @dirty_html = dirty_html
25
+ @allowed_image_hosts = options[:allowed_image_hosts]
9
26
  end
10
27
 
11
28
  def sanitize
12
- Sanitize.clean(@dirty_html, sanitize_config)
29
+ transformers = []
30
+ if @allowed_image_hosts && @allowed_image_hosts.any?
31
+ transformers << ImageSourceWhitelister.new(@allowed_image_hosts)
32
+ end
33
+ Sanitize.clean(@dirty_html, sanitize_config.merge(transformers: transformers))
13
34
  end
14
35
 
15
36
  def sanitize_without_images
@@ -1,8 +1,9 @@
1
1
  class Govspeak::HtmlValidator
2
2
  attr_reader :string
3
3
 
4
- def initialize(string)
4
+ def initialize(string, sanitization_options = {})
5
5
  @string = string.dup.force_encoding(Encoding::UTF_8)
6
+ @sanitization_options = sanitization_options
6
7
  end
7
8
 
8
9
  def invalid?
@@ -11,7 +12,7 @@ class Govspeak::HtmlValidator
11
12
 
12
13
  def valid?
13
14
  dirty_html = govspeak_to_html
14
- clean_html = Govspeak::HtmlSanitizer.new(dirty_html).sanitize
15
+ clean_html = Govspeak::HtmlSanitizer.new(dirty_html, @sanitization_options).sanitize
15
16
  normalise_html(dirty_html) == normalise_html(clean_html)
16
17
  end
17
18
 
@@ -1,3 +1,3 @@
1
1
  module Govspeak
2
- VERSION = "2.0.2"
2
+ VERSION = "3.0.0"
3
3
  end
@@ -396,6 +396,49 @@ $CTA
396
396
  assert_text_output "unordered list step list"
397
397
  end
398
398
 
399
+ test_given_govspeak "
400
+ $LegislativeList
401
+ * 1.0 Lorem ipsum dolor sit amet, consectetur adipiscing elit.
402
+ Fusce felis ante, lobortis non quam sit amet, tempus interdum justo.
403
+
404
+ Pellentesque quam enim, egestas sit amet congue sit amet, ultrices vitae arcu.
405
+ fringilla, metus dui scelerisque est.
406
+
407
+ * a) A list item
408
+
409
+ * b) Another list item
410
+
411
+ * 1.1 Second entry
412
+ Curabitur pretium pharetra sapien, a feugiat arcu euismod eget.
413
+ Nunc luctus ornare varius. Nulla scelerisque, justo dictum dapibus
414
+ $EndLegislativeList
415
+ " do
416
+ assert_html_output %{
417
+ <ol class="legislative-list">
418
+ <li>
419
+ <p>1.0 Lorem ipsum dolor sit amet, consectetur adipiscing elit.
420
+ Fusce felis ante, lobortis non quam sit amet, tempus interdum justo.</p>
421
+
422
+ <p>Pellentesque quam enim, egestas sit amet congue sit amet, ultrices vitae arcu.
423
+ fringilla, metus dui scelerisque est.</p>
424
+
425
+ <ol>
426
+ <li>
427
+ <p>a) A list item</p>
428
+ </li>
429
+ <li>
430
+ <p>b) Another list item</p>
431
+ </li>
432
+ </ol>
433
+ </li>
434
+ <li>
435
+ <p>1.1 Second entry
436
+ Curabitur pretium pharetra sapien, a feugiat arcu euismod eget.
437
+ Nunc luctus ornare varius. Nulla scelerisque, justo dictum dapibus</p>
438
+ </li>
439
+ </ol>}
440
+ end
441
+
399
442
  test_given_govspeak "
400
443
  $LegislativeList
401
444
  * 1. The quick
@@ -403,6 +446,7 @@ $CTA
403
446
  * a) Jumps over
404
447
  * b) The lazy
405
448
  * 3. Dog
449
+ $EndLegislativeList
406
450
  " do
407
451
  assert_html_output %{
408
452
  <ol class="legislative-list">
@@ -434,6 +478,7 @@ $CTA
434
478
 
435
479
  $LegislativeList
436
480
  * 1. jumps over the lazy dog
481
+ $EndLegislativeList
437
482
  " do
438
483
  assert_html_output %{
439
484
  <p>The quick brown fox</p>
@@ -444,7 +489,7 @@ $CTA
444
489
  }
445
490
  end
446
491
 
447
- test_given_govspeak "This bit of text\r\n\r\n$LegislativeList\r\n* 1. should be turned into a list" do
492
+ test_given_govspeak "This bit of text\r\n\r\n$LegislativeList\r\n* 1. should be turned into a list\r\n$EndLegislativeList" do
448
493
  assert_html_output %{
449
494
  <p>This bit of text</p>
450
495
 
@@ -534,12 +579,12 @@ $CTA
534
579
 
535
580
  test "can sanitize a document" do
536
581
  document = Govspeak::Document.new("<script>doBadThings();</script>")
537
- assert_equal "doBadThings();", document.to_sanitized_html
582
+ assert_equal "doBadThings();", document.to_sanitized_html.strip
538
583
  end
539
584
 
540
585
  test "can sanitize a document without image" do
541
586
  document = Govspeak::Document.new("<script>doBadThings();</script><img src='https://example.com/image.jpg'>")
542
- assert_equal "doBadThings();<p></p>", document.to_sanitized_html_without_images
587
+ assert_equal "doBadThings();<p></p>", document.to_sanitized_html_without_images.gsub(/\s/, "")
543
588
  end
544
589
 
545
590
  test "identifies a Govspeak document containing malicious HTML as invalid" do
@@ -28,6 +28,17 @@ class HtmlSanitizerTest < Test::Unit::TestCase
28
28
  assert_equal "Fortnum &amp; Mason", Govspeak::HtmlSanitizer.new(html).sanitize
29
29
  end
30
30
 
31
+ test "allows images on whitelisted domains" do
32
+ html = "<img src='http://allowed.com/image.jgp'>"
33
+ sanitized_html = Govspeak::HtmlSanitizer.new(html, allowed_image_hosts: ['allowed.com']).sanitize
34
+ assert_equal "<img src=\"http://allowed.com/image.jgp\">", sanitized_html
35
+ end
36
+
37
+ test "removes images not on whitelisted domains" do
38
+ html = "<img src='http://evil.com/image.jgp'>"
39
+ assert_equal "", Govspeak::HtmlSanitizer.new(html, allowed_image_hosts: ['allowed.com']).sanitize
40
+ end
41
+
31
42
  test "can strip images" do
32
43
  html = "<img src='http://example.com/image.jgp'>"
33
44
  assert_equal "", Govspeak::HtmlSanitizer.new(html).sanitize_without_images
@@ -85,4 +85,9 @@ class HtmlValidatorTest < Test::Unit::TestCase
85
85
  test "allow things that will end up as HTML entities" do
86
86
  assert Govspeak::HtmlValidator.new("Fortnum & Mason").valid?
87
87
  end
88
+
89
+ test "optionally disallow images not on a whitelisted domain" do
90
+ html = "<img src='http://evil.com/image.jgp'>"
91
+ assert Govspeak::HtmlValidator.new(html, allowed_image_hosts: ['allowed.com']).invalid?
92
+ end
88
93
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: govspeak
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.2
4
+ version: 3.0.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2014-08-13 00:00:00.000000000 Z
13
+ date: 2014-08-14 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: kramdown
@@ -184,7 +184,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
184
184
  version: '0'
185
185
  segments:
186
186
  - 0
187
- hash: -1322540803525731364
187
+ hash: -1089488848379077838
188
188
  required_rubygems_version: !ruby/object:Gem::Requirement
189
189
  none: false
190
190
  requirements:
@@ -193,7 +193,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
193
193
  version: '0'
194
194
  segments:
195
195
  - 0
196
- hash: -1322540803525731364
196
+ hash: -1089488848379077838
197
197
  requirements: []
198
198
  rubyforge_project:
199
199
  rubygems_version: 1.8.23