govspeak 2.0.2 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 3.0.0
2
+
3
+ * Add an `allowed_image_hosts` option to `HtmlValidator` (and `HtmlSanitizer`)
4
+ * BREAKING CHANGE: Added the `$EndLegislativeList` tag which allows line breaks in `LegislativeLists`.
5
+
1
6
  ## 2.0.2
2
7
  * Fix a bug with the HtmlValidator to do with kramdown now respecting character
3
8
  encodings of input data.
data/README.md CHANGED
@@ -148,6 +148,7 @@ For lists where you want to specify the numbering and have multiple indent level
148
148
  * i. Item 2 b i
149
149
  * ii. Item 2 b ii
150
150
  * 3. Item 3
151
+ $EndLegislativeList
151
152
  (to indent, add 2 spaces)
152
153
 
153
154
  ## Abbreviations
data/lib/govspeak.rb CHANGED
@@ -160,7 +160,7 @@ module Govspeak
160
160
  %{<div class="address"><div class="adr org fn"><p>\n#{body.sub("\n", "").gsub("\n", "<br />")}\n</p></div></div>\n}
161
161
  }
162
162
 
163
- extension("legislative list", /(?<=\A|\n\n|\r\n\r\n)^\$LegislativeList\s*$(.*?)(?:^\s*$|\Z)/m) do |body|
163
+ extension("legislative list", /(?<=\A|\n\n|\r\n\r\n)^\$LegislativeList\s*$(.*?)\$EndLegislativeList/m) do |body|
164
164
  Govspeak::KramdownOverrides.with_kramdown_ordered_lists_disabled do
165
165
  Kramdown::Document.new(body.strip).to_html.tap do |doc|
166
166
  doc.gsub!('<ul>', '<ol>')
@@ -4,12 +4,33 @@ require 'with_deep_merge'
4
4
  class Govspeak::HtmlSanitizer
5
5
  include WithDeepMerge
6
6
 
7
- def initialize(dirty_html)
7
+ class ImageSourceWhitelister
8
+ def initialize(allowed_image_hosts)
9
+ @allowed_image_hosts = allowed_image_hosts
10
+ end
11
+
12
+ def call(sanitize_context)
13
+ return unless sanitize_context[:node_name] == "img"
14
+
15
+ node = sanitize_context[:node]
16
+ image_uri = URI.parse(node['src'])
17
+ unless image_uri.relative? || @allowed_image_hosts.include?(image_uri.host)
18
+ node.unlink # the node isn't sanitary. Remove it from the document.
19
+ end
20
+ end
21
+ end
22
+
23
+ def initialize(dirty_html, options = {})
8
24
  @dirty_html = dirty_html
25
+ @allowed_image_hosts = options[:allowed_image_hosts]
9
26
  end
10
27
 
11
28
  def sanitize
12
- Sanitize.clean(@dirty_html, sanitize_config)
29
+ transformers = []
30
+ if @allowed_image_hosts && @allowed_image_hosts.any?
31
+ transformers << ImageSourceWhitelister.new(@allowed_image_hosts)
32
+ end
33
+ Sanitize.clean(@dirty_html, sanitize_config.merge(transformers: transformers))
13
34
  end
14
35
 
15
36
  def sanitize_without_images
@@ -1,8 +1,9 @@
1
1
  class Govspeak::HtmlValidator
2
2
  attr_reader :string
3
3
 
4
- def initialize(string)
4
+ def initialize(string, sanitization_options = {})
5
5
  @string = string.dup.force_encoding(Encoding::UTF_8)
6
+ @sanitization_options = sanitization_options
6
7
  end
7
8
 
8
9
  def invalid?
@@ -11,7 +12,7 @@ class Govspeak::HtmlValidator
11
12
 
12
13
  def valid?
13
14
  dirty_html = govspeak_to_html
14
- clean_html = Govspeak::HtmlSanitizer.new(dirty_html).sanitize
15
+ clean_html = Govspeak::HtmlSanitizer.new(dirty_html, @sanitization_options).sanitize
15
16
  normalise_html(dirty_html) == normalise_html(clean_html)
16
17
  end
17
18
 
@@ -1,3 +1,3 @@
1
1
  module Govspeak
2
- VERSION = "2.0.2"
2
+ VERSION = "3.0.0"
3
3
  end
@@ -396,6 +396,49 @@ $CTA
396
396
  assert_text_output "unordered list step list"
397
397
  end
398
398
 
399
+ test_given_govspeak "
400
+ $LegislativeList
401
+ * 1.0 Lorem ipsum dolor sit amet, consectetur adipiscing elit.
402
+ Fusce felis ante, lobortis non quam sit amet, tempus interdum justo.
403
+
404
+ Pellentesque quam enim, egestas sit amet congue sit amet, ultrices vitae arcu.
405
+ fringilla, metus dui scelerisque est.
406
+
407
+ * a) A list item
408
+
409
+ * b) Another list item
410
+
411
+ * 1.1 Second entry
412
+ Curabitur pretium pharetra sapien, a feugiat arcu euismod eget.
413
+ Nunc luctus ornare varius. Nulla scelerisque, justo dictum dapibus
414
+ $EndLegislativeList
415
+ " do
416
+ assert_html_output %{
417
+ <ol class="legislative-list">
418
+ <li>
419
+ <p>1.0 Lorem ipsum dolor sit amet, consectetur adipiscing elit.
420
+ Fusce felis ante, lobortis non quam sit amet, tempus interdum justo.</p>
421
+
422
+ <p>Pellentesque quam enim, egestas sit amet congue sit amet, ultrices vitae arcu.
423
+ fringilla, metus dui scelerisque est.</p>
424
+
425
+ <ol>
426
+ <li>
427
+ <p>a) A list item</p>
428
+ </li>
429
+ <li>
430
+ <p>b) Another list item</p>
431
+ </li>
432
+ </ol>
433
+ </li>
434
+ <li>
435
+ <p>1.1 Second entry
436
+ Curabitur pretium pharetra sapien, a feugiat arcu euismod eget.
437
+ Nunc luctus ornare varius. Nulla scelerisque, justo dictum dapibus</p>
438
+ </li>
439
+ </ol>}
440
+ end
441
+
399
442
  test_given_govspeak "
400
443
  $LegislativeList
401
444
  * 1. The quick
@@ -403,6 +446,7 @@ $CTA
403
446
  * a) Jumps over
404
447
  * b) The lazy
405
448
  * 3. Dog
449
+ $EndLegislativeList
406
450
  " do
407
451
  assert_html_output %{
408
452
  <ol class="legislative-list">
@@ -434,6 +478,7 @@ $CTA
434
478
 
435
479
  $LegislativeList
436
480
  * 1. jumps over the lazy dog
481
+ $EndLegislativeList
437
482
  " do
438
483
  assert_html_output %{
439
484
  <p>The quick brown fox</p>
@@ -444,7 +489,7 @@ $CTA
444
489
  }
445
490
  end
446
491
 
447
- test_given_govspeak "This bit of text\r\n\r\n$LegislativeList\r\n* 1. should be turned into a list" do
492
+ test_given_govspeak "This bit of text\r\n\r\n$LegislativeList\r\n* 1. should be turned into a list\r\n$EndLegislativeList" do
448
493
  assert_html_output %{
449
494
  <p>This bit of text</p>
450
495
 
@@ -534,12 +579,12 @@ $CTA
534
579
 
535
580
  test "can sanitize a document" do
536
581
  document = Govspeak::Document.new("<script>doBadThings();</script>")
537
- assert_equal "doBadThings();", document.to_sanitized_html
582
+ assert_equal "doBadThings();", document.to_sanitized_html.strip
538
583
  end
539
584
 
540
585
  test "can sanitize a document without image" do
541
586
  document = Govspeak::Document.new("<script>doBadThings();</script><img src='https://example.com/image.jpg'>")
542
- assert_equal "doBadThings();<p></p>", document.to_sanitized_html_without_images
587
+ assert_equal "doBadThings();<p></p>", document.to_sanitized_html_without_images.gsub(/\s/, "")
543
588
  end
544
589
 
545
590
  test "identifies a Govspeak document containing malicious HTML as invalid" do
@@ -28,6 +28,17 @@ class HtmlSanitizerTest < Test::Unit::TestCase
28
28
  assert_equal "Fortnum &amp; Mason", Govspeak::HtmlSanitizer.new(html).sanitize
29
29
  end
30
30
 
31
+ test "allows images on whitelisted domains" do
32
+ html = "<img src='http://allowed.com/image.jgp'>"
33
+ sanitized_html = Govspeak::HtmlSanitizer.new(html, allowed_image_hosts: ['allowed.com']).sanitize
34
+ assert_equal "<img src=\"http://allowed.com/image.jgp\">", sanitized_html
35
+ end
36
+
37
+ test "removes images not on whitelisted domains" do
38
+ html = "<img src='http://evil.com/image.jgp'>"
39
+ assert_equal "", Govspeak::HtmlSanitizer.new(html, allowed_image_hosts: ['allowed.com']).sanitize
40
+ end
41
+
31
42
  test "can strip images" do
32
43
  html = "<img src='http://example.com/image.jgp'>"
33
44
  assert_equal "", Govspeak::HtmlSanitizer.new(html).sanitize_without_images
@@ -85,4 +85,9 @@ class HtmlValidatorTest < Test::Unit::TestCase
85
85
  test "allow things that will end up as HTML entities" do
86
86
  assert Govspeak::HtmlValidator.new("Fortnum & Mason").valid?
87
87
  end
88
+
89
+ test "optionally disallow images not on a whitelisted domain" do
90
+ html = "<img src='http://evil.com/image.jgp'>"
91
+ assert Govspeak::HtmlValidator.new(html, allowed_image_hosts: ['allowed.com']).invalid?
92
+ end
88
93
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: govspeak
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.2
4
+ version: 3.0.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2014-08-13 00:00:00.000000000 Z
13
+ date: 2014-08-14 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: kramdown
@@ -184,7 +184,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
184
184
  version: '0'
185
185
  segments:
186
186
  - 0
187
- hash: -1322540803525731364
187
+ hash: -1089488848379077838
188
188
  required_rubygems_version: !ruby/object:Gem::Requirement
189
189
  none: false
190
190
  requirements:
@@ -193,7 +193,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
193
193
  version: '0'
194
194
  segments:
195
195
  - 0
196
- hash: -1322540803525731364
196
+ hash: -1089488848379077838
197
197
  requirements: []
198
198
  rubyforge_project:
199
199
  rubygems_version: 1.8.23