govspeak 2.0.2 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +5 -0
- data/README.md +1 -0
- data/lib/govspeak.rb +1 -1
- data/lib/govspeak/html_sanitizer.rb +23 -2
- data/lib/govspeak/html_validator.rb +3 -2
- data/lib/govspeak/version.rb +1 -1
- data/test/govspeak_test.rb +48 -3
- data/test/html_sanitizer_test.rb +11 -0
- data/test/html_validator_test.rb +5 -0
- metadata +4 -4
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
## 3.0.0
|
2
|
+
|
3
|
+
* Add an `allowed_image_hosts` option to `HtmlValidator` (and `HtmlSanitizer`)
|
4
|
+
* BREAKING CHANGE: Added the `$EndLegislativeList` tag, which allows line breaks in `LegislativeLists`. Every `$LegislativeList` block must now be closed with `$EndLegislativeList`.
|
5
|
+
|
1
6
|
## 2.0.2
|
2
7
|
* Fix a bug with the HtmlValidator to do with kramdown now respecting character
|
3
8
|
encodings of input data.
|
data/README.md
CHANGED
data/lib/govspeak.rb
CHANGED
@@ -160,7 +160,7 @@ module Govspeak
|
|
160
160
|
%{<div class="address"><div class="adr org fn"><p>\n#{body.sub("\n", "").gsub("\n", "<br />")}\n</p></div></div>\n}
|
161
161
|
}
|
162
162
|
|
163
|
-
extension("legislative list", /(?<=\A|\n\n|\r\n\r\n)^\$LegislativeList\s*$(.*?)
|
163
|
+
extension("legislative list", /(?<=\A|\n\n|\r\n\r\n)^\$LegislativeList\s*$(.*?)\$EndLegislativeList/m) do |body|
|
164
164
|
Govspeak::KramdownOverrides.with_kramdown_ordered_lists_disabled do
|
165
165
|
Kramdown::Document.new(body.strip).to_html.tap do |doc|
|
166
166
|
doc.gsub!('<ul>', '<ol>')
|
@@ -4,12 +4,33 @@ require 'with_deep_merge'
|
|
4
4
|
class Govspeak::HtmlSanitizer
|
5
5
|
include WithDeepMerge
|
6
6
|
|
7
|
-
|
7
|
+
class ImageSourceWhitelister
|
8
|
+
def initialize(allowed_image_hosts)
|
9
|
+
@allowed_image_hosts = allowed_image_hosts
|
10
|
+
end
|
11
|
+
|
12
|
+
def call(sanitize_context)
|
13
|
+
return unless sanitize_context[:node_name] == "img"
|
14
|
+
|
15
|
+
node = sanitize_context[:node]
|
16
|
+
image_uri = URI.parse(node['src'])
|
17
|
+
unless image_uri.relative? || @allowed_image_hosts.include?(image_uri.host)
|
18
|
+
node.unlink # the node isn't sanitary. Remove it from the document.
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def initialize(dirty_html, options = {})
|
8
24
|
@dirty_html = dirty_html
|
25
|
+
@allowed_image_hosts = options[:allowed_image_hosts]
|
9
26
|
end
|
10
27
|
|
11
28
|
def sanitize
|
12
|
-
|
29
|
+
transformers = []
|
30
|
+
if @allowed_image_hosts && @allowed_image_hosts.any?
|
31
|
+
transformers << ImageSourceWhitelister.new(@allowed_image_hosts)
|
32
|
+
end
|
33
|
+
Sanitize.clean(@dirty_html, sanitize_config.merge(transformers: transformers))
|
13
34
|
end
|
14
35
|
|
15
36
|
def sanitize_without_images
|
@@ -1,8 +1,9 @@
|
|
1
1
|
class Govspeak::HtmlValidator
|
2
2
|
attr_reader :string
|
3
3
|
|
4
|
-
def initialize(string)
|
4
|
+
def initialize(string, sanitization_options = {})
|
5
5
|
@string = string.dup.force_encoding(Encoding::UTF_8)
|
6
|
+
@sanitization_options = sanitization_options
|
6
7
|
end
|
7
8
|
|
8
9
|
def invalid?
|
@@ -11,7 +12,7 @@ class Govspeak::HtmlValidator
|
|
11
12
|
|
12
13
|
def valid?
|
13
14
|
dirty_html = govspeak_to_html
|
14
|
-
clean_html = Govspeak::HtmlSanitizer.new(dirty_html).sanitize
|
15
|
+
clean_html = Govspeak::HtmlSanitizer.new(dirty_html, @sanitization_options).sanitize
|
15
16
|
normalise_html(dirty_html) == normalise_html(clean_html)
|
16
17
|
end
|
17
18
|
|
data/lib/govspeak/version.rb
CHANGED
data/test/govspeak_test.rb
CHANGED
@@ -396,6 +396,49 @@ $CTA
|
|
396
396
|
assert_text_output "unordered list step list"
|
397
397
|
end
|
398
398
|
|
399
|
+
test_given_govspeak "
|
400
|
+
$LegislativeList
|
401
|
+
* 1.0 Lorem ipsum dolor sit amet, consectetur adipiscing elit.
|
402
|
+
Fusce felis ante, lobortis non quam sit amet, tempus interdum justo.
|
403
|
+
|
404
|
+
Pellentesque quam enim, egestas sit amet congue sit amet, ultrices vitae arcu.
|
405
|
+
fringilla, metus dui scelerisque est.
|
406
|
+
|
407
|
+
* a) A list item
|
408
|
+
|
409
|
+
* b) Another list item
|
410
|
+
|
411
|
+
* 1.1 Second entry
|
412
|
+
Curabitur pretium pharetra sapien, a feugiat arcu euismod eget.
|
413
|
+
Nunc luctus ornare varius. Nulla scelerisque, justo dictum dapibus
|
414
|
+
$EndLegislativeList
|
415
|
+
" do
|
416
|
+
assert_html_output %{
|
417
|
+
<ol class="legislative-list">
|
418
|
+
<li>
|
419
|
+
<p>1.0 Lorem ipsum dolor sit amet, consectetur adipiscing elit.
|
420
|
+
Fusce felis ante, lobortis non quam sit amet, tempus interdum justo.</p>
|
421
|
+
|
422
|
+
<p>Pellentesque quam enim, egestas sit amet congue sit amet, ultrices vitae arcu.
|
423
|
+
fringilla, metus dui scelerisque est.</p>
|
424
|
+
|
425
|
+
<ol>
|
426
|
+
<li>
|
427
|
+
<p>a) A list item</p>
|
428
|
+
</li>
|
429
|
+
<li>
|
430
|
+
<p>b) Another list item</p>
|
431
|
+
</li>
|
432
|
+
</ol>
|
433
|
+
</li>
|
434
|
+
<li>
|
435
|
+
<p>1.1 Second entry
|
436
|
+
Curabitur pretium pharetra sapien, a feugiat arcu euismod eget.
|
437
|
+
Nunc luctus ornare varius. Nulla scelerisque, justo dictum dapibus</p>
|
438
|
+
</li>
|
439
|
+
</ol>}
|
440
|
+
end
|
441
|
+
|
399
442
|
test_given_govspeak "
|
400
443
|
$LegislativeList
|
401
444
|
* 1. The quick
|
@@ -403,6 +446,7 @@ $CTA
|
|
403
446
|
* a) Jumps over
|
404
447
|
* b) The lazy
|
405
448
|
* 3. Dog
|
449
|
+
$EndLegislativeList
|
406
450
|
" do
|
407
451
|
assert_html_output %{
|
408
452
|
<ol class="legislative-list">
|
@@ -434,6 +478,7 @@ $CTA
|
|
434
478
|
|
435
479
|
$LegislativeList
|
436
480
|
* 1. jumps over the lazy dog
|
481
|
+
$EndLegislativeList
|
437
482
|
" do
|
438
483
|
assert_html_output %{
|
439
484
|
<p>The quick brown fox</p>
|
@@ -444,7 +489,7 @@ $CTA
|
|
444
489
|
}
|
445
490
|
end
|
446
491
|
|
447
|
-
test_given_govspeak "This bit of text\r\n\r\n$LegislativeList\r\n* 1. should be turned into a list" do
|
492
|
+
test_given_govspeak "This bit of text\r\n\r\n$LegislativeList\r\n* 1. should be turned into a list\r\n$EndLegislativeList" do
|
448
493
|
assert_html_output %{
|
449
494
|
<p>This bit of text</p>
|
450
495
|
|
@@ -534,12 +579,12 @@ $CTA
|
|
534
579
|
|
535
580
|
test "can sanitize a document" do
|
536
581
|
document = Govspeak::Document.new("<script>doBadThings();</script>")
|
537
|
-
assert_equal "doBadThings();", document.to_sanitized_html
|
582
|
+
assert_equal "doBadThings();", document.to_sanitized_html.strip
|
538
583
|
end
|
539
584
|
|
540
585
|
test "can sanitize a document without image" do
|
541
586
|
document = Govspeak::Document.new("<script>doBadThings();</script><img src='https://example.com/image.jpg'>")
|
542
|
-
assert_equal "doBadThings();<p></p>", document.to_sanitized_html_without_images
|
587
|
+
assert_equal "doBadThings();<p></p>", document.to_sanitized_html_without_images.gsub(/\s/, "")
|
543
588
|
end
|
544
589
|
|
545
590
|
test "identifies a Govspeak document containing malicious HTML as invalid" do
|
data/test/html_sanitizer_test.rb
CHANGED
@@ -28,6 +28,17 @@ class HtmlSanitizerTest < Test::Unit::TestCase
|
|
28
28
|
assert_equal "Fortnum & Mason", Govspeak::HtmlSanitizer.new(html).sanitize
|
29
29
|
end
|
30
30
|
|
31
|
+
test "allows images on whitelisted domains" do
|
32
|
+
html = "<img src='http://allowed.com/image.jgp'>"
|
33
|
+
sanitized_html = Govspeak::HtmlSanitizer.new(html, allowed_image_hosts: ['allowed.com']).sanitize
|
34
|
+
assert_equal "<img src=\"http://allowed.com/image.jgp\">", sanitized_html
|
35
|
+
end
|
36
|
+
|
37
|
+
test "removes images not on whitelisted domains" do
|
38
|
+
html = "<img src='http://evil.com/image.jgp'>"
|
39
|
+
assert_equal "", Govspeak::HtmlSanitizer.new(html, allowed_image_hosts: ['allowed.com']).sanitize
|
40
|
+
end
|
41
|
+
|
31
42
|
test "can strip images" do
|
32
43
|
html = "<img src='http://example.com/image.jgp'>"
|
33
44
|
assert_equal "", Govspeak::HtmlSanitizer.new(html).sanitize_without_images
|
data/test/html_validator_test.rb
CHANGED
@@ -85,4 +85,9 @@ class HtmlValidatorTest < Test::Unit::TestCase
|
|
85
85
|
test "allow things that will end up as HTML entities" do
|
86
86
|
assert Govspeak::HtmlValidator.new("Fortnum & Mason").valid?
|
87
87
|
end
|
88
|
+
|
89
|
+
test "optionally disallow images not on a whitelisted domain" do
|
90
|
+
html = "<img src='http://evil.com/image.jgp'>"
|
91
|
+
assert Govspeak::HtmlValidator.new(html, allowed_image_hosts: ['allowed.com']).invalid?
|
92
|
+
end
|
88
93
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: govspeak
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2014-08-
|
13
|
+
date: 2014-08-14 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: kramdown
|
@@ -184,7 +184,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
184
184
|
version: '0'
|
185
185
|
segments:
|
186
186
|
- 0
|
187
|
-
hash: -
|
187
|
+
hash: -1089488848379077838
|
188
188
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
189
189
|
none: false
|
190
190
|
requirements:
|
@@ -193,7 +193,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
193
193
|
version: '0'
|
194
194
|
segments:
|
195
195
|
- 0
|
196
|
-
hash: -
|
196
|
+
hash: -1089488848379077838
|
197
197
|
requirements: []
|
198
198
|
rubyforge_project:
|
199
199
|
rubygems_version: 1.8.23
|