govspeak 2.0.2 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +5 -0
- data/README.md +1 -0
- data/lib/govspeak.rb +1 -1
- data/lib/govspeak/html_sanitizer.rb +23 -2
- data/lib/govspeak/html_validator.rb +3 -2
- data/lib/govspeak/version.rb +1 -1
- data/test/govspeak_test.rb +48 -3
- data/test/html_sanitizer_test.rb +11 -0
- data/test/html_validator_test.rb +5 -0
- metadata +4 -4
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
## 3.0.0
|
2
|
+
|
3
|
+
* Add an `allowed_image_hosts` option to `HtmlValidator` (and `HtmlSanitizer`)
|
4
|
+
* BREAKING CHANGE: Added the `$EndLegislativeList` tag which allows line breaks in `LegislativeLists`.
|
5
|
+
|
1
6
|
## 2.0.2
|
2
7
|
* Fix a bug with the HtmlValidator to do with kramdown now respecting character
|
3
8
|
encodings of input data.
|
data/README.md
CHANGED
data/lib/govspeak.rb
CHANGED
@@ -160,7 +160,7 @@ module Govspeak
|
|
160
160
|
%{<div class="address"><div class="adr org fn"><p>\n#{body.sub("\n", "").gsub("\n", "<br />")}\n</p></div></div>\n}
|
161
161
|
}
|
162
162
|
|
163
|
-
extension("legislative list", /(?<=\A|\n\n|\r\n\r\n)^\$LegislativeList\s*$(.*?)
|
163
|
+
extension("legislative list", /(?<=\A|\n\n|\r\n\r\n)^\$LegislativeList\s*$(.*?)\$EndLegislativeList/m) do |body|
|
164
164
|
Govspeak::KramdownOverrides.with_kramdown_ordered_lists_disabled do
|
165
165
|
Kramdown::Document.new(body.strip).to_html.tap do |doc|
|
166
166
|
doc.gsub!('<ul>', '<ol>')
|
@@ -4,12 +4,33 @@ require 'with_deep_merge'
|
|
4
4
|
class Govspeak::HtmlSanitizer
|
5
5
|
include WithDeepMerge
|
6
6
|
|
7
|
-
|
7
|
+
class ImageSourceWhitelister
|
8
|
+
def initialize(allowed_image_hosts)
|
9
|
+
@allowed_image_hosts = allowed_image_hosts
|
10
|
+
end
|
11
|
+
|
12
|
+
def call(sanitize_context)
|
13
|
+
return unless sanitize_context[:node_name] == "img"
|
14
|
+
|
15
|
+
node = sanitize_context[:node]
|
16
|
+
image_uri = URI.parse(node['src'])
|
17
|
+
unless image_uri.relative? || @allowed_image_hosts.include?(image_uri.host)
|
18
|
+
node.unlink # the node isn't sanitary. Remove it from the document.
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def initialize(dirty_html, options = {})
|
8
24
|
@dirty_html = dirty_html
|
25
|
+
@allowed_image_hosts = options[:allowed_image_hosts]
|
9
26
|
end
|
10
27
|
|
11
28
|
def sanitize
|
12
|
-
|
29
|
+
transformers = []
|
30
|
+
if @allowed_image_hosts && @allowed_image_hosts.any?
|
31
|
+
transformers << ImageSourceWhitelister.new(@allowed_image_hosts)
|
32
|
+
end
|
33
|
+
Sanitize.clean(@dirty_html, sanitize_config.merge(transformers: transformers))
|
13
34
|
end
|
14
35
|
|
15
36
|
def sanitize_without_images
|
@@ -1,8 +1,9 @@
|
|
1
1
|
class Govspeak::HtmlValidator
|
2
2
|
attr_reader :string
|
3
3
|
|
4
|
-
def initialize(string)
|
4
|
+
def initialize(string, sanitization_options = {})
|
5
5
|
@string = string.dup.force_encoding(Encoding::UTF_8)
|
6
|
+
@sanitization_options = sanitization_options
|
6
7
|
end
|
7
8
|
|
8
9
|
def invalid?
|
@@ -11,7 +12,7 @@ class Govspeak::HtmlValidator
|
|
11
12
|
|
12
13
|
def valid?
|
13
14
|
dirty_html = govspeak_to_html
|
14
|
-
clean_html = Govspeak::HtmlSanitizer.new(dirty_html).sanitize
|
15
|
+
clean_html = Govspeak::HtmlSanitizer.new(dirty_html, @sanitization_options).sanitize
|
15
16
|
normalise_html(dirty_html) == normalise_html(clean_html)
|
16
17
|
end
|
17
18
|
|
data/lib/govspeak/version.rb
CHANGED
data/test/govspeak_test.rb
CHANGED
@@ -396,6 +396,49 @@ $CTA
|
|
396
396
|
assert_text_output "unordered list step list"
|
397
397
|
end
|
398
398
|
|
399
|
+
test_given_govspeak "
|
400
|
+
$LegislativeList
|
401
|
+
* 1.0 Lorem ipsum dolor sit amet, consectetur adipiscing elit.
|
402
|
+
Fusce felis ante, lobortis non quam sit amet, tempus interdum justo.
|
403
|
+
|
404
|
+
Pellentesque quam enim, egestas sit amet congue sit amet, ultrices vitae arcu.
|
405
|
+
fringilla, metus dui scelerisque est.
|
406
|
+
|
407
|
+
* a) A list item
|
408
|
+
|
409
|
+
* b) Another list item
|
410
|
+
|
411
|
+
* 1.1 Second entry
|
412
|
+
Curabitur pretium pharetra sapien, a feugiat arcu euismod eget.
|
413
|
+
Nunc luctus ornare varius. Nulla scelerisque, justo dictum dapibus
|
414
|
+
$EndLegislativeList
|
415
|
+
" do
|
416
|
+
assert_html_output %{
|
417
|
+
<ol class="legislative-list">
|
418
|
+
<li>
|
419
|
+
<p>1.0 Lorem ipsum dolor sit amet, consectetur adipiscing elit.
|
420
|
+
Fusce felis ante, lobortis non quam sit amet, tempus interdum justo.</p>
|
421
|
+
|
422
|
+
<p>Pellentesque quam enim, egestas sit amet congue sit amet, ultrices vitae arcu.
|
423
|
+
fringilla, metus dui scelerisque est.</p>
|
424
|
+
|
425
|
+
<ol>
|
426
|
+
<li>
|
427
|
+
<p>a) A list item</p>
|
428
|
+
</li>
|
429
|
+
<li>
|
430
|
+
<p>b) Another list item</p>
|
431
|
+
</li>
|
432
|
+
</ol>
|
433
|
+
</li>
|
434
|
+
<li>
|
435
|
+
<p>1.1 Second entry
|
436
|
+
Curabitur pretium pharetra sapien, a feugiat arcu euismod eget.
|
437
|
+
Nunc luctus ornare varius. Nulla scelerisque, justo dictum dapibus</p>
|
438
|
+
</li>
|
439
|
+
</ol>}
|
440
|
+
end
|
441
|
+
|
399
442
|
test_given_govspeak "
|
400
443
|
$LegislativeList
|
401
444
|
* 1. The quick
|
@@ -403,6 +446,7 @@ $CTA
|
|
403
446
|
* a) Jumps over
|
404
447
|
* b) The lazy
|
405
448
|
* 3. Dog
|
449
|
+
$EndLegislativeList
|
406
450
|
" do
|
407
451
|
assert_html_output %{
|
408
452
|
<ol class="legislative-list">
|
@@ -434,6 +478,7 @@ $CTA
|
|
434
478
|
|
435
479
|
$LegislativeList
|
436
480
|
* 1. jumps over the lazy dog
|
481
|
+
$EndLegislativeList
|
437
482
|
" do
|
438
483
|
assert_html_output %{
|
439
484
|
<p>The quick brown fox</p>
|
@@ -444,7 +489,7 @@ $CTA
|
|
444
489
|
}
|
445
490
|
end
|
446
491
|
|
447
|
-
test_given_govspeak "This bit of text\r\n\r\n$LegislativeList\r\n* 1. should be turned into a list" do
|
492
|
+
test_given_govspeak "This bit of text\r\n\r\n$LegislativeList\r\n* 1. should be turned into a list\r\n$EndLegislativeList" do
|
448
493
|
assert_html_output %{
|
449
494
|
<p>This bit of text</p>
|
450
495
|
|
@@ -534,12 +579,12 @@ $CTA
|
|
534
579
|
|
535
580
|
test "can sanitize a document" do
|
536
581
|
document = Govspeak::Document.new("<script>doBadThings();</script>")
|
537
|
-
assert_equal "doBadThings();", document.to_sanitized_html
|
582
|
+
assert_equal "doBadThings();", document.to_sanitized_html.strip
|
538
583
|
end
|
539
584
|
|
540
585
|
test "can sanitize a document without image" do
|
541
586
|
document = Govspeak::Document.new("<script>doBadThings();</script><img src='https://example.com/image.jpg'>")
|
542
|
-
assert_equal "doBadThings();<p></p>", document.to_sanitized_html_without_images
|
587
|
+
assert_equal "doBadThings();<p></p>", document.to_sanitized_html_without_images.gsub(/\s/, "")
|
543
588
|
end
|
544
589
|
|
545
590
|
test "identifies a Govspeak document containing malicious HTML as invalid" do
|
data/test/html_sanitizer_test.rb
CHANGED
@@ -28,6 +28,17 @@ class HtmlSanitizerTest < Test::Unit::TestCase
|
|
28
28
|
assert_equal "Fortnum & Mason", Govspeak::HtmlSanitizer.new(html).sanitize
|
29
29
|
end
|
30
30
|
|
31
|
+
test "allows images on whitelisted domains" do
|
32
|
+
html = "<img src='http://allowed.com/image.jgp'>"
|
33
|
+
sanitized_html = Govspeak::HtmlSanitizer.new(html, allowed_image_hosts: ['allowed.com']).sanitize
|
34
|
+
assert_equal "<img src=\"http://allowed.com/image.jgp\">", sanitized_html
|
35
|
+
end
|
36
|
+
|
37
|
+
test "removes images not on whitelisted domains" do
|
38
|
+
html = "<img src='http://evil.com/image.jgp'>"
|
39
|
+
assert_equal "", Govspeak::HtmlSanitizer.new(html, allowed_image_hosts: ['allowed.com']).sanitize
|
40
|
+
end
|
41
|
+
|
31
42
|
test "can strip images" do
|
32
43
|
html = "<img src='http://example.com/image.jgp'>"
|
33
44
|
assert_equal "", Govspeak::HtmlSanitizer.new(html).sanitize_without_images
|
data/test/html_validator_test.rb
CHANGED
@@ -85,4 +85,9 @@ class HtmlValidatorTest < Test::Unit::TestCase
|
|
85
85
|
test "allow things that will end up as HTML entities" do
|
86
86
|
assert Govspeak::HtmlValidator.new("Fortnum & Mason").valid?
|
87
87
|
end
|
88
|
+
|
89
|
+
test "optionally disallow images not on a whitelisted domain" do
|
90
|
+
html = "<img src='http://evil.com/image.jgp'>"
|
91
|
+
assert Govspeak::HtmlValidator.new(html, allowed_image_hosts: ['allowed.com']).invalid?
|
92
|
+
end
|
88
93
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: govspeak
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.2
|
4
|
+
version: 3.0.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2014-08-
|
13
|
+
date: 2014-08-14 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: kramdown
|
@@ -184,7 +184,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
184
184
|
version: '0'
|
185
185
|
segments:
|
186
186
|
- 0
|
187
|
-
hash: -
|
187
|
+
hash: -1089488848379077838
|
188
188
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
189
189
|
none: false
|
190
190
|
requirements:
|
@@ -193,7 +193,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
193
193
|
version: '0'
|
194
194
|
segments:
|
195
195
|
- 0
|
196
|
-
hash: -
|
196
|
+
hash: -1089488848379077838
|
197
197
|
requirements: []
|
198
198
|
rubyforge_project:
|
199
199
|
rubygems_version: 1.8.23
|