govspeak 7.1.0 → 8.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d3ea39476d15515cff8cf40fa41951ed6d551a495e6f8a40415fdb4eb7752660
4
- data.tar.gz: e084cd737b290c2254fd43796ea4c0a99b906790034a22f258d509a556a726a8
3
+ metadata.gz: 0b77227c0a115225f51dc35e8dc7e177212aae5b215400f4b0384bd99fdf04a2
4
+ data.tar.gz: dcca6a433304ceb7d1cf1281ac0d16e738e7e7fb3479551ec69c0fe2e2dfa2ad
5
5
  SHA512:
6
- metadata.gz: c732aaee36be6d012574e52e6d7dc07a906a539f19e9e5d6f4bbf793a7936761fdc5f5c1e8cade26da7ed93830044017fea897ef7890a1d226796407166e372a
7
- data.tar.gz: b7c8a84a1baa01b04483674f10708dd96467b343560dd3c1465de7a0846870ab7a99a6a63da892b48399e4d9cb30f7f18f28273d1a7ad85d433a8a9b79a49fac
6
+ metadata.gz: f8302fc09c9160bc5ea3d8cc33af663e106ba36287d10105272c03467e5aed41c5fb7f5e76da934f93c121b79687287f1c9edc92dab2aebca486c256cd4fa76f
7
+ data.tar.gz: fc0c093d411ccb309172828f8d21671d8e31bc7458a2285b1fb7a0ba22a63a598e27397b78e52af93e756eb52353e7c53ec1c2b11e16d0b42b6e26b5a981c613
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 8.0.0
2
+
3
+ * BREAKING: The HTML `style` attribute and `<style>` element, which were never supposed to be available, are now forbidden. [#279](https://github.com/alphagov/govspeak/pull/279)
4
+
5
+ ## 7.1.1
6
+
7
+ * Make image and attachment embedding syntax more consistent [#274](https://github.com/alphagov/govspeak/pull/274)
8
+
1
9
  ## 7.1.0
2
10
 
3
11
  * Drop support for Ruby 2.7 [#272](https://github.com/alphagov/govspeak/pull/272)
@@ -17,31 +17,13 @@ class Govspeak::HtmlSanitizer
17
17
  end
18
18
  end
19
19
 
20
- class TableCellTextAlignWhitelister
21
- def call(sanitize_context)
22
- return unless %w[td th].include?(sanitize_context[:node_name])
23
-
24
- node = sanitize_context[:node]
25
-
26
- # Kramdown uses text-align to allow table cells to be aligned
27
- # http://kramdown.gettalong.org/quickref.html#tables
28
- if invalid_style_attribute?(node["style"])
29
- node.remove_attribute("style")
30
- end
31
- end
32
-
33
- def invalid_style_attribute?(style)
34
- style && !style.match(/^text-align:\s*(center|left|right)$/)
35
- end
36
- end
37
-
38
20
  def initialize(dirty_html, options = {})
39
21
  @dirty_html = dirty_html
40
22
  @allowed_image_hosts = options[:allowed_image_hosts]
41
23
  end
42
24
 
43
25
  def sanitize(allowed_elements: [])
44
- transformers = [TableCellTextAlignWhitelister.new]
26
+ transformers = []
45
27
  if @allowed_image_hosts && @allowed_image_hosts.any?
46
28
  transformers << ImageSourceWhitelister.new(@allowed_image_hosts)
47
29
  end
@@ -60,21 +42,29 @@ class Govspeak::HtmlSanitizer
60
42
  end
61
43
 
62
44
  def sanitize_config(allowed_elements: [])
45
+ # We purposefully disable style elements which Sanitize::Config::RELAXED allows
46
+ elements = Sanitize::Config::RELAXED[:elements] - %w[style] +
47
+ %w[govspeak-embed-attachment govspeak-embed-attachment-link svg path].concat(allowed_elements)
48
+
63
49
  Sanitize::Config.merge(
64
50
  Sanitize::Config::RELAXED,
65
- elements: Sanitize::Config::RELAXED[:elements] + %w[govspeak-embed-attachment govspeak-embed-attachment-link svg path].concat(allowed_elements),
51
+ elements: elements,
66
52
  attributes: {
67
- :all => Sanitize::Config::RELAXED[:attributes][:all] + %w[role aria-label],
53
+ # We purposefully disable style attributes which Sanitize::Config::RELAXED allows
54
+ :all => Sanitize::Config::RELAXED[:attributes][:all] + %w[role aria-label] - %w[style],
68
55
  "a" => Sanitize::Config::RELAXED[:attributes]["a"] + [:data] + %w[draggable],
69
- "svg" => Sanitize::Config::RELAXED[:attributes][:all] + %w[xmlns width height viewbox focusable],
70
- "path" => Sanitize::Config::RELAXED[:attributes][:all] + %w[fill d],
56
+ "svg" => %w[xmlns width height viewbox focusable],
57
+ "path" => %w[fill d],
71
58
  "div" => [:data],
72
- # @TODO These style attributes can be removed once we've checked there
73
- # isn't hardcoded HTML in documents that uses them
59
+ # The style attribute is permitted here only for the text-align styles that Kramdown
60
+ # generates for table alignment; we replace them in a post processor.
74
61
  "th" => Sanitize::Config::RELAXED[:attributes]["th"] + %w[style],
75
62
  "td" => Sanitize::Config::RELAXED[:attributes]["td"] + %w[style],
76
63
  "govspeak-embed-attachment" => %w[content-id],
77
64
  },
65
+ # The only styling we permit is text-align on table cells (which is the CSS kramdown
66
+ # generates), so we allow only this one CSS property
67
+ css: { properties: %w[text-align] },
78
68
  )
79
69
  end
80
70
  end
@@ -1,3 +1,3 @@
1
1
  module Govspeak
2
- VERSION = "7.1.0".freeze
2
+ VERSION = "8.0.0".freeze
3
3
  end
data/lib/govspeak.rb CHANGED
@@ -424,14 +424,14 @@ module Govspeak
424
424
  renderer.render(contact: ContactPresenter.new(contact))
425
425
  end
426
426
 
427
- extension("Image", /#{NEW_PARAGRAPH_LOOKBEHIND}\[Image:\s*(.*?)\s*\]/) do |image_id|
427
+ extension("Image", /^\[Image:\s*(.*?)\s*\]/) do |image_id|
428
428
  image = images.detect { |c| c.is_a?(Hash) && c[:id] == image_id }
429
429
  next "" unless image
430
430
 
431
431
  render_image(ImagePresenter.new(image))
432
432
  end
433
433
 
434
- extension("Attachment", /#{NEW_PARAGRAPH_LOOKBEHIND}\[Attachment:\s*(.*?)\s*\]/) do |attachment_id|
434
+ extension("Attachment", /^\[Attachment:\s*(.*?)\s*\]/) do |attachment_id|
435
435
  next "" if attachments.none? { |a| a[:id] == attachment_id }
436
436
 
437
437
  %(<govspeak-embed-attachment id="#{attachment_id}"></govspeak-embed-attachment>)
@@ -35,4 +35,15 @@ class GovspeakAttachmentLinkTest < Minitest::Test
35
35
  assert(root.css("p").size, 0)
36
36
  assert_match(/Attachment Title\s*test/, root.text)
37
37
  end
38
+
39
+ test "allows spaces and special characters in the identifier" do
40
+ attachment = {
41
+ id: "This is the name of my &%$@€? attachment",
42
+ url: "http://example.com/attachment.pdf",
43
+ title: "Attachment Title",
44
+ }
45
+
46
+ rendered = render_govspeak("[AttachmentLink: This is the name of my &%$@€? attachment]", [attachment])
47
+ assert_match(/Attachment Title/, rendered)
48
+ end
38
49
  end
@@ -21,7 +21,19 @@ class GovspeakAttachmentTest < Minitest::Test
21
21
  assert_match(/Attachment Title/, rendered)
22
22
  end
23
23
 
24
- test "only renders attachment when markdown extension starts on a line" do
24
+ test "allows spaces and special characters in the identifier" do
25
+ attachment = {
26
+ id: "This is the name of my &%$@€? attachment",
27
+ url: "http://example.com/attachment.pdf",
28
+ title: "Attachment Title",
29
+ }
30
+
31
+ rendered = render_govspeak("[Attachment: This is the name of my &%$@€? attachment]", [attachment])
32
+ assert_match(/<section class="gem-c-attachment/, rendered)
33
+ assert_match(/Attachment Title/, rendered)
34
+ end
35
+
36
+ test "only renders attachment when markdown extension starts on a new line" do
25
37
  attachment = {
26
38
  id: "attachment.pdf",
27
39
  url: "http://example.com/attachment.pdf",
@@ -34,5 +46,10 @@ class GovspeakAttachmentTest < Minitest::Test
34
46
  rendered = render_govspeak("[Attachment:attachment.pdf] some text", [attachment])
35
47
  assert_match(/<section class="gem-c-attachment/, rendered)
36
48
  assert_match(/<p>some text<\/p>/, rendered)
49
+
50
+ rendered = render_govspeak("some text\n[Attachment:attachment.pdf]\nsome more text", [attachment])
51
+ assert_match(/<p>some text<\/p>/, rendered)
52
+ assert_match(/<section class="gem-c-attachment/, rendered)
53
+ assert_match(/<p>some more text<\/p>/, rendered)
37
54
  end
38
55
  end
@@ -80,4 +80,30 @@ class GovspeakImagesBangTest < Minitest::Test
80
80
  )
81
81
  end
82
82
  end
83
+
84
+ test "!!n syntax must start on a new line" do
85
+ given_govspeak "some text !!1", images: [Image.new] do
86
+ assert_html_output("<p>some text !!1</p>")
87
+ end
88
+
89
+ given_govspeak "!!1", images: [Image.new] do
90
+ assert_html_output(
91
+ "<figure class=\"image embedded\"><div class=\"img\"><img src=\"http://example.com/image.jpg\" alt=\"my alt\"></div></figure>",
92
+ )
93
+ end
94
+
95
+ given_govspeak "!!1 some text", images: [Image.new] do
96
+ assert_html_output(
97
+ "<figure class=\"image embedded\"><div class=\"img\"><img src=\"http://example.com/image.jpg\" alt=\"my alt\"></div></figure>\n<p>some text</p>",
98
+ )
99
+ end
100
+
101
+ given_govspeak "some text\n!!1\nsome more text", images: [Image.new] do
102
+ assert_html_output <<~HTML
103
+ <p>some text</p>
104
+ <figure class="image embedded"><div class="img"><img src="http://example.com/image.jpg" alt="my alt"></div></figure>
105
+ <p>some more text</p>
106
+ HTML
107
+ end
108
+ end
83
109
  end
@@ -72,6 +72,15 @@ class GovspeakImagesTest < Minitest::Test
72
72
  end
73
73
  end
74
74
 
75
+ test "allows spaces and special characters in the identifier" do
76
+ image = build_image(id: "This is the name of my &%$@€? image")
77
+ given_govspeak "[Image: This is the name of my &%$@€? image]", images: [image] do
78
+ assert_html_output(
79
+ "<figure class=\"image embedded\"><div class=\"img\"><img src=\"http://example.com/image.jpg\" alt=\"my alt\"></div></figure>",
80
+ )
81
+ end
82
+ end
83
+
75
84
  test "Image is not inserted when it does not start on a new line" do
76
85
  given_govspeak "some text [Image:image-id]", images: [build_image] do
77
86
  assert_html_output("<p>some text [Image:image-id]</p>")
@@ -88,5 +97,13 @@ class GovspeakImagesTest < Minitest::Test
88
97
  "<figure class=\"image embedded\"><div class=\"img\"><img src=\"http://example.com/image.jpg\" alt=\"my alt\"></div></figure>\n<p>some text</p>",
89
98
  )
90
99
  end
100
+
101
+ given_govspeak "some text\n[Image:image-id]\nsome more text", images: [build_image] do
102
+ assert_html_output <<~HTML
103
+ <p>some text</p>
104
+ <figure class="image embedded"><div class="img"><img src="http://example.com/image.jpg" alt="my alt"></div></figure>
105
+ <p>some more text</p>
106
+ HTML
107
+ end
91
108
  end
92
109
  end
@@ -1,7 +1,7 @@
1
1
  require "test_helper"
2
2
 
3
- class GovspeakTableWithHeadersTest < Minitest::Test
4
- def expected_outcome
3
+ class GovspeakTablesTest < Minitest::Test
4
+ def expected_outcome_for_headers
5
5
  %(
6
6
  <table>
7
7
  <thead>
@@ -248,30 +248,44 @@ class GovspeakTableWithHeadersTest < Minitest::Test
248
248
  end
249
249
 
250
250
  test "Cells with |# are headers" do
251
- assert_equal document_body_with_hashes_for_all_headers.to_html, expected_outcome
251
+ assert_equal expected_outcome_for_headers, document_body_with_hashes_for_all_headers.to_html
252
252
  end
253
253
 
254
254
  test "Cells outside of thead with |# are th; thead still only contains th" do
255
- assert_equal document_body_with_hashes_for_row_headers.to_html, expected_outcome
255
+ assert_equal expected_outcome_for_headers, document_body_with_hashes_for_row_headers.to_html
256
256
  end
257
257
 
258
258
  test "Cells are given classes to indicate alignment" do
259
- assert_equal document_body_with_alignments.to_html, expected_outcome_for_table_with_alignments
259
+ assert_equal expected_outcome_for_table_with_alignments, document_body_with_alignments.to_html
260
+ end
261
+
262
+ test "Invalid alignment properties are dropped from cells" do
263
+ html = %(<table><tbody><tr><td style="text-align: middle">middle</td></tr></tbody></table>)
264
+ expected = "<table><tbody><tr><td>middle</td></tr></tbody></table>\n"
265
+
266
+ assert_equal expected, Govspeak::Document.new(html).to_html
267
+ end
268
+
269
+ test "Styles other than text-align are ignored on a table cell" do
270
+ html = %(<table><tbody><tr><td style="text-align: center; width: 100px;">middle</td></tr></tbody></table>)
271
+ expected = %(<table><tbody><tr><td class="cell-text-center">middle</td></tr></tbody></table>\n)
272
+
273
+ assert_equal expected, Govspeak::Document.new(html).to_html
260
274
  end
261
275
 
262
276
  test "Table headers with a scope of row are only in the first column of the table" do
263
- assert_equal document_body_with_table_headers_in_the_wrong_place.to_html, expected_outcome_for_table_headers_in_the_wrong_place
277
+ assert_equal expected_outcome_for_table_headers_in_the_wrong_place, document_body_with_table_headers_in_the_wrong_place.to_html
264
278
  end
265
279
 
266
280
  test "Table headers with a scope of row can have embedded links" do
267
- assert_equal document_body_with_table_headers_containing_links.to_html, expected_outcome_for_table_headers_containing_links
281
+ assert_equal expected_outcome_for_table_headers_containing_links, document_body_with_table_headers_containing_links.to_html
268
282
  end
269
283
 
270
284
  test "Table headers are not blank" do
271
- assert_equal document_body_with_blank_table_headers.to_html, expected_outcome_for_table_with_blank_table_headers
285
+ assert_equal expected_outcome_for_table_with_blank_table_headers, document_body_with_blank_table_headers.to_html
272
286
  end
273
287
 
274
288
  test "Table header superscript should parse" do
275
- assert_equal document_body_with_table_headers_containing_superscript.to_html, expected_outcome_for_table_with_table_headers_containing_superscript
289
+ assert_equal expected_outcome_for_table_with_table_headers_containing_superscript, document_body_with_table_headers_containing_superscript.to_html, expected_outcome_for_table_with_table_headers_containing_superscript
276
290
  end
277
291
  end
@@ -17,6 +17,16 @@ class HtmlSanitizerTest < Minitest::Test
17
17
  assert_equal "<a href=\"/\">Link</a>", Govspeak::HtmlSanitizer.new(html).sanitize
18
18
  end
19
19
 
20
+ test "disallow style attributes" do
21
+ html = '<a href="/" style="font-weight:bold">Link</a>'
22
+ assert_equal '<a href="/">Link</a>', Govspeak::HtmlSanitizer.new(html).sanitize
23
+ end
24
+
25
+ test "disallow style elements" do
26
+ html = "<style>h1 { color: pink; }</style><h1>Hi</h1>"
27
+ assert_equal "<h1>Hi</h1>", Govspeak::HtmlSanitizer.new(html).sanitize
28
+ end
29
+
20
30
  test "allow non-JS HTML content" do
21
31
  html = "<a href='foo'>"
22
32
  assert_equal "<a href=\"foo\"></a>", Govspeak::HtmlSanitizer.new(html).sanitize
@@ -79,16 +89,16 @@ class HtmlSanitizerTest < Minitest::Test
79
89
  assert_equal "<table><tbody><tr><th>thing</th><td>thing</td></tr></tbody></table>", Govspeak::HtmlSanitizer.new(html).sanitize
80
90
  end
81
91
 
82
- test "allows valid text-align properties on the style attribute for table cells and table headings" do
83
- %w[left right center].each do |alignment|
84
- html = "<table><thead><tr><th style=\"text-align: #{alignment}\">thing</th></tr></thead><tbody><tr><td style=\"text-align: #{alignment}\">thing</td></tr></tbody></table>"
85
- assert_equal html, Govspeak::HtmlSanitizer.new(html).sanitize
86
- end
92
+ test "allows text-align properties on the style attribute for table cells and table headings" do
93
+ html = "<table><thead><tr><th style=\"text-align: right\">thing</th></tr></thead><tbody><tr><td style=\"text-align: center\">thing</td></tr></tbody></table>"
94
+ assert_equal html, Govspeak::HtmlSanitizer.new(html).sanitize
95
+
96
+ input = "<table><thead><tr><th style=\"text-align: left;width: 100px;\">thing</th></tr></thead><tbody><tr><td style=\"text-align: center;background-color: blue;\">thing</td></tr></tbody></table>"
97
+ expected = "<table><thead><tr><th style=\"text-align: left;\">thing</th></tr></thead><tbody><tr><td style=\"text-align: center;\">thing</td></tr></tbody></table>"
98
+ assert_equal expected, Govspeak::HtmlSanitizer.new(input).sanitize
87
99
 
88
100
  [
89
101
  "width: 10000px",
90
- "text-align: middle",
91
- "text-align: left; width: 10px",
92
102
  "background-image: url(javascript:alert('XSS'))",
93
103
  "expression(alert('XSS'));",
94
104
  ].each do |style|
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: govspeak
3
3
  version: !ruby/object:Gem::Version
4
- version: 7.1.0
4
+ version: 8.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - GOV.UK Dev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-03-28 00:00:00.000000000 Z
11
+ date: 2023-07-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: actionview
@@ -190,14 +190,14 @@ dependencies:
190
190
  requirements:
191
191
  - - '='
192
192
  - !ruby/object:Gem::Version
193
- version: 4.10.0
193
+ version: 4.11.0
194
194
  type: :development
195
195
  prerelease: false
196
196
  version_requirements: !ruby/object:Gem::Requirement
197
197
  requirements:
198
198
  - - '='
199
199
  - !ruby/object:Gem::Version
200
- version: 4.10.0
200
+ version: 4.11.0
201
201
  - !ruby/object:Gem::Dependency
202
202
  name: simplecov
203
203
  requirement: !ruby/object:Gem::Requirement
@@ -302,7 +302,7 @@ files:
302
302
  - test/govspeak_link_extractor_test.rb
303
303
  - test/govspeak_link_test.rb
304
304
  - test/govspeak_structured_headers_test.rb
305
- - test/govspeak_table_with_headers_test.rb
305
+ - test/govspeak_tables_test.rb
306
306
  - test/govspeak_test.rb
307
307
  - test/govspeak_test_helper.rb
308
308
  - test/html_sanitizer_test.rb
@@ -327,7 +327,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
327
327
  - !ruby/object:Gem::Version
328
328
  version: '0'
329
329
  requirements: []
330
- rubygems_version: 3.4.10
330
+ rubygems_version: 3.4.15
331
331
  signing_key:
332
332
  specification_version: 4
333
333
  summary: Markup language for single domain
@@ -346,7 +346,7 @@ test_files:
346
346
  - test/govspeak_link_extractor_test.rb
347
347
  - test/govspeak_link_test.rb
348
348
  - test/govspeak_structured_headers_test.rb
349
- - test/govspeak_table_with_headers_test.rb
349
+ - test/govspeak_tables_test.rb
350
350
  - test/govspeak_test.rb
351
351
  - test/govspeak_test_helper.rb
352
352
  - test/html_sanitizer_test.rb