govspeak 7.1.0 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d3ea39476d15515cff8cf40fa41951ed6d551a495e6f8a40415fdb4eb7752660
4
- data.tar.gz: e084cd737b290c2254fd43796ea4c0a99b906790034a22f258d509a556a726a8
3
+ metadata.gz: 0b77227c0a115225f51dc35e8dc7e177212aae5b215400f4b0384bd99fdf04a2
4
+ data.tar.gz: dcca6a433304ceb7d1cf1281ac0d16e738e7e7fb3479551ec69c0fe2e2dfa2ad
5
5
  SHA512:
6
- metadata.gz: c732aaee36be6d012574e52e6d7dc07a906a539f19e9e5d6f4bbf793a7936761fdc5f5c1e8cade26da7ed93830044017fea897ef7890a1d226796407166e372a
7
- data.tar.gz: b7c8a84a1baa01b04483674f10708dd96467b343560dd3c1465de7a0846870ab7a99a6a63da892b48399e4d9cb30f7f18f28273d1a7ad85d433a8a9b79a49fac
6
+ metadata.gz: f8302fc09c9160bc5ea3d8cc33af663e106ba36287d10105272c03467e5aed41c5fb7f5e76da934f93c121b79687287f1c9edc92dab2aebca486c256cd4fa76f
7
+ data.tar.gz: fc0c093d411ccb309172828f8d21671d8e31bc7458a2285b1fb7a0ba22a63a598e27397b78e52af93e756eb52353e7c53ec1c2b11e16d0b42b6e26b5a981c613
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 8.0.0
2
+
3
+ * BREAKING: HTML style attribute and style element, which were never supposed to be available, are forbidden. [#279](https://github.com/alphagov/govspeak/pull/279)
4
+
5
+ ## 7.1.1
6
+
7
+ * Make image and attachment embedding syntax more consistent [#274](https://github.com/alphagov/govspeak/pull/274)
8
+
1
9
  ## 7.1.0
2
10
 
3
11
  * Drop support for Ruby 2.7 [#272](https://github.com/alphagov/govspeak/pull/272)
@@ -17,31 +17,13 @@ class Govspeak::HtmlSanitizer
17
17
  end
18
18
  end
19
19
 
20
- class TableCellTextAlignWhitelister
21
- def call(sanitize_context)
22
- return unless %w[td th].include?(sanitize_context[:node_name])
23
-
24
- node = sanitize_context[:node]
25
-
26
- # Kramdown uses text-align to allow table cells to be aligned
27
- # http://kramdown.gettalong.org/quickref.html#tables
28
- if invalid_style_attribute?(node["style"])
29
- node.remove_attribute("style")
30
- end
31
- end
32
-
33
- def invalid_style_attribute?(style)
34
- style && !style.match(/^text-align:\s*(center|left|right)$/)
35
- end
36
- end
37
-
38
20
  def initialize(dirty_html, options = {})
39
21
  @dirty_html = dirty_html
40
22
  @allowed_image_hosts = options[:allowed_image_hosts]
41
23
  end
42
24
 
43
25
  def sanitize(allowed_elements: [])
44
- transformers = [TableCellTextAlignWhitelister.new]
26
+ transformers = []
45
27
  if @allowed_image_hosts && @allowed_image_hosts.any?
46
28
  transformers << ImageSourceWhitelister.new(@allowed_image_hosts)
47
29
  end
@@ -60,21 +42,29 @@ class Govspeak::HtmlSanitizer
60
42
  end
61
43
 
62
44
  def sanitize_config(allowed_elements: [])
45
+ # We purposefully disable style elements which Sanitize::Config::RELAXED allows
46
+ elements = Sanitize::Config::RELAXED[:elements] - %w[style] +
47
+ %w[govspeak-embed-attachment govspeak-embed-attachment-link svg path].concat(allowed_elements)
48
+
63
49
  Sanitize::Config.merge(
64
50
  Sanitize::Config::RELAXED,
65
- elements: Sanitize::Config::RELAXED[:elements] + %w[govspeak-embed-attachment govspeak-embed-attachment-link svg path].concat(allowed_elements),
51
+ elements: elements,
66
52
  attributes: {
67
- :all => Sanitize::Config::RELAXED[:attributes][:all] + %w[role aria-label],
53
+ # We purposefully disable style attributes which Sanitize::Config::RELAXED allows
54
+ :all => Sanitize::Config::RELAXED[:attributes][:all] + %w[role aria-label] - %w[style],
68
55
  "a" => Sanitize::Config::RELAXED[:attributes]["a"] + [:data] + %w[draggable],
69
- "svg" => Sanitize::Config::RELAXED[:attributes][:all] + %w[xmlns width height viewbox focusable],
70
- "path" => Sanitize::Config::RELAXED[:attributes][:all] + %w[fill d],
56
+ "svg" => %w[xmlns width height viewbox focusable],
57
+ "path" => %w[fill d],
71
58
  "div" => [:data],
72
- # @TODO These style attributes can be removed once we've checked there
73
- # isn't hardcoded HTML in documents that uses them
59
+ # The style attributes are permitted here just for the ones Kramdown generates for table alignment;
60
+ # we replace them in a post processor.
74
61
  "th" => Sanitize::Config::RELAXED[:attributes]["th"] + %w[style],
75
62
  "td" => Sanitize::Config::RELAXED[:attributes]["td"] + %w[style],
76
63
  "govspeak-embed-attachment" => %w[content-id],
77
64
  },
65
+ # The only styling we permit is text-align on table cells (which is the CSS kramdown
66
+ # generates), so we allow only this one CSS property
67
+ css: { properties: %w[text-align] },
78
68
  )
79
69
  end
80
70
  end
@@ -1,3 +1,3 @@
1
1
  module Govspeak
2
- VERSION = "7.1.0".freeze
2
+ VERSION = "8.0.0".freeze
3
3
  end
data/lib/govspeak.rb CHANGED
@@ -424,14 +424,14 @@ module Govspeak
424
424
  renderer.render(contact: ContactPresenter.new(contact))
425
425
  end
426
426
 
427
- extension("Image", /#{NEW_PARAGRAPH_LOOKBEHIND}\[Image:\s*(.*?)\s*\]/) do |image_id|
427
+ extension("Image", /^\[Image:\s*(.*?)\s*\]/) do |image_id|
428
428
  image = images.detect { |c| c.is_a?(Hash) && c[:id] == image_id }
429
429
  next "" unless image
430
430
 
431
431
  render_image(ImagePresenter.new(image))
432
432
  end
433
433
 
434
- extension("Attachment", /#{NEW_PARAGRAPH_LOOKBEHIND}\[Attachment:\s*(.*?)\s*\]/) do |attachment_id|
434
+ extension("Attachment", /^\[Attachment:\s*(.*?)\s*\]/) do |attachment_id|
435
435
  next "" if attachments.none? { |a| a[:id] == attachment_id }
436
436
 
437
437
  %(<govspeak-embed-attachment id="#{attachment_id}"></govspeak-embed-attachment>)
@@ -35,4 +35,15 @@ class GovspeakAttachmentLinkTest < Minitest::Test
35
35
  assert(root.css("p").size, 0)
36
36
  assert_match(/Attachment Title\s*test/, root.text)
37
37
  end
38
+
39
+ test "allows spaces and special characters in the identifier" do
40
+ attachment = {
41
+ id: "This is the name of my &%$@€? attachment",
42
+ url: "http://example.com/attachment.pdf",
43
+ title: "Attachment Title",
44
+ }
45
+
46
+ rendered = render_govspeak("[AttachmentLink: This is the name of my &%$@€? attachment]", [attachment])
47
+ assert_match(/Attachment Title/, rendered)
48
+ end
38
49
  end
@@ -21,7 +21,19 @@ class GovspeakAttachmentTest < Minitest::Test
21
21
  assert_match(/Attachment Title/, rendered)
22
22
  end
23
23
 
24
- test "only renders attachment when markdown extension starts on a line" do
24
+ test "allows spaces and special characters in the identifier" do
25
+ attachment = {
26
+ id: "This is the name of my &%$@€? attachment",
27
+ url: "http://example.com/attachment.pdf",
28
+ title: "Attachment Title",
29
+ }
30
+
31
+ rendered = render_govspeak("[Attachment: This is the name of my &%$@€? attachment]", [attachment])
32
+ assert_match(/<section class="gem-c-attachment/, rendered)
33
+ assert_match(/Attachment Title/, rendered)
34
+ end
35
+
36
+ test "only renders attachment when markdown extension starts on a new line" do
25
37
  attachment = {
26
38
  id: "attachment.pdf",
27
39
  url: "http://example.com/attachment.pdf",
@@ -34,5 +46,10 @@ class GovspeakAttachmentTest < Minitest::Test
34
46
  rendered = render_govspeak("[Attachment:attachment.pdf] some text", [attachment])
35
47
  assert_match(/<section class="gem-c-attachment/, rendered)
36
48
  assert_match(/<p>some text<\/p>/, rendered)
49
+
50
+ rendered = render_govspeak("some text\n[Attachment:attachment.pdf]\nsome more text", [attachment])
51
+ assert_match(/<p>some text<\/p>/, rendered)
52
+ assert_match(/<section class="gem-c-attachment/, rendered)
53
+ assert_match(/<p>some more text<\/p>/, rendered)
37
54
  end
38
55
  end
@@ -80,4 +80,30 @@ class GovspeakImagesBangTest < Minitest::Test
80
80
  )
81
81
  end
82
82
  end
83
+
84
+ test "!!n syntax must start on a new line" do
85
+ given_govspeak "some text !!1", images: [Image.new] do
86
+ assert_html_output("<p>some text !!1</p>")
87
+ end
88
+
89
+ given_govspeak "!!1", images: [Image.new] do
90
+ assert_html_output(
91
+ "<figure class=\"image embedded\"><div class=\"img\"><img src=\"http://example.com/image.jpg\" alt=\"my alt\"></div></figure>",
92
+ )
93
+ end
94
+
95
+ given_govspeak "!!1 some text", images: [Image.new] do
96
+ assert_html_output(
97
+ "<figure class=\"image embedded\"><div class=\"img\"><img src=\"http://example.com/image.jpg\" alt=\"my alt\"></div></figure>\n<p>some text</p>",
98
+ )
99
+ end
100
+
101
+ given_govspeak "some text\n!!1\nsome more text", images: [Image.new] do
102
+ assert_html_output <<~HTML
103
+ <p>some text</p>
104
+ <figure class="image embedded"><div class="img"><img src="http://example.com/image.jpg" alt="my alt"></div></figure>
105
+ <p>some more text</p>
106
+ HTML
107
+ end
108
+ end
83
109
  end
@@ -72,6 +72,15 @@ class GovspeakImagesTest < Minitest::Test
72
72
  end
73
73
  end
74
74
 
75
+ test "allows spaces and special characters in the identifier" do
76
+ image = build_image(id: "This is the name of my &%$@€? image")
77
+ given_govspeak "[Image: This is the name of my &%$@€? image]", images: [image] do
78
+ assert_html_output(
79
+ "<figure class=\"image embedded\"><div class=\"img\"><img src=\"http://example.com/image.jpg\" alt=\"my alt\"></div></figure>",
80
+ )
81
+ end
82
+ end
83
+
75
84
  test "Image is not inserted when it does not start on a new line" do
76
85
  given_govspeak "some text [Image:image-id]", images: [build_image] do
77
86
  assert_html_output("<p>some text [Image:image-id]</p>")
@@ -88,5 +97,13 @@ class GovspeakImagesTest < Minitest::Test
88
97
  "<figure class=\"image embedded\"><div class=\"img\"><img src=\"http://example.com/image.jpg\" alt=\"my alt\"></div></figure>\n<p>some text</p>",
89
98
  )
90
99
  end
100
+
101
+ given_govspeak "some text\n[Image:image-id]\nsome more text", images: [build_image] do
102
+ assert_html_output <<~HTML
103
+ <p>some text</p>
104
+ <figure class="image embedded"><div class="img"><img src="http://example.com/image.jpg" alt="my alt"></div></figure>
105
+ <p>some more text</p>
106
+ HTML
107
+ end
91
108
  end
92
109
  end
@@ -1,7 +1,7 @@
1
1
  require "test_helper"
2
2
 
3
- class GovspeakTableWithHeadersTest < Minitest::Test
4
- def expected_outcome
3
+ class GovspeakTablesTest < Minitest::Test
4
+ def expected_outcome_for_headers
5
5
  %(
6
6
  <table>
7
7
  <thead>
@@ -248,30 +248,44 @@ class GovspeakTableWithHeadersTest < Minitest::Test
248
248
  end
249
249
 
250
250
  test "Cells with |# are headers" do
251
- assert_equal document_body_with_hashes_for_all_headers.to_html, expected_outcome
251
+ assert_equal expected_outcome_for_headers, document_body_with_hashes_for_all_headers.to_html
252
252
  end
253
253
 
254
254
  test "Cells outside of thead with |# are th; thead still only contains th" do
255
- assert_equal document_body_with_hashes_for_row_headers.to_html, expected_outcome
255
+ assert_equal expected_outcome_for_headers, document_body_with_hashes_for_row_headers.to_html
256
256
  end
257
257
 
258
258
  test "Cells are given classes to indicate alignment" do
259
- assert_equal document_body_with_alignments.to_html, expected_outcome_for_table_with_alignments
259
+ assert_equal expected_outcome_for_table_with_alignments, document_body_with_alignments.to_html
260
+ end
261
+
262
+ test "Invalid alignment properties are dropped from cells" do
263
+ html = %(<table><tbody><tr><td style="text-align: middle">middle</td></tr></tbody></table>)
264
+ expected = "<table><tbody><tr><td>middle</td></tr></tbody></table>\n"
265
+
266
+ assert_equal expected, Govspeak::Document.new(html).to_html
267
+ end
268
+
269
+ test "Styles other than text-align are ignored on a table cell" do
270
+ html = %(<table><tbody><tr><td style="text-align: center; width: 100px;">middle</td></tr></tbody></table>)
271
+ expected = %(<table><tbody><tr><td class="cell-text-center">middle</td></tr></tbody></table>\n)
272
+
273
+ assert_equal expected, Govspeak::Document.new(html).to_html
260
274
  end
261
275
 
262
276
  test "Table headers with a scope of row are only in the first column of the table" do
263
- assert_equal document_body_with_table_headers_in_the_wrong_place.to_html, expected_outcome_for_table_headers_in_the_wrong_place
277
+ assert_equal expected_outcome_for_table_headers_in_the_wrong_place, document_body_with_table_headers_in_the_wrong_place.to_html
264
278
  end
265
279
 
266
280
  test "Table headers with a scope of row can have embedded links" do
267
- assert_equal document_body_with_table_headers_containing_links.to_html, expected_outcome_for_table_headers_containing_links
281
+ assert_equal expected_outcome_for_table_headers_containing_links, document_body_with_table_headers_containing_links.to_html
268
282
  end
269
283
 
270
284
  test "Table headers are not blank" do
271
- assert_equal document_body_with_blank_table_headers.to_html, expected_outcome_for_table_with_blank_table_headers
285
+ assert_equal expected_outcome_for_table_with_blank_table_headers, document_body_with_blank_table_headers.to_html
272
286
  end
273
287
 
274
288
  test "Table header superscript should parse" do
275
- assert_equal document_body_with_table_headers_containing_superscript.to_html, expected_outcome_for_table_with_table_headers_containing_superscript
289
+ assert_equal expected_outcome_for_table_with_table_headers_containing_superscript, document_body_with_table_headers_containing_superscript.to_html, expected_outcome_for_table_with_table_headers_containing_superscript
276
290
  end
277
291
  end
@@ -17,6 +17,16 @@ class HtmlSanitizerTest < Minitest::Test
17
17
  assert_equal "<a href=\"/\">Link</a>", Govspeak::HtmlSanitizer.new(html).sanitize
18
18
  end
19
19
 
20
+ test "disallow style attributes" do
21
+ html = '<a href="/" style="font-weight:bold">Link</a>'
22
+ assert_equal '<a href="/">Link</a>', Govspeak::HtmlSanitizer.new(html).sanitize
23
+ end
24
+
25
+ test "disallow style elements" do
26
+ html = "<style>h1 { color: pink; }</style><h1>Hi</h1>"
27
+ assert_equal "<h1>Hi</h1>", Govspeak::HtmlSanitizer.new(html).sanitize
28
+ end
29
+
20
30
  test "allow non-JS HTML content" do
21
31
  html = "<a href='foo'>"
22
32
  assert_equal "<a href=\"foo\"></a>", Govspeak::HtmlSanitizer.new(html).sanitize
@@ -79,16 +89,16 @@ class HtmlSanitizerTest < Minitest::Test
79
89
  assert_equal "<table><tbody><tr><th>thing</th><td>thing</td></tr></tbody></table>", Govspeak::HtmlSanitizer.new(html).sanitize
80
90
  end
81
91
 
82
- test "allows valid text-align properties on the style attribute for table cells and table headings" do
83
- %w[left right center].each do |alignment|
84
- html = "<table><thead><tr><th style=\"text-align: #{alignment}\">thing</th></tr></thead><tbody><tr><td style=\"text-align: #{alignment}\">thing</td></tr></tbody></table>"
85
- assert_equal html, Govspeak::HtmlSanitizer.new(html).sanitize
86
- end
92
+ test "allows text-align properties on the style attribute for table cells and table headings" do
93
+ html = "<table><thead><tr><th style=\"text-align: right\">thing</th></tr></thead><tbody><tr><td style=\"text-align: center\">thing</td></tr></tbody></table>"
94
+ assert_equal html, Govspeak::HtmlSanitizer.new(html).sanitize
95
+
96
+ input = "<table><thead><tr><th style=\"text-align: left;width: 100px;\">thing</th></tr></thead><tbody><tr><td style=\"text-align: center;background-color: blue;\">thing</td></tr></tbody></table>"
97
+ expected = "<table><thead><tr><th style=\"text-align: left;\">thing</th></tr></thead><tbody><tr><td style=\"text-align: center;\">thing</td></tr></tbody></table>"
98
+ assert_equal expected, Govspeak::HtmlSanitizer.new(input).sanitize
87
99
 
88
100
  [
89
101
  "width: 10000px",
90
- "text-align: middle",
91
- "text-align: left; width: 10px",
92
102
  "background-image: url(javascript:alert('XSS'))",
93
103
  "expression(alert('XSS'));",
94
104
  ].each do |style|
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: govspeak
3
3
  version: !ruby/object:Gem::Version
4
- version: 7.1.0
4
+ version: 8.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - GOV.UK Dev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-03-28 00:00:00.000000000 Z
11
+ date: 2023-07-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: actionview
@@ -190,14 +190,14 @@ dependencies:
190
190
  requirements:
191
191
  - - '='
192
192
  - !ruby/object:Gem::Version
193
- version: 4.10.0
193
+ version: 4.11.0
194
194
  type: :development
195
195
  prerelease: false
196
196
  version_requirements: !ruby/object:Gem::Requirement
197
197
  requirements:
198
198
  - - '='
199
199
  - !ruby/object:Gem::Version
200
- version: 4.10.0
200
+ version: 4.11.0
201
201
  - !ruby/object:Gem::Dependency
202
202
  name: simplecov
203
203
  requirement: !ruby/object:Gem::Requirement
@@ -302,7 +302,7 @@ files:
302
302
  - test/govspeak_link_extractor_test.rb
303
303
  - test/govspeak_link_test.rb
304
304
  - test/govspeak_structured_headers_test.rb
305
- - test/govspeak_table_with_headers_test.rb
305
+ - test/govspeak_tables_test.rb
306
306
  - test/govspeak_test.rb
307
307
  - test/govspeak_test_helper.rb
308
308
  - test/html_sanitizer_test.rb
@@ -327,7 +327,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
327
327
  - !ruby/object:Gem::Version
328
328
  version: '0'
329
329
  requirements: []
330
- rubygems_version: 3.4.10
330
+ rubygems_version: 3.4.15
331
331
  signing_key:
332
332
  specification_version: 4
333
333
  summary: Markup language for single domain
@@ -346,7 +346,7 @@ test_files:
346
346
  - test/govspeak_link_extractor_test.rb
347
347
  - test/govspeak_link_test.rb
348
348
  - test/govspeak_structured_headers_test.rb
349
- - test/govspeak_table_with_headers_test.rb
349
+ - test/govspeak_tables_test.rb
350
350
  - test/govspeak_test.rb
351
351
  - test/govspeak_test_helper.rb
352
352
  - test/html_sanitizer_test.rb