govspeak 6.2.1 → 6.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 39b6f369a4fda004a0d426e27bd298465354769215c0e5147caa0191456879c1
4
- data.tar.gz: ad5e45eb16183f9498ba686ff3694c7ec645c0d2959f271b9cae39746e090934
3
+ metadata.gz: bc558071169d3b288446dddcb1657bef2dfac22add83004e13f8acfbdc6f8b90
4
+ data.tar.gz: 4147b7545a3fb2aa1fe09a1d494d02f14065cf3020a65ac46d985e7fc8059f35
5
5
  SHA512:
6
- metadata.gz: 162a60aad8e18cb213a6df073c9d3f3fae8dc41d490cea9ed871804b3e9d66d1829e2f8a58a20e07d09d80af595450110a4dbc4c7b424dc8d0b990fec0d17da7
7
- data.tar.gz: f5ba3ef15abed2fd01750ce68f6aff8482c7e7407d6f4ac689e75f23aa7f6fa507171e2eae1815424c4593686234351fc88d12d09c63dde9b560409f9c3a9fd0
6
+ metadata.gz: 412b37d58e002eb577de464235f5bc1ec59f7267835320f4d6e509ea16c99d45943922f2697443bdad125cb8dbb68c903062b78185c3ae1bc15f98bb80ae7192
7
+ data.tar.gz: '0869244c4588b27a308cee8c54e3aed025a8b7b4f921ebe4989a9e490eb90a6f21e69af31f71bb7b00db750627c4d686ff6a96b55b1c2d5b1f8c28af6d034481'
@@ -1,8 +1,13 @@
1
+ ## 6.3.0
2
+
3
+ * Unicode characters forbidden in HTML are stripped from input
4
+ * Validation is now more lenient for HTML input
5
+
1
6
  ## 6.2.1
2
7
 
3
8
  * Update warning callout label text from 'Help' to 'Warning'
4
9
 
5
- # 6.2.0
10
+ ## 6.2.0
6
11
 
7
12
  * Remove experimental status on `AttachmentLink:attachment-id` and `Attachment:attachment-id`
8
13
  * Deprecate `embed:attachments:inline:content-id`
@@ -54,25 +59,32 @@
54
59
  * Update sanitize version to 4.6.x [#127](https://github.com/alphagov/govspeak/issues/127)
55
60
 
56
61
  ## 5.5.0
62
+
57
63
  * Ignore links with blank or missing `href`s when extracting links from a document with `Govspeak::Document#extracted_links` [#124](https://github.com/alphagov/govspeak/pull/124)
58
64
 
59
65
  ## 5.4.0
66
+
60
67
  * Add an optional `website_root` argument to `Govspeak::Document#extracted_links` in order to get all links as fully qualified URLs [#122](https://github.com/alphagov/govspeak/pull/122)
61
68
 
62
69
  ## 5.3.0
70
+
63
71
  * Add a link extraction class for finding links in documents [#120](https://github.com/alphagov/govspeak/pull/120)
64
72
 
65
73
  ## 5.2.2
74
+
66
75
  * Fix rendering buttons with inconsistent linebreaks seen in publishing [#118](https://github.com/alphagov/govspeak/pull/118)
67
76
 
68
77
  ## 5.2.1
78
+
69
79
  * Fix validation to make sure buttons are considered valid
70
80
  * Only allow buttons to be used on new lines, not when indented or inline within text (useful for guides) [#116](https://github.com/alphagov/govspeak/pull/116)
71
81
 
72
82
  ## 5.2.0
83
+
73
84
  * Add button component for govspeak [#114](https://github.com/alphagov/govspeak/pull/114) see README for usage
74
85
 
75
86
  ## 5.1.0
87
+
76
88
  * Update Kramdown version to 1.15.0
77
89
 
78
90
  ## 5.0.3
@@ -80,14 +92,17 @@
80
92
  * Fix matching links/attachments/contacts by regex to use equality [#105](https://github.com/alphagov/govspeak/pull/105)
81
93
 
82
94
  ## 5.0.2
95
+
83
96
  * Loosen ActionView dependency to allow use with Rails
84
97
  5 [#99](https://github.com/alphagov/govspeak/pull/99)
85
98
 
86
99
  ## 5.0.1
100
+
87
101
  * Move presenters into the Govspeak namespace [#93](https://github.com/alphagov/govspeak/pull/93)
88
102
  * Embedded links now will automatically be marked with `rel="external"` [#96](https://github.com/alphagov/govspeak/pull/96)
89
103
 
90
104
  ## 5.0.0
105
+
91
106
  * Update Kramdown version to 1.12.0
92
107
  * Add pry-byebug to development dependencies
93
108
  * Ability to run Govspeak as a binary from command line [#87](https://github.com/alphagov/govspeak/pull/87)
@@ -1,11 +1,14 @@
1
1
  require 'active_support/core_ext/hash'
2
2
  require 'active_support/core_ext/array'
3
3
  require 'erb'
4
+ require 'govuk_publishing_components'
4
5
  require 'htmlentities'
5
6
  require 'kramdown'
6
7
  require 'kramdown/parser/govuk'
8
+ require 'nokogiri'
9
+ require 'nokogumbo'
7
10
  require 'rinku'
8
- require 'govuk_publishing_components'
11
+ require 'sanitize'
9
12
  require 'govspeak/header_extractor'
10
13
  require 'govspeak/structured_header_extractor'
11
14
  require 'govspeak/html_validator'
@@ -103,6 +106,7 @@ module Govspeak
103
106
 
104
107
  def preprocess(source)
105
108
  source = Govspeak::BlockquoteExtraQuoteRemover.remove(source)
109
+ source = remove_forbidden_characters(source)
106
110
  self.class.extensions.each do |_, regexp, block|
107
111
  source.gsub!(regexp) {
108
112
  instance_exec(*Regexp.last_match.captures, &block)
@@ -111,6 +115,12 @@ module Govspeak
111
115
  source
112
116
  end
113
117
 
118
+ def remove_forbidden_characters(source)
119
+ # These are characters that are deemed not suitable for
120
+ # markup: https://www.w3.org/TR/unicode-xml/#Charlist
121
+ source.gsub(Sanitize::REGEX_UNSUITABLE_CHARS, '')
122
+ end
123
+
114
124
  def self.extension(title, regexp = nil, &block)
115
125
  regexp ||= %r${::#{title}}(.*?){:/#{title}}$m
116
126
  @extensions << [title, regexp, block]
@@ -1,5 +1,4 @@
1
1
  require 'addressable/uri'
2
- require 'sanitize'
3
2
 
4
3
  class Govspeak::HtmlSanitizer
5
4
  class ImageSourceWhitelister
@@ -50,10 +49,10 @@ class Govspeak::HtmlSanitizer
50
49
  end
51
50
 
52
51
  def button_sanitize_config
53
- [
54
- "data-module",
55
- "data-tracking-code",
56
- "data-tracking-name"
52
+ %w[
53
+ data-module
54
+ data-tracking-code
55
+ data-tracking-name
57
56
  ]
58
57
  end
59
58
 
@@ -62,7 +61,7 @@ class Govspeak::HtmlSanitizer
62
61
  Sanitize::Config::RELAXED,
63
62
  elements: Sanitize::Config::RELAXED[:elements] + %w[govspeak-embed-attachment govspeak-embed-attachment-link],
64
63
  attributes: {
65
- :all => Sanitize::Config::RELAXED[:attributes][:all] + ["role", "aria-label"],
64
+ :all => Sanitize::Config::RELAXED[:attributes][:all] + %w[role aria-label],
66
65
  "a" => Sanitize::Config::RELAXED[:attributes]["a"] + button_sanitize_config,
67
66
  "th" => Sanitize::Config::RELAXED[:attributes]["th"] + %w[style],
68
67
  "td" => Sanitize::Config::RELAXED[:attributes]["td"] + %w[style],
@@ -18,7 +18,7 @@ class Govspeak::HtmlValidator
18
18
 
19
19
  # Make whitespace in html tags consistent
20
20
  def normalise_html(html)
21
- Nokogiri::HTML.parse(html).to_s
21
+ Nokogiri::HTML5.fragment(html).to_s
22
22
  end
23
23
 
24
24
  def govspeak_to_html
@@ -1,5 +1,3 @@
1
- require 'nokogiri'
2
-
3
1
  module Govspeak
4
2
  class PostProcessor
5
3
  @extensions = []
@@ -1,3 +1,3 @@
1
1
  module Govspeak
2
- VERSION = "6.2.1".freeze
2
+ VERSION = "6.3.0".freeze
3
3
  end
@@ -18,6 +18,13 @@ class GovspeakTest < Minitest::Test
18
18
  assert_equal "<p><em>this is markdown</em></p>\n", rendered
19
19
  end
20
20
 
21
+ test "strips forbidden unicode characters" do
22
+ rendered = Govspeak::Document.new(
23
+ "this is text with forbidden characters \ufffc\u2028\ufeff\u202c\u202a"
24
+ ).to_html
25
+ assert_equal "<p>this is text with forbidden characters</p>\n", rendered
26
+ end
27
+
21
28
  test "highlight-answer block extension" do
22
29
  rendered = Govspeak::Document.new("this \n{::highlight-answer}Lead in to *BIG TEXT*\n{:/highlight-answer}").to_html
23
30
  assert_equal %{<p>this</p>\n\n<div class="highlight-answer">\n<p>Lead in to <em>BIG TEXT</em></p>\n</div>\n}, rendered
@@ -101,4 +101,11 @@ class HtmlValidatorTest < Minitest::Test
101
101
  assert Govspeak::HtmlValidator.new("{button start}[Start now](https://gov.uk){/button}").valid?
102
102
  assert Govspeak::HtmlValidator.new("{button start cross-domain-tracking:UA-XXXXXX-Y}[Start now](https://gov.uk){/button}").valid?
103
103
  end
104
+
105
+ test "allow HTML tables with and without tbody elements" do
106
+ # An upgrade of govspeak broke HTML table entries as tbody elements were inserted.
107
+ # An example of one of these is: https://www.gov.uk/government/publications/what-works-network-membership-requirements/what-works-network
108
+ assert Govspeak::HtmlValidator.new("<table><tr><td>Hello</td></tr></table>").valid?, "No <tbody> is valid"
109
+ assert Govspeak::HtmlValidator.new("<table><tbody><tr><td>Hello</td></tr></tbody></table>").valid?, "<tbody> is valid"
110
+ end
104
111
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: govspeak
3
3
  version: !ruby/object:Gem::Version
4
- version: 6.2.1
4
+ version: 6.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - GOV.UK Dev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-06-04 00:00:00.000000000 Z
11
+ date: 2019-07-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: actionview
@@ -114,6 +114,20 @@ dependencies:
114
114
  - - "~>"
115
115
  - !ruby/object:Gem::Version
116
116
  version: '1.5'
117
+ - !ruby/object:Gem::Dependency
118
+ name: nokogumbo
119
+ requirement: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '2'
124
+ type: :runtime
125
+ prerelease: false
126
+ version_requirements: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - "~>"
129
+ - !ruby/object:Gem::Version
130
+ version: '2'
117
131
  - !ruby/object:Gem::Dependency
118
132
  name: rinku
119
133
  requirement: !ruby/object:Gem::Requirement
@@ -146,16 +160,16 @@ dependencies:
146
160
  name: govuk-lint
147
161
  requirement: !ruby/object:Gem::Requirement
148
162
  requirements:
149
- - - ">="
163
+ - - "~>"
150
164
  - !ruby/object:Gem::Version
151
- version: '0'
165
+ version: 3.11.5
152
166
  type: :development
153
167
  prerelease: false
154
168
  version_requirements: !ruby/object:Gem::Requirement
155
169
  requirements:
156
- - - ">="
170
+ - - "~>"
157
171
  - !ruby/object:Gem::Version
158
- version: '0'
172
+ version: 3.11.5
159
173
  - !ruby/object:Gem::Dependency
160
174
  name: minitest
161
175
  requirement: !ruby/object:Gem::Requirement
@@ -344,22 +358,22 @@ signing_key:
344
358
  specification_version: 4
345
359
  summary: Markup language for single domain
346
360
  test_files:
361
+ - test/blockquote_extra_quote_remover_test.rb
362
+ - test/govspeak_test_helper.rb
363
+ - test/govspeak_structured_headers_test.rb
347
364
  - test/govspeak_attachment_link_test.rb
365
+ - test/govspeak_attachments_image_test.rb
348
366
  - test/test_helper.rb
367
+ - test/govspeak_attachments_inline_test.rb
368
+ - test/html_sanitizer_test.rb
349
369
  - test/govspeak_button_test.rb
350
- - test/govspeak_test.rb
370
+ - test/govspeak_images_bang_test.rb
371
+ - test/govspeak_images_test.rb
351
372
  - test/html_validator_test.rb
352
373
  - test/govspeak_attachment_test.rb
353
- - test/govspeak_contacts_test.rb
354
- - test/govspeak_test_helper.rb
355
- - test/blockquote_extra_quote_remover_test.rb
356
- - test/govspeak_attachments_image_test.rb
357
- - test/html_sanitizer_test.rb
358
- - test/govspeak_link_test.rb
359
374
  - test/govspeak_extract_contact_content_ids_test.rb
360
- - test/govspeak_structured_headers_test.rb
361
- - test/govspeak_images_test.rb
362
- - test/presenters/h_card_presenter_test.rb
363
- - test/govspeak_images_bang_test.rb
375
+ - test/govspeak_test.rb
364
376
  - test/govspeak_link_extractor_test.rb
365
- - test/govspeak_attachments_inline_test.rb
377
+ - test/govspeak_link_test.rb
378
+ - test/govspeak_contacts_test.rb
379
+ - test/presenters/h_card_presenter_test.rb