govspeak 6.2.1 → 6.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 39b6f369a4fda004a0d426e27bd298465354769215c0e5147caa0191456879c1
4
- data.tar.gz: ad5e45eb16183f9498ba686ff3694c7ec645c0d2959f271b9cae39746e090934
3
+ metadata.gz: bc558071169d3b288446dddcb1657bef2dfac22add83004e13f8acfbdc6f8b90
4
+ data.tar.gz: 4147b7545a3fb2aa1fe09a1d494d02f14065cf3020a65ac46d985e7fc8059f35
5
5
  SHA512:
6
- metadata.gz: 162a60aad8e18cb213a6df073c9d3f3fae8dc41d490cea9ed871804b3e9d66d1829e2f8a58a20e07d09d80af595450110a4dbc4c7b424dc8d0b990fec0d17da7
7
- data.tar.gz: f5ba3ef15abed2fd01750ce68f6aff8482c7e7407d6f4ac689e75f23aa7f6fa507171e2eae1815424c4593686234351fc88d12d09c63dde9b560409f9c3a9fd0
6
+ metadata.gz: 412b37d58e002eb577de464235f5bc1ec59f7267835320f4d6e509ea16c99d45943922f2697443bdad125cb8dbb68c903062b78185c3ae1bc15f98bb80ae7192
7
+ data.tar.gz: '0869244c4588b27a308cee8c54e3aed025a8b7b4f921ebe4989a9e490eb90a6f21e69af31f71bb7b00db750627c4d686ff6a96b55b1c2d5b1f8c28af6d034481'
@@ -1,8 +1,13 @@
1
+ ## 6.3.0
2
+
3
+ * Unicode characters forbidden in HTML are stripped from input
4
+ * Validation is now more lenient for HTML input
5
+
1
6
  ## 6.2.1
2
7
 
3
8
  * Update warning callout label text from 'Help' to 'Warning'
4
9
 
5
- # 6.2.0
10
+ ## 6.2.0
6
11
 
7
12
  * Remove experimental status on `AttachmentLink:attachment-id` and `Attachment:attachment-id`
8
13
  * Deprecate `embed:attachments:inline:content-id`
@@ -54,25 +59,32 @@
54
59
  * Update sanitize version to 4.6.x [#127](https://github.com/alphagov/govspeak/issues/127)
55
60
 
56
61
  ## 5.5.0
62
+
57
63
  * Ignore links with blank or missing `href`s when extracting links from a document with `Govspeak::Document#extracted_links` [#124](https://github.com/alphagov/govspeak/pull/124)
58
64
 
59
65
  ## 5.4.0
66
+
60
67
  * Add an optional `website_root` argument to `Govspeak::Document#extracted_links` in order to get all links as fully qualified URLs [#122](https://github.com/alphagov/govspeak/pull/122)
61
68
 
62
69
  ## 5.3.0
70
+
63
71
  * Add a link extraction class for finding links in documents [#120](https://github.com/alphagov/govspeak/pull/120)
64
72
 
65
73
  ## 5.2.2
74
+
66
75
  * Fix rendering buttons with inconsistent linebreaks seen in publishing [#118](https://github.com/alphagov/govspeak/pull/118)
67
76
 
68
77
  ## 5.2.1
78
+
69
79
  * Fix validation to make sure buttons are considered valid
70
80
  * Only allow buttons to be used on new lines, not when indented or inline within text (useful for guides) [#116](https://github.com/alphagov/govspeak/pull/116)
71
81
 
72
82
  ## 5.2.0
83
+
73
84
  * Add button component for govspeak [#114](https://github.com/alphagov/govspeak/pull/114) see README for usage
74
85
 
75
86
  ## 5.1.0
87
+
76
88
  * Update Kramdown version to 1.15.0
77
89
 
78
90
  ## 5.0.3
@@ -80,14 +92,17 @@
80
92
  * Fix matching links/attachments/contacts by regex to use equality [#105](https://github.com/alphagov/govspeak/pull/105)
81
93
 
82
94
  ## 5.0.2
95
+
83
96
  * Loosen ActionView dependency to allow use with Rails
84
97
  5 [#99](https://github.com/alphagov/govspeak/pull/99)
85
98
 
86
99
  ## 5.0.1
100
+
87
101
  * Move presenters into the Govspeak namespace [#93](https://github.com/alphagov/govspeak/pull/93)
88
102
  * Embedded links now will automatically be marked with `rel="external"` [#96](https://github.com/alphagov/govspeak/pull/96)
89
103
 
90
104
  ## 5.0.0
105
+
91
106
  * Update Kramdown version to 1.12.0
92
107
  * Add pry-byebug to development dependencies
93
108
  * Ability to run Govspeak as a binary from command line [#87](https://github.com/alphagov/govspeak/pull/87)
@@ -1,11 +1,14 @@
1
1
  require 'active_support/core_ext/hash'
2
2
  require 'active_support/core_ext/array'
3
3
  require 'erb'
4
+ require 'govuk_publishing_components'
4
5
  require 'htmlentities'
5
6
  require 'kramdown'
6
7
  require 'kramdown/parser/govuk'
8
+ require 'nokogiri'
9
+ require 'nokogumbo'
7
10
  require 'rinku'
8
- require 'govuk_publishing_components'
11
+ require 'sanitize'
9
12
  require 'govspeak/header_extractor'
10
13
  require 'govspeak/structured_header_extractor'
11
14
  require 'govspeak/html_validator'
@@ -103,6 +106,7 @@ module Govspeak
103
106
 
104
107
  def preprocess(source)
105
108
  source = Govspeak::BlockquoteExtraQuoteRemover.remove(source)
109
+ source = remove_forbidden_characters(source)
106
110
  self.class.extensions.each do |_, regexp, block|
107
111
  source.gsub!(regexp) {
108
112
  instance_exec(*Regexp.last_match.captures, &block)
@@ -111,6 +115,12 @@ module Govspeak
111
115
  source
112
116
  end
113
117
 
118
+ def remove_forbidden_characters(source)
119
+ # These are characters that are deemed not suitable for
120
+ # markup: https://www.w3.org/TR/unicode-xml/#Charlist
121
+ source.gsub(Sanitize::REGEX_UNSUITABLE_CHARS, '')
122
+ end
123
+
114
124
  def self.extension(title, regexp = nil, &block)
115
125
  regexp ||= %r${::#{title}}(.*?){:/#{title}}$m
116
126
  @extensions << [title, regexp, block]
@@ -1,5 +1,4 @@
1
1
  require 'addressable/uri'
2
- require 'sanitize'
3
2
 
4
3
  class Govspeak::HtmlSanitizer
5
4
  class ImageSourceWhitelister
@@ -50,10 +49,10 @@ class Govspeak::HtmlSanitizer
50
49
  end
51
50
 
52
51
  def button_sanitize_config
53
- [
54
- "data-module",
55
- "data-tracking-code",
56
- "data-tracking-name"
52
+ %w[
53
+ data-module
54
+ data-tracking-code
55
+ data-tracking-name
57
56
  ]
58
57
  end
59
58
 
@@ -62,7 +61,7 @@ class Govspeak::HtmlSanitizer
62
61
  Sanitize::Config::RELAXED,
63
62
  elements: Sanitize::Config::RELAXED[:elements] + %w[govspeak-embed-attachment govspeak-embed-attachment-link],
64
63
  attributes: {
65
- :all => Sanitize::Config::RELAXED[:attributes][:all] + ["role", "aria-label"],
64
+ :all => Sanitize::Config::RELAXED[:attributes][:all] + %w[role aria-label],
66
65
  "a" => Sanitize::Config::RELAXED[:attributes]["a"] + button_sanitize_config,
67
66
  "th" => Sanitize::Config::RELAXED[:attributes]["th"] + %w[style],
68
67
  "td" => Sanitize::Config::RELAXED[:attributes]["td"] + %w[style],
@@ -18,7 +18,7 @@ class Govspeak::HtmlValidator
18
18
 
19
19
  # Make whitespace in html tags consistent
20
20
  def normalise_html(html)
21
- Nokogiri::HTML.parse(html).to_s
21
+ Nokogiri::HTML5.fragment(html).to_s
22
22
  end
23
23
 
24
24
  def govspeak_to_html
@@ -1,5 +1,3 @@
1
- require 'nokogiri'
2
-
3
1
  module Govspeak
4
2
  class PostProcessor
5
3
  @extensions = []
@@ -1,3 +1,3 @@
1
1
  module Govspeak
2
- VERSION = "6.2.1".freeze
2
+ VERSION = "6.3.0".freeze
3
3
  end
@@ -18,6 +18,13 @@ class GovspeakTest < Minitest::Test
18
18
  assert_equal "<p><em>this is markdown</em></p>\n", rendered
19
19
  end
20
20
 
21
+ test "strips forbidden unicode characters" do
22
+ rendered = Govspeak::Document.new(
23
+ "this is text with forbidden characters \ufffc\u2028\ufeff\u202c\u202a"
24
+ ).to_html
25
+ assert_equal "<p>this is text with forbidden characters</p>\n", rendered
26
+ end
27
+
21
28
  test "highlight-answer block extension" do
22
29
  rendered = Govspeak::Document.new("this \n{::highlight-answer}Lead in to *BIG TEXT*\n{:/highlight-answer}").to_html
23
30
  assert_equal %{<p>this</p>\n\n<div class="highlight-answer">\n<p>Lead in to <em>BIG TEXT</em></p>\n</div>\n}, rendered
@@ -101,4 +101,11 @@ class HtmlValidatorTest < Minitest::Test
101
101
  assert Govspeak::HtmlValidator.new("{button start}[Start now](https://gov.uk){/button}").valid?
102
102
  assert Govspeak::HtmlValidator.new("{button start cross-domain-tracking:UA-XXXXXX-Y}[Start now](https://gov.uk){/button}").valid?
103
103
  end
104
+
105
+ test "allow HTML tables with and without tbody elements" do
106
+ # An upgrade of govspeak broke HTML table entries as tbody elements were inserted.
107
+ # An example of one of these is: https://www.gov.uk/government/publications/what-works-network-membership-requirements/what-works-network
108
+ assert Govspeak::HtmlValidator.new("<table><tr><td>Hello</td></tr></table>").valid?, "No <tbody> is valid"
109
+ assert Govspeak::HtmlValidator.new("<table><tbody><tr><td>Hello</td></tr></tbody></table>").valid?, "<tbody> is valid"
110
+ end
104
111
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: govspeak
3
3
  version: !ruby/object:Gem::Version
4
- version: 6.2.1
4
+ version: 6.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - GOV.UK Dev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-06-04 00:00:00.000000000 Z
11
+ date: 2019-07-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: actionview
@@ -114,6 +114,20 @@ dependencies:
114
114
  - - "~>"
115
115
  - !ruby/object:Gem::Version
116
116
  version: '1.5'
117
+ - !ruby/object:Gem::Dependency
118
+ name: nokogumbo
119
+ requirement: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '2'
124
+ type: :runtime
125
+ prerelease: false
126
+ version_requirements: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - "~>"
129
+ - !ruby/object:Gem::Version
130
+ version: '2'
117
131
  - !ruby/object:Gem::Dependency
118
132
  name: rinku
119
133
  requirement: !ruby/object:Gem::Requirement
@@ -146,16 +160,16 @@ dependencies:
146
160
  name: govuk-lint
147
161
  requirement: !ruby/object:Gem::Requirement
148
162
  requirements:
149
- - - ">="
163
+ - - "~>"
150
164
  - !ruby/object:Gem::Version
151
- version: '0'
165
+ version: 3.11.5
152
166
  type: :development
153
167
  prerelease: false
154
168
  version_requirements: !ruby/object:Gem::Requirement
155
169
  requirements:
156
- - - ">="
170
+ - - "~>"
157
171
  - !ruby/object:Gem::Version
158
- version: '0'
172
+ version: 3.11.5
159
173
  - !ruby/object:Gem::Dependency
160
174
  name: minitest
161
175
  requirement: !ruby/object:Gem::Requirement
@@ -344,22 +358,22 @@ signing_key:
344
358
  specification_version: 4
345
359
  summary: Markup language for single domain
346
360
  test_files:
361
+ - test/blockquote_extra_quote_remover_test.rb
362
+ - test/govspeak_test_helper.rb
363
+ - test/govspeak_structured_headers_test.rb
347
364
  - test/govspeak_attachment_link_test.rb
365
+ - test/govspeak_attachments_image_test.rb
348
366
  - test/test_helper.rb
367
+ - test/govspeak_attachments_inline_test.rb
368
+ - test/html_sanitizer_test.rb
349
369
  - test/govspeak_button_test.rb
350
- - test/govspeak_test.rb
370
+ - test/govspeak_images_bang_test.rb
371
+ - test/govspeak_images_test.rb
351
372
  - test/html_validator_test.rb
352
373
  - test/govspeak_attachment_test.rb
353
- - test/govspeak_contacts_test.rb
354
- - test/govspeak_test_helper.rb
355
- - test/blockquote_extra_quote_remover_test.rb
356
- - test/govspeak_attachments_image_test.rb
357
- - test/html_sanitizer_test.rb
358
- - test/govspeak_link_test.rb
359
374
  - test/govspeak_extract_contact_content_ids_test.rb
360
- - test/govspeak_structured_headers_test.rb
361
- - test/govspeak_images_test.rb
362
- - test/presenters/h_card_presenter_test.rb
363
- - test/govspeak_images_bang_test.rb
375
+ - test/govspeak_test.rb
364
376
  - test/govspeak_link_extractor_test.rb
365
- - test/govspeak_attachments_inline_test.rb
377
+ - test/govspeak_link_test.rb
378
+ - test/govspeak_contacts_test.rb
379
+ - test/presenters/h_card_presenter_test.rb