govspeak 5.9.1 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a66dce46f2af9208dfc78c813b601fe8a447a0c5a68c085516dcf37d9fd5567d
4
- data.tar.gz: 62323ec0ee81856973bea87d1575a5aac0d1c4f957ad2261d3081ec6206a45f9
3
+ metadata.gz: 93b2022644e3af2c929faf57f33c9fe70680977e599bde24d59c71ee20402713
4
+ data.tar.gz: 7dec8938eedb2183514c54665dd9609f130ac535fbb336739f0f8b6e3d528bc3
5
5
  SHA512:
6
- metadata.gz: 4b14f7a1707a14e6342b709425556c5d69f64c12ba6523fc0c30ad5a8a98580e05115842cb7c70aa949c219683901e050c415df7b63249c910c59c73a75e011c
7
- data.tar.gz: '02386b5204b077af3a5fa03512e0ed451b77ec3a65e38b17015bab5267431b25382b0103338128c02583e6a3b075eb6ff7f41210a3bdcb8a236d139d182cd6c6'
6
+ metadata.gz: 7b96f54ac8cfdf3e0595cb2de3bc205df86c7efc9869c4e9d06f22623067f4e114d878e9f31c72817d76657b9d759e0b3344ae87724db8394a5a79a727765407
7
+ data.tar.gz: 9fc03735199e9fe6016ffe1196e57eac617a744c7aaa3421fc1a57a4ba53190c3dc85c5dda7f76b202aaf29c7c5da51f47ed0fcf71ff43a5b504097bcf8b6ebb
@@ -1,3 +1,11 @@
1
+ ## 6.0.0
2
+
3
+ * BREAKING CHANGE: Input is sanitized by default; to use unsafe HTML, initialize with a `sanitize` option of `false`
4
+ * Allow `sanitize` option to remove invalid HTML from source input
5
+ * BREAKING CHANGE: Remove `to_sanitized_html` method in favour of `sanitize` option on initialize
6
+ * BREAKING CHANGE: Remove `to_sanitized_html_without_images` as no apps use this anymore
7
+ * BREAKING CHANGE: Remove CLI usage
8
+
1
9
  ## 5.9.1
2
10
 
3
11
  * Don't render `[Image: {file-name}]` within a paragraph to avoid invalid HTML
data/README.md CHANGED
@@ -8,7 +8,7 @@ Install the gem
8
8
 
9
9
  or add it to your Gemfile
10
10
 
11
- gem "govspeak", "~> 3.4.0"
11
+ gem "govspeak"
12
12
 
13
13
  then create a new document
14
14
 
@@ -18,18 +18,6 @@ then create a new document
18
18
  doc = Govspeak::Document.new "^Test^"
19
19
  puts doc.to_html
20
20
 
21
- or alternatively, run it from the command line
22
-
23
- $ govspeak "render-me"
24
- $ govspeak --file render-me.md
25
- $ echo "render-me" | govspeak
26
-
27
- options can be passed in through `--options` as a string of JSON or a file
28
- of JSON can be passed in as `--options-file options.json`.
29
-
30
- if installed via bundler prefix commands with bundle exec eg `$ bundle exec govspeak "render-me"`
31
-
32
-
33
21
  # Extensions
34
22
 
35
23
  In addition to the [standard Markdown syntax](http://daringfireball.net/projects/markdown/syntax "Markdown syntax"), we have added our own extensions.
@@ -13,6 +13,7 @@ require 'govspeak/kramdown_overrides'
13
13
  require 'govspeak/blockquote_extra_quote_remover'
14
14
  require 'govspeak/post_processor'
15
15
  require 'govspeak/link_extractor'
16
+ require 'govspeak/template_renderer'
16
17
  require 'govspeak/presenters/attachment_presenter'
17
18
  require 'govspeak/presenters/contact_presenter'
18
19
  require 'govspeak/presenters/h_card_presenter'
@@ -47,41 +48,32 @@ module Govspeak
47
48
  def initialize(source, options = {})
48
49
  options = options.dup.deep_symbolize_keys
49
50
  @source = source ? source.dup : ""
51
+
50
52
  @images = options.delete(:images) || []
51
53
  @attachments = Array.wrap(options.delete(:attachments))
52
54
  @links = Array.wrap(options.delete(:links))
53
55
  @contacts = Array.wrap(options.delete(:contacts))
54
56
  @locale = options.fetch(:locale, "en")
55
- @options = { input: PARSER_CLASS_NAME }.merge(options)
57
+ @options = { input: PARSER_CLASS_NAME, sanitize: true }.merge(options)
56
58
  @options[:entity_output] = :symbolic
57
59
  end
58
60
 
59
61
  def to_html
60
- @to_html ||= Govspeak::PostProcessor.process(kramdown_doc.to_html)
62
+ @to_html ||= begin
63
+ html = if @options[:sanitize]
64
+ HtmlSanitizer.new(kramdown_doc.to_html).sanitize
65
+ else
66
+ kramdown_doc.to_html
67
+ end
68
+
69
+ Govspeak::PostProcessor.process(html, self)
70
+ end
61
71
  end
62
72
 
63
73
  def to_liquid
64
74
  to_html
65
75
  end
66
76
 
67
- def t(*args)
68
- options = args.last.is_a?(Hash) ? args.last.dup : {}
69
- key = args.shift
70
- I18n.t!(key, options.merge(locale: locale))
71
- end
72
-
73
- def format_with_html_line_breaks(string)
74
- ERB::Util.html_escape(string || "").strip.gsub(/(?:\r?\n)/, "<br/>").html_safe
75
- end
76
-
77
- def to_sanitized_html
78
- HtmlSanitizer.new(to_html).sanitize
79
- end
80
-
81
- def to_sanitized_html_without_images
82
- HtmlSanitizer.new(to_html).sanitize_without_images
83
- end
84
-
85
77
  def to_text
86
78
  HTMLEntities.new.decode(to_html.gsub(/(?:<[^>]+>|\s)+/, " ").strip)
87
79
  end
@@ -225,13 +217,11 @@ module Govspeak
225
217
  render_image(ImagePresenter.new(image))
226
218
  end
227
219
 
228
- extension('attachment', /\[embed:attachments:(?!inline:|image:)\s*(.*?)\s*\]/) do |content_id, body|
229
- attachment = attachments.detect { |a| a[:content_id] == content_id }
230
- next "" unless attachment
231
-
232
- attachment = AttachmentPresenter.new(attachment)
233
- content = File.read(__dir__ + '/templates/attachment.html.erb')
234
- ERB.new(content).result(binding)
220
+ extension('attachment', /\[embed:attachments:(?!inline:|image:)\s*(.*?)\s*\]/) do |content_id|
221
+ # not treating this as a self closing tag seems to avoid some oddities
222
+ # such as an extra new line being inserted when explicitly closed or
223
+ # swallowing subsequent elements when not closed
224
+ %{<govspeak-embed-attachment content-id="#{content_id}"></govspeak-embed-attachment>}
235
225
  end
236
226
 
237
227
  extension('attachment inline', /\[embed:attachments:inline:\s*(.*?)\s*\]/) do |content_id|
@@ -353,9 +343,8 @@ module Govspeak
353
343
  contact = contacts.detect { |c| c[:content_id] == content_id }
354
344
  next "" unless contact
355
345
 
356
- contact = ContactPresenter.new(contact)
357
- @renderer ||= ERB.new(File.read(__dir__ + '/templates/contact.html.erb'))
358
- @renderer.result(binding)
346
+ renderer = TemplateRenderer.new('contact.html.erb', locale)
347
+ renderer.render(contact: ContactPresenter.new(contact))
359
348
  end
360
349
 
361
350
  extension('Image', /#{NEW_PARAGRAPH_LOOKBEHIND}\[Image:\s*(.*?)\s*\]/) do |image_id|
@@ -374,10 +363,6 @@ module Govspeak
374
363
  def encode(text)
375
364
  HTMLEntities.new.encode(text)
376
365
  end
377
-
378
- def render_hcard_address(contact_address)
379
- HCardPresenter.new(contact_address).render
380
- end
381
366
  end
382
367
  end
383
368
 
@@ -49,11 +49,6 @@ class Govspeak::HtmlSanitizer
49
49
  Sanitize.clean(@dirty_html, Sanitize::Config.merge(sanitize_config, transformers: transformers))
50
50
  end
51
51
 
52
- def sanitize_without_images
53
- config = sanitize_config
54
- Sanitize.clean(@dirty_html, Sanitize::Config.merge(config, elements: config[:elements] - %w[img]))
55
- end
56
-
57
52
  def button_sanitize_config
58
53
  [
59
54
  "data-module",
@@ -65,11 +60,13 @@ class Govspeak::HtmlSanitizer
65
60
  def sanitize_config
66
61
  Sanitize::Config.merge(
67
62
  Sanitize::Config::RELAXED,
63
+ elements: Sanitize::Config::RELAXED[:elements] + %w[govspeak-embed-attachment],
68
64
  attributes: {
69
65
  :all => Sanitize::Config::RELAXED[:attributes][:all] + ["role", "aria-label"],
70
66
  "a" => Sanitize::Config::RELAXED[:attributes]["a"] + button_sanitize_config,
71
67
  "th" => Sanitize::Config::RELAXED[:attributes]["th"] + %w[style],
72
68
  "td" => Sanitize::Config::RELAXED[:attributes]["td"] + %w[style],
69
+ "govspeak-embed-attachment" => %w[content-id],
73
70
  }
74
71
  )
75
72
  end
@@ -22,6 +22,6 @@ class Govspeak::HtmlValidator
22
22
  end
23
23
 
24
24
  def govspeak_to_html
25
- Govspeak::Document.new(govspeak_string).to_html
25
+ Govspeak::Document.new(govspeak_string, sanitize: false).to_html
26
26
  end
27
27
  end
@@ -8,8 +8,8 @@ module Govspeak
8
8
  @extensions
9
9
  end
10
10
 
11
- def self.process(html)
12
- new(html).output
11
+ def self.process(html, govspeak_document)
12
+ new(html, govspeak_document).output
13
13
  end
14
14
 
15
15
  def self.extension(title, &block)
@@ -49,10 +49,25 @@ module Govspeak
49
49
  end
50
50
  end
51
51
 
52
- attr_reader :input
52
+ extension("embed attachment HTML") do |document|
53
+ document.css("govspeak-embed-attachment").map do |el|
54
+ attachment = govspeak_document.attachments.detect { |a| a[:content_id] == el["content-id"] }
55
+ unless attachment
56
+ el.remove
57
+ next
58
+ end
53
59
 
54
- def initialize(html)
60
+ renderer = TemplateRenderer.new('attachment.html.erb', govspeak_document.locale)
61
+ attachment_html = renderer.render(attachment: AttachmentPresenter.new(attachment))
62
+ el.swap(attachment_html)
63
+ end
64
+ end
65
+
66
+ attr_reader :input, :govspeak_document
67
+
68
+ def initialize(html, govspeak_document)
55
69
  @input = html
70
+ @govspeak_document = govspeak_document
56
71
  end
57
72
 
58
73
  def output
@@ -63,7 +78,6 @@ module Govspeak
63
78
  document.to_html
64
79
  end
65
80
 
66
-
67
81
  private
68
82
 
69
83
  def nokogiri_document
@@ -0,0 +1,27 @@
1
+ module Govspeak
2
+ class TemplateRenderer
3
+ attr_reader :template, :locale
4
+
5
+ def initialize(template, locale)
6
+ @template = template
7
+ @locale = locale
8
+ end
9
+
10
+ def render(locals)
11
+ template_binding = binding
12
+ locals.each { |k, v| template_binding.local_variable_set(k, v) }
13
+ erb = ERB.new(File.read(__dir__ + "/../templates/#{template}"))
14
+ erb.result(template_binding)
15
+ end
16
+
17
+ def t(*args)
18
+ options = args.last.is_a?(Hash) ? args.last.dup : {}
19
+ key = args.shift
20
+ I18n.t!(key, options.merge(locale: locale))
21
+ end
22
+
23
+ def format_with_html_line_breaks(string)
24
+ ERB::Util.html_escape(string || "").strip.gsub(/(?:\r?\n)/, "<br/>").html_safe
25
+ end
26
+ end
27
+ end
@@ -1,3 +1,3 @@
1
1
  module Govspeak
2
- VERSION = "5.9.1".freeze
2
+ VERSION = "6.0.0".freeze
3
3
  end
@@ -7,7 +7,7 @@
7
7
  <h3><%= contact.title %></h3>
8
8
  <div class="vcard contact-inner">
9
9
  <% contact.post_addresses.each do |address| %>
10
- <%= render_hcard_address(address) %>
10
+ <%= Govspeak::HCardPresenter.new(address).render %>
11
11
  <% end %>
12
12
  <% if contact.email_addresses.any? || contact.phone_numbers.any? || contact.contact_form_links.any? %>
13
13
  <div class="email-url-number">
@@ -632,14 +632,14 @@ Teston
632
632
  }
633
633
  end
634
634
 
635
- test "can sanitize a document" do
635
+ test 'sanitize source input by default' do
636
636
  document = Govspeak::Document.new("<script>doBadThings();</script>")
637
- assert_equal "doBadThings();", document.to_sanitized_html.strip
637
+ assert_equal "", document.to_html.strip
638
638
  end
639
639
 
640
- test "can sanitize a document without image" do
641
- document = Govspeak::Document.new("<script>doBadThings();</script><img src='https://example.com/image.jpg'>")
642
- assert_equal "doBadThings();<p></p>", document.to_sanitized_html_without_images.gsub(/\s/, "")
640
+ test 'it can have sanitizing disabled' do
641
+ document = Govspeak::Document.new("<script>doGoodThings();</script>", sanitize: false)
642
+ assert_equal "<script>doGoodThings();</script>", document.to_html.strip
643
643
  end
644
644
 
645
645
  test "identifies a Govspeak document containing malicious HTML as invalid" do
@@ -3,7 +3,7 @@ require "test_helper"
3
3
  class HtmlSanitizerTest < Minitest::Test
4
4
  test "disallow a script tag" do
5
5
  html = "<script>alert('XSS')</script>"
6
- assert_equal "alert('XSS')", Govspeak::HtmlSanitizer.new(html).sanitize
6
+ assert_equal "", Govspeak::HtmlSanitizer.new(html).sanitize
7
7
  end
8
8
 
9
9
  test "disallow a javascript protocol in an attribute" do
@@ -46,11 +46,6 @@ class HtmlSanitizerTest < Minitest::Test
46
46
  assert_equal "", Govspeak::HtmlSanitizer.new(html, allowed_image_hosts: ['allowed.com']).sanitize
47
47
  end
48
48
 
49
- test "can strip images" do
50
- html = "<img src='http://example.com/image.jgp'>"
51
- assert_equal "", Govspeak::HtmlSanitizer.new(html).sanitize_without_images
52
- end
53
-
54
49
  test "allows table cells and table headings without a style attribute" do
55
50
  html = "<table><thead><tr><th>thing</th></tr></thead><tbody><tr><td>thing</td></tr></tbody></table>"
56
51
  assert_equal html, Govspeak::HtmlSanitizer.new(html).sanitize
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: govspeak
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.9.1
4
+ version: 6.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - GOV.UK Dev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-07 00:00:00.000000000 Z
11
+ date: 2019-03-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: actionview
@@ -44,20 +44,6 @@ dependencies:
44
44
  - - "<"
45
45
  - !ruby/object:Gem::Version
46
46
  version: '3'
47
- - !ruby/object:Gem::Dependency
48
- name: commander
49
- requirement: !ruby/object:Gem::Requirement
50
- requirements:
51
- - - "~>"
52
- - !ruby/object:Gem::Version
53
- version: '4.4'
54
- type: :runtime
55
- prerelease: false
56
- version_requirements: !ruby/object:Gem::Requirement
57
- requirements:
58
- - - "~>"
59
- - !ruby/object:Gem::Version
60
- version: '4.4'
61
47
  - !ruby/object:Gem::Dependency
62
48
  name: htmlentities
63
49
  requirement: !ruby/object:Gem::Requirement
@@ -148,14 +134,14 @@ dependencies:
148
134
  requirements:
149
135
  - - "~>"
150
136
  - !ruby/object:Gem::Version
151
- version: '4.6'
137
+ version: '5'
152
138
  type: :runtime
153
139
  prerelease: false
154
140
  version_requirements: !ruby/object:Gem::Requirement
155
141
  requirements:
156
142
  - - "~>"
157
143
  - !ruby/object:Gem::Version
158
- version: '4.6'
144
+ version: '5'
159
145
  - !ruby/object:Gem::Dependency
160
146
  name: govuk-lint
161
147
  requirement: !ruby/object:Gem::Requirement
@@ -245,8 +231,7 @@ description: |-
245
231
  library for use in the UK Government Single Domain project
246
232
  email:
247
233
  - govuk-dev@digital.cabinet-office.gov.uk
248
- executables:
249
- - govspeak
234
+ executables: []
250
235
  extensions: []
251
236
  extra_rdoc_files: []
252
237
  files:
@@ -258,11 +243,9 @@ files:
258
243
  - assets/example.png
259
244
  - assets/information_callout.png
260
245
  - assets/warning_callout.png
261
- - bin/govspeak
262
246
  - config/address_formats.yml
263
247
  - lib/govspeak.rb
264
248
  - lib/govspeak/blockquote_extra_quote_remover.rb
265
- - lib/govspeak/cli.rb
266
249
  - lib/govspeak/header_extractor.rb
267
250
  - lib/govspeak/html_sanitizer.rb
268
251
  - lib/govspeak/html_validator.rb
@@ -275,11 +258,11 @@ files:
275
258
  - lib/govspeak/presenters/h_card_presenter.rb
276
259
  - lib/govspeak/presenters/image_presenter.rb
277
260
  - lib/govspeak/structured_header_extractor.rb
261
+ - lib/govspeak/template_renderer.rb
278
262
  - lib/govspeak/version.rb
279
263
  - lib/kramdown/parser/kramdown_with_automatic_external_links.rb
280
264
  - lib/templates/attachment.html.erb
281
265
  - lib/templates/contact.html.erb
282
- - lib/templates/inline_attachment.html.erb
283
266
  - locales/ar.yml
284
267
  - locales/be.yml
285
268
  - locales/bg.yml
@@ -364,21 +347,21 @@ signing_key:
364
347
  specification_version: 4
365
348
  summary: Markup language for single domain
366
349
  test_files:
367
- - test/govspeak_link_extractor_test.rb
368
- - test/govspeak_structured_headers_test.rb
369
- - test/govspeak_images_bang_test.rb
370
- - test/govspeak_button_test.rb
371
- - test/govspeak_extract_contact_content_ids_test.rb
372
350
  - test/blockquote_extra_quote_remover_test.rb
373
351
  - test/govspeak_test_helper.rb
374
- - test/govspeak_link_test.rb
375
- - test/govspeak_images_test.rb
376
- - test/govspeak_contacts_test.rb
352
+ - test/govspeak_structured_headers_test.rb
353
+ - test/govspeak_attachments_image_test.rb
354
+ - test/govspeak_attachments_test.rb
377
355
  - test/test_helper.rb
378
- - test/html_validator_test.rb
379
- - test/html_sanitizer_test.rb
380
356
  - test/govspeak_attachments_inline_test.rb
357
+ - test/html_sanitizer_test.rb
358
+ - test/govspeak_button_test.rb
359
+ - test/govspeak_images_bang_test.rb
360
+ - test/govspeak_images_test.rb
361
+ - test/html_validator_test.rb
362
+ - test/govspeak_extract_contact_content_ids_test.rb
381
363
  - test/govspeak_test.rb
382
- - test/govspeak_attachments_test.rb
383
- - test/govspeak_attachments_image_test.rb
364
+ - test/govspeak_link_extractor_test.rb
365
+ - test/govspeak_link_test.rb
366
+ - test/govspeak_contacts_test.rb
384
367
  - test/presenters/h_card_presenter_test.rb
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- lib = File.expand_path('../lib', __dir__)
4
- $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
-
6
- require "govspeak/cli"
7
-
8
- Govspeak::CLI.new.run
@@ -1,53 +0,0 @@
1
- require 'govspeak/version'
2
- require 'govspeak'
3
- require 'commander'
4
-
5
- module Govspeak
6
- class CLI
7
- include Commander::Methods
8
-
9
- def run
10
- program(:name, 'Govspeak')
11
- program(:version, Govspeak::VERSION)
12
- program(:description, "A tool for rendering the GOV.UK dialect of markdown into HTML")
13
- default_command(:render)
14
- command(:render) do |command|
15
- command.syntax = "govspeak render [options] <input>"
16
- command.description = "Render Govspeak into HTML, can be sourced from stdin, as an argument or from a file"
17
- command.option("--file FILENAME", String, "File to render")
18
- command.option("--options JSON", String, "JSON to use as options")
19
- command.option("--options-file FILENAME", String, "A file of JSON options")
20
- command.action do |args, options|
21
- input = get_input($stdin, args, options)
22
- raise "Nothing to render. Use --help for assistance" unless input
23
-
24
- puts Govspeak::Document.new(input, govspeak_options(options)).to_html
25
- end
26
- end
27
- run!
28
- end
29
-
30
- private
31
-
32
- def get_input(stdin, args, options)
33
- return stdin.read unless stdin.tty?
34
- return read_file(options.file) if options.file
35
-
36
- args.empty? ? nil : args.join(" ")
37
- end
38
-
39
- def read_file(file_path)
40
- path = Pathname.new(file_path).realpath
41
- File.read(path)
42
- end
43
-
44
- def govspeak_options(command_options)
45
- string = if command_options.options_file
46
- read_file(command_options.options_file)
47
- else
48
- command_options.options
49
- end
50
- string ? JSON.parse(string) : {}
51
- end
52
- end
53
- end
@@ -1,6 +0,0 @@
1
- <span <% if attachment.id %>id="attachment_<%= attachment.id %>" <% end %>class="attachment-inline">
2
- <%= attachment.link attachment.title, attachment.url %>
3
- <% unless attachment.attachment_attributes.empty? %>
4
- (<%= attachment.attachment_attributes %>)
5
- <% end %>
6
- </span>