govspeak 5.9.1 → 6.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a66dce46f2af9208dfc78c813b601fe8a447a0c5a68c085516dcf37d9fd5567d
4
- data.tar.gz: 62323ec0ee81856973bea87d1575a5aac0d1c4f957ad2261d3081ec6206a45f9
3
+ metadata.gz: 93b2022644e3af2c929faf57f33c9fe70680977e599bde24d59c71ee20402713
4
+ data.tar.gz: 7dec8938eedb2183514c54665dd9609f130ac535fbb336739f0f8b6e3d528bc3
5
5
  SHA512:
6
- metadata.gz: 4b14f7a1707a14e6342b709425556c5d69f64c12ba6523fc0c30ad5a8a98580e05115842cb7c70aa949c219683901e050c415df7b63249c910c59c73a75e011c
7
- data.tar.gz: '02386b5204b077af3a5fa03512e0ed451b77ec3a65e38b17015bab5267431b25382b0103338128c02583e6a3b075eb6ff7f41210a3bdcb8a236d139d182cd6c6'
6
+ metadata.gz: 7b96f54ac8cfdf3e0595cb2de3bc205df86c7efc9869c4e9d06f22623067f4e114d878e9f31c72817d76657b9d759e0b3344ae87724db8394a5a79a727765407
7
+ data.tar.gz: 9fc03735199e9fe6016ffe1196e57eac617a744c7aaa3421fc1a57a4ba53190c3dc85c5dda7f76b202aaf29c7c5da51f47ed0fcf71ff43a5b504097bcf8b6ebb
@@ -1,3 +1,11 @@
1
+ ## 6.0.0
2
+
3
+ * BREAKING CHANGE: Input is sanitized by default; to render unsafe HTML, initialize with the `sanitize` option set to `false`
4
+ * Allow `sanitize` option to remove invalid HTML from source input
5
+ * BREAKING CHANGE: Remove `to_sanitized_html` method in favour of `sanitize` option on initialize
6
+ * BREAKING CHANGE: Remove `to_sanitized_html_without_images` as no apps use this anymore
7
+ * BREAKING CHANGE: Remove CLI usage
8
+
1
9
  ## 5.9.1
2
10
 
3
11
  * Don't render `[Image: {file-name}]` within a paragraph to avoid invalid HTML
data/README.md CHANGED
@@ -8,7 +8,7 @@ Install the gem
8
8
 
9
9
  or add it to your Gemfile
10
10
 
11
- gem "govspeak", "~> 3.4.0"
11
+ gem "govspeak"
12
12
 
13
13
  then create a new document
14
14
 
@@ -18,18 +18,6 @@ then create a new document
18
18
  doc = Govspeak::Document.new "^Test^"
19
19
  puts doc.to_html
20
20
 
21
- or alternatively, run it from the command line
22
-
23
- $ govspeak "render-me"
24
- $ govspeak --file render-me.md
25
- $ echo "render-me" | govspeak
26
-
27
- options can be passed in through `--options` as a string of JSON or a file
28
- of JSON can be passed in as `--options-file options.json`.
29
-
30
- if installed via bundler prefix commands with bundle exec eg `$ bundle exec govspeak "render-me"`
31
-
32
-
33
21
  # Extensions
34
22
 
35
23
  In addition to the [standard Markdown syntax](http://daringfireball.net/projects/markdown/syntax "Markdown syntax"), we have added our own extensions.
@@ -13,6 +13,7 @@ require 'govspeak/kramdown_overrides'
13
13
  require 'govspeak/blockquote_extra_quote_remover'
14
14
  require 'govspeak/post_processor'
15
15
  require 'govspeak/link_extractor'
16
+ require 'govspeak/template_renderer'
16
17
  require 'govspeak/presenters/attachment_presenter'
17
18
  require 'govspeak/presenters/contact_presenter'
18
19
  require 'govspeak/presenters/h_card_presenter'
@@ -47,41 +48,32 @@ module Govspeak
47
48
  def initialize(source, options = {})
48
49
  options = options.dup.deep_symbolize_keys
49
50
  @source = source ? source.dup : ""
51
+
50
52
  @images = options.delete(:images) || []
51
53
  @attachments = Array.wrap(options.delete(:attachments))
52
54
  @links = Array.wrap(options.delete(:links))
53
55
  @contacts = Array.wrap(options.delete(:contacts))
54
56
  @locale = options.fetch(:locale, "en")
55
- @options = { input: PARSER_CLASS_NAME }.merge(options)
57
+ @options = { input: PARSER_CLASS_NAME, sanitize: true }.merge(options)
56
58
  @options[:entity_output] = :symbolic
57
59
  end
58
60
 
59
61
  def to_html
60
- @to_html ||= Govspeak::PostProcessor.process(kramdown_doc.to_html)
62
+ @to_html ||= begin
63
+ html = if @options[:sanitize]
64
+ HtmlSanitizer.new(kramdown_doc.to_html).sanitize
65
+ else
66
+ kramdown_doc.to_html
67
+ end
68
+
69
+ Govspeak::PostProcessor.process(html, self)
70
+ end
61
71
  end
62
72
 
63
73
  def to_liquid
64
74
  to_html
65
75
  end
66
76
 
67
- def t(*args)
68
- options = args.last.is_a?(Hash) ? args.last.dup : {}
69
- key = args.shift
70
- I18n.t!(key, options.merge(locale: locale))
71
- end
72
-
73
- def format_with_html_line_breaks(string)
74
- ERB::Util.html_escape(string || "").strip.gsub(/(?:\r?\n)/, "<br/>").html_safe
75
- end
76
-
77
- def to_sanitized_html
78
- HtmlSanitizer.new(to_html).sanitize
79
- end
80
-
81
- def to_sanitized_html_without_images
82
- HtmlSanitizer.new(to_html).sanitize_without_images
83
- end
84
-
85
77
  def to_text
86
78
  HTMLEntities.new.decode(to_html.gsub(/(?:<[^>]+>|\s)+/, " ").strip)
87
79
  end
@@ -225,13 +217,11 @@ module Govspeak
225
217
  render_image(ImagePresenter.new(image))
226
218
  end
227
219
 
228
- extension('attachment', /\[embed:attachments:(?!inline:|image:)\s*(.*?)\s*\]/) do |content_id, body|
229
- attachment = attachments.detect { |a| a[:content_id] == content_id }
230
- next "" unless attachment
231
-
232
- attachment = AttachmentPresenter.new(attachment)
233
- content = File.read(__dir__ + '/templates/attachment.html.erb')
234
- ERB.new(content).result(binding)
220
+ extension('attachment', /\[embed:attachments:(?!inline:|image:)\s*(.*?)\s*\]/) do |content_id|
221
+ # not treating this as a self closing tag seems to avoid some oddities
222
+ # such as an extra new line being inserted when explicitly closed or
223
+ # swallowing subsequent elements when not closed
224
+ %{<govspeak-embed-attachment content-id="#{content_id}"></govspeak-embed-attachment>}
235
225
  end
236
226
 
237
227
  extension('attachment inline', /\[embed:attachments:inline:\s*(.*?)\s*\]/) do |content_id|
@@ -353,9 +343,8 @@ module Govspeak
353
343
  contact = contacts.detect { |c| c[:content_id] == content_id }
354
344
  next "" unless contact
355
345
 
356
- contact = ContactPresenter.new(contact)
357
- @renderer ||= ERB.new(File.read(__dir__ + '/templates/contact.html.erb'))
358
- @renderer.result(binding)
346
+ renderer = TemplateRenderer.new('contact.html.erb', locale)
347
+ renderer.render(contact: ContactPresenter.new(contact))
359
348
  end
360
349
 
361
350
  extension('Image', /#{NEW_PARAGRAPH_LOOKBEHIND}\[Image:\s*(.*?)\s*\]/) do |image_id|
@@ -374,10 +363,6 @@ module Govspeak
374
363
  def encode(text)
375
364
  HTMLEntities.new.encode(text)
376
365
  end
377
-
378
- def render_hcard_address(contact_address)
379
- HCardPresenter.new(contact_address).render
380
- end
381
366
  end
382
367
  end
383
368
 
@@ -49,11 +49,6 @@ class Govspeak::HtmlSanitizer
49
49
  Sanitize.clean(@dirty_html, Sanitize::Config.merge(sanitize_config, transformers: transformers))
50
50
  end
51
51
 
52
- def sanitize_without_images
53
- config = sanitize_config
54
- Sanitize.clean(@dirty_html, Sanitize::Config.merge(config, elements: config[:elements] - %w[img]))
55
- end
56
-
57
52
  def button_sanitize_config
58
53
  [
59
54
  "data-module",
@@ -65,11 +60,13 @@ class Govspeak::HtmlSanitizer
65
60
  def sanitize_config
66
61
  Sanitize::Config.merge(
67
62
  Sanitize::Config::RELAXED,
63
+ elements: Sanitize::Config::RELAXED[:elements] + %w[govspeak-embed-attachment],
68
64
  attributes: {
69
65
  :all => Sanitize::Config::RELAXED[:attributes][:all] + ["role", "aria-label"],
70
66
  "a" => Sanitize::Config::RELAXED[:attributes]["a"] + button_sanitize_config,
71
67
  "th" => Sanitize::Config::RELAXED[:attributes]["th"] + %w[style],
72
68
  "td" => Sanitize::Config::RELAXED[:attributes]["td"] + %w[style],
69
+ "govspeak-embed-attachment" => %w[content-id],
73
70
  }
74
71
  )
75
72
  end
@@ -22,6 +22,6 @@ class Govspeak::HtmlValidator
22
22
  end
23
23
 
24
24
  def govspeak_to_html
25
- Govspeak::Document.new(govspeak_string).to_html
25
+ Govspeak::Document.new(govspeak_string, sanitize: false).to_html
26
26
  end
27
27
  end
@@ -8,8 +8,8 @@ module Govspeak
8
8
  @extensions
9
9
  end
10
10
 
11
- def self.process(html)
12
- new(html).output
11
+ def self.process(html, govspeak_document)
12
+ new(html, govspeak_document).output
13
13
  end
14
14
 
15
15
  def self.extension(title, &block)
@@ -49,10 +49,25 @@ module Govspeak
49
49
  end
50
50
  end
51
51
 
52
- attr_reader :input
52
+ extension("embed attachment HTML") do |document|
53
+ document.css("govspeak-embed-attachment").map do |el|
54
+ attachment = govspeak_document.attachments.detect { |a| a[:content_id] == el["content-id"] }
55
+ unless attachment
56
+ el.remove
57
+ next
58
+ end
53
59
 
54
- def initialize(html)
60
+ renderer = TemplateRenderer.new('attachment.html.erb', govspeak_document.locale)
61
+ attachment_html = renderer.render(attachment: AttachmentPresenter.new(attachment))
62
+ el.swap(attachment_html)
63
+ end
64
+ end
65
+
66
+ attr_reader :input, :govspeak_document
67
+
68
+ def initialize(html, govspeak_document)
55
69
  @input = html
70
+ @govspeak_document = govspeak_document
56
71
  end
57
72
 
58
73
  def output
@@ -63,7 +78,6 @@ module Govspeak
63
78
  document.to_html
64
79
  end
65
80
 
66
-
67
81
  private
68
82
 
69
83
  def nokogiri_document
@@ -0,0 +1,27 @@
1
+ module Govspeak
2
+ class TemplateRenderer
3
+ attr_reader :template, :locale
4
+
5
+ def initialize(template, locale)
6
+ @template = template
7
+ @locale = locale
8
+ end
9
+
10
+ def render(locals)
11
+ template_binding = binding
12
+ locals.each { |k, v| template_binding.local_variable_set(k, v) }
13
+ erb = ERB.new(File.read(__dir__ + "/../templates/#{template}"))
14
+ erb.result(template_binding)
15
+ end
16
+
17
+ def t(*args)
18
+ options = args.last.is_a?(Hash) ? args.last.dup : {}
19
+ key = args.shift
20
+ I18n.t!(key, options.merge(locale: locale))
21
+ end
22
+
23
+ def format_with_html_line_breaks(string)
24
+ ERB::Util.html_escape(string || "").strip.gsub(/(?:\r?\n)/, "<br/>").html_safe
25
+ end
26
+ end
27
+ end
@@ -1,3 +1,3 @@
1
1
  module Govspeak
2
- VERSION = "5.9.1".freeze
2
+ VERSION = "6.0.0".freeze
3
3
  end
@@ -7,7 +7,7 @@
7
7
  <h3><%= contact.title %></h3>
8
8
  <div class="vcard contact-inner">
9
9
  <% contact.post_addresses.each do |address| %>
10
- <%= render_hcard_address(address) %>
10
+ <%= Govspeak::HCardPresenter.new(address).render %>
11
11
  <% end %>
12
12
  <% if contact.email_addresses.any? || contact.phone_numbers.any? || contact.contact_form_links.any? %>
13
13
  <div class="email-url-number">
@@ -632,14 +632,14 @@ Teston
632
632
  }
633
633
  end
634
634
 
635
- test "can sanitize a document" do
635
+ test 'sanitize source input by default' do
636
636
  document = Govspeak::Document.new("<script>doBadThings();</script>")
637
- assert_equal "doBadThings();", document.to_sanitized_html.strip
637
+ assert_equal "", document.to_html.strip
638
638
  end
639
639
 
640
- test "can sanitize a document without image" do
641
- document = Govspeak::Document.new("<script>doBadThings();</script><img src='https://example.com/image.jpg'>")
642
- assert_equal "doBadThings();<p></p>", document.to_sanitized_html_without_images.gsub(/\s/, "")
640
+ test 'it can have sanitizing disabled' do
641
+ document = Govspeak::Document.new("<script>doGoodThings();</script>", sanitize: false)
642
+ assert_equal "<script>doGoodThings();</script>", document.to_html.strip
643
643
  end
644
644
 
645
645
  test "identifies a Govspeak document containing malicious HTML as invalid" do
@@ -3,7 +3,7 @@ require "test_helper"
3
3
  class HtmlSanitizerTest < Minitest::Test
4
4
  test "disallow a script tag" do
5
5
  html = "<script>alert('XSS')</script>"
6
- assert_equal "alert('XSS')", Govspeak::HtmlSanitizer.new(html).sanitize
6
+ assert_equal "", Govspeak::HtmlSanitizer.new(html).sanitize
7
7
  end
8
8
 
9
9
  test "disallow a javascript protocol in an attribute" do
@@ -46,11 +46,6 @@ class HtmlSanitizerTest < Minitest::Test
46
46
  assert_equal "", Govspeak::HtmlSanitizer.new(html, allowed_image_hosts: ['allowed.com']).sanitize
47
47
  end
48
48
 
49
- test "can strip images" do
50
- html = "<img src='http://example.com/image.jgp'>"
51
- assert_equal "", Govspeak::HtmlSanitizer.new(html).sanitize_without_images
52
- end
53
-
54
49
  test "allows table cells and table headings without a style attribute" do
55
50
  html = "<table><thead><tr><th>thing</th></tr></thead><tbody><tr><td>thing</td></tr></tbody></table>"
56
51
  assert_equal html, Govspeak::HtmlSanitizer.new(html).sanitize
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: govspeak
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.9.1
4
+ version: 6.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - GOV.UK Dev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-07 00:00:00.000000000 Z
11
+ date: 2019-03-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: actionview
@@ -44,20 +44,6 @@ dependencies:
44
44
  - - "<"
45
45
  - !ruby/object:Gem::Version
46
46
  version: '3'
47
- - !ruby/object:Gem::Dependency
48
- name: commander
49
- requirement: !ruby/object:Gem::Requirement
50
- requirements:
51
- - - "~>"
52
- - !ruby/object:Gem::Version
53
- version: '4.4'
54
- type: :runtime
55
- prerelease: false
56
- version_requirements: !ruby/object:Gem::Requirement
57
- requirements:
58
- - - "~>"
59
- - !ruby/object:Gem::Version
60
- version: '4.4'
61
47
  - !ruby/object:Gem::Dependency
62
48
  name: htmlentities
63
49
  requirement: !ruby/object:Gem::Requirement
@@ -148,14 +134,14 @@ dependencies:
148
134
  requirements:
149
135
  - - "~>"
150
136
  - !ruby/object:Gem::Version
151
- version: '4.6'
137
+ version: '5'
152
138
  type: :runtime
153
139
  prerelease: false
154
140
  version_requirements: !ruby/object:Gem::Requirement
155
141
  requirements:
156
142
  - - "~>"
157
143
  - !ruby/object:Gem::Version
158
- version: '4.6'
144
+ version: '5'
159
145
  - !ruby/object:Gem::Dependency
160
146
  name: govuk-lint
161
147
  requirement: !ruby/object:Gem::Requirement
@@ -245,8 +231,7 @@ description: |-
245
231
  library for use in the UK Government Single Domain project
246
232
  email:
247
233
  - govuk-dev@digital.cabinet-office.gov.uk
248
- executables:
249
- - govspeak
234
+ executables: []
250
235
  extensions: []
251
236
  extra_rdoc_files: []
252
237
  files:
@@ -258,11 +243,9 @@ files:
258
243
  - assets/example.png
259
244
  - assets/information_callout.png
260
245
  - assets/warning_callout.png
261
- - bin/govspeak
262
246
  - config/address_formats.yml
263
247
  - lib/govspeak.rb
264
248
  - lib/govspeak/blockquote_extra_quote_remover.rb
265
- - lib/govspeak/cli.rb
266
249
  - lib/govspeak/header_extractor.rb
267
250
  - lib/govspeak/html_sanitizer.rb
268
251
  - lib/govspeak/html_validator.rb
@@ -275,11 +258,11 @@ files:
275
258
  - lib/govspeak/presenters/h_card_presenter.rb
276
259
  - lib/govspeak/presenters/image_presenter.rb
277
260
  - lib/govspeak/structured_header_extractor.rb
261
+ - lib/govspeak/template_renderer.rb
278
262
  - lib/govspeak/version.rb
279
263
  - lib/kramdown/parser/kramdown_with_automatic_external_links.rb
280
264
  - lib/templates/attachment.html.erb
281
265
  - lib/templates/contact.html.erb
282
- - lib/templates/inline_attachment.html.erb
283
266
  - locales/ar.yml
284
267
  - locales/be.yml
285
268
  - locales/bg.yml
@@ -364,21 +347,21 @@ signing_key:
364
347
  specification_version: 4
365
348
  summary: Markup language for single domain
366
349
  test_files:
367
- - test/govspeak_link_extractor_test.rb
368
- - test/govspeak_structured_headers_test.rb
369
- - test/govspeak_images_bang_test.rb
370
- - test/govspeak_button_test.rb
371
- - test/govspeak_extract_contact_content_ids_test.rb
372
350
  - test/blockquote_extra_quote_remover_test.rb
373
351
  - test/govspeak_test_helper.rb
374
- - test/govspeak_link_test.rb
375
- - test/govspeak_images_test.rb
376
- - test/govspeak_contacts_test.rb
352
+ - test/govspeak_structured_headers_test.rb
353
+ - test/govspeak_attachments_image_test.rb
354
+ - test/govspeak_attachments_test.rb
377
355
  - test/test_helper.rb
378
- - test/html_validator_test.rb
379
- - test/html_sanitizer_test.rb
380
356
  - test/govspeak_attachments_inline_test.rb
357
+ - test/html_sanitizer_test.rb
358
+ - test/govspeak_button_test.rb
359
+ - test/govspeak_images_bang_test.rb
360
+ - test/govspeak_images_test.rb
361
+ - test/html_validator_test.rb
362
+ - test/govspeak_extract_contact_content_ids_test.rb
381
363
  - test/govspeak_test.rb
382
- - test/govspeak_attachments_test.rb
383
- - test/govspeak_attachments_image_test.rb
364
+ - test/govspeak_link_extractor_test.rb
365
+ - test/govspeak_link_test.rb
366
+ - test/govspeak_contacts_test.rb
384
367
  - test/presenters/h_card_presenter_test.rb
@@ -1,8 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- lib = File.expand_path('../lib', __dir__)
4
- $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
-
6
- require "govspeak/cli"
7
-
8
- Govspeak::CLI.new.run
@@ -1,53 +0,0 @@
1
- require 'govspeak/version'
2
- require 'govspeak'
3
- require 'commander'
4
-
5
- module Govspeak
6
- class CLI
7
- include Commander::Methods
8
-
9
- def run
10
- program(:name, 'Govspeak')
11
- program(:version, Govspeak::VERSION)
12
- program(:description, "A tool for rendering the GOV.UK dialect of markdown into HTML")
13
- default_command(:render)
14
- command(:render) do |command|
15
- command.syntax = "govspeak render [options] <input>"
16
- command.description = "Render Govspeak into HTML, can be sourced from stdin, as an argument or from a file"
17
- command.option("--file FILENAME", String, "File to render")
18
- command.option("--options JSON", String, "JSON to use as options")
19
- command.option("--options-file FILENAME", String, "A file of JSON options")
20
- command.action do |args, options|
21
- input = get_input($stdin, args, options)
22
- raise "Nothing to render. Use --help for assistance" unless input
23
-
24
- puts Govspeak::Document.new(input, govspeak_options(options)).to_html
25
- end
26
- end
27
- run!
28
- end
29
-
30
- private
31
-
32
- def get_input(stdin, args, options)
33
- return stdin.read unless stdin.tty?
34
- return read_file(options.file) if options.file
35
-
36
- args.empty? ? nil : args.join(" ")
37
- end
38
-
39
- def read_file(file_path)
40
- path = Pathname.new(file_path).realpath
41
- File.read(path)
42
- end
43
-
44
- def govspeak_options(command_options)
45
- string = if command_options.options_file
46
- read_file(command_options.options_file)
47
- else
48
- command_options.options
49
- end
50
- string ? JSON.parse(string) : {}
51
- end
52
- end
53
- end
@@ -1,6 +0,0 @@
1
- <span <% if attachment.id %>id="attachment_<%= attachment.id %>" <% end %>class="attachment-inline">
2
- <%= attachment.link attachment.title, attachment.url %>
3
- <% unless attachment.attachment_attributes.empty? %>
4
- (<%= attachment.attachment_attributes %>)
5
- <% end %>
6
- </span>