govspeak 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/govspeak.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require 'kramdown'
2
2
  require 'govspeak/header_extractor'
3
+ require 'govspeak/html_validator'
3
4
  require 'kramdown/parser/kramdown_with_automatic_external_links'
4
5
  require 'htmlentities'
5
6
 
@@ -23,7 +24,6 @@ module Govspeak
23
24
  Parser.document_domains = options.delete(:document_domains)
24
25
  @options = {input: PARSER_CLASS_NAME, entity_output: :symbolic}.merge(options)
25
26
  @images = []
26
- super()
27
27
  end
28
28
 
29
29
  def kramdown_doc
@@ -39,6 +39,10 @@ module Govspeak
39
39
  HTMLEntities.new.decode(to_html.gsub(/(?:<[^>]+>|\s)+/, " ").strip)
40
40
  end
41
41
 
42
# Reports whether this document's source passes Govspeak::HtmlValidator.
# The validator is given the raw @source, not the rendered HTML.
def valid?
  validator = Govspeak::HtmlValidator.new(@source)
  validator.valid?
end
45
+
42
46
  def headers
43
47
  Govspeak::HeaderExtractor.convert(kramdown_doc).first
44
48
  end
@@ -0,0 +1,40 @@
1
+ require 'sanitize'
2
+
3
# Checks whether a Govspeak string renders to HTML that survives
# sanitization unchanged.
#
# The string is rendered with Govspeak::Document, the result is cleaned with
# Sanitize, and both versions are compared after being re-serialised by
# Nokogiri. If cleaning altered the markup, the source is reported invalid.
class Govspeak::HtmlValidator
  attr_reader :string

  # string - the Govspeak source text to check.
  def initialize(string)
    @string = string
  end

  # Returns true when sanitization alters the rendered HTML.
  def invalid?
    !valid?
  end

  # Returns true when the rendered HTML is identical before and after
  # sanitization (compared in normalised form).
  def valid?
    dirty_html = govspeak_to_html
    clean_html = sanitize_html(dirty_html)
    normalise_html(dirty_html) == normalise_html(clean_html)
  end

  # Make whitespace in html tags consistent by round-tripping the markup
  # through Nokogiri's HTML parser, so both sides of the comparison in
  # #valid? are serialised the same way.
  # NOTE(review): Nokogiri is not required in this file; presumably it is
  # loaded by the sanitize require - confirm.
  def normalise_html(html)
    Nokogiri::HTML.parse(html).to_s
  end

  # Renders the source string to HTML with Govspeak::Document.
  def govspeak_to_html
    Govspeak::Document.new(string).to_html
  end

  # Returns dirty_html cleaned by Sanitize using #sanitize_config.
  def sanitize_html(dirty_html)
    Sanitize.clean(dirty_html, sanitize_config)
  end

  # Builds the whitelist: Sanitize's RELAXED config, plus "id" and "class"
  # on every element, "rel" on links, and the "div" and "hr" elements.
  #
  # Hash#dup is a shallow copy, so pushing onto the nested arrays of a dup
  # would modify Sanitize::Config::RELAXED itself: the lists would grow on
  # every call and the change would leak to every other user of the
  # constant. New arrays and hashes are built here instead, leaving the
  # library's constant untouched.
  def sanitize_config
    relaxed = Sanitize::Config::RELAXED
    relaxed_attributes = relaxed[:attributes]

    attributes = relaxed_attributes.merge(
      :all => relaxed_attributes[:all] + ["id", "class"],
      "a" => relaxed_attributes["a"] + ["rel"]
    )

    relaxed.merge(
      :attributes => attributes,
      :elements => relaxed[:elements] + ["div", "hr"]
    )
  end
end
@@ -1,3 +1,3 @@
1
1
  module Govspeak
2
- VERSION = "1.0.0"
2
+ VERSION = "1.0.1"
3
3
  end
@@ -432,4 +432,13 @@ $CTA
432
432
  end
433
433
  end
434
434
 
435
# A document whose source is a script tag must not be reported as valid.
test "identifies a Govspeak document containing malicious HTML as invalid" do
  malicious = Govspeak::Document.new("<script>doBadThings();</script>")
  refute malicious.valid?
end
439
+
440
# A document whose source is a plain div must be reported as valid.
test "identifies a Govspeak document containing acceptable HTML as valid" do
  acceptable = Govspeak::Document.new("<div>some content</div>")
  assert acceptable.valid?
end
435
444
  end
@@ -0,0 +1,87 @@
1
+ require "test_helper"
2
+
3
# Exercises Govspeak::HtmlValidator#valid? and #invalid? against Govspeak
# markup samples and against raw HTML inputs.
+ class HtmlValidatorTest < Test::Unit::TestCase
# Every sample in the list is validated on its own and is expected to be
# reported as valid.
4
+ test "allow Govspeak Markdown" do
5
+ values = [
6
+ "## is H2",
7
+ "*bold text*",
8
+ "* bullet",
9
+ "- alternative bullet",
10
+ "+ another bullet",
11
+ "1. Numbered list",
12
+ "s2. Step",
# Ruby has no triple-quoted string syntax: the three quote characters are an
# empty string literal followed by the opening quote of an ordinary
# double-quoted string (adjacent literals are concatenated), so the sample
# below starts and ends with a newline.
13
+ """
14
+ Table | Header
15
+ - | -
16
+ Build | cells
17
+ """,
18
+ "This is [an example](/an-inline-link \"Title\") inline link.",
19
+ "<http://example.com/>",
20
+ "<address@example.com>",
21
+ "This is [an example](http://example.com/ \"Title\"){:rel=\"external\"} inline link to an external resource.",
22
+ "^Your text here^ - creates a callout with an info (i) icon.",
23
+ "%Your text here% - creates a callout with a warning or alert (!) icon",
24
+ "@Your text here@ - highlights the enclosed text in yellow",
25
+ "$CSome contact information here$C - contact information",
26
+ "$A Hercules House Hercules Road London SE1 7DU $A",
27
+ "$D [An example form download link](http://example.com/ \"Example form\") Something about this form download $D",
28
+ "$EAn example for the citizen$E - examples boxout",
29
+ "$!...$! - answer summary",
30
+ "{::highlight-answer}...{:/highlight-answer} - creates a large pink highlight box with optional preamble text and giant text denoted with **.",
31
+ "{::highlight-answer}",
32
+ "The VAT rate is *20%*",
33
+ "{:/highlight-answer}",
34
+ "---",
35
+ "*[GDS]: Government Digital Service",
36
+ """
37
+ $P
38
+
39
+ $I
40
+ $A
41
+ Hercules House
42
+ Hercules Road
43
+ London SE1 7DU
44
+ $A
45
+
46
+ $AI
47
+ There is access to the building from the street via a ramp.
48
+ $AI
49
+ $I
50
+ $P
51
+ """,
52
+ ":england:content goes here:england:",
53
+ ":scotland:content goes here:scotland:"
54
+ ]
# NOTE(review): assert is called without a message, so a failure here
# presumably does not say which sample was rejected - confirm.
55
+ values.each do |value|
56
+ assert Govspeak::HtmlValidator.new(value).valid?
57
+ end
58
+ end
59
+
# A literal script element must be reported as invalid.
60
+ test "disallow a script tags" do
61
+ assert Govspeak::HtmlValidator.new("<script>alert('XSS')</script>").invalid?
62
+ end
63
+
# Raw anchor tag with a javascript: href; the %q literal spans two lines, so
# the tag contains a line break between its attributes.
64
+ test "disallow a javascript protocol in an attribute" do
65
+ html = %q{<a href="javascript:alert(document.location);"
66
+ title="Title">an example</a>}
67
+ assert Govspeak::HtmlValidator.new(html).invalid?
68
+ end
69
+
# Same javascript: URL, but written as a Markdown inline link rather than
# as raw HTML.
70
+ test "disallow a javascript protocol in a Markdown link" do
71
+ html = %q{This is [an example](javascript:alert(""); "Title") inline link.}
72
+ assert Govspeak::HtmlValidator.new(html).invalid?
73
+ end
74
+
# An onclick handler on an otherwise harmless link must be reported invalid.
75
+ test "disallow on* attributes" do
76
+ html = %q{<a href="/" onclick="alert('xss');">Link</a>}
77
+ assert Govspeak::HtmlValidator.new(html).invalid?
78
+ end
79
+
# The anchor in this input is left unclosed and is still expected to be valid.
80
+ test "allow non-JS HTML content" do
81
+ assert Govspeak::HtmlValidator.new("<a href='foo'>").valid?
82
+ end
83
+
# The input contains a bare ampersand rather than an already-escaped entity.
84
+ test "allow things that will end up as HTML entities" do
85
+ assert Govspeak::HtmlValidator.new("Fortnum & Mason").valid?
86
+ end
87
+ end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: govspeak
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.0.0
5
+ version: 1.0.1
6
6
  platform: ruby
7
7
  authors:
8
8
  - Ben Griffiths
@@ -11,7 +11,7 @@ autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
13
 
14
- date: 2012-08-30 00:00:00 Z
14
+ date: 2012-09-06 00:00:00 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: kramdown
@@ -36,8 +36,19 @@ dependencies:
36
36
  prerelease: false
37
37
  version_requirements: *id002
38
38
  - !ruby/object:Gem::Dependency
39
- name: rake
39
+ name: sanitize
40
40
  requirement: &id003 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - "="
44
+ - !ruby/object:Gem::Version
45
+ version: 2.0.3
46
+ type: :runtime
47
+ prerelease: false
48
+ version_requirements: *id003
49
+ - !ruby/object:Gem::Dependency
50
+ name: rake
51
+ requirement: &id004 !ruby/object:Gem::Requirement
41
52
  none: false
42
53
  requirements:
43
54
  - - ~>
@@ -45,10 +56,10 @@ dependencies:
45
56
  version: 0.9.0
46
57
  type: :development
47
58
  prerelease: false
48
- version_requirements: *id003
59
+ version_requirements: *id004
49
60
  - !ruby/object:Gem::Dependency
50
61
  name: gem_publisher
51
- requirement: &id004 !ruby/object:Gem::Requirement
62
+ requirement: &id005 !ruby/object:Gem::Requirement
52
63
  none: false
53
64
  requirements:
54
65
  - - ~>
@@ -56,10 +67,10 @@ dependencies:
56
67
  version: 1.1.1
57
68
  type: :development
58
69
  prerelease: false
59
- version_requirements: *id004
70
+ version_requirements: *id005
60
71
  - !ruby/object:Gem::Dependency
61
72
  name: simplecov
62
- requirement: &id005 !ruby/object:Gem::Requirement
73
+ requirement: &id006 !ruby/object:Gem::Requirement
63
74
  none: false
64
75
  requirements:
65
76
  - - ">="
@@ -67,10 +78,10 @@ dependencies:
67
78
  version: "0"
68
79
  type: :development
69
80
  prerelease: false
70
- version_requirements: *id005
81
+ version_requirements: *id006
71
82
  - !ruby/object:Gem::Dependency
72
83
  name: simplecov-rcov
73
- requirement: &id006 !ruby/object:Gem::Requirement
84
+ requirement: &id007 !ruby/object:Gem::Requirement
74
85
  none: false
75
86
  requirements:
76
87
  - - ">="
@@ -78,7 +89,7 @@ dependencies:
78
89
  version: "0"
79
90
  type: :development
80
91
  prerelease: false
81
- version_requirements: *id006
92
+ version_requirements: *id007
82
93
  description: |-
83
94
  A set of extensions to markdown layered on top of the kramdown
84
95
  library for use in the UK Government Single Domain project
@@ -94,11 +105,13 @@ extra_rdoc_files: []
94
105
  files:
95
106
  - lib/govspeak/version.rb
96
107
  - lib/govspeak/header_extractor.rb
108
+ - lib/govspeak/html_validator.rb
97
109
  - lib/govspeak.rb
98
110
  - lib/kramdown/parser/kramdown_with_automatic_external_links.rb
99
111
  - README.md
100
112
  - Gemfile
101
113
  - Rakefile
114
+ - test/html_validator_test.rb
102
115
  - test/govspeak_test_helper.rb
103
116
  - test/govspeak_test.rb
104
117
  - test/test_helper.rb
@@ -115,7 +128,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
115
128
  requirements:
116
129
  - - ">="
117
130
  - !ruby/object:Gem::Version
118
- hash: 2632148642113088526
131
+ hash: -3025037898440193908
119
132
  segments:
120
133
  - 0
121
134
  version: "0"
@@ -124,7 +137,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
124
137
  requirements:
125
138
  - - ">="
126
139
  - !ruby/object:Gem::Version
127
- hash: 2632148642113088526
140
+ hash: -3025037898440193908
128
141
  segments:
129
142
  - 0
130
143
  version: "0"
@@ -136,6 +149,7 @@ signing_key:
136
149
  specification_version: 3
137
150
  summary: Markup language for single domain
138
151
  test_files:
152
+ - test/html_validator_test.rb
139
153
  - test/govspeak_test_helper.rb
140
154
  - test/govspeak_test.rb
141
155
  - test/test_helper.rb