govspeak 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/govspeak.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require 'kramdown'
2
2
  require 'govspeak/header_extractor'
3
+ require 'govspeak/html_validator'
3
4
  require 'kramdown/parser/kramdown_with_automatic_external_links'
4
5
  require 'htmlentities'
5
6
 
@@ -23,7 +24,6 @@ module Govspeak
23
24
  Parser.document_domains = options.delete(:document_domains)
24
25
  @options = {input: PARSER_CLASS_NAME, entity_output: :symbolic}.merge(options)
25
26
  @images = []
26
- super()
27
27
  end
28
28
 
29
29
  def kramdown_doc
@@ -39,6 +39,10 @@ module Govspeak
39
39
  HTMLEntities.new.decode(to_html.gsub(/(?:<[^>]+>|\s)+/, " ").strip)
40
40
  end
41
41
 
42
+ def valid?
43
+ Govspeak::HtmlValidator.new(@source).valid?
44
+ end
45
+
42
46
  def headers
43
47
  Govspeak::HeaderExtractor.convert(kramdown_doc).first
44
48
  end
@@ -0,0 +1,40 @@
1
+ require 'sanitize'
2
+
3
+ class Govspeak::HtmlValidator
4
+ attr_reader :string
5
+
6
+ def initialize(string)
7
+ @string = string
8
+ end
9
+
10
+ def invalid?
11
+ !valid?
12
+ end
13
+
14
+ def valid?
15
+ dirty_html = govspeak_to_html
16
+ clean_html = sanitize_html(dirty_html)
17
+ normalise_html(dirty_html) == normalise_html(clean_html)
18
+ end
19
+
20
+ # Make whitespace in html tags consistent
21
+ def normalise_html(html)
22
+ Nokogiri::HTML.parse(html).to_s
23
+ end
24
+
25
+ def govspeak_to_html
26
+ Govspeak::Document.new(string).to_html
27
+ end
28
+
29
+ def sanitize_html(dirty_html)
30
+ Sanitize.clean(dirty_html, sanitize_config)
31
+ end
32
+
33
+ def sanitize_config
34
+ config = Sanitize::Config::RELAXED.dup
35
+ config[:attributes][:all].push("id", "class")
36
+ config[:attributes]["a"].push("rel")
37
+ config[:elements].push("div", "hr")
38
+ config
39
+ end
40
+ end
@@ -1,3 +1,3 @@
1
1
  module Govspeak
2
- VERSION = "1.0.0"
2
+ VERSION = "1.0.1"
3
3
  end
@@ -432,4 +432,13 @@ $CTA
432
432
  end
433
433
  end
434
434
 
435
+ test "identifies a Govspeak document containing malicious HTML as invalid" do
436
+ document = Govspeak::Document.new("<script>doBadThings();</script>")
437
+ refute document.valid?
438
+ end
439
+
440
+ test "identifies a Govspeak document containing acceptable HTML as valid" do
441
+ document = Govspeak::Document.new("<div>some content</div>")
442
+ assert document.valid?
443
+ end
435
444
  end
@@ -0,0 +1,87 @@
1
+ require "test_helper"
2
+
3
+ class HtmlValidatorTest < Test::Unit::TestCase
4
+ test "allow Govspeak Markdown" do
5
+ values = [
6
+ "## is H2",
7
+ "*bold text*",
8
+ "* bullet",
9
+ "- alternative bullet",
10
+ "+ another bullet",
11
+ "1. Numbered list",
12
+ "s2. Step",
13
+ """
14
+ Table | Header
15
+ - | -
16
+ Build | cells
17
+ """,
18
+ "This is [an example](/an-inline-link \"Title\") inline link.",
19
+ "<http://example.com/>",
20
+ "<address@example.com>",
21
+ "This is [an example](http://example.com/ \"Title\"){:rel=\"external\"} inline link to an external resource.",
22
+ "^Your text here^ - creates a callout with an info (i) icon.",
23
+ "%Your text here% - creates a callout with a warning or alert (!) icon",
24
+ "@Your text here@ - highlights the enclosed text in yellow",
25
+ "$CSome contact information here$C - contact information",
26
+ "$A Hercules House Hercules Road London SE1 7DU $A",
27
+ "$D [An example form download link](http://example.com/ \"Example form\") Something about this form download $D",
28
+ "$EAn example for the citizen$E - examples boxout",
29
+ "$!...$! - answer summary",
30
+ "{::highlight-answer}...{:/highlight-answer} - creates a large pink highlight box with optional preamble text and giant text denoted with **.",
31
+ "{::highlight-answer}",
32
+ "The VAT rate is *20%*",
33
+ "{:/highlight-answer}",
34
+ "---",
35
+ "*[GDS]: Government Digital Service",
36
+ """
37
+ $P
38
+
39
+ $I
40
+ $A
41
+ Hercules House
42
+ Hercules Road
43
+ London SE1 7DU
44
+ $A
45
+
46
+ $AI
47
+ There is access to the building from the street via a ramp.
48
+ $AI
49
+ $I
50
+ $P
51
+ """,
52
+ ":england:content goes here:england:",
53
+ ":scotland:content goes here:scotland:"
54
+ ]
55
+ values.each do |value|
56
+ assert Govspeak::HtmlValidator.new(value).valid?
57
+ end
58
+ end
59
+
60
+ test "disallow a script tags" do
61
+ assert Govspeak::HtmlValidator.new("<script>alert('XSS')</script>").invalid?
62
+ end
63
+
64
+ test "disallow a javascript protocol in an attribute" do
65
+ html = %q{<a href="javascript:alert(document.location);"
66
+ title="Title">an example</a>}
67
+ assert Govspeak::HtmlValidator.new(html).invalid?
68
+ end
69
+
70
+ test "disallow a javascript protocol in a Markdown link" do
71
+ html = %q{This is [an example](javascript:alert(""); "Title") inline link.}
72
+ assert Govspeak::HtmlValidator.new(html).invalid?
73
+ end
74
+
75
+ test "disallow on* attributes" do
76
+ html = %q{<a href="/" onclick="alert('xss');">Link</a>}
77
+ assert Govspeak::HtmlValidator.new(html).invalid?
78
+ end
79
+
80
+ test "allow non-JS HTML content" do
81
+ assert Govspeak::HtmlValidator.new("<a href='foo'>").valid?
82
+ end
83
+
84
+ test "allow things that will end up as HTML entities" do
85
+ assert Govspeak::HtmlValidator.new("Fortnum & Mason").valid?
86
+ end
87
+ end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: govspeak
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.0.0
5
+ version: 1.0.1
6
6
  platform: ruby
7
7
  authors:
8
8
  - Ben Griffiths
@@ -11,7 +11,7 @@ autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
13
 
14
- date: 2012-08-30 00:00:00 Z
14
+ date: 2012-09-06 00:00:00 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: kramdown
@@ -36,8 +36,19 @@ dependencies:
36
36
  prerelease: false
37
37
  version_requirements: *id002
38
38
  - !ruby/object:Gem::Dependency
39
- name: rake
39
+ name: sanitize
40
40
  requirement: &id003 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - "="
44
+ - !ruby/object:Gem::Version
45
+ version: 2.0.3
46
+ type: :runtime
47
+ prerelease: false
48
+ version_requirements: *id003
49
+ - !ruby/object:Gem::Dependency
50
+ name: rake
51
+ requirement: &id004 !ruby/object:Gem::Requirement
41
52
  none: false
42
53
  requirements:
43
54
  - - ~>
@@ -45,10 +56,10 @@ dependencies:
45
56
  version: 0.9.0
46
57
  type: :development
47
58
  prerelease: false
48
- version_requirements: *id003
59
+ version_requirements: *id004
49
60
  - !ruby/object:Gem::Dependency
50
61
  name: gem_publisher
51
- requirement: &id004 !ruby/object:Gem::Requirement
62
+ requirement: &id005 !ruby/object:Gem::Requirement
52
63
  none: false
53
64
  requirements:
54
65
  - - ~>
@@ -56,10 +67,10 @@ dependencies:
56
67
  version: 1.1.1
57
68
  type: :development
58
69
  prerelease: false
59
- version_requirements: *id004
70
+ version_requirements: *id005
60
71
  - !ruby/object:Gem::Dependency
61
72
  name: simplecov
62
- requirement: &id005 !ruby/object:Gem::Requirement
73
+ requirement: &id006 !ruby/object:Gem::Requirement
63
74
  none: false
64
75
  requirements:
65
76
  - - ">="
@@ -67,10 +78,10 @@ dependencies:
67
78
  version: "0"
68
79
  type: :development
69
80
  prerelease: false
70
- version_requirements: *id005
81
+ version_requirements: *id006
71
82
  - !ruby/object:Gem::Dependency
72
83
  name: simplecov-rcov
73
- requirement: &id006 !ruby/object:Gem::Requirement
84
+ requirement: &id007 !ruby/object:Gem::Requirement
74
85
  none: false
75
86
  requirements:
76
87
  - - ">="
@@ -78,7 +89,7 @@ dependencies:
78
89
  version: "0"
79
90
  type: :development
80
91
  prerelease: false
81
- version_requirements: *id006
92
+ version_requirements: *id007
82
93
  description: |-
83
94
  A set of extensions to markdown layered on top of the kramdown
84
95
  library for use in the UK Government Single Domain project
@@ -94,11 +105,13 @@ extra_rdoc_files: []
94
105
  files:
95
106
  - lib/govspeak/version.rb
96
107
  - lib/govspeak/header_extractor.rb
108
+ - lib/govspeak/html_validator.rb
97
109
  - lib/govspeak.rb
98
110
  - lib/kramdown/parser/kramdown_with_automatic_external_links.rb
99
111
  - README.md
100
112
  - Gemfile
101
113
  - Rakefile
114
+ - test/html_validator_test.rb
102
115
  - test/govspeak_test_helper.rb
103
116
  - test/govspeak_test.rb
104
117
  - test/test_helper.rb
@@ -115,7 +128,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
115
128
  requirements:
116
129
  - - ">="
117
130
  - !ruby/object:Gem::Version
118
- hash: 2632148642113088526
131
+ hash: -3025037898440193908
119
132
  segments:
120
133
  - 0
121
134
  version: "0"
@@ -124,7 +137,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
124
137
  requirements:
125
138
  - - ">="
126
139
  - !ruby/object:Gem::Version
127
- hash: 2632148642113088526
140
+ hash: -3025037898440193908
128
141
  segments:
129
142
  - 0
130
143
  version: "0"
@@ -136,6 +149,7 @@ signing_key:
136
149
  specification_version: 3
137
150
  summary: Markup language for single domain
138
151
  test_files:
152
+ - test/html_validator_test.rb
139
153
  - test/govspeak_test_helper.rb
140
154
  - test/govspeak_test.rb
141
155
  - test/test_helper.rb