govspeak 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/govspeak.rb +5 -1
- data/lib/govspeak/html_validator.rb +40 -0
- data/lib/govspeak/version.rb +1 -1
- data/test/govspeak_test.rb +9 -0
- data/test/html_validator_test.rb +87 -0
- metadata +26 -12
data/lib/govspeak.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'kramdown'
|
2
2
|
require 'govspeak/header_extractor'
|
3
|
+
require 'govspeak/html_validator'
|
3
4
|
require 'kramdown/parser/kramdown_with_automatic_external_links'
|
4
5
|
require 'htmlentities'
|
5
6
|
|
@@ -23,7 +24,6 @@ module Govspeak
|
|
23
24
|
Parser.document_domains = options.delete(:document_domains)
|
24
25
|
@options = {input: PARSER_CLASS_NAME, entity_output: :symbolic}.merge(options)
|
25
26
|
@images = []
|
26
|
-
super()
|
27
27
|
end
|
28
28
|
|
29
29
|
def kramdown_doc
|
@@ -39,6 +39,10 @@ module Govspeak
|
|
39
39
|
HTMLEntities.new.decode(to_html.gsub(/(?:<[^>]+>|\s)+/, " ").strip)
|
40
40
|
end
|
41
41
|
|
42
|
+
# Reports whether this document passes HTML validation.
#
# Hands @source (assigned outside the visible part of this class) to
# Govspeak::HtmlValidator and returns that validator's verdict.
def valid?
  validator = Govspeak::HtmlValidator.new(@source)
  validator.valid?
end
|
45
|
+
|
42
46
|
# Returns the first element of what Govspeak::HeaderExtractor.convert
# yields for this document's kramdown_doc.
def headers
  extracted = Govspeak::HeaderExtractor.convert(kramdown_doc)
  extracted.first
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'sanitize'
|
2
|
+
|
3
|
+
module Govspeak
  # Decides whether a Govspeak string is "safe": the string is rendered to
  # HTML, the HTML is run through Sanitize, and the input is considered valid
  # only when sanitising changed nothing.
  class HtmlValidator
    attr_reader :string

    # string - the Govspeak source to check.
    def initialize(string)
      @string = string
    end

    # Negation of #valid?.
    def invalid?
      !valid?
    end

    # Returns true when the rendered HTML and its sanitised counterpart are
    # equal after both have been normalised.
    def valid?
      dirty_html = govspeak_to_html
      clean_html = sanitize_html(dirty_html)
      normalise_html(dirty_html) == normalise_html(clean_html)
    end

    # Make whitespace in html tags consistent
    def normalise_html(html)
      Nokogiri::HTML.parse(html).to_s
    end

    # Renders the wrapped string through Govspeak::Document.
    def govspeak_to_html
      Govspeak::Document.new(string).to_html
    end

    # Strips everything from dirty_html that #sanitize_config does not allow.
    def sanitize_html(dirty_html)
      Sanitize.clean(dirty_html, sanitize_config)
    end

    # Builds the Sanitize configuration: Sanitize::Config::RELAXED plus the
    # "id"/"class" attributes on every element, "rel" on <a>, and the
    # <div>/<hr> elements.
    #
    # The result is assembled with non-mutating operations (merge, +).
    # A shallow dup followed by push would append to the arrays held by the
    # shared RELAXED constant itself, growing them on every call and
    # changing the whitelist for every other user of that constant.
    def sanitize_config
      relaxed = Sanitize::Config::RELAXED
      base_attributes = relaxed[:attributes]

      attributes = base_attributes.merge(
        :all => base_attributes[:all] + ["id", "class"],
        "a" => base_attributes["a"] + ["rel"]
      )

      relaxed.merge(
        :attributes => attributes,
        :elements => relaxed[:elements] + ["div", "hr"]
      )
    end
  end
end
|
data/lib/govspeak/version.rb
CHANGED
data/test/govspeak_test.rb
CHANGED
@@ -432,4 +432,13 @@ $CTA
|
|
432
432
|
end
|
433
433
|
end
|
434
434
|
|
435
|
+
# A document whose source is a raw <script> element must not validate.
test "identifies a Govspeak document containing malicious HTML as invalid" do
  refute Govspeak::Document.new("<script>doBadThings();</script>").valid?
end
|
439
|
+
|
440
|
+
# A document whose source is a plain <div> must validate.
test "identifies a Govspeak document containing acceptable HTML as valid" do
  assert Govspeak::Document.new("<div>some content</div>").valid?
end
|
435
444
|
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
require "test_helper"
|
2
|
+
|
3
|
+
# Exercises Govspeak::HtmlValidator: Govspeak markup must be accepted, while
# script tags, javascript: URLs and on* handlers must be rejected.
class HtmlValidatorTest < Test::Unit::TestCase
  test "allow Govspeak Markdown" do
    # The multi-line samples use adjacent string literals ("" "..." ""), which
    # Ruby concatenates; their content is kept flush-left so no indentation
    # ends up inside the strings.
    values = [
      "## is H2",
      "*bold text*",
      "* bullet",
      "- alternative bullet",
      "+ another bullet",
      "1. Numbered list",
      "s2. Step",
      """
Table | Header
- | -
Build | cells
""",
      "This is [an example](/an-inline-link \"Title\") inline link.",
      "<http://example.com/>",
      "<address@example.com>",
      "This is [an example](http://example.com/ \"Title\"){:rel=\"external\"} inline link to an external resource.",
      "^Your text here^ - creates a callout with an info (i) icon.",
      "%Your text here% - creates a callout with a warning or alert (!) icon",
      "@Your text here@ - highlights the enclosed text in yellow",
      "$CSome contact information here$C - contact information",
      "$A Hercules House Hercules Road London SE1 7DU $A",
      "$D [An example form download link](http://example.com/ \"Example form\") Something about this form download $D",
      "$EAn example for the citizen$E - examples boxout",
      "$!...$! - answer summary",
      "{::highlight-answer}...{:/highlight-answer} - creates a large pink highlight box with optional preamble text and giant text denoted with **.",
      "{::highlight-answer}",
      "The VAT rate is *20%*",
      "{:/highlight-answer}",
      "---",
      "*[GDS]: Government Digital Service",
      """
$P

$I
$A
Hercules House
Hercules Road
London SE1 7DU
$A

$AI
There is access to the building from the street via a ramp.
$AI
$I
$P
""",
      ":england:content goes here:england:",
      ":scotland:content goes here:scotland:"
    ]
    values.each do |value|
      # Name the offending sample so a failure is diagnosable without
      # bisecting the list by hand.
      assert Govspeak::HtmlValidator.new(value).valid?,
             "expected #{value.inspect} to be valid Govspeak"
    end
  end

  test "disallow a script tags" do
    assert Govspeak::HtmlValidator.new("<script>alert('XSS')</script>").invalid?
  end

  test "disallow a javascript protocol in an attribute" do
    html = %q{<a href="javascript:alert(document.location);"
title="Title">an example</a>}
    assert Govspeak::HtmlValidator.new(html).invalid?
  end

  test "disallow a javascript protocol in a Markdown link" do
    html = %q{This is [an example](javascript:alert(""); "Title") inline link.}
    assert Govspeak::HtmlValidator.new(html).invalid?
  end

  test "disallow on* attributes" do
    html = %q{<a href="/" onclick="alert('xss');">Link</a>}
    assert Govspeak::HtmlValidator.new(html).invalid?
  end

  test "allow non-JS HTML content" do
    assert Govspeak::HtmlValidator.new("<a href='foo'>").valid?
  end

  test "allow things that will end up as HTML entities" do
    assert Govspeak::HtmlValidator.new("Fortnum & Mason").valid?
  end
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: govspeak
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 1.0.0
|
5
|
+
version: 1.0.1
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Ben Griffiths
|
@@ -11,7 +11,7 @@ autorequire:
|
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
13
|
|
14
|
-
date: 2012-
|
14
|
+
date: 2012-09-06 00:00:00 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: kramdown
|
@@ -36,8 +36,19 @@ dependencies:
|
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: *id002
|
38
38
|
- !ruby/object:Gem::Dependency
|
39
|
-
name: rake
|
39
|
+
name: sanitize
|
40
40
|
requirement: &id003 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - "="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 2.0.3
|
46
|
+
type: :runtime
|
47
|
+
prerelease: false
|
48
|
+
version_requirements: *id003
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: rake
|
51
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
41
52
|
none: false
|
42
53
|
requirements:
|
43
54
|
- - ~>
|
@@ -45,10 +56,10 @@ dependencies:
|
|
45
56
|
version: 0.9.0
|
46
57
|
type: :development
|
47
58
|
prerelease: false
|
48
|
-
version_requirements: *id003
|
59
|
+
version_requirements: *id004
|
49
60
|
- !ruby/object:Gem::Dependency
|
50
61
|
name: gem_publisher
|
51
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
62
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
52
63
|
none: false
|
53
64
|
requirements:
|
54
65
|
- - ~>
|
@@ -56,10 +67,10 @@ dependencies:
|
|
56
67
|
version: 1.1.1
|
57
68
|
type: :development
|
58
69
|
prerelease: false
|
59
|
-
version_requirements: *id004
|
70
|
+
version_requirements: *id005
|
60
71
|
- !ruby/object:Gem::Dependency
|
61
72
|
name: simplecov
|
62
|
-
requirement: &id005 !ruby/object:Gem::Requirement
|
73
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
63
74
|
none: false
|
64
75
|
requirements:
|
65
76
|
- - ">="
|
@@ -67,10 +78,10 @@ dependencies:
|
|
67
78
|
version: "0"
|
68
79
|
type: :development
|
69
80
|
prerelease: false
|
70
|
-
version_requirements: *id005
|
81
|
+
version_requirements: *id006
|
71
82
|
- !ruby/object:Gem::Dependency
|
72
83
|
name: simplecov-rcov
|
73
|
-
requirement: &id006 !ruby/object:Gem::Requirement
|
84
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
74
85
|
none: false
|
75
86
|
requirements:
|
76
87
|
- - ">="
|
@@ -78,7 +89,7 @@ dependencies:
|
|
78
89
|
version: "0"
|
79
90
|
type: :development
|
80
91
|
prerelease: false
|
81
|
-
version_requirements: *id006
|
92
|
+
version_requirements: *id007
|
82
93
|
description: |-
|
83
94
|
A set of extensions to markdown layered on top of the kramdown
|
84
95
|
library for use in the UK Government Single Domain project
|
@@ -94,11 +105,13 @@ extra_rdoc_files: []
|
|
94
105
|
files:
|
95
106
|
- lib/govspeak/version.rb
|
96
107
|
- lib/govspeak/header_extractor.rb
|
108
|
+
- lib/govspeak/html_validator.rb
|
97
109
|
- lib/govspeak.rb
|
98
110
|
- lib/kramdown/parser/kramdown_with_automatic_external_links.rb
|
99
111
|
- README.md
|
100
112
|
- Gemfile
|
101
113
|
- Rakefile
|
114
|
+
- test/html_validator_test.rb
|
102
115
|
- test/govspeak_test_helper.rb
|
103
116
|
- test/govspeak_test.rb
|
104
117
|
- test/test_helper.rb
|
@@ -115,7 +128,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
115
128
|
requirements:
|
116
129
|
- - ">="
|
117
130
|
- !ruby/object:Gem::Version
|
118
|
-
hash:
|
131
|
+
hash: -3025037898440193908
|
119
132
|
segments:
|
120
133
|
- 0
|
121
134
|
version: "0"
|
@@ -124,7 +137,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
124
137
|
requirements:
|
125
138
|
- - ">="
|
126
139
|
- !ruby/object:Gem::Version
|
127
|
-
hash:
|
140
|
+
hash: -3025037898440193908
|
128
141
|
segments:
|
129
142
|
- 0
|
130
143
|
version: "0"
|
@@ -136,6 +149,7 @@ signing_key:
|
|
136
149
|
specification_version: 3
|
137
150
|
summary: Markup language for single domain
|
138
151
|
test_files:
|
152
|
+
- test/html_validator_test.rb
|
139
153
|
- test/govspeak_test_helper.rb
|
140
154
|
- test/govspeak_test.rb
|
141
155
|
- test/test_helper.rb
|