govspeak 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/govspeak.rb +5 -1
- data/lib/govspeak/html_validator.rb +40 -0
- data/lib/govspeak/version.rb +1 -1
- data/test/govspeak_test.rb +9 -0
- data/test/html_validator_test.rb +87 -0
- metadata +26 -12
data/lib/govspeak.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'kramdown'
|
2
2
|
require 'govspeak/header_extractor'
|
3
|
+
require 'govspeak/html_validator'
|
3
4
|
require 'kramdown/parser/kramdown_with_automatic_external_links'
|
4
5
|
require 'htmlentities'
|
5
6
|
|
@@ -23,7 +24,6 @@ module Govspeak
|
|
23
24
|
Parser.document_domains = options.delete(:document_domains)
|
24
25
|
@options = {input: PARSER_CLASS_NAME, entity_output: :symbolic}.merge(options)
|
25
26
|
@images = []
|
26
|
-
super()
|
27
27
|
end
|
28
28
|
|
29
29
|
def kramdown_doc
|
@@ -39,6 +39,10 @@ module Govspeak
|
|
39
39
|
HTMLEntities.new.decode(to_html.gsub(/(?:<[^>]+>|\s)+/, " ").strip)
|
40
40
|
end
|
41
41
|
|
42
|
+
def valid?
|
43
|
+
Govspeak::HtmlValidator.new(@source).valid?
|
44
|
+
end
|
45
|
+
|
42
46
|
def headers
|
43
47
|
Govspeak::HeaderExtractor.convert(kramdown_doc).first
|
44
48
|
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'sanitize'
|
2
|
+
|
3
|
+
class Govspeak::HtmlValidator
|
4
|
+
attr_reader :string
|
5
|
+
|
6
|
+
def initialize(string)
|
7
|
+
@string = string
|
8
|
+
end
|
9
|
+
|
10
|
+
def invalid?
|
11
|
+
!valid?
|
12
|
+
end
|
13
|
+
|
14
|
+
def valid?
|
15
|
+
dirty_html = govspeak_to_html
|
16
|
+
clean_html = sanitize_html(dirty_html)
|
17
|
+
normalise_html(dirty_html) == normalise_html(clean_html)
|
18
|
+
end
|
19
|
+
|
20
|
+
# Make whitespace in html tags consistent
|
21
|
+
def normalise_html(html)
|
22
|
+
Nokogiri::HTML.parse(html).to_s
|
23
|
+
end
|
24
|
+
|
25
|
+
def govspeak_to_html
|
26
|
+
Govspeak::Document.new(string).to_html
|
27
|
+
end
|
28
|
+
|
29
|
+
def sanitize_html(dirty_html)
|
30
|
+
Sanitize.clean(dirty_html, sanitize_config)
|
31
|
+
end
|
32
|
+
|
33
|
+
def sanitize_config
|
34
|
+
config = Sanitize::Config::RELAXED.dup
|
35
|
+
config[:attributes][:all].push("id", "class")
|
36
|
+
config[:attributes]["a"].push("rel")
|
37
|
+
config[:elements].push("div", "hr")
|
38
|
+
config
|
39
|
+
end
|
40
|
+
end
|
data/lib/govspeak/version.rb
CHANGED
data/test/govspeak_test.rb
CHANGED
@@ -432,4 +432,13 @@ $CTA
|
|
432
432
|
end
|
433
433
|
end
|
434
434
|
|
435
|
+
test "identifies a Govspeak document containing malicious HTML as invalid" do
|
436
|
+
document = Govspeak::Document.new("<script>doBadThings();</script>")
|
437
|
+
refute document.valid?
|
438
|
+
end
|
439
|
+
|
440
|
+
test "identifies a Govspeak document containing acceptable HTML as valid" do
|
441
|
+
document = Govspeak::Document.new("<div>some content</div>")
|
442
|
+
assert document.valid?
|
443
|
+
end
|
435
444
|
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
require "test_helper"
|
2
|
+
|
3
|
+
class HtmlValidatorTest < Test::Unit::TestCase
|
4
|
+
test "allow Govspeak Markdown" do
|
5
|
+
values = [
|
6
|
+
"## is H2",
|
7
|
+
"*bold text*",
|
8
|
+
"* bullet",
|
9
|
+
"- alternative bullet",
|
10
|
+
"+ another bullet",
|
11
|
+
"1. Numbered list",
|
12
|
+
"s2. Step",
|
13
|
+
"""
|
14
|
+
Table | Header
|
15
|
+
- | -
|
16
|
+
Build | cells
|
17
|
+
""",
|
18
|
+
"This is [an example](/an-inline-link \"Title\") inline link.",
|
19
|
+
"<http://example.com/>",
|
20
|
+
"<address@example.com>",
|
21
|
+
"This is [an example](http://example.com/ \"Title\"){:rel=\"external\"} inline link to an external resource.",
|
22
|
+
"^Your text here^ - creates a callout with an info (i) icon.",
|
23
|
+
"%Your text here% - creates a callout with a warning or alert (!) icon",
|
24
|
+
"@Your text here@ - highlights the enclosed text in yellow",
|
25
|
+
"$CSome contact information here$C - contact information",
|
26
|
+
"$A Hercules House Hercules Road London SE1 7DU $A",
|
27
|
+
"$D [An example form download link](http://example.com/ \"Example form\") Something about this form download $D",
|
28
|
+
"$EAn example for the citizen$E - examples boxout",
|
29
|
+
"$!...$! - answer summary",
|
30
|
+
"{::highlight-answer}...{:/highlight-answer} - creates a large pink highlight box with optional preamble text and giant text denoted with **.",
|
31
|
+
"{::highlight-answer}",
|
32
|
+
"The VAT rate is *20%*",
|
33
|
+
"{:/highlight-answer}",
|
34
|
+
"---",
|
35
|
+
"*[GDS]: Government Digital Service",
|
36
|
+
"""
|
37
|
+
$P
|
38
|
+
|
39
|
+
$I
|
40
|
+
$A
|
41
|
+
Hercules House
|
42
|
+
Hercules Road
|
43
|
+
London SE1 7DU
|
44
|
+
$A
|
45
|
+
|
46
|
+
$AI
|
47
|
+
There is access to the building from the street via a ramp.
|
48
|
+
$AI
|
49
|
+
$I
|
50
|
+
$P
|
51
|
+
""",
|
52
|
+
":england:content goes here:england:",
|
53
|
+
":scotland:content goes here:scotland:"
|
54
|
+
]
|
55
|
+
values.each do |value|
|
56
|
+
assert Govspeak::HtmlValidator.new(value).valid?
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
test "disallow a script tags" do
|
61
|
+
assert Govspeak::HtmlValidator.new("<script>alert('XSS')</script>").invalid?
|
62
|
+
end
|
63
|
+
|
64
|
+
test "disallow a javascript protocol in an attribute" do
|
65
|
+
html = %q{<a href="javascript:alert(document.location);"
|
66
|
+
title="Title">an example</a>}
|
67
|
+
assert Govspeak::HtmlValidator.new(html).invalid?
|
68
|
+
end
|
69
|
+
|
70
|
+
test "disallow a javascript protocol in a Markdown link" do
|
71
|
+
html = %q{This is [an example](javascript:alert(""); "Title") inline link.}
|
72
|
+
assert Govspeak::HtmlValidator.new(html).invalid?
|
73
|
+
end
|
74
|
+
|
75
|
+
test "disallow on* attributes" do
|
76
|
+
html = %q{<a href="/" onclick="alert('xss');">Link</a>}
|
77
|
+
assert Govspeak::HtmlValidator.new(html).invalid?
|
78
|
+
end
|
79
|
+
|
80
|
+
test "allow non-JS HTML content" do
|
81
|
+
assert Govspeak::HtmlValidator.new("<a href='foo'>").valid?
|
82
|
+
end
|
83
|
+
|
84
|
+
test "allow things that will end up as HTML entities" do
|
85
|
+
assert Govspeak::HtmlValidator.new("Fortnum & Mason").valid?
|
86
|
+
end
|
87
|
+
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: govspeak
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 1.0.0
|
5
|
+
version: 1.0.1
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Ben Griffiths
|
@@ -11,7 +11,7 @@ autorequire:
|
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
13
|
|
14
|
-
date: 2012-
|
14
|
+
date: 2012-09-06 00:00:00 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: kramdown
|
@@ -36,8 +36,19 @@ dependencies:
|
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: *id002
|
38
38
|
- !ruby/object:Gem::Dependency
|
39
|
-
name: rake
|
39
|
+
name: sanitize
|
40
40
|
requirement: &id003 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - "="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 2.0.3
|
46
|
+
type: :runtime
|
47
|
+
prerelease: false
|
48
|
+
version_requirements: *id003
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: rake
|
51
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
41
52
|
none: false
|
42
53
|
requirements:
|
43
54
|
- - ~>
|
@@ -45,10 +56,10 @@ dependencies:
|
|
45
56
|
version: 0.9.0
|
46
57
|
type: :development
|
47
58
|
prerelease: false
|
48
|
-
version_requirements: *id003
|
59
|
+
version_requirements: *id004
|
49
60
|
- !ruby/object:Gem::Dependency
|
50
61
|
name: gem_publisher
|
51
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
62
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
52
63
|
none: false
|
53
64
|
requirements:
|
54
65
|
- - ~>
|
@@ -56,10 +67,10 @@ dependencies:
|
|
56
67
|
version: 1.1.1
|
57
68
|
type: :development
|
58
69
|
prerelease: false
|
59
|
-
version_requirements: *id004
|
70
|
+
version_requirements: *id005
|
60
71
|
- !ruby/object:Gem::Dependency
|
61
72
|
name: simplecov
|
62
|
-
requirement: &id005 !ruby/object:Gem::Requirement
|
73
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
63
74
|
none: false
|
64
75
|
requirements:
|
65
76
|
- - ">="
|
@@ -67,10 +78,10 @@ dependencies:
|
|
67
78
|
version: "0"
|
68
79
|
type: :development
|
69
80
|
prerelease: false
|
70
|
-
version_requirements: *id005
|
81
|
+
version_requirements: *id006
|
71
82
|
- !ruby/object:Gem::Dependency
|
72
83
|
name: simplecov-rcov
|
73
|
-
requirement: &id006 !ruby/object:Gem::Requirement
|
84
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
74
85
|
none: false
|
75
86
|
requirements:
|
76
87
|
- - ">="
|
@@ -78,7 +89,7 @@ dependencies:
|
|
78
89
|
version: "0"
|
79
90
|
type: :development
|
80
91
|
prerelease: false
|
81
|
-
version_requirements: *id006
|
92
|
+
version_requirements: *id007
|
82
93
|
description: |-
|
83
94
|
A set of extensions to markdown layered on top of the kramdown
|
84
95
|
library for use in the UK Government Single Domain project
|
@@ -94,11 +105,13 @@ extra_rdoc_files: []
|
|
94
105
|
files:
|
95
106
|
- lib/govspeak/version.rb
|
96
107
|
- lib/govspeak/header_extractor.rb
|
108
|
+
- lib/govspeak/html_validator.rb
|
97
109
|
- lib/govspeak.rb
|
98
110
|
- lib/kramdown/parser/kramdown_with_automatic_external_links.rb
|
99
111
|
- README.md
|
100
112
|
- Gemfile
|
101
113
|
- Rakefile
|
114
|
+
- test/html_validator_test.rb
|
102
115
|
- test/govspeak_test_helper.rb
|
103
116
|
- test/govspeak_test.rb
|
104
117
|
- test/test_helper.rb
|
@@ -115,7 +128,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
115
128
|
requirements:
|
116
129
|
- - ">="
|
117
130
|
- !ruby/object:Gem::Version
|
118
|
-
hash:
|
131
|
+
hash: -3025037898440193908
|
119
132
|
segments:
|
120
133
|
- 0
|
121
134
|
version: "0"
|
@@ -124,7 +137,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
124
137
|
requirements:
|
125
138
|
- - ">="
|
126
139
|
- !ruby/object:Gem::Version
|
127
|
-
hash:
|
140
|
+
hash: -3025037898440193908
|
128
141
|
segments:
|
129
142
|
- 0
|
130
143
|
version: "0"
|
@@ -136,6 +149,7 @@ signing_key:
|
|
136
149
|
specification_version: 3
|
137
150
|
summary: Markup language for single domain
|
138
151
|
test_files:
|
152
|
+
- test/html_validator_test.rb
|
139
153
|
- test/govspeak_test_helper.rb
|
140
154
|
- test/govspeak_test.rb
|
141
155
|
- test/test_helper.rb
|