decontaminator 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1342e0fb22e699e0c0713c095b31231f2d161c46
4
+ data.tar.gz: a00919d401c57f5c0aae265842d6bfc8b7f946a2
5
+ SHA512:
6
+ metadata.gz: 9bd7cd77f541ec1176fce4b3e7ca3f00f6fc24c1bd6e813e06212694a1b2c62abba407dfa030883ecda07f41527054018ccd15bceff99b82dc84394ef2c5f802
7
+ data.tar.gz: dd07913b676ca10a1230158fdc2aeab321a3fbeff9ee436960b346a34135f4671e61ec2f03bf6da1fd84dc157524371a58bd5d44d3c56998e039068019854bae
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Altmetric LLP
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,48 @@
1
+ # Decontaminator [![Build Status](https://travis-ci.org/altmetric/decontaminator.svg?branch=master)](https://travis-ci.org/altmetric/decontaminator)
2
+
3
+ Ruby HTML sanitizer built on the lightweight Oga parser.
4
+
5
+ **Current version:** 1.0.0
6
+
7
+ **Supported Ruby versions:** 1.9.3, 2.0, 2.1, 2.2, JRuby 1.7, and Rubinius 2.5
8
+
9
+ ## Installation
10
+
11
+ Add this line to your application's Gemfile:
12
+
13
+ ```ruby
14
+ gem 'decontaminator'
15
+ ```
16
+
17
+ And then execute:
18
+
19
+ $ bundle
20
+
21
+ Or install it yourself as:
22
+
23
+ $ gem install decontaminator
24
+
25
+ ## Usage
26
+
27
+ ```ruby
28
+ require 'decontaminator'
29
+
30
+ input = '<h1>Heading</h1><p>Lorem ipsum...</p><script>alert(1)</script>'
31
+ fragment = Decontaminator::Fragment.new(input)
32
+ puts fragment.decontaminate.inspect
33
+ # Output: " Heading  Lorem ipsum... "
34
+ ```
35
+
36
+ ## Contributing
37
+
38
+ 1. Fork it ( https://github.com/altmetric/decontaminator/fork )
39
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
40
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
41
+ 4. Push to the branch (`git push origin my-new-feature`)
42
+ 5. Create a new Pull Request
43
+
44
+ ## License
45
+
46
+ Copyright © 2015 Altmetric LLP
47
+
48
+ Distributed under the [MIT license](https://github.com/altmetric/decontaminator/blob/master/LICENSE.txt).
@@ -0,0 +1 @@
1
+ require 'decontaminator/fragment'
@@ -0,0 +1,80 @@
1
+ require 'oga'
2
+
3
module Decontaminator
  # Reduces an HTML fragment to its plain-text content.
  #
  # The fragment is parsed with Oga and walked recursively. Text nodes are
  # kept verbatim, blacklisted elements are dropped together with everything
  # inside them, and block-level elements are padded with a single space on
  # each side so that adjacent blocks do not run together.
  class Fragment
    # Elements whose content is never human-readable text; always removed.
    NON_CONTENT_TAGS = %w(
      script
      style
    ).freeze

    # Elements that get one space emitted before and after their content.
    WHITESPACE_CONTENT_TAGS = %w(
      address
      article
      aside
      blockquote
      br
      dd
      div
      dl
      dt
      footer
      h1
      h2
      h3
      h4
      h5
      h6
      header
      hgroup
      hr
      li
      nav
      ol
      p
      pre
      section
      ul
    ).freeze

    # @param html_fragment [String] the HTML to be sanitized
    def initialize(html_fragment)
      @html_fragment = html_fragment
    end

    # Returns the text content of the fragment.
    #
    # @param options [Hash] optional settings
    # @option options [Array<String>] :blacklist extra tag names to remove
    #   (with their content) in addition to NON_CONTENT_TAGS
    # @return [String] the concatenated text of all remaining nodes
    def decontaminate(options = {})
      blacklisted_tags = NON_CONTENT_TAGS + options.fetch(:blacklist, [])

      sanitize(Oga.parse_html(html_fragment).children, blacklisted_tags)
    end

    private

    attr_reader :html_fragment

    # Joins the text of every node in +node_set+ that is not a blacklisted
    # element, surrounding block-level elements with spaces.
    def sanitize(node_set, blacklisted_tags)
      node_set
        .reject { |node| element?(node) && blacklisted_tags.include?(node.name) }
        .flat_map do |node|
          padding = whitespace(node)
          [padding, text(node, blacklisted_tags), padding]
        end
        .join
    end

    def text?(node)
      node.is_a?(Oga::XML::Text)
    end

    # Only elements are asked for a tag name; other node kinds (comments,
    # CDATA, ...) are never sent +name+, so they cannot raise here.
    def element?(node)
      node.is_a?(Oga::XML::Element)
    end

    # A single space for block-level elements, nothing for any other node.
    def whitespace(node)
      if element?(node) && WHITESPACE_CONTENT_TAGS.include?(node.name)
        ' '
      else
        ''
      end
    end

    # Text nodes yield their own text, elements yield the sanitized text of
    # their children, and every other node kind contributes nothing.
    def text(node, blacklisted_tags)
      if text?(node)
        node.text
      elsif element?(node)
        sanitize(node.children, blacklisted_tags)
      else
        ''
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Decontaminator
  # Version of the gem; frozen so the constant's value cannot be mutated
  # in place by callers.
  VERSION = '1.0.0'.freeze
end
@@ -0,0 +1,49 @@
1
+ require 'decontaminator'
2
+
3
# Examples for Decontaminator::Fragment#decontaminate.
#
# Each block-level element contributes one space before and one space after
# its content, so nested or adjacent block elements produce runs of several
# spaces; the expected strings below spell those runs out exactly.
RSpec.describe Decontaminator::Fragment do
  describe '#decontaminate' do
    it 'sanitizes an empty string' do
      expect(described_class.new('').decontaminate).to eq('')
    end

    it 'sanitizes an empty paragraph' do
      # Prefix space + suffix space of the <p>, with nothing in between.
      expect(described_class.new('<p></p>').decontaminate).to eq('  ')
    end

    it 'sanitizes a paragraph' do
      expect(described_class.new('<p>Text</p>').decontaminate).to eq(' Text ')
    end

    it 'sanitizes a formatted paragraph' do
      expect(described_class.new('<p><b>Some</b> <i>text</i></p>').decontaminate).to eq(' Some text ')
    end

    it 'sanitizes a formatted paragraph with attributes' do
      expect(described_class.new('<p class="text"><b data-important>Some</b> <i>text</i></p>').decontaminate).to eq(' Some text ')
    end

    it 'sanitizes two formatted paragraphs' do
      # Suffix of the first <p> and prefix of the second give two spaces.
      expect(described_class.new('<p>Paragraph one.</p><p>Paragraph two.</p>').decontaminate).to eq(' Paragraph one.  Paragraph two. ')
    end

    it 'sanitizes a link' do
      expect(described_class.new('<a href="#">link</a>').decontaminate).to eq('link')
    end

    it 'sanitizes a script' do
      # NOTE(review): the closing tag below lacks its final '>' although the
      # payload calls itself "well formatted" -- confirm whether intentional.
      expect(described_class.new('<script>alert("I am evil but well formatted!");</script').decontaminate).to eq('')
    end

    it 'sanitizes a stylesheet' do
      expect(described_class.new('<style>a{color:red}</style>').decontaminate).to eq('')
    end

    it 'sanitizes multiple tags' do
      # div/section/p open (3 spaces), p/section close + aside/p open (4),
      # p/aside/div close (3).
      expect(described_class.new('<div><section><p>Section.</p></section><aside><p>Aside.</p></aside></div>').decontaminate).to eq('   Section.    Aside.   ')
    end

    it 'sanitizes content with blacklisted tags' do
      expect(described_class.new('<figcaption>Blacklist this</figcaption><p>but not that</p>').decontaminate(blacklist: %w(figcaption))).to eq(' but not that ')
    end
  end
end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: decontaminator
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Matthew MacLeod
8
+ - Paul Mucur
9
+ - Jakub Pawlowicz
10
+ autorequire:
11
+ bindir: bin
12
+ cert_chain: []
13
+ date: 2015-09-16 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: oga
17
+ requirement: !ruby/object:Gem::Requirement
18
+ requirements:
19
+ - - "~>"
20
+ - !ruby/object:Gem::Version
21
+ version: '1.3'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ requirements:
26
+ - - "~>"
27
+ - !ruby/object:Gem::Version
28
+ version: '1.3'
29
+ description: Ruby HTML sanitizer based on a lightweight Oga parser.
30
+ email:
31
+ - matt@matt-m.co.uk
32
+ - mudge@mudge.name
33
+ - jakub@altmetric.com
34
+ executables: []
35
+ extensions: []
36
+ extra_rdoc_files: []
37
+ files:
38
+ - LICENSE.txt
39
+ - README.md
40
+ - lib/decontaminator.rb
41
+ - lib/decontaminator/fragment.rb
42
+ - lib/decontaminator/version.rb
43
+ - spec/decontaminator/fragment_spec.rb
44
+ homepage: https://github.com/altmetric/decontaminator
45
+ licenses:
46
+ - MIT
47
+ metadata: {}
48
+ post_install_message:
49
+ rdoc_options: []
50
+ require_paths:
51
+ - lib
52
+ required_ruby_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ requirements: []
63
+ rubyforge_project:
64
+ rubygems_version: 2.4.8
65
+ signing_key:
66
+ specification_version: 4
67
+ summary: HTML sanitizer using lightweight Oga HTML parser.
68
+ test_files:
69
+ - spec/decontaminator/fragment_spec.rb