trenni-sanitize 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: be872bc0f1f2f9b2fca5c5a8738090aea3faf1f2880b9201447edad78419f0ab
4
- data.tar.gz: 822005447509aaabe1c4ee6fbc9b97fe17c7b4eb7f7f9faa7e0697e394e955be
3
+ metadata.gz: bf3bbcdec106d73031c57448faf27778de703faa62645433a3d533589886aa56
4
+ data.tar.gz: 335bf0b72300dcf0fbceb03da54fae1b22d30cdf30c73073803f9d402dab4cbd
5
5
  SHA512:
6
- metadata.gz: 70651108fffab5e259895627a312b9eadfa66233c8f6f7e0ec8de4f56cdfe0bf7cbec26d6ca88d4db3ad1e0be24c664282a1f3d932e73b7336b0054a305a790d
7
- data.tar.gz: 31c18d416cc9735cdf39e4b348b854db3a81a06041e66fcbcd87970165ba8b08e2a7b491f0f617aa5e58d051a66337a5dca4040c17f53f55f706dfe032355916
6
+ metadata.gz: dfeffd7990d49492e2cbb18fef5a630359464b3b35e2dab8026134674c0d7d9a560a45e806a8de1f0d4b4a32c867c2ae4116b9f25a119ce837ae83930251b52c
7
+ data.tar.gz: 91dab7bf62fb90065d0e25088d0df36c883ced831f05047bab6dc431252b9f005faaae3e5a7a579a985761a5b1f03482d7bd2fc0740f45c29cc4559985969471
Binary file
Binary file
@@ -36,6 +36,11 @@ module Trenni
36
36
  'em' => STANDARD_ATTRIBUTES,
37
37
  'strong' => STANDARD_ATTRIBUTES,
38
38
  'ul' => STANDARD_ATTRIBUTES,
39
+ 'ol' => STANDARD_ATTRIBUTES,
40
+ 'li' => STANDARD_ATTRIBUTES,
41
+ 'dl' => STANDARD_ATTRIBUTES,
42
+ 'dt' => STANDARD_ATTRIBUTES,
43
+ 'dd' => STANDARD_ATTRIBUTES,
39
44
  'strike' => STANDARD_ATTRIBUTES,
40
45
  'h1' => STANDARD_ATTRIBUTES,
41
46
  'h2' => STANDARD_ATTRIBUTES,
@@ -20,6 +20,6 @@
20
20
 
21
21
  module Trenni
22
22
  module Sanitize
23
- VERSION = "0.5.0"
23
+ VERSION = "0.6.0"
24
24
  end
25
25
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: trenni-sanitize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Samuel Williams
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-02-20 00:00:00.000000000 Z
11
+ date: 2020-07-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: trenni
@@ -25,7 +25,7 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: '3.5'
27
27
  - !ruby/object:Gem::Dependency
28
- name: covered
28
+ name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
@@ -39,7 +39,7 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: bundler
42
+ name: covered
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
@@ -66,48 +66,25 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '3.4'
69
- - !ruby/object:Gem::Dependency
70
- name: rake
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- version: '0'
76
- type: :development
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- version: '0'
83
- description:
69
+ description:
84
70
  email:
85
- - samuel.williams@oriontransfer.co.nz
86
71
  executables: []
87
72
  extensions: []
88
73
  extra_rdoc_files: []
89
74
  files:
90
- - ".gitignore"
91
- - ".rspec"
92
- - ".travis.yml"
93
- - Gemfile
94
- - README.md
95
- - Rakefile
75
+ - lib/.DS_Store
76
+ - lib/trenni/.DS_Store
96
77
  - lib/trenni/sanitize.rb
97
78
  - lib/trenni/sanitize/filter.rb
98
79
  - lib/trenni/sanitize/fragment.rb
99
80
  - lib/trenni/sanitize/text.rb
100
81
  - lib/trenni/sanitize/version.rb
101
- - spec/spec_helper.rb
102
- - spec/trenni/sanitize/benchmark_spec.rb
103
- - spec/trenni/sanitize/fragment_spec.rb
104
- - spec/trenni/sanitize/sample.html
105
- - spec/trenni/sanitize/text_spec.rb
106
- - trenni-sanitize.gemspec
107
82
  homepage: https://github.com/ioquatix/trenni-sanitize
108
- licenses: []
109
- metadata: {}
110
- post_install_message:
83
+ licenses:
84
+ - MIT
85
+ metadata:
86
+ funding_uri: https://github.com/sponsors/ioquatix/
87
+ post_install_message:
111
88
  rdoc_options: []
112
89
  require_paths:
113
90
  - lib
@@ -115,7 +92,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
115
92
  requirements:
116
93
  - - "~>"
117
94
  - !ruby/object:Gem::Version
118
- version: '2.4'
95
+ version: '2.5'
119
96
  required_rubygems_version: !ruby/object:Gem::Requirement
120
97
  requirements:
121
98
  - - ">="
@@ -123,12 +100,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
123
100
  version: '0'
124
101
  requirements: []
125
102
  rubygems_version: 3.1.2
126
- signing_key:
103
+ signing_key:
127
104
  specification_version: 4
128
105
  summary: Sanitize markdown according to a set of rules.
129
- test_files:
130
- - spec/spec_helper.rb
131
- - spec/trenni/sanitize/benchmark_spec.rb
132
- - spec/trenni/sanitize/fragment_spec.rb
133
- - spec/trenni/sanitize/sample.html
134
- - spec/trenni/sanitize/text_spec.rb
106
+ test_files: []
data/.gitignore DELETED
@@ -1,19 +0,0 @@
1
- *.gem
2
- *.rbc
3
- .bundle
4
- .config
5
- .yardoc
6
- Gemfile.lock
7
- InstalledFiles
8
- _yardoc
9
- coverage
10
- doc/
11
- lib/bundler/man
12
- pkg
13
- rdoc
14
- spec/reports
15
- test/tmp
16
- test/version_tmp
17
- tmp
18
-
19
- lib/trenni/trenni.bundle
data/.rspec DELETED
@@ -1,5 +0,0 @@
1
- --color
2
- --format documentation
3
- --backtrace
4
- --warnings
5
- --require spec_helper
@@ -1,22 +0,0 @@
1
- language: ruby
2
- dist: xenial
3
- cache: bundler
4
-
5
- matrix:
6
- include:
7
- - rvm: 2.4
8
- - rvm: 2.5
9
- - rvm: 2.6
10
- - rvm: 2.6
11
- os: osx
12
- - rvm: 2.6
13
- env: COVERAGE=BriefSummary,Coveralls
14
- - rvm: 2.7
15
- - rvm: truffleruby
16
- - rvm: jruby-head
17
- env: JRUBY_OPTS="--debug -X+O"
18
- - rvm: ruby-head
19
- allow_failures:
20
- - rvm: truffleruby
21
- - rvm: ruby-head
22
- - rvm: jruby-head
data/Gemfile DELETED
@@ -1,16 +0,0 @@
1
- source 'https://rubygems.org'
2
-
3
- # Specify your gem's dependencies in trenni.gemspec
4
- gemspec
5
-
6
- group :development do
7
- gem 'pry'
8
- end
9
-
10
- group :test do
11
- gem 'ruby-prof', platforms: [:mri]
12
- gem "benchmark-ips"
13
-
14
- # For comparisons:
15
- gem "sanitize"
16
- end
data/README.md DELETED
@@ -1,139 +0,0 @@
1
- # Trenni::Sanitize
2
-
3
- Sanitize markup by adding, changing or removing tags, using the [trenni] stream processor (which has a native C implementation).
4
-
5
- [![Build Status](https://travis-ci.com/ioquatix/trenni-sanitize.svg)](https://travis-ci.com/ioquatix/trenni-sanitize)
6
- [![Code Climate](https://codeclimate.com/github/ioquatix/trenni-sanitize.svg)](https://codeclimate.com/github/ioquatix/trenni-sanitize)
7
- [![Coverage Status](https://coveralls.io/repos/ioquatix/trenni-sanitize/badge.svg)](https://coveralls.io/r/ioquatix/trenni-sanitize)
8
-
9
- [trenni]: https://github.com/ioquatix/trenni
10
-
11
- ## Motivation
12
-
13
- I use the [sanitize] gem and generally it's great. However, its performance can be an issue and, additionally, it doesn't preserve tag namespaces when parsing fragments due to how Nokogiri works internally. This is a problem when processing content destined for [utopia] since it heavily depends on tag namespaces.
14
-
15
- [sanitize]: https://github.com/rgrove/sanitize/
16
- [utopia]: https://github.com/ioquatix/utopia
17
-
18
- ## Is it fast?
19
-
20
- In my informal testing, this gem is about 50x faster than the [sanitize] gem when generating plain text.
21
-
22
- ```
23
- Warming up --------------------------------------
24
- Sanitize 96.000 i/100ms
25
- Trenni::Sanitize 4.447k i/100ms
26
- Calculating -------------------------------------
27
- Sanitize 958.020 (± 4.5%) i/s - 4.800k in 5.020564s
28
- Trenni::Sanitize 44.718k (± 4.2%) i/s - 226.797k in 5.080756s
29
-
30
- Comparison:
31
- Trenni::Sanitize: 44718.1 i/s
32
- Sanitize: 958.0 i/s - 46.68x slower
33
- ```
34
-
35
- ## Installation
36
-
37
- Add this line to your application's Gemfile:
38
-
39
- gem 'trenni-sanitize'
40
-
41
- And then execute:
42
-
43
- $ bundle
44
-
45
- Or install it yourself as:
46
-
47
- $ gem install trenni-sanitize
48
-
49
- ## Usage
50
-
51
- `Trenni::Sanitize::Filter` is a stream-based processor. That means it parses the incoming markup and makes decisions about what to keep and what to discard during parsing.
52
-
53
- ### Extracting Text
54
-
55
- You can extract text using something similar to the following parser delegate:
56
-
57
- ```ruby
58
- class Text < Trenni::Sanitize::Filter
59
- def filter(node)
60
- node.skip!(TAG)
61
- end
62
-
63
- def doctype(string)
64
- end
65
-
66
- def instruction(string)
67
- end
68
- end
69
-
70
- text = Text.parse("<p>Hello World</p>").output
71
- # => "Hello World"
72
- ```
73
-
74
- ### Extracting Safe Markup
75
-
76
- Here is a simple filter that only allows a limited set of tags:
77
-
78
- ```ruby
79
- class Fragment < Trenni::Sanitize::Filter
80
- STANDARD_ATTRIBUTES = ['class'].freeze
81
-
82
- ALLOWED_TAGS = {
83
- 'em' => [],
84
- 'strong' => [],
85
- 'p' => [],
86
- 'img' => ['src', 'alt', 'width', 'height'],
87
- 'a' => ['href']
88
- }.freeze
89
-
90
- def filter(node)
91
- if attributes = ALLOWED_TAGS[node.name]
92
- node.tag.attributes.slice!(*attributes)
93
- else
94
- # Skip the tag, and all contents
95
- node.skip!(ALL)
96
- end
97
- end
98
-
99
- def doctype(string)
100
- end
101
-
102
- def instruction(string)
103
- end
104
- end
105
- ```
106
-
107
- As you can see, while [sanitize] is driven by configuration, `Trenni::Sanitize::Filter` is driven by code.
108
-
109
- ## Contributing
110
-
111
- 1. Fork it
112
- 2. Create your feature branch (`git checkout -b my-new-feature`)
113
- 3. Commit your changes (`git commit -am 'Add some feature'`)
114
- 4. Push to the branch (`git push origin my-new-feature`)
115
- 5. Create new Pull Request
116
-
117
- ## License
118
-
119
- Released under the MIT license.
120
-
121
- Copyright, 2018, by [Samuel G. D. Williams](http://www.codeotaku.com/samuel-williams).
122
-
123
- Permission is hereby granted, free of charge, to any person obtaining a copy
124
- of this software and associated documentation files (the "Software"), to deal
125
- in the Software without restriction, including without limitation the rights
126
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
127
- copies of the Software, and to permit persons to whom the Software is
128
- furnished to do so, subject to the following conditions:
129
-
130
- The above copyright notice and this permission notice shall be included in
131
- all copies or substantial portions of the Software.
132
-
133
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
134
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
135
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
136
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
137
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
138
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
139
- THE SOFTWARE.
data/Rakefile DELETED
@@ -1,19 +0,0 @@
1
- require "bundler/gem_tasks"
2
- require "rspec/core/rake_task"
3
-
4
- # Load all rake tasks:
5
- import(*Dir.glob('tasks/**/*.rake'))
6
-
7
- RSpec::Core::RakeTask.new
8
-
9
- task :environment do
10
- $LOAD_PATH.unshift File.expand_path('lib', __dir__)
11
- end
12
-
13
- task :console => :environment do
14
- require 'pry'
15
-
16
- Pry.start
17
- end
18
-
19
- task :default => :spec
@@ -1,36 +0,0 @@
1
-
2
- require 'covered/rspec'
3
- require 'bundler/setup'
4
-
5
- begin
6
- require 'ruby-prof'
7
-
8
- RSpec.shared_context "profile" do
9
- before(:all) do
10
- RubyProf.start
11
- end
12
-
13
- after(:all) do
14
- result = RubyProf.stop
15
-
16
- # Print a flat profile to text
17
- printer = RubyProf::FlatPrinter.new(result)
18
- printer.print(STDOUT)
19
- end
20
- end
21
- rescue LoadError
22
- RSpec.shared_context "profile" do
23
- before(:all) do
24
- puts "Profiling not supported on this platform."
25
- end
26
- end
27
- end
28
-
29
- RSpec.configure do |config|
30
- # Enable flags like --only-failures and --next-failure
31
- config.example_status_persistence_file_path = ".rspec_status"
32
-
33
- config.expect_with :rspec do |c|
34
- c.syntax = :expect
35
- end
36
- end
@@ -1,36 +0,0 @@
1
-
2
- require 'sanitize'
3
- require 'benchmark/ips'
4
-
5
- require 'trenni/sanitize/text'
6
-
7
- RSpec.describe Trenni::Sanitize do
8
- let(:buffer) {Trenni::Buffer.load_file(File.join(__dir__, "sample.html"))}
9
-
10
- it "should be faster than alternatives" do
11
- config = Sanitize::Config.freeze_config(
12
- :elements => %w[b i em strong ul li strike h1 h2 h3 h4 h5 h6 p img image a],
13
- :attributes => {
14
- 'img' => %w[src alt width],
15
- 'a' => %w[href]
16
- },
17
- )
18
-
19
- text = buffer.read
20
-
21
- # puts Sanitize.fragment(text).inspect
22
- # puts Trenni::Sanitize::Text.parse(buffer).output.inspect
23
-
24
- Benchmark.ips do |x|
25
- x.report("Sanitize") do
26
- Sanitize.fragment text
27
- end
28
-
29
- x.report("Trenni::Sanitize") do
30
- Trenni::Sanitize::Text.parse(buffer)
31
- end
32
-
33
- x.compare!
34
- end
35
- end
36
- end
@@ -1,66 +0,0 @@
1
- # Copyright, 2018, by Samuel G. D. Williams. <http://www.codeotaku.com>
2
- #
3
- # Permission is hereby granted, free of charge, to any person obtaining a copy
4
- # of this software and associated documentation files (the "Software"), to deal
5
- # in the Software without restriction, including without limitation the rights
6
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
- # copies of the Software, and to permit persons to whom the Software is
8
- # furnished to do so, subject to the following conditions:
9
- #
10
- # The above copyright notice and this permission notice shall be included in
11
- # all copies or substantial portions of the Software.
12
- #
13
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
- # THE SOFTWARE.
20
-
21
- require 'trenni/sanitize/fragment'
22
-
23
- RSpec.describe Trenni::Sanitize::Fragment do
24
- it "should filter out script tags" do
25
- fragment = described_class.parse("<p onclick='malicious()'>Hello World</p><script>doot()</script>")
26
-
27
- expect(fragment.output).to be == "<p>Hello World</p>"
28
- end
29
-
30
- it "should filter out nested script tags" do
31
- fragment = described_class.parse("<div><p>Hello World</p><script>doot()</script></div>")
32
-
33
- expect(fragment.output).to be == "<div><p>Hello World</p></div>"
34
- end
35
-
36
- it "should filter out tags" do
37
- fragment = described_class.parse("<p onclick='malicious()'>Hello World</p><script>script</script>")
38
-
39
- expect(fragment.output).to be == "<p>Hello World</p>"
40
- end
41
-
42
- it "should ignore unbalanced closing tags" do
43
- fragment = described_class.parse("<p>Hello World</a></p>")
44
-
45
- expect(fragment.output).to be == "<p>Hello World</p>"
46
- end
47
-
48
- it "should include trailing text" do
49
- fragment = described_class.parse("Hello<script/>World")
50
-
51
- expect(fragment.output).to be == "HelloWorld"
52
- end
53
-
54
- it "should escape text" do
55
- fragment = described_class.parse("x&amp;y")
56
-
57
- expect(fragment.output).to be == "x&amp;y"
58
- end
59
-
60
- it "should include nested img" do
61
- fragment = described_class.parse("<table><img src='foo'/></table>")
62
-
63
- expect(fragment.output).to be == "<img src=\"foo\"/>"
64
- end
65
- end
66
-
@@ -1,12 +0,0 @@
1
- <hr>
2
- <a href="http://somegreatsite.com">Link Name</a>
3
- is a link to another nifty site
4
- <h1>This is a Header</h1>
5
- <h1>This is a Medium Header</h2>
6
- Send me mail at <a href="mailto:support@yourcompany.com">
7
- support@yourcompany.com</a>.
8
- <hr>
9
- <p>This is a new paragraph!</p>
10
- <p><b>This is a new paragraph!</b></p>
11
- <br/><b><i>This is a new sentence without a paragraph break, in bold italics.</i></b>
12
- <hr>
@@ -1,43 +0,0 @@
1
- # Copyright, 2019, by Samuel G. D. Williams. <http://www.codeotaku.com>
2
- #
3
- # Permission is hereby granted, free of charge, to any person obtaining a copy
4
- # of this software and associated documentation files (the "Software"), to deal
5
- # in the Software without restriction, including without limitation the rights
6
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
- # copies of the Software, and to permit persons to whom the Software is
8
- # furnished to do so, subject to the following conditions:
9
- #
10
- # The above copyright notice and this permission notice shall be included in
11
- # all copies or substantial portions of the Software.
12
- #
13
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
- # THE SOFTWARE.
20
-
21
- require 'trenni/sanitize/text'
22
-
23
- RSpec.describe Trenni::Sanitize::Text do
24
- let(:text) {"One\n\nTwo\n\nThree\n\n"}
25
-
26
- it "passes through plain text unchanged" do
27
- fragment = described_class.parse(text)
28
-
29
- expect(fragment.output).to be == text
30
- end
31
-
32
- it "should extract text" do
33
- fragment = described_class.parse("<p onclick='malicious()'>Hello World</p><script>doot()</script>")
34
-
35
- expect(fragment.output).to be == "Hello World\n\n"
36
- end
37
-
38
- it "replaces line breaks" do
39
- fragment = described_class.parse("One<br/>Two<br/>Three")
40
-
41
- expect(fragment.output).to be == "One\n\nTwo\n\nThree"
42
- end
43
- end
@@ -1,26 +0,0 @@
1
-
2
- require_relative 'lib/trenni/sanitize/version'
3
-
4
- Gem::Specification.new do |spec|
5
- spec.name = "trenni-sanitize"
6
- spec.platform = Gem::Platform::RUBY
7
- spec.version = Trenni::Sanitize::VERSION
8
- spec.authors = ["Samuel Williams"]
9
- spec.email = ["samuel.williams@oriontransfer.co.nz"]
10
- spec.summary = %q{Sanitize markdown according to a set of rules.}
11
- spec.homepage = "https://github.com/ioquatix/trenni-sanitize"
12
-
13
- spec.files = `git ls-files`.split($/)
14
- spec.executables = spec.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
15
- spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
16
- spec.require_paths = ["lib"]
17
-
18
- spec.required_ruby_version = '~> 2.4'
19
-
20
- spec.add_dependency "trenni", '~> 3.5'
21
-
22
- spec.add_development_dependency "covered"
23
- spec.add_development_dependency "bundler"
24
- spec.add_development_dependency "rspec", "~> 3.4"
25
- spec.add_development_dependency "rake"
26
- end