trenni-sanitize 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +14 -19
- data/lib/trenni/sanitize/filter.rb +17 -2
- data/lib/trenni/sanitize/fragment.rb +3 -1
- data/lib/trenni/sanitize/version.rb +1 -1
- data/spec/trenni/sanitize/extensions_spec.rb +26 -0
- data/spec/trenni/sanitize/fragment_spec.rb +6 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fd1bd015d1fe0b61494d1289eb43e200e1610e5a
|
4
|
+
data.tar.gz: 4ae3952d6ef7bf38a8b16ae3aff29f0a1f38dee4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7e352c69337f48b62c5292970cbb849563d4e10974a0547d9abcdc79979cc80e26523db4453c5e9c26d085b628b57a883ca86b6bbbdf05cea27f861db53b888e
|
7
|
+
data.tar.gz: 77aa0739c366265b9cf2fa91c256155d709f59c179fa18cac859cf26d5bcfcf5fcd61fc61c7286ce098e67c812962be39da860815f09b1ab1713b2af33377d1f
|
data/README.md
CHANGED
@@ -17,15 +17,15 @@ I use the [sanitize] gem and generally it's great. However, its performance can
|
|
17
17
|
In my informal testing, this gem is about 50x faster than the [sanitize] gem when generating plain text.
|
18
18
|
|
19
19
|
Warming up --------------------------------------
|
20
|
-
|
21
|
-
|
20
|
+
Sanitize 96.000 i/100ms
|
21
|
+
Trenni::Sanitize 4.447k i/100ms
|
22
22
|
Calculating -------------------------------------
|
23
|
-
|
24
|
-
|
23
|
+
Sanitize 958.020 (± 4.5%) i/s - 4.800k in 5.020564s
|
24
|
+
Trenni::Sanitize 44.718k (± 4.2%) i/s - 226.797k in 5.080756s
|
25
25
|
|
26
26
|
Comparison:
|
27
|
-
|
28
|
-
|
27
|
+
Trenni::Sanitize: 44718.1 i/s
|
28
|
+
Sanitize: 958.0 i/s - 46.68x slower
|
29
29
|
|
30
30
|
## Installation
|
31
31
|
|
@@ -51,9 +51,8 @@ You can extract text using something similar to the following parser delegate:
|
|
51
51
|
|
52
52
|
```ruby
|
53
53
|
class Text < Trenni::Sanitize::Filter
|
54
|
-
def filter(
|
55
|
-
|
56
|
-
return false
|
54
|
+
def filter(node)
|
55
|
+
skip!(TAG)
|
57
56
|
end
|
58
57
|
|
59
58
|
def doctype(string)
|
@@ -61,11 +60,6 @@ class Text < Trenni::Sanitize::Filter
|
|
61
60
|
|
62
61
|
def instruction(string)
|
63
62
|
end
|
64
|
-
|
65
|
-
def text(string)
|
66
|
-
# Output all text
|
67
|
-
@output << string
|
68
|
-
end
|
69
63
|
end
|
70
64
|
|
71
65
|
text = Text.parse("<p>Hello World</p>").output
|
@@ -88,11 +82,12 @@ class Fragment < Trenni::Sanitize::Filter
|
|
88
82
|
'a' => ['href', 'target']
|
89
83
|
}.freeze
|
90
84
|
|
91
|
-
def filter(
|
92
|
-
if attributes = ALLOWED_TAGS[
|
93
|
-
tag.attributes.slice!(attributes)
|
94
|
-
|
95
|
-
|
85
|
+
def filter(node)
|
86
|
+
if attributes = ALLOWED_TAGS[node.name]
|
87
|
+
node.tag.attributes.slice!(attributes)
|
88
|
+
else
|
89
|
+
# Skip the tag, and all contents
|
90
|
+
skip!(ALL)
|
96
91
|
end
|
97
92
|
end
|
98
93
|
|
@@ -59,6 +59,10 @@ module Trenni
|
|
59
59
|
(self.skip & mode) == mode
|
60
60
|
end
|
61
61
|
|
62
|
+
def accept!(mode = ALL)
|
63
|
+
self.skip &= ~mode
|
64
|
+
end
|
65
|
+
|
62
66
|
def [] key
|
63
67
|
self.tag.attributes[key]
|
64
68
|
end
|
@@ -82,18 +86,29 @@ module Trenni
|
|
82
86
|
# The current node being parsed.
|
83
87
|
attr :current
|
84
88
|
|
89
|
+
attr :stack
|
90
|
+
|
85
91
|
def top
|
86
92
|
@stack.last || @top
|
87
93
|
end
|
88
94
|
|
89
95
|
def parse!(input)
|
96
|
+
parse_begin
|
97
|
+
|
90
98
|
Trenni::Parsers.parse_markup(input, self, @entities)
|
91
99
|
|
100
|
+
parse_end
|
101
|
+
|
102
|
+
return self
|
103
|
+
end
|
104
|
+
|
105
|
+
def parse_begin
|
106
|
+
end
|
107
|
+
|
108
|
+
def parse_end
|
92
109
|
while @stack.size > 1
|
93
110
|
close_tag(@stack.last.name)
|
94
111
|
end
|
95
|
-
|
96
|
-
return self
|
97
112
|
end
|
98
113
|
|
99
114
|
def open_tag_begin(name, offset)
|
@@ -0,0 +1,26 @@
|
|
1
|
+
|
2
|
+
require 'trenni/sanitize/extensions'
|
3
|
+
|
4
|
+
RSpec.describe Hash do
|
5
|
+
let(:hash) {{x: 10, y: 20, z: 30}}
|
6
|
+
|
7
|
+
it "can slice the hash" do
|
8
|
+
result = hash.slice(:x)
|
9
|
+
|
10
|
+
expect(hash.size).to be == 3
|
11
|
+
expect(result.size).to be == 1
|
12
|
+
|
13
|
+
expect(result[:x]).to be == 10
|
14
|
+
expect(result[:y]).to be_nil
|
15
|
+
expect(result[:z]).to be_nil
|
16
|
+
end
|
17
|
+
|
18
|
+
it "can slice! the hash in-place" do
|
19
|
+
hash.slice!(:x)
|
20
|
+
|
21
|
+
expect(hash.size).to be == 1
|
22
|
+
expect(hash[:x]).to be == 10
|
23
|
+
expect(hash[:y]).to be_nil
|
24
|
+
expect(hash[:z]).to be_nil
|
25
|
+
end
|
26
|
+
end
|
@@ -56,5 +56,11 @@ RSpec.describe Trenni::Sanitize::Fragment do
|
|
56
56
|
|
57
57
|
expect(fragment.output).to be == "x&y"
|
58
58
|
end
|
59
|
+
|
60
|
+
it "should include nested img" do
|
61
|
+
fragment = described_class.parse("<table><img src='foo'/></table>")
|
62
|
+
|
63
|
+
expect(fragment.output).to be == "<img src=\"foo\"/>"
|
64
|
+
end
|
59
65
|
end
|
60
66
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: trenni-sanitize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Samuel Williams
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-02-
|
11
|
+
date: 2018-02-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: trenni
|
@@ -88,6 +88,7 @@ files:
|
|
88
88
|
- lib/trenni/sanitize/version.rb
|
89
89
|
- spec/spec_helper.rb
|
90
90
|
- spec/trenni/sanitize/benchmark_spec.rb
|
91
|
+
- spec/trenni/sanitize/extensions_spec.rb
|
91
92
|
- spec/trenni/sanitize/fragment_spec.rb
|
92
93
|
- spec/trenni/sanitize/sample.html
|
93
94
|
- spec/trenni/sanitize/text_spec.rb
|
@@ -118,6 +119,7 @@ summary: Sanitize markdown according to a set of rules.
|
|
118
119
|
test_files:
|
119
120
|
- spec/spec_helper.rb
|
120
121
|
- spec/trenni/sanitize/benchmark_spec.rb
|
122
|
+
- spec/trenni/sanitize/extensions_spec.rb
|
121
123
|
- spec/trenni/sanitize/fragment_spec.rb
|
122
124
|
- spec/trenni/sanitize/sample.html
|
123
125
|
- spec/trenni/sanitize/text_spec.rb
|