selma 0.2.2-x86_64-linux → 0.3.0-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +108 -24
- data/lib/selma/3.1/selma.so +0 -0
- data/lib/selma/3.2/selma.so +0 -0
- data/lib/selma/3.3/selma.so +0 -0
- data/lib/selma/sanitizer/config/default.rb +1 -1
- data/lib/selma/sanitizer/config/relaxed.rb +1 -0
- data/lib/selma/sanitizer.rb +6 -1
- data/lib/selma/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 024f7f6dd2ad3aa84f8097fb0df3839e2dd356566b62d3450bf1443432cd6e92
|
4
|
+
data.tar.gz: 741ae668dcc739eb00011a2b638d7830e13737cc29012656ad53a75e62d2b407
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4b8d7a74ec5d02dcc3b07805930440d407818c29e327b1b79cb507e29186057069e106baa4a2fc44e252787597ffabef485be4896074a2824065d4ce73b957e4
|
7
|
+
data.tar.gz: 7161964711b1a14e06b09950d725f5f6bf85f76a20bcea3726d87e7674a988e08ff35011cdb31fb85ad664e7844b2a2d61dbc8442a217ae161f31013c9ccb719
|
data/README.md
CHANGED
@@ -76,7 +76,7 @@ attributes: {
|
|
76
76
|
|
77
77
|
# URL handling protocols to allow in specific attributes. By default, no
|
78
78
|
# protocols are allowed. Use :relative in place of a protocol if you want
|
79
|
-
# to allow relative URLs sans protocol.
|
79
|
+
# to allow relative URLs sans protocol. Set to `:all` to allow any protocol.
|
80
80
|
protocols: {
|
81
81
|
"a" => { "href" => ["http", "https", "mailto", :relative] },
|
82
82
|
"img" => { "href" => ["http", "https"] },
|
@@ -103,7 +103,11 @@ Here's an example which rewrites the `href` attribute on `a` and the `src` attri
|
|
103
103
|
|
104
104
|
```ruby
|
105
105
|
class MatchAttribute
|
106
|
-
SELECTOR = Selma::Selector(match_element: %(a[href^="http:"], img[src^="http:"]"))
|
106
|
+
SELECTOR = Selma::Selector.new(match_element: %(a[href^="http:"], img[src^="http:"]))
|
107
|
+
|
108
|
+
def selector
|
109
|
+
SELECTOR
|
110
|
+
end
|
107
111
|
|
108
112
|
def handle_element(element)
|
109
113
|
if element.tag_name == "a"
|
@@ -178,38 +182,118 @@ The `element` argument in `handle_element` has the following methods:
|
|
178
182
|
|
179
183
|
## Benchmarks
|
180
184
|
|
185
|
+
When running `bundle exec rake benchmark`, two different benchmarks are calculated. Here are those results on my machine.
|
186
|
+
|
187
|
+
### Benchmarks for just the sanitization process
|
188
|
+
|
189
|
+
Comparing Selma against popular Ruby sanitization gems:
|
190
|
+
|
191
|
+
<!-- prettier-ignore-start -->
|
181
192
|
<details>
|
182
193
|
<pre>
|
183
|
-
ruby test/benchmark.rb
|
184
|
-
ruby test/benchmark.rb
|
185
194
|
Warming up --------------------------------------
|
186
|
-
sanitize-
|
187
|
-
|
188
|
-
|
195
|
+
sanitize-sm 15.000 i/100ms
|
196
|
+
selma-sm 126.000 i/100ms
|
197
|
+
Calculating -------------------------------------
|
198
|
+
sanitize-sm 155.074 (± 1.9%) i/s - 4.665k in 30.092214s
|
199
|
+
selma-sm 1.290k (± 1.3%) i/s - 38.808k in 30.085333s
|
200
|
+
|
201
|
+
Comparison:
|
202
|
+
selma-sm: 1290.1 i/s
|
203
|
+
sanitize-sm: 155.1 i/s - 8.32x slower
|
204
|
+
|
205
|
+
input size = 86686 bytes, 0.09 MB
|
206
|
+
|
207
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
208
|
+
Warming up --------------------------------------
|
209
|
+
sanitize-md 3.000 i/100ms
|
210
|
+
selma-md 33.000 i/100ms
|
189
211
|
Calculating -------------------------------------
|
190
|
-
sanitize-
|
191
|
-
|
192
|
-
|
212
|
+
sanitize-md 40.321 (± 5.0%) i/s - 1.206k in 30.004711s
|
213
|
+
selma-md 337.417 (± 1.5%) i/s - 10.131k in 30.032772s
|
214
|
+
|
215
|
+
Comparison:
|
216
|
+
selma-md: 337.4 i/s
|
217
|
+
sanitize-md: 40.3 i/s - 8.37x slower
|
218
|
+
|
219
|
+
input size = 7172510 bytes, 7.17 MB
|
220
|
+
|
221
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
193
222
|
Warming up --------------------------------------
|
194
|
-
sanitize-
|
195
|
-
|
196
|
-
selma-document-medium
|
197
|
-
22.000 i/100ms
|
223
|
+
sanitize-lg 1.000 i/100ms
|
224
|
+
selma-lg 1.000 i/100ms
|
198
225
|
Calculating -------------------------------------
|
199
|
-
sanitize-
|
200
|
-
|
201
|
-
|
202
|
-
|
226
|
+
sanitize-lg 0.144 (± 0.0%) i/s - 5.000 in 34.772526s
|
227
|
+
selma-lg 4.026 (± 0.0%) i/s - 121.000 in 30.067415s
|
228
|
+
|
229
|
+
Comparison:
|
230
|
+
selma-lg: 4.0 i/s
|
231
|
+
sanitize-lg: 0.1 i/s - 27.99x slower
|
232
|
+
</pre>
|
233
|
+
</details>
|
234
|
+
<!-- prettier-ignore-end -->
|
235
|
+
|
236
|
+
### Benchmarks for just the rewriting process
|
237
|
+
|
238
|
+
Comparing Selma against popular Ruby HTML parsing gems:
|
239
|
+
|
240
|
+
<!-- prettier-ignore-start -->
|
241
|
+
<details>
|
242
|
+
<pre>
|
243
|
+
|
244
|
+
input size = 25309 bytes, 0.03 MB
|
245
|
+
|
246
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
247
|
+
Warming up --------------------------------------
|
248
|
+
nokogiri-sm 79.000 i/100ms
|
249
|
+
nokolexbor-sm 285.000 i/100ms
|
250
|
+
selma-sm 244.000 i/100ms
|
251
|
+
Calculating -------------------------------------
|
252
|
+
nokogiri-sm 807.790 (± 3.1%) i/s - 24.253k in 30.056301s
|
253
|
+
nokolexbor-sm 2.880k (± 6.4%) i/s - 86.070k in 30.044766s
|
254
|
+
selma-sm 2.508k (± 1.2%) i/s - 75.396k in 30.068792s
|
255
|
+
|
256
|
+
Comparison:
|
257
|
+
nokolexbor-sm: 2880.3 i/s
|
258
|
+
selma-sm: 2507.8 i/s - 1.15x slower
|
259
|
+
nokogiri-sm: 807.8 i/s - 3.57x slower
|
260
|
+
|
261
|
+
input size = 86686 bytes, 0.09 MB
|
262
|
+
|
263
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
264
|
+
Warming up --------------------------------------
|
265
|
+
nokogiri-md 8.000 i/100ms
|
266
|
+
nokolexbor-md 43.000 i/100ms
|
267
|
+
selma-md 39.000 i/100ms
|
268
|
+
Calculating -------------------------------------
|
269
|
+
nokogiri-md 87.367 (± 3.4%) i/s - 2.624k in 30.061642s
|
270
|
+
nokolexbor-md 438.782 (± 3.9%) i/s - 13.158k in 30.031163s
|
271
|
+
selma-md 392.591 (± 3.1%) i/s - 11.778k in 30.031391s
|
272
|
+
|
273
|
+
Comparison:
|
274
|
+
nokolexbor-md: 438.8 i/s
|
275
|
+
selma-md: 392.6 i/s - 1.12x slower
|
276
|
+
nokogiri-md: 87.4 i/s - 5.02x slower
|
277
|
+
|
278
|
+
input size = 7172510 bytes, 7.17 MB
|
279
|
+
|
280
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
203
281
|
Warming up --------------------------------------
|
204
|
-
|
205
|
-
|
206
|
-
selma-
|
282
|
+
nokogiri-lg 1.000 i/100ms
|
283
|
+
nokolexbor-lg 1.000 i/100ms
|
284
|
+
selma-lg 1.000 i/100ms
|
207
285
|
Calculating -------------------------------------
|
208
|
-
|
209
|
-
|
210
|
-
selma-
|
286
|
+
nokogiri-lg 0.895 (± 0.0%) i/s - 27.000 in 30.300832s
|
287
|
+
nokolexbor-lg 2.163 (± 0.0%) i/s - 65.000 in 30.085656s
|
288
|
+
selma-lg 5.867 (± 0.0%) i/s - 176.000 in 30.006240s
|
289
|
+
|
290
|
+
Comparison:
|
291
|
+
selma-lg: 5.9 i/s
|
292
|
+
nokolexbor-lg: 2.2 i/s - 2.71x slower
|
293
|
+
nokogiri-lg: 0.9 i/s - 6.55x slower
|
211
294
|
</pre>
|
212
295
|
</details>
|
296
|
+
<!-- prettier-ignore-end -->
|
213
297
|
|
214
298
|
## Contributing
|
215
299
|
|
data/lib/selma/3.1/selma.so
CHANGED
Binary file
|
data/lib/selma/3.2/selma.so
CHANGED
Binary file
|
data/lib/selma/3.3/selma.so
CHANGED
Binary file
|
data/lib/selma/sanitizer/config/default.rb
CHANGED
@@ -28,7 +28,7 @@ module Selma
|
|
28
28
|
|
29
29
|
# URL handling protocols to allow in specific attributes. By default, no
|
30
30
|
# protocols are allowed. Use :relative in place of a protocol if you want
|
31
|
-
# to allow relative URLs sans protocol.
|
31
|
+
# to allow relative URLs sans protocol. Set to `:all` to allow any protocol.
|
32
32
|
protocols: {},
|
33
33
|
|
34
34
|
# An Array of element names whose contents will be removed. The contents
|
data/lib/selma/sanitizer.rb
CHANGED
@@ -66,7 +66,12 @@ module Selma
|
|
66
66
|
end
|
67
67
|
|
68
68
|
def allow_protocol(element, attr, protos)
|
69
|
-
|
69
|
+
if protos.is_a?(Array)
|
70
|
+
raise ArgumentError, "`:all` must be passed outside of an array" if protos.include?(:all)
|
71
|
+
else
|
72
|
+
protos = [protos]
|
73
|
+
end
|
74
|
+
|
70
75
|
set_allowed_protocols(element, attr, protos)
|
71
76
|
end
|
72
77
|
|
data/lib/selma/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: selma
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.2
|
4
|
+
version: 0.3.0
|
5
5
|
platform: x86_64-linux
|
6
6
|
authors:
|
7
7
|
- Garen J. Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-06-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|