selma 0.2.2-arm64-darwin → 0.3.0-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +108 -24
- data/lib/selma/3.1/selma.bundle +0 -0
- data/lib/selma/3.2/selma.bundle +0 -0
- data/lib/selma/3.3/selma.bundle +0 -0
- data/lib/selma/sanitizer/config/default.rb +1 -1
- data/lib/selma/sanitizer/config/relaxed.rb +1 -0
- data/lib/selma/sanitizer.rb +6 -1
- data/lib/selma/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c53b3ab50c311aff6e3fbe0424be6626f670b25894b8bfe292e565922faaa3a4
|
4
|
+
data.tar.gz: 6d9774f96c48d2346070fcf184e05dd7cc26e5c85632d2bc2fdbe9c78d1258fc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 65b51c29b2c581dbed78d1af010fb87642d5022b36a073e3bb665c4f775d8ea78b0361e53247d8f81d6d8fa0daf8adbe7c6fb1bb9c364d750911153388fe1521
|
7
|
+
data.tar.gz: 07a635e8bd77ab933780b9258330e049b0dd1661f2b24fb30440128887c50fb24efc82d73b9e2148572d817ccf2c30a8c31409d5615d8bd77776af1fbafc1a90
|
data/README.md
CHANGED
@@ -76,7 +76,7 @@ attributes: {
|
|
76
76
|
|
77
77
|
# URL handling protocols to allow in specific attributes. By default, no
|
78
78
|
# protocols are allowed. Use :relative in place of a protocol if you want
|
79
|
-
# to allow relative URLs sans protocol.
|
79
|
+
# to allow relative URLs sans protocol. Set to `:all` to allow any protocol.
|
80
80
|
protocols: {
|
81
81
|
"a" => { "href" => ["http", "https", "mailto", :relative] },
|
82
82
|
"img" => { "href" => ["http", "https"] },
|
@@ -103,7 +103,11 @@ Here's an example which rewrites the `href` attribute on `a` and the `src` attri
|
|
103
103
|
|
104
104
|
```ruby
|
105
105
|
class MatchAttribute
|
106
|
-
SELECTOR = Selma::Selector(match_element: %(a[href^="http:"], img[src^="http:"]"))
|
106
|
+
SELECTOR = Selma::Selector.new(match_element: %(a[href^="http:"], img[src^="http:"]))
|
107
|
+
|
108
|
+
def selector
|
109
|
+
SELECTOR
|
110
|
+
end
|
107
111
|
|
108
112
|
def handle_element(element)
|
109
113
|
if element.tag_name == "a"
|
@@ -178,38 +182,118 @@ The `element` argument in `handle_element` has the following methods:
|
|
178
182
|
|
179
183
|
## Benchmarks
|
180
184
|
|
185
|
+
When running `bundle exec rake benchmark`, two different benchmarks are calculated. Here are those results on my machine.
|
186
|
+
|
187
|
+
### Benchmarks for just the sanitization process
|
188
|
+
|
189
|
+
Comparing Selma against popular Ruby sanitization gems:
|
190
|
+
|
191
|
+
<!-- prettier-ignore-start -->
|
181
192
|
<details>
|
182
193
|
<pre>
|
183
|
-
ruby test/benchmark.rb
|
184
|
-
ruby test/benchmark.rb
|
185
194
|
Warming up --------------------------------------
|
186
|
-
sanitize-
|
187
|
-
|
188
|
-
|
195
|
+
sanitize-sm 15.000 i/100ms
|
196
|
+
selma-sm 126.000 i/100ms
|
197
|
+
Calculating -------------------------------------
|
198
|
+
sanitize-sm 155.074 (± 1.9%) i/s - 4.665k in 30.092214s
|
199
|
+
selma-sm 1.290k (± 1.3%) i/s - 38.808k in 30.085333s
|
200
|
+
|
201
|
+
Comparison:
|
202
|
+
selma-sm: 1290.1 i/s
|
203
|
+
sanitize-sm: 155.1 i/s - 8.32x slower
|
204
|
+
|
205
|
+
input size = 86686 bytes, 0.09 MB
|
206
|
+
|
207
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
208
|
+
Warming up --------------------------------------
|
209
|
+
sanitize-md 3.000 i/100ms
|
210
|
+
selma-md 33.000 i/100ms
|
189
211
|
Calculating -------------------------------------
|
190
|
-
sanitize-
|
191
|
-
|
192
|
-
|
212
|
+
sanitize-md 40.321 (± 5.0%) i/s - 1.206k in 30.004711s
|
213
|
+
selma-md 337.417 (± 1.5%) i/s - 10.131k in 30.032772s
|
214
|
+
|
215
|
+
Comparison:
|
216
|
+
selma-md: 337.4 i/s
|
217
|
+
sanitize-md: 40.3 i/s - 8.37x slower
|
218
|
+
|
219
|
+
input size = 7172510 bytes, 7.17 MB
|
220
|
+
|
221
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
193
222
|
Warming up --------------------------------------
|
194
|
-
sanitize-
|
195
|
-
|
196
|
-
selma-document-medium
|
197
|
-
22.000 i/100ms
|
223
|
+
sanitize-lg 1.000 i/100ms
|
224
|
+
selma-lg 1.000 i/100ms
|
198
225
|
Calculating -------------------------------------
|
199
|
-
sanitize-
|
200
|
-
|
201
|
-
|
202
|
-
|
226
|
+
sanitize-lg 0.144 (± 0.0%) i/s - 5.000 in 34.772526s
|
227
|
+
selma-lg 4.026 (± 0.0%) i/s - 121.000 in 30.067415s
|
228
|
+
|
229
|
+
Comparison:
|
230
|
+
selma-lg: 4.0 i/s
|
231
|
+
sanitize-lg: 0.1 i/s - 27.99x slower
|
232
|
+
</pre>
|
233
|
+
</details>
|
234
|
+
<!-- prettier-ignore-end -->
|
235
|
+
|
236
|
+
### Benchmarks for just the rewriting process
|
237
|
+
|
238
|
+
Comparing Selma against popular Ruby HTML parsing gems:
|
239
|
+
|
240
|
+
<!-- prettier-ignore-start -->
|
241
|
+
<details>
|
242
|
+
<pre>
|
243
|
+
|
244
|
+
input size = 25309 bytes, 0.03 MB
|
245
|
+
|
246
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
247
|
+
Warming up --------------------------------------
|
248
|
+
nokogiri-sm 79.000 i/100ms
|
249
|
+
nokolexbor-sm 285.000 i/100ms
|
250
|
+
selma-sm 244.000 i/100ms
|
251
|
+
Calculating -------------------------------------
|
252
|
+
nokogiri-sm 807.790 (± 3.1%) i/s - 24.253k in 30.056301s
|
253
|
+
nokolexbor-sm 2.880k (± 6.4%) i/s - 86.070k in 30.044766s
|
254
|
+
selma-sm 2.508k (± 1.2%) i/s - 75.396k in 30.068792s
|
255
|
+
|
256
|
+
Comparison:
|
257
|
+
nokolexbor-sm: 2880.3 i/s
|
258
|
+
selma-sm: 2507.8 i/s - 1.15x slower
|
259
|
+
nokogiri-sm: 807.8 i/s - 3.57x slower
|
260
|
+
|
261
|
+
input size = 86686 bytes, 0.09 MB
|
262
|
+
|
263
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
264
|
+
Warming up --------------------------------------
|
265
|
+
nokogiri-md 8.000 i/100ms
|
266
|
+
nokolexbor-md 43.000 i/100ms
|
267
|
+
selma-md 39.000 i/100ms
|
268
|
+
Calculating -------------------------------------
|
269
|
+
nokogiri-md 87.367 (± 3.4%) i/s - 2.624k in 30.061642s
|
270
|
+
nokolexbor-md 438.782 (± 3.9%) i/s - 13.158k in 30.031163s
|
271
|
+
selma-md 392.591 (± 3.1%) i/s - 11.778k in 30.031391s
|
272
|
+
|
273
|
+
Comparison:
|
274
|
+
nokolexbor-md: 438.8 i/s
|
275
|
+
selma-md: 392.6 i/s - 1.12x slower
|
276
|
+
nokogiri-md: 87.4 i/s - 5.02x slower
|
277
|
+
|
278
|
+
input size = 7172510 bytes, 7.17 MB
|
279
|
+
|
280
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
203
281
|
Warming up --------------------------------------
|
204
|
-
|
205
|
-
|
206
|
-
selma-
|
282
|
+
nokogiri-lg 1.000 i/100ms
|
283
|
+
nokolexbor-lg 1.000 i/100ms
|
284
|
+
selma-lg 1.000 i/100ms
|
207
285
|
Calculating -------------------------------------
|
208
|
-
|
209
|
-
|
210
|
-
selma-
|
286
|
+
nokogiri-lg 0.895 (± 0.0%) i/s - 27.000 in 30.300832s
|
287
|
+
nokolexbor-lg 2.163 (± 0.0%) i/s - 65.000 in 30.085656s
|
288
|
+
selma-lg 5.867 (± 0.0%) i/s - 176.000 in 30.006240s
|
289
|
+
|
290
|
+
Comparison:
|
291
|
+
selma-lg: 5.9 i/s
|
292
|
+
nokolexbor-lg: 2.2 i/s - 2.71x slower
|
293
|
+
nokogiri-lg: 0.9 i/s - 6.55x slower
|
211
294
|
</pre>
|
212
295
|
</details>
|
296
|
+
<!-- prettier-ignore-end -->
|
213
297
|
|
214
298
|
## Contributing
|
215
299
|
|
data/lib/selma/3.1/selma.bundle
CHANGED
Binary file
|
data/lib/selma/3.2/selma.bundle
CHANGED
Binary file
|
data/lib/selma/3.3/selma.bundle
CHANGED
Binary file
|
@@ -28,7 +28,7 @@ module Selma
|
|
28
28
|
|
29
29
|
# URL handling protocols to allow in specific attributes. By default, no
|
30
30
|
# protocols are allowed. Use :relative in place of a protocol if you want
|
31
|
-
# to allow relative URLs sans protocol.
|
31
|
+
# to allow relative URLs sans protocol. Set to `:all` to allow any protocol.
|
32
32
|
protocols: {},
|
33
33
|
|
34
34
|
# An Array of element names whose contents will be removed. The contents
|
data/lib/selma/sanitizer.rb
CHANGED
@@ -66,7 +66,12 @@ module Selma
|
|
66
66
|
end
|
67
67
|
|
68
68
|
def allow_protocol(element, attr, protos)
|
69
|
-
|
69
|
+
if protos.is_a?(Array)
|
70
|
+
raise ArgumentError, "`:all` must be passed outside of an array" if protos.include?(:all)
|
71
|
+
else
|
72
|
+
protos = [protos]
|
73
|
+
end
|
74
|
+
|
70
75
|
set_allowed_protocols(element, attr, protos)
|
71
76
|
end
|
72
77
|
|
data/lib/selma/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: selma
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: arm64-darwin
|
6
6
|
authors:
|
7
7
|
- Garen J. Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-06-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|