selma 0.2.2-aarch64-linux → 0.4.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +122 -24
- data/lib/selma/3.1/selma.so +0 -0
- data/lib/selma/3.2/selma.so +0 -0
- data/lib/selma/3.3/selma.so +0 -0
- data/lib/selma/config.rb +12 -0
- data/lib/selma/sanitizer/config/default.rb +1 -1
- data/lib/selma/sanitizer/config/relaxed.rb +1 -0
- data/lib/selma/sanitizer.rb +6 -1
- data/lib/selma/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bf538efaaa0d117e1154a4aec93482567a3e1fe400200d556ceac5cb65952e12
|
4
|
+
data.tar.gz: 421b4534b3eb1e1d8f0e84f7608020ecc0acaf14c3dae27e58e1ccb421a70ff5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7e065ab0aba3cd58d346cf64b01f4583cd77f6d2e7b0576086ffdb34f9ae01a71b63733db21616db826f5b3088fca3da578489bf4b4fd8c68743872ba3b5c0a3
|
7
|
+
data.tar.gz: 4a5f300855fafe27e342989895d6a56a8629075a0ad09aee7511edb08dae80f5aa0811c9aaa0e035bf32be9f7ac85f9752f2dc0f991dd0b3322f9672ad036e12
|
data/README.md
CHANGED
@@ -76,7 +76,7 @@ attributes: {
|
|
76
76
|
|
77
77
|
# URL handling protocols to allow in specific attributes. By default, no
|
78
78
|
# protocols are allowed. Use :relative in place of a protocol if you want
|
79
|
-
# to allow relative URLs sans protocol.
|
79
|
+
# to allow relative URLs sans protocol. Set to `:all` to allow any protocol.
|
80
80
|
protocols: {
|
81
81
|
"a" => { "href" => ["http", "https", "mailto", :relative] },
|
82
82
|
"img" => { "href" => ["http", "https"] },
|
@@ -103,7 +103,11 @@ Here's an example which rewrites the `href` attribute on `a` and the `src` attri
|
|
103
103
|
|
104
104
|
```ruby
|
105
105
|
class MatchAttribute
|
106
|
-
SELECTOR = Selma::Selector(match_element: %(a[href^="http:"], img[src^="http:"]"))
|
106
|
+
SELECTOR = Selma::Selector.new(match_element: %(a[href^="http:"], img[src^="http:"]"))
|
107
|
+
|
108
|
+
def selector
|
109
|
+
SELECTOR
|
110
|
+
end
|
107
111
|
|
108
112
|
def handle_element(element)
|
109
113
|
if element.tag_name == "a"
|
@@ -176,40 +180,134 @@ The `element` argument in `handle_element` has the following methods:
|
|
176
180
|
- `after(content, as: content_type)`: Inserts `content` after the text. `content_type` is either `:text` or `:html` and determines how the content will be applied.
|
177
181
|
- `replace(content, as: content_type)`: Replaces the text node with `content`. `content_type` is either `:text` or `:html` and determines how the content will be applied.
|
178
182
|
|
183
|
+
## Security
|
184
|
+
|
185
|
+
Theoretically, a malicious user can provide a very large document for processing, which can exhaust the memory of the host machine. To set a limit on how much string content is processed at once, you can provide two options into the `memory` namespace:
|
186
|
+
|
187
|
+
```ruby
|
188
|
+
memory: {
|
189
|
+
max_allowed_memory_usage: 1000,
|
190
|
+
preallocated_parsing_buffer_size: 100,
|
191
|
+
},
|
192
|
+
```
|
193
|
+
|
194
|
+
Note that `preallocated_parsing_buffer_size` must always be less than `max_allowed_memory_usage`. See [the `lol_html` project documentation](https://docs.rs/lol_html/1.2.1/lol_html/struct.MemorySettings.html) to learn more about the default values.
|
195
|
+
|
179
196
|
## Benchmarks
|
180
197
|
|
198
|
+
When you run `bundle exec rake benchmark`, two different benchmarks are calculated. Here are those results on my machine.
|
199
|
+
|
200
|
+
### Benchmarks for just the sanitization process
|
201
|
+
|
202
|
+
Comparing Selma against popular Ruby sanitization gems:
|
203
|
+
|
204
|
+
<!-- prettier-ignore-start -->
|
181
205
|
<details>
|
182
206
|
<pre>
|
183
|
-
|
184
|
-
|
207
|
+
input size = 25309 bytes, 0.03 MB
|
208
|
+
|
209
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
185
210
|
Warming up --------------------------------------
|
186
|
-
sanitize-
|
187
|
-
|
188
|
-
selma-document-huge 1.000 i/100ms
|
211
|
+
sanitize-sm 16.000 i/100ms
|
212
|
+
selma-sm 214.000 i/100ms
|
189
213
|
Calculating -------------------------------------
|
190
|
-
sanitize-
|
191
|
-
|
192
|
-
|
214
|
+
sanitize-sm 171.670 (± 1.2%) i/s - 5.152k in 30.017081s
|
215
|
+
selma-sm 2.146k (± 3.0%) i/s - 64.414k in 30.058470s
|
216
|
+
|
217
|
+
Comparison:
|
218
|
+
selma-sm: 2145.8 i/s
|
219
|
+
sanitize-sm: 171.7 i/s - 12.50x slower
|
220
|
+
|
221
|
+
input size = 86686 bytes, 0.09 MB
|
222
|
+
|
223
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
193
224
|
Warming up --------------------------------------
|
194
|
-
sanitize-
|
195
|
-
|
196
|
-
selma-document-medium
|
197
|
-
22.000 i/100ms
|
225
|
+
sanitize-md 4.000 i/100ms
|
226
|
+
selma-md 56.000 i/100ms
|
198
227
|
Calculating -------------------------------------
|
199
|
-
sanitize-
|
200
|
-
|
201
|
-
|
202
|
-
|
228
|
+
sanitize-md 44.397 (± 2.3%) i/s - 1.332k in 30.022430s
|
229
|
+
selma-md 558.448 (± 1.4%) i/s - 16.800k in 30.089196s
|
230
|
+
|
231
|
+
Comparison:
|
232
|
+
selma-md: 558.4 i/s
|
233
|
+
sanitize-md: 44.4 i/s - 12.58x slower
|
234
|
+
|
235
|
+
input size = 7172510 bytes, 7.17 MB
|
236
|
+
|
237
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
238
|
+
Warming up --------------------------------------
|
239
|
+
sanitize-lg 1.000 i/100ms
|
240
|
+
selma-lg 1.000 i/100ms
|
241
|
+
Calculating -------------------------------------
|
242
|
+
sanitize-lg 0.163 (± 0.0%) i/s - 6.000 in 37.375628s
|
243
|
+
selma-lg 6.750 (± 0.0%) i/s - 203.000 in 30.080976s
|
244
|
+
|
245
|
+
Comparison:
|
246
|
+
selma-lg: 6.7 i/s
|
247
|
+
sanitize-lg: 0.2 i/s - 41.32x slower
|
248
|
+
</pre>
|
249
|
+
</details>
|
250
|
+
<!-- prettier-ignore-end -->
|
251
|
+
|
252
|
+
### Benchmarks for just the rewriting process
|
253
|
+
|
254
|
+
Comparing Selma against popular Ruby HTML parsing gems:
|
255
|
+
|
256
|
+
<!-- prettier-ignore-start -->
|
257
|
+
<details>
|
258
|
+
<pre>input size = 25309 bytes, 0.03 MB
|
259
|
+
|
260
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
261
|
+
Warming up --------------------------------------
|
262
|
+
nokogiri-sm 107.000 i/100ms
|
263
|
+
nokolexbor-sm 340.000 i/100ms
|
264
|
+
selma-sm 380.000 i/100ms
|
265
|
+
Calculating -------------------------------------
|
266
|
+
nokogiri-sm 1.073k (± 2.1%) i/s - 32.207k in 30.025474s
|
267
|
+
nokolexbor-sm 3.300k (±13.2%) i/s - 27.540k in 36.788212s
|
268
|
+
selma-sm 3.779k (± 3.4%) i/s - 113.240k in 30.013908s
|
269
|
+
|
270
|
+
Comparison:
|
271
|
+
selma-sm: 3779.4 i/s
|
272
|
+
nokolexbor-sm: 3300.1 i/s - same-ish: difference falls within error
|
273
|
+
nokogiri-sm: 1073.1 i/s - 3.52x slower
|
274
|
+
|
275
|
+
input size = 86686 bytes, 0.09 MB
|
276
|
+
|
277
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
278
|
+
Warming up --------------------------------------
|
279
|
+
nokogiri-md 11.000 i/100ms
|
280
|
+
nokolexbor-md 48.000 i/100ms
|
281
|
+
selma-md 53.000 i/100ms
|
282
|
+
Calculating -------------------------------------
|
283
|
+
nokogiri-md 103.998 (± 5.8%) i/s - 3.113k in 30.029932s
|
284
|
+
nokolexbor-md 428.928 (± 7.9%) i/s - 12.816k in 30.066662s
|
285
|
+
selma-md 492.190 (± 6.9%) i/s - 14.734k in 30.082943s
|
286
|
+
|
287
|
+
Comparison:
|
288
|
+
selma-md: 492.2 i/s
|
289
|
+
nokolexbor-md: 428.9 i/s - same-ish: difference falls within error
|
290
|
+
nokogiri-md: 104.0 i/s - 4.73x slower
|
291
|
+
|
292
|
+
input size = 7172510 bytes, 7.17 MB
|
293
|
+
|
294
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
203
295
|
Warming up --------------------------------------
|
204
|
-
|
205
|
-
|
206
|
-
selma-
|
296
|
+
nokogiri-lg 1.000 i/100ms
|
297
|
+
nokolexbor-lg 1.000 i/100ms
|
298
|
+
selma-lg 1.000 i/100ms
|
207
299
|
Calculating -------------------------------------
|
208
|
-
|
209
|
-
|
210
|
-
selma-
|
300
|
+
nokogiri-lg 0.874 (± 0.0%) i/s - 27.000 in 30.921090s
|
301
|
+
nokolexbor-lg 2.227 (± 0.0%) i/s - 67.000 in 30.137903s
|
302
|
+
selma-lg 8.354 (± 0.0%) i/s - 251.000 in 30.075227s
|
303
|
+
|
304
|
+
Comparison:
|
305
|
+
selma-lg: 8.4 i/s
|
306
|
+
nokolexbor-lg: 2.2 i/s - 3.75x slower
|
307
|
+
nokogiri-lg: 0.9 i/s - 9.56x slower
|
211
308
|
</pre>
|
212
309
|
</details>
|
310
|
+
<!-- prettier-ignore-end -->
|
213
311
|
|
214
312
|
## Contributing
|
215
313
|
|
data/lib/selma/3.1/selma.so
CHANGED
Binary file
|
data/lib/selma/3.2/selma.so
CHANGED
Binary file
|
data/lib/selma/3.3/selma.so
CHANGED
Binary file
|
data/lib/selma/config.rb
ADDED
@@ -28,7 +28,7 @@ module Selma
|
|
28
28
|
|
29
29
|
# URL handling protocols to allow in specific attributes. By default, no
|
30
30
|
# protocols are allowed. Use :relative in place of a protocol if you want
|
31
|
-
# to allow relative URLs sans protocol.
|
31
|
+
# to allow relative URLs sans protocol. Set to `:all` to allow any protocol.
|
32
32
|
protocols: {},
|
33
33
|
|
34
34
|
# An Array of element names whose contents will be removed. The contents
|
data/lib/selma/sanitizer.rb
CHANGED
@@ -66,7 +66,12 @@ module Selma
|
|
66
66
|
end
|
67
67
|
|
68
68
|
def allow_protocol(element, attr, protos)
|
69
|
-
|
69
|
+
if protos.is_a?(Array)
|
70
|
+
raise ArgumentError, "`:all` must be passed outside of an array" if protos.include?(:all)
|
71
|
+
else
|
72
|
+
protos = [protos]
|
73
|
+
end
|
74
|
+
|
70
75
|
set_allowed_protocols(element, attr, protos)
|
71
76
|
end
|
72
77
|
|
data/lib/selma/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: selma
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: aarch64-linux
|
6
6
|
authors:
|
7
7
|
- Garen J. Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-07-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -51,6 +51,7 @@ files:
|
|
51
51
|
- lib/selma/3.1/selma.so
|
52
52
|
- lib/selma/3.2/selma.so
|
53
53
|
- lib/selma/3.3/selma.so
|
54
|
+
- lib/selma/config.rb
|
54
55
|
- lib/selma/extension.rb
|
55
56
|
- lib/selma/html.rb
|
56
57
|
- lib/selma/rewriter.rb
|