selma 0.3.0-aarch64-linux → 0.4.1-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +57 -35
- data/lib/selma/3.1/selma.so +0 -0
- data/lib/selma/3.2/selma.so +0 -0
- data/lib/selma/3.3/selma.so +0 -0
- data/lib/selma/config.rb +12 -0
- data/lib/selma/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d400498eeff57ae3e6c7fe050f9eb726a68bd237e9d3d1ed4157019ca86f011b
|
4
|
+
data.tar.gz: 695def76e747a252f3c6aad1f6ccccfad853455cbc5ae2b61d6b67cc77cc4578
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c6f7785d62ca2e23948d19146bc730e4f352bba06b495eddc7c79db9a3712be250f1c498e6999d1a76036b1a30becb26f879bc807269a5dcff5877800412d034
|
7
|
+
data.tar.gz: cd8d61a3da0983c25df09f785576324011b4b67c27a85362f39c8611ec3b18161c39c6ed7e006a64446b9368e2ec16f386c2115e31b1b5ee9ed28ca4ac9d1c36
|
data/README.md
CHANGED
@@ -180,6 +180,26 @@ The `element` argument in `handle_element` has the following methods:
|
|
180
180
|
- `after(content, as: content_type)`: Inserts `content` after the text. `content_type` is either `:text` or `:html` and determines how the content will be applied.
|
181
181
|
- `replace(content, as: content_type)`: Replaces the text node with `content`. `content_type` is either `:text` or `:html` and determines how the content will be applied.
|
182
182
|
|
183
|
+
## Security
|
184
|
+
|
185
|
+
Theoretically, a malicious user can provide a very large document for processing, which can exhaust the memory of the host machine. To set a limit on how much string content is processed at once, you can provide `memory` options:
|
186
|
+
|
187
|
+
```ruby
|
188
|
+
Selma::Rewriter.new(options: { memory: { max_allowed_memory_usage: 1_000_000 } }) # ~1MB
|
189
|
+
```
|
190
|
+
|
191
|
+
The structure of the `memory` options looks like this:
|
192
|
+
```ruby
|
193
|
+
{
|
194
|
+
memory: {
|
195
|
+
max_allowed_memory_usage: 1000,
|
196
|
+
preallocated_parsing_buffer_size: 100,
|
197
|
+
}
|
198
|
+
}
|
199
|
+
```
|
200
|
+
|
201
|
+
Note that `preallocated_parsing_buffer_size` must always be less than `max_allowed_memory_usage`. See [the `lol_html` project documentation](https://docs.rs/lol_html/1.2.1/lol_html/struct.MemorySettings.html) to learn more about the default values.
|
202
|
+
|
183
203
|
## Benchmarks
|
184
204
|
|
185
205
|
When you run `bundle exec rake benchmark`, two different benchmarks are calculated. Here are those results on my machine.
|
@@ -191,30 +211,33 @@ Comparing Selma against popular Ruby sanitization gems:
|
|
191
211
|
<!-- prettier-ignore-start -->
|
192
212
|
<details>
|
193
213
|
<pre>
|
214
|
+
input size = 25309 bytes, 0.03 MB
|
215
|
+
|
216
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
194
217
|
Warming up --------------------------------------
|
195
218
|
sanitize-sm 15.000 i/100ms
|
196
|
-
selma-sm
|
219
|
+
selma-sm 127.000 i/100ms
|
197
220
|
Calculating -------------------------------------
|
198
|
-
sanitize-sm
|
199
|
-
selma-sm 1.
|
221
|
+
sanitize-sm 157.643 (± 1.9%) i/s - 4.740k in 30.077172s
|
222
|
+
selma-sm 1.278k (± 1.5%) i/s - 38.354k in 30.019722s
|
200
223
|
|
201
224
|
Comparison:
|
202
|
-
selma-sm:
|
203
|
-
sanitize-sm:
|
225
|
+
selma-sm: 1277.9 i/s
|
226
|
+
sanitize-sm: 157.6 i/s - 8.11x slower
|
204
227
|
|
205
228
|
input size = 86686 bytes, 0.09 MB
|
206
229
|
|
207
230
|
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
208
231
|
Warming up --------------------------------------
|
209
|
-
sanitize-md
|
232
|
+
sanitize-md 4.000 i/100ms
|
210
233
|
selma-md 33.000 i/100ms
|
211
234
|
Calculating -------------------------------------
|
212
|
-
sanitize-md 40.
|
213
|
-
selma-md
|
235
|
+
sanitize-md 40.034 (± 5.0%) i/s - 1.200k in 30.043322s
|
236
|
+
selma-md 332.959 (± 2.1%) i/s - 9.999k in 30.045733s
|
214
237
|
|
215
238
|
Comparison:
|
216
|
-
selma-md:
|
217
|
-
sanitize-md: 40.
|
239
|
+
selma-md: 333.0 i/s
|
240
|
+
sanitize-md: 40.0 i/s - 8.32x slower
|
218
241
|
|
219
242
|
input size = 7172510 bytes, 7.17 MB
|
220
243
|
|
@@ -223,12 +246,12 @@ Warming up --------------------------------------
|
|
223
246
|
sanitize-lg 1.000 i/100ms
|
224
247
|
selma-lg 1.000 i/100ms
|
225
248
|
Calculating -------------------------------------
|
226
|
-
sanitize-lg 0.
|
227
|
-
selma-lg
|
249
|
+
sanitize-lg 0.141 (± 0.0%) i/s - 5.000 in 35.426127s
|
250
|
+
selma-lg 3.963 (± 0.0%) i/s - 119.000 in 30.037386s
|
228
251
|
|
229
252
|
Comparison:
|
230
253
|
selma-lg: 4.0 i/s
|
231
|
-
sanitize-lg: 0.1 i/s -
|
254
|
+
sanitize-lg: 0.1 i/s - 28.03x slower
|
232
255
|
</pre>
|
233
256
|
</details>
|
234
257
|
<!-- prettier-ignore-end -->
|
@@ -240,23 +263,22 @@ Comparing Selma against popular Ruby HTML parsing gems:
|
|
240
263
|
<!-- prettier-ignore-start -->
|
241
264
|
<details>
|
242
265
|
<pre>
|
243
|
-
|
244
266
|
input size = 25309 bytes, 0.03 MB
|
245
267
|
|
246
268
|
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
247
269
|
Warming up --------------------------------------
|
248
270
|
nokogiri-sm 79.000 i/100ms
|
249
|
-
nokolexbor-sm
|
250
|
-
selma-sm
|
271
|
+
nokolexbor-sm 295.000 i/100ms
|
272
|
+
selma-sm 237.000 i/100ms
|
251
273
|
Calculating -------------------------------------
|
252
|
-
nokogiri-sm
|
253
|
-
nokolexbor-sm
|
254
|
-
selma-sm 2.
|
274
|
+
nokogiri-sm 800.531 (± 2.2%) i/s - 24.016k in 30.016056s
|
275
|
+
nokolexbor-sm 3.033k (± 3.6%) i/s - 91.155k in 30.094884s
|
276
|
+
selma-sm 2.386k (± 1.6%) i/s - 71.574k in 30.001701s
|
255
277
|
|
256
278
|
Comparison:
|
257
|
-
nokolexbor-sm:
|
258
|
-
selma-sm:
|
259
|
-
nokogiri-sm:
|
279
|
+
nokolexbor-sm: 3033.1 i/s
|
280
|
+
selma-sm: 2386.3 i/s - 1.27x slower
|
281
|
+
nokogiri-sm: 800.5 i/s - 3.79x slower
|
260
282
|
|
261
283
|
input size = 86686 bytes, 0.09 MB
|
262
284
|
|
@@ -264,16 +286,16 @@ ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
|
264
286
|
Warming up --------------------------------------
|
265
287
|
nokogiri-md 8.000 i/100ms
|
266
288
|
nokolexbor-md 43.000 i/100ms
|
267
|
-
selma-md
|
289
|
+
selma-md 38.000 i/100ms
|
268
290
|
Calculating -------------------------------------
|
269
|
-
nokogiri-md
|
270
|
-
nokolexbor-md
|
271
|
-
selma-md
|
291
|
+
nokogiri-md 85.013 (± 8.2%) i/s - 2.024k in 52.257472s
|
292
|
+
nokolexbor-md 416.074 (±11.1%) i/s - 12.341k in 30.111613s
|
293
|
+
selma-md 361.471 (± 4.7%) i/s - 10.830k in 30.033997s
|
272
294
|
|
273
295
|
Comparison:
|
274
|
-
nokolexbor-md:
|
275
|
-
selma-md:
|
276
|
-
nokogiri-md:
|
296
|
+
nokolexbor-md: 416.1 i/s
|
297
|
+
selma-md: 361.5 i/s - same-ish: difference falls within error
|
298
|
+
nokogiri-md: 85.0 i/s - 4.89x slower
|
277
299
|
|
278
300
|
input size = 7172510 bytes, 7.17 MB
|
279
301
|
|
@@ -283,14 +305,14 @@ Warming up --------------------------------------
|
|
283
305
|
nokolexbor-lg 1.000 i/100ms
|
284
306
|
selma-lg 1.000 i/100ms
|
285
307
|
Calculating -------------------------------------
|
286
|
-
nokogiri-lg 0.
|
287
|
-
nokolexbor-lg 2.
|
288
|
-
selma-lg 5.
|
308
|
+
nokogiri-lg 0.805 (± 0.0%) i/s - 25.000 in 31.148730s
|
309
|
+
nokolexbor-lg 2.194 (± 0.0%) i/s - 66.000 in 30.278108s
|
310
|
+
selma-lg 5.541 (± 0.0%) i/s - 166.000 in 30.037197s
|
289
311
|
|
290
312
|
Comparison:
|
291
|
-
selma-lg: 5.
|
292
|
-
nokolexbor-lg: 2.2 i/s - 2.
|
293
|
-
nokogiri-lg: 0.
|
313
|
+
selma-lg: 5.5 i/s
|
314
|
+
nokolexbor-lg: 2.2 i/s - 2.53x slower
|
315
|
+
nokogiri-lg: 0.8 i/s - 6.88x slower
|
294
316
|
</pre>
|
295
317
|
</details>
|
296
318
|
<!-- prettier-ignore-end -->
|
data/lib/selma/3.1/selma.so
CHANGED
Binary file
|
data/lib/selma/3.2/selma.so
CHANGED
Binary file
|
data/lib/selma/3.3/selma.so
CHANGED
Binary file
|
data/lib/selma/config.rb
ADDED
data/lib/selma/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: selma
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.1
|
5
5
|
platform: aarch64-linux
|
6
6
|
authors:
|
7
7
|
- Garen J. Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-07-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -51,6 +51,7 @@ files:
|
|
51
51
|
- lib/selma/3.1/selma.so
|
52
52
|
- lib/selma/3.2/selma.so
|
53
53
|
- lib/selma/3.3/selma.so
|
54
|
+
- lib/selma/config.rb
|
54
55
|
- lib/selma/extension.rb
|
55
56
|
- lib/selma/html.rb
|
56
57
|
- lib/selma/rewriter.rb
|