selma 0.3.0-x86_64-linux → 0.4.1-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +57 -35
- data/lib/selma/3.1/selma.so +0 -0
- data/lib/selma/3.2/selma.so +0 -0
- data/lib/selma/3.3/selma.so +0 -0
- data/lib/selma/config.rb +12 -0
- data/lib/selma/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1456aea079c07f26d3bbbcbf088a4a9d77632ffef4161e6060a09f85aad2dfff
|
4
|
+
data.tar.gz: 7c1b482bd506fb044adedfbb856f1b198707d7602ebf90f2036e7e14d0780092
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fffc3a70c84827d5f8159d2b48654753fc02017a7e8b1ba35e2a4cb6ff476e1cc2a962ff6428d65cedd506cce161e7f6ba562dfe89b8a84c728d1ff14f322461
|
7
|
+
data.tar.gz: 4e3409e0f42efa8a0a75fa952aaa692d0802b6dcebda53f34beb2aa130885988ebbf18ac830cbf96a484777d7bc080626da986df1be6675072ca55825c273b33
|
data/README.md
CHANGED
@@ -180,6 +180,26 @@ The `element` argument in `handle_element` has the following methods:
|
|
180
180
|
- `after(content, as: content_type)`: Inserts `content` after the text. `content_type` is either `:text` or `:html` and determines how the content will be applied.
|
181
181
|
- `replace(content, as: content_type)`: Replaces the text node with `content`. `content_type` is either `:text` or `:html` and determines how the content will be applied.
|
182
182
|
|
183
|
+
## Security
|
184
|
+
|
185
|
+
Theoretically, a malicious user can provide a very large document for processing, which can exhaust the memory of the host machine. To set a limit on how much string content is processed at once, you can provide `memory` options:
|
186
|
+
|
187
|
+
```ruby
|
188
|
+
Selma::Rewriter.new(options: { memory: { max_allowed_memory_usage: 1_000_000 } }) # ~1MB
|
189
|
+
```
|
190
|
+
|
191
|
+
The structure of the `memory` options looks like this:
|
192
|
+
```ruby
|
193
|
+
{
|
194
|
+
memory: {
|
195
|
+
max_allowed_memory_usage: 1000,
|
196
|
+
preallocated_parsing_buffer_size: 100,
|
197
|
+
}
|
198
|
+
}
|
199
|
+
```
|
200
|
+
|
201
|
+
Note that `preallocated_parsing_buffer_size` must always be less than `max_allowed_memory_usage`. See [the `lol_html` project documentation](https://docs.rs/lol_html/1.2.1/lol_html/struct.MemorySettings.html) to learn more about the default values.
|
202
|
+
|
183
203
|
## Benchmarks
|
184
204
|
|
185
205
|
When you run `bundle exec rake benchmark`, two different benchmarks are calculated. Here are those results on my machine.
|
@@ -191,30 +211,33 @@ Comparing Selma against popular Ruby sanitization gems:
|
|
191
211
|
<!-- prettier-ignore-start -->
|
192
212
|
<details>
|
193
213
|
<pre>
|
214
|
+
input size = 25309 bytes, 0.03 MB
|
215
|
+
|
216
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
194
217
|
Warming up --------------------------------------
|
195
218
|
sanitize-sm 15.000 i/100ms
|
196
|
-
selma-sm
|
219
|
+
selma-sm 127.000 i/100ms
|
197
220
|
Calculating -------------------------------------
|
198
|
-
sanitize-sm
|
199
|
-
selma-sm 1.
|
221
|
+
sanitize-sm 157.643 (± 1.9%) i/s - 4.740k in 30.077172s
|
222
|
+
selma-sm 1.278k (± 1.5%) i/s - 38.354k in 30.019722s
|
200
223
|
|
201
224
|
Comparison:
|
202
|
-
selma-sm:
|
203
|
-
sanitize-sm:
|
225
|
+
selma-sm: 1277.9 i/s
|
226
|
+
sanitize-sm: 157.6 i/s - 8.11x slower
|
204
227
|
|
205
228
|
input size = 86686 bytes, 0.09 MB
|
206
229
|
|
207
230
|
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
208
231
|
Warming up --------------------------------------
|
209
|
-
sanitize-md
|
232
|
+
sanitize-md 4.000 i/100ms
|
210
233
|
selma-md 33.000 i/100ms
|
211
234
|
Calculating -------------------------------------
|
212
|
-
sanitize-md 40.
|
213
|
-
selma-md
|
235
|
+
sanitize-md 40.034 (± 5.0%) i/s - 1.200k in 30.043322s
|
236
|
+
selma-md 332.959 (± 2.1%) i/s - 9.999k in 30.045733s
|
214
237
|
|
215
238
|
Comparison:
|
216
|
-
selma-md:
|
217
|
-
sanitize-md: 40.
|
239
|
+
selma-md: 333.0 i/s
|
240
|
+
sanitize-md: 40.0 i/s - 8.32x slower
|
218
241
|
|
219
242
|
input size = 7172510 bytes, 7.17 MB
|
220
243
|
|
@@ -223,12 +246,12 @@ Warming up --------------------------------------
|
|
223
246
|
sanitize-lg 1.000 i/100ms
|
224
247
|
selma-lg 1.000 i/100ms
|
225
248
|
Calculating -------------------------------------
|
226
|
-
sanitize-lg 0.
|
227
|
-
selma-lg
|
249
|
+
sanitize-lg 0.141 (± 0.0%) i/s - 5.000 in 35.426127s
|
250
|
+
selma-lg 3.963 (± 0.0%) i/s - 119.000 in 30.037386s
|
228
251
|
|
229
252
|
Comparison:
|
230
253
|
selma-lg: 4.0 i/s
|
231
|
-
sanitize-lg: 0.1 i/s -
|
254
|
+
sanitize-lg: 0.1 i/s - 28.03x slower
|
232
255
|
</pre>
|
233
256
|
</details>
|
234
257
|
<!-- prettier-ignore-end -->
|
@@ -240,23 +263,22 @@ Comparing Selma against popular Ruby HTML parsing gems:
|
|
240
263
|
<!-- prettier-ignore-start -->
|
241
264
|
<details>
|
242
265
|
<pre>
|
243
|
-
|
244
266
|
input size = 25309 bytes, 0.03 MB
|
245
267
|
|
246
268
|
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
247
269
|
Warming up --------------------------------------
|
248
270
|
nokogiri-sm 79.000 i/100ms
|
249
|
-
nokolexbor-sm
|
250
|
-
selma-sm
|
271
|
+
nokolexbor-sm 295.000 i/100ms
|
272
|
+
selma-sm 237.000 i/100ms
|
251
273
|
Calculating -------------------------------------
|
252
|
-
nokogiri-sm
|
253
|
-
nokolexbor-sm
|
254
|
-
selma-sm 2.
|
274
|
+
nokogiri-sm 800.531 (± 2.2%) i/s - 24.016k in 30.016056s
|
275
|
+
nokolexbor-sm 3.033k (± 3.6%) i/s - 91.155k in 30.094884s
|
276
|
+
selma-sm 2.386k (± 1.6%) i/s - 71.574k in 30.001701s
|
255
277
|
|
256
278
|
Comparison:
|
257
|
-
nokolexbor-sm:
|
258
|
-
selma-sm:
|
259
|
-
nokogiri-sm:
|
279
|
+
nokolexbor-sm: 3033.1 i/s
|
280
|
+
selma-sm: 2386.3 i/s - 1.27x slower
|
281
|
+
nokogiri-sm: 800.5 i/s - 3.79x slower
|
260
282
|
|
261
283
|
input size = 86686 bytes, 0.09 MB
|
262
284
|
|
@@ -264,16 +286,16 @@ ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
|
264
286
|
Warming up --------------------------------------
|
265
287
|
nokogiri-md 8.000 i/100ms
|
266
288
|
nokolexbor-md 43.000 i/100ms
|
267
|
-
selma-md
|
289
|
+
selma-md 38.000 i/100ms
|
268
290
|
Calculating -------------------------------------
|
269
|
-
nokogiri-md
|
270
|
-
nokolexbor-md
|
271
|
-
selma-md
|
291
|
+
nokogiri-md 85.013 (± 8.2%) i/s - 2.024k in 52.257472s
|
292
|
+
nokolexbor-md 416.074 (±11.1%) i/s - 12.341k in 30.111613s
|
293
|
+
selma-md 361.471 (± 4.7%) i/s - 10.830k in 30.033997s
|
272
294
|
|
273
295
|
Comparison:
|
274
|
-
nokolexbor-md:
|
275
|
-
selma-md:
|
276
|
-
nokogiri-md:
|
296
|
+
nokolexbor-md: 416.1 i/s
|
297
|
+
selma-md: 361.5 i/s - same-ish: difference falls within error
|
298
|
+
nokogiri-md: 85.0 i/s - 4.89x slower
|
277
299
|
|
278
300
|
input size = 7172510 bytes, 7.17 MB
|
279
301
|
|
@@ -283,14 +305,14 @@ Warming up --------------------------------------
|
|
283
305
|
nokolexbor-lg 1.000 i/100ms
|
284
306
|
selma-lg 1.000 i/100ms
|
285
307
|
Calculating -------------------------------------
|
286
|
-
nokogiri-lg 0.
|
287
|
-
nokolexbor-lg 2.
|
288
|
-
selma-lg 5.
|
308
|
+
nokogiri-lg 0.805 (± 0.0%) i/s - 25.000 in 31.148730s
|
309
|
+
nokolexbor-lg 2.194 (± 0.0%) i/s - 66.000 in 30.278108s
|
310
|
+
selma-lg 5.541 (± 0.0%) i/s - 166.000 in 30.037197s
|
289
311
|
|
290
312
|
Comparison:
|
291
|
-
selma-lg: 5.
|
292
|
-
nokolexbor-lg: 2.2 i/s - 2.
|
293
|
-
nokogiri-lg: 0.
|
313
|
+
selma-lg: 5.5 i/s
|
314
|
+
nokolexbor-lg: 2.2 i/s - 2.53x slower
|
315
|
+
nokogiri-lg: 0.8 i/s - 6.88x slower
|
294
316
|
</pre>
|
295
317
|
</details>
|
296
318
|
<!-- prettier-ignore-end -->
|
data/lib/selma/3.1/selma.so
CHANGED
Binary file
|
data/lib/selma/3.2/selma.so
CHANGED
Binary file
|
data/lib/selma/3.3/selma.so
CHANGED
Binary file
|
data/lib/selma/config.rb
ADDED
data/lib/selma/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: selma
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.1
|
5
5
|
platform: x86_64-linux
|
6
6
|
authors:
|
7
7
|
- Garen J. Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-07-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -51,6 +51,7 @@ files:
|
|
51
51
|
- lib/selma/3.1/selma.so
|
52
52
|
- lib/selma/3.2/selma.so
|
53
53
|
- lib/selma/3.3/selma.so
|
54
|
+
- lib/selma/config.rb
|
54
55
|
- lib/selma/extension.rb
|
55
56
|
- lib/selma/html.rb
|
56
57
|
- lib/selma/rewriter.rb
|