selma 0.3.0-x64-mingw-ucrt → 0.4.0-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +57 -43
- data/lib/selma/3.1/selma.so +0 -0
- data/lib/selma/3.2/selma.so +0 -0
- data/lib/selma/3.3/selma.so +0 -0
- data/lib/selma/config.rb +12 -0
- data/lib/selma/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '09223eb3558240eceac68e85d2b1176d5200bfe0e5615f2fac578ce0aa009442'
|
4
|
+
data.tar.gz: 2055d3638f4f85f1b8b7257e820c441894fb5ee78185f9caa43334c6465b3406
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 870af5762919e3f45bf4c9386811910802fbc9c2f8dec2447ae84335694978044b117aa00803201a3bfbe680038be99e2a4ee5cd489eb689071a01cec58519e5
|
7
|
+
data.tar.gz: dc883f86c62281e50abfb6dccd77ac79dbea6068f2f683b2a37a67ab01d2332e968eaa7abfaa2ce9e7846f7eb6ee98d1215d3e3071997744377010802ecb128e
|
data/README.md
CHANGED
@@ -180,6 +180,19 @@ The `element` argument in `handle_element` has the following methods:
|
|
180
180
|
- `after(content, as: content_type)`: Inserts `content` after the text. `content_type` is either `:text` or `:html` and determines how the content will be applied.
|
181
181
|
- `replace(content, as: content_type)`: Replaces the text node with `content`. `content_type` is either `:text` or `:html` and determines how the content will be applied.
|
182
182
|
|
183
|
+
## Security
|
184
|
+
|
185
|
+
Theoretically, a malicious user can provide a very large document for processing, which can exhaust the memory of the host machine. To set a limit on how much string content is processed at once, you can provide two options under the `memory` namespace:
|
186
|
+
|
187
|
+
```ruby
|
188
|
+
memory: {
|
189
|
+
max_allowed_memory_usage: 1000,
|
190
|
+
preallocated_parsing_buffer_size: 100,
|
191
|
+
},
|
192
|
+
```
|
193
|
+
|
194
|
+
Note that `preallocated_parsing_buffer_size` must always be less than `max_allowed_memory_usage`. See [the `lol_html` project documentation](https://docs.rs/lol_html/1.2.1/lol_html/struct.MemorySettings.html) to learn more about the default values.
|
195
|
+
|
183
196
|
## Benchmarks
|
184
197
|
|
185
198
|
When you run `bundle exec rake benchmark`, two different benchmarks are calculated. Here are those results on my machine.
|
@@ -191,30 +204,33 @@ Comparing Selma against popular Ruby sanitization gems:
|
|
191
204
|
<!-- prettier-ignore-start -->
|
192
205
|
<details>
|
193
206
|
<pre>
|
207
|
+
input size = 25309 bytes, 0.03 MB
|
208
|
+
|
209
|
+
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
194
210
|
Warming up --------------------------------------
|
195
|
-
sanitize-sm
|
196
|
-
selma-sm
|
211
|
+
sanitize-sm 16.000 i/100ms
|
212
|
+
selma-sm 214.000 i/100ms
|
197
213
|
Calculating -------------------------------------
|
198
|
-
sanitize-sm
|
199
|
-
selma-sm
|
214
|
+
sanitize-sm 171.670 (± 1.2%) i/s - 5.152k in 30.017081s
|
215
|
+
selma-sm 2.146k (± 3.0%) i/s - 64.414k in 30.058470s
|
200
216
|
|
201
217
|
Comparison:
|
202
|
-
selma-sm:
|
203
|
-
sanitize-sm:
|
218
|
+
selma-sm: 2145.8 i/s
|
219
|
+
sanitize-sm: 171.7 i/s - 12.50x slower
|
204
220
|
|
205
221
|
input size = 86686 bytes, 0.09 MB
|
206
222
|
|
207
223
|
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
208
224
|
Warming up --------------------------------------
|
209
|
-
sanitize-md
|
210
|
-
selma-md
|
225
|
+
sanitize-md 4.000 i/100ms
|
226
|
+
selma-md 56.000 i/100ms
|
211
227
|
Calculating -------------------------------------
|
212
|
-
sanitize-md
|
213
|
-
selma-md
|
228
|
+
sanitize-md 44.397 (± 2.3%) i/s - 1.332k in 30.022430s
|
229
|
+
selma-md 558.448 (± 1.4%) i/s - 16.800k in 30.089196s
|
214
230
|
|
215
231
|
Comparison:
|
216
|
-
selma-md:
|
217
|
-
sanitize-md:
|
232
|
+
selma-md: 558.4 i/s
|
233
|
+
sanitize-md: 44.4 i/s - 12.58x slower
|
218
234
|
|
219
235
|
input size = 7172510 bytes, 7.17 MB
|
220
236
|
|
@@ -223,12 +239,12 @@ Warming up --------------------------------------
|
|
223
239
|
sanitize-lg 1.000 i/100ms
|
224
240
|
selma-lg 1.000 i/100ms
|
225
241
|
Calculating -------------------------------------
|
226
|
-
sanitize-lg 0.
|
227
|
-
selma-lg
|
242
|
+
sanitize-lg 0.163 (± 0.0%) i/s - 6.000 in 37.375628s
|
243
|
+
selma-lg 6.750 (± 0.0%) i/s - 203.000 in 30.080976s
|
228
244
|
|
229
245
|
Comparison:
|
230
|
-
selma-lg:
|
231
|
-
sanitize-lg: 0.
|
246
|
+
selma-lg: 6.7 i/s
|
247
|
+
sanitize-lg: 0.2 i/s - 41.32x slower
|
232
248
|
</pre>
|
233
249
|
</details>
|
234
250
|
<!-- prettier-ignore-end -->
|
@@ -239,41 +255,39 @@ Comparing Selma against popular Ruby HTML parsing gems:
|
|
239
255
|
|
240
256
|
<!-- prettier-ignore-start -->
|
241
257
|
<details>
|
242
|
-
<pre>
|
243
|
-
|
244
|
-
input size = 25309 bytes, 0.03 MB
|
258
|
+
<pre>input size = 25309 bytes, 0.03 MB
|
245
259
|
|
246
260
|
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
247
261
|
Warming up --------------------------------------
|
248
|
-
nokogiri-sm
|
249
|
-
nokolexbor-sm
|
250
|
-
selma-sm
|
262
|
+
nokogiri-sm 107.000 i/100ms
|
263
|
+
nokolexbor-sm 340.000 i/100ms
|
264
|
+
selma-sm 380.000 i/100ms
|
251
265
|
Calculating -------------------------------------
|
252
|
-
nokogiri-sm
|
253
|
-
nokolexbor-sm
|
254
|
-
selma-sm
|
266
|
+
nokogiri-sm 1.073k (± 2.1%) i/s - 32.207k in 30.025474s
|
267
|
+
nokolexbor-sm 3.300k (±13.2%) i/s - 27.540k in 36.788212s
|
268
|
+
selma-sm 3.779k (± 3.4%) i/s - 113.240k in 30.013908s
|
255
269
|
|
256
270
|
Comparison:
|
257
|
-
|
258
|
-
|
259
|
-
nokogiri-sm:
|
271
|
+
selma-sm: 3779.4 i/s
|
272
|
+
nokolexbor-sm: 3300.1 i/s - same-ish: difference falls within error
|
273
|
+
nokogiri-sm: 1073.1 i/s - 3.52x slower
|
260
274
|
|
261
275
|
input size = 86686 bytes, 0.09 MB
|
262
276
|
|
263
277
|
ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
|
264
278
|
Warming up --------------------------------------
|
265
|
-
nokogiri-md
|
266
|
-
nokolexbor-md
|
267
|
-
selma-md
|
279
|
+
nokogiri-md 11.000 i/100ms
|
280
|
+
nokolexbor-md 48.000 i/100ms
|
281
|
+
selma-md 53.000 i/100ms
|
268
282
|
Calculating -------------------------------------
|
269
|
-
nokogiri-md
|
270
|
-
nokolexbor-md
|
271
|
-
selma-md
|
283
|
+
nokogiri-md 103.998 (± 5.8%) i/s - 3.113k in 30.029932s
|
284
|
+
nokolexbor-md 428.928 (± 7.9%) i/s - 12.816k in 30.066662s
|
285
|
+
selma-md 492.190 (± 6.9%) i/s - 14.734k in 30.082943s
|
272
286
|
|
273
287
|
Comparison:
|
274
|
-
|
275
|
-
|
276
|
-
nokogiri-md:
|
288
|
+
selma-md: 492.2 i/s
|
289
|
+
nokolexbor-md: 428.9 i/s - same-ish: difference falls within error
|
290
|
+
nokogiri-md: 104.0 i/s - 4.73x slower
|
277
291
|
|
278
292
|
input size = 7172510 bytes, 7.17 MB
|
279
293
|
|
@@ -283,14 +297,14 @@ Warming up --------------------------------------
|
|
283
297
|
nokolexbor-lg 1.000 i/100ms
|
284
298
|
selma-lg 1.000 i/100ms
|
285
299
|
Calculating -------------------------------------
|
286
|
-
nokogiri-lg 0.
|
287
|
-
nokolexbor-lg 2.
|
288
|
-
selma-lg
|
300
|
+
nokogiri-lg 0.874 (± 0.0%) i/s - 27.000 in 30.921090s
|
301
|
+
nokolexbor-lg 2.227 (± 0.0%) i/s - 67.000 in 30.137903s
|
302
|
+
selma-lg 8.354 (± 0.0%) i/s - 251.000 in 30.075227s
|
289
303
|
|
290
304
|
Comparison:
|
291
|
-
selma-lg:
|
292
|
-
nokolexbor-lg: 2.2 i/s -
|
293
|
-
nokogiri-lg: 0.9 i/s -
|
305
|
+
selma-lg: 8.4 i/s
|
306
|
+
nokolexbor-lg: 2.2 i/s - 3.75x slower
|
307
|
+
nokogiri-lg: 0.9 i/s - 9.56x slower
|
294
308
|
</pre>
|
295
309
|
</details>
|
296
310
|
<!-- prettier-ignore-end -->
|
data/lib/selma/3.1/selma.so
CHANGED
Binary file
|
data/lib/selma/3.2/selma.so
CHANGED
Binary file
|
data/lib/selma/3.3/selma.so
CHANGED
Binary file
|
data/lib/selma/config.rb
ADDED
data/lib/selma/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: selma
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: x64-mingw-ucrt
|
6
6
|
authors:
|
7
7
|
- Garen J. Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-07-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -51,6 +51,7 @@ files:
|
|
51
51
|
- lib/selma/3.1/selma.so
|
52
52
|
- lib/selma/3.2/selma.so
|
53
53
|
- lib/selma/3.3/selma.so
|
54
|
+
- lib/selma/config.rb
|
54
55
|
- lib/selma/extension.rb
|
55
56
|
- lib/selma/html.rb
|
56
57
|
- lib/selma/rewriter.rb
|