selma 0.3.0-aarch64-linux → 0.4.1-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 60eaadd36c1687863ba3a48d8c3506496e2b14d6b26e738a422d53c0a74b1eb0
4
- data.tar.gz: 3c094ba496d5edfbffd5ec03ff07dce4a689a87e134fa2e8e291be629570ba86
3
+ metadata.gz: d400498eeff57ae3e6c7fe050f9eb726a68bd237e9d3d1ed4157019ca86f011b
4
+ data.tar.gz: 695def76e747a252f3c6aad1f6ccccfad853455cbc5ae2b61d6b67cc77cc4578
5
5
  SHA512:
6
- metadata.gz: 47b36c8fc1d20ebb2366338fc743e6b9b44f10b84d492bb55a0851aeb4ce9acd4aec5183a14e1b5052ba9768b42c9fbaf637883379aeff0e5779f5978d513bf3
7
- data.tar.gz: e15a3dbf3439ecd2a158b82c883d3eea3acad7eee3d29bf9787f12724cccb5db5b54f56f9dd71a0a54edfa32cbd054341856179250c1e01a8f61c062d56bfdc0
6
+ metadata.gz: c6f7785d62ca2e23948d19146bc730e4f352bba06b495eddc7c79db9a3712be250f1c498e6999d1a76036b1a30becb26f879bc807269a5dcff5877800412d034
7
+ data.tar.gz: cd8d61a3da0983c25df09f785576324011b4b67c27a85362f39c8611ec3b18161c39c6ed7e006a64446b9368e2ec16f386c2115e31b1b5ee9ed28ca4ac9d1c36
data/README.md CHANGED
@@ -180,6 +180,26 @@ The `element` argument in `handle_element` has the following methods:
180
180
  - `after(content, as: content_type)`: Inserts `content` after the text. `content_type` is either `:text` or `:html` and determines how the content will be applied.
181
181
  - `replace(content, as: content_type)`: Replaces the text node with `content`. `content_type` is either `:text` or `:html` and determines how the content will be applied.
182
182
 
183
+ ## Security
184
+
185
+ Theoretically, a malicious user can provide a very large document for processing, which can exhaust the memory of the host machine. To set a limit on how much string content is processed at once, you can provide `memory` options:
186
+
187
+ ```ruby
188
+ Selma::Rewriter.new(options: { memory: { max_allowed_memory_usage: 1_000_000 } }) # ~1MB
189
+ ```
190
+
191
+ The structure of the `memory` options looks like this:
192
+ ```ruby
193
+ {
194
+ memory: {
195
+ max_allowed_memory_usage: 1000,
196
+ preallocated_parsing_buffer_size: 100,
197
+ }
198
+ }
199
+ ```
200
+
201
+ Note that `preallocated_parsing_buffer_size` must always be less than `max_allowed_memory_usage`. See [the `lol_html` project documentation](https://docs.rs/lol_html/1.2.1/lol_html/struct.MemorySettings.html) to learn more about the default values.
202
+
183
203
  ## Benchmarks
184
204
 
185
205
  When you run `bundle exec rake benchmark`, two different benchmarks are calculated. Here are those results on my machine.
@@ -191,30 +211,33 @@ Comparing Selma against popular Ruby sanitization gems:
191
211
  <!-- prettier-ignore-start -->
192
212
  <details>
193
213
  <pre>
214
+ input size = 25309 bytes, 0.03 MB
215
+
216
+ ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
194
217
  Warming up --------------------------------------
195
218
  sanitize-sm 15.000 i/100ms
196
- selma-sm 126.000 i/100ms
219
+ selma-sm 127.000 i/100ms
197
220
  Calculating -------------------------------------
198
- sanitize-sm 155.074 (± 1.9%) i/s - 4.665k in 30.092214s
199
- selma-sm 1.290k (± 1.3%) i/s - 38.808k in 30.085333s
221
+ sanitize-sm 157.643 (± 1.9%) i/s - 4.740k in 30.077172s
222
+ selma-sm 1.278k (± 1.5%) i/s - 38.354k in 30.019722s
200
223
 
201
224
  Comparison:
202
- selma-sm: 1290.1 i/s
203
- sanitize-sm: 155.1 i/s - 8.32x slower
225
+ selma-sm: 1277.9 i/s
226
+ sanitize-sm: 157.6 i/s - 8.11x slower
204
227
 
205
228
  input size = 86686 bytes, 0.09 MB
206
229
 
207
230
  ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
208
231
  Warming up --------------------------------------
209
- sanitize-md 3.000 i/100ms
232
+ sanitize-md 4.000 i/100ms
210
233
  selma-md 33.000 i/100ms
211
234
  Calculating -------------------------------------
212
- sanitize-md 40.321 (± 5.0%) i/s - 1.206k in 30.004711s
213
- selma-md 337.417 (± 1.5%) i/s - 10.131k in 30.032772s
235
+ sanitize-md 40.034 (± 5.0%) i/s - 1.200k in 30.043322s
236
+ selma-md 332.959 (± 2.1%) i/s - 9.999k in 30.045733s
214
237
 
215
238
  Comparison:
216
- selma-md: 337.4 i/s
217
- sanitize-md: 40.3 i/s - 8.37x slower
239
+ selma-md: 333.0 i/s
240
+ sanitize-md: 40.0 i/s - 8.32x slower
218
241
 
219
242
  input size = 7172510 bytes, 7.17 MB
220
243
 
@@ -223,12 +246,12 @@ Warming up --------------------------------------
223
246
  sanitize-lg 1.000 i/100ms
224
247
  selma-lg 1.000 i/100ms
225
248
  Calculating -------------------------------------
226
- sanitize-lg 0.144 (± 0.0%) i/s - 5.000 in 34.772526s
227
- selma-lg 4.026 (± 0.0%) i/s - 121.000 in 30.067415s
249
+ sanitize-lg 0.141 (± 0.0%) i/s - 5.000 in 35.426127s
250
+ selma-lg 3.963 (± 0.0%) i/s - 119.000 in 30.037386s
228
251
 
229
252
  Comparison:
230
253
  selma-lg: 4.0 i/s
231
- sanitize-lg: 0.1 i/s - 27.99x slower
254
+ sanitize-lg: 0.1 i/s - 28.03x slower
232
255
  </pre>
233
256
  </details>
234
257
  <!-- prettier-ignore-end -->
@@ -240,23 +263,22 @@ Comparing Selma against popular Ruby HTML parsing gems:
240
263
  <!-- prettier-ignore-start -->
241
264
  <details>
242
265
  <pre>
243
-
244
266
  input size = 25309 bytes, 0.03 MB
245
267
 
246
268
  ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
247
269
  Warming up --------------------------------------
248
270
  nokogiri-sm 79.000 i/100ms
249
- nokolexbor-sm 285.000 i/100ms
250
- selma-sm 244.000 i/100ms
271
+ nokolexbor-sm 295.000 i/100ms
272
+ selma-sm 237.000 i/100ms
251
273
  Calculating -------------------------------------
252
- nokogiri-sm 807.790 (± 3.1%) i/s - 24.253k in 30.056301s
253
- nokolexbor-sm 2.880k (± 6.4%) i/s - 86.070k in 30.044766s
254
- selma-sm 2.508k (± 1.2%) i/s - 75.396k in 30.068792s
274
+ nokogiri-sm 800.531 (± 2.2%) i/s - 24.016k in 30.016056s
275
+ nokolexbor-sm 3.033k (± 3.6%) i/s - 91.155k in 30.094884s
276
+ selma-sm 2.386k (± 1.6%) i/s - 71.574k in 30.001701s
255
277
 
256
278
  Comparison:
257
- nokolexbor-sm: 2880.3 i/s
258
- selma-sm: 2507.8 i/s - 1.15x slower
259
- nokogiri-sm: 807.8 i/s - 3.57x slower
279
+ nokolexbor-sm: 3033.1 i/s
280
+ selma-sm: 2386.3 i/s - 1.27x slower
281
+ nokogiri-sm: 800.5 i/s - 3.79x slower
260
282
 
261
283
  input size = 86686 bytes, 0.09 MB
262
284
 
@@ -264,16 +286,16 @@ ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin23]
264
286
  Warming up --------------------------------------
265
287
  nokogiri-md 8.000 i/100ms
266
288
  nokolexbor-md 43.000 i/100ms
267
- selma-md 39.000 i/100ms
289
+ selma-md 38.000 i/100ms
268
290
  Calculating -------------------------------------
269
- nokogiri-md 87.367 (± 3.4%) i/s - 2.624k in 30.061642s
270
- nokolexbor-md 438.782 (± 3.9%) i/s - 13.158k in 30.031163s
271
- selma-md 392.591 (± 3.1%) i/s - 11.778k in 30.031391s
291
+ nokogiri-md 85.013 (± 8.2%) i/s - 2.024k in 52.257472s
292
+ nokolexbor-md 416.074 (±11.1%) i/s - 12.341k in 30.111613s
293
+ selma-md 361.471 (± 4.7%) i/s - 10.830k in 30.033997s
272
294
 
273
295
  Comparison:
274
- nokolexbor-md: 438.8 i/s
275
- selma-md: 392.6 i/s - 1.12x slower
276
- nokogiri-md: 87.4 i/s - 5.02x slower
296
+ nokolexbor-md: 416.1 i/s
297
+ selma-md: 361.5 i/s - same-ish: difference falls within error
298
+ nokogiri-md: 85.0 i/s - 4.89x slower
277
299
 
278
300
  input size = 7172510 bytes, 7.17 MB
279
301
 
@@ -283,14 +305,14 @@ Warming up --------------------------------------
283
305
  nokolexbor-lg 1.000 i/100ms
284
306
  selma-lg 1.000 i/100ms
285
307
  Calculating -------------------------------------
286
- nokogiri-lg 0.895 (± 0.0%) i/s - 27.000 in 30.300832s
287
- nokolexbor-lg 2.163 (± 0.0%) i/s - 65.000 in 30.085656s
288
- selma-lg 5.867 (± 0.0%) i/s - 176.000 in 30.006240s
308
+ nokogiri-lg 0.805 (± 0.0%) i/s - 25.000 in 31.148730s
309
+ nokolexbor-lg 2.194 (± 0.0%) i/s - 66.000 in 30.278108s
310
+ selma-lg 5.541 (± 0.0%) i/s - 166.000 in 30.037197s
289
311
 
290
312
  Comparison:
291
- selma-lg: 5.9 i/s
292
- nokolexbor-lg: 2.2 i/s - 2.71x slower
293
- nokogiri-lg: 0.9 i/s - 6.55x slower
313
+ selma-lg: 5.5 i/s
314
+ nokolexbor-lg: 2.2 i/s - 2.53x slower
315
+ nokogiri-lg: 0.8 i/s - 6.88x slower
294
316
  </pre>
295
317
  </details>
296
318
  <!-- prettier-ignore-end -->
Binary file
Binary file
Binary file
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Selma
4
+ module Config
5
+ OPTIONS = {
6
+ memory: {
7
+ max_allowed_memory_usage: nil,
8
+ preallocated_parsing_buffer_size: nil,
9
+ },
10
+ }
11
+ end
12
+ end
data/lib/selma/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Selma
4
- VERSION = "0.3.0"
4
+ VERSION = "0.4.1"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: selma
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.1
5
5
  platform: aarch64-linux
6
6
  authors:
7
7
  - Garen J. Torikian
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-06-07 00:00:00.000000000 Z
11
+ date: 2024-07-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -51,6 +51,7 @@ files:
51
51
  - lib/selma/3.1/selma.so
52
52
  - lib/selma/3.2/selma.so
53
53
  - lib/selma/3.3/selma.so
54
+ - lib/selma/config.rb
54
55
  - lib/selma/extension.rb
55
56
  - lib/selma/html.rb
56
57
  - lib/selma/rewriter.rb