selma 0.1.6 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Cargo.lock +81 -62
- data/README.md +3 -0
- data/ext/selma/Cargo.toml +2 -2
- data/ext/selma/src/html/element.rs +38 -2
- data/ext/selma/src/html/end_tag.rs +2 -2
- data/ext/selma/src/html/text_chunk.rs +2 -2
- data/ext/selma/src/html.rs +3 -1
- data/ext/selma/src/lib.rs +0 -1
- data/ext/selma/src/rewriter.rs +46 -50
- data/ext/selma/src/sanitizer.rs +17 -22
- data/ext/selma/src/selector.rs +4 -4
- data/lib/selma/version.rb +1 -1
- metadata +3 -4
- data/ext/selma/src/wrapped_struct.rs +0 -92
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ee5c02252f9d3c815aea09d7a79070ae69a4b6c167227f01195faad8c99f52da
|
4
|
+
data.tar.gz: 5e06f14ecd535c95cdda8c1869a1a255833ab3cc3c41dd4e2ca311b07ca8b4ff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9e550b2bf33e7c57ee1f3cc07be5d5a0ab042959ffad69736ab13cd3e5ca3dca773855ac04f786b286c8917896ef5c7a50d9737f5ec4a85caf804a1d142d59be
|
7
|
+
data.tar.gz: 8e98daa7579c0f25ce65426813de1989235c18b86e0ee2519b72430fad7f5274c08c38971aff40305df8664c97ef6f369912abcf36509d9b1130b3025655a8ab
|
data/Cargo.lock
CHANGED
@@ -15,20 +15,20 @@ dependencies = [
|
|
15
15
|
|
16
16
|
[[package]]
|
17
17
|
name = "aho-corasick"
|
18
|
-
version = "1.
|
18
|
+
version = "1.1.2"
|
19
19
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
20
|
-
checksum = "
|
20
|
+
checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
|
21
21
|
dependencies = [
|
22
22
|
"memchr",
|
23
23
|
]
|
24
24
|
|
25
25
|
[[package]]
|
26
26
|
name = "bindgen"
|
27
|
-
version = "0.
|
27
|
+
version = "0.66.1"
|
28
28
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
29
|
-
checksum = "
|
29
|
+
checksum = "f2b84e06fc203107bfbad243f4aba2af864eb7db3b1cf46ea0a023b0b433d2a7"
|
30
30
|
dependencies = [
|
31
|
-
"bitflags
|
31
|
+
"bitflags 2.4.0",
|
32
32
|
"cexpr",
|
33
33
|
"clang-sys",
|
34
34
|
"lazy_static",
|
@@ -39,7 +39,7 @@ dependencies = [
|
|
39
39
|
"regex",
|
40
40
|
"rustc-hash",
|
41
41
|
"shlex",
|
42
|
-
"syn
|
42
|
+
"syn 2.0.38",
|
43
43
|
]
|
44
44
|
|
45
45
|
[[package]]
|
@@ -50,15 +50,15 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
|
50
50
|
|
51
51
|
[[package]]
|
52
52
|
name = "bitflags"
|
53
|
-
version = "2.
|
53
|
+
version = "2.4.0"
|
54
54
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
55
|
-
checksum = "
|
55
|
+
checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635"
|
56
56
|
|
57
57
|
[[package]]
|
58
58
|
name = "byteorder"
|
59
|
-
version = "1.
|
59
|
+
version = "1.5.0"
|
60
60
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
61
|
-
checksum = "
|
61
|
+
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
|
62
62
|
|
63
63
|
[[package]]
|
64
64
|
name = "cexpr"
|
@@ -116,7 +116,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
116
116
|
checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
|
117
117
|
dependencies = [
|
118
118
|
"quote",
|
119
|
-
"syn 2.0.
|
119
|
+
"syn 2.0.38",
|
120
120
|
]
|
121
121
|
|
122
122
|
[[package]]
|
@@ -134,24 +134,24 @@ dependencies = [
|
|
134
134
|
|
135
135
|
[[package]]
|
136
136
|
name = "dtoa"
|
137
|
-
version = "0.
|
137
|
+
version = "1.0.9"
|
138
138
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
139
|
-
checksum = "
|
139
|
+
checksum = "dcbb2bf8e87535c23f7a8a321e364ce21462d0ff10cb6407820e8e96dfff6653"
|
140
140
|
|
141
141
|
[[package]]
|
142
142
|
name = "dtoa-short"
|
143
|
-
version = "0.3.
|
143
|
+
version = "0.3.4"
|
144
144
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
145
|
-
checksum = "
|
145
|
+
checksum = "dbaceec3c6e4211c79e7b1800fb9680527106beb2f9c51904a3210c03a448c74"
|
146
146
|
dependencies = [
|
147
147
|
"dtoa",
|
148
148
|
]
|
149
149
|
|
150
150
|
[[package]]
|
151
151
|
name = "encoding_rs"
|
152
|
-
version = "0.8.
|
152
|
+
version = "0.8.33"
|
153
153
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
154
|
-
checksum = "
|
154
|
+
checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1"
|
155
155
|
dependencies = [
|
156
156
|
"cfg-if",
|
157
157
|
]
|
@@ -179,7 +179,7 @@ checksum = "eecf8589574ce9b895052fa12d69af7a233f99e6107f5cb8dd1044f2a17bfdcb"
|
|
179
179
|
dependencies = [
|
180
180
|
"proc-macro2",
|
181
181
|
"quote",
|
182
|
-
"syn 2.0.
|
182
|
+
"syn 2.0.38",
|
183
183
|
]
|
184
184
|
|
185
185
|
[[package]]
|
@@ -246,9 +246,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
|
246
246
|
|
247
247
|
[[package]]
|
248
248
|
name = "libc"
|
249
|
-
version = "0.2.
|
249
|
+
version = "0.2.149"
|
250
250
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
251
|
-
checksum = "
|
251
|
+
checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b"
|
252
252
|
|
253
253
|
[[package]]
|
254
254
|
name = "libloading"
|
@@ -262,17 +262,17 @@ dependencies = [
|
|
262
262
|
|
263
263
|
[[package]]
|
264
264
|
name = "log"
|
265
|
-
version = "0.4.
|
265
|
+
version = "0.4.20"
|
266
266
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
267
|
-
checksum = "
|
267
|
+
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
|
268
268
|
|
269
269
|
[[package]]
|
270
270
|
name = "lol_html"
|
271
|
-
version = "1.
|
271
|
+
version = "1.2.0"
|
272
272
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
273
|
-
checksum = "
|
273
|
+
checksum = "10662f7aad081ec900fd735be33076da75e0389400277dc3734e2b0aa02bb115"
|
274
274
|
dependencies = [
|
275
|
-
"bitflags 2.
|
275
|
+
"bitflags 2.4.0",
|
276
276
|
"cfg-if",
|
277
277
|
"cssparser",
|
278
278
|
"encoding_rs",
|
@@ -288,24 +288,25 @@ dependencies = [
|
|
288
288
|
|
289
289
|
[[package]]
|
290
290
|
name = "magnus"
|
291
|
-
version = "0.
|
291
|
+
version = "0.6.2"
|
292
292
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
293
|
-
checksum = "
|
293
|
+
checksum = "4778544796676e8428e9c622460ebf284bea52d8b10db3aeb449d8b5e61b3a13"
|
294
294
|
dependencies = [
|
295
295
|
"magnus-macros",
|
296
296
|
"rb-sys",
|
297
297
|
"rb-sys-env",
|
298
|
+
"seq-macro",
|
298
299
|
]
|
299
300
|
|
300
301
|
[[package]]
|
301
302
|
name = "magnus-macros"
|
302
|
-
version = "0.
|
303
|
+
version = "0.6.0"
|
303
304
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
304
|
-
checksum = "
|
305
|
+
checksum = "5968c820e2960565f647819f5928a42d6e874551cab9d88d75e3e0660d7f71e3"
|
305
306
|
dependencies = [
|
306
307
|
"proc-macro2",
|
307
308
|
"quote",
|
308
|
-
"syn
|
309
|
+
"syn 2.0.38",
|
309
310
|
]
|
310
311
|
|
311
312
|
[[package]]
|
@@ -316,9 +317,9 @@ checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5"
|
|
316
317
|
|
317
318
|
[[package]]
|
318
319
|
name = "memchr"
|
319
|
-
version = "2.
|
320
|
+
version = "2.6.4"
|
320
321
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
321
|
-
checksum = "
|
322
|
+
checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
|
322
323
|
|
323
324
|
[[package]]
|
324
325
|
name = "mime"
|
@@ -434,18 +435,18 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068"
|
|
434
435
|
|
435
436
|
[[package]]
|
436
437
|
name = "proc-macro2"
|
437
|
-
version = "1.0.
|
438
|
+
version = "1.0.69"
|
438
439
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
439
|
-
checksum = "
|
440
|
+
checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da"
|
440
441
|
dependencies = [
|
441
442
|
"unicode-ident",
|
442
443
|
]
|
443
444
|
|
444
445
|
[[package]]
|
445
446
|
name = "quote"
|
446
|
-
version = "1.0.
|
447
|
+
version = "1.0.33"
|
447
448
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
448
|
-
checksum = "
|
449
|
+
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
|
449
450
|
dependencies = [
|
450
451
|
"proc-macro2",
|
451
452
|
]
|
@@ -503,18 +504,18 @@ dependencies = [
|
|
503
504
|
|
504
505
|
[[package]]
|
505
506
|
name = "rb-sys"
|
506
|
-
version = "0.9.
|
507
|
+
version = "0.9.82"
|
507
508
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
508
|
-
checksum = "
|
509
|
+
checksum = "a3e6bf79bf4c711917cacfaf46dfab4314dbfdd89a8ee3ec4b98336cd23f1ebf"
|
509
510
|
dependencies = [
|
510
511
|
"rb-sys-build",
|
511
512
|
]
|
512
513
|
|
513
514
|
[[package]]
|
514
515
|
name = "rb-sys-build"
|
515
|
-
version = "0.9.
|
516
|
+
version = "0.9.82"
|
516
517
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
517
|
-
checksum = "
|
518
|
+
checksum = "5482a1ed4cde58dddaf162b6aebcb5c25645822547832b8be101f2acd40bcdd6"
|
518
519
|
dependencies = [
|
519
520
|
"bindgen",
|
520
521
|
"lazy_static",
|
@@ -522,7 +523,7 @@ dependencies = [
|
|
522
523
|
"quote",
|
523
524
|
"regex",
|
524
525
|
"shell-words",
|
525
|
-
"syn
|
526
|
+
"syn 2.0.38",
|
526
527
|
]
|
527
528
|
|
528
529
|
[[package]]
|
@@ -533,9 +534,21 @@ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
|
|
533
534
|
|
534
535
|
[[package]]
|
535
536
|
name = "regex"
|
536
|
-
version = "1.
|
537
|
+
version = "1.10.0"
|
538
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
539
|
+
checksum = "d119d7c7ca818f8a53c300863d4f87566aac09943aef5b355bb83969dae75d87"
|
540
|
+
dependencies = [
|
541
|
+
"aho-corasick",
|
542
|
+
"memchr",
|
543
|
+
"regex-automata",
|
544
|
+
"regex-syntax",
|
545
|
+
]
|
546
|
+
|
547
|
+
[[package]]
|
548
|
+
name = "regex-automata"
|
549
|
+
version = "0.4.1"
|
537
550
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
538
|
-
checksum = "
|
551
|
+
checksum = "465c6fc0621e4abc4187a2bda0937bfd4f722c2730b29562e19689ea796c9a4b"
|
539
552
|
dependencies = [
|
540
553
|
"aho-corasick",
|
541
554
|
"memchr",
|
@@ -544,9 +557,9 @@ dependencies = [
|
|
544
557
|
|
545
558
|
[[package]]
|
546
559
|
name = "regex-syntax"
|
547
|
-
version = "0.
|
560
|
+
version = "0.8.1"
|
548
561
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
549
|
-
checksum = "
|
562
|
+
checksum = "56d84fdd47036b038fc80dd333d10b6aab10d5d31f4a366e20014def75328d33"
|
550
563
|
|
551
564
|
[[package]]
|
552
565
|
name = "rustc-hash"
|
@@ -601,9 +614,15 @@ dependencies = [
|
|
601
614
|
|
602
615
|
[[package]]
|
603
616
|
name = "semver"
|
604
|
-
version = "1.0.
|
617
|
+
version = "1.0.20"
|
605
618
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
606
|
-
checksum = "
|
619
|
+
checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090"
|
620
|
+
|
621
|
+
[[package]]
|
622
|
+
name = "seq-macro"
|
623
|
+
version = "0.3.5"
|
624
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
625
|
+
checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
|
607
626
|
|
608
627
|
[[package]]
|
609
628
|
name = "servo_arc"
|
@@ -623,21 +642,21 @@ checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde"
|
|
623
642
|
|
624
643
|
[[package]]
|
625
644
|
name = "shlex"
|
626
|
-
version = "1.
|
645
|
+
version = "1.2.0"
|
627
646
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
628
|
-
checksum = "
|
647
|
+
checksum = "a7cee0529a6d40f580e7a5e6c495c8fbfe21b7b52795ed4bb5e62cdf92bc6380"
|
629
648
|
|
630
649
|
[[package]]
|
631
650
|
name = "siphasher"
|
632
|
-
version = "0.3.
|
651
|
+
version = "0.3.11"
|
633
652
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
634
|
-
checksum = "
|
653
|
+
checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
|
635
654
|
|
636
655
|
[[package]]
|
637
656
|
name = "smallvec"
|
638
|
-
version = "1.
|
657
|
+
version = "1.11.1"
|
639
658
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
640
|
-
checksum = "
|
659
|
+
checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a"
|
641
660
|
|
642
661
|
[[package]]
|
643
662
|
name = "stable_deref_trait"
|
@@ -658,9 +677,9 @@ dependencies = [
|
|
658
677
|
|
659
678
|
[[package]]
|
660
679
|
name = "syn"
|
661
|
-
version = "2.0.
|
680
|
+
version = "2.0.38"
|
662
681
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
663
|
-
checksum = "
|
682
|
+
checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b"
|
664
683
|
dependencies = [
|
665
684
|
"proc-macro2",
|
666
685
|
"quote",
|
@@ -675,29 +694,29 @@ checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c"
|
|
675
694
|
|
676
695
|
[[package]]
|
677
696
|
name = "thiserror"
|
678
|
-
version = "1.0.
|
697
|
+
version = "1.0.49"
|
679
698
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
680
|
-
checksum = "
|
699
|
+
checksum = "1177e8c6d7ede7afde3585fd2513e611227efd6481bd78d2e82ba1ce16557ed4"
|
681
700
|
dependencies = [
|
682
701
|
"thiserror-impl",
|
683
702
|
]
|
684
703
|
|
685
704
|
[[package]]
|
686
705
|
name = "thiserror-impl"
|
687
|
-
version = "1.0.
|
706
|
+
version = "1.0.49"
|
688
707
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
689
|
-
checksum = "
|
708
|
+
checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc"
|
690
709
|
dependencies = [
|
691
710
|
"proc-macro2",
|
692
711
|
"quote",
|
693
|
-
"syn 2.0.
|
712
|
+
"syn 2.0.38",
|
694
713
|
]
|
695
714
|
|
696
715
|
[[package]]
|
697
716
|
name = "unicode-ident"
|
698
|
-
version = "1.0.
|
717
|
+
version = "1.0.12"
|
699
718
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
700
|
-
checksum = "
|
719
|
+
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
701
720
|
|
702
721
|
[[package]]
|
703
722
|
name = "version_check"
|
data/README.md
CHANGED
@@ -164,6 +164,9 @@ The `element` argument in `handle_element` has the following methods:
|
|
164
164
|
- `prepend(content, as: content_type)`: prepends `content` to the element's inner content, i.e. inserts content right after the element's start tag. `content_type` is either `:text` or `:html` and determines how the content will be applied.
|
165
165
|
- `append(content, as: content_type)`: appends `content` to the element's inner content, i.e. inserts content right before the element's end tag. `content_type` is either `:text` or `:html` and determines how the content will be applied.
|
166
166
|
- `set_inner_content`: Replaces inner content of the element with `content`. `content_type` is either `:text` or `:html` and determines how the content will be applied.
|
167
|
+
- `remove`: Removes the element and its inner content.
|
168
|
+
- `remove_and_keep_content`: Removes the element, but keeps its content. I.e. remove start and end tags of the element.
|
169
|
+
- `removed?`: A bool which identifies if the element has been removed or replaced with some content.
|
167
170
|
|
168
171
|
#### `text_chunk` methods
|
169
172
|
|
data/ext/selma/Cargo.toml
CHANGED
@@ -215,12 +215,41 @@ impl SelmaHTMLElement {
|
|
215
215
|
|
216
216
|
Ok(())
|
217
217
|
}
|
218
|
+
|
219
|
+
fn remove(&self) {
|
220
|
+
let mut binding = self.0.borrow_mut();
|
221
|
+
|
222
|
+
if let Ok(e) = binding.element.get_mut() {
|
223
|
+
e.remove()
|
224
|
+
}
|
225
|
+
}
|
226
|
+
|
227
|
+
fn remove_and_keep_content(&self) {
|
228
|
+
let mut binding = self.0.borrow_mut();
|
229
|
+
|
230
|
+
if let Ok(e) = binding.element.get_mut() {
|
231
|
+
e.remove_and_keep_content()
|
232
|
+
}
|
233
|
+
}
|
234
|
+
|
235
|
+
fn is_removed(&self) -> Result<bool, Error> {
|
236
|
+
let binding = self.0.borrow();
|
237
|
+
|
238
|
+
if let Ok(e) = binding.element.get() {
|
239
|
+
Ok(e.removed())
|
240
|
+
} else {
|
241
|
+
Err(Error::new(
|
242
|
+
exception::runtime_error(),
|
243
|
+
"`is_removed` is not available",
|
244
|
+
))
|
245
|
+
}
|
246
|
+
}
|
218
247
|
}
|
219
248
|
|
220
249
|
pub fn init(c_html: RClass) -> Result<(), Error> {
|
221
250
|
let c_element = c_html
|
222
|
-
.define_class("Element",
|
223
|
-
.expect("cannot
|
251
|
+
.define_class("Element", magnus::class::object())
|
252
|
+
.expect("cannot define class Selma::HTML::Element");
|
224
253
|
|
225
254
|
c_element.define_method("tag_name", method!(SelmaHTMLElement::tag_name, 0))?;
|
226
255
|
c_element.define_method("tag_name=", method!(SelmaHTMLElement::set_tag_name, 1))?;
|
@@ -250,5 +279,12 @@ pub fn init(c_html: RClass) -> Result<(), Error> {
|
|
250
279
|
method!(SelmaHTMLElement::set_inner_content, -1),
|
251
280
|
)?;
|
252
281
|
|
282
|
+
c_element.define_method("remove", method!(SelmaHTMLElement::remove, 0))?;
|
283
|
+
c_element.define_method(
|
284
|
+
"remove_and_keep_content",
|
285
|
+
method!(SelmaHTMLElement::remove_and_keep_content, 0),
|
286
|
+
)?;
|
287
|
+
c_element.define_method("removed?", method!(SelmaHTMLElement::is_removed, 0))?;
|
288
|
+
|
253
289
|
Ok(())
|
254
290
|
}
|
@@ -26,8 +26,8 @@ impl SelmaHTMLEndTag {
|
|
26
26
|
|
27
27
|
pub fn init(c_html: RClass) -> Result<(), Error> {
|
28
28
|
let c_end_tag = c_html
|
29
|
-
.define_class("EndTag",
|
30
|
-
.expect("cannot
|
29
|
+
.define_class("EndTag", magnus::class::object())
|
30
|
+
.expect("cannot define class Selma::HTML::EndTag");
|
31
31
|
|
32
32
|
c_end_tag.define_method("tag_name", method!(SelmaHTMLEndTag::tag_name, 0))?;
|
33
33
|
|
@@ -99,8 +99,8 @@ impl SelmaHTMLTextChunk {
|
|
99
99
|
|
100
100
|
pub fn init(c_html: RClass) -> Result<(), Error> {
|
101
101
|
let c_text_chunk = c_html
|
102
|
-
.define_class("TextChunk",
|
103
|
-
.expect("cannot
|
102
|
+
.define_class("TextChunk", magnus::class::object())
|
103
|
+
.expect("cannot define class Selma::HTML::TextChunk");
|
104
104
|
|
105
105
|
c_text_chunk.define_method("to_s", method!(SelmaHTMLTextChunk::to_s, 0))?;
|
106
106
|
c_text_chunk.define_method("content", method!(SelmaHTMLTextChunk::to_s, 0))?;
|
data/ext/selma/src/html.rs
CHANGED
@@ -5,7 +5,9 @@ use magnus::{Error, Module, RModule};
|
|
5
5
|
pub(crate) struct SelmaHTML {}
|
6
6
|
|
7
7
|
pub fn init(m_selma: RModule) -> Result<(), Error> {
|
8
|
-
let c_html = m_selma
|
8
|
+
let c_html = m_selma
|
9
|
+
.define_class("HTML", magnus::class::object())
|
10
|
+
.expect("cannot define class Selma::HTML");
|
9
11
|
|
10
12
|
element::init(c_html).expect("cannot define Selma::HTML::Element class");
|
11
13
|
end_tag::init(c_html).expect("cannot define Selma::HTML::EndTag class");
|
data/ext/selma/src/lib.rs
CHANGED
data/ext/selma/src/rewriter.rs
CHANGED
@@ -3,7 +3,12 @@ use lol_html::{
|
|
3
3
|
html_content::{Element, TextChunk},
|
4
4
|
text, DocumentContentHandlers, ElementContentHandlers, HtmlRewriter, Selector, Settings,
|
5
5
|
};
|
6
|
-
use magnus::{
|
6
|
+
use magnus::{
|
7
|
+
exception, function, method, scan_args,
|
8
|
+
typed_data::Obj,
|
9
|
+
value::{Opaque, ReprValue},
|
10
|
+
Module, Object, RArray, RModule, Ruby, Value,
|
11
|
+
};
|
7
12
|
|
8
13
|
use std::{borrow::Cow, cell::RefCell, primitive::str, rc::Rc};
|
9
14
|
|
@@ -12,31 +17,30 @@ use crate::{
|
|
12
17
|
sanitizer::SelmaSanitizer,
|
13
18
|
selector::SelmaSelector,
|
14
19
|
tags::Tag,
|
15
|
-
wrapped_struct::WrappedStruct,
|
16
20
|
};
|
17
21
|
|
18
|
-
#[derive(Clone
|
22
|
+
#[derive(Clone)]
|
19
23
|
pub struct Handler {
|
20
|
-
rb_handler: Value
|
21
|
-
rb_selector:
|
22
|
-
|
23
|
-
|
24
|
-
total_elapsed_element_handlers: f64,
|
24
|
+
rb_handler: Opaque<Value>,
|
25
|
+
rb_selector: Opaque<Obj<SelmaSelector>>,
|
26
|
+
// total_element_handler_calls: usize,
|
27
|
+
// total_elapsed_element_handlers: f64,
|
25
28
|
|
26
|
-
total_text_handler_calls: usize,
|
27
|
-
total_elapsed_text_handlers: f64,
|
29
|
+
// total_text_handler_calls: usize,
|
30
|
+
// total_elapsed_text_handlers: f64,
|
28
31
|
}
|
29
32
|
|
30
33
|
pub struct Rewriter {
|
31
34
|
sanitizer: Option<SelmaSanitizer>,
|
32
35
|
handlers: Vec<Handler>,
|
33
|
-
|
34
|
-
total_elapsed: f64,
|
36
|
+
// total_elapsed: f64,
|
35
37
|
}
|
36
38
|
|
37
39
|
#[magnus::wrap(class = "Selma::Rewriter")]
|
38
40
|
pub struct SelmaRewriter(std::cell::RefCell<Rewriter>);
|
39
41
|
|
42
|
+
type RewriterValues = (Option<Option<Obj<SelmaSanitizer>>>, Option<RArray>);
|
43
|
+
|
40
44
|
impl SelmaRewriter {
|
41
45
|
const SELMA_ON_END_TAG: &str = "on_end_tag";
|
42
46
|
const SELMA_HANDLE_ELEMENT: &str = "handle_element";
|
@@ -53,15 +57,15 @@ impl SelmaRewriter {
|
|
53
57
|
let sanitizer = match rb_sanitizer {
|
54
58
|
None => {
|
55
59
|
let default_sanitizer = SelmaSanitizer::new(&[])?;
|
56
|
-
let wrapped_sanitizer =
|
60
|
+
let wrapped_sanitizer = Obj::wrap(default_sanitizer);
|
57
61
|
wrapped_sanitizer.funcall::<&str, (), Value>("setup", ())?;
|
58
|
-
Some(wrapped_sanitizer.get().
|
62
|
+
Some(wrapped_sanitizer.get().to_owned())
|
59
63
|
}
|
60
64
|
Some(sanitizer_value) => match sanitizer_value {
|
61
65
|
None => None,
|
62
66
|
Some(sanitizer) => {
|
63
67
|
sanitizer.funcall::<&str, (), Value>("setup", ())?;
|
64
|
-
Some(sanitizer.get().
|
68
|
+
Some(sanitizer.get().to_owned())
|
65
69
|
}
|
66
70
|
},
|
67
71
|
};
|
@@ -86,24 +90,23 @@ impl SelmaRewriter {
|
|
86
90
|
));
|
87
91
|
}
|
88
92
|
|
89
|
-
let rb_selector:
|
90
|
-
|
91
|
-
Err(
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
};
|
93
|
+
let rb_selector: Obj<SelmaSelector> = match rb_handler.funcall("selector", ()) {
|
94
|
+
Err(err) => {
|
95
|
+
return Err(magnus::Error::new(
|
96
|
+
exception::type_error(),
|
97
|
+
format!("Error instantiating selector: {err:?}"),
|
98
|
+
));
|
99
|
+
}
|
100
|
+
Ok(rb_selector) => rb_selector,
|
101
|
+
};
|
99
102
|
let handler = Handler {
|
100
|
-
rb_handler,
|
101
|
-
rb_selector,
|
102
|
-
total_element_handler_calls: 0,
|
103
|
-
total_elapsed_element_handlers: 0.0,
|
103
|
+
rb_handler: Opaque::from(rb_handler),
|
104
|
+
rb_selector: Opaque::from(rb_selector),
|
105
|
+
// total_element_handler_calls: 0,
|
106
|
+
// total_elapsed_element_handlers: 0.0,
|
104
107
|
|
105
|
-
total_text_handler_calls: 0,
|
106
|
-
total_elapsed_text_handlers: 0.0,
|
108
|
+
// total_text_handler_calls: 0,
|
109
|
+
// total_elapsed_text_handlers: 0.0,
|
107
110
|
};
|
108
111
|
handlers.push(handler);
|
109
112
|
}
|
@@ -121,20 +124,12 @@ impl SelmaRewriter {
|
|
121
124
|
Ok(Self(std::cell::RefCell::new(Rewriter {
|
122
125
|
sanitizer,
|
123
126
|
handlers,
|
124
|
-
total_elapsed: 0.0,
|
127
|
+
// total_elapsed: 0.0,
|
125
128
|
})))
|
126
129
|
}
|
127
130
|
|
128
131
|
#[allow(clippy::let_unit_value)]
|
129
|
-
fn scan_parse_args(
|
130
|
-
args: &[Value],
|
131
|
-
) -> Result<
|
132
|
-
(
|
133
|
-
Option<Option<WrappedStruct<SelmaSanitizer>>>,
|
134
|
-
Option<RArray>,
|
135
|
-
),
|
136
|
-
magnus::Error,
|
137
|
-
> {
|
132
|
+
fn scan_parse_args(args: &[Value]) -> Result<RewriterValues, magnus::Error> {
|
138
133
|
let args = scan_args::scan_args(args)?;
|
139
134
|
let _: () = args.required;
|
140
135
|
let _: () = args.optional;
|
@@ -145,10 +140,7 @@ impl SelmaRewriter {
|
|
145
140
|
let kwargs = scan_args::get_kwargs::<
|
146
141
|
_,
|
147
142
|
(),
|
148
|
-
(
|
149
|
-
Option<Option<WrappedStruct<SelmaSanitizer>>>,
|
150
|
-
Option<RArray>,
|
151
|
-
),
|
143
|
+
(Option<Option<Obj<SelmaSanitizer>>>, Option<RArray>),
|
152
144
|
(),
|
153
145
|
>(args.keywords, &[], &["sanitizer", "handlers"])?;
|
154
146
|
let (rb_sanitizer, rb_handlers) = kwargs.optional;
|
@@ -270,7 +262,9 @@ impl SelmaRewriter {
|
|
270
262
|
handlers.iter().for_each(|handler| {
|
271
263
|
let element_stack: Rc<RefCell<Vec<String>>> = Rc::new(RefCell::new(vec![]));
|
272
264
|
|
273
|
-
let
|
265
|
+
let ruby = Ruby::get().unwrap();
|
266
|
+
|
267
|
+
let selector = ruby.get_inner(handler.rb_selector);
|
274
268
|
|
275
269
|
// TODO: test final raise by simulating errors
|
276
270
|
if selector.match_element().is_some() {
|
@@ -280,7 +274,7 @@ impl SelmaRewriter {
|
|
280
274
|
selector.match_element().unwrap(),
|
281
275
|
move |el| {
|
282
276
|
match Self::process_element_handlers(
|
283
|
-
handler.rb_handler,
|
277
|
+
ruby.get_inner(handler.rb_handler),
|
284
278
|
el,
|
285
279
|
&closure_element_stack.borrow(),
|
286
280
|
) {
|
@@ -311,7 +305,9 @@ impl SelmaRewriter {
|
|
311
305
|
}
|
312
306
|
}
|
313
307
|
|
314
|
-
|
308
|
+
let ruby = Ruby::get().unwrap();
|
309
|
+
match Self::process_text_handlers(ruby.get_inner(handler.rb_handler), text)
|
310
|
+
{
|
315
311
|
Ok(_) => Ok(()),
|
316
312
|
Err(err) => Err(err.to_string().into()),
|
317
313
|
}
|
@@ -421,8 +417,8 @@ impl SelmaRewriter {
|
|
421
417
|
|
422
418
|
pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
|
423
419
|
let c_rewriter = m_selma
|
424
|
-
.define_class("Rewriter",
|
425
|
-
.expect("cannot
|
420
|
+
.define_class("Rewriter", magnus::class::object())
|
421
|
+
.expect("cannot define class Selma::Rewriter");
|
426
422
|
|
427
423
|
c_rewriter.define_singleton_method("new", function!(SelmaRewriter::new, -1))?;
|
428
424
|
c_rewriter
|
data/ext/selma/src/sanitizer.rs
CHANGED
@@ -4,9 +4,13 @@ use lol_html::{
|
|
4
4
|
errors::AttributeNameError,
|
5
5
|
html_content::{Comment, ContentType, Doctype, Element, EndTag},
|
6
6
|
};
|
7
|
-
use magnus::{
|
7
|
+
use magnus::{
|
8
|
+
class, function, method, scan_args,
|
9
|
+
value::{Opaque, ReprValue},
|
10
|
+
Module, Object, RArray, RHash, RModule, Ruby, Value,
|
11
|
+
};
|
8
12
|
|
9
|
-
#[derive(Clone, Debug)]
|
13
|
+
#[derive(Clone, Debug, Default)]
|
10
14
|
struct ElementSanitizer {
|
11
15
|
allowed_attrs: Vec<String>,
|
12
16
|
required_attrs: Vec<String>,
|
@@ -14,19 +18,7 @@ struct ElementSanitizer {
|
|
14
18
|
protocol_sanitizers: HashMap<String, Vec<String>>,
|
15
19
|
}
|
16
20
|
|
17
|
-
|
18
|
-
fn default() -> Self {
|
19
|
-
ElementSanitizer {
|
20
|
-
allowed_attrs: vec![],
|
21
|
-
allowed_classes: vec![],
|
22
|
-
required_attrs: vec![],
|
23
|
-
|
24
|
-
protocol_sanitizers: HashMap::new(),
|
25
|
-
}
|
26
|
-
}
|
27
|
-
}
|
28
|
-
|
29
|
-
#[derive(Clone, Debug)]
|
21
|
+
#[derive(Clone)]
|
30
22
|
pub struct Sanitizer {
|
31
23
|
flags: [u8; crate::tags::Tag::TAG_COUNT],
|
32
24
|
allowed_attrs: Vec<String>,
|
@@ -36,10 +28,10 @@ pub struct Sanitizer {
|
|
36
28
|
pub escape_tagfilter: bool,
|
37
29
|
pub allow_comments: bool,
|
38
30
|
pub allow_doctype: bool,
|
39
|
-
config: RHash
|
31
|
+
config: Opaque<RHash>,
|
40
32
|
}
|
41
33
|
|
42
|
-
#[derive(Clone
|
34
|
+
#[derive(Clone)]
|
43
35
|
#[magnus::wrap(class = "Selma::Sanitizer")]
|
44
36
|
pub struct SelmaSanitizer(std::cell::RefCell<Sanitizer>);
|
45
37
|
|
@@ -77,14 +69,15 @@ impl SelmaSanitizer {
|
|
77
69
|
escape_tagfilter: true,
|
78
70
|
allow_comments: false,
|
79
71
|
allow_doctype: true,
|
80
|
-
config,
|
72
|
+
config: config.into(),
|
81
73
|
})))
|
82
74
|
}
|
83
75
|
|
84
76
|
fn get_config(&self) -> Result<RHash, magnus::Error> {
|
85
77
|
let binding = self.0.borrow();
|
78
|
+
let ruby = Ruby::get().unwrap();
|
86
79
|
|
87
|
-
Ok(binding.config)
|
80
|
+
Ok(ruby.get_inner(binding.config))
|
88
81
|
}
|
89
82
|
|
90
83
|
/// Toggle a sanitizer option on or off.
|
@@ -308,9 +301,9 @@ impl SelmaSanitizer {
|
|
308
301
|
let mut buf = String::new();
|
309
302
|
// ...then, escape any special characters, for security
|
310
303
|
if attr_name == "href" {
|
311
|
-
escapist::escape_href(&mut buf, unescaped_attr_val.as_str());
|
304
|
+
escapist::escape_href(&mut buf, unescaped_attr_val.as_str()).unwrap();
|
312
305
|
} else {
|
313
|
-
escapist::escape_html(&mut buf, unescaped_attr_val.as_str());
|
306
|
+
escapist::escape_html(&mut buf, unescaped_attr_val.as_str()).unwrap();
|
314
307
|
};
|
315
308
|
|
316
309
|
match element.set_attribute(attr_name, &buf) {
|
@@ -554,7 +547,9 @@ impl SelmaSanitizer {
|
|
554
547
|
}
|
555
548
|
|
556
549
|
pub fn init(m_selma: RModule) -> Result<(), magnus::Error> {
|
557
|
-
let c_sanitizer = m_selma
|
550
|
+
let c_sanitizer = m_selma
|
551
|
+
.define_class("Sanitizer", magnus::class::object())
|
552
|
+
.expect("cannot define class Selma::Sanitizer");
|
558
553
|
|
559
554
|
c_sanitizer.define_singleton_method("new", function!(SelmaSanitizer::new, -1))?;
|
560
555
|
c_sanitizer.define_method("config", method!(SelmaSanitizer::get_config, 0))?;
|
data/ext/selma/src/selector.rs
CHANGED
@@ -8,6 +8,8 @@ pub struct SelmaSelector {
|
|
8
8
|
ignore_text_within: Option<Vec<String>>,
|
9
9
|
}
|
10
10
|
|
11
|
+
type SelectorMatches = (Option<String>, Option<String>, Option<Vec<String>>);
|
12
|
+
|
11
13
|
impl SelmaSelector {
|
12
14
|
fn new(args: &[Value]) -> Result<Self, Error> {
|
13
15
|
let (match_element, match_text_within, rb_ignore_text_within) =
|
@@ -63,9 +65,7 @@ impl SelmaSelector {
|
|
63
65
|
}
|
64
66
|
|
65
67
|
#[allow(clippy::let_unit_value)]
|
66
|
-
fn scan_parse_args(
|
67
|
-
args: &[Value],
|
68
|
-
) -> Result<(Option<String>, Option<String>, Option<Vec<String>>), Error> {
|
68
|
+
fn scan_parse_args(args: &[Value]) -> Result<SelectorMatches, Error> {
|
69
69
|
let args = scan_args::scan_args(args)?;
|
70
70
|
let _: () = args.required;
|
71
71
|
let _: () = args.optional;
|
@@ -103,7 +103,7 @@ impl SelmaSelector {
|
|
103
103
|
|
104
104
|
pub fn init(m_selma: RModule) -> Result<(), Error> {
|
105
105
|
let c_selector = m_selma
|
106
|
-
.define_class("Selector",
|
106
|
+
.define_class("Selector", magnus::class::object())
|
107
107
|
.expect("cannot define class Selma::Selector");
|
108
108
|
|
109
109
|
c_selector.define_singleton_method("new", function!(SelmaSelector::new, -1))?;
|
data/lib/selma/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: selma
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen J. Torikian
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-10-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -76,7 +76,6 @@ files:
|
|
76
76
|
- ext/selma/src/sanitizer.rs
|
77
77
|
- ext/selma/src/selector.rs
|
78
78
|
- ext/selma/src/tags.rs
|
79
|
-
- ext/selma/src/wrapped_struct.rs
|
80
79
|
- lib/selma.rb
|
81
80
|
- lib/selma/extension.rb
|
82
81
|
- lib/selma/html.rb
|
@@ -112,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
111
|
- !ruby/object:Gem::Version
|
113
112
|
version: 3.3.22
|
114
113
|
requirements: []
|
115
|
-
rubygems_version: 3.4.
|
114
|
+
rubygems_version: 3.4.20
|
116
115
|
signing_key:
|
117
116
|
specification_version: 4
|
118
117
|
summary: Selma selects and matches HTML nodes using CSS rules. Backed by Rust's lol_html
|
@@ -1,92 +0,0 @@
|
|
1
|
-
use magnus::{error::Error, exception, gc, value::Value, RTypedData, TryConvert, TypedData};
|
2
|
-
use std::{marker::PhantomData, ops::Deref};
|
3
|
-
|
4
|
-
// NOTE: My Rust isn't good enough to know what any of this does,
|
5
|
-
// but it was taken from https://cs.github.com/bytecodealliance/wasmtime-rb/blob/a843e4b4582a945f2c881b8bd3e2b87688ab5509/ext/src/helpers/wrapped_struct.rs#L4
|
6
|
-
|
7
|
-
/// A small wrapper for `RTypedData` that keeps track of the concrete struct
|
8
|
-
/// type, and the underlying [`Value`] for GC purposes.
|
9
|
-
#[derive(Debug)]
|
10
|
-
#[repr(transparent)]
|
11
|
-
pub struct WrappedStruct<T: TypedData> {
|
12
|
-
inner: RTypedData,
|
13
|
-
phantom: PhantomData<T>,
|
14
|
-
}
|
15
|
-
|
16
|
-
impl<T: TypedData> Clone for WrappedStruct<T> {
|
17
|
-
fn clone(&self) -> Self {
|
18
|
-
Self {
|
19
|
-
inner: self.inner,
|
20
|
-
phantom: PhantomData,
|
21
|
-
}
|
22
|
-
}
|
23
|
-
}
|
24
|
-
impl<T: TypedData> Copy for WrappedStruct<T> {}
|
25
|
-
|
26
|
-
impl<T: TypedData> WrappedStruct<T> {
|
27
|
-
/// Gets the underlying struct.
|
28
|
-
pub fn get(&self) -> Result<&T, Error> {
|
29
|
-
self.inner.try_convert()
|
30
|
-
}
|
31
|
-
|
32
|
-
/// Gets the underlying struct with a `'static` lifetime.
|
33
|
-
pub fn get_static(&self) -> Result<&'static T, Error> {
|
34
|
-
self.inner.try_convert()
|
35
|
-
}
|
36
|
-
|
37
|
-
/// Get the Ruby [`Value`] for this struct.
|
38
|
-
pub fn to_value(self) -> Value {
|
39
|
-
self.inner.into()
|
40
|
-
}
|
41
|
-
|
42
|
-
/// Marks the Ruby [`Value`] for GC.
|
43
|
-
pub fn mark(&self) {
|
44
|
-
gc::mark(&self.inner.into());
|
45
|
-
}
|
46
|
-
}
|
47
|
-
|
48
|
-
impl<T: TypedData> From<WrappedStruct<T>> for Value {
|
49
|
-
fn from(wrapped_struct: WrappedStruct<T>) -> Self {
|
50
|
-
wrapped_struct.to_value()
|
51
|
-
}
|
52
|
-
}
|
53
|
-
|
54
|
-
impl<T: TypedData> Deref for WrappedStruct<T> {
|
55
|
-
type Target = RTypedData;
|
56
|
-
|
57
|
-
fn deref(&self) -> &Self::Target {
|
58
|
-
&self.inner
|
59
|
-
}
|
60
|
-
}
|
61
|
-
|
62
|
-
impl<T: TypedData> From<T> for WrappedStruct<T> {
|
63
|
-
fn from(t: T) -> Self {
|
64
|
-
Self {
|
65
|
-
inner: RTypedData::wrap(t),
|
66
|
-
phantom: PhantomData,
|
67
|
-
}
|
68
|
-
}
|
69
|
-
}
|
70
|
-
|
71
|
-
impl<T> TryConvert for WrappedStruct<T>
|
72
|
-
where
|
73
|
-
T: TypedData,
|
74
|
-
{
|
75
|
-
fn try_convert(val: Value) -> Result<Self, Error> {
|
76
|
-
let inner = RTypedData::from_value(val).ok_or_else(|| {
|
77
|
-
Error::new(
|
78
|
-
exception::type_error(),
|
79
|
-
format!(
|
80
|
-
"no implicit conversion of {} into {}",
|
81
|
-
unsafe { val.classname() },
|
82
|
-
T::class()
|
83
|
-
),
|
84
|
-
)
|
85
|
-
})?;
|
86
|
-
|
87
|
-
Ok(Self {
|
88
|
-
inner,
|
89
|
-
phantom: PhantomData,
|
90
|
-
})
|
91
|
-
}
|
92
|
-
}
|