selma 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -1
- data/ext/selma/extconf.rb +1 -1
- data/lib/selma/sanitizer/config/basic.rb +33 -2
- data/lib/selma/sanitizer/config/default.rb +40 -4
- data/lib/selma/sanitizer/config/relaxed.rb +69 -8
- data/lib/selma/version.rb +1 -1
- data/selma.gemspec +1 -1
- metadata +9 -9
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9d12ee9e6f3513dac3d511e1289c8719a2746dc0c565dd838c04e8cdab59bc6e
|
|
4
|
+
data.tar.gz: 75616f7a40ee931b88768cf27e92d97e38e3c8a31d897567368eeeb5ae0b6957
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 15c950e3689390a1e7a32f60191aa29a9f351ef8f7c84127552c0cd936bfb3ae858aa79050ab9c418234fa50e2fb2a3b11a95606bdacd041dfd69a6d66b3b949
|
|
7
|
+
data.tar.gz: e02be77c5b4a4e4f61641cc03b65103c2771cc3164caa456ecf6a40fbaa03e627a386265629f8fe77d4c277f0ed969d9220d589c740a3bd4a3e8f532168bf8c8
|
data/README.md
CHANGED
|
@@ -37,7 +37,9 @@ sanitizer_config = {
|
|
|
37
37
|
}
|
|
38
38
|
sanitizer = Selma::Sanitizer.new(sanitizer_config)
|
|
39
39
|
rewriter = Selma::Rewriter.new(sanitizer: sanitizer, handlers: [MatchElementRewrite.new, MatchTextRewrite.new])
|
|
40
|
-
|
|
40
|
+
# removes any element that is not ["b", "em", "i", "strong", "u"];
|
|
41
|
+
# then calls `MatchElementRewrite` and `MatchTextRewrite` on matching HTML elements
|
|
42
|
+
rewriter.rewrite(html)
|
|
41
43
|
```
|
|
42
44
|
|
|
43
45
|
Here's a look at each individual part.
|
data/ext/selma/extconf.rb
CHANGED
|
@@ -4,8 +4,39 @@ module Selma
|
|
|
4
4
|
class Sanitizer
|
|
5
5
|
module Config
|
|
6
6
|
BASIC = freeze_config(
|
|
7
|
-
elements: [
|
|
8
|
-
|
|
7
|
+
elements: [
|
|
8
|
+
"a",
|
|
9
|
+
"abbr",
|
|
10
|
+
"blockquote",
|
|
11
|
+
"b",
|
|
12
|
+
"br",
|
|
13
|
+
"cite",
|
|
14
|
+
"code",
|
|
15
|
+
"dd",
|
|
16
|
+
"dfn",
|
|
17
|
+
"dl",
|
|
18
|
+
"dt",
|
|
19
|
+
"em",
|
|
20
|
+
"i",
|
|
21
|
+
"kbd",
|
|
22
|
+
"li",
|
|
23
|
+
"mark",
|
|
24
|
+
"ol",
|
|
25
|
+
"p",
|
|
26
|
+
"pre",
|
|
27
|
+
"q",
|
|
28
|
+
"s",
|
|
29
|
+
"samp",
|
|
30
|
+
"small",
|
|
31
|
+
"strike",
|
|
32
|
+
"strong",
|
|
33
|
+
"sub",
|
|
34
|
+
"sup",
|
|
35
|
+
"time",
|
|
36
|
+
"u",
|
|
37
|
+
"ul",
|
|
38
|
+
"var",
|
|
39
|
+
],
|
|
9
40
|
|
|
10
41
|
attributes: {
|
|
11
42
|
"a" => ["href"],
|
|
@@ -33,13 +33,49 @@ module Selma
|
|
|
33
33
|
|
|
34
34
|
# An Array of element names whose contents will be removed. The contents
|
|
35
35
|
# of all other filtered elements will be left behind.
|
|
36
|
-
remove_contents: [
|
|
37
|
-
|
|
36
|
+
remove_contents: [
|
|
37
|
+
"iframe",
|
|
38
|
+
"math",
|
|
39
|
+
"noembed",
|
|
40
|
+
"noframes",
|
|
41
|
+
"noscript",
|
|
42
|
+
"plaintext",
|
|
43
|
+
"script",
|
|
44
|
+
"style",
|
|
45
|
+
"svg",
|
|
46
|
+
"xmp",
|
|
47
|
+
],
|
|
38
48
|
|
|
39
49
|
# Elements which, when removed, should have their contents surrounded by
|
|
40
50
|
# whitespace.
|
|
41
|
-
whitespace_elements: [
|
|
42
|
-
|
|
51
|
+
whitespace_elements: [
|
|
52
|
+
"address",
|
|
53
|
+
"article",
|
|
54
|
+
"aside",
|
|
55
|
+
"blockquote",
|
|
56
|
+
"br",
|
|
57
|
+
"dd",
|
|
58
|
+
"div",
|
|
59
|
+
"dl",
|
|
60
|
+
"dt",
|
|
61
|
+
"footer",
|
|
62
|
+
"h1",
|
|
63
|
+
"h2",
|
|
64
|
+
"h3",
|
|
65
|
+
"h4",
|
|
66
|
+
"h5",
|
|
67
|
+
"h6",
|
|
68
|
+
"header",
|
|
69
|
+
"hgroup",
|
|
70
|
+
"hr",
|
|
71
|
+
"li",
|
|
72
|
+
"nav",
|
|
73
|
+
"ol",
|
|
74
|
+
"p",
|
|
75
|
+
"pre",
|
|
76
|
+
"section",
|
|
77
|
+
"ul",
|
|
78
|
+
],
|
|
43
79
|
)
|
|
44
80
|
end
|
|
45
81
|
end
|
|
@@ -4,12 +4,60 @@ module Selma
|
|
|
4
4
|
class Sanitizer
|
|
5
5
|
module Config
|
|
6
6
|
RELAXED = freeze_config(
|
|
7
|
-
elements: BASIC[:elements] + [
|
|
8
|
-
|
|
7
|
+
elements: BASIC[:elements] + [
|
|
8
|
+
"address",
|
|
9
|
+
"article",
|
|
10
|
+
"aside",
|
|
11
|
+
"bdi",
|
|
12
|
+
"bdo",
|
|
13
|
+
"body",
|
|
14
|
+
"caption",
|
|
15
|
+
"col",
|
|
16
|
+
"colgroup",
|
|
17
|
+
"data",
|
|
18
|
+
"del",
|
|
19
|
+
"div",
|
|
20
|
+
"figcaption",
|
|
21
|
+
"figure",
|
|
22
|
+
"footer",
|
|
23
|
+
"h1",
|
|
24
|
+
"h2",
|
|
25
|
+
"h3",
|
|
26
|
+
"h4",
|
|
27
|
+
"h5",
|
|
28
|
+
"h6",
|
|
29
|
+
"head",
|
|
30
|
+
"header",
|
|
31
|
+
"hgroup",
|
|
32
|
+
"hr",
|
|
33
|
+
"html",
|
|
34
|
+
"img",
|
|
35
|
+
"ins",
|
|
36
|
+
"main",
|
|
37
|
+
"nav",
|
|
38
|
+
"rp",
|
|
39
|
+
"rt",
|
|
40
|
+
"ruby",
|
|
41
|
+
"section",
|
|
42
|
+
"span",
|
|
43
|
+
"style",
|
|
44
|
+
"summary",
|
|
45
|
+
"sup",
|
|
46
|
+
"table",
|
|
47
|
+
"tbody",
|
|
48
|
+
"td",
|
|
49
|
+
"tfoot",
|
|
50
|
+
"th",
|
|
51
|
+
"thead",
|
|
52
|
+
"title",
|
|
53
|
+
"tr",
|
|
54
|
+
"wbr",
|
|
55
|
+
],
|
|
9
56
|
|
|
10
57
|
allow_doctype: true,
|
|
11
58
|
|
|
12
|
-
attributes: merge(
|
|
59
|
+
attributes: merge(
|
|
60
|
+
BASIC[:attributes],
|
|
13
61
|
:all => ["class", "dir", "hidden", "id", "lang", "style", "tabindex", "title", "translate"],
|
|
14
62
|
"a" => ["href", "hreflang", "name", "rel"],
|
|
15
63
|
"col" => ["span", "width"],
|
|
@@ -21,16 +69,29 @@ module Selma
|
|
|
21
69
|
"li" => ["value"],
|
|
22
70
|
"ol" => ["reversed", "start", "type"],
|
|
23
71
|
"style" => ["media", "scoped", "type"],
|
|
24
|
-
"table" => [
|
|
25
|
-
|
|
72
|
+
"table" => [
|
|
73
|
+
"align",
|
|
74
|
+
"bgcolor",
|
|
75
|
+
"border",
|
|
76
|
+
"cellpadding",
|
|
77
|
+
"cellspacing",
|
|
78
|
+
"frame",
|
|
79
|
+
"rules",
|
|
80
|
+
"sortable",
|
|
81
|
+
"summary",
|
|
82
|
+
"width",
|
|
83
|
+
],
|
|
26
84
|
"td" => ["abbr", "align", "axis", "colspan", "headers", "rowspan", "valign", "width"],
|
|
27
85
|
"th" => ["abbr", "align", "axis", "colspan", "headers", "rowspan", "scope", "sorted", "valign", "width"],
|
|
28
|
-
"ul" => ["type"]
|
|
86
|
+
"ul" => ["type"],
|
|
87
|
+
),
|
|
29
88
|
|
|
30
|
-
protocols: merge(
|
|
89
|
+
protocols: merge(
|
|
90
|
+
BASIC[:protocols],
|
|
31
91
|
"del" => { "cite" => ["http", "https", :relative] },
|
|
32
92
|
"img" => { "src" => ["http", "https", :relative] },
|
|
33
|
-
"ins" => { "cite" => ["http", "https", :relative] }
|
|
93
|
+
"ins" => { "cite" => ["http", "https", :relative] },
|
|
94
|
+
),
|
|
34
95
|
)
|
|
35
96
|
end
|
|
36
97
|
end
|
data/lib/selma/version.rb
CHANGED
data/selma.gemspec
CHANGED
|
@@ -24,7 +24,7 @@ Gem::Specification.new do |spec|
|
|
|
24
24
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
|
25
25
|
|
|
26
26
|
spec.require_paths = ["lib"]
|
|
27
|
-
spec.extensions = ["ext/selma/
|
|
27
|
+
spec.extensions = ["ext/selma/extconf.rb"]
|
|
28
28
|
|
|
29
29
|
spec.metadata = {
|
|
30
30
|
"allowed_push_host" => "https://rubygems.org",
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: selma
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.
|
|
4
|
+
version: 0.0.7
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Garen J. Torikian
|
|
8
|
-
autorequire:
|
|
8
|
+
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2023-01-09 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rb_sys
|
|
@@ -66,12 +66,12 @@ dependencies:
|
|
|
66
66
|
- - "~>"
|
|
67
67
|
- !ruby/object:Gem::Version
|
|
68
68
|
version: '1.2'
|
|
69
|
-
description:
|
|
69
|
+
description:
|
|
70
70
|
email:
|
|
71
71
|
- gjtorikian@gmail.com
|
|
72
72
|
executables: []
|
|
73
73
|
extensions:
|
|
74
|
-
- ext/selma/
|
|
74
|
+
- ext/selma/extconf.rb
|
|
75
75
|
extra_rdoc_files: []
|
|
76
76
|
files:
|
|
77
77
|
- LICENSE.txt
|
|
@@ -103,7 +103,7 @@ files:
|
|
|
103
103
|
- lib/selma/selector.rb
|
|
104
104
|
- lib/selma/version.rb
|
|
105
105
|
- selma.gemspec
|
|
106
|
-
homepage:
|
|
106
|
+
homepage:
|
|
107
107
|
licenses:
|
|
108
108
|
- MIT
|
|
109
109
|
metadata:
|
|
@@ -111,7 +111,7 @@ metadata:
|
|
|
111
111
|
funding_uri: https://github.com/sponsors/gjtorikian/
|
|
112
112
|
source_code_uri: https://github.com/gjtorikian/selma
|
|
113
113
|
rubygems_mfa_required: 'true'
|
|
114
|
-
post_install_message:
|
|
114
|
+
post_install_message:
|
|
115
115
|
rdoc_options: []
|
|
116
116
|
require_paths:
|
|
117
117
|
- lib
|
|
@@ -126,8 +126,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
126
126
|
- !ruby/object:Gem::Version
|
|
127
127
|
version: 3.3.22
|
|
128
128
|
requirements: []
|
|
129
|
-
rubygems_version: 3.3
|
|
130
|
-
signing_key:
|
|
129
|
+
rubygems_version: 3.4.3
|
|
130
|
+
signing_key:
|
|
131
131
|
specification_version: 4
|
|
132
132
|
summary: Selma selects and matches HTML nodes using CSS rules. Backed by Rust's lol_html
|
|
133
133
|
parser.
|