selma 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -1
- data/ext/selma/extconf.rb +1 -1
- data/lib/selma/sanitizer/config/basic.rb +33 -2
- data/lib/selma/sanitizer/config/default.rb +40 -4
- data/lib/selma/sanitizer/config/relaxed.rb +69 -8
- data/lib/selma/version.rb +1 -1
- data/selma.gemspec +1 -1
- metadata +9 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9d12ee9e6f3513dac3d511e1289c8719a2746dc0c565dd838c04e8cdab59bc6e
|
4
|
+
data.tar.gz: 75616f7a40ee931b88768cf27e92d97e38e3c8a31d897567368eeeb5ae0b6957
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 15c950e3689390a1e7a32f60191aa29a9f351ef8f7c84127552c0cd936bfb3ae858aa79050ab9c418234fa50e2fb2a3b11a95606bdacd041dfd69a6d66b3b949
|
7
|
+
data.tar.gz: e02be77c5b4a4e4f61641cc03b65103c2771cc3164caa456ecf6a40fbaa03e627a386265629f8fe77d4c277f0ed969d9220d589c740a3bd4a3e8f532168bf8c8
|
data/README.md
CHANGED
@@ -37,7 +37,9 @@ sanitizer_config = {
|
|
37
37
|
}
|
38
38
|
sanitizer = Selma::Sanitizer.new(sanitizer_config)
|
39
39
|
rewriter = Selma::Rewriter.new(sanitizer: sanitizer, handlers: [MatchElementRewrite.new, MatchTextRewrite.new])
|
40
|
-
|
40
|
+
# removes any element that is not ["b", "em", "i", "strong", "u"];
|
41
|
+
# then calls `MatchElementRewrite` and `MatchTextRewrite` on matching HTML elements
|
42
|
+
rewriter.rewrite(html)
|
41
43
|
```
|
42
44
|
|
43
45
|
Here's a look at each individual part.
|
data/ext/selma/extconf.rb
CHANGED
data/lib/selma/sanitizer/config/basic.rb
CHANGED
@@ -4,8 +4,39 @@ module Selma
|
|
4
4
|
class Sanitizer
|
5
5
|
module Config
|
6
6
|
BASIC = freeze_config(
|
7
|
-
elements: [
|
8
|
-
|
7
|
+
elements: [
|
8
|
+
"a",
|
9
|
+
"abbr",
|
10
|
+
"blockquote",
|
11
|
+
"b",
|
12
|
+
"br",
|
13
|
+
"cite",
|
14
|
+
"code",
|
15
|
+
"dd",
|
16
|
+
"dfn",
|
17
|
+
"dl",
|
18
|
+
"dt",
|
19
|
+
"em",
|
20
|
+
"i",
|
21
|
+
"kbd",
|
22
|
+
"li",
|
23
|
+
"mark",
|
24
|
+
"ol",
|
25
|
+
"p",
|
26
|
+
"pre",
|
27
|
+
"q",
|
28
|
+
"s",
|
29
|
+
"samp",
|
30
|
+
"small",
|
31
|
+
"strike",
|
32
|
+
"strong",
|
33
|
+
"sub",
|
34
|
+
"sup",
|
35
|
+
"time",
|
36
|
+
"u",
|
37
|
+
"ul",
|
38
|
+
"var",
|
39
|
+
],
|
9
40
|
|
10
41
|
attributes: {
|
11
42
|
"a" => ["href"],
|
data/lib/selma/sanitizer/config/default.rb
CHANGED
@@ -33,13 +33,49 @@ module Selma
|
|
33
33
|
|
34
34
|
# An Array of element names whose contents will be removed. The contents
|
35
35
|
# of all other filtered elements will be left behind.
|
36
|
-
remove_contents: [
|
37
|
-
|
36
|
+
remove_contents: [
|
37
|
+
"iframe",
|
38
|
+
"math",
|
39
|
+
"noembed",
|
40
|
+
"noframes",
|
41
|
+
"noscript",
|
42
|
+
"plaintext",
|
43
|
+
"script",
|
44
|
+
"style",
|
45
|
+
"svg",
|
46
|
+
"xmp",
|
47
|
+
],
|
38
48
|
|
39
49
|
# Elements which, when removed, should have their contents surrounded by
|
40
50
|
# whitespace.
|
41
|
-
whitespace_elements: [
|
42
|
-
|
51
|
+
whitespace_elements: [
|
52
|
+
"address",
|
53
|
+
"article",
|
54
|
+
"aside",
|
55
|
+
"blockquote",
|
56
|
+
"br",
|
57
|
+
"dd",
|
58
|
+
"div",
|
59
|
+
"dl",
|
60
|
+
"dt",
|
61
|
+
"footer",
|
62
|
+
"h1",
|
63
|
+
"h2",
|
64
|
+
"h3",
|
65
|
+
"h4",
|
66
|
+
"h5",
|
67
|
+
"h6",
|
68
|
+
"header",
|
69
|
+
"hgroup",
|
70
|
+
"hr",
|
71
|
+
"li",
|
72
|
+
"nav",
|
73
|
+
"ol",
|
74
|
+
"p",
|
75
|
+
"pre",
|
76
|
+
"section",
|
77
|
+
"ul",
|
78
|
+
],
|
43
79
|
)
|
44
80
|
end
|
45
81
|
end
|
data/lib/selma/sanitizer/config/relaxed.rb
CHANGED
@@ -4,12 +4,60 @@ module Selma
|
|
4
4
|
class Sanitizer
|
5
5
|
module Config
|
6
6
|
RELAXED = freeze_config(
|
7
|
-
elements: BASIC[:elements] + [
|
8
|
-
|
7
|
+
elements: BASIC[:elements] + [
|
8
|
+
"address",
|
9
|
+
"article",
|
10
|
+
"aside",
|
11
|
+
"bdi",
|
12
|
+
"bdo",
|
13
|
+
"body",
|
14
|
+
"caption",
|
15
|
+
"col",
|
16
|
+
"colgroup",
|
17
|
+
"data",
|
18
|
+
"del",
|
19
|
+
"div",
|
20
|
+
"figcaption",
|
21
|
+
"figure",
|
22
|
+
"footer",
|
23
|
+
"h1",
|
24
|
+
"h2",
|
25
|
+
"h3",
|
26
|
+
"h4",
|
27
|
+
"h5",
|
28
|
+
"h6",
|
29
|
+
"head",
|
30
|
+
"header",
|
31
|
+
"hgroup",
|
32
|
+
"hr",
|
33
|
+
"html",
|
34
|
+
"img",
|
35
|
+
"ins",
|
36
|
+
"main",
|
37
|
+
"nav",
|
38
|
+
"rp",
|
39
|
+
"rt",
|
40
|
+
"ruby",
|
41
|
+
"section",
|
42
|
+
"span",
|
43
|
+
"style",
|
44
|
+
"summary",
|
45
|
+
"sup",
|
46
|
+
"table",
|
47
|
+
"tbody",
|
48
|
+
"td",
|
49
|
+
"tfoot",
|
50
|
+
"th",
|
51
|
+
"thead",
|
52
|
+
"title",
|
53
|
+
"tr",
|
54
|
+
"wbr",
|
55
|
+
],
|
9
56
|
|
10
57
|
allow_doctype: true,
|
11
58
|
|
12
|
-
attributes: merge(
|
59
|
+
attributes: merge(
|
60
|
+
BASIC[:attributes],
|
13
61
|
:all => ["class", "dir", "hidden", "id", "lang", "style", "tabindex", "title", "translate"],
|
14
62
|
"a" => ["href", "hreflang", "name", "rel"],
|
15
63
|
"col" => ["span", "width"],
|
@@ -21,16 +69,29 @@ module Selma
|
|
21
69
|
"li" => ["value"],
|
22
70
|
"ol" => ["reversed", "start", "type"],
|
23
71
|
"style" => ["media", "scoped", "type"],
|
24
|
-
"table" => [
|
25
|
-
|
72
|
+
"table" => [
|
73
|
+
"align",
|
74
|
+
"bgcolor",
|
75
|
+
"border",
|
76
|
+
"cellpadding",
|
77
|
+
"cellspacing",
|
78
|
+
"frame",
|
79
|
+
"rules",
|
80
|
+
"sortable",
|
81
|
+
"summary",
|
82
|
+
"width",
|
83
|
+
],
|
26
84
|
"td" => ["abbr", "align", "axis", "colspan", "headers", "rowspan", "valign", "width"],
|
27
85
|
"th" => ["abbr", "align", "axis", "colspan", "headers", "rowspan", "scope", "sorted", "valign", "width"],
|
28
|
-
"ul" => ["type"]
|
86
|
+
"ul" => ["type"],
|
87
|
+
),
|
29
88
|
|
30
|
-
protocols: merge(
|
89
|
+
protocols: merge(
|
90
|
+
BASIC[:protocols],
|
31
91
|
"del" => { "cite" => ["http", "https", :relative] },
|
32
92
|
"img" => { "src" => ["http", "https", :relative] },
|
33
|
-
"ins" => { "cite" => ["http", "https", :relative] }
|
93
|
+
"ins" => { "cite" => ["http", "https", :relative] },
|
94
|
+
),
|
34
95
|
)
|
35
96
|
end
|
36
97
|
end
|
data/lib/selma/version.rb
CHANGED
data/selma.gemspec
CHANGED
@@ -24,7 +24,7 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
25
25
|
|
26
26
|
spec.require_paths = ["lib"]
|
27
|
-
spec.extensions = ["ext/selma/
|
27
|
+
spec.extensions = ["ext/selma/extconf.rb"]
|
28
28
|
|
29
29
|
spec.metadata = {
|
30
30
|
"allowed_push_host" => "https://rubygems.org",
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: selma
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Garen J. Torikian
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-01-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -66,12 +66,12 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '1.2'
|
69
|
-
description:
|
69
|
+
description:
|
70
70
|
email:
|
71
71
|
- gjtorikian@gmail.com
|
72
72
|
executables: []
|
73
73
|
extensions:
|
74
|
-
- ext/selma/
|
74
|
+
- ext/selma/extconf.rb
|
75
75
|
extra_rdoc_files: []
|
76
76
|
files:
|
77
77
|
- LICENSE.txt
|
@@ -103,7 +103,7 @@ files:
|
|
103
103
|
- lib/selma/selector.rb
|
104
104
|
- lib/selma/version.rb
|
105
105
|
- selma.gemspec
|
106
|
-
homepage:
|
106
|
+
homepage:
|
107
107
|
licenses:
|
108
108
|
- MIT
|
109
109
|
metadata:
|
@@ -111,7 +111,7 @@ metadata:
|
|
111
111
|
funding_uri: https://github.com/sponsors/gjtorikian/
|
112
112
|
source_code_uri: https://github.com/gjtorikian/selma
|
113
113
|
rubygems_mfa_required: 'true'
|
114
|
-
post_install_message:
|
114
|
+
post_install_message:
|
115
115
|
rdoc_options: []
|
116
116
|
require_paths:
|
117
117
|
- lib
|
@@ -126,8 +126,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
126
126
|
- !ruby/object:Gem::Version
|
127
127
|
version: 3.3.22
|
128
128
|
requirements: []
|
129
|
-
rubygems_version: 3.3
|
130
|
-
signing_key:
|
129
|
+
rubygems_version: 3.4.3
|
130
|
+
signing_key:
|
131
131
|
specification_version: 4
|
132
132
|
summary: Selma selects and matches HTML nodes using CSS rules. Backed by Rust's lol_html
|
133
133
|
parser.
|