selma 0.0.6-x86_64-linux → 0.1.0-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -2
- data/lib/selma/3.1/selma.so +0 -0
- data/lib/selma/3.2/selma.so +0 -0
- data/lib/selma/sanitizer/config/basic.rb +33 -2
- data/lib/selma/sanitizer/config/default.rb +40 -4
- data/lib/selma/sanitizer/config/relaxed.rb +69 -8
- data/lib/selma/version.rb +1 -1
- metadata +10 -52
- data/ext/selma/Cargo.toml +0 -14
- data/ext/selma/_util.rb +0 -102
- data/ext/selma/extconf.rb +0 -6
- data/ext/selma/src/html/element.rs +0 -254
- data/ext/selma/src/html/end_tag.rs +0 -35
- data/ext/selma/src/html/text_chunk.rs +0 -113
- data/ext/selma/src/html.rs +0 -19
- data/ext/selma/src/lib.rs +0 -50
- data/ext/selma/src/native_ref_wrap.rs +0 -79
- data/ext/selma/src/rewriter.rs +0 -429
- data/ext/selma/src/sanitizer.rs +0 -607
- data/ext/selma/src/selector.rs +0 -112
- data/ext/selma/src/tags.rs +0 -1136
- data/ext/selma/src/wrapped_struct.rs +0 -92
- data/selma.gemspec +0 -41
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8c130b05fe7738da1df29eb9fc125f1e06a08d35d82580163de4d542710d0b83
|
4
|
+
data.tar.gz: 15c186ca42e6c9ffdc07427419effa373c3686cefd6d6a33a60888854f8ef3e5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d58e7be164d0f66c3a06e94c0243d7f66089818365f6f40e781f211c66aa36345920265ad21be764070d45c468ad0cd2235748da97250c5737e0b95e77bff2fb
|
7
|
+
data.tar.gz: cdca69f98b98fc71c6b564adc410993af74d197464bee25ecc21ff7bbe9274c67542354cb6093b29d4661a4f53d48dbdbe14146a49b71e5220735176894d1f54
|
data/README.md
CHANGED
@@ -29,7 +29,7 @@ Selma can perform two different actions, either independently or together:
|
|
29
29
|
- Sanitize HTML, through a [Sanitize](https://github.com/rgrove/sanitize)-like allowlist syntax; and
|
30
30
|
- Select HTML using CSS rules, and manipulate elements and text nodes along the way.
|
31
31
|
|
32
|
-
It does this through two
|
32
|
+
It does this through two kwargs: `sanitizer` and `handlers`. The basic API for Selma looks like this:
|
33
33
|
|
34
34
|
```ruby
|
35
35
|
sanitizer_config = {
|
@@ -37,7 +37,9 @@ sanitizer_config = {
|
|
37
37
|
}
|
38
38
|
sanitizer = Selma::Sanitizer.new(sanitizer_config)
|
39
39
|
rewriter = Selma::Rewriter.new(sanitizer: sanitizer, handlers: [MatchElementRewrite.new, MatchTextRewrite.new])
|
40
|
-
|
40
|
+
# removes any element that is not ["b", "em", "i", "strong", "u"];
|
41
|
+
# then calls `MatchElementRewrite` and `MatchTextRewrite` on matching HTML elements
|
42
|
+
rewriter.rewrite(html)
|
41
43
|
```
|
42
44
|
|
43
45
|
Here's a look at each individual part.
|
data/lib/selma/3.1/selma.so
CHANGED
Binary file
|
Binary file
|
@@ -4,8 +4,39 @@ module Selma
|
|
4
4
|
class Sanitizer
|
5
5
|
module Config
|
6
6
|
BASIC = freeze_config(
|
7
|
-
elements: [
|
8
|
-
|
7
|
+
elements: [
|
8
|
+
"a",
|
9
|
+
"abbr",
|
10
|
+
"blockquote",
|
11
|
+
"b",
|
12
|
+
"br",
|
13
|
+
"cite",
|
14
|
+
"code",
|
15
|
+
"dd",
|
16
|
+
"dfn",
|
17
|
+
"dl",
|
18
|
+
"dt",
|
19
|
+
"em",
|
20
|
+
"i",
|
21
|
+
"kbd",
|
22
|
+
"li",
|
23
|
+
"mark",
|
24
|
+
"ol",
|
25
|
+
"p",
|
26
|
+
"pre",
|
27
|
+
"q",
|
28
|
+
"s",
|
29
|
+
"samp",
|
30
|
+
"small",
|
31
|
+
"strike",
|
32
|
+
"strong",
|
33
|
+
"sub",
|
34
|
+
"sup",
|
35
|
+
"time",
|
36
|
+
"u",
|
37
|
+
"ul",
|
38
|
+
"var",
|
39
|
+
],
|
9
40
|
|
10
41
|
attributes: {
|
11
42
|
"a" => ["href"],
|
@@ -33,13 +33,49 @@ module Selma
|
|
33
33
|
|
34
34
|
# An Array of element names whose contents will be removed. The contents
|
35
35
|
# of all other filtered elements will be left behind.
|
36
|
-
remove_contents: [
|
37
|
-
|
36
|
+
remove_contents: [
|
37
|
+
"iframe",
|
38
|
+
"math",
|
39
|
+
"noembed",
|
40
|
+
"noframes",
|
41
|
+
"noscript",
|
42
|
+
"plaintext",
|
43
|
+
"script",
|
44
|
+
"style",
|
45
|
+
"svg",
|
46
|
+
"xmp",
|
47
|
+
],
|
38
48
|
|
39
49
|
# Elements which, when removed, should have their contents surrounded by
|
40
50
|
# whitespace.
|
41
|
-
whitespace_elements: [
|
42
|
-
|
51
|
+
whitespace_elements: [
|
52
|
+
"address",
|
53
|
+
"article",
|
54
|
+
"aside",
|
55
|
+
"blockquote",
|
56
|
+
"br",
|
57
|
+
"dd",
|
58
|
+
"div",
|
59
|
+
"dl",
|
60
|
+
"dt",
|
61
|
+
"footer",
|
62
|
+
"h1",
|
63
|
+
"h2",
|
64
|
+
"h3",
|
65
|
+
"h4",
|
66
|
+
"h5",
|
67
|
+
"h6",
|
68
|
+
"header",
|
69
|
+
"hgroup",
|
70
|
+
"hr",
|
71
|
+
"li",
|
72
|
+
"nav",
|
73
|
+
"ol",
|
74
|
+
"p",
|
75
|
+
"pre",
|
76
|
+
"section",
|
77
|
+
"ul",
|
78
|
+
],
|
43
79
|
)
|
44
80
|
end
|
45
81
|
end
|
@@ -4,12 +4,60 @@ module Selma
|
|
4
4
|
class Sanitizer
|
5
5
|
module Config
|
6
6
|
RELAXED = freeze_config(
|
7
|
-
elements: BASIC[:elements] + [
|
8
|
-
|
7
|
+
elements: BASIC[:elements] + [
|
8
|
+
"address",
|
9
|
+
"article",
|
10
|
+
"aside",
|
11
|
+
"bdi",
|
12
|
+
"bdo",
|
13
|
+
"body",
|
14
|
+
"caption",
|
15
|
+
"col",
|
16
|
+
"colgroup",
|
17
|
+
"data",
|
18
|
+
"del",
|
19
|
+
"div",
|
20
|
+
"figcaption",
|
21
|
+
"figure",
|
22
|
+
"footer",
|
23
|
+
"h1",
|
24
|
+
"h2",
|
25
|
+
"h3",
|
26
|
+
"h4",
|
27
|
+
"h5",
|
28
|
+
"h6",
|
29
|
+
"head",
|
30
|
+
"header",
|
31
|
+
"hgroup",
|
32
|
+
"hr",
|
33
|
+
"html",
|
34
|
+
"img",
|
35
|
+
"ins",
|
36
|
+
"main",
|
37
|
+
"nav",
|
38
|
+
"rp",
|
39
|
+
"rt",
|
40
|
+
"ruby",
|
41
|
+
"section",
|
42
|
+
"span",
|
43
|
+
"style",
|
44
|
+
"summary",
|
45
|
+
"sup",
|
46
|
+
"table",
|
47
|
+
"tbody",
|
48
|
+
"td",
|
49
|
+
"tfoot",
|
50
|
+
"th",
|
51
|
+
"thead",
|
52
|
+
"title",
|
53
|
+
"tr",
|
54
|
+
"wbr",
|
55
|
+
],
|
9
56
|
|
10
57
|
allow_doctype: true,
|
11
58
|
|
12
|
-
attributes: merge(
|
59
|
+
attributes: merge(
|
60
|
+
BASIC[:attributes],
|
13
61
|
:all => ["class", "dir", "hidden", "id", "lang", "style", "tabindex", "title", "translate"],
|
14
62
|
"a" => ["href", "hreflang", "name", "rel"],
|
15
63
|
"col" => ["span", "width"],
|
@@ -21,16 +69,29 @@ module Selma
|
|
21
69
|
"li" => ["value"],
|
22
70
|
"ol" => ["reversed", "start", "type"],
|
23
71
|
"style" => ["media", "scoped", "type"],
|
24
|
-
"table" => [
|
25
|
-
|
72
|
+
"table" => [
|
73
|
+
"align",
|
74
|
+
"bgcolor",
|
75
|
+
"border",
|
76
|
+
"cellpadding",
|
77
|
+
"cellspacing",
|
78
|
+
"frame",
|
79
|
+
"rules",
|
80
|
+
"sortable",
|
81
|
+
"summary",
|
82
|
+
"width",
|
83
|
+
],
|
26
84
|
"td" => ["abbr", "align", "axis", "colspan", "headers", "rowspan", "valign", "width"],
|
27
85
|
"th" => ["abbr", "align", "axis", "colspan", "headers", "rowspan", "scope", "sorted", "valign", "width"],
|
28
|
-
"ul" => ["type"]
|
86
|
+
"ul" => ["type"],
|
87
|
+
),
|
29
88
|
|
30
|
-
protocols: merge(
|
89
|
+
protocols: merge(
|
90
|
+
BASIC[:protocols],
|
31
91
|
"del" => { "cite" => ["http", "https", :relative] },
|
32
92
|
"img" => { "src" => ["http", "https", :relative] },
|
33
|
-
"ins" => { "cite" => ["http", "https", :relative] }
|
93
|
+
"ins" => { "cite" => ["http", "https", :relative] },
|
94
|
+
),
|
34
95
|
)
|
35
96
|
end
|
36
97
|
end
|
data/lib/selma/version.rb
CHANGED
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: selma
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: x86_64-linux
|
6
6
|
authors:
|
7
7
|
- Garen J. Torikian
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-03-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: rb_sys
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0.9'
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0.9'
|
27
13
|
- !ruby/object:Gem::Dependency
|
28
14
|
name: rake
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,21 +38,7 @@ dependencies:
|
|
52
38
|
- - "~>"
|
53
39
|
- !ruby/object:Gem::Version
|
54
40
|
version: '1.2'
|
55
|
-
|
56
|
-
name: rake-compiler-dock
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - "~>"
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '1.2'
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - "~>"
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '1.2'
|
69
|
-
description:
|
41
|
+
description:
|
70
42
|
email:
|
71
43
|
- gjtorikian@gmail.com
|
72
44
|
executables: []
|
@@ -75,22 +47,9 @@ extra_rdoc_files: []
|
|
75
47
|
files:
|
76
48
|
- LICENSE.txt
|
77
49
|
- README.md
|
78
|
-
- ext/selma/Cargo.toml
|
79
|
-
- ext/selma/_util.rb
|
80
|
-
- ext/selma/extconf.rb
|
81
|
-
- ext/selma/src/html.rs
|
82
|
-
- ext/selma/src/html/element.rs
|
83
|
-
- ext/selma/src/html/end_tag.rs
|
84
|
-
- ext/selma/src/html/text_chunk.rs
|
85
|
-
- ext/selma/src/lib.rs
|
86
|
-
- ext/selma/src/native_ref_wrap.rs
|
87
|
-
- ext/selma/src/rewriter.rs
|
88
|
-
- ext/selma/src/sanitizer.rs
|
89
|
-
- ext/selma/src/selector.rs
|
90
|
-
- ext/selma/src/tags.rs
|
91
|
-
- ext/selma/src/wrapped_struct.rs
|
92
50
|
- lib/selma.rb
|
93
51
|
- lib/selma/3.1/selma.so
|
52
|
+
- lib/selma/3.2/selma.so
|
94
53
|
- lib/selma/extension.rb
|
95
54
|
- lib/selma/html.rb
|
96
55
|
- lib/selma/rewriter.rb
|
@@ -102,8 +61,7 @@ files:
|
|
102
61
|
- lib/selma/sanitizer/config/restricted.rb
|
103
62
|
- lib/selma/selector.rb
|
104
63
|
- lib/selma/version.rb
|
105
|
-
|
106
|
-
homepage:
|
64
|
+
homepage:
|
107
65
|
licenses:
|
108
66
|
- MIT
|
109
67
|
metadata:
|
@@ -111,7 +69,7 @@ metadata:
|
|
111
69
|
funding_uri: https://github.com/sponsors/gjtorikian/
|
112
70
|
source_code_uri: https://github.com/gjtorikian/selma
|
113
71
|
rubygems_mfa_required: 'true'
|
114
|
-
post_install_message:
|
72
|
+
post_install_message:
|
115
73
|
rdoc_options: []
|
116
74
|
require_paths:
|
117
75
|
- lib
|
@@ -122,15 +80,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
122
80
|
version: '3.1'
|
123
81
|
- - "<"
|
124
82
|
- !ruby/object:Gem::Version
|
125
|
-
version: 3.
|
83
|
+
version: 3.3.dev
|
126
84
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
127
85
|
requirements:
|
128
86
|
- - ">="
|
129
87
|
- !ruby/object:Gem::Version
|
130
88
|
version: 3.3.22
|
131
89
|
requirements: []
|
132
|
-
rubygems_version: 3.
|
133
|
-
signing_key:
|
90
|
+
rubygems_version: 3.4.4
|
91
|
+
signing_key:
|
134
92
|
specification_version: 4
|
135
93
|
summary: Selma selects and matches HTML nodes using CSS rules. Backed by Rust's lol_html
|
136
94
|
parser.
|
data/ext/selma/Cargo.toml
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
[package]
|
2
|
-
name = "selma"
|
3
|
-
version = "1.0.0"
|
4
|
-
edition = "2021"
|
5
|
-
|
6
|
-
[dependencies]
|
7
|
-
enum-iterator = "1.2"
|
8
|
-
escapist = "0.0.2"
|
9
|
-
magnus = { git = "https://github.com/matsadler/magnus", rev = "23160f7229ac74c42da1b5096a65ccbc40962697" }
|
10
|
-
lol_html = "0.3"
|
11
|
-
|
12
|
-
[lib]
|
13
|
-
name = "selma"
|
14
|
-
crate-type = ["cdylib"]
|
data/ext/selma/_util.rb
DELETED
@@ -1,102 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
RUBY_MAJOR, RUBY_MINOR = RUBY_VERSION.split(".").collect(&:to_i)
|
4
|
-
|
5
|
-
PACKAGE_ROOT_DIR = File.expand_path(File.join(File.dirname(__FILE__), "..", ".."))
|
6
|
-
PACKAGE_EXT_DIR = File.join(PACKAGE_ROOT_DIR, "ext", "selma")
|
7
|
-
|
8
|
-
OS = case os = RbConfig::CONFIG["host_os"].downcase
|
9
|
-
when /linux/
|
10
|
-
# The official ruby-alpine Docker containers pre-build Ruby. As a result,
|
11
|
-
# Ruby doesn't know that it's on a musl-based platform. `ldd` is the
|
12
|
-
# a more reliable way to detect musl.
|
13
|
-
# See https://github.com/skylightio/skylight-ruby/issues/92
|
14
|
-
if ENV["SKYLIGHT_MUSL"] || %x(ldd --version 2>&1).include?("musl")
|
15
|
-
"linux-musl"
|
16
|
-
else
|
17
|
-
"linux"
|
18
|
-
end
|
19
|
-
when /darwin/
|
20
|
-
"darwin"
|
21
|
-
when /freebsd/
|
22
|
-
"freebsd"
|
23
|
-
when /netbsd/
|
24
|
-
"netbsd"
|
25
|
-
when /openbsd/
|
26
|
-
"openbsd"
|
27
|
-
when /sunos|solaris/
|
28
|
-
"solaris"
|
29
|
-
when /mingw|mswin/
|
30
|
-
"windows"
|
31
|
-
else
|
32
|
-
os
|
33
|
-
end
|
34
|
-
|
35
|
-
# Normalize the platform CPU
|
36
|
-
ARCH = case cpu = RbConfig::CONFIG["host_cpu"].downcase
|
37
|
-
when /amd64|x86_64|x64/
|
38
|
-
"x86_64"
|
39
|
-
when /i?86|x86|i86pc/
|
40
|
-
"x86"
|
41
|
-
when /ppc|powerpc/
|
42
|
-
"powerpc"
|
43
|
-
when /^aarch/
|
44
|
-
"aarch"
|
45
|
-
when /^arm/
|
46
|
-
"arm"
|
47
|
-
else
|
48
|
-
cpu
|
49
|
-
end
|
50
|
-
|
51
|
-
def windows?
|
52
|
-
OS == "windows"
|
53
|
-
end
|
54
|
-
|
55
|
-
def solaris?
|
56
|
-
OS == solaries
|
57
|
-
end
|
58
|
-
|
59
|
-
def darwin?
|
60
|
-
OS == "darwin"
|
61
|
-
end
|
62
|
-
|
63
|
-
def macos?
|
64
|
-
darwin? || OS == "macos"
|
65
|
-
end
|
66
|
-
|
67
|
-
def openbsd?
|
68
|
-
OS == "openbsd"
|
69
|
-
end
|
70
|
-
|
71
|
-
def aix?
|
72
|
-
OS == "aix"
|
73
|
-
end
|
74
|
-
|
75
|
-
def nix?
|
76
|
-
!(windows? || solaris? || darwin?)
|
77
|
-
end
|
78
|
-
|
79
|
-
def x86_64?
|
80
|
-
ARCH == "x86_64"
|
81
|
-
end
|
82
|
-
|
83
|
-
def x86?
|
84
|
-
ARCH == "x86"
|
85
|
-
end
|
86
|
-
|
87
|
-
def abs_path(path)
|
88
|
-
File.join(PACKAGE_EXT_DIR, path)
|
89
|
-
end
|
90
|
-
|
91
|
-
def find_header_or_abort(header, *paths)
|
92
|
-
find_header(header, *paths) || abort("#{header} was expected in `#{paths.join(", ")}`, but it is missing.")
|
93
|
-
end
|
94
|
-
|
95
|
-
def find_library_or_abort(lib, func, *paths)
|
96
|
-
find_library(lib, func, *paths) || abort("#{lib} was expected in `#{paths.join(", ")}`, but it is missing.")
|
97
|
-
end
|
98
|
-
|
99
|
-
def concat_flags(*args)
|
100
|
-
args.compact.join(" ")
|
101
|
-
end
|
102
|
-
|