selma 0.0.6-arm64-darwin → 0.1.0-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -2
- data/lib/selma/3.1/selma.bundle +0 -0
- data/lib/selma/3.2/selma.bundle +0 -0
- data/lib/selma/sanitizer/config/basic.rb +33 -2
- data/lib/selma/sanitizer/config/default.rb +40 -4
- data/lib/selma/sanitizer/config/relaxed.rb +69 -8
- data/lib/selma/version.rb +1 -1
- metadata +10 -52
- data/ext/selma/Cargo.toml +0 -14
- data/ext/selma/_util.rb +0 -102
- data/ext/selma/extconf.rb +0 -6
- data/ext/selma/src/html/element.rs +0 -254
- data/ext/selma/src/html/end_tag.rs +0 -35
- data/ext/selma/src/html/text_chunk.rs +0 -113
- data/ext/selma/src/html.rs +0 -19
- data/ext/selma/src/lib.rs +0 -50
- data/ext/selma/src/native_ref_wrap.rs +0 -79
- data/ext/selma/src/rewriter.rs +0 -429
- data/ext/selma/src/sanitizer.rs +0 -607
- data/ext/selma/src/selector.rs +0 -112
- data/ext/selma/src/tags.rs +0 -1136
- data/ext/selma/src/wrapped_struct.rs +0 -92
- data/selma.gemspec +0 -41
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '03387493da2fdd5dafcb0a92a16c9b9d2ec5dd6243231ea0c38d326b9808bdb5'
|
4
|
+
data.tar.gz: 40cd15d7511ce3cf8c4c6004748e76adec4273a792e51c6c2b919e0d87d09cc9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b98b3ea69bf997b2a4dba0d44933273ff37ff8ac253c794e5f62231628cea17b8297c36f7c4897e7ae3405b1b7f1acb76c5d8f1b46a55441e9d72ecb0e947383
|
7
|
+
data.tar.gz: '004899441026f8d069243c3358aa5d91bf9c47e664ab335edf896a5f546c2d13e8c045c128052c648b778b933f61ab52d7fc673022902d86e64bb800329f5c32'
|
data/README.md
CHANGED
@@ -29,7 +29,7 @@ Selma can perform two different actions, either independently or together:
|
|
29
29
|
- Sanitize HTML, through a [Sanitize](https://github.com/rgrove/sanitize)-like allowlist syntax; and
|
30
30
|
- Select HTML using CSS rules, and manipulate elements and text nodes along the way.
|
31
31
|
|
32
|
-
It does this through two
|
32
|
+
It does this through two kwargs: `sanitizer` and `handlers`. The basic API for Selma looks like this:
|
33
33
|
|
34
34
|
```ruby
|
35
35
|
sanitizer_config = {
|
@@ -37,7 +37,9 @@ sanitizer_config = {
|
|
37
37
|
}
|
38
38
|
sanitizer = Selma::Sanitizer.new(sanitizer_config)
|
39
39
|
rewriter = Selma::Rewriter.new(sanitizer: sanitizer, handlers: [MatchElementRewrite.new, MatchTextRewrite.new])
|
40
|
-
|
40
|
+
# removes any element that is not ["b", "em", "i", "strong", "u"];
|
41
|
+
# then calls `MatchElementRewrite` and `MatchTextRewrite` on matching HTML elements
|
42
|
+
rewriter.rewrite(html)
|
41
43
|
```
|
42
44
|
|
43
45
|
Here's a look at each individual part.
|
data/lib/selma/3.1/selma.bundle
CHANGED
Binary file
|
Binary file
|
@@ -4,8 +4,39 @@ module Selma
|
|
4
4
|
class Sanitizer
|
5
5
|
module Config
|
6
6
|
BASIC = freeze_config(
|
7
|
-
elements: [
|
8
|
-
|
7
|
+
elements: [
|
8
|
+
"a",
|
9
|
+
"abbr",
|
10
|
+
"blockquote",
|
11
|
+
"b",
|
12
|
+
"br",
|
13
|
+
"cite",
|
14
|
+
"code",
|
15
|
+
"dd",
|
16
|
+
"dfn",
|
17
|
+
"dl",
|
18
|
+
"dt",
|
19
|
+
"em",
|
20
|
+
"i",
|
21
|
+
"kbd",
|
22
|
+
"li",
|
23
|
+
"mark",
|
24
|
+
"ol",
|
25
|
+
"p",
|
26
|
+
"pre",
|
27
|
+
"q",
|
28
|
+
"s",
|
29
|
+
"samp",
|
30
|
+
"small",
|
31
|
+
"strike",
|
32
|
+
"strong",
|
33
|
+
"sub",
|
34
|
+
"sup",
|
35
|
+
"time",
|
36
|
+
"u",
|
37
|
+
"ul",
|
38
|
+
"var",
|
39
|
+
],
|
9
40
|
|
10
41
|
attributes: {
|
11
42
|
"a" => ["href"],
|
@@ -33,13 +33,49 @@ module Selma
|
|
33
33
|
|
34
34
|
# An Array of element names whose contents will be removed. The contents
|
35
35
|
# of all other filtered elements will be left behind.
|
36
|
-
remove_contents: [
|
37
|
-
|
36
|
+
remove_contents: [
|
37
|
+
"iframe",
|
38
|
+
"math",
|
39
|
+
"noembed",
|
40
|
+
"noframes",
|
41
|
+
"noscript",
|
42
|
+
"plaintext",
|
43
|
+
"script",
|
44
|
+
"style",
|
45
|
+
"svg",
|
46
|
+
"xmp",
|
47
|
+
],
|
38
48
|
|
39
49
|
# Elements which, when removed, should have their contents surrounded by
|
40
50
|
# whitespace.
|
41
|
-
whitespace_elements: [
|
42
|
-
|
51
|
+
whitespace_elements: [
|
52
|
+
"address",
|
53
|
+
"article",
|
54
|
+
"aside",
|
55
|
+
"blockquote",
|
56
|
+
"br",
|
57
|
+
"dd",
|
58
|
+
"div",
|
59
|
+
"dl",
|
60
|
+
"dt",
|
61
|
+
"footer",
|
62
|
+
"h1",
|
63
|
+
"h2",
|
64
|
+
"h3",
|
65
|
+
"h4",
|
66
|
+
"h5",
|
67
|
+
"h6",
|
68
|
+
"header",
|
69
|
+
"hgroup",
|
70
|
+
"hr",
|
71
|
+
"li",
|
72
|
+
"nav",
|
73
|
+
"ol",
|
74
|
+
"p",
|
75
|
+
"pre",
|
76
|
+
"section",
|
77
|
+
"ul",
|
78
|
+
],
|
43
79
|
)
|
44
80
|
end
|
45
81
|
end
|
@@ -4,12 +4,60 @@ module Selma
|
|
4
4
|
class Sanitizer
|
5
5
|
module Config
|
6
6
|
RELAXED = freeze_config(
|
7
|
-
elements: BASIC[:elements] + [
|
8
|
-
|
7
|
+
elements: BASIC[:elements] + [
|
8
|
+
"address",
|
9
|
+
"article",
|
10
|
+
"aside",
|
11
|
+
"bdi",
|
12
|
+
"bdo",
|
13
|
+
"body",
|
14
|
+
"caption",
|
15
|
+
"col",
|
16
|
+
"colgroup",
|
17
|
+
"data",
|
18
|
+
"del",
|
19
|
+
"div",
|
20
|
+
"figcaption",
|
21
|
+
"figure",
|
22
|
+
"footer",
|
23
|
+
"h1",
|
24
|
+
"h2",
|
25
|
+
"h3",
|
26
|
+
"h4",
|
27
|
+
"h5",
|
28
|
+
"h6",
|
29
|
+
"head",
|
30
|
+
"header",
|
31
|
+
"hgroup",
|
32
|
+
"hr",
|
33
|
+
"html",
|
34
|
+
"img",
|
35
|
+
"ins",
|
36
|
+
"main",
|
37
|
+
"nav",
|
38
|
+
"rp",
|
39
|
+
"rt",
|
40
|
+
"ruby",
|
41
|
+
"section",
|
42
|
+
"span",
|
43
|
+
"style",
|
44
|
+
"summary",
|
45
|
+
"sup",
|
46
|
+
"table",
|
47
|
+
"tbody",
|
48
|
+
"td",
|
49
|
+
"tfoot",
|
50
|
+
"th",
|
51
|
+
"thead",
|
52
|
+
"title",
|
53
|
+
"tr",
|
54
|
+
"wbr",
|
55
|
+
],
|
9
56
|
|
10
57
|
allow_doctype: true,
|
11
58
|
|
12
|
-
attributes: merge(
|
59
|
+
attributes: merge(
|
60
|
+
BASIC[:attributes],
|
13
61
|
:all => ["class", "dir", "hidden", "id", "lang", "style", "tabindex", "title", "translate"],
|
14
62
|
"a" => ["href", "hreflang", "name", "rel"],
|
15
63
|
"col" => ["span", "width"],
|
@@ -21,16 +69,29 @@ module Selma
|
|
21
69
|
"li" => ["value"],
|
22
70
|
"ol" => ["reversed", "start", "type"],
|
23
71
|
"style" => ["media", "scoped", "type"],
|
24
|
-
"table" => [
|
25
|
-
|
72
|
+
"table" => [
|
73
|
+
"align",
|
74
|
+
"bgcolor",
|
75
|
+
"border",
|
76
|
+
"cellpadding",
|
77
|
+
"cellspacing",
|
78
|
+
"frame",
|
79
|
+
"rules",
|
80
|
+
"sortable",
|
81
|
+
"summary",
|
82
|
+
"width",
|
83
|
+
],
|
26
84
|
"td" => ["abbr", "align", "axis", "colspan", "headers", "rowspan", "valign", "width"],
|
27
85
|
"th" => ["abbr", "align", "axis", "colspan", "headers", "rowspan", "scope", "sorted", "valign", "width"],
|
28
|
-
"ul" => ["type"]
|
86
|
+
"ul" => ["type"],
|
87
|
+
),
|
29
88
|
|
30
|
-
protocols: merge(
|
89
|
+
protocols: merge(
|
90
|
+
BASIC[:protocols],
|
31
91
|
"del" => { "cite" => ["http", "https", :relative] },
|
32
92
|
"img" => { "src" => ["http", "https", :relative] },
|
33
|
-
"ins" => { "cite" => ["http", "https", :relative] }
|
93
|
+
"ins" => { "cite" => ["http", "https", :relative] },
|
94
|
+
),
|
34
95
|
)
|
35
96
|
end
|
36
97
|
end
|
data/lib/selma/version.rb
CHANGED
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: selma
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.1.0
|
5
5
|
platform: arm64-darwin
|
6
6
|
authors:
|
7
7
|
- Garen J. Torikian
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-03-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: rb_sys
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0.9'
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0.9'
|
27
13
|
- !ruby/object:Gem::Dependency
|
28
14
|
name: rake
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,21 +38,7 @@ dependencies:
|
|
52
38
|
- - "~>"
|
53
39
|
- !ruby/object:Gem::Version
|
54
40
|
version: '1.2'
|
55
|
-
|
56
|
-
name: rake-compiler-dock
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - "~>"
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '1.2'
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - "~>"
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '1.2'
|
69
|
-
description:
|
41
|
+
description:
|
70
42
|
email:
|
71
43
|
- gjtorikian@gmail.com
|
72
44
|
executables: []
|
@@ -75,22 +47,9 @@ extra_rdoc_files: []
|
|
75
47
|
files:
|
76
48
|
- LICENSE.txt
|
77
49
|
- README.md
|
78
|
-
- ext/selma/Cargo.toml
|
79
|
-
- ext/selma/_util.rb
|
80
|
-
- ext/selma/extconf.rb
|
81
|
-
- ext/selma/src/html.rs
|
82
|
-
- ext/selma/src/html/element.rs
|
83
|
-
- ext/selma/src/html/end_tag.rs
|
84
|
-
- ext/selma/src/html/text_chunk.rs
|
85
|
-
- ext/selma/src/lib.rs
|
86
|
-
- ext/selma/src/native_ref_wrap.rs
|
87
|
-
- ext/selma/src/rewriter.rs
|
88
|
-
- ext/selma/src/sanitizer.rs
|
89
|
-
- ext/selma/src/selector.rs
|
90
|
-
- ext/selma/src/tags.rs
|
91
|
-
- ext/selma/src/wrapped_struct.rs
|
92
50
|
- lib/selma.rb
|
93
51
|
- lib/selma/3.1/selma.bundle
|
52
|
+
- lib/selma/3.2/selma.bundle
|
94
53
|
- lib/selma/extension.rb
|
95
54
|
- lib/selma/html.rb
|
96
55
|
- lib/selma/rewriter.rb
|
@@ -102,8 +61,7 @@ files:
|
|
102
61
|
- lib/selma/sanitizer/config/restricted.rb
|
103
62
|
- lib/selma/selector.rb
|
104
63
|
- lib/selma/version.rb
|
105
|
-
|
106
|
-
homepage:
|
64
|
+
homepage:
|
107
65
|
licenses:
|
108
66
|
- MIT
|
109
67
|
metadata:
|
@@ -111,7 +69,7 @@ metadata:
|
|
111
69
|
funding_uri: https://github.com/sponsors/gjtorikian/
|
112
70
|
source_code_uri: https://github.com/gjtorikian/selma
|
113
71
|
rubygems_mfa_required: 'true'
|
114
|
-
post_install_message:
|
72
|
+
post_install_message:
|
115
73
|
rdoc_options: []
|
116
74
|
require_paths:
|
117
75
|
- lib
|
@@ -122,15 +80,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
122
80
|
version: '3.1'
|
123
81
|
- - "<"
|
124
82
|
- !ruby/object:Gem::Version
|
125
|
-
version: 3.
|
83
|
+
version: 3.3.dev
|
126
84
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
127
85
|
requirements:
|
128
86
|
- - ">="
|
129
87
|
- !ruby/object:Gem::Version
|
130
88
|
version: 3.3.22
|
131
89
|
requirements: []
|
132
|
-
rubygems_version: 3.
|
133
|
-
signing_key:
|
90
|
+
rubygems_version: 3.4.4
|
91
|
+
signing_key:
|
134
92
|
specification_version: 4
|
135
93
|
summary: Selma selects and matches HTML nodes using CSS rules. Backed by Rust's lol_html
|
136
94
|
parser.
|
data/ext/selma/Cargo.toml
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
[package]
|
2
|
-
name = "selma"
|
3
|
-
version = "1.0.0"
|
4
|
-
edition = "2021"
|
5
|
-
|
6
|
-
[dependencies]
|
7
|
-
enum-iterator = "1.2"
|
8
|
-
escapist = "0.0.2"
|
9
|
-
magnus = { git = "https://github.com/matsadler/magnus", rev = "23160f7229ac74c42da1b5096a65ccbc40962697" }
|
10
|
-
lol_html = "0.3"
|
11
|
-
|
12
|
-
[lib]
|
13
|
-
name = "selma"
|
14
|
-
crate-type = ["cdylib"]
|
data/ext/selma/_util.rb
DELETED
@@ -1,102 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
RUBY_MAJOR, RUBY_MINOR = RUBY_VERSION.split(".").collect(&:to_i)
|
4
|
-
|
5
|
-
PACKAGE_ROOT_DIR = File.expand_path(File.join(File.dirname(__FILE__), "..", ".."))
|
6
|
-
PACKAGE_EXT_DIR = File.join(PACKAGE_ROOT_DIR, "ext", "selma")
|
7
|
-
|
8
|
-
OS = case os = RbConfig::CONFIG["host_os"].downcase
|
9
|
-
when /linux/
|
10
|
-
# The official ruby-alpine Docker containers pre-build Ruby. As a result,
|
11
|
-
# Ruby doesn't know that it's on a musl-based platform. `ldd` is the
|
12
|
-
# a more reliable way to detect musl.
|
13
|
-
# See https://github.com/skylightio/skylight-ruby/issues/92
|
14
|
-
if ENV["SKYLIGHT_MUSL"] || %x(ldd --version 2>&1).include?("musl")
|
15
|
-
"linux-musl"
|
16
|
-
else
|
17
|
-
"linux"
|
18
|
-
end
|
19
|
-
when /darwin/
|
20
|
-
"darwin"
|
21
|
-
when /freebsd/
|
22
|
-
"freebsd"
|
23
|
-
when /netbsd/
|
24
|
-
"netbsd"
|
25
|
-
when /openbsd/
|
26
|
-
"openbsd"
|
27
|
-
when /sunos|solaris/
|
28
|
-
"solaris"
|
29
|
-
when /mingw|mswin/
|
30
|
-
"windows"
|
31
|
-
else
|
32
|
-
os
|
33
|
-
end
|
34
|
-
|
35
|
-
# Normalize the platform CPU
|
36
|
-
ARCH = case cpu = RbConfig::CONFIG["host_cpu"].downcase
|
37
|
-
when /amd64|x86_64|x64/
|
38
|
-
"x86_64"
|
39
|
-
when /i?86|x86|i86pc/
|
40
|
-
"x86"
|
41
|
-
when /ppc|powerpc/
|
42
|
-
"powerpc"
|
43
|
-
when /^aarch/
|
44
|
-
"aarch"
|
45
|
-
when /^arm/
|
46
|
-
"arm"
|
47
|
-
else
|
48
|
-
cpu
|
49
|
-
end
|
50
|
-
|
51
|
-
def windows?
|
52
|
-
OS == "windows"
|
53
|
-
end
|
54
|
-
|
55
|
-
def solaris?
|
56
|
-
OS == solaries
|
57
|
-
end
|
58
|
-
|
59
|
-
def darwin?
|
60
|
-
OS == "darwin"
|
61
|
-
end
|
62
|
-
|
63
|
-
def macos?
|
64
|
-
darwin? || OS == "macos"
|
65
|
-
end
|
66
|
-
|
67
|
-
def openbsd?
|
68
|
-
OS == "openbsd"
|
69
|
-
end
|
70
|
-
|
71
|
-
def aix?
|
72
|
-
OS == "aix"
|
73
|
-
end
|
74
|
-
|
75
|
-
def nix?
|
76
|
-
!(windows? || solaris? || darwin?)
|
77
|
-
end
|
78
|
-
|
79
|
-
def x86_64?
|
80
|
-
ARCH == "x86_64"
|
81
|
-
end
|
82
|
-
|
83
|
-
def x86?
|
84
|
-
ARCH == "x86"
|
85
|
-
end
|
86
|
-
|
87
|
-
def abs_path(path)
|
88
|
-
File.join(PACKAGE_EXT_DIR, path)
|
89
|
-
end
|
90
|
-
|
91
|
-
def find_header_or_abort(header, *paths)
|
92
|
-
find_header(header, *paths) || abort("#{header} was expected in `#{paths.join(", ")}`, but it is missing.")
|
93
|
-
end
|
94
|
-
|
95
|
-
def find_library_or_abort(lib, func, *paths)
|
96
|
-
find_library(lib, func, *paths) || abort("#{lib} was expected in `#{paths.join(", ")}`, but it is missing.")
|
97
|
-
end
|
98
|
-
|
99
|
-
def concat_flags(*args)
|
100
|
-
args.compact.join(" ")
|
101
|
-
end
|
102
|
-
|