selma 0.0.2-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +173 -0
- data/ext/selma/Cargo.toml +14 -0
- data/ext/selma/_util.rb +102 -0
- data/ext/selma/extconf.rb +6 -0
- data/ext/selma/src/html/element.rs +195 -0
- data/ext/selma/src/html/end_tag.rs +35 -0
- data/ext/selma/src/html.rs +17 -0
- data/ext/selma/src/lib.rs +23 -0
- data/ext/selma/src/native_ref_wrap.rs +79 -0
- data/ext/selma/src/rewriter.rs +441 -0
- data/ext/selma/src/sanitizer.rs +578 -0
- data/ext/selma/src/selector.rs +115 -0
- data/ext/selma/src/tags.rs +1133 -0
- data/ext/selma/src/wrapped_struct.rs +92 -0
- data/lib/selma/3.1/selma.so +0 -0
- data/lib/selma/extension.rb +14 -0
- data/lib/selma/html.rb +6 -0
- data/lib/selma/rewriter.rb +6 -0
- data/lib/selma/sanitizer/config/basic.rb +27 -0
- data/lib/selma/sanitizer/config/default.rb +42 -0
- data/lib/selma/sanitizer/config/relaxed.rb +37 -0
- data/lib/selma/sanitizer/config/restricted.rb +13 -0
- data/lib/selma/sanitizer/config.rb +67 -0
- data/lib/selma/sanitizer.rb +85 -0
- data/lib/selma/selector.rb +6 -0
- data/lib/selma/version.rb +5 -0
- data/lib/selma.rb +13 -0
- data/selma.gemspec +41 -0
- metadata +136 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 76a748290e61c06cf1aa8b4df13ad2bcf77e9e87fcbd575968854acd2dd9e55b
|
4
|
+
data.tar.gz: 3aa85c75fd95e90b3f991883460033f885279c75b607c0043fe3fc23957f6755
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a5de62db9d9c2a9994a34b527c4a218417228bf716848c97785cf4d1db66148defe243614c89a4e96985b32e0bec1796991a0a72a1d9ec95e34469f2eb9141ec
|
7
|
+
data.tar.gz: a8d2c7c951ea57c31f2b482f017a093f0062f34f9f6ef2009e8e5e39a765ed8e3665cd7575b32b6be12ba9d8b911a9863438edf6aad7d258da83ff3f6e322792
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2022 Garen J. Torikian
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,173 @@
|
|
1
|
+
# Selma
|
2
|
+
|
3
|
+
Selma **sel**ects and **ma**tches HTML nodes using CSS rules. (It can also reject/delete nodes, but then the name isn't as cool.) It's mostly an idiomatic wrapper around Cloudflare's [lol-html](https://github.com/cloudflare/lol-html) project.
|
4
|
+
|
5
|
+
![Principal Skinner asking Selma after their date: 'Isn't it nice we hate the same things?'](https://user-images.githubusercontent.com/64050/207155384-14e8bd40-780c-466f-bfff-31a8a8fc3d25.jpg)
|
6
|
+
|
7
|
+
Selma's strength (aside from being backed by Rust) is that HTML content is parsed _once_ and can be manipulated multiple times.
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
Add this line to your application's Gemfile:
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
gem 'selma'
|
15
|
+
```
|
16
|
+
|
17
|
+
And then execute:
|
18
|
+
|
19
|
+
$ bundle install
|
20
|
+
|
21
|
+
Or install it yourself as:
|
22
|
+
|
23
|
+
$ gem install selma
|
24
|
+
|
25
|
+
## Usage
|
26
|
+
|
27
|
+
Selma can perform two different actions:
|
28
|
+
|
29
|
+
- Sanitize HTML, through a [Sanitize](https://github.com/rgrove/sanitize)-like allowlist syntax; and
|
30
|
+
- Select HTML using CSS rules, and manipulate elements and text
|
31
|
+
|
32
|
+
The basic API for Selma looks like this:
|
33
|
+
|
34
|
+
```ruby
|
35
|
+
rewriter = Selma::Rewriter.new(sanitizer: sanitizer_config, handlers: [MatchAttribute.new, TextRewrite.new])
|
36
|
+
rewriter.rewrite(html)
|
37
|
+
```
|
38
|
+
|
39
|
+
Let's take a look at each part individually.
|
40
|
+
|
41
|
+
### Sanitization config
|
42
|
+
|
43
|
+
Selma sanitizes by default. That is, even if the `sanitizer` kwarg is not passed in, sanitization occurs. If you want to disable HTML sanitization (for some reason), pass `nil`:
|
44
|
+
|
45
|
+
```ruby
|
46
|
+
Selma::Rewriter.new(sanitizer: nil) # dangerous and ill-advised
|
47
|
+
```
|
48
|
+
|
49
|
+
The configuration for the sanitization process is based on the following key-value hash allowlist:
|
50
|
+
|
51
|
+
```ruby
|
52
|
+
# Whether or not to allow HTML comments.
|
53
|
+
allow_comments: false,
|
54
|
+
|
55
|
+
# Whether or not to allow well-formed HTML doctype declarations such as
|
56
|
+
# "<!DOCTYPE html>" when sanitizing a document.
|
57
|
+
allow_doctype: false,
|
58
|
+
|
59
|
+
# HTML attributes to allow in specific elements. The key is the name of the element,
|
60
|
+
# and the value is an array of allowed attributes. By default, no attributes
|
61
|
+
# are allowed.
|
62
|
+
attributes: {
|
63
|
+
"a" => ["href"],
|
64
|
+
"img" => ["src"],
|
65
|
+
},
|
66
|
+
|
67
|
+
# HTML elements to allow. By default, no elements are allowed (which means
|
68
|
+
# that all HTML will be stripped).
|
69
|
+
elements: ["a", "b", "img", ],
|
70
|
+
|
71
|
+
# URL handling protocols to allow in specific attributes. By default, no
|
72
|
+
# protocols are allowed. Use :relative in place of a protocol if you want
|
73
|
+
# to allow relative URLs sans protocol.
|
74
|
+
protocols: {
|
75
|
+
"a" => { "href" => ["http", "https", "mailto", :relative] },
|
76
|
+
"img" => { "src" => ["http", "https"] },
|
77
|
+
},
|
78
|
+
|
79
|
+
# An Array of element names whose contents will be removed. The contents
|
80
|
+
# of all other filtered elements will be left behind.
|
81
|
+
remove_contents: ["iframe", "math", "noembed", "noframes", "noscript"],
|
82
|
+
|
83
|
+
# Elements which, when removed, should have their contents surrounded by
|
84
|
+
# whitespace.
|
85
|
+
whitespace_elements: ["blockquote", "h1", "h2", "h3", "h4", "h5", "h6", ]
|
86
|
+
```
|
87
|
+
|
88
|
+
### Defining handlers
|
89
|
+
|
90
|
+
The real power in Selma comes in its use of handlers. A handler is simply an object with various methods:
|
91
|
+
|
92
|
+
- `selector`, a method which MUST return an instance of `Selma::Selector` which defines the CSS rules to match
|
93
|
+
- `handle_element`, a method that's called on each matched element
|
94
|
+
- `handle_text`, a method that's called on each matched text node; this MUST return a string
|
95
|
+
|
96
|
+
Here's an example which rewrites the `href` attribute on `a` and the `src` attribute on `img` to be `https` rather than `http`.
|
97
|
+
|
98
|
+
```ruby
|
99
|
+
class MatchAttribute
|
100
|
+
SELECTOR = Selma::Selector.new(match_element: "a, img")
|
101
|
+
|
102
|
+
def handle_element(element)
|
103
|
+
if element.tag_name == "a" && element["href"] =~ /^http:/
|
104
|
+
element["href"] = rename_http(element["href"])
|
105
|
+
elsif element.tag_name == "img" && element["src"] =~ /^http:/
|
106
|
+
element["src"] = rename_http(element["src"])
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
private def rename_http(link)
|
111
|
+
link.sub("http", "https")
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
rewriter = Selma::Rewriter.new(handlers: [MatchAttribute.new])
|
116
|
+
```
|
117
|
+
|
118
|
+
The `Selma::Selector` object has three possible kwargs:
|
119
|
+
|
120
|
+
- `match_element`: any element which matches this CSS rule will be passed on to `handle_element`
|
121
|
+
- `match_text_within`: any element which matches this CSS rule will be passed on to `handle_text`
|
122
|
+
- `ignore_text_within`: this is an array of element names whose text contents will be ignored
|
123
|
+
|
124
|
+
You've seen an example of `match_element`; here's one for `match_text_within` which changes strings in various elements which are _not_ `pre` or `code`:
|
125
|
+
|
126
|
+
```ruby
|
127
|
+
|
128
|
+
class MatchText
|
129
|
+
SELECTOR = Selma::Selector.new(match_text_within: "*", ignore_text_within: ["pre", "code"])
|
130
|
+
|
131
|
+
def selector
|
132
|
+
SELECTOR
|
133
|
+
end
|
134
|
+
|
135
|
+
def handle_text(text)
|
136
|
+
text.sub(/@.+/) { |mention| "<a href=\"www.yetto.app/#{mention}\">" }
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
rewriter = Selma::Rewriter.new(handlers: [MatchText.new])
|
141
|
+
```
|
142
|
+
|
143
|
+
#### `element` methods
|
144
|
+
|
145
|
+
The `element` argument in `handle_element` has the following methods:
|
146
|
+
|
147
|
+
- `tag_name`: The element's name
|
148
|
+
- `[]`: get an attribute
|
149
|
+
- `[]=`: set an attribute
|
150
|
+
- `remove_attribute`: remove an attribute
|
151
|
+
- `attributes`: list all the attributes
|
152
|
+
- `ancestors`: list all the ancestors
|
153
|
+
- `append(content, content_type)`: appends `content` to the element's inner content, i.e. inserts content right before the element's end tag. `content_type` is either `:as_text` or `:as_html` and determines how the content will be applied.
|
154
|
+
- `wrap(start_text, end_text, content_type)`: adds `start_text` before an element and `end_text` after an element. `content_type` is either `:as_text` or `:as_html` and determines how the content will be applied.
|
155
|
+
- `set_inner_content(content, content_type)`: replaces inner content of the element with `content`. `content_type` is either `:as_text` or `:as_html` and determines how the content will be applied.
|
156
|
+
|
157
|
+
## Benchmarks
|
158
|
+
|
159
|
+
TBD
|
160
|
+
|
161
|
+
## Contributing
|
162
|
+
|
163
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/gjtorikian/selma. This project is a safe, welcoming space for collaboration.
|
164
|
+
|
165
|
+
## Acknowledgements
|
166
|
+
|
167
|
+
- https://github.com/flavorjones/ruby-c-extensions-explained#strategy-3-precompiled and [Nokogiri](https://github.com/sparklemotion/nokogiri) for hints on how to ship precompiled cross-platform gems
|
168
|
+
- @vmg for his work at GitHub on goomba, from which some design patterns were learned
|
169
|
+
- [sanitize](https://github.com/rgrove/sanitize) for a comprehensive configuration API and test suite
|
170
|
+
|
171
|
+
## License
|
172
|
+
|
173
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
@@ -0,0 +1,14 @@
|
|
1
|
+
[package]
|
2
|
+
name = "selma"
|
3
|
+
version = "1.0.0"
|
4
|
+
edition = "2021"
|
5
|
+
|
6
|
+
[dependencies]
|
7
|
+
enum-iterator = "1.2"
|
8
|
+
escapist = "0.0.1"
|
9
|
+
magnus = "0.4"
|
10
|
+
lol_html = { git = "https://github.com/cloudflare/lol-html", rev = "b09b7afbbcecb944f4bf338b0e669c430d91061e" }
|
11
|
+
|
12
|
+
[lib]
|
13
|
+
name = "selma"
|
14
|
+
crate-type = ["cdylib"]
|
data/ext/selma/_util.rb
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Leading (major, minor) components of the running Ruby's version, as integers.
RUBY_MAJOR, RUBY_MINOR = RUBY_VERSION.split(".").map(&:to_i)

# Absolute path two directories above the directory holding this file.
PACKAGE_ROOT_DIR = File.expand_path("../..", File.dirname(__FILE__))
# Location of the native extension sources beneath the package root.
PACKAGE_EXT_DIR = File.join(PACKAGE_ROOT_DIR, "ext", "selma")
|
7
|
+
|
8
|
+
# Normalized operating-system name derived from RbConfig's `host_os`.
# Systems that match none of the patterns keep their lowercased `host_os`.
OS = begin
  host_os = RbConfig::CONFIG["host_os"].downcase

  case host_os
  when /linux/
    # The official ruby-alpine Docker containers pre-build Ruby. As a result,
    # Ruby doesn't know that it's on a musl-based platform. `ldd` is a more
    # reliable way to detect musl.
    # See https://github.com/skylightio/skylight-ruby/issues/92
    musl = ENV["SKYLIGHT_MUSL"] || %x(ldd --version 2>&1).include?("musl")
    musl ? "linux-musl" : "linux"
  when /darwin/ then "darwin"
  when /freebsd/ then "freebsd"
  when /netbsd/ then "netbsd"
  when /openbsd/ then "openbsd"
  when /sunos|solaris/ then "solaris"
  when /mingw|mswin/ then "windows"
  else host_os
  end
end
|
34
|
+
|
35
|
+
# Normalized CPU family derived from RbConfig's `host_cpu`.
# The patterns are tried in order, so the 64-bit x86 spellings win over the
# broader 32-bit pattern; unmatched CPUs keep their lowercased `host_cpu`.
ARCH = begin
  host_cpu = RbConfig::CONFIG["host_cpu"].downcase

  case host_cpu
  when /amd64|x86_64|x64/ then "x86_64"
  when /i?86|x86|i86pc/ then "x86"
  when /ppc|powerpc/ then "powerpc"
  when /^aarch/ then "aarch"
  when /^arm/ then "arm"
  else host_cpu
  end
end
|
50
|
+
|
51
|
+
# True when the normalized OS name is "windows".
def windows?
  OS.eql?("windows")
end
|
54
|
+
|
55
|
+
# True when the normalized OS name is "solaris".
#
# The comparison must be against the string literal: the previous bare
# identifier `solaries` was an undefined name, so every call raised NameError.
def solaris?
  OS == "solaris"
end
|
58
|
+
|
59
|
+
# True when the normalized OS name is "darwin".
def darwin?
  OS.eql?("darwin")
end
|
62
|
+
|
63
|
+
# True when the OS is reported as either "macos" or "darwin".
def macos?
  return true if darwin?

  OS.eql?("macos")
end
|
66
|
+
|
67
|
+
# True when the normalized OS name is "openbsd".
def openbsd?
  OS.eql?("openbsd")
end
|
70
|
+
|
71
|
+
# True when the normalized OS name is "aix".
def aix?
  OS.eql?("aix")
end
|
74
|
+
|
75
|
+
# True for every platform that is not Windows, Solaris, or Darwin.
def nix?
  !windows? && !solaris? && !darwin?
end
|
78
|
+
|
79
|
+
# True when the normalized CPU family is "x86_64".
def x86_64?
  ARCH.eql?("x86_64")
end
|
82
|
+
|
83
|
+
# True when the normalized CPU family is "x86".
def x86?
  ARCH.eql?("x86")
end
|
86
|
+
|
87
|
+
# Joins +path+ onto the extension directory (PACKAGE_EXT_DIR).
def abs_path(path)
  File.join([PACKAGE_EXT_DIR, path])
end
|
90
|
+
|
91
|
+
# Returns whatever `find_header` returns when it is truthy; otherwise aborts
# the process with a message naming the header and the paths searched.
def find_header_or_abort(header, *paths)
  located = find_header(header, *paths)
  return located if located

  abort("#{header} was expected in `#{paths.join(", ")}`, but it is missing.")
end
|
94
|
+
|
95
|
+
# Returns whatever `find_library` returns when it is truthy; otherwise aborts
# the process with a message naming the library and the paths searched.
def find_library_or_abort(lib, func, *paths)
  located = find_library(lib, func, *paths)
  return located if located

  abort("#{lib} was expected in `#{paths.join(", ")}`, but it is missing.")
end
|
98
|
+
|
99
|
+
# Joins the given flags with single spaces, skipping any nil entries.
def concat_flags(*args)
  args.reject(&:nil?).join(" ")
end
|
102
|
+
|
@@ -0,0 +1,195 @@
|
|
1
|
+
use std::borrow::Cow;
|
2
|
+
|
3
|
+
use crate::native_ref_wrap::NativeRefWrap;
|
4
|
+
use lol_html::html_content::{ContentType, Element};
|
5
|
+
use magnus::{exception, method, Error, Module, RArray, RClass, RHash, RString, Symbol};
|
6
|
+
|
7
|
+
struct HTMLElement {
|
8
|
+
element: NativeRefWrap<Element<'static, 'static>>,
|
9
|
+
ancestors: Vec<String>,
|
10
|
+
}
|
11
|
+
|
12
|
+
#[magnus::wrap(class = "Selma::HTML::Element")]
|
13
|
+
pub struct SelmaHTMLElement(std::cell::RefCell<HTMLElement>);
|
14
|
+
|
15
|
+
/// SAFETY: This is safe because we only access this data when the GVL is held.
|
16
|
+
unsafe impl Send for SelmaHTMLElement {}
|
17
|
+
|
18
|
+
impl SelmaHTMLElement {
|
19
|
+
pub fn new(element: &mut Element, ancestors: &[String]) -> Self {
|
20
|
+
let (ref_wrap, _anchor) = NativeRefWrap::wrap_mut(element);
|
21
|
+
|
22
|
+
Self(std::cell::RefCell::new(HTMLElement {
|
23
|
+
element: ref_wrap,
|
24
|
+
ancestors: ancestors.to_owned(),
|
25
|
+
}))
|
26
|
+
}
|
27
|
+
|
28
|
+
fn tag_name(&self) -> Result<String, Error> {
|
29
|
+
let binding = self.0.borrow();
|
30
|
+
|
31
|
+
if let Ok(e) = binding.element.get() {
|
32
|
+
Ok(e.tag_name())
|
33
|
+
} else {
|
34
|
+
Err(Error::new(
|
35
|
+
exception::runtime_error(),
|
36
|
+
"`tag_name` is not available",
|
37
|
+
))
|
38
|
+
}
|
39
|
+
}
|
40
|
+
|
41
|
+
fn get_attribute(&self, attr: String) -> Option<String> {
|
42
|
+
let binding = self.0.borrow();
|
43
|
+
let element = binding.element.get();
|
44
|
+
element.unwrap().get_attribute(&attr)
|
45
|
+
}
|
46
|
+
|
47
|
+
fn set_attribute(&self, attr: String, value: String) -> Result<String, Error> {
|
48
|
+
let mut binding = self.0.borrow_mut();
|
49
|
+
if let Ok(element) = binding.element.get_mut() {
|
50
|
+
match element.set_attribute(&attr, &value) {
|
51
|
+
Ok(_) => Ok(value),
|
52
|
+
Err(err) => Err(Error::new(
|
53
|
+
exception::runtime_error(),
|
54
|
+
format!("AttributeNameError: {}", err),
|
55
|
+
)),
|
56
|
+
}
|
57
|
+
} else {
|
58
|
+
Err(Error::new(
|
59
|
+
exception::runtime_error(),
|
60
|
+
"`tag_name` is not available",
|
61
|
+
))
|
62
|
+
}
|
63
|
+
}
|
64
|
+
|
65
|
+
fn remove_attribute(&self, attr: String) {
|
66
|
+
let mut binding = self.0.borrow_mut();
|
67
|
+
|
68
|
+
if let Ok(e) = binding.element.get_mut() {
|
69
|
+
e.remove_attribute(&attr)
|
70
|
+
}
|
71
|
+
}
|
72
|
+
|
73
|
+
fn get_attributes(&self) -> Result<RHash, Error> {
|
74
|
+
let binding = self.0.borrow();
|
75
|
+
let hash = RHash::new();
|
76
|
+
|
77
|
+
if let Ok(e) = binding.element.get() {
|
78
|
+
e.attributes()
|
79
|
+
.iter()
|
80
|
+
.for_each(|attr| match hash.aset(attr.name(), attr.value()) {
|
81
|
+
Ok(_) => {}
|
82
|
+
Err(err) => Err(Error::new(
|
83
|
+
exception::runtime_error(),
|
84
|
+
format!("AttributeNameError: {}", err),
|
85
|
+
))
|
86
|
+
.unwrap(),
|
87
|
+
});
|
88
|
+
}
|
89
|
+
Ok(hash)
|
90
|
+
}
|
91
|
+
|
92
|
+
fn get_ancestors(&self) -> Result<RArray, Error> {
|
93
|
+
let binding = self.0.borrow();
|
94
|
+
let array = RArray::new();
|
95
|
+
|
96
|
+
binding
|
97
|
+
.ancestors
|
98
|
+
.iter()
|
99
|
+
.for_each(|ancestor| match array.push(RString::new(ancestor)) {
|
100
|
+
Ok(_) => {}
|
101
|
+
Err(err) => {
|
102
|
+
Err(Error::new(exception::runtime_error(), format!("{}", err))).unwrap()
|
103
|
+
}
|
104
|
+
});
|
105
|
+
|
106
|
+
Ok(array)
|
107
|
+
}
|
108
|
+
|
109
|
+
fn append(&self, text_to_append: String, content_type: Symbol) -> Result<(), Error> {
|
110
|
+
let mut binding = self.0.borrow_mut();
|
111
|
+
let element = binding.element.get_mut().unwrap();
|
112
|
+
|
113
|
+
let text_str = text_to_append.as_str();
|
114
|
+
|
115
|
+
let content_type = Self::find_content_type(content_type);
|
116
|
+
|
117
|
+
element.append(text_str, content_type);
|
118
|
+
|
119
|
+
Ok(())
|
120
|
+
}
|
121
|
+
|
122
|
+
fn wrap(
|
123
|
+
&self,
|
124
|
+
start_text: String,
|
125
|
+
end_text: String,
|
126
|
+
content_type: Symbol,
|
127
|
+
) -> Result<(), Error> {
|
128
|
+
let mut binding = self.0.borrow_mut();
|
129
|
+
let element = binding.element.get_mut().unwrap();
|
130
|
+
|
131
|
+
let before_content_type = Self::find_content_type(content_type);
|
132
|
+
let after_content_type = Self::find_content_type(content_type);
|
133
|
+
element.before(&start_text, before_content_type);
|
134
|
+
element.after(&end_text, after_content_type);
|
135
|
+
|
136
|
+
Ok(())
|
137
|
+
}
|
138
|
+
|
139
|
+
fn set_inner_content(&self, text_to_set: String, content_type: Symbol) -> Result<(), Error> {
|
140
|
+
let mut binding = self.0.borrow_mut();
|
141
|
+
let element = binding.element.get_mut().unwrap();
|
142
|
+
|
143
|
+
let text_str = text_to_set.as_str();
|
144
|
+
|
145
|
+
let content_type = Self::find_content_type(content_type);
|
146
|
+
|
147
|
+
element.set_inner_content(text_str, content_type);
|
148
|
+
|
149
|
+
Ok(())
|
150
|
+
}
|
151
|
+
|
152
|
+
fn find_content_type(content_type: Symbol) -> ContentType {
|
153
|
+
match content_type.name() {
|
154
|
+
Ok(name) => match (name) {
|
155
|
+
Cow::Borrowed("as_text") => ContentType::Text,
|
156
|
+
Cow::Borrowed("as_html") => ContentType::Html,
|
157
|
+
_ => Err(Error::new(
|
158
|
+
exception::runtime_error(),
|
159
|
+
format!("unknown symbol `{}`", name),
|
160
|
+
))
|
161
|
+
.unwrap(),
|
162
|
+
},
|
163
|
+
Err(err) => Err(Error::new(
|
164
|
+
exception::runtime_error(),
|
165
|
+
format!("Could not unwrap symbol"),
|
166
|
+
))
|
167
|
+
.unwrap(),
|
168
|
+
}
|
169
|
+
}
|
170
|
+
}
|
171
|
+
|
172
|
+
pub fn init(c_html: RClass) -> Result<(), Error> {
|
173
|
+
let c_element = c_html
|
174
|
+
.define_class("Element", Default::default())
|
175
|
+
.expect("cannot find class Selma::Element");
|
176
|
+
|
177
|
+
c_element.define_method("tag_name", method!(SelmaHTMLElement::tag_name, 0))?;
|
178
|
+
c_element.define_method("[]", method!(SelmaHTMLElement::get_attribute, 1))?;
|
179
|
+
c_element.define_method("[]=", method!(SelmaHTMLElement::set_attribute, 2))?;
|
180
|
+
c_element.define_method(
|
181
|
+
"remove_attribute",
|
182
|
+
method!(SelmaHTMLElement::remove_attribute, 1),
|
183
|
+
)?;
|
184
|
+
c_element.define_method("attributes", method!(SelmaHTMLElement::get_attributes, 0))?;
|
185
|
+
c_element.define_method("ancestors", method!(SelmaHTMLElement::get_ancestors, 0))?;
|
186
|
+
|
187
|
+
c_element.define_method("append", method!(SelmaHTMLElement::append, 2))?;
|
188
|
+
c_element.define_method("wrap", method!(SelmaHTMLElement::wrap, 3))?;
|
189
|
+
c_element.define_method(
|
190
|
+
"set_inner_content",
|
191
|
+
method!(SelmaHTMLElement::set_inner_content, 2),
|
192
|
+
)?;
|
193
|
+
|
194
|
+
Ok(())
|
195
|
+
}
|
@@ -0,0 +1,35 @@
|
|
1
|
+
use crate::native_ref_wrap::NativeRefWrap;
|
2
|
+
use lol_html::html_content::EndTag;
|
3
|
+
use magnus::{method, Error, Module, RClass};
|
4
|
+
|
5
|
+
struct HTMLEndTag {
|
6
|
+
end_tag: NativeRefWrap<EndTag<'static>>,
|
7
|
+
}
|
8
|
+
|
9
|
+
#[magnus::wrap(class = "Selma::HTML::Element")]
|
10
|
+
pub struct SelmaHTMLEndTag(std::cell::RefCell<HTMLEndTag>);
|
11
|
+
|
12
|
+
/// SAFETY: This is safe because we only access this data when the GVL is held.
|
13
|
+
unsafe impl Send for SelmaHTMLEndTag {}
|
14
|
+
|
15
|
+
impl SelmaHTMLEndTag {
|
16
|
+
pub fn new(end_tag: &mut EndTag) -> Self {
|
17
|
+
let (ref_wrap, _anchor) = NativeRefWrap::wrap(end_tag);
|
18
|
+
|
19
|
+
Self(std::cell::RefCell::new(HTMLEndTag { end_tag: ref_wrap }))
|
20
|
+
}
|
21
|
+
|
22
|
+
fn tag_name(&self) -> String {
|
23
|
+
self.0.borrow().end_tag.get().unwrap().name()
|
24
|
+
}
|
25
|
+
}
|
26
|
+
|
27
|
+
pub fn init(c_html: RClass) -> Result<(), Error> {
|
28
|
+
let c_end_tag = c_html
|
29
|
+
.define_class("EndTag", Default::default())
|
30
|
+
.expect("cannot find class Selma::EndTag");
|
31
|
+
|
32
|
+
c_end_tag.define_method("tag_name", method!(SelmaHTMLEndTag::tag_name, 0))?;
|
33
|
+
|
34
|
+
Ok(())
|
35
|
+
}
|
@@ -0,0 +1,17 @@
|
|
1
|
+
use magnus::{Error, Module, RModule};
|
2
|
+
|
3
|
+
#[derive(Clone, Debug)]
|
4
|
+
#[magnus::wrap(class = "Selma::HTML")]
|
5
|
+
pub(crate) struct SelmaHTML {}
|
6
|
+
|
7
|
+
pub fn init(m_selma: RModule) -> Result<(), Error> {
|
8
|
+
let c_html = m_selma.define_class("HTML", Default::default()).unwrap();
|
9
|
+
|
10
|
+
element::init(c_html).expect("cannot define Selma::HTML::Element class");
|
11
|
+
end_tag::init(c_html).expect("cannot define Selma::HTML::EndTag class");
|
12
|
+
|
13
|
+
Ok(())
|
14
|
+
}
|
15
|
+
|
16
|
+
pub mod element;
|
17
|
+
pub mod end_tag;
|
@@ -0,0 +1,23 @@
|
|
1
|
+
extern crate core;
|
2
|
+
|
3
|
+
use magnus::{define_module, Error};
|
4
|
+
|
5
|
+
pub mod html;
|
6
|
+
pub mod native_ref_wrap;
|
7
|
+
pub mod rewriter;
|
8
|
+
pub mod sanitizer;
|
9
|
+
pub mod selector;
|
10
|
+
pub mod tags;
|
11
|
+
pub mod wrapped_struct;
|
12
|
+
|
13
|
+
#[magnus::init]
|
14
|
+
fn init() -> Result<(), Error> {
|
15
|
+
let m_selma = define_module("Selma").expect("cannot define ::Selma module");
|
16
|
+
|
17
|
+
sanitizer::init(m_selma).expect("cannot define Selma::Sanitizer class");
|
18
|
+
rewriter::init(m_selma).expect("cannot define Selma::Rewriter class");
|
19
|
+
html::init(m_selma).expect("cannot define Selma::HTML class");
|
20
|
+
selector::init(m_selma).expect("cannot define Selma::Selector class");
|
21
|
+
|
22
|
+
Ok(())
|
23
|
+
}
|
@@ -0,0 +1,79 @@
|
|
1
|
+
use std::{cell::Cell, marker::PhantomData, mem, rc::Rc};
|
2
|
+
|
3
|
+
// NOTE: My Rust isn't good enough to know what any of this does,
|
4
|
+
// but it was taken from https://github.com/cloudflare/lol-html/blob/1a1ab2e2bf896f815fe8888ed78ccdf46d7c6b85/js-api/src/lib.rs#LL38
|
5
|
+
|
6
|
+
/// Companion handle created alongside a `NativeRefWrap`.
///
/// It shares the wrapper's `poisoned` flag and carries the lifetime `'r` as a
/// marker only.
pub struct Anchor<'r> {
    /// Flag shared with the wrapper this anchor was created for.
    poisoned: Rc<Cell<bool>>,
    /// Zero-sized marker carrying `'r`; holds no data.
    lifetime: PhantomData<&'r mut ()>,
}
|
10
|
+
|
11
|
+
impl<'r> Anchor<'r> {
|
12
|
+
pub fn new(poisoned: Rc<Cell<bool>>) -> Self {
|
13
|
+
Anchor {
|
14
|
+
poisoned,
|
15
|
+
lifetime: PhantomData,
|
16
|
+
}
|
17
|
+
}
|
18
|
+
}
|
19
|
+
|
20
|
+
// impl Drop for Anchor<'_> {
|
21
|
+
// fn drop(&mut self) {
|
22
|
+
// self.poisoned.replace(true);
|
23
|
+
// }
|
24
|
+
// }
|
25
|
+
|
26
|
+
// NOTE: this wrapper erases all lifetime information from the inner reference
// so it can be stored in a struct without lifetime parameters. Alongside it,
// `wrap` / `wrap_mut` hand back an `Anchor` that shares the `poisoned` flag.
//
// Once `poisoned` is true, `get` / `get_mut` return an error instead of the
// inner object. The `Drop` impl that would set the flag when the anchor goes
// out of scope is commented out in this file, so nothing visible here ever
// sets it.
pub struct NativeRefWrap<R> {
    /// Raw pointer to the wrapped value, reinterpreted as `R`.
    inner_ptr: *mut R,
    /// Flag shared with the matching `Anchor`.
    poisoned: Rc<Cell<bool>>,
}
|
36
|
+
|
37
|
+
impl<R> NativeRefWrap<R> {
|
38
|
+
pub fn wrap<I>(inner: &I) -> (Self, Anchor) {
|
39
|
+
let wrap = NativeRefWrap {
|
40
|
+
inner_ptr: unsafe { mem::transmute(inner) },
|
41
|
+
poisoned: Rc::new(Cell::new(false)),
|
42
|
+
};
|
43
|
+
|
44
|
+
let anchor = Anchor::new(Rc::clone(&wrap.poisoned));
|
45
|
+
|
46
|
+
(wrap, anchor)
|
47
|
+
}
|
48
|
+
|
49
|
+
pub fn wrap_mut<I>(inner: &mut I) -> (Self, Anchor) {
|
50
|
+
let wrap = NativeRefWrap {
|
51
|
+
inner_ptr: unsafe { mem::transmute(inner) },
|
52
|
+
poisoned: Rc::new(Cell::new(false)),
|
53
|
+
};
|
54
|
+
|
55
|
+
let anchor = Anchor::new(Rc::clone(&wrap.poisoned));
|
56
|
+
|
57
|
+
(wrap, anchor)
|
58
|
+
}
|
59
|
+
|
60
|
+
pub fn get(&self) -> Result<&R, &'static str> {
|
61
|
+
self.assert_not_poisoned()?;
|
62
|
+
|
63
|
+
Ok(unsafe { self.inner_ptr.as_ref() }.unwrap())
|
64
|
+
}
|
65
|
+
|
66
|
+
pub fn get_mut(&mut self) -> Result<&mut R, &'static str> {
|
67
|
+
self.assert_not_poisoned()?;
|
68
|
+
|
69
|
+
Ok(unsafe { self.inner_ptr.as_mut() }.unwrap())
|
70
|
+
}
|
71
|
+
|
72
|
+
fn assert_not_poisoned(&self) -> Result<(), &'static str> {
|
73
|
+
if self.poisoned.get() {
|
74
|
+
Err("The object has been freed and can't be used anymore.")
|
75
|
+
} else {
|
76
|
+
Ok(())
|
77
|
+
}
|
78
|
+
}
|
79
|
+
}
|