selma 0.0.2-x86_64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +173 -0
- data/ext/selma/Cargo.toml +14 -0
- data/ext/selma/_util.rb +102 -0
- data/ext/selma/extconf.rb +6 -0
- data/ext/selma/src/html/element.rs +195 -0
- data/ext/selma/src/html/end_tag.rs +35 -0
- data/ext/selma/src/html.rs +17 -0
- data/ext/selma/src/lib.rs +23 -0
- data/ext/selma/src/native_ref_wrap.rs +79 -0
- data/ext/selma/src/rewriter.rs +441 -0
- data/ext/selma/src/sanitizer.rs +578 -0
- data/ext/selma/src/selector.rs +115 -0
- data/ext/selma/src/tags.rs +1133 -0
- data/ext/selma/src/wrapped_struct.rs +92 -0
- data/lib/selma/3.1/selma.so +0 -0
- data/lib/selma/extension.rb +14 -0
- data/lib/selma/html.rb +6 -0
- data/lib/selma/rewriter.rb +6 -0
- data/lib/selma/sanitizer/config/basic.rb +27 -0
- data/lib/selma/sanitizer/config/default.rb +42 -0
- data/lib/selma/sanitizer/config/relaxed.rb +37 -0
- data/lib/selma/sanitizer/config/restricted.rb +13 -0
- data/lib/selma/sanitizer/config.rb +67 -0
- data/lib/selma/sanitizer.rb +85 -0
- data/lib/selma/selector.rb +6 -0
- data/lib/selma/version.rb +5 -0
- data/lib/selma.rb +13 -0
- data/selma.gemspec +41 -0
- metadata +136 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 76a748290e61c06cf1aa8b4df13ad2bcf77e9e87fcbd575968854acd2dd9e55b
|
4
|
+
data.tar.gz: 3aa85c75fd95e90b3f991883460033f885279c75b607c0043fe3fc23957f6755
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a5de62db9d9c2a9994a34b527c4a218417228bf716848c97785cf4d1db66148defe243614c89a4e96985b32e0bec1796991a0a72a1d9ec95e34469f2eb9141ec
|
7
|
+
data.tar.gz: a8d2c7c951ea57c31f2b482f017a093f0062f34f9f6ef2009e8e5e39a765ed8e3665cd7575b32b6be12ba9d8b911a9863438edf6aad7d258da83ff3f6e322792
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2022 Garen J. Torikian
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,173 @@
|
|
1
|
+
# Selma
|
2
|
+
|
3
|
+
Selma **sel**ects and **ma**tches HTML nodes using CSS rules. (It can also reject/delete nodes, but then the name isn't as cool.) It's mostly an idiomatic wrapper around Cloudflare's [lol-html](https://github.com/cloudflare/lol-html) project.
|
4
|
+
|
5
|
+

|
6
|
+
|
7
|
+
Selma's strength (aside from being backed by Rust) is that HTML content is parsed _once_ and can be manipulated multiple times.
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
Add this line to your application's Gemfile:
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
gem 'selma'
|
15
|
+
```
|
16
|
+
|
17
|
+
And then execute:
|
18
|
+
|
19
|
+
$ bundle install
|
20
|
+
|
21
|
+
Or install it yourself as:
|
22
|
+
|
23
|
+
$ gem install selma
|
24
|
+
|
25
|
+
## Usage
|
26
|
+
|
27
|
+
Selma can perform two different actions:
|
28
|
+
|
29
|
+
- Sanitize HTML, through a [Sanitize](https://github.com/rgrove/sanitize)-like allowlist syntax; and
|
30
|
+
- Select HTML using CSS rules, and manipulate elements and text
|
31
|
+
|
32
|
+
The basic API for Selma looks like this:
|
33
|
+
|
34
|
+
```ruby
|
35
|
+
rewriter = Selma::Rewriter.new(sanitizer: sanitizer_config, handlers: [MatchAttribute.new, TextRewrite.new])
|
36
|
+
rewriter.rewrite(html)
|
37
|
+
```
|
38
|
+
|
39
|
+
Let's take a look at each part individually.
|
40
|
+
|
41
|
+
### Sanitization config
|
42
|
+
|
43
|
+
Selma sanitizes by default. That is, even if the `sanitizer` kwarg is not passed in, sanitization occurs. If you want to disable HTML sanitization (for some reason), pass `nil`:
|
44
|
+
|
45
|
+
```ruby
|
46
|
+
Selma::Rewriter.new(sanitizer: nil) # dangerous and ill-advised
|
47
|
+
```
|
48
|
+
|
49
|
+
The configuration for the sanitization process is based on the following key-value hash allowlist:
|
50
|
+
|
51
|
+
```ruby
|
52
|
+
# Whether or not to allow HTML comments.
|
53
|
+
allow_comments: false,
|
54
|
+
|
55
|
+
# Whether or not to allow well-formed HTML doctype declarations such as
|
56
|
+
# "<!DOCTYPE html>" when sanitizing a document.
|
57
|
+
allow_doctype: false,
|
58
|
+
|
59
|
+
# HTML attributes to allow in specific elements. The key is the name of the element,
|
60
|
+
# and the value is an array of allowed attributes. By default, no attributes
|
61
|
+
# are allowed.
|
62
|
+
attributes: {
|
63
|
+
"a" => ["href"],
|
64
|
+
"img" => ["src"],
|
65
|
+
},
|
66
|
+
|
67
|
+
# HTML elements to allow. By default, no elements are allowed (which means
|
68
|
+
# that all HTML will be stripped).
|
69
|
+
elements: ["a", "b", "img", ],
|
70
|
+
|
71
|
+
# URL handling protocols to allow in specific attributes. By default, no
|
72
|
+
# protocols are allowed. Use :relative in place of a protocol if you want
|
73
|
+
# to allow relative URLs sans protocol.
|
74
|
+
protocols: {
|
75
|
+
"a" => { "href" => ["http", "https", "mailto", :relative] },
|
76
|
+
"img" => { "src" => ["http", "https"] },
|
77
|
+
},
|
78
|
+
|
79
|
+
# An Array of element names whose contents will be removed. The contents
|
80
|
+
# of all other filtered elements will be left behind.
|
81
|
+
remove_contents: ["iframe", "math", "noembed", "noframes", "noscript"],
|
82
|
+
|
83
|
+
# Elements which, when removed, should have their contents surrounded by
|
84
|
+
# whitespace.
|
85
|
+
whitespace_elements: ["blockquote", "h1", "h2", "h3", "h4", "h5", "h6", ]
|
86
|
+
```
|
87
|
+
|
88
|
+
### Defining handlers
|
89
|
+
|
90
|
+
The real power in Selma comes in its use of handlers. A handler is simply an object with various methods:
|
91
|
+
|
92
|
+
- `selector`, a method which MUST return an instance of `Selma::Selector` which defines the CSS classes to match
|
93
|
+
- `handle_element`, a method that's called on each matched element
|
94
|
+
- `handle_text`, a method that's called on each matched text node; this MUST return a string
|
95
|
+
|
96
|
+
Here's an example which rewrites the `href` attribute on `a` and the `src` attribute on `img` to be `https` rather than `http`.
|
97
|
+
|
98
|
+
```ruby
|
99
|
+
class MatchAttribute
|
100
|
+
SELECTOR = Selma::Selector.new(match_element: "a, img")
|
101
|
+
|
102
|
+
def handle_element(element)
|
103
|
+
if element.tag_name == "a" && element["href"] =~ /^http:/
|
104
|
+
element["href"] = rename_http(element["href"])
|
105
|
+
elsif element.tag_name == "img" && element["src"] =~ /^http:/
|
106
|
+
element["src"] = rename_http(element["src"])
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
private def rename_http(link)
|
111
|
+
link.sub("http", "https")
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
rewriter = Selma::Rewriter.new(handlers: [MatchAttribute.new])
|
116
|
+
```
|
117
|
+
|
118
|
+
The `Selma::Selector` object has three possible kwargs:
|
119
|
+
|
120
|
+
- `match_element`: any element which matches this CSS rule will be passed on to `handle_element`
|
121
|
+
- `match_text_within`: any element which matches this CSS rule will be passed on to `handle_text`
|
122
|
+
- `ignore_text_within`: this is an array of element names whose text contents will be ignored
|
123
|
+
|
124
|
+
You've seen an example of `match_element`; here's one for `match_text_within` which changes strings in various elements which are _not_ `pre` or `code`:
|
125
|
+
|
126
|
+
```ruby
|
127
|
+
|
128
|
+
class MatchText
|
129
|
+
SELECTOR = Selma::Selector.new(match_text_within: "*", ignore_text_within: ["pre", "code"])
|
130
|
+
|
131
|
+
def selector
|
132
|
+
SELECTOR
|
133
|
+
end
|
134
|
+
|
135
|
+
def handle_text(text)
|
136
|
+
text.sub(/@.+/, "<a href=\"www.yetto.app/#{Regexp.last_match}\">")
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
rewriter = Selma::Rewriter.new(handlers: [MatchText.new])
|
141
|
+
```
|
142
|
+
|
143
|
+
#### `element` methods
|
144
|
+
|
145
|
+
The `element` argument in `handle_element` has the following methods:
|
146
|
+
|
147
|
+
- `tag_name`: The element's name
|
148
|
+
- `[]`: get an attribute
|
149
|
+
- `[]=`: set an attribute
|
150
|
+
- `remove_attribute`: remove an attribute
|
151
|
+
- `attributes`: list all the attributes
|
152
|
+
- `ancestors`: list all the ancestors
|
153
|
+
- `append(content, content_type)`: appends `content` to the element's inner content, i.e. inserts content right before the element's end tag. `content_type` is either `:as_text` or `:as_html` and determines how the content will be applied.
|
154
|
+
- `wrap(start_text, end_text, content_type)`: adds `start_text` before an element and `end_text` after an element. `content_type` is either `:as_text` or `:as_html` and determines how the content will be applied.
|
155
|
+
- `set_inner_content(content, content_type)`: replaces inner content of the element with `content`. `content_type` is either `:as_text` or `:as_html` and determines how the content will be applied.
|
156
|
+
|
157
|
+
## Benchmarks
|
158
|
+
|
159
|
+
TBD
|
160
|
+
|
161
|
+
## Contributing
|
162
|
+
|
163
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/gjtorikian/selma. This project is a safe, welcoming space for collaboration.
|
164
|
+
|
165
|
+
## Acknowledgements
|
166
|
+
|
167
|
+
- https://github.com/flavorjones/ruby-c-extensions-explained#strategy-3-precompiled and [Nokogiri](https://github.com/sparklemotion/nokogiri) for hints on how to ship precompiled cross-platform gems
|
168
|
+
- @vmg for his work at GitHub on goomba, from which some design patterns were learned
|
169
|
+
- [sanitize](https://github.com/rgrove/sanitize) for a comprehensive configuration API and test suite
|
170
|
+
|
171
|
+
## License
|
172
|
+
|
173
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
@@ -0,0 +1,14 @@
|
|
1
|
+
[package]
|
2
|
+
name = "selma"
|
3
|
+
version = "1.0.0"
|
4
|
+
edition = "2021"
|
5
|
+
|
6
|
+
[dependencies]
|
7
|
+
enum-iterator = "1.2"
|
8
|
+
escapist = "0.0.1"
|
9
|
+
magnus = "0.4"
|
10
|
+
lol_html = { git = "https://github.com/cloudflare/lol-html", rev = "b09b7afbbcecb944f4bf338b0e669c430d91061e" }
|
11
|
+
|
12
|
+
[lib]
|
13
|
+
name = "selma"
|
14
|
+
crate-type = ["cdylib"]
|
data/ext/selma/_util.rb
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Major and minor components of the running Ruby's version, as integers.
RUBY_MAJOR, RUBY_MINOR = RUBY_VERSION.split(".").first(2).map(&:to_i)

# Root of the package: two directories above the directory holding this file.
PACKAGE_ROOT_DIR = File.expand_path(File.join("..", ".."), File.dirname(__FILE__))
# Location of the native extension sources inside the package.
PACKAGE_EXT_DIR = File.join(PACKAGE_ROOT_DIR, "ext", "selma")
|
7
|
+
|
8
|
+
# Normalize the host operating system reported by RbConfig into a short name.
# Unrecognized systems keep their downcased `host_os` string.
os = RbConfig::CONFIG["host_os"].downcase
OS =
  case os
  when /linux/
    # The official ruby-alpine Docker containers pre-build Ruby. As a result,
    # Ruby doesn't know that it's on a musl-based platform. Asking `ldd` is
    # a more reliable way to detect musl.
    # See https://github.com/skylightio/skylight-ruby/issues/92
    musl = ENV["SKYLIGHT_MUSL"] || %x(ldd --version 2>&1).include?("musl")
    musl ? "linux-musl" : "linux"
  when /darwin/ then "darwin"
  when /freebsd/ then "freebsd"
  when /netbsd/ then "netbsd"
  when /openbsd/ then "openbsd"
  when /sunos|solaris/ then "solaris"
  when /mingw|mswin/ then "windows"
  else os
  end
|
34
|
+
|
35
|
+
# Normalize the platform CPU reported by RbConfig into a short family name.
# Patterns are tried top to bottom, so the 64-bit x86 names win over the
# broader 32-bit pattern; unrecognized CPUs keep their downcased name.
cpu = RbConfig::CONFIG["host_cpu"].downcase
ARCH =
  case cpu
  when /amd64|x86_64|x64/ then "x86_64"
  when /i?86|x86|i86pc/ then "x86"
  when /ppc|powerpc/ then "powerpc"
  when /^aarch/ then "aarch"
  when /^arm/ then "arm"
  else cpu
  end
|
50
|
+
|
51
|
+
# True when the normalized host OS is "windows".
def windows?
  "windows" == OS
end
|
54
|
+
|
55
|
+
# True when the normalized host OS is "solaris".
#
# The comparison must be against the string literal: a bare `solaries`
# identifier is undefined and raises NameError as soon as this is called.
def solaris?
  OS == "solaris"
end
|
58
|
+
|
59
|
+
# True when the normalized host OS is "darwin".
def darwin?
  "darwin" == OS
end
|
62
|
+
|
63
|
+
# True on darwin hosts, or when the normalized OS string is exactly "macos".
def macos?
  return true if darwin?

  "macos" == OS
end
|
66
|
+
|
67
|
+
# True when the normalized host OS is "openbsd".
def openbsd?
  "openbsd" == OS
end
|
70
|
+
|
71
|
+
# True when the normalized host OS string is exactly "aix".
def aix?
  "aix" == OS
end
|
74
|
+
|
75
|
+
# True when the host is none of Windows, Solaris or Darwin.
def nix?
  !windows? && !solaris? && !darwin?
end
|
78
|
+
|
79
|
+
# True when the normalized CPU family is "x86_64".
def x86_64?
  "x86_64" == ARCH
end
|
82
|
+
|
83
|
+
# True when the normalized CPU family is "x86".
def x86?
  "x86" == ARCH
end
|
86
|
+
|
87
|
+
# Resolve +path+ relative to the extension directory (PACKAGE_EXT_DIR).
def abs_path(path)
  segments = [PACKAGE_EXT_DIR, path]
  File.join(*segments)
end
|
90
|
+
|
91
|
+
# Look up +header+ in +paths+ via `find_header`; abort the build with a
# descriptive message when it cannot be found. Returns `find_header`'s result.
def find_header_or_abort(header, *paths)
  found = find_header(header, *paths)
  abort("#{header} was expected in `#{paths.join(", ")}`, but it is missing.") unless found
  found
end
|
94
|
+
|
95
|
+
# Look up +lib+ (providing +func+) in +paths+ via `find_library`; abort the
# build with a descriptive message when it cannot be found. Returns
# `find_library`'s result.
def find_library_or_abort(lib, func, *paths)
  found = find_library(lib, func, *paths)
  abort("#{lib} was expected in `#{paths.join(", ")}`, but it is missing.") unless found
  found
end
|
98
|
+
|
99
|
+
# Join the given flags with single spaces, skipping any nil entries.
def concat_flags(*args)
  present = args.reject(&:nil?)
  present.join(" ")
end
|
102
|
+
|
@@ -0,0 +1,195 @@
|
|
1
|
+
use std::borrow::Cow;
|
2
|
+
|
3
|
+
use crate::native_ref_wrap::NativeRefWrap;
|
4
|
+
use lol_html::html_content::{ContentType, Element};
|
5
|
+
use magnus::{exception, method, Error, Module, RArray, RClass, RHash, RString, Symbol};
|
6
|
+
|
7
|
+
struct HTMLElement {
|
8
|
+
element: NativeRefWrap<Element<'static, 'static>>,
|
9
|
+
ancestors: Vec<String>,
|
10
|
+
}
|
11
|
+
|
12
|
+
#[magnus::wrap(class = "Selma::HTML::Element")]
|
13
|
+
pub struct SelmaHTMLElement(std::cell::RefCell<HTMLElement>);
|
14
|
+
|
15
|
+
/// SAFETY: This is safe because we only access this data when the GVL is held.
|
16
|
+
unsafe impl Send for SelmaHTMLElement {}
|
17
|
+
|
18
|
+
impl SelmaHTMLElement {
|
19
|
+
pub fn new(element: &mut Element, ancestors: &[String]) -> Self {
|
20
|
+
let (ref_wrap, _anchor) = NativeRefWrap::wrap_mut(element);
|
21
|
+
|
22
|
+
Self(std::cell::RefCell::new(HTMLElement {
|
23
|
+
element: ref_wrap,
|
24
|
+
ancestors: ancestors.to_owned(),
|
25
|
+
}))
|
26
|
+
}
|
27
|
+
|
28
|
+
fn tag_name(&self) -> Result<String, Error> {
|
29
|
+
let binding = self.0.borrow();
|
30
|
+
|
31
|
+
if let Ok(e) = binding.element.get() {
|
32
|
+
Ok(e.tag_name())
|
33
|
+
} else {
|
34
|
+
Err(Error::new(
|
35
|
+
exception::runtime_error(),
|
36
|
+
"`tag_name` is not available",
|
37
|
+
))
|
38
|
+
}
|
39
|
+
}
|
40
|
+
|
41
|
+
fn get_attribute(&self, attr: String) -> Option<String> {
|
42
|
+
let binding = self.0.borrow();
|
43
|
+
let element = binding.element.get();
|
44
|
+
element.unwrap().get_attribute(&attr)
|
45
|
+
}
|
46
|
+
|
47
|
+
fn set_attribute(&self, attr: String, value: String) -> Result<String, Error> {
|
48
|
+
let mut binding = self.0.borrow_mut();
|
49
|
+
if let Ok(element) = binding.element.get_mut() {
|
50
|
+
match element.set_attribute(&attr, &value) {
|
51
|
+
Ok(_) => Ok(value),
|
52
|
+
Err(err) => Err(Error::new(
|
53
|
+
exception::runtime_error(),
|
54
|
+
format!("AttributeNameError: {}", err),
|
55
|
+
)),
|
56
|
+
}
|
57
|
+
} else {
|
58
|
+
Err(Error::new(
|
59
|
+
exception::runtime_error(),
|
60
|
+
"`tag_name` is not available",
|
61
|
+
))
|
62
|
+
}
|
63
|
+
}
|
64
|
+
|
65
|
+
fn remove_attribute(&self, attr: String) {
|
66
|
+
let mut binding = self.0.borrow_mut();
|
67
|
+
|
68
|
+
if let Ok(e) = binding.element.get_mut() {
|
69
|
+
e.remove_attribute(&attr)
|
70
|
+
}
|
71
|
+
}
|
72
|
+
|
73
|
+
fn get_attributes(&self) -> Result<RHash, Error> {
|
74
|
+
let binding = self.0.borrow();
|
75
|
+
let hash = RHash::new();
|
76
|
+
|
77
|
+
if let Ok(e) = binding.element.get() {
|
78
|
+
e.attributes()
|
79
|
+
.iter()
|
80
|
+
.for_each(|attr| match hash.aset(attr.name(), attr.value()) {
|
81
|
+
Ok(_) => {}
|
82
|
+
Err(err) => Err(Error::new(
|
83
|
+
exception::runtime_error(),
|
84
|
+
format!("AttributeNameError: {}", err),
|
85
|
+
))
|
86
|
+
.unwrap(),
|
87
|
+
});
|
88
|
+
}
|
89
|
+
Ok(hash)
|
90
|
+
}
|
91
|
+
|
92
|
+
fn get_ancestors(&self) -> Result<RArray, Error> {
|
93
|
+
let binding = self.0.borrow();
|
94
|
+
let array = RArray::new();
|
95
|
+
|
96
|
+
binding
|
97
|
+
.ancestors
|
98
|
+
.iter()
|
99
|
+
.for_each(|ancestor| match array.push(RString::new(ancestor)) {
|
100
|
+
Ok(_) => {}
|
101
|
+
Err(err) => {
|
102
|
+
Err(Error::new(exception::runtime_error(), format!("{}", err))).unwrap()
|
103
|
+
}
|
104
|
+
});
|
105
|
+
|
106
|
+
Ok(array)
|
107
|
+
}
|
108
|
+
|
109
|
+
fn append(&self, text_to_append: String, content_type: Symbol) -> Result<(), Error> {
|
110
|
+
let mut binding = self.0.borrow_mut();
|
111
|
+
let element = binding.element.get_mut().unwrap();
|
112
|
+
|
113
|
+
let text_str = text_to_append.as_str();
|
114
|
+
|
115
|
+
let content_type = Self::find_content_type(content_type);
|
116
|
+
|
117
|
+
element.append(text_str, content_type);
|
118
|
+
|
119
|
+
Ok(())
|
120
|
+
}
|
121
|
+
|
122
|
+
fn wrap(
|
123
|
+
&self,
|
124
|
+
start_text: String,
|
125
|
+
end_text: String,
|
126
|
+
content_type: Symbol,
|
127
|
+
) -> Result<(), Error> {
|
128
|
+
let mut binding = self.0.borrow_mut();
|
129
|
+
let element = binding.element.get_mut().unwrap();
|
130
|
+
|
131
|
+
let before_content_type = Self::find_content_type(content_type);
|
132
|
+
let after_content_type = Self::find_content_type(content_type);
|
133
|
+
element.before(&start_text, before_content_type);
|
134
|
+
element.after(&end_text, after_content_type);
|
135
|
+
|
136
|
+
Ok(())
|
137
|
+
}
|
138
|
+
|
139
|
+
fn set_inner_content(&self, text_to_set: String, content_type: Symbol) -> Result<(), Error> {
|
140
|
+
let mut binding = self.0.borrow_mut();
|
141
|
+
let element = binding.element.get_mut().unwrap();
|
142
|
+
|
143
|
+
let text_str = text_to_set.as_str();
|
144
|
+
|
145
|
+
let content_type = Self::find_content_type(content_type);
|
146
|
+
|
147
|
+
element.set_inner_content(text_str, content_type);
|
148
|
+
|
149
|
+
Ok(())
|
150
|
+
}
|
151
|
+
|
152
|
+
fn find_content_type(content_type: Symbol) -> ContentType {
|
153
|
+
match content_type.name() {
|
154
|
+
Ok(name) => match (name) {
|
155
|
+
Cow::Borrowed("as_text") => ContentType::Text,
|
156
|
+
Cow::Borrowed("as_html") => ContentType::Html,
|
157
|
+
_ => Err(Error::new(
|
158
|
+
exception::runtime_error(),
|
159
|
+
format!("unknown symbol `{}`", name),
|
160
|
+
))
|
161
|
+
.unwrap(),
|
162
|
+
},
|
163
|
+
Err(err) => Err(Error::new(
|
164
|
+
exception::runtime_error(),
|
165
|
+
format!("Could not unwrap symbol"),
|
166
|
+
))
|
167
|
+
.unwrap(),
|
168
|
+
}
|
169
|
+
}
|
170
|
+
}
|
171
|
+
|
172
|
+
pub fn init(c_html: RClass) -> Result<(), Error> {
|
173
|
+
let c_element = c_html
|
174
|
+
.define_class("Element", Default::default())
|
175
|
+
.expect("cannot find class Selma::Element");
|
176
|
+
|
177
|
+
c_element.define_method("tag_name", method!(SelmaHTMLElement::tag_name, 0))?;
|
178
|
+
c_element.define_method("[]", method!(SelmaHTMLElement::get_attribute, 1))?;
|
179
|
+
c_element.define_method("[]=", method!(SelmaHTMLElement::set_attribute, 2))?;
|
180
|
+
c_element.define_method(
|
181
|
+
"remove_attribute",
|
182
|
+
method!(SelmaHTMLElement::remove_attribute, 1),
|
183
|
+
)?;
|
184
|
+
c_element.define_method("attributes", method!(SelmaHTMLElement::get_attributes, 0))?;
|
185
|
+
c_element.define_method("ancestors", method!(SelmaHTMLElement::get_ancestors, 0))?;
|
186
|
+
|
187
|
+
c_element.define_method("append", method!(SelmaHTMLElement::append, 2))?;
|
188
|
+
c_element.define_method("wrap", method!(SelmaHTMLElement::wrap, 3))?;
|
189
|
+
c_element.define_method(
|
190
|
+
"set_inner_content",
|
191
|
+
method!(SelmaHTMLElement::set_inner_content, 2),
|
192
|
+
)?;
|
193
|
+
|
194
|
+
Ok(())
|
195
|
+
}
|
@@ -0,0 +1,35 @@
|
|
1
|
+
use crate::native_ref_wrap::NativeRefWrap;
|
2
|
+
use lol_html::html_content::EndTag;
|
3
|
+
use magnus::{method, Error, Module, RClass};
|
4
|
+
|
5
|
+
struct HTMLEndTag {
|
6
|
+
end_tag: NativeRefWrap<EndTag<'static>>,
|
7
|
+
}
|
8
|
+
|
9
|
+
#[magnus::wrap(class = "Selma::HTML::Element")]
|
10
|
+
pub struct SelmaHTMLEndTag(std::cell::RefCell<HTMLEndTag>);
|
11
|
+
|
12
|
+
/// SAFETY: This is safe because we only access this data when the GVL is held.
|
13
|
+
unsafe impl Send for SelmaHTMLEndTag {}
|
14
|
+
|
15
|
+
impl SelmaHTMLEndTag {
|
16
|
+
pub fn new(end_tag: &mut EndTag) -> Self {
|
17
|
+
let (ref_wrap, _anchor) = NativeRefWrap::wrap(end_tag);
|
18
|
+
|
19
|
+
Self(std::cell::RefCell::new(HTMLEndTag { end_tag: ref_wrap }))
|
20
|
+
}
|
21
|
+
|
22
|
+
fn tag_name(&self) -> String {
|
23
|
+
self.0.borrow().end_tag.get().unwrap().name()
|
24
|
+
}
|
25
|
+
}
|
26
|
+
|
27
|
+
pub fn init(c_html: RClass) -> Result<(), Error> {
|
28
|
+
let c_end_tag = c_html
|
29
|
+
.define_class("EndTag", Default::default())
|
30
|
+
.expect("cannot find class Selma::EndTag");
|
31
|
+
|
32
|
+
c_end_tag.define_method("tag_name", method!(SelmaHTMLEndTag::tag_name, 0))?;
|
33
|
+
|
34
|
+
Ok(())
|
35
|
+
}
|
@@ -0,0 +1,17 @@
|
|
1
|
+
use magnus::{Error, Module, RModule};
|
2
|
+
|
3
|
+
/// Field-less type registered with magnus under the Ruby class name `Selma::HTML`.
#[derive(Clone, Debug)]
|
4
|
+
#[magnus::wrap(class = "Selma::HTML")]
|
5
|
+
pub(crate) struct SelmaHTML {}
|
6
|
+
|
7
|
+
pub fn init(m_selma: RModule) -> Result<(), Error> {
|
8
|
+
let c_html = m_selma.define_class("HTML", Default::default()).unwrap();
|
9
|
+
|
10
|
+
element::init(c_html).expect("cannot define Selma::HTML::Element class");
|
11
|
+
end_tag::init(c_html).expect("cannot define Selma::HTML::EndTag class");
|
12
|
+
|
13
|
+
Ok(())
|
14
|
+
}
|
15
|
+
|
16
|
+
pub mod element;
|
17
|
+
pub mod end_tag;
|
@@ -0,0 +1,23 @@
|
|
1
|
+
extern crate core;
|
2
|
+
|
3
|
+
use magnus::{define_module, Error};
|
4
|
+
|
5
|
+
pub mod html;
|
6
|
+
pub mod native_ref_wrap;
|
7
|
+
pub mod rewriter;
|
8
|
+
pub mod sanitizer;
|
9
|
+
pub mod selector;
|
10
|
+
pub mod tags;
|
11
|
+
pub mod wrapped_struct;
|
12
|
+
|
13
|
+
#[magnus::init]
|
14
|
+
fn init() -> Result<(), Error> {
|
15
|
+
let m_selma = define_module("Selma").expect("cannot define ::Selma module");
|
16
|
+
|
17
|
+
sanitizer::init(m_selma).expect("cannot define Selma::Sanitizer class");
|
18
|
+
rewriter::init(m_selma).expect("cannot define Selma::Rewriter class");
|
19
|
+
html::init(m_selma).expect("cannot define Selma::HTML class");
|
20
|
+
selector::init(m_selma).expect("cannot define Selma::Selector class");
|
21
|
+
|
22
|
+
Ok(())
|
23
|
+
}
|
@@ -0,0 +1,79 @@
|
|
1
|
+
use std::{cell::Cell, marker::PhantomData, mem, rc::Rc};
|
2
|
+
|
3
|
+
// NOTE: My Rust isn't good enough to know what any of this does,
|
4
|
+
// but it was taken from https://github.com/cloudflare/lol-html/blob/1a1ab2e2bf896f815fe8888ed78ccdf46d7c6b85/js-api/src/lib.rs#LL38
|
5
|
+
|
6
|
+
/// Companion object returned next to every [`NativeRefWrap`].
///
/// It shares the wrapper's `poisoned` flag and carries the lifetime `'r` of
/// the reference that was wrapped.
pub struct Anchor<'r> {
    poisoned: Rc<Cell<bool>>,
    lifetime: PhantomData<&'r mut ()>,
}

impl<'r> Anchor<'r> {
    /// Creates an anchor that shares `poisoned` with its wrapper.
    pub fn new(poisoned: Rc<Cell<bool>>) -> Self {
        Anchor {
            poisoned,
            lifetime: PhantomData,
        }
    }
}

// NOTE: poisoning on drop is currently disabled. While this impl stays
// commented out nothing in this file ever sets `poisoned` to `true`, so a
// wrapper is never invalidated when its anchor goes away.
//
// impl Drop for Anchor<'_> {
//     fn drop(&mut self) {
//         self.poisoned.replace(true);
//     }
// }

// NOTE: this wrapper erases all the lifetime information from the inner reference
// and provides an anchor object that is meant to keep track of the lifetime at runtime.
//
// The intended design is that when the anchor goes out of scope the wrapper becomes
// poisoned and any attempt to get the inner object results in an error (see the
// disabled `Drop` impl above).
pub struct NativeRefWrap<R> {
    inner_ptr: *mut R,
    poisoned: Rc<Cell<bool>>,
}

impl<R> NativeRefWrap<R> {
    /// Wraps a shared reference, reinterpreting the pointee type `I` as `R`.
    ///
    /// `I` and `R` are expected to be the same type up to lifetimes.
    pub fn wrap<I>(inner: &I) -> (Self, Anchor<'_>) {
        Self::debug_assert_same_layout::<I>();

        // A plain pointer cast is enough here; no `transmute` is needed.
        Self::from_raw(inner as *const I as *mut R)
    }

    /// Wraps a mutable reference, reinterpreting the pointee type `I` as `R`.
    ///
    /// `I` and `R` are expected to be the same type up to lifetimes.
    pub fn wrap_mut<I>(inner: &mut I) -> (Self, Anchor<'_>) {
        Self::debug_assert_same_layout::<I>();

        Self::from_raw(inner as *mut I as *mut R)
    }

    /// Returns a shared reference to the wrapped value, or an error message
    /// if the wrapper is poisoned.
    pub fn get(&self) -> Result<&R, &'static str> {
        self.assert_not_poisoned()?;

        // SAFETY: `inner_ptr` was created from a reference in `wrap`/`wrap_mut`.
        // The caller must guarantee the referent is still alive, because
        // nothing poisons the wrapper while the `Drop` impl above is disabled.
        unsafe { self.inner_ptr.as_ref() }
            .ok_or("The object has been freed and can't be used anymore.")
    }

    /// Returns a mutable reference to the wrapped value, or an error message
    /// if the wrapper is poisoned.
    pub fn get_mut(&mut self) -> Result<&mut R, &'static str> {
        self.assert_not_poisoned()?;

        // SAFETY: same as `get`; additionally the wrapper should have been
        // created with `wrap_mut` so that mutation through the pointer is allowed.
        unsafe { self.inner_ptr.as_mut() }
            .ok_or("The object has been freed and can't be used anymore.")
    }

    /// Builds the wrapper/anchor pair around an already-erased pointer.
    fn from_raw<'r>(inner_ptr: *mut R) -> (Self, Anchor<'r>) {
        let wrap = NativeRefWrap {
            inner_ptr,
            poisoned: Rc::new(Cell::new(false)),
        };

        let anchor = Anchor::new(Rc::clone(&wrap.poisoned));

        (wrap, anchor)
    }

    /// Debug-only check that reinterpreting an `I` as an `R` keeps size and alignment.
    fn debug_assert_same_layout<I>() {
        debug_assert_eq!(
            mem::size_of::<I>(),
            mem::size_of::<R>(),
            "wrapped type and target type differ in size"
        );
        debug_assert_eq!(
            mem::align_of::<I>(),
            mem::align_of::<R>(),
            "wrapped type and target type differ in alignment"
        );
    }

    fn assert_not_poisoned(&self) -> Result<(), &'static str> {
        if self.poisoned.get() {
            Err("The object has been freed and can't be used anymore.")
        } else {
            Ok(())
        }
    }
}
|