inkmark 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,284 @@
1
+ //! Image attribute injection filter and URL matcher.
2
+ //!
3
+ //! When enabled, replaces pulldown-cmark's default image event sequence
4
+ //! (`Start(Tag::Image) ... End(TagEnd::Image)`) with a single `Event::Html`
5
+ //! carrying a hand-built `<img>` tag that includes the "modern" loading and
6
+ //! decoding hints:
7
+ //!
8
+ //! ```html
9
+ //! <img src="..." alt="..." loading="lazy" decoding="async" />
10
+ //! ```
11
+ //!
12
+ //! Pulldown-cmark's `Tag::Image` struct doesn't expose an "extra attributes"
13
+ //! field, so rewriting the Tag in place isn't enough—we have to bypass
14
+ //! the built-in image writer entirely and emit the HTML ourselves. Alt,
15
+ //! title, and URL are escaped through the same `pulldown-cmark-escape`
16
+ //! functions the upstream html writer uses, so the output stays byte-
17
+ //! compatible with what pulldown-cmark would have produced plus the two
18
+ //! extra attributes.
19
+
20
+ use globset::GlobSet;
21
+ use pulldown_cmark::{CowStr, Event, Tag, TagEnd};
22
+ use pulldown_cmark_escape::{escape_href, escape_html};
23
+
24
+ use crate::url_match::is_host_allowed;
25
+
26
+ /// Rewrite every image in the event stream as a self-contained `Event::Html`
27
+ /// carrying `<img ... loading="lazy" decoding="async">`.
28
+ ///
29
+ /// We consume the input Vec to own each event, then rebuild with
30
+ /// `Vec::with_capacity(events.len())` so passthrough events move by value
31
+ /// and image events are replaced with a single Html event.
32
+ pub fn add_lazy_loading(events: Vec<Event<'_>>) -> Vec<Event<'_>> {
33
+ let mut out: Vec<Event<'_>> = Vec::with_capacity(events.len());
34
+ let mut iter = events.into_iter();
35
+
36
+ while let Some(event) = iter.next() {
37
+ match event {
38
+ Event::Start(Tag::Image {
39
+ dest_url, title, ..
40
+ }) => {
41
+ // Consume events up to the matching End(Image), accumulating
42
+ // alt text from Text and Code payloads. Images can contain
43
+ // inline formatting (e.g. `![**bold**](img.png)`), which
44
+ // produces Start(Strong)/Text/End(Strong) events; the bare
45
+ // text content is what we want for the alt attribute.
46
+ let mut alt = String::new();
47
+ for inner in iter.by_ref() {
48
+ match inner {
49
+ Event::End(TagEnd::Image) => break,
50
+ Event::Text(t) | Event::Code(t) => alt.push_str(&t),
51
+ _ => {}
52
+ }
53
+ }
54
+
55
+ let html = build_img_tag(&dest_url, &alt, &title);
56
+ out.push(Event::Html(CowStr::Boxed(html.into_boxed_str())));
57
+ }
58
+ other => out.push(other),
59
+ }
60
+ }
61
+
62
+ out
63
+ }
64
+
65
+ /// Drop images whose `src` host isn't in the allowlist. The whole
66
+ /// `Start(Image) ... End(Image)` sequence is replaced with a single
67
+ /// `Event::Text` carrying the image's alt text, or removed entirely
68
+ /// when alt is empty. Non-web URLs pass through: [`is_host_allowed`]
69
+ /// returns true for any URL with no parseable host.
70
+ ///
71
+ /// Alt accumulation matches `add_lazy_loading`: images can contain
72
+ /// markdown like `![**bold**](img.png)`, producing
73
+ /// Start(Strong)/Text/End(Strong) events—we pull the raw text payloads
74
+ /// out and discard formatting.
75
+ pub fn filter_by_hosts<'a>(events: Vec<Event<'a>>, set: &GlobSet) -> Vec<Event<'a>> {
76
+ let mut out: Vec<Event<'a>> = Vec::with_capacity(events.len());
77
+ let mut iter = events.into_iter();
78
+
79
+ while let Some(event) = iter.next() {
80
+ match event {
81
+ Event::Start(Tag::Image { ref dest_url, .. }) if !is_host_allowed(dest_url, set) => {
82
+ let mut alt = String::new();
83
+ for inner in iter.by_ref() {
84
+ match inner {
85
+ Event::End(TagEnd::Image) => break,
86
+ Event::Text(t) | Event::Code(t) => alt.push_str(&t),
87
+ _ => {}
88
+ }
89
+ }
90
+ if !alt.is_empty() {
91
+ out.push(Event::Text(CowStr::Boxed(alt.into_boxed_str())));
92
+ }
93
+ }
94
+ other => out.push(other),
95
+ }
96
+ }
97
+
98
+ out
99
+ }
100
+
101
+ /// Construct the `<img>` HTML string with `loading="lazy"` and
102
+ /// `decoding="async"` attributes. `src` is escaped as a URL (percent-
103
+ /// encoded where necessary); `alt` and `title` are HTML-attribute
104
+ /// escaped. The output matches pulldown-cmark's built-in image writer
105
+ /// plus the two extra hint attributes.
106
+ #[inline]
107
+ fn build_img_tag(src: &str, alt: &str, title: &str) -> String {
108
+ // Rough capacity estimate: base tag (~60) + src + alt + title length.
109
+ let mut out = String::with_capacity(60 + src.len() + alt.len() + title.len());
110
+ out.push_str("<img src=\"");
111
+
112
+ // escape_href percent-encodes problematic bytes and also handles HTML
113
+ // specials (&, <, etc.). Matches pulldown-cmark's upstream behavior.
114
+ let _ = escape_href(&mut out, src);
115
+ out.push_str("\" alt=\"");
116
+ let _ = escape_html(&mut out, alt);
117
+ out.push('"');
118
+ if !title.is_empty() {
119
+ out.push_str(" title=\"");
120
+ let _ = escape_html(&mut out, title);
121
+ out.push('"');
122
+ }
123
+ out.push_str(" loading=\"lazy\" decoding=\"async\" />");
124
+ out
125
+ }
126
+
127
/// Unit tests for the image filters: `build_img_tag` output shape and
/// escaping, event collapsing in `add_lazy_loading`, and the allow/deny
/// behavior of `filter_by_hosts`.
#[cfg(test)]
mod tests {
    use super::{add_lazy_loading, build_img_tag, filter_by_hosts};
    use globset::{Glob, GlobSetBuilder};
    use pulldown_cmark::{CowStr, Event, LinkType, Tag, TagEnd};

    /// Build a `GlobSet` from literal patterns; panics on an invalid glob,
    /// which is acceptable in test code.
    fn host_set(patterns: &[&str]) -> globset::GlobSet {
        let mut b = GlobSetBuilder::new();
        for p in patterns {
            b.add(Glob::new(p).unwrap());
        }
        b.build().unwrap()
    }

    // Minimal tag: src + alt + the two hint attributes, no title.
    #[test]
    fn basic_tag() {
        let html = build_img_tag("img.png", "a picture", "");
        assert_eq!(
            html,
            r#"<img src="img.png" alt="a picture" loading="lazy" decoding="async" />"#
        );
    }

    // A non-empty title is placed between alt and the hint attributes.
    #[test]
    fn with_title() {
        let html = build_img_tag("img.png", "alt", "the title");
        assert_eq!(
            html,
            r#"<img src="img.png" alt="alt" title="the title" loading="lazy" decoding="async" />"#
        );
    }

    #[test]
    fn escapes_alt_html_specials() {
        // Attempted HTML injection in alt—must come out escaped.
        let html = build_img_tag("img.png", "a\"b<c>d&e", "");
        assert!(html.contains("alt=\"a&quot;b&lt;c&gt;d&amp;e\""));
    }

    #[test]
    fn escapes_url_ampersand() {
        let html = build_img_tag("img.png?a=1&b=2", "alt", "");
        // pulldown-cmark-escape writes `&` as `&amp;` in hrefs.
        assert!(html.contains("src=\"img.png?a=1&amp;b=2\""));
    }

    // An empty alt still produces an explicit `alt=""` attribute.
    #[test]
    fn empty_alt_still_valid() {
        let html = build_img_tag("img.png", "", "");
        assert_eq!(
            html,
            r#"<img src="img.png" alt="" loading="lazy" decoding="async" />"#
        );
    }

    // An empty title must not produce a `title=` attribute at all.
    #[test]
    fn title_skipped_when_empty() {
        let html = build_img_tag("img.png", "alt", "");
        assert!(!html.contains("title="));
    }

    #[test]
    fn add_lazy_loading_collapses_image_events_into_html() {
        // Start(Image) + Text("alt") + End(Image) → single Html event with loading=
        let events = vec![
            Event::Start(Tag::Image {
                link_type: LinkType::Inline,
                dest_url: CowStr::Borrowed("photo.jpg"),
                title: CowStr::Borrowed(""),
                id: CowStr::Borrowed(""),
            }),
            Event::Text(CowStr::Borrowed("alt text")),
            Event::End(TagEnd::Image),
        ];
        let out = add_lazy_loading(events);
        assert_eq!(out.len(), 1, "should collapse to one event");
        match &out[0] {
            Event::Html(html) => {
                assert!(
                    html.contains("loading="),
                    "missing loading attribute: {html}"
                );
                assert!(html.contains("alt=\"alt text\""), "missing alt: {html}");
                assert!(html.contains("src=\"photo.jpg\""), "missing src: {html}");
            }
            other => panic!("expected Html event, got {other:?}"),
        }
    }

    // A disallowed host: the three image events are replaced by one Text
    // event holding the alt text.
    #[test]
    fn filter_by_hosts_drops_disallowed_image_to_alt_text() {
        let events = vec![
            Event::Start(Tag::Image {
                link_type: LinkType::Inline,
                dest_url: CowStr::Borrowed("https://evil.com/bad.png"),
                title: CowStr::Borrowed(""),
                id: CowStr::Borrowed(""),
            }),
            Event::Text(CowStr::Borrowed("fallback alt")),
            Event::End(TagEnd::Image),
        ];
        let out = filter_by_hosts(events, &host_set(&["example.net"]));
        assert_eq!(out.len(), 1);
        match &out[0] {
            Event::Text(t) => assert_eq!(t.as_ref(), "fallback alt"),
            other => panic!("expected Text event, got {other:?}"),
        }
    }

    // A disallowed host with no alt text: nothing at all is emitted.
    #[test]
    fn filter_by_hosts_drops_disallowed_image_with_empty_alt_entirely() {
        let events = vec![
            Event::Start(Tag::Image {
                link_type: LinkType::Inline,
                dest_url: CowStr::Borrowed("https://evil.com/x.png"),
                title: CowStr::Borrowed(""),
                id: CowStr::Borrowed(""),
            }),
            Event::End(TagEnd::Image),
        ];
        let out = filter_by_hosts(events, &host_set(&["example.net"]));
        assert!(out.is_empty(), "expected zero events, got {out:?}");
    }

    // A host matching the `*.example.net` glob: all three events pass through.
    #[test]
    fn filter_by_hosts_keeps_allowed_images_untouched() {
        let events = vec![
            Event::Start(Tag::Image {
                link_type: LinkType::Inline,
                dest_url: CowStr::Borrowed("https://cdn.example.net/ok.png"),
                title: CowStr::Borrowed(""),
                id: CowStr::Borrowed(""),
            }),
            Event::Text(CowStr::Borrowed("alt")),
            Event::End(TagEnd::Image),
        ];
        let out = filter_by_hosts(events, &host_set(&["*.example.net"]));
        assert_eq!(out.len(), 3);
        assert!(matches!(out[0], Event::Start(Tag::Image { .. })));
    }

    // A relative URL is kept even though the allowlist is empty.
    #[test]
    fn filter_by_hosts_leaves_relative_images_alone() {
        let events = vec![
            Event::Start(Tag::Image {
                link_type: LinkType::Inline,
                dest_url: CowStr::Borrowed("/local/pic.png"),
                title: CowStr::Borrowed(""),
                id: CowStr::Borrowed(""),
            }),
            Event::Text(CowStr::Borrowed("alt")),
            Event::End(TagEnd::Image),
        ];
        let out = filter_by_hosts(events, &host_set(&[]));
        assert_eq!(out.len(), 3);
        assert!(matches!(out[0], Event::Start(Tag::Image { .. })));
    }
}
@@ -0,0 +1,54 @@
1
+ #![forbid(unsafe_code)]
2
+
3
+ use magnus::{function, prelude::*, Error, Ruby};
4
+
5
+ mod autolink;
6
+ mod chunks_by_heading;
7
+ mod chunks_by_size;
8
+ mod document;
9
+ mod emoji;
10
+ mod handler;
11
+ mod heading;
12
+ mod highlight;
13
+ mod image;
14
+ mod link;
15
+ mod options;
16
+ mod plain_text;
17
+ mod scheme_filter;
18
+ mod stats;
19
+ mod tag_filter;
20
+ mod toc;
21
+ mod truncate;
22
+ mod url_match;
23
+
24
/// Extension entry point, marked with `#[magnus::init]`.
///
/// Defines the `Inkmark` class (passing `ruby.class_object()` as the second
/// argument to `define_class`) and registers each native function below as a
/// singleton method on it. The trailing integer given to `function!` is
/// presumably the Ruby-side arity of the method (confirm against the magnus
/// documentation). Any failure while defining the class or a method is
/// propagated to the caller through `?`.
#[magnus::init]
fn init(ruby: &Ruby) -> Result<(), Error> {
    let inkmark = ruby.define_class("Inkmark", ruby.class_object())?;

    // Whole-document renderers implemented in the `document` module.
    inkmark.define_singleton_method("_native_to_html", function!(document::native_to_html, 2))?;
    inkmark.define_singleton_method("_native_to_markdown", function!(document::native_to_markdown, 2))?;
    inkmark.define_singleton_method("_native_to_plain_text", function!(document::native_to_plain_text, 2))?;

    // Chunking and truncation helpers.
    inkmark.define_singleton_method(
        "_native_chunks_by_heading",
        function!(chunks_by_heading::native_chunks_by_heading, 2),
    )?;
    inkmark.define_singleton_method(
        "_native_chunks_by_size",
        function!(chunks_by_size::native_chunks_by_size, 2),
    )?;
    inkmark.define_singleton_method(
        "_native_truncate_markdown",
        function!(truncate::native_truncate_markdown, 3),
    )?;
    inkmark.define_singleton_method(
        "_native_render_full",
        function!(document::native_render_full, 2),
    )?;

    // Syntax-highlighting support from the `highlight` module.
    inkmark.define_singleton_method("_syntax_css", function!(highlight::syntax_css, 1))?;
    inkmark.define_singleton_method("_syntax_themes", function!(highlight::syntax_themes, 0))?;

    // Handler-driven walking/rendering from the `handler` module.
    inkmark.define_singleton_method("_native_walk", function!(handler::native_walk, 3))?;
    inkmark.define_singleton_method(
        "_native_render_with_handlers",
        function!(handler::native_render_with_handlers, 3),
    )?;
    Ok(())
}
@@ -0,0 +1,291 @@
1
+ //! External link `rel` attribute injection filter and URL matcher.
2
+ //!
3
+ //! When enabled, replaces the `Start(Tag::Link)` and matching
4
+ //! `End(TagEnd::Link)` events for every external link with hand-built
5
+ //! `<a href="..." rel="nofollow noopener">` / `</a>` HTML events. Inner
6
+ //! events (text, emphasis, inline code, images) pass through unchanged,
7
+ //! so pulldown-cmark's built-in writers still render the link content:
8
+ //! we only replace the opening and closing tags.
9
+ //!
10
+ //! "External" here means the URL starts with `http://` or `https://`
11
+ //! (case-insensitive). Relative paths, anchor fragments, and non-web
12
+ //! schemes (`mailto:`, `tel:`, `javascript:`) are not touched.
13
+
14
+ use globset::GlobSet;
15
+ use pulldown_cmark::{CowStr, Event, Tag, TagEnd};
16
+ use pulldown_cmark_escape::{escape_href, escape_html};
17
+
18
+ use crate::url_match::is_host_allowed;
19
+
20
+ /// Add `rel="nofollow noopener"` to every external `<a>` tag by replacing
21
+ /// its `Start(Link)` event with a synthesized `Event::Html` opening tag
22
+ /// and its matching `End(Link)` event with a `</a>` close tag.
23
+ pub fn add_nofollow(events: Vec<Event<'_>>) -> Vec<Event<'_>> {
24
+ let mut out: Vec<Event<'_>> = Vec::with_capacity(events.len());
25
+ let mut iter = events.into_iter();
26
+
27
+ while let Some(event) = iter.next() {
28
+ match event {
29
+ Event::Start(Tag::Link {
30
+ link_type: _,
31
+ ref dest_url,
32
+ ref title,
33
+ id: _,
34
+ }) if is_external(dest_url) => {
35
+ let open = build_link_open(dest_url, title);
36
+ out.push(Event::Html(CowStr::Boxed(open.into_boxed_str())));
37
+
38
+ // Consume inner events through the matching End(Link),
39
+ // depth-counting so a nested link doesn't break the
40
+ // close-tag pairing. CommonMark disallows nested links
41
+ // in valid markdown, so depth should always reach zero on
42
+ // the first End we see.
43
+ let mut depth: usize = 1;
44
+ for inner in iter.by_ref() {
45
+ let is_link_start = matches!(&inner, Event::Start(Tag::Link { .. }));
46
+ let is_link_end = matches!(&inner, Event::End(TagEnd::Link));
47
+
48
+ if is_link_start {
49
+ depth += 1;
50
+ out.push(inner);
51
+ } else if is_link_end {
52
+ depth -= 1;
53
+ if depth == 0 {
54
+ out.push(Event::Html(CowStr::Borrowed("</a>")));
55
+ break;
56
+ }
57
+ out.push(inner);
58
+ } else {
59
+ out.push(inner);
60
+ }
61
+ }
62
+ }
63
+ other => out.push(other),
64
+ }
65
+ }
66
+
67
+ out
68
+ }
69
+
70
+ /// Drop `<a>` tags whose destination URL's host isn't in the allowlist,
71
+ /// leaving the inner content (text, emphasis, images) in place as a
72
+ /// bare phrase. Non-web URLs (relative paths, `mailto:`, etc.) pass
73
+ /// through.
74
+ pub fn filter_by_hosts<'a>(events: Vec<Event<'a>>, set: &GlobSet) -> Vec<Event<'a>> {
75
+ let mut out: Vec<Event<'a>> = Vec::with_capacity(events.len());
76
+ let mut iter = events.into_iter();
77
+
78
+ while let Some(event) = iter.next() {
79
+ match event {
80
+ Event::Start(Tag::Link { ref dest_url, .. }) if !is_host_allowed(dest_url, set) => {
81
+ let mut depth: usize = 1;
82
+ for inner in iter.by_ref() {
83
+ match &inner {
84
+ Event::Start(Tag::Link { .. }) => {
85
+ depth += 1;
86
+ out.push(inner);
87
+ }
88
+ Event::End(TagEnd::Link) => {
89
+ depth -= 1;
90
+ if depth == 0 {
91
+ break;
92
+ }
93
+ out.push(inner);
94
+ }
95
+ _ => out.push(inner),
96
+ }
97
+ }
98
+ }
99
+ other => out.push(other),
100
+ }
101
+ }
102
+
103
+ out
104
+ }
105
+
106
/// Report whether `url` is an absolute web URL.
///
/// The text before the first `://` must equal `http` or `https`, compared
/// ASCII case-insensitively. Anything without a `://` separator (relative
/// paths, `#anchor`, `mailto:`, `tel:`, `javascript:`) and protocol-relative
/// `//host` URLs yield `false`.
#[inline]
fn is_external(url: &str) -> bool {
    let Some((scheme, _rest)) = url.split_once("://") else {
        return false;
    };
    ["http", "https"]
        .iter()
        .any(|web| scheme.eq_ignore_ascii_case(web))
}
115
+
116
+ /// Construct the `<a href="..." title="..." rel="nofollow noopener">`
117
+ /// opening tag. The URL goes through `escape_href` (percent-encoding +
118
+ /// HTML-special escaping, matching pulldown-cmark's upstream behavior),
119
+ /// and the title through `escape_html` for attribute context.
120
+ #[inline]
121
+ fn build_link_open(href: &str, title: &str) -> String {
122
+ let mut out = String::with_capacity(40 + href.len() + title.len());
123
+ out.push_str("<a href=\"");
124
+ let _ = escape_href(&mut out, href);
125
+ out.push('"');
126
+ if !title.is_empty() {
127
+ out.push_str(" title=\"");
128
+ let _ = escape_html(&mut out, title);
129
+ out.push('"');
130
+ }
131
+ out.push_str(" rel=\"nofollow noopener\">");
132
+ out
133
+ }
134
+
135
/// Unit tests for the link filters: `is_external` classification,
/// `build_link_open` output shape and escaping, the event rewrite done by
/// `add_nofollow`, and the allow/deny behavior of `filter_by_hosts`.
#[cfg(test)]
mod tests {
    use super::{add_nofollow, build_link_open, filter_by_hosts, is_external};
    use globset::{Glob, GlobSetBuilder};
    use pulldown_cmark::{CowStr, Event, LinkType, Tag, TagEnd};

    /// Build a `GlobSet` from literal patterns; panics on an invalid glob,
    /// which is acceptable in test code.
    fn host_set(patterns: &[&str]) -> globset::GlobSet {
        let mut b = GlobSetBuilder::new();
        for p in patterns {
            b.add(Glob::new(p).unwrap());
        }
        b.build().unwrap()
    }

    #[test]
    fn external_detection() {
        // http/https in any letter case count as external.
        assert!(is_external("http://example.net"));
        assert!(is_external("https://example.net"));
        assert!(is_external("HTTPS://EXAMPLE.NET"));
        assert!(is_external("Http://mixed.case"));

        // Everything else does not, including protocol-relative URLs and
        // bare scheme names without a `://` separator.
        assert!(!is_external("/local/path"));
        assert!(!is_external("relative.html"));
        assert!(!is_external("#anchor"));
        assert!(!is_external("mailto:user@example.net"));
        assert!(!is_external("tel:+1234567890"));
        assert!(!is_external("javascript:alert(1)"));
        assert!(!is_external("//protocol-relative.com"));
        assert!(!is_external(""));
        assert!(!is_external("h"));
        assert!(!is_external("http"));
        assert!(!is_external("https"));
    }

    // No title: only href and rel are emitted.
    #[test]
    fn open_tag_basic() {
        assert_eq!(
            build_link_open("https://example.net", ""),
            r#"<a href="https://example.net" rel="nofollow noopener">"#
        );
    }

    // A non-empty title is placed between href and rel.
    #[test]
    fn open_tag_with_title() {
        assert_eq!(
            build_link_open("https://example.net", "the title"),
            r#"<a href="https://example.net" title="the title" rel="nofollow noopener">"#
        );
    }

    // `&` in the query string must come out as `&amp;` inside href.
    #[test]
    fn open_tag_escapes_url_ampersand() {
        let tag = build_link_open("https://example.net/?a=1&b=2", "");
        assert!(tag.contains(r#"href="https://example.net/?a=1&amp;b=2""#));
    }

    // Quotes and angle brackets in the title must be entity-escaped.
    #[test]
    fn open_tag_escapes_title_specials() {
        let tag = build_link_open("https://example.net", r#"a "quoted" <title>"#);
        assert!(tag.contains("&quot;quoted&quot;"));
        assert!(tag.contains("&lt;title&gt;"));
    }

    #[test]
    fn add_nofollow_adds_rel_to_external_link() {
        // Start(Link) + Text("click") + End(Link) → Html open + Text + Html close
        let events = vec![
            Event::Start(Tag::Link {
                link_type: LinkType::Inline,
                dest_url: CowStr::Borrowed("https://example.net"),
                title: CowStr::Borrowed(""),
                id: CowStr::Borrowed(""),
            }),
            Event::Text(CowStr::Borrowed("click")),
            Event::End(TagEnd::Link),
        ];
        let out = add_nofollow(events);
        // Should produce: Html(open), Text("click"), Html("</a>")
        assert_eq!(out.len(), 3, "expected 3 events, got {}", out.len());
        match &out[0] {
            Event::Html(html) => {
                assert!(
                    html.contains("nofollow"),
                    "opening tag must contain nofollow: {html}"
                );
                assert!(
                    html.contains("https://example.net"),
                    "opening tag must contain href: {html}"
                );
            }
            other => panic!("expected Html open event, got {other:?}"),
        }
        match &out[2] {
            Event::Html(html) => assert_eq!(html.as_ref(), "</a>"),
            other => panic!("expected Html close event, got {other:?}"),
        }
    }

    #[test]
    fn filter_by_hosts_drops_disallowed_link_tags_keeping_text() {
        // Start(Link to evil) + Text("click") + End(Link) →
        // just Text("click"), with the link wrapper gone.
        let events = vec![
            Event::Start(Tag::Link {
                link_type: LinkType::Inline,
                dest_url: CowStr::Borrowed("https://evil.com"),
                title: CowStr::Borrowed(""),
                id: CowStr::Borrowed(""),
            }),
            Event::Text(CowStr::Borrowed("click me")),
            Event::End(TagEnd::Link),
        ];
        let out = filter_by_hosts(events, &host_set(&["example.net"]));
        assert_eq!(out.len(), 1);
        match &out[0] {
            Event::Text(t) => assert_eq!(t.as_ref(), "click me"),
            other => panic!("expected Text, got {other:?}"),
        }
    }

    // A host matching the `*.example.net` glob: all three events pass through.
    #[test]
    fn filter_by_hosts_keeps_allowed_links_untouched() {
        let events = vec![
            Event::Start(Tag::Link {
                link_type: LinkType::Inline,
                dest_url: CowStr::Borrowed("https://cdn.example.net/doc"),
                title: CowStr::Borrowed(""),
                id: CowStr::Borrowed(""),
            }),
            Event::Text(CowStr::Borrowed("ok")),
            Event::End(TagEnd::Link),
        ];
        let out = filter_by_hosts(events, &host_set(&["*.example.net"]));
        assert_eq!(out.len(), 3);
        assert!(matches!(out[0], Event::Start(Tag::Link { .. })));
        assert!(matches!(out[2], Event::End(TagEnd::Link)));
    }

    #[test]
    fn filter_by_hosts_leaves_relative_and_mailto_alone() {
        // Even with an empty allowlist that blocks everything external,
        // relative/mailto links pass through unchanged.
        // NOTE(review): only a relative URL ("/local") is exercised below;
        // there is no `mailto:` case despite the test name.
        let events = vec![
            Event::Start(Tag::Link {
                link_type: LinkType::Inline,
                dest_url: CowStr::Borrowed("/local"),
                title: CowStr::Borrowed(""),
                id: CowStr::Borrowed(""),
            }),
            Event::Text(CowStr::Borrowed("home")),
            Event::End(TagEnd::Link),
        ];
        let out = filter_by_hosts(events, &host_set(&[]));
        assert_eq!(out.len(), 3);
        assert!(matches!(out[0], Event::Start(Tag::Link { .. })));
    }
}