inkmark 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,319 @@
1
+ //! Streaming scheme-allowlist filter over pulldown-cmark events.
2
+ //!
3
+ //! Wraps any `Iterator<Item = Event<'a>>` and filters out markdown-emitted
4
+ //! Link/Image events whose URL scheme is not in the respective allowlist.
5
+ //!
6
+ //! Filtering behavior:
7
+ //!
8
+ //! - Blocked link: `Start(Link)` and matching `End(Link)` both dropped;
9
+ //! inner events (text, emphasis, nested markup) pass through as bare
10
+ //! content. Defensive depth counting handles malformed nested links.
11
+ //! - Blocked image: the entire `Start(Image) ... End(Image)` sequence is
12
+ //! replaced with a single `Event::Text` carrying the accumulated alt
13
+ //! text, or nothing if alt was empty.
14
+ //! - `Option<&[String]>` is `None` for "don't filter this kind" (caller
15
+ //! opted out). Empty slice `Some(&[])` blocks every absolute URL.
16
+
17
+ use pulldown_cmark::{CowStr, Event, Tag, TagEnd};
18
+
19
+ use crate::url_match::is_scheme_allowed;
20
+
21
+ /// Streaming adapter that drops Link/Image events with disallowed schemes.
22
+ ///
23
+ /// `'e` is the lifetime of the `Event` borrows from the underlying parser,
24
+ /// `'s` is the lifetime of the scheme-allowlist slices.
25
+ /// Separate lifetimes matter because in the fast path the slices come
26
+ /// from `&'static` `OnceLock` storage (outlives everything), while events
27
+ /// are tied to the source string—tying them into one lifetime would
28
+ /// constrain callers unnecessarily.
29
+ pub struct SchemeFilter<'e, 's, I: Iterator<Item = Event<'e>>> {
30
+ inner: I,
31
+ link_allowed: Option<&'s [String]>,
32
+ image_allowed: Option<&'s [String]>,
33
+ // State for in-progress link drop. 0 = not skipping; N>0 = inside a
34
+ // blocked link with N nested Link starts yet to close. CommonMark
35
+ // disallows nested links so this is usually 1, but the counter keeps
36
+ // us correct on malformed / extension-emitted streams.
37
+ skipping_link_depth: usize,
38
+ // State for in-progress image drop.
39
+ skipping_image: bool,
40
+ image_alt: String,
41
+ }
42
+
43
+ impl<'e, 's, I: Iterator<Item = Event<'e>>> SchemeFilter<'e, 's, I> {
44
+ pub fn new(
45
+ inner: I,
46
+ link_allowed: Option<&'s [String]>,
47
+ image_allowed: Option<&'s [String]>,
48
+ ) -> Self {
49
+ Self {
50
+ inner,
51
+ link_allowed,
52
+ image_allowed,
53
+ skipping_link_depth: 0,
54
+ skipping_image: false,
55
+ image_alt: String::new(),
56
+ }
57
+ }
58
+ }
59
+
60
+ impl<'e, 's, I: Iterator<Item = Event<'e>>> Iterator for SchemeFilter<'e, 's, I> {
61
+ type Item = Event<'e>;
62
+
63
+ fn next(&mut self) -> Option<Event<'e>> {
64
+ loop {
65
+ // Image skipping mode
66
+ if self.skipping_image {
67
+ match self.inner.next()? {
68
+ Event::End(TagEnd::Image) => {
69
+ self.skipping_image = false;
70
+ if !self.image_alt.is_empty() {
71
+ let alt = std::mem::take(&mut self.image_alt);
72
+ return Some(Event::Text(CowStr::Boxed(alt.into_boxed_str())));
73
+ }
74
+ continue;
75
+ }
76
+ Event::Text(t) | Event::Code(t) => {
77
+ self.image_alt.push_str(&t);
78
+ continue;
79
+ }
80
+ _ => continue,
81
+ }
82
+ }
83
+
84
+ // Link skipping mode
85
+ if self.skipping_link_depth > 0 {
86
+ let ev = self.inner.next()?;
87
+ match &ev {
88
+ Event::Start(Tag::Link { .. }) => {
89
+ self.skipping_link_depth += 1;
90
+ return Some(ev);
91
+ }
92
+ Event::End(TagEnd::Link) => {
93
+ self.skipping_link_depth -= 1;
94
+ if self.skipping_link_depth == 0 {
95
+ continue;
96
+ } else {
97
+ return Some(ev);
98
+ }
99
+ }
100
+ _ => return Some(ev),
101
+ }
102
+ }
103
+
104
+ // Check each event for a drop trigger
105
+ let ev = self.inner.next()?;
106
+ match &ev {
107
+ Event::Start(Tag::Link { dest_url, .. })
108
+ if self
109
+ .link_allowed
110
+ .is_some_and(|s| !is_scheme_allowed(dest_url, s)) =>
111
+ {
112
+ self.skipping_link_depth = 1;
113
+ continue;
114
+ }
115
+ Event::Start(Tag::Image { dest_url, .. })
116
+ if self
117
+ .image_allowed
118
+ .is_some_and(|s| !is_scheme_allowed(dest_url, s)) =>
119
+ {
120
+ self.skipping_image = true;
121
+ self.image_alt.clear();
122
+ continue;
123
+ }
124
+ _ => return Some(ev),
125
+ }
126
+ }
127
+ }
128
+ }
129
+
130
#[cfg(test)]
mod tests {
    use super::SchemeFilter;
    use pulldown_cmark::{CowStr, Event, LinkType, Tag, TagEnd};

    /// Builds an owned, lowercased allowlist from string literals.
    fn schemes(s: &[&str]) -> Vec<String> {
        let mut list = Vec::with_capacity(s.len());
        for name in s {
            list.push(name.to_ascii_lowercase());
        }
        list
    }

    /// Runs `events` through a `SchemeFilter` and collects the output.
    fn run<'a>(
        events: Vec<Event<'a>>,
        link: Option<&'a [String]>,
        image: Option<&'a [String]>,
    ) -> Vec<Event<'a>> {
        let filter = SchemeFilter::new(events.into_iter(), link, image);
        filter.collect()
    }

    /// `Start(Link)` for an inline link to `url` with empty title and id.
    fn link_start(url: &str) -> Event<'_> {
        Event::Start(Tag::Link {
            link_type: LinkType::Inline,
            dest_url: CowStr::Borrowed(url),
            title: CowStr::Borrowed(""),
            id: CowStr::Borrowed(""),
        })
    }

    /// `Start(Image)` for an inline image at `url` with empty title and id.
    fn image_start(url: &str) -> Event<'_> {
        Event::Start(Tag::Image {
            link_type: LinkType::Inline,
            dest_url: CowStr::Borrowed(url),
            title: CowStr::Borrowed(""),
            id: CowStr::Borrowed(""),
        })
    }

    /// Borrowed text event.
    fn text(s: &str) -> Event<'_> {
        Event::Text(CowStr::Borrowed(s))
    }

    /// Asserts that `ev` is a `Text` event carrying exactly `expected`.
    fn assert_text(ev: &Event<'_>, expected: &str) {
        match ev {
            Event::Text(t) => assert_eq!(t.as_ref(), expected),
            other => panic!("expected Text, got {other:?}"),
        }
    }

    #[test]
    fn passes_allowed_link_through() {
        let events = vec![
            link_start("https://example.net"),
            text("t"),
            Event::End(TagEnd::Link),
        ];
        let allowed = schemes(&["https"]);
        let out = run(events, Some(&allowed), None);
        assert_eq!(out.len(), 3);
        assert!(matches!(out[0], Event::Start(Tag::Link { .. })));
    }

    #[test]
    fn drops_blocked_link_keeps_text() {
        let events = vec![
            link_start("javascript:alert(1)"),
            text("click"),
            Event::End(TagEnd::Link),
        ];
        let allowed = schemes(&["http", "https"]);
        let out = run(events, Some(&allowed), None);
        assert_eq!(out.len(), 1);
        assert_text(&out[0], "click");
    }

    #[test]
    fn drops_blocked_image_to_alt() {
        let events = vec![
            image_start("data:image/svg+xml,<svg/>"),
            text("fallback"),
            Event::End(TagEnd::Image),
        ];
        let allowed = schemes(&["http", "https"]);
        let out = run(events, None, Some(&allowed));
        assert_eq!(out.len(), 1);
        assert_text(&out[0], "fallback");
    }

    #[test]
    fn drops_blocked_image_empty_alt_entirely() {
        let events = vec![
            image_start("data:image/svg+xml,<svg/>"),
            Event::End(TagEnd::Image),
        ];
        let allowed = schemes(&["https"]);
        let out = run(events, None, Some(&allowed));
        assert!(out.is_empty());
    }

    #[test]
    fn fuses_link_and_image_filters_in_one_pass() {
        let events = vec![
            link_start("javascript:alert(1)"),
            text("bad link"),
            Event::End(TagEnd::Link),
            text(" and "),
            image_start("data:x"),
            text("bad pic"),
            Event::End(TagEnd::Image),
        ];
        let link = schemes(&["https"]);
        let image = schemes(&["https"]);
        let out = run(events, Some(&link), Some(&image));
        // Expected output: Text("bad link"), Text(" and "), Text("bad pic").
        assert_eq!(out.len(), 3);
        assert!(matches!(out[0], Event::Text(_)));
        assert!(matches!(out[1], Event::Text(_)));
        assert!(matches!(out[2], Event::Text(_)));
    }

    #[test]
    fn relative_urls_pass_through() {
        let events = vec![link_start("/local"), text("home"), Event::End(TagEnd::Link)];
        // An empty allowlist blocks all absolute URLs, yet relative URLs
        // must still pass through.
        let allowed = schemes(&[]);
        let out = run(events, Some(&allowed), None);
        assert_eq!(out.len(), 3);
        assert!(matches!(out[0], Event::Start(Tag::Link { .. })));
    }

    #[test]
    fn none_allowlist_means_no_filter() {
        let events = vec![
            link_start("javascript:alert(1)"),
            text("click"),
            Event::End(TagEnd::Link),
        ];
        let out = run(events, None, None);
        assert_eq!(out.len(), 3);
        assert!(matches!(out[0], Event::Start(Tag::Link { .. })));
    }

    #[test]
    fn independent_link_and_image_control() {
        // Link filter off, image filter on: only the image should drop.
        let events = vec![
            link_start("javascript:x"),
            text("stays"),
            Event::End(TagEnd::Link),
            image_start("data:x"),
            text("dropped"),
            Event::End(TagEnd::Image),
        ];
        let image = schemes(&["https"]);
        let out = run(events, None, Some(&image));
        // The link passes intact (3 events); the image collapses to one Text.
        assert_eq!(out.len(), 4);
        assert!(matches!(out[0], Event::Start(Tag::Link { .. })));
        assert!(matches!(out[3], Event::Text(_)));
    }
}