inkmark 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +1 -1
- data/ext/inkmark/Cargo.toml +1 -1
- data/ext/inkmark/src/chunks_by_heading.rs +3 -2
- data/ext/inkmark/src/chunks_by_size.rs +3 -3
- data/ext/inkmark/src/document.rs +46 -4
- data/ext/inkmark/src/highlight.rs +26 -1
- data/ext/inkmark/src/truncate.rs +3 -3
- data/lib/inkmark/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 75b9caec914a5e3b191911f77c83be71f81b729dc156461c30066de77c2e0f13
|
|
4
|
+
data.tar.gz: 8ff22a0d8e12d2ae300d450edbdb0546de506993c9df47612a5c852018e9a8f9
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: '0709173de6887ccee58ace68ea2dca6b42fc1c6e44f4ac5b2510f37501bb876652e7cfb60d5dd6d35025d43a1769fa49ebfd95fbd9d873c1669c5dd8ce31db65'
|
|
7
|
+
data.tar.gz: d8abaf48a2172226c0e39390989210dcb4f51df1d7ae627d99a72e65f2f6731cc85a99a1993827f0480f8da07da57d2a598c32c3272d8b4672cda0a1c5d6697e
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
## [0.1.4] - 2026-06-25
|
|
2
|
+
|
|
3
|
+
- Fix `frontmatter: true` leaking the frontmatter block into `to_markdown`, `chunks_by_heading`, and `chunks_by_size` output. Bug report by @freesteph [#3]
|
|
4
|
+
|
|
5
|
+
## [0.1.3] - 2026-06-21
|
|
6
|
+
|
|
7
|
+
- Fix possible XSS via unescaped language tag in syntax-highlighted code blocks.
|
|
8
|
+
|
|
1
9
|
## [0.1.2] - 2026-06-21
|
|
2
10
|
|
|
3
11
|
- Fix `Inkmark.truncate_markdown` raising `TypeError` when called without explicit `options:`.
|
data/Cargo.lock
CHANGED
data/ext/inkmark/Cargo.toml
CHANGED
data/ext/inkmark/src/chunks_by_heading.rs
CHANGED
|
@@ -19,7 +19,7 @@ use magnus::{Error, RArray, RHash, Ruby};
|
|
|
19
19
|
use pulldown_cmark::{Event, HeadingLevel, Parser, Tag, TagEnd};
|
|
20
20
|
use unicode_segmentation::UnicodeSegmentation;
|
|
21
21
|
|
|
22
|
-
use crate::document::apply_filters;
|
|
22
|
+
use crate::document::{apply_filters, content_events};
|
|
23
23
|
use crate::heading::{self, SlugDeduplicator};
|
|
24
24
|
use crate::options::build_options;
|
|
25
25
|
use crate::toc;
|
|
@@ -43,7 +43,8 @@ pub fn native_chunks_by_heading(
|
|
|
43
43
|
let (cm_opts, flags) = build_options(ruby, opts_hash)?;
|
|
44
44
|
|
|
45
45
|
// Parse + run the full filter pipeline, same as `to_markdown`.
|
|
46
|
-
|
|
46
|
+
// `content_events` drops frontmatter so it never becomes a section.
|
|
47
|
+
let events: Vec<Event> = content_events(&source, cm_opts).collect();
|
|
47
48
|
let events = apply_filters(events, &flags);
|
|
48
49
|
|
|
49
50
|
let boundaries = find_heading_boundaries(&events);
|
|
data/ext/inkmark/src/chunks_by_size.rs
CHANGED
|
@@ -18,10 +18,10 @@
|
|
|
18
18
|
//! adjacent chunks share context.
|
|
19
19
|
|
|
20
20
|
use magnus::{Error, RArray, RHash, Ruby};
|
|
21
|
-
use pulldown_cmark::{Event, Parser};
|
|
21
|
+
use pulldown_cmark::Event;
|
|
22
22
|
use unicode_segmentation::UnicodeSegmentation;
|
|
23
23
|
|
|
24
|
-
use crate::document::apply_filters;
|
|
24
|
+
use crate::document::{apply_filters, content_events};
|
|
25
25
|
use crate::options::build_options;
|
|
26
26
|
|
|
27
27
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
@@ -45,7 +45,7 @@ pub fn native_chunks_by_size(
|
|
|
45
45
|
let params = parse_params(ruby, &opts_hash)?;
|
|
46
46
|
let (cm_opts, flags) = build_options(ruby, opts_hash)?;
|
|
47
47
|
|
|
48
|
-
let events: Vec<Event> = Parser::new_ext(&source, cm_opts).collect();
|
|
48
|
+
let events: Vec<Event> = content_events(&source, cm_opts).collect();
|
|
49
49
|
let events = apply_filters(events, &flags);
|
|
50
50
|
|
|
51
51
|
let windows = match params.at {
|
data/ext/inkmark/src/document.rs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
use magnus::{Error, RHash, Ruby};
|
|
2
|
-
use pulldown_cmark::{html, Event, Options, Parser};
|
|
2
|
+
use pulldown_cmark::{html, Event, Options, Parser, Tag, TagEnd};
|
|
3
3
|
|
|
4
4
|
use crate::autolink;
|
|
5
5
|
use crate::emoji;
|
|
@@ -115,6 +115,44 @@ fn hard_wrap(event: Event) -> Event {
|
|
|
115
115
|
}
|
|
116
116
|
}
|
|
117
117
|
|
|
118
|
+
/// Parse `source` into the content event stream that renderers and chunkers
|
|
119
|
+
/// consume.
|
|
120
|
+
///
|
|
121
|
+
/// YAML frontmatter is removed at this boundary: it is document *metadata*
|
|
122
|
+
/// (surfaced via {Inkmark#frontmatter}), never content. pulldown-cmark's HTML
|
|
123
|
+
/// renderer ignores metadata blocks and our plain-text writer discards them,
|
|
124
|
+
/// but the Markdown serializer (`pulldown-cmark-to-cmark`) faithfully
|
|
125
|
+
/// re-emits them as `---\n...\n---`. Rather than re-stripping after the fact
|
|
126
|
+
/// in every Markdown/chunk path, we never hand those consumers the events in
|
|
127
|
+
/// the first place—so `to_markdown`, `chunks_by_heading`, and
|
|
128
|
+
/// `chunks_by_size` are frontmatter-free by construction, with no separate
|
|
129
|
+
/// pass and no special-casing of the streaming fast path.
|
|
130
|
+
///
|
|
131
|
+
/// Frontmatter extraction walks the *raw* parser (see `stats::collect`), so
|
|
132
|
+
/// dropping the events here does not affect the `frontmatter` accessor.
|
|
133
|
+
pub fn content_events(source: &str, cm_opts: Options) -> impl Iterator<Item = Event<'_>> {
|
|
134
|
+
drop_metadata(Parser::new_ext(source, cm_opts))
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
/// Iterator adapter that filters out `Start(MetadataBlock) … End(MetadataBlock)`
|
|
138
|
+
/// runs, including the raw YAML `Text` between the markers. Stateful but
|
|
139
|
+
/// composes with both the streaming fast path and the buffered `.collect()`
|
|
140
|
+
/// path, so a single definition serves every content consumer.
|
|
141
|
+
fn drop_metadata<'a>(events: impl Iterator<Item = Event<'a>>) -> impl Iterator<Item = Event<'a>> {
|
|
142
|
+
let mut in_metadata = false;
|
|
143
|
+
events.filter(move |event| match event {
|
|
144
|
+
Event::Start(Tag::MetadataBlock(_)) => {
|
|
145
|
+
in_metadata = true;
|
|
146
|
+
false
|
|
147
|
+
}
|
|
148
|
+
Event::End(TagEnd::MetadataBlock(_)) => {
|
|
149
|
+
in_metadata = false;
|
|
150
|
+
false
|
|
151
|
+
}
|
|
152
|
+
_ => !in_metadata,
|
|
153
|
+
})
|
|
154
|
+
}
|
|
155
|
+
|
|
118
156
|
/// Full render: parse once, collect stats + TOC from original events,
|
|
119
157
|
/// apply filters, render HTML. Returns a Ruby Hash:
|
|
120
158
|
///
|
|
@@ -219,14 +257,18 @@ fn render(source: &str, cm_opts: pulldown_cmark::Options, flags: Flags) -> Strin
|
|
|
219
257
|
|
|
220
258
|
fn render_to_markdown(source: &str, cm_opts: pulldown_cmark::Options, flags: Flags) -> String {
|
|
221
259
|
let mut buf = String::with_capacity(source.len());
|
|
222
|
-
let parser = Parser::new_ext(source, cm_opts);
|
|
223
260
|
|
|
261
|
+
// `content_events` strips frontmatter, so the cmark serializer never sees
|
|
262
|
+
// a metadata block to re-emit—on either the streaming or buffered path.
|
|
224
263
|
if !needs_buffer(&flags) {
|
|
225
|
-
cmark_write(parser.map(stream_filter(&flags)), &mut buf);
|
|
264
|
+
cmark_write(
|
|
265
|
+
content_events(source, cm_opts).map(stream_filter(&flags)),
|
|
266
|
+
&mut buf,
|
|
267
|
+
);
|
|
226
268
|
return buf;
|
|
227
269
|
}
|
|
228
270
|
|
|
229
|
-
let events = apply_filters(parser.collect(), &flags);
|
|
271
|
+
let events = apply_filters(content_events(source, cm_opts).collect(), &flags);
|
|
230
272
|
cmark_write(events.into_iter(), &mut buf);
|
|
231
273
|
buf
|
|
232
274
|
}
|
|
data/ext/inkmark/src/highlight.rs
CHANGED
|
@@ -15,6 +15,7 @@ use std::sync::OnceLock;
|
|
|
15
15
|
|
|
16
16
|
use magnus::{Error, Ruby};
|
|
17
17
|
use pulldown_cmark::{CodeBlockKind, CowStr, Event, Tag, TagEnd};
|
|
18
|
+
use pulldown_cmark_escape::escape_html;
|
|
18
19
|
use syntect::highlighting::ThemeSet;
|
|
19
20
|
use syntect::html::{css_for_theme_with_class_style, ClassStyle, ClassedHTMLGenerator};
|
|
20
21
|
use syntect::parsing::SyntaxSet;
|
|
@@ -87,7 +88,15 @@ fn highlight_code(code: &str, lang: &str, ss: &SyntaxSet) -> String {
|
|
|
87
88
|
|
|
88
89
|
// Wrap each line in <span class="line"> so CSS can add line numbers
|
|
89
90
|
// via counter()/::before, highlight specific lines on hover, etc.
|
|
90
|
-
|
|
91
|
+
//
|
|
92
|
+
// `lang` is the fenced code block's info string—attacker-controlled
|
|
93
|
+
// markdown. HTML-escape it before it enters the class attribute so a
|
|
94
|
+
// crafted language tag like `x"><img onerror=...>` can't break out of
|
|
95
|
+
// the attribute and inject markup (this Html event bypasses
|
|
96
|
+
// suppress_raw_html, so escaping here is the only defense).
|
|
97
|
+
let mut buf = String::from("<pre><code class=\"language-");
|
|
98
|
+
let _ = escape_html(&mut buf, lang);
|
|
99
|
+
buf.push_str("\">");
|
|
91
100
|
for line in highlighted.split('\n') {
|
|
92
101
|
if !line.is_empty() {
|
|
93
102
|
buf.push_str("<span class=\"line\">");
|
|
@@ -149,6 +158,22 @@ mod tests {
|
|
|
149
158
|
assert!(html.contains("<pre><code"));
|
|
150
159
|
}
|
|
151
160
|
|
|
161
|
+
#[test]
|
|
162
|
+
fn escapes_malicious_language_tag_in_class_attribute() {
|
|
163
|
+
// The info string is attacker-controlled markdown. A crafted
|
|
164
|
+
// language tag must not break out of the class attribute.
|
|
165
|
+
let payload = "x\"><img src=a onerror=alert(1)>";
|
|
166
|
+
let html = highlight_code("let v = 1;\n", payload, syntax_set());
|
|
167
|
+
assert!(
|
|
168
|
+
!html.contains("\"><img src=a onerror="),
|
|
169
|
+
"language tag must be escaped, got: {html}"
|
|
170
|
+
);
|
|
171
|
+
assert!(
|
|
172
|
+
html.contains("language-x&quot;&gt;&lt;img"),
|
|
173
|
+
"expected escaped class attribute, got: {html}"
|
|
174
|
+
);
|
|
175
|
+
}
|
|
176
|
+
|
|
152
177
|
#[test]
|
|
153
178
|
fn unknown_language_falls_back_to_plain_text() {
|
|
154
179
|
let html = highlight_code("hello\n", "nonexistent-lang-xyz", syntax_set());
|
data/ext/inkmark/src/truncate.rs
CHANGED
|
@@ -7,10 +7,10 @@
|
|
|
7
7
|
//! input, context-window budgeting, and chunk normalization.
|
|
8
8
|
|
|
9
9
|
use magnus::{Error, RHash, Ruby};
|
|
10
|
-
use pulldown_cmark::{Event, Parser};
|
|
10
|
+
use pulldown_cmark::Event;
|
|
11
11
|
use unicode_segmentation::UnicodeSegmentation;
|
|
12
12
|
|
|
13
|
-
use crate::document::apply_filters;
|
|
13
|
+
use crate::document::{apply_filters, content_events};
|
|
14
14
|
use crate::options::{build_options, Flags};
|
|
15
15
|
|
|
16
16
|
/// What kind of boundary to cut at.
|
|
@@ -44,7 +44,7 @@ pub fn truncate_source(
|
|
|
44
44
|
flags: &Flags,
|
|
45
45
|
params: &TruncateParams,
|
|
46
46
|
) -> String {
|
|
47
|
-
let events: Vec<Event> = Parser::new_ext(source, cm_opts).collect();
|
|
47
|
+
let events: Vec<Event> = content_events(source, cm_opts).collect();
|
|
48
48
|
let events = apply_filters(events, flags);
|
|
49
49
|
truncate_events(&events, params)
|
|
50
50
|
}
|
data/lib/inkmark/version.rb
CHANGED