inkmark 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +1 -1
- data/ext/inkmark/Cargo.toml +1 -1
- data/ext/inkmark/src/chunks_by_heading.rs +3 -2
- data/ext/inkmark/src/chunks_by_size.rs +3 -3
- data/ext/inkmark/src/document.rs +46 -4
- data/ext/inkmark/src/truncate.rs +3 -3
- data/lib/inkmark/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 75b9caec914a5e3b191911f77c83be71f81b729dc156461c30066de77c2e0f13
|
|
4
|
+
data.tar.gz: 8ff22a0d8e12d2ae300d450edbdb0546de506993c9df47612a5c852018e9a8f9
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: '0709173de6887ccee58ace68ea2dca6b42fc1c6e44f4ac5b2510f37501bb876652e7cfb60d5dd6d35025d43a1769fa49ebfd95fbd9d873c1669c5dd8ce31db65'
|
|
7
|
+
data.tar.gz: d8abaf48a2172226c0e39390989210dcb4f51df1d7ae627d99a72e65f2f6731cc85a99a1993827f0480f8da07da57d2a598c32c3272d8b4672cda0a1c5d6697e
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
## [0.1.4] - 2026-06-25
|
|
2
|
+
|
|
3
|
+
- Fix `frontmatter: true` leaking the frontmatter block into `to_markdown`, `chunks_by_heading`, and `chunks_by_size` output. Bug report by @freesteph [#3]
|
|
4
|
+
|
|
1
5
|
## [0.1.3] - 2026-06-21
|
|
2
6
|
|
|
3
7
|
- Fix possible XSS via unescaped language tag in syntax-highlighted code blocks.
|
data/Cargo.lock
CHANGED
data/ext/inkmark/Cargo.toml
CHANGED
data/ext/inkmark/src/chunks_by_heading.rs
CHANGED
|
@@ -19,7 +19,7 @@ use magnus::{Error, RArray, RHash, Ruby};
|
|
|
19
19
|
use pulldown_cmark::{Event, HeadingLevel, Parser, Tag, TagEnd};
|
|
20
20
|
use unicode_segmentation::UnicodeSegmentation;
|
|
21
21
|
|
|
22
|
-
use crate::document::apply_filters;
|
|
22
|
+
use crate::document::{apply_filters, content_events};
|
|
23
23
|
use crate::heading::{self, SlugDeduplicator};
|
|
24
24
|
use crate::options::build_options;
|
|
25
25
|
use crate::toc;
|
|
@@ -43,7 +43,8 @@ pub fn native_chunks_by_heading(
|
|
|
43
43
|
let (cm_opts, flags) = build_options(ruby, opts_hash)?;
|
|
44
44
|
|
|
45
45
|
// Parse + run the full filter pipeline, same as `to_markdown`.
|
|
46
|
-
let events: Vec<Event> = Parser::new_ext(&source, cm_opts).collect();
|
46
|
+
// `content_events` drops frontmatter so it never becomes a section.
|
|
47
|
+
let events: Vec<Event> = content_events(&source, cm_opts).collect();
|
|
47
48
|
let events = apply_filters(events, &flags);
|
|
48
49
|
|
|
49
50
|
let boundaries = find_heading_boundaries(&events);
|
data/ext/inkmark/src/chunks_by_size.rs
CHANGED
|
@@ -18,10 +18,10 @@
|
|
|
18
18
|
//! adjacent chunks share context.
|
|
19
19
|
|
|
20
20
|
use magnus::{Error, RArray, RHash, Ruby};
|
|
21
|
-
use pulldown_cmark::{Event, Parser};
|
|
21
|
+
use pulldown_cmark::Event;
|
|
22
22
|
use unicode_segmentation::UnicodeSegmentation;
|
|
23
23
|
|
|
24
|
-
use crate::document::apply_filters;
|
|
24
|
+
use crate::document::{apply_filters, content_events};
|
|
25
25
|
use crate::options::build_options;
|
|
26
26
|
|
|
27
27
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
@@ -45,7 +45,7 @@ pub fn native_chunks_by_size(
|
|
|
45
45
|
let params = parse_params(ruby, &opts_hash)?;
|
|
46
46
|
let (cm_opts, flags) = build_options(ruby, opts_hash)?;
|
|
47
47
|
|
|
48
|
-
let events: Vec<Event> = Parser::new_ext(&source, cm_opts).collect();
|
|
48
|
+
let events: Vec<Event> = content_events(&source, cm_opts).collect();
|
|
49
49
|
let events = apply_filters(events, &flags);
|
|
50
50
|
|
|
51
51
|
let windows = match params.at {
|
data/ext/inkmark/src/document.rs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
use magnus::{Error, RHash, Ruby};
|
|
2
|
-
use pulldown_cmark::{html, Event, Options, Parser};
|
|
2
|
+
use pulldown_cmark::{html, Event, Options, Parser, Tag, TagEnd};
|
|
3
3
|
|
|
4
4
|
use crate::autolink;
|
|
5
5
|
use crate::emoji;
|
|
@@ -115,6 +115,44 @@ fn hard_wrap(event: Event) -> Event {
|
|
|
115
115
|
}
|
|
116
116
|
}
|
|
117
117
|
|
|
118
|
+
/// Parse `source` into the content event stream that renderers and chunkers
|
|
119
|
+
/// consume.
|
|
120
|
+
///
|
|
121
|
+
/// YAML frontmatter is removed at this boundary: it is document *metadata*
|
|
122
|
+
/// (surfaced via {Inkmark#frontmatter}), never content. pulldown-cmark's HTML
|
|
123
|
+
/// renderer ignores metadata blocks and our plain-text writer discards them,
|
|
124
|
+
/// but the Markdown serializer (`pulldown-cmark-to-cmark`) faithfully
|
|
125
|
+
/// re-emits them as `---\n...\n---`. Rather than re-stripping after the fact
|
|
126
|
+
/// in every Markdown/chunk path, we never hand those consumers the events in
|
|
127
|
+
/// the first place—so `to_markdown`, `chunks_by_heading`, and
|
|
128
|
+
/// `chunks_by_size` are frontmatter-free by construction, with no separate
|
|
129
|
+
/// pass and no special-casing of the streaming fast path.
|
|
130
|
+
///
|
|
131
|
+
/// Frontmatter extraction walks the *raw* parser (see `stats::collect`), so
|
|
132
|
+
/// dropping the events here does not affect the `frontmatter` accessor.
|
|
133
|
+
pub fn content_events(source: &str, cm_opts: Options) -> impl Iterator<Item = Event<'_>> {
|
|
134
|
+
drop_metadata(Parser::new_ext(source, cm_opts))
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
/// Iterator adapter that filters out `Start(MetadataBlock) … End(MetadataBlock)`
|
|
138
|
+
/// runs, including the raw YAML `Text` between the markers. Stateful but
|
|
139
|
+
/// composes with both the streaming fast path and the buffered `.collect()`
|
|
140
|
+
/// path, so a single definition serves every content consumer.
|
|
141
|
+
fn drop_metadata<'a>(events: impl Iterator<Item = Event<'a>>) -> impl Iterator<Item = Event<'a>> {
|
|
142
|
+
let mut in_metadata = false;
|
|
143
|
+
events.filter(move |event| match event {
|
|
144
|
+
Event::Start(Tag::MetadataBlock(_)) => {
|
|
145
|
+
in_metadata = true;
|
|
146
|
+
false
|
|
147
|
+
}
|
|
148
|
+
Event::End(TagEnd::MetadataBlock(_)) => {
|
|
149
|
+
in_metadata = false;
|
|
150
|
+
false
|
|
151
|
+
}
|
|
152
|
+
_ => !in_metadata,
|
|
153
|
+
})
|
|
154
|
+
}
|
|
155
|
+
|
|
118
156
|
/// Full render: parse once, collect stats + TOC from original events,
|
|
119
157
|
/// apply filters, render HTML. Returns a Ruby Hash:
|
|
120
158
|
///
|
|
@@ -219,14 +257,18 @@ fn render(source: &str, cm_opts: pulldown_cmark::Options, flags: Flags) -> Strin
|
|
|
219
257
|
|
|
220
258
|
fn render_to_markdown(source: &str, cm_opts: pulldown_cmark::Options, flags: Flags) -> String {
|
|
221
259
|
let mut buf = String::with_capacity(source.len());
|
|
222
|
-
let parser = Parser::new_ext(source, cm_opts);
|
|
223
260
|
|
|
261
|
+
// `content_events` strips frontmatter, so the cmark serializer never sees
|
|
262
|
+
// a metadata block to re-emit—on either the streaming or buffered path.
|
|
224
263
|
if !needs_buffer(&flags) {
|
|
225
|
-
cmark_write(parser.map(stream_filter(&flags)), &mut buf);
|
|
264
|
+
cmark_write(
|
|
265
|
+
content_events(source, cm_opts).map(stream_filter(&flags)),
|
|
266
|
+
&mut buf,
|
|
267
|
+
);
|
|
226
268
|
return buf;
|
|
227
269
|
}
|
|
228
270
|
|
|
229
|
-
let events = apply_filters(parser.collect(), &flags);
|
|
271
|
+
let events = apply_filters(content_events(source, cm_opts).collect(), &flags);
|
|
230
272
|
cmark_write(events.into_iter(), &mut buf);
|
|
231
273
|
buf
|
|
232
274
|
}
|
data/ext/inkmark/src/truncate.rs
CHANGED
|
@@ -7,10 +7,10 @@
|
|
|
7
7
|
//! input, context-window budgeting, and chunk normalization.
|
|
8
8
|
|
|
9
9
|
use magnus::{Error, RHash, Ruby};
|
|
10
|
-
use pulldown_cmark::{Event, Parser};
|
|
10
|
+
use pulldown_cmark::Event;
|
|
11
11
|
use unicode_segmentation::UnicodeSegmentation;
|
|
12
12
|
|
|
13
|
-
use crate::document::apply_filters;
|
|
13
|
+
use crate::document::{apply_filters, content_events};
|
|
14
14
|
use crate::options::{build_options, Flags};
|
|
15
15
|
|
|
16
16
|
/// What kind of boundary to cut at.
|
|
@@ -44,7 +44,7 @@ pub fn truncate_source(
|
|
|
44
44
|
flags: &Flags,
|
|
45
45
|
params: &TruncateParams,
|
|
46
46
|
) -> String {
|
|
47
|
-
let events: Vec<Event> = Parser::new_ext(source, cm_opts).collect();
|
|
47
|
+
let events: Vec<Event> = content_events(source, cm_opts).collect();
|
|
48
48
|
let events = apply_filters(events, flags);
|
|
49
49
|
truncate_events(&events, params)
|
|
50
50
|
}
|
data/lib/inkmark/version.rb
CHANGED