inkmark 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: defe040ab5efe3959229a885dcfd613bcd7bec9a264a7eb5bd6a441221af3e36
4
- data.tar.gz: a3bd5205a96103fb12392155e548014824f90af1e91a130c52d96715abc63295
3
+ metadata.gz: 75b9caec914a5e3b191911f77c83be71f81b729dc156461c30066de77c2e0f13
4
+ data.tar.gz: 8ff22a0d8e12d2ae300d450edbdb0546de506993c9df47612a5c852018e9a8f9
5
5
  SHA512:
6
- metadata.gz: 21536bdb30970d9f7bb58b002d35965a0ea6961a307f579229bc4eb34c5dabffd0a9b625aba12202e820166fb2efc5a35797e7714a7bcaf68be6f52afe14bef1
7
- data.tar.gz: 022bfcb26faa0b74031f6984fffe894ff3038ea65195572aac17026fa6566d25a18bfbb766631b4e5d39b05fad65dcbbf4dbf932ac20c424127f36dc31b3e9f3
6
+ metadata.gz: '0709173de6887ccee58ace68ea2dca6b42fc1c6e44f4ac5b2510f37501bb876652e7cfb60d5dd6d35025d43a1769fa49ebfd95fbd9d873c1669c5dd8ce31db65'
7
+ data.tar.gz: d8abaf48a2172226c0e39390989210dcb4f51df1d7ae627d99a72e65f2f6731cc85a99a1993827f0480f8da07da57d2a598c32c3272d8b4672cda0a1c5d6697e
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## [0.1.4] - 2026-06-25
2
+
3
+ - Fix `frontmatter: true` leaking the frontmatter block into `to_markdown`, `chunks_by_heading`, and `chunks_by_size` output. Bug report by @freesteph [#3]
4
+
1
5
  ## [0.1.3] - 2026-06-21
2
6
 
3
7
  - Fix possible XSS via unescaped language tag in syntax-highlighted code blocks.
data/Cargo.lock CHANGED
@@ -331,7 +331,7 @@ dependencies = [
331
331
 
332
332
  [[package]]
333
333
  name = "inkmark"
334
- version = "0.1.3"
334
+ version = "0.1.4"
335
335
  dependencies = [
336
336
  "deunicode",
337
337
  "emojis",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "inkmark"
3
- version = "0.1.3"
3
+ version = "0.1.4"
4
4
  edition = "2021"
5
5
  authors = ["Yaroslav Markin <yaroslav@markin.net>"]
6
6
  license = "MIT"
@@ -19,7 +19,7 @@ use magnus::{Error, RArray, RHash, Ruby};
19
19
  use pulldown_cmark::{Event, HeadingLevel, Parser, Tag, TagEnd};
20
20
  use unicode_segmentation::UnicodeSegmentation;
21
21
 
22
- use crate::document::apply_filters;
22
+ use crate::document::{apply_filters, content_events};
23
23
  use crate::heading::{self, SlugDeduplicator};
24
24
  use crate::options::build_options;
25
25
  use crate::toc;
@@ -43,7 +43,8 @@ pub fn native_chunks_by_heading(
43
43
  let (cm_opts, flags) = build_options(ruby, opts_hash)?;
44
44
 
45
45
  // Parse + run the full filter pipeline, same as `to_markdown`.
46
- let events: Vec<Event> = Parser::new_ext(&source, cm_opts).collect();
46
+ // `content_events` drops frontmatter so it never becomes a section.
47
+ let events: Vec<Event> = content_events(&source, cm_opts).collect();
47
48
  let events = apply_filters(events, &flags);
48
49
 
49
50
  let boundaries = find_heading_boundaries(&events);
@@ -18,10 +18,10 @@
18
18
  //! adjacent chunks share context.
19
19
 
20
20
  use magnus::{Error, RArray, RHash, Ruby};
21
- use pulldown_cmark::{Event, Parser};
21
+ use pulldown_cmark::Event;
22
22
  use unicode_segmentation::UnicodeSegmentation;
23
23
 
24
- use crate::document::apply_filters;
24
+ use crate::document::{apply_filters, content_events};
25
25
  use crate::options::build_options;
26
26
 
27
27
  #[derive(Clone, Copy, PartialEq, Eq)]
@@ -45,7 +45,7 @@ pub fn native_chunks_by_size(
45
45
  let params = parse_params(ruby, &opts_hash)?;
46
46
  let (cm_opts, flags) = build_options(ruby, opts_hash)?;
47
47
 
48
- let events: Vec<Event> = Parser::new_ext(&source, cm_opts).collect();
48
+ let events: Vec<Event> = content_events(&source, cm_opts).collect();
49
49
  let events = apply_filters(events, &flags);
50
50
 
51
51
  let windows = match params.at {
@@ -1,5 +1,5 @@
1
1
  use magnus::{Error, RHash, Ruby};
2
- use pulldown_cmark::{html, Event, Options, Parser};
2
+ use pulldown_cmark::{html, Event, Options, Parser, Tag, TagEnd};
3
3
 
4
4
  use crate::autolink;
5
5
  use crate::emoji;
@@ -115,6 +115,44 @@ fn hard_wrap(event: Event) -> Event {
115
115
  }
116
116
  }
117
117
 
118
+ /// Parse `source` into the content event stream that renderers and chunkers
119
+ /// consume.
120
+ ///
121
+ /// YAML frontmatter is removed at this boundary: it is document *metadata*
122
+ /// (surfaced via {Inkmark#frontmatter}), never content. pulldown-cmark's HTML
123
+ /// renderer ignores metadata blocks and our plain-text writer discards them,
124
+ /// but the Markdown serializer (`pulldown-cmark-to-cmark`) faithfully
125
+ /// re-emits them as `---\n...\n---`. Rather than re-stripping after the fact
126
+ /// in every Markdown/chunk path, we never hand those consumers the events in
127
+ /// the first place—so `to_markdown`, `chunks_by_heading`, and
128
+ /// `chunks_by_size` are frontmatter-free by construction, with no separate
129
+ /// pass and no special-casing of the streaming fast path.
130
+ ///
131
+ /// Frontmatter extraction walks the *raw* parser (see `stats::collect`), so
132
+ /// dropping the events here does not affect the `frontmatter` accessor.
133
+ pub fn content_events(source: &str, cm_opts: Options) -> impl Iterator<Item = Event<'_>> {
134
+ drop_metadata(Parser::new_ext(source, cm_opts))
135
+ }
136
+
137
+ /// Iterator adapter that filters out `Start(MetadataBlock) … End(MetadataBlock)`
138
+ /// runs, including the raw YAML `Text` between the markers. Stateful but
139
+ /// composes with both the streaming fast path and the buffered `.collect()`
140
+ /// path, so a single definition serves every content consumer.
141
+ fn drop_metadata<'a>(events: impl Iterator<Item = Event<'a>>) -> impl Iterator<Item = Event<'a>> {
142
+ let mut in_metadata = false;
143
+ events.filter(move |event| match event {
144
+ Event::Start(Tag::MetadataBlock(_)) => {
145
+ in_metadata = true;
146
+ false
147
+ }
148
+ Event::End(TagEnd::MetadataBlock(_)) => {
149
+ in_metadata = false;
150
+ false
151
+ }
152
+ _ => !in_metadata,
153
+ })
154
+ }
155
+
118
156
  /// Full render: parse once, collect stats + TOC from original events,
119
157
  /// apply filters, render HTML. Returns a Ruby Hash:
120
158
  ///
@@ -219,14 +257,18 @@ fn render(source: &str, cm_opts: pulldown_cmark::Options, flags: Flags) -> Strin
219
257
 
220
258
  fn render_to_markdown(source: &str, cm_opts: pulldown_cmark::Options, flags: Flags) -> String {
221
259
  let mut buf = String::with_capacity(source.len());
222
- let parser = Parser::new_ext(source, cm_opts);
223
260
 
261
+ // `content_events` strips frontmatter, so the cmark serializer never sees
262
+ // a metadata block to re-emit—on either the streaming or buffered path.
224
263
  if !needs_buffer(&flags) {
225
- cmark_write(parser.map(stream_filter(&flags)), &mut buf);
264
+ cmark_write(
265
+ content_events(source, cm_opts).map(stream_filter(&flags)),
266
+ &mut buf,
267
+ );
226
268
  return buf;
227
269
  }
228
270
 
229
- let events = apply_filters(parser.collect(), &flags);
271
+ let events = apply_filters(content_events(source, cm_opts).collect(), &flags);
230
272
  cmark_write(events.into_iter(), &mut buf);
231
273
  buf
232
274
  }
@@ -7,10 +7,10 @@
7
7
  //! input, context-window budgeting, and chunk normalization.
8
8
 
9
9
  use magnus::{Error, RHash, Ruby};
10
- use pulldown_cmark::{Event, Parser};
10
+ use pulldown_cmark::Event;
11
11
  use unicode_segmentation::UnicodeSegmentation;
12
12
 
13
- use crate::document::apply_filters;
13
+ use crate::document::{apply_filters, content_events};
14
14
  use crate::options::{build_options, Flags};
15
15
 
16
16
  /// What kind of boundary to cut at.
@@ -44,7 +44,7 @@ pub fn truncate_source(
44
44
  flags: &Flags,
45
45
  params: &TruncateParams,
46
46
  ) -> String {
47
- let events: Vec<Event> = Parser::new_ext(source, cm_opts).collect();
47
+ let events: Vec<Event> = content_events(source, cm_opts).collect();
48
48
  let events = apply_filters(events, flags);
49
49
  truncate_events(&events, params)
50
50
  }
@@ -2,5 +2,5 @@
2
2
 
3
3
  class Inkmark
4
4
  # Current gem version.
5
- VERSION = "0.1.3"
5
+ VERSION = "0.1.4"
6
6
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: inkmark
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yaroslav Markin