RubyGems - html-to-markdown - Versions diffs - 2.26.3 → 2.27.1 - Mend

html-to-markdown 2.26.3 → 2.27.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

checksums.yaml +4 -4
data/Gemfile.lock +2 -2
data/README.md +20 -2
data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
data/ext/html-to-markdown-rb/native/src/options.rs +1 -0
data/lib/html_to_markdown/version.rb +1 -1
data/rust-vendor/html-to-markdown-rs/Cargo.toml +1 -1
data/rust-vendor/html-to-markdown-rs/src/convert_api.rs +14 -13
data/rust-vendor/html-to-markdown-rs/src/converter/context.rs +0 -3
data/rust-vendor/html-to-markdown-rs/src/converter/list/definition.rs +8 -67
data/rust-vendor/html-to-markdown-rs/src/converter/main.rs +8 -0
data/rust-vendor/html-to-markdown-rs/src/converter/mod.rs +1 -0
data/rust-vendor/html-to-markdown-rs/src/converter/plain_text.rs +265 -0
data/rust-vendor/html-to-markdown-rs/src/converter/semantic/definition_list.rs +8 -86
data/rust-vendor/html-to-markdown-rs/src/options/conversion.rs +2 -2
data/rust-vendor/html-to-markdown-rs/src/options/validation.rs +4 -0
data/rust-vendor/html-to-markdown-rs/tests/issue_200_regressions.rs +25 -4
data/rust-vendor/html-to-markdown-rs/tests/plain_output_test.rs +214 -0
data/rust-vendor/pxfm/.cargo-checksum.json +1 -1
data/rust-vendor/pxfm/.cargo_vcs_info.json +1 -1
data/rust-vendor/pxfm/Cargo.lock +1 -19
data/rust-vendor/pxfm/Cargo.toml +1 -4
data/rust-vendor/pxfm/Cargo.toml.orig +1 -4
data/rust-vendor/pxfm/src/common.rs +0 -44
data/rust-vendor/pxfm/src/pow.rs +2 -1
data/rust-vendor/pxfm/src/powf.rs +1 -0
data/rust-vendor/pxfm/src/tangent/cotpi.rs +11 -11
metadata +4 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 23d0242cd4fc575d8081e675fb8d16f09faa7fb1c6c0df9b18d21338c0391880
-  data.tar.gz: cf86724440a34a26e1f17c134a232b1b321edaacb95758e42f9eab59dc710f8b
+  metadata.gz: 41d0b097b3f46c377ddadf6da05274a283efeddf1142795a1766e908d7c78290
+  data.tar.gz: 2e6585fb07a8e8cf3fc1f474cbadf6b2a3135f00ef4707029d3e35dce02726c5
 SHA512:
-  metadata.gz: 63afe8bdf9d36f4cc225859e3a7ebb62452e97feafccc5ea1e20564a47b7e037900b6af7300508eec4313d4306608e3b00bc5f6a3115001ee250d5c560880bb6
-  data.tar.gz: c8fcaa6e61fea4325b08ce39ebf0a2bd92ff4e6d58702497e38cc6a081c1eec3de095d72986ccdc828ee6e219697d53c12483087ee563d39f767fe989d72ffdb
+  metadata.gz: df64f0ec15405f15043aa6594aa0560885dfa9c957fcf40ec5b4de35457f06442efd0cd3bb868d06a2a53c67a3a096719ce026e260173114857f94ab5c6249cb
+  data.tar.gz: cbaa15dee930c2940b9aaf4f768d63cd95373cc4aea4c093d58a049e6612c3e533645cdbeaf9a64aca0571ad4a2995873ecb1b98a3034de83d5771439673989c

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    html-to-markdown (2.26.3)
+    html-to-markdown (2.27.1)
       rb_sys (>= 0.9, < 1.0)
 GEM
@@ -172,7 +172,7 @@ CHECKSUMS
   ffi (1.17.3-x86_64-darwin) sha256=1f211811eb5cfaa25998322cdd92ab104bfbd26d1c4c08471599c511f2c00bb5
   ffi (1.17.3-x86_64-linux-gnu) sha256=3746b01f677aae7b16dc1acb7cb3cc17b3e35bdae7676a3f568153fb0e2c887f
   fileutils (1.8.0) sha256=8c6b1df54e2540bdb2f39258f08af78853aa70bad52b4d394bbc6424593c6e02
-  html-to-markdown (2.26.3)
+  html-to-markdown (2.27.1)
   i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
   json (2.18.1) sha256=fe112755501b8d0466b5ada6cf50c8c3f41e897fa128ac5d263ec09eedc9f986
   json-schema (6.1.0) sha256=6bf70a2cfb6dfd5a06da28093fa8190f324c88eabd36a7f47097f227321dc702

data/README.md CHANGED Viewed

@@ -18,7 +18,7 @@
     <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/html-to-markdown?label=Java&color=007ec6" alt="Java">
   </a>
   <a href="https://pkg.go.dev/github.com/kreuzberg-dev/html-to-markdown/packages/go/v2/htmltomarkdown">
-    <img src="https://img.shields.io/badge/Go-v2.26.1-007ec6" alt="Go">
+    <img src="https://img.shields.io/badge/Go-v2.27.1-007ec6" alt="Go">
   </a>
   <a href="https://www.nuget.org/packages/KreuzbergDev.HtmlToMarkdown/">
     <img src="https://img.shields.io/nuget/v/KreuzbergDev.HtmlToMarkdown?label=C%23&color=007ec6" alt="C#">
@@ -144,7 +144,7 @@ Extract base64-encoded inline images with metadata.
 - `wrap_width`: Wrap at column — default: `80`
 - `code_language`: Default fenced code block language — default: none
 - `extract_metadata`: Embed metadata as YAML frontmatter — default: `false`
-- `output_format`: Output markup format (`"markdown"` | `"djot"`) — default: `"markdown"`
+- `output_format`: Output markup format (`"markdown"` | `"djot"` | `"plain"`) — default: `"markdown"`
 **`MetadataConfig`** – Selective metadata extraction:
 - `extract_headers`: h1-h6 elements — default: `true`
@@ -191,6 +191,24 @@ djot = HtmlToMarkdown.convert(html, output_format: 'djot')
 Djot's extended syntax allows you to express more semantic meaning in lightweight text, making it useful for documents that require strikethrough, insertion tracking, or mathematical notation.
+## Plain Text Output
+Set `output_format` to `"plain"` to strip all markup and return only visible text. This bypasses the Markdown conversion pipeline entirely for maximum speed.
+```ruby
+require 'html_to_markdown'
+html = "<h1>Title</h1><p>This is <strong>bold</strong> and <em>italic</em> text.</p>"
+plain = HtmlToMarkdown.convert(html, output_format: 'plain')
+# Result: "Title\n\nThis is bold and italic text."
+```
+Plain text mode is useful for search indexing, text extraction, and feeding content to LLMs.
 ## Metadata Extraction

data/ext/html-to-markdown-rb/native/Cargo.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [package]
 name = "html-to-markdown-rb"
-version ="2.26.3"
+version ="2.27.1"
 edition = "2024"
 authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
 license = "MIT"

data/ext/html-to-markdown-rb/native/src/options.rs CHANGED Viewed

@@ -65,6 +65,7 @@ pub fn parse_output_format(value: Value) -> Result<OutputFormat, Error> {
     match symbol_to_string(value)?.as_str() {
         "markdown" => Ok(OutputFormat::Markdown),
         "djot" => Ok(OutputFormat::Djot),
+        "plain" => Ok(OutputFormat::Plain),
         other => Err(arg_error(format!("invalid output_format: {other}"))),
     }
 }

data/lib/html_to_markdown/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module HtmlToMarkdown
-  VERSION = '2.26.3'
+  VERSION = '2.27.1'
 end

data/rust-vendor/html-to-markdown-rs/Cargo.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [package]
 name = "html-to-markdown-rs"
-version = "2.26.3"
+version = "2.27.1"
 edition = "2024"
 authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
 license = "MIT"

data/rust-vendor/html-to-markdown-rs/src/convert_api.rs CHANGED Viewed

@@ -562,19 +562,20 @@ fn fast_text_only(html: &str, options: &ConversionOptions) -> Option<String> {
         Cow::Borrowed(trimmed)
     };
-    let escaped =
-        if options.escape_misc || options.escape_asterisks || options.escape_underscores || options.escape_ascii {
-            text::escape(
-                normalized.as_ref(),
-                options.escape_misc,
-                options.escape_asterisks,
-                options.escape_underscores,
-                options.escape_ascii,
-            )
-            .into_owned()
-        } else {
-            normalized.into_owned()
-        };
+    let escaped = if options.output_format == crate::options::OutputFormat::Plain {
+        normalized.into_owned()
+    } else if options.escape_misc || options.escape_asterisks || options.escape_underscores || options.escape_ascii {
+        text::escape(
+            normalized.as_ref(),
+            options.escape_misc,
+            options.escape_asterisks,
+            options.escape_underscores,
+            options.escape_ascii,
+        )
+        .into_owned()
+    } else {
+        normalized.into_owned()
+    };
     let mut output = String::with_capacity(escaped.len() + 1);
     output.push_str(&escaped);

data/rust-vendor/html-to-markdown-rs/src/converter/context.rs CHANGED Viewed

@@ -35,8 +35,6 @@ pub struct Context {
     pub(crate) list_counter: usize,
     /// Are we in an ordered list (vs unordered)?
     pub(crate) in_ordered_list: bool,
-    /// Track if previous sibling in dl was a dt
-    pub(crate) last_was_dt: bool,
     /// Blockquote nesting depth
     pub(crate) blockquote_depth: usize,
     /// Are we inside a table cell (td/th)?
@@ -142,7 +140,6 @@ impl Context {
             in_code: false,
             list_counter: 0,
             in_ordered_list: false,
-            last_was_dt: false,
             blockquote_depth: 0,
             in_table_cell: false,
             convert_as_inline: options.convert_as_inline,

data/rust-vendor/html-to-markdown-rs/src/converter/list/definition.rs CHANGED Viewed

@@ -3,7 +3,7 @@
 //! Processes definition lists with:
 //! - Definition terms (dt)
 //! - Definition descriptions (dd)
-//! - Proper Markdown formatting with `:   ` separator
+//! - Plain block formatting (no Pandoc colon syntax)
 use crate::options::ConversionOptions;
 use tl;
@@ -24,12 +24,12 @@ pub(crate) fn handle_dl(
     depth: usize,
     dom_ctx: &DomContext,
 ) {
-    if ctx.convert_as_inline {
-        let tag = match node_handle.get(parser) {
-            Some(tl::Node::Tag(t)) => t,
-            _ => return,
-        };
+    let tag = match node_handle.get(parser) {
+        Some(tl::Node::Tag(t)) => t,
+        _ => return,
+    };
+    if ctx.convert_as_inline {
         let children = tag.children();
         {
             for child_handle in children.top().iter() {
@@ -40,48 +40,11 @@ pub(crate) fn handle_dl(
         return;
     }
-    let tag = match node_handle.get(parser) {
-        Some(tl::Node::Tag(t)) => t,
-        _ => return,
-    };
     let mut content = String::new();
-    let mut in_dt_group = false;
     let children = tag.children();
     {
         for child_handle in children.top().iter() {
-            let (is_definition_term, is_definition_description) =
-                if let Some(tl::Node::Tag(child_tag)) = child_handle.get(parser) {
-                    let tag_name = {
-                        use crate::converter::normalized_tag_name;
-                        normalized_tag_name(child_tag.name().as_utf8_str())
-                    };
-                    (tag_name == "dt", tag_name == "dd")
-                } else {
-                    (false, false)
-                };
-            let child_ctx = Context {
-                last_was_dt: in_dt_group && is_definition_description,
-                ..ctx.clone()
-            };
-            crate::converter::walk_node(child_handle, parser, &mut content, options, &child_ctx, depth, dom_ctx);
-            match child_handle.get(parser) {
-                Some(tl::Node::Tag(_)) => {
-                    if is_definition_term {
-                        in_dt_group = true;
-                    } else if !is_definition_description {
-                        in_dt_group = false;
-                    }
-                }
-                Some(tl::Node::Raw(raw)) => {
-                    if !raw.as_utf8_str().trim().is_empty() {
-                        in_dt_group = false;
-                    }
-                }
-                Some(tl::Node::Comment(_)) | None => {}
-            }
+            crate::converter::walk_node(child_handle, parser, &mut content, options, ctx, depth, dom_ctx);
         }
     }
@@ -132,8 +95,7 @@ pub(crate) fn handle_dt(
 /// Handle definition description element (<dd>).
 ///
-/// Outputs the description with `:   ` prefix if it follows a dt,
-/// or on its own with proper spacing.
+/// Outputs the description as a plain block.
 pub(crate) fn handle_dd(
     node_handle: &tl::NodeHandle,
     parser: &tl::Parser,
@@ -162,27 +124,6 @@ pub(crate) fn handle_dd(
         if !trimmed.is_empty() {
             output.push_str(trimmed);
         }
-    } else if ctx.last_was_dt {
-        if trimmed.is_empty() {
-            output.push_str(":   \n\n");
-        } else {
-            let mut lines = trimmed.lines();
-            if let Some(first) = lines.next() {
-                output.push_str(":   ");
-                output.push_str(first);
-                output.push('\n');
-            }
-            for line in lines {
-                if line.is_empty() {
-                    output.push('\n');
-                } else {
-                    output.push_str("    ");
-                    output.push_str(line);
-                    output.push('\n');
-                }
-            }
-            output.push('\n');
-        }
     } else if !trimmed.is_empty() {
         output.push_str(trimmed);
         output.push_str("\n\n");

data/rust-vendor/html-to-markdown-rs/src/converter/main.rs CHANGED Viewed

@@ -18,11 +18,13 @@ use crate::converter::main_helpers::{
     extract_head_metadata, format_metadata_frontmatter, handle_hocr_document, has_custom_element_tags,
     repair_with_html5ever, trim_line_end_whitespace, trim_trailing_whitespace,
 };
+use crate::converter::plain_text::extract_plain_text;
 use crate::converter::preprocessing_helpers::{has_inline_block_misnest, should_drop_for_preprocessing};
 use crate::converter::utility::caching::build_dom_context;
 use crate::converter::utility::content::normalized_tag_name;
 use crate::converter::utility::preprocessing::{preprocess_html, strip_script_and_style_tags};
 use crate::converter::utility::serialization::serialize_tag_to_html;
+use crate::options::OutputFormat;
 use crate::converter::handlers::{handle_blockquote, handle_code, handle_graphic, handle_img, handle_link, handle_pre};
 use crate::error::Result;
@@ -134,6 +136,12 @@ pub(crate) fn convert_html_impl(
         }
     }
+    // Fast path for plain text output: skip the full conversion pipeline
+    if options.output_format == OutputFormat::Plain {
+        let plain = extract_plain_text(&dom, parser, options);
+        return Ok(plain);
+    }
     let wants_frontmatter = options.extract_metadata && !options.convert_as_inline;
     #[cfg(feature = "metadata")]
     let wants_document = metadata_collector

data/rust-vendor/html-to-markdown-rs/src/converter/mod.rs CHANGED Viewed

@@ -102,6 +102,7 @@ pub mod main;
 mod main_helpers;
 pub mod media;
 mod metadata;
+pub mod plain_text;
 pub mod preprocessing_helpers;
 pub mod semantic;
 pub mod text;

data/rust-vendor/html-to-markdown-rs/src/converter/plain_text.rs ADDED Viewed

@@ -0,0 +1,265 @@
+//! Plain text extraction from parsed HTML DOM.
+//!
+//! Provides a fast-path text extractor that walks the DOM tree collecting only
+//! visible text content with structural whitespace, bypassing the full
+//! Markdown/Djot conversion pipeline.
+use crate::options::ConversionOptions;
+use crate::text;
+/// Tags whose content should be skipped entirely.
+const SKIP_TAGS: &[&str] = &["script", "style", "head", "template", "noscript", "svg", "math"];
+/// Block-level tags that should be separated by blank lines.
+const BLOCK_TAGS: &[&str] = &[
+    "p",
+    "div",
+    "h1",
+    "h2",
+    "h3",
+    "h4",
+    "h5",
+    "h6",
+    "blockquote",
+    "section",
+    "article",
+    "aside",
+    "main",
+    "nav",
+    "header",
+    "footer",
+    "figure",
+    "figcaption",
+    "details",
+    "summary",
+    "address",
+    "hgroup",
+    "search",
+];
+/// Extract plain text from a parsed DOM tree.
+///
+/// Walks the tree collecting visible text with structural whitespace:
+/// - Block elements get blank-line separation
+/// - `<br>` becomes a newline, `<hr>` a blank line
+/// - `<pre>` preserves internal whitespace
+/// - `<img>` outputs alt text (unless `skip_images` is set)
+/// - `<script>`, `<style>`, `<head>`, `<template>`, `<noscript>` are skipped
+/// - Tables: cells separated by tab, rows by newline
+/// - Inline elements are recursed without markers
+pub fn extract_plain_text(dom: &tl::VDom, parser: &tl::Parser, options: &ConversionOptions) -> String {
+    let mut buf = String::with_capacity(1024);
+    for child_handle in dom.children() {
+        walk_plain(child_handle, parser, &mut buf, options, false);
+    }
+    post_process(&mut buf);
+    buf
+}
+/// Recursive plain-text walker.
+fn walk_plain(
+    node_handle: &tl::NodeHandle,
+    parser: &tl::Parser,
+    buf: &mut String,
+    options: &ConversionOptions,
+    in_pre: bool,
+) {
+    let Some(node) = node_handle.get(parser) else {
+        return;
+    };
+    match node {
+        tl::Node::Raw(bytes) => {
+            let raw = bytes.as_utf8_str();
+            let decoded = text::decode_html_entities_cow(raw.as_ref());
+            if in_pre {
+                buf.push_str(&decoded);
+            } else {
+                let normalized = text::normalize_whitespace_cow(&decoded);
+                if !normalized.is_empty() {
+                    // Avoid leading space at start of a new line
+                    if normalized.as_ref() == " " && buf.ends_with('\n') {
+                        return;
+                    }
+                    buf.push_str(&normalized);
+                }
+            }
+        }
+        tl::Node::Tag(tag) => {
+            let tag_name = tag.name().as_utf8_str().to_ascii_lowercase();
+            let tag_str = tag_name.as_str();
+            // Skip invisible content
+            if SKIP_TAGS.contains(&tag_str) {
+                return;
+            }
+            match tag_str {
+                "br" => {
+                    buf.push('\n');
+                }
+                "hr" => {
+                    ensure_blank_line(buf);
+                }
+                "pre" => {
+                    ensure_blank_line(buf);
+                    walk_children(tag, parser, buf, options, true);
+                    ensure_blank_line(buf);
+                }
+                "img" => {
+                    if !options.skip_images {
+                        if let Some(Some(alt)) = tag.attributes().get("alt") {
+                            let alt_text = alt.as_utf8_str();
+                            if !alt_text.is_empty() {
+                                buf.push_str(alt_text.as_ref());
+                            }
+                        }
+                    }
+                }
+                "table" => {
+                    ensure_blank_line(buf);
+                    walk_table(tag, parser, buf, options);
+                    ensure_blank_line(buf);
+                }
+                "li" => {
+                    ensure_newline(buf);
+                    walk_children(tag, parser, buf, options, false);
+                    ensure_newline(buf);
+                }
+                _ if BLOCK_TAGS.contains(&tag_str) => {
+                    ensure_blank_line(buf);
+                    walk_children(tag, parser, buf, options, in_pre);
+                    ensure_blank_line(buf);
+                }
+                _ => {
+                    // Inline elements and structural containers (html, body, ul, ol, etc.)
+                    walk_children(tag, parser, buf, options, in_pre);
+                }
+            }
+        }
+        tl::Node::Comment(_) => {}
+    }
+}
+/// Walk all children of a tag.
+fn walk_children(tag: &tl::HTMLTag, parser: &tl::Parser, buf: &mut String, options: &ConversionOptions, in_pre: bool) {
+    let children = tag.children();
+    let top = children.top();
+    for child in top.iter() {
+        walk_plain(child, parser, buf, options, in_pre);
+    }
+}
+/// Walk a `<table>` element, extracting cells as tab-separated, rows as newline-separated.
+fn walk_table(table_tag: &tl::HTMLTag, parser: &tl::Parser, buf: &mut String, options: &ConversionOptions) {
+    // Collect all <tr> node handles by recursing into the table
+    let mut row_handles = Vec::new();
+    collect_descendant_handles(table_tag, parser, "tr", &mut row_handles);
+    for (row_idx, row_handle) in row_handles.iter().enumerate() {
+        if row_idx > 0 {
+            buf.push('\n');
+        }
+        let Some(tl::Node::Tag(row_tag)) = row_handle.get(parser) else {
+            continue;
+        };
+        // Collect direct <th>/<td> children
+        let mut cell_handles = Vec::new();
+        let row_children = row_tag.children();
+        let row_top = row_children.top();
+        for child in row_top.iter() {
+            if let Some(tl::Node::Tag(child_tag)) = child.get(parser) {
+                let name = child_tag.name().as_utf8_str();
+                if name.eq_ignore_ascii_case("th") || name.eq_ignore_ascii_case("td") {
+                    cell_handles.push(*child);
+                }
+            }
+        }
+        for (cell_idx, cell_handle) in cell_handles.iter().enumerate() {
+            if cell_idx > 0 {
+                buf.push('\t');
+            }
+            let mut cell_buf = String::new();
+            if let Some(tl::Node::Tag(cell_tag)) = cell_handle.get(parser) {
+                walk_children(cell_tag, parser, &mut cell_buf, options, false);
+            }
+            buf.push_str(cell_buf.trim());
+        }
+    }
+}
+/// Recursively collect all descendant `NodeHandle`s matching `target_tag` (by cloning handles).
+fn collect_descendant_handles(
+    tag: &tl::HTMLTag,
+    parser: &tl::Parser,
+    target_tag: &str,
+    result: &mut Vec<tl::NodeHandle>,
+) {
+    let children = tag.children();
+    let top = children.top();
+    for child in top.iter() {
+        if let Some(tl::Node::Tag(child_tag)) = child.get(parser) {
+            if child_tag.name().as_utf8_str().eq_ignore_ascii_case(target_tag) {
+                result.push(*child);
+            } else {
+                collect_descendant_handles(child_tag, parser, target_tag, result);
+            }
+        }
+    }
+}
+/// Ensure the buffer ends with a blank line (two newlines).
+fn ensure_blank_line(buf: &mut String) {
+    if buf.is_empty() {
+        return;
+    }
+    // Strip trailing horizontal whitespace
+    while buf.ends_with(' ') || buf.ends_with('\t') {
+        buf.pop();
+    }
+    let current_newlines = buf.chars().rev().take_while(|&c| c == '\n').count();
+    for _ in current_newlines..2 {
+        buf.push('\n');
+    }
+}
+/// Ensure the buffer ends with at least one newline.
+fn ensure_newline(buf: &mut String) {
+    if buf.is_empty() {
+        return;
+    }
+    if !buf.ends_with('\n') {
+        buf.push('\n');
+    }
+}
+/// Post-process: collapse 3+ newlines to 2, trim line-end whitespace, ensure single trailing newline.
+fn post_process(buf: &mut String) {
+    // Collapse runs of 3+ newlines to exactly 2
+    while buf.contains("\n\n\n") {
+        *buf = buf.replace("\n\n\n", "\n\n");
+    }
+    // Trim trailing whitespace from each line — collect owned strings to avoid borrow conflict
+    let lines: Vec<String> = buf.lines().map(|line| line.trim_end().to_string()).collect();
+    buf.clear();
+    for (i, line) in lines.iter().enumerate() {
+        buf.push_str(line);
+        if i < lines.len() - 1 {
+            buf.push('\n');
+        }
+    }
+    // Trim to single trailing newline
+    let keep = buf.trim_end_matches('\n').len();
+    if keep == 0 {
+        buf.clear();
+    } else {
+        buf.truncate(keep);
+        buf.push('\n');
+    }
+}