html-to-markdown 2.26.3 → 2.27.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 23d0242cd4fc575d8081e675fb8d16f09faa7fb1c6c0df9b18d21338c0391880
4
- data.tar.gz: cf86724440a34a26e1f17c134a232b1b321edaacb95758e42f9eab59dc710f8b
3
+ metadata.gz: 41d0b097b3f46c377ddadf6da05274a283efeddf1142795a1766e908d7c78290
4
+ data.tar.gz: 2e6585fb07a8e8cf3fc1f474cbadf6b2a3135f00ef4707029d3e35dce02726c5
5
5
  SHA512:
6
- metadata.gz: 63afe8bdf9d36f4cc225859e3a7ebb62452e97feafccc5ea1e20564a47b7e037900b6af7300508eec4313d4306608e3b00bc5f6a3115001ee250d5c560880bb6
7
- data.tar.gz: c8fcaa6e61fea4325b08ce39ebf0a2bd92ff4e6d58702497e38cc6a081c1eec3de095d72986ccdc828ee6e219697d53c12483087ee563d39f767fe989d72ffdb
6
+ metadata.gz: df64f0ec15405f15043aa6594aa0560885dfa9c957fcf40ec5b4de35457f06442efd0cd3bb868d06a2a53c67a3a096719ce026e260173114857f94ab5c6249cb
7
+ data.tar.gz: cbaa15dee930c2940b9aaf4f768d63cd95373cc4aea4c093d58a049e6612c3e533645cdbeaf9a64aca0571ad4a2995873ecb1b98a3034de83d5771439673989c
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- html-to-markdown (2.26.3)
4
+ html-to-markdown (2.27.1)
5
5
  rb_sys (>= 0.9, < 1.0)
6
6
 
7
7
  GEM
@@ -172,7 +172,7 @@ CHECKSUMS
172
172
  ffi (1.17.3-x86_64-darwin) sha256=1f211811eb5cfaa25998322cdd92ab104bfbd26d1c4c08471599c511f2c00bb5
173
173
  ffi (1.17.3-x86_64-linux-gnu) sha256=3746b01f677aae7b16dc1acb7cb3cc17b3e35bdae7676a3f568153fb0e2c887f
174
174
  fileutils (1.8.0) sha256=8c6b1df54e2540bdb2f39258f08af78853aa70bad52b4d394bbc6424593c6e02
175
- html-to-markdown (2.26.3)
175
+ html-to-markdown (2.27.1)
176
176
  i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
177
177
  json (2.18.1) sha256=fe112755501b8d0466b5ada6cf50c8c3f41e897fa128ac5d263ec09eedc9f986
178
178
  json-schema (6.1.0) sha256=6bf70a2cfb6dfd5a06da28093fa8190f324c88eabd36a7f47097f227321dc702
data/README.md CHANGED
@@ -18,7 +18,7 @@
18
18
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/html-to-markdown?label=Java&color=007ec6" alt="Java">
19
19
  </a>
20
20
  <a href="https://pkg.go.dev/github.com/kreuzberg-dev/html-to-markdown/packages/go/v2/htmltomarkdown">
21
- <img src="https://img.shields.io/badge/Go-v2.26.1-007ec6" alt="Go">
21
+ <img src="https://img.shields.io/badge/Go-v2.27.1-007ec6" alt="Go">
22
22
  </a>
23
23
  <a href="https://www.nuget.org/packages/KreuzbergDev.HtmlToMarkdown/">
24
24
  <img src="https://img.shields.io/nuget/v/KreuzbergDev.HtmlToMarkdown?label=C%23&color=007ec6" alt="C#">
@@ -144,7 +144,7 @@ Extract base64-encoded inline images with metadata.
144
144
  - `wrap_width`: Wrap at column — default: `80`
145
145
  - `code_language`: Default fenced code block language — default: none
146
146
  - `extract_metadata`: Embed metadata as YAML frontmatter — default: `false`
147
- - `output_format`: Output markup format (`"markdown"` | `"djot"`) — default: `"markdown"`
147
+ - `output_format`: Output markup format (`"markdown"` | `"djot"` | `"plain"`) — default: `"markdown"`
148
148
 
149
149
  **`MetadataConfig`** – Selective metadata extraction:
150
150
  - `extract_headers`: h1-h6 elements — default: `true`
@@ -191,6 +191,24 @@ djot = HtmlToMarkdown.convert(html, output_format: 'djot')
191
191
  Djot's extended syntax allows you to express more semantic meaning in lightweight text, making it useful for documents that require strikethrough, insertion tracking, or mathematical notation.
192
192
 
193
193
 
194
+ ## Plain Text Output
195
+
196
+ Set `output_format` to `"plain"` to strip all markup and return only visible text. This bypasses the Markdown conversion pipeline entirely for maximum speed.
197
+
198
+
199
+ ```ruby
200
+ require 'html_to_markdown'
201
+
202
+ html = "<h1>Title</h1><p>This is <strong>bold</strong> and <em>italic</em> text.</p>"
203
+
204
+ plain = HtmlToMarkdown.convert(html, output_format: 'plain')
205
+ # Result: "Title\n\nThis is bold and italic text."
206
+ ```
207
+
208
+
209
+ Plain text mode is useful for search indexing, text extraction, and feeding content to LLMs.
210
+
211
+
194
212
 
195
213
  ## Metadata Extraction
196
214
 
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "html-to-markdown-rb"
3
- version ="2.26.3"
3
+ version ="2.27.1"
4
4
  edition = "2024"
5
5
  authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
6
6
  license = "MIT"
@@ -65,6 +65,7 @@ pub fn parse_output_format(value: Value) -> Result<OutputFormat, Error> {
65
65
  match symbol_to_string(value)?.as_str() {
66
66
  "markdown" => Ok(OutputFormat::Markdown),
67
67
  "djot" => Ok(OutputFormat::Djot),
68
+ "plain" => Ok(OutputFormat::Plain),
68
69
  other => Err(arg_error(format!("invalid output_format: {other}"))),
69
70
  }
70
71
  }
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HtmlToMarkdown
4
- VERSION = '2.26.3'
4
+ VERSION = '2.27.1'
5
5
  end
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "html-to-markdown-rs"
3
- version = "2.26.3"
3
+ version = "2.27.1"
4
4
  edition = "2024"
5
5
  authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
6
6
  license = "MIT"
@@ -562,19 +562,20 @@ fn fast_text_only(html: &str, options: &ConversionOptions) -> Option<String> {
562
562
  Cow::Borrowed(trimmed)
563
563
  };
564
564
 
565
- let escaped =
566
- if options.escape_misc || options.escape_asterisks || options.escape_underscores || options.escape_ascii {
567
- text::escape(
568
- normalized.as_ref(),
569
- options.escape_misc,
570
- options.escape_asterisks,
571
- options.escape_underscores,
572
- options.escape_ascii,
573
- )
574
- .into_owned()
575
- } else {
576
- normalized.into_owned()
577
- };
565
+ let escaped = if options.output_format == crate::options::OutputFormat::Plain {
566
+ normalized.into_owned()
567
+ } else if options.escape_misc || options.escape_asterisks || options.escape_underscores || options.escape_ascii {
568
+ text::escape(
569
+ normalized.as_ref(),
570
+ options.escape_misc,
571
+ options.escape_asterisks,
572
+ options.escape_underscores,
573
+ options.escape_ascii,
574
+ )
575
+ .into_owned()
576
+ } else {
577
+ normalized.into_owned()
578
+ };
578
579
 
579
580
  let mut output = String::with_capacity(escaped.len() + 1);
580
581
  output.push_str(&escaped);
@@ -35,8 +35,6 @@ pub struct Context {
35
35
  pub(crate) list_counter: usize,
36
36
  /// Are we in an ordered list (vs unordered)?
37
37
  pub(crate) in_ordered_list: bool,
38
- /// Track if previous sibling in dl was a dt
39
- pub(crate) last_was_dt: bool,
40
38
  /// Blockquote nesting depth
41
39
  pub(crate) blockquote_depth: usize,
42
40
  /// Are we inside a table cell (td/th)?
@@ -142,7 +140,6 @@ impl Context {
142
140
  in_code: false,
143
141
  list_counter: 0,
144
142
  in_ordered_list: false,
145
- last_was_dt: false,
146
143
  blockquote_depth: 0,
147
144
  in_table_cell: false,
148
145
  convert_as_inline: options.convert_as_inline,
@@ -3,7 +3,7 @@
3
3
  //! Processes definition lists with:
4
4
  //! - Definition terms (dt)
5
5
  //! - Definition descriptions (dd)
6
- //! - Proper Markdown formatting with `: ` separator
6
+ //! - Plain block formatting (no Pandoc colon syntax)
7
7
 
8
8
  use crate::options::ConversionOptions;
9
9
  use tl;
@@ -24,12 +24,12 @@ pub(crate) fn handle_dl(
24
24
  depth: usize,
25
25
  dom_ctx: &DomContext,
26
26
  ) {
27
- if ctx.convert_as_inline {
28
- let tag = match node_handle.get(parser) {
29
- Some(tl::Node::Tag(t)) => t,
30
- _ => return,
31
- };
27
+ let tag = match node_handle.get(parser) {
28
+ Some(tl::Node::Tag(t)) => t,
29
+ _ => return,
30
+ };
32
31
 
32
+ if ctx.convert_as_inline {
33
33
  let children = tag.children();
34
34
  {
35
35
  for child_handle in children.top().iter() {
@@ -40,48 +40,11 @@ pub(crate) fn handle_dl(
40
40
  return;
41
41
  }
42
42
 
43
- let tag = match node_handle.get(parser) {
44
- Some(tl::Node::Tag(t)) => t,
45
- _ => return,
46
- };
47
-
48
43
  let mut content = String::new();
49
- let mut in_dt_group = false;
50
44
  let children = tag.children();
51
45
  {
52
46
  for child_handle in children.top().iter() {
53
- let (is_definition_term, is_definition_description) =
54
- if let Some(tl::Node::Tag(child_tag)) = child_handle.get(parser) {
55
- let tag_name = {
56
- use crate::converter::normalized_tag_name;
57
- normalized_tag_name(child_tag.name().as_utf8_str())
58
- };
59
- (tag_name == "dt", tag_name == "dd")
60
- } else {
61
- (false, false)
62
- };
63
-
64
- let child_ctx = Context {
65
- last_was_dt: in_dt_group && is_definition_description,
66
- ..ctx.clone()
67
- };
68
- crate::converter::walk_node(child_handle, parser, &mut content, options, &child_ctx, depth, dom_ctx);
69
-
70
- match child_handle.get(parser) {
71
- Some(tl::Node::Tag(_)) => {
72
- if is_definition_term {
73
- in_dt_group = true;
74
- } else if !is_definition_description {
75
- in_dt_group = false;
76
- }
77
- }
78
- Some(tl::Node::Raw(raw)) => {
79
- if !raw.as_utf8_str().trim().is_empty() {
80
- in_dt_group = false;
81
- }
82
- }
83
- Some(tl::Node::Comment(_)) | None => {}
84
- }
47
+ crate::converter::walk_node(child_handle, parser, &mut content, options, ctx, depth, dom_ctx);
85
48
  }
86
49
  }
87
50
 
@@ -132,8 +95,7 @@ pub(crate) fn handle_dt(
132
95
 
133
96
  /// Handle definition description element (<dd>).
134
97
  ///
135
- /// Outputs the description with `: ` prefix if it follows a dt,
136
- /// or on its own with proper spacing.
98
+ /// Outputs the description as a plain block.
137
99
  pub(crate) fn handle_dd(
138
100
  node_handle: &tl::NodeHandle,
139
101
  parser: &tl::Parser,
@@ -162,27 +124,6 @@ pub(crate) fn handle_dd(
162
124
  if !trimmed.is_empty() {
163
125
  output.push_str(trimmed);
164
126
  }
165
- } else if ctx.last_was_dt {
166
- if trimmed.is_empty() {
167
- output.push_str(": \n\n");
168
- } else {
169
- let mut lines = trimmed.lines();
170
- if let Some(first) = lines.next() {
171
- output.push_str(": ");
172
- output.push_str(first);
173
- output.push('\n');
174
- }
175
- for line in lines {
176
- if line.is_empty() {
177
- output.push('\n');
178
- } else {
179
- output.push_str(" ");
180
- output.push_str(line);
181
- output.push('\n');
182
- }
183
- }
184
- output.push('\n');
185
- }
186
127
  } else if !trimmed.is_empty() {
187
128
  output.push_str(trimmed);
188
129
  output.push_str("\n\n");
@@ -18,11 +18,13 @@ use crate::converter::main_helpers::{
18
18
  extract_head_metadata, format_metadata_frontmatter, handle_hocr_document, has_custom_element_tags,
19
19
  repair_with_html5ever, trim_line_end_whitespace, trim_trailing_whitespace,
20
20
  };
21
+ use crate::converter::plain_text::extract_plain_text;
21
22
  use crate::converter::preprocessing_helpers::{has_inline_block_misnest, should_drop_for_preprocessing};
22
23
  use crate::converter::utility::caching::build_dom_context;
23
24
  use crate::converter::utility::content::normalized_tag_name;
24
25
  use crate::converter::utility::preprocessing::{preprocess_html, strip_script_and_style_tags};
25
26
  use crate::converter::utility::serialization::serialize_tag_to_html;
27
+ use crate::options::OutputFormat;
26
28
 
27
29
  use crate::converter::handlers::{handle_blockquote, handle_code, handle_graphic, handle_img, handle_link, handle_pre};
28
30
  use crate::error::Result;
@@ -134,6 +136,12 @@ pub(crate) fn convert_html_impl(
134
136
  }
135
137
  }
136
138
 
139
+ // Fast path for plain text output: skip the full conversion pipeline
140
+ if options.output_format == OutputFormat::Plain {
141
+ let plain = extract_plain_text(&dom, parser, options);
142
+ return Ok(plain);
143
+ }
144
+
137
145
  let wants_frontmatter = options.extract_metadata && !options.convert_as_inline;
138
146
  #[cfg(feature = "metadata")]
139
147
  let wants_document = metadata_collector
@@ -102,6 +102,7 @@ pub mod main;
102
102
  mod main_helpers;
103
103
  pub mod media;
104
104
  mod metadata;
105
+ pub mod plain_text;
105
106
  pub mod preprocessing_helpers;
106
107
  pub mod semantic;
107
108
  pub mod text;
@@ -0,0 +1,265 @@
1
+ //! Plain text extraction from parsed HTML DOM.
2
+ //!
3
+ //! Provides a fast-path text extractor that walks the DOM tree collecting only
4
+ //! visible text content with structural whitespace, bypassing the full
5
+ //! Markdown/Djot conversion pipeline.
6
+
7
+ use crate::options::ConversionOptions;
8
+ use crate::text;
9
+
10
/// Elements that never contribute visible text.
///
/// The plain-text walker returns as soon as it meets one of these tag names
/// (compared in lowercase), so the element and its whole subtree are ignored.
const SKIP_TAGS: &[&str] = &[
    "script", "style", "head", "template", "noscript", "svg", "math",
];
12
+
13
/// Block-level tags that should be separated by blank lines.
///
/// The walker surrounds the text of each of these elements with a blank line
/// on both sides. Tag names are matched in lowercase.
///
/// Definition-list elements (`dl`, `dt`, `dd`) are listed so that a term and
/// its description are emitted as separate blocks instead of being glued
/// together as inline text (e.g. `TermDefinition`).
const BLOCK_TAGS: &[&str] = &[
    "p",
    "div",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "blockquote",
    "section",
    "article",
    "aside",
    "main",
    "nav",
    "header",
    "footer",
    "figure",
    "figcaption",
    "details",
    "summary",
    "address",
    "hgroup",
    "search",
    "dl",
    "dt",
    "dd",
];
39
+
40
+ /// Extract plain text from a parsed DOM tree.
41
+ ///
42
+ /// Walks the tree collecting visible text with structural whitespace:
43
+ /// - Block elements get blank-line separation
44
+ /// - `<br>` becomes a newline, `<hr>` a blank line
45
+ /// - `<pre>` preserves internal whitespace
46
+ /// - `<img>` outputs alt text (unless `skip_images` is set)
47
+ /// - `<script>`, `<style>`, `<head>`, `<template>`, `<noscript>` are skipped
48
+ /// - Tables: cells separated by tab, rows by newline
49
+ /// - Inline elements are recursed without markers
50
+ pub fn extract_plain_text(dom: &tl::VDom, parser: &tl::Parser, options: &ConversionOptions) -> String {
51
+ let mut buf = String::with_capacity(1024);
52
+
53
+ for child_handle in dom.children() {
54
+ walk_plain(child_handle, parser, &mut buf, options, false);
55
+ }
56
+
57
+ post_process(&mut buf);
58
+ buf
59
+ }
60
+
61
+ /// Recursive plain-text walker.
62
+ fn walk_plain(
63
+ node_handle: &tl::NodeHandle,
64
+ parser: &tl::Parser,
65
+ buf: &mut String,
66
+ options: &ConversionOptions,
67
+ in_pre: bool,
68
+ ) {
69
+ let Some(node) = node_handle.get(parser) else {
70
+ return;
71
+ };
72
+
73
+ match node {
74
+ tl::Node::Raw(bytes) => {
75
+ let raw = bytes.as_utf8_str();
76
+ let decoded = text::decode_html_entities_cow(raw.as_ref());
77
+ if in_pre {
78
+ buf.push_str(&decoded);
79
+ } else {
80
+ let normalized = text::normalize_whitespace_cow(&decoded);
81
+ if !normalized.is_empty() {
82
+ // Avoid leading space at start of a new line
83
+ if normalized.as_ref() == " " && buf.ends_with('\n') {
84
+ return;
85
+ }
86
+ buf.push_str(&normalized);
87
+ }
88
+ }
89
+ }
90
+ tl::Node::Tag(tag) => {
91
+ let tag_name = tag.name().as_utf8_str().to_ascii_lowercase();
92
+ let tag_str = tag_name.as_str();
93
+
94
+ // Skip invisible content
95
+ if SKIP_TAGS.contains(&tag_str) {
96
+ return;
97
+ }
98
+
99
+ match tag_str {
100
+ "br" => {
101
+ buf.push('\n');
102
+ }
103
+ "hr" => {
104
+ ensure_blank_line(buf);
105
+ }
106
+ "pre" => {
107
+ ensure_blank_line(buf);
108
+ walk_children(tag, parser, buf, options, true);
109
+ ensure_blank_line(buf);
110
+ }
111
+ "img" => {
112
+ if !options.skip_images {
113
+ if let Some(Some(alt)) = tag.attributes().get("alt") {
114
+ let alt_text = alt.as_utf8_str();
115
+ if !alt_text.is_empty() {
116
+ buf.push_str(alt_text.as_ref());
117
+ }
118
+ }
119
+ }
120
+ }
121
+ "table" => {
122
+ ensure_blank_line(buf);
123
+ walk_table(tag, parser, buf, options);
124
+ ensure_blank_line(buf);
125
+ }
126
+ "li" => {
127
+ ensure_newline(buf);
128
+ walk_children(tag, parser, buf, options, false);
129
+ ensure_newline(buf);
130
+ }
131
+ _ if BLOCK_TAGS.contains(&tag_str) => {
132
+ ensure_blank_line(buf);
133
+ walk_children(tag, parser, buf, options, in_pre);
134
+ ensure_blank_line(buf);
135
+ }
136
+ _ => {
137
+ // Inline elements and structural containers (html, body, ul, ol, etc.)
138
+ walk_children(tag, parser, buf, options, in_pre);
139
+ }
140
+ }
141
+ }
142
+ tl::Node::Comment(_) => {}
143
+ }
144
+ }
145
+
146
+ /// Walk all children of a tag.
147
+ fn walk_children(tag: &tl::HTMLTag, parser: &tl::Parser, buf: &mut String, options: &ConversionOptions, in_pre: bool) {
148
+ let children = tag.children();
149
+ let top = children.top();
150
+ for child in top.iter() {
151
+ walk_plain(child, parser, buf, options, in_pre);
152
+ }
153
+ }
154
+
155
+ /// Walk a `<table>` element, extracting cells as tab-separated, rows as newline-separated.
156
+ fn walk_table(table_tag: &tl::HTMLTag, parser: &tl::Parser, buf: &mut String, options: &ConversionOptions) {
157
+ // Collect all <tr> node handles by recursing into the table
158
+ let mut row_handles = Vec::new();
159
+ collect_descendant_handles(table_tag, parser, "tr", &mut row_handles);
160
+
161
+ for (row_idx, row_handle) in row_handles.iter().enumerate() {
162
+ if row_idx > 0 {
163
+ buf.push('\n');
164
+ }
165
+ let Some(tl::Node::Tag(row_tag)) = row_handle.get(parser) else {
166
+ continue;
167
+ };
168
+
169
+ // Collect direct <th>/<td> children
170
+ let mut cell_handles = Vec::new();
171
+ let row_children = row_tag.children();
172
+ let row_top = row_children.top();
173
+ for child in row_top.iter() {
174
+ if let Some(tl::Node::Tag(child_tag)) = child.get(parser) {
175
+ let name = child_tag.name().as_utf8_str();
176
+ if name.eq_ignore_ascii_case("th") || name.eq_ignore_ascii_case("td") {
177
+ cell_handles.push(*child);
178
+ }
179
+ }
180
+ }
181
+
182
+ for (cell_idx, cell_handle) in cell_handles.iter().enumerate() {
183
+ if cell_idx > 0 {
184
+ buf.push('\t');
185
+ }
186
+ let mut cell_buf = String::new();
187
+ if let Some(tl::Node::Tag(cell_tag)) = cell_handle.get(parser) {
188
+ walk_children(cell_tag, parser, &mut cell_buf, options, false);
189
+ }
190
+ buf.push_str(cell_buf.trim());
191
+ }
192
+ }
193
+ }
194
+
195
+ /// Recursively collect all descendant `NodeHandle`s matching `target_tag` (by cloning handles).
196
+ fn collect_descendant_handles(
197
+ tag: &tl::HTMLTag,
198
+ parser: &tl::Parser,
199
+ target_tag: &str,
200
+ result: &mut Vec<tl::NodeHandle>,
201
+ ) {
202
+ let children = tag.children();
203
+ let top = children.top();
204
+ for child in top.iter() {
205
+ if let Some(tl::Node::Tag(child_tag)) = child.get(parser) {
206
+ if child_tag.name().as_utf8_str().eq_ignore_ascii_case(target_tag) {
207
+ result.push(*child);
208
+ } else {
209
+ collect_descendant_handles(child_tag, parser, target_tag, result);
210
+ }
211
+ }
212
+ }
213
+ }
214
+
215
/// Ensure the buffer ends with a blank line (two newlines).
///
/// Trailing spaces and tabs are removed first. If nothing remains (the buffer
/// was empty or held only horizontal whitespace) the buffer is left empty, so
/// the output never starts with blank lines. Otherwise newlines are appended
/// until the buffer ends with at least two; existing longer runs are left
/// untouched.
fn ensure_blank_line(buf: &mut String) {
    // Strip trailing horizontal whitespace.
    let kept = buf.trim_end_matches(|c: char| c == ' ' || c == '\t').len();
    buf.truncate(kept);

    // Nothing has been written yet: a separator here would only create
    // leading blank lines.
    if buf.is_empty() {
        return;
    }

    let trailing_newlines = buf.bytes().rev().take_while(|&b| b == b'\n').count();
    for _ in trailing_newlines..2 {
        buf.push('\n');
    }
}
229
+
230
/// Terminate the current line if there is one.
///
/// Appends a single `'\n'` unless the buffer is empty or already ends with a
/// newline; in those two cases the buffer is left untouched.
fn ensure_newline(buf: &mut String) {
    match buf.chars().next_back() {
        None | Some('\n') => {}
        Some(_) => buf.push('\n'),
    }
}
239
+
240
/// Post-process: trim line-end whitespace, collapse 3+ newlines to 2, ensure single trailing newline.
///
/// The buffer is rewritten in one pass:
/// 1. trailing whitespace is removed from every line;
/// 2. runs of newlines are capped at two, i.e. at most one blank line in a
///    row. This is applied *after* trimming, so lines that held only
///    whitespace cannot leave a longer run behind;
/// 3. all trailing newlines are replaced by exactly one, and a buffer with no
///    content at all becomes empty.
///
/// Leading newlines (up to two) are kept as they are.
fn post_process(buf: &mut String) {
    let mut cleaned = String::with_capacity(buf.len());
    // Number of consecutive '\n' characters currently at the end of `cleaned`.
    let mut newline_run = 0usize;

    for (index, line) in buf.lines().enumerate() {
        // A newline separates this line from the previous one, unless the cap
        // of two consecutive newlines has already been reached.
        if index > 0 && newline_run < 2 {
            cleaned.push('\n');
            newline_run += 1;
        }

        let content = line.trim_end();
        if !content.is_empty() {
            cleaned.push_str(content);
            newline_run = 0;
        }
    }

    // Trim to a single trailing newline; output without content stays empty.
    let kept = cleaned.trim_end_matches('\n').len();
    cleaned.truncate(kept);
    if !cleaned.is_empty() {
        cleaned.push('\n');
    }

    *buf = cleaned;
}