html-to-markdown 3.6.10 → 3.6.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 91c79ad314d7c648cf3f5d4f826170c1c5234a889a325597887b39f04f42ead9
4
- data.tar.gz: e540404974b9692e9adce99200cbab29b38c835a7bfcf3bf56534e199188b38f
3
+ metadata.gz: 70d67eb7df250429349103b104af5bb9118d742d947020ffc9d0768fc632a086
4
+ data.tar.gz: ca87f0babd9da500f417d89727e143c9f0ebbaef5766184fd45737e39794a0a5
5
5
  SHA512:
6
- metadata.gz: 3278ce32f1ee099f7e96e98b5ba9f7e46c58a822aa2d32f24d0443f032036745b6e8eb471c0fbe8a102b505fd4bb12ee0af22aa49afc6c88a0535968a25b1482
7
- data.tar.gz: 1654d4528e34ec8b434f3acd55713cc62f428cbabb3874285125640ba6b19e72478efaeef921acdaf2183e36821299f22f632889afc09161bf66695745b88cd5
6
+ metadata.gz: '090e40fd52c42ad51e9988d14c195482a857d2793d7cebe746c83e44e0376aa5954298aa1ba4abd7063f7fb69eedcd3cc1a9bb0b447324a1dd5ef5c143343006'
7
+ data.tar.gz: db8c7e520b5bde43566ff153d77ef56a401fb530ebbc329eb23dbad46b4e6bbfef892d50d8515158a4c10696eca408161722206e5842403812dd3a89aa6e9ecc
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  <div align="center" style="display: flex; flex-wrap: wrap; gap: 8px; justify-content: center; margin: 20px 0;">
4
4
  <a href="https://github.com/kreuzberg-dev/alef">
5
- <img src="https://img.shields.io/badge/Bindings-alef%20%D7%90-007ec6" alt="Bindings">
5
+ <img src="https://img.shields.io/badge/built%20with-alef%20%D7%90-007ec6" alt="Built with alef">
6
6
  </a>
7
7
  <!-- Language Bindings -->
8
8
  <a href="https://crates.io/crates/html-to-markdown-rs">
@@ -161,7 +161,7 @@ require 'html_to_markdown'
161
161
  result = HtmlToMarkdown.convert(html)
162
162
  markdown = result[:content] # Converted Markdown string
163
163
  metadata = result[:metadata] # Metadata (when extract_metadata: true)
164
- tables = result[:tables] # Structured table data (when extract_tables: true)
164
+ tables = result[:tables] # Structured table data
165
165
  document = result[:document] # Document-level info
166
166
  images = result[:images] # Extracted images
167
167
  warnings = result[:warnings] # Any conversion warnings
@@ -171,14 +171,13 @@ warnings = result[:warnings] # Any conversion warnings
171
171
 
172
172
  **`ConversionOptions`** – Key configuration fields:
173
173
 
174
- - `heading_style`: Heading format (`"underlined"` | `"atx"` | `"atx_closed"`) — default: `"underlined"`
174
+ - `heading_style`: Heading format (`"underlined"` | `"atx"` | `"atx_closed"`) — default: `"atx"`
175
175
  - `list_indent_width`: Spaces per indent level — default: `2`
176
- - `bullets`: Bullet characters cycle — default: `"*+-"`
176
+ - `bullets`: Bullet characters cycle — default: `"-*+"`
177
177
  - `wrap`: Enable text wrapping — default: `false`
178
178
  - `wrap_width`: Wrap at column — default: `80`
179
179
  - `code_language`: Default fenced code block language — default: none
180
- - `extract_metadata`: Enable metadata extraction into `result.metadata` — default: `false`
181
- - `extract_tables`: Enable structured table extraction into `result.tables` — default: `false`
180
+ - `extract_metadata`: Enable metadata extraction into `result.metadata` — default: `true`
182
181
  - `output_format`: Output markup format (`"markdown"` | `"djot"` | `"plain"`) — default: `"markdown"`
183
182
 
184
183
  ## Djot Output Format
@@ -205,11 +204,13 @@ require 'html_to_markdown'
205
204
  html = "<p>This is <strong>bold</strong> and <em>italic</em> text.</p>"
206
205
 
207
206
  # Default Markdown output
208
- markdown = HtmlToMarkdown.convert(html)
207
+ markdown_result = HtmlToMarkdown.convert(html)
208
+ markdown = markdown_result[:content]
209
209
  # Result: "This is **bold** and *italic* text."
210
210
 
211
211
  # Djot output
212
- djot = HtmlToMarkdown.convert(html, output_format: 'djot')
212
+ djot_result = HtmlToMarkdown.convert(html, output_format: 'djot')
213
+ djot = djot_result[:content]
213
214
  # Result: "This is *bold* and _italic_ text."
214
215
  ```
215
216
 
@@ -224,7 +225,8 @@ require 'html_to_markdown'
224
225
 
225
226
  html = "<h1>Title</h1><p>This is <strong>bold</strong> and <em>italic</em> text.</p>"
226
227
 
227
- plain = HtmlToMarkdown.convert(html, output_format: 'plain')
228
+ result = HtmlToMarkdown.convert(html, output_format: 'plain')
229
+ plain = result[:content]
228
230
  # Result: "Title\n\nThis is bold and italic text."
229
231
  ```
230
232
 
@@ -309,13 +311,13 @@ markdown = result[:content]
309
311
 
310
312
  ## Part of Kreuzberg.dev
311
313
 
312
- - [Kreuzberg](https://github.com/kreuzberg-dev/kreuzberg) — document intelligence: text, tables, metadata from 90+ formats with optional OCR.
314
+ - [Kreuzberg](https://github.com/kreuzberg-dev/kreuzberg) — document intelligence: text, tables, metadata from 91+ formats with optional OCR.
313
315
  - [Kreuzberg Cloud](https://github.com/kreuzberg-dev/kreuzberg-cloud) — managed extraction API with SDKs, dashboards, and observability.
314
316
  - [kreuzcrawl](https://github.com/kreuzberg-dev/kreuzcrawl) — web crawling and scraping with HTML→Markdown and headless-Chrome fallback.
317
+ - [html-to-markdown](https://github.com/kreuzberg-dev/html-to-markdown) — fast, lossless HTML→Markdown engine.
315
318
  - [liter-llm](https://github.com/kreuzberg-dev/liter-llm) — universal LLM API client with native bindings for 14 languages and 143 providers.
316
319
  - [tree-sitter-language-pack](https://github.com/kreuzberg-dev/tree-sitter-language-pack) — tree-sitter grammars and code-intelligence primitives.
317
320
  - [alef](https://github.com/kreuzberg-dev/alef) — the polyglot binding generator that produces every per-language binding across the 5 polyglot repos.
318
- - [Discord](https://discord.gg/xt9WY3GnKR) — community, roadmap, announcements.
319
321
 
320
322
  ## Contributing
321
323
 
@@ -343,5 +345,4 @@ If you find this library useful, consider [sponsoring the project](https://githu
343
345
  Have questions or run into issues? We're here to help:
344
346
 
345
347
  - **GitHub Issues:** [github.com/kreuzberg-dev/html-to-markdown/issues](https://github.com/kreuzberg-dev/html-to-markdown/issues)
346
- - **Issues:** [github.com/kreuzberg-dev/html-to-markdown/issues](https://github.com/kreuzberg-dev/html-to-markdown/issues)
347
348
  - **Discord Community:** [discord.gg/xt9WY3GnKR](https://discord.gg/xt9WY3GnKR)
@@ -1,7 +1,7 @@
1
1
 
2
2
  [package]
3
3
  name = "html-to-markdown-rb"
4
- version = "3.6.10"
4
+ version = "3.6.11"
5
5
  edition = "2024"
6
6
  license = "MIT"
7
7
  [workspace]
@@ -263,7 +263,7 @@ dependencies = [
263
263
 
264
264
  [[package]]
265
265
  name = "html-to-markdown-rb"
266
- version = "3.6.10"
266
+ version = "3.6.11"
267
267
  dependencies = [
268
268
  "async-trait",
269
269
  "html-to-markdown-rs",
@@ -276,9 +276,9 @@ dependencies = [
276
276
 
277
277
  [[package]]
278
278
  name = "html-to-markdown-rs"
279
- version = "3.6.10"
279
+ version = "3.6.11"
280
280
  source = "registry+https://github.com/rust-lang/crates.io-index"
281
- checksum = "061bf88c7bb26bcad8f42ff109663b65e4ccdc7844a07acf7a1769712ca72277"
281
+ checksum = "32027e930a32dd01839a07405eae5e482ff507ccc4ca6fd1a89091304bd7ba22"
282
282
  dependencies = [
283
283
  "ahash",
284
284
  "astral-tl",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "html-to-markdown-rb"
3
- version = "3.6.10"
3
+ version = "3.6.11"
4
4
  edition = "2024"
5
5
  license = "MIT"
6
6
  description = "High-performance HTML to Markdown converter"
@@ -16,9 +16,16 @@ name = "html_to_markdown_rb"
16
16
  path = "../src/lib.rs"
17
17
  crate-type = ["cdylib"]
18
18
 
19
+ [features]
20
+ default = ["inline-images", "metadata", "testkit", "visitor"]
21
+ inline-images = ["html-to-markdown-rs/inline-images"]
22
+ metadata = ["html-to-markdown-rs/metadata"]
23
+ testkit = ["html-to-markdown-rs/testkit"]
24
+ visitor = ["html-to-markdown-rs/visitor"]
25
+
19
26
  [dependencies]
20
27
  async-trait = "0.1"
21
- html-to-markdown-rs = { version = "3.6.10", features = ["serde", "metadata", "visitor", "inline-images", "testkit"] }
28
+ html-to-markdown-rs = { version = "3.6.11", features = ["serde", "metadata", "visitor", "inline-images", "testkit"] }
22
29
  magnus = "0.8"
23
30
  rb-sys = ">=0.9, <0.9.128"
24
31
  serde = { version = "1", features = ["derive"] }
@@ -1,5 +1,5 @@
1
1
  // This file is auto-generated by alef. DO NOT EDIT.
2
- // alef:hash:4ba300ecbb725eb266bbcd4f47e9bc50e560372ba8b6f9d9f93c785e8758cdc8
2
+ // alef:hash:7d62cf0c304e24a167a710e943106d8203118ef6cb83aa812f3133c24bfa7c7f
3
3
  // Re-generate with: alef generate
4
4
  #![allow(dead_code, unused_imports, unused_variables)]
5
5
  #![allow(
@@ -1,5 +1,5 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:4ba300ecbb725eb266bbcd4f47e9bc50e560372ba8b6f9d9f93c785e8758cdc8
2
+ # alef:hash:7d62cf0c304e24a167a710e943106d8203118ef6cb83aa812f3133c24bfa7c7f
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
5
  # frozen_string_literal: true
@@ -1,10 +1,10 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:4ba300ecbb725eb266bbcd4f47e9bc50e560372ba8b6f9d9f93c785e8758cdc8
2
+ # alef:hash:7d62cf0c304e24a167a710e943106d8203118ef6cb83aa812f3133c24bfa7c7f
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
5
  # frozen_string_literal: true
6
6
 
7
7
  module HtmlToMarkdown
8
8
  ## The version string for this package.
9
- VERSION = "3.6.10"
9
+ VERSION = "3.6.11"
10
10
  end
@@ -1,5 +1,5 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:4ba300ecbb725eb266bbcd4f47e9bc50e560372ba8b6f9d9f93c785e8758cdc8
2
+ # alef:hash:7d62cf0c304e24a167a710e943106d8203118ef6cb83aa812f3133c24bfa7c7f
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
5
  # frozen_string_literal: true
Binary file
data/sig/types.rbs CHANGED
@@ -1,5 +1,5 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:4ba300ecbb725eb266bbcd4f47e9bc50e560372ba8b6f9d9f93c785e8758cdc8
2
+ # alef:hash:7d62cf0c304e24a167a710e943106d8203118ef6cb83aa812f3133c24bfa7c7f
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
5
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-to-markdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.6.10
4
+ version: 3.6.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Na'aman Hirschfeld <naaman@kreuzberg.dev>