html-to-markdown 2.7.0 → 2.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +17 -0
- data/ext/html-to-markdown-rb/extconf.rb +11 -1
- data/ext/html-to-markdown-rb/native/Cargo.toml +4 -4
- data/lib/html_to_markdown/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 92b1acd7c60d7aa288f3a73ee5d5f67e0397719f61879edd0d8fa4d8e3b09601
|
|
4
|
+
data.tar.gz: 687dabe472299a6007d1cc0462acb8a5103b6a41d63c4987788c8d915bdfe8c1
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c98e25f2a37a2cedec0fa611e0460aaa6f26e7be19b3fba461f9a5a4fa6ebcc8bd76e0698489e1ba0c5e8a8a172596e67cdae9ba8dc0409c7466fb34329adb93
|
|
7
|
+
data.tar.gz: 8cea9bc49e6156ce2242c155959793bd778f8747aaa6f343a8eec8d14285d503b2ef659d3cc3304b955f614dbc8fe123c7e2b7a878d6ddc965ae0a4350fab443
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
|
@@ -62,6 +62,23 @@ Apple M4 • Real Wikipedia documents • `HtmlToMarkdown.convert` (Ruby)
|
|
|
62
62
|
|
|
63
63
|
> Same core, same benchmarks: the Ruby extension stays within single-digit % of the Rust CLI and mirrors the Python/Node numbers.
|
|
64
64
|
|
|
65
|
+
### Benchmark Fixtures (Apple M4)
|
|
66
|
+
|
|
67
|
+
Measured via `task bench:bindings -- --language ruby` with the shared Wikipedia + hOCR suite:
|
|
68
|
+
|
|
69
|
+
| Document | Size | ops/sec (Ruby) |
|
|
70
|
+
| ---------------------- | ------ | -------------- |
|
|
71
|
+
| Lists (Timeline) | 129 KB | 1,349 |
|
|
72
|
+
| Tables (Countries) | 360 KB | 326 |
|
|
73
|
+
| Medium (Python) | 657 KB | 157 |
|
|
74
|
+
| Large (Rust) | 567 KB | 174 |
|
|
75
|
+
| Small (Intro) | 463 KB | 214 |
|
|
76
|
+
| hOCR German PDF | 44 KB | 2,936 |
|
|
77
|
+
| hOCR Invoice | 4 KB | 25,740 |
|
|
78
|
+
| hOCR Embedded Tables | 37 KB | 3,328 |
|
|
79
|
+
|
|
80
|
+
> These numbers line up with the Python/Node bindings because everything flows through the same Rust engine.
|
|
81
|
+
|
|
65
82
|
## Quick Start
|
|
66
83
|
|
|
67
84
|
```ruby
|
|
data/ext/html-to-markdown-rb/extconf.rb
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
require 'mkmf'
|
|
4
4
|
require 'rb_sys/mkmf'
|
|
5
5
|
require 'rbconfig'
|
|
6
|
+
require 'pathname'
|
|
6
7
|
|
|
7
8
|
if RbConfig::CONFIG['host_os'] =~ /mswin|mingw/
|
|
8
9
|
devkit = ENV.fetch('RI_DEVKIT', nil)
|
|
@@ -24,5 +25,14 @@ default_profile = ENV.fetch('CARGO_PROFILE', 'release')
|
|
|
24
25
|
|
|
25
26
|
create_rust_makefile('html_to_markdown_rb') do |config|
|
|
26
27
|
config.profile = default_profile.to_sym
|
|
27
|
-
|
|
28
|
+
|
|
29
|
+
native_dir = File.expand_path('native', __dir__)
|
|
30
|
+
relative_native =
|
|
31
|
+
begin
|
|
32
|
+
Pathname.new(native_dir).relative_path_from(Pathname.new(__dir__)).to_s
|
|
33
|
+
rescue ArgumentError
|
|
34
|
+
native_dir
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
config.ext_dir = relative_native
|
|
28
38
|
end
|
|
data/ext/html-to-markdown-rb/native/Cargo.toml
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "html-to-markdown-rb"
|
|
3
|
-
version = "2.7.0"
|
|
4
|
-
edition
|
|
3
|
+
version = "2.7.1"
|
|
4
|
+
edition = "2024"
|
|
5
5
|
authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
|
|
6
6
|
license = "MIT"
|
|
7
7
|
repository = "https://github.com/Goldziher/html-to-markdown"
|
|
8
8
|
homepage = "https://github.com/Goldziher/html-to-markdown"
|
|
9
9
|
documentation = "https://docs.rs/html-to-markdown-rs"
|
|
10
10
|
readme = "README.md"
|
|
11
|
-
rust-version
|
|
11
|
+
rust-version = "1.85"
|
|
12
12
|
description = "Ruby bindings (Magnus) for html-to-markdown - high-performance HTML to Markdown converter"
|
|
13
13
|
keywords = ["html", "markdown", "ruby", "magnus", "bindings"]
|
|
14
14
|
categories = ["api-bindings"]
|
|
@@ -21,7 +21,7 @@ crate-type = ["cdylib", "rlib"]
|
|
|
21
21
|
default = []
|
|
22
22
|
|
|
23
23
|
[dependencies]
|
|
24
|
-
html-to-markdown-rs = { version = "2.7.
|
|
24
|
+
html-to-markdown-rs = { version = "2.7.1", features = ["inline-images"] }
|
|
25
25
|
magnus = { git = "https://github.com/matsadler/magnus", rev = "f6db11769efb517427bf7f121f9c32e18b059b38", features = ["rb-sys"] }
|
|
26
26
|
|
|
27
27
|
[dev-dependencies]
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: html-to-markdown
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.7.0
|
|
4
|
+
version: 2.7.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Na'aman Hirschfeld
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2025-11-
|
|
11
|
+
date: 2025-11-12 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rb_sys
|