html-to-markdown 2.5.4-x86_64-darwin-22 → 2.5.5-x86_64-darwin-22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +29 -0
- data/Gemfile +15 -0
- data/Gemfile.lock +80 -0
- data/README.md +1 -1
- data/Rakefile +23 -0
- data/html-to-markdown-rb.gemspec +37 -0
- data/lib/html_to_markdown/version.rb +1 -1
- data/lib/html_to_markdown_rb.bundle +0 -0
- metadata +81 -6
- data/lib/bin/html-to-markdown +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: db28b05ee3dae4da7cab200a8998b9800242c6e1bda8ee09dc1a7cecc80ca5f5
|
|
4
|
+
data.tar.gz: 6e62808471e9d91e4c87a8f4cb53d7fe5ef5f1823adf6a5ed16d023c4c2d2aca
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3d931d00f431f475e572a20c450c8af6a2657ba5bbdeb6f44985bd411be257519dd3f2a80bb3aa76df6968ec79dfe28830939aecb776584436cde4f99a18d159
|
|
7
|
+
data.tar.gz: 5186fbbbd213dde4d4b9f555675b7c6f68a4aaa36ef3a815529e3d4f91c5d3edade387c27c0b0ea1caabd4ac47fc46d0ce1c71483e068d4e77efda62f15c3efa
|
data/.rubocop.yml
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
plugins:
|
|
2
|
+
- rubocop-rspec
|
|
3
|
+
|
|
4
|
+
AllCops:
|
|
5
|
+
NewCops: enable
|
|
6
|
+
TargetRubyVersion: 3.2
|
|
7
|
+
Exclude:
|
|
8
|
+
- "tmp/**/*"
|
|
9
|
+
- "vendor/**/*"
|
|
10
|
+
|
|
11
|
+
Style/Documentation:
|
|
12
|
+
Enabled: false
|
|
13
|
+
|
|
14
|
+
Metrics/BlockLength:
|
|
15
|
+
Exclude:
|
|
16
|
+
- "spec/**/*"
|
|
17
|
+
- "*.gemspec"
|
|
18
|
+
|
|
19
|
+
Metrics/MethodLength:
|
|
20
|
+
Max: 15
|
|
21
|
+
|
|
22
|
+
RSpec/MultipleExpectations:
|
|
23
|
+
Enabled: false
|
|
24
|
+
|
|
25
|
+
RSpec/ExampleLength:
|
|
26
|
+
Enabled: false
|
|
27
|
+
|
|
28
|
+
RSpec/SpecFilePathFormat:
|
|
29
|
+
Enabled: false
|
data/Gemfile
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
source 'https://rubygems.org'
|
|
4
|
+
|
|
5
|
+
ruby '>= 3.2'
|
|
6
|
+
|
|
7
|
+
gemspec
|
|
8
|
+
|
|
9
|
+
group :development, :test do
|
|
10
|
+
gem 'rake-compiler'
|
|
11
|
+
gem 'rb_sys' # provides build tooling when developing locally
|
|
12
|
+
gem 'rspec'
|
|
13
|
+
gem 'rubocop', require: false
|
|
14
|
+
gem 'rubocop-rspec', require: false
|
|
15
|
+
end
|
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
PATH
|
|
2
|
+
remote: .
|
|
3
|
+
specs:
|
|
4
|
+
html-to-markdown (2.5.5)
|
|
5
|
+
rb_sys (>= 0.9, < 1.0)
|
|
6
|
+
|
|
7
|
+
GEM
|
|
8
|
+
remote: https://rubygems.org/
|
|
9
|
+
specs:
|
|
10
|
+
ast (2.4.3)
|
|
11
|
+
diff-lcs (1.6.2)
|
|
12
|
+
json (2.15.2)
|
|
13
|
+
language_server-protocol (3.17.0.5)
|
|
14
|
+
lint_roller (1.1.0)
|
|
15
|
+
parallel (1.27.0)
|
|
16
|
+
parser (3.3.10.0)
|
|
17
|
+
ast (~> 2.4.1)
|
|
18
|
+
racc
|
|
19
|
+
prism (1.6.0)
|
|
20
|
+
racc (1.8.1)
|
|
21
|
+
rainbow (3.1.1)
|
|
22
|
+
rake (13.3.0)
|
|
23
|
+
rake-compiler (1.3.0)
|
|
24
|
+
rake
|
|
25
|
+
rake-compiler-dock (1.9.1)
|
|
26
|
+
rb_sys (0.9.117)
|
|
27
|
+
rake-compiler-dock (= 1.9.1)
|
|
28
|
+
regexp_parser (2.11.3)
|
|
29
|
+
rspec (3.13.2)
|
|
30
|
+
rspec-core (~> 3.13.0)
|
|
31
|
+
rspec-expectations (~> 3.13.0)
|
|
32
|
+
rspec-mocks (~> 3.13.0)
|
|
33
|
+
rspec-core (3.13.6)
|
|
34
|
+
rspec-support (~> 3.13.0)
|
|
35
|
+
rspec-expectations (3.13.5)
|
|
36
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
37
|
+
rspec-support (~> 3.13.0)
|
|
38
|
+
rspec-mocks (3.13.6)
|
|
39
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
40
|
+
rspec-support (~> 3.13.0)
|
|
41
|
+
rspec-support (3.13.6)
|
|
42
|
+
rubocop (1.81.6)
|
|
43
|
+
json (~> 2.3)
|
|
44
|
+
language_server-protocol (~> 3.17.0.2)
|
|
45
|
+
lint_roller (~> 1.1.0)
|
|
46
|
+
parallel (~> 1.10)
|
|
47
|
+
parser (>= 3.3.0.2)
|
|
48
|
+
rainbow (>= 2.2.2, < 4.0)
|
|
49
|
+
regexp_parser (>= 2.9.3, < 3.0)
|
|
50
|
+
rubocop-ast (>= 1.47.1, < 2.0)
|
|
51
|
+
ruby-progressbar (~> 1.7)
|
|
52
|
+
unicode-display_width (>= 2.4.0, < 4.0)
|
|
53
|
+
rubocop-ast (1.47.1)
|
|
54
|
+
parser (>= 3.3.7.2)
|
|
55
|
+
prism (~> 1.4)
|
|
56
|
+
rubocop-rspec (3.7.0)
|
|
57
|
+
lint_roller (~> 1.1)
|
|
58
|
+
rubocop (~> 1.72, >= 1.72.1)
|
|
59
|
+
ruby-progressbar (1.13.0)
|
|
60
|
+
unicode-display_width (3.2.0)
|
|
61
|
+
unicode-emoji (~> 4.1)
|
|
62
|
+
unicode-emoji (4.1.0)
|
|
63
|
+
|
|
64
|
+
PLATFORMS
|
|
65
|
+
arm64-darwin-24
|
|
66
|
+
ruby
|
|
67
|
+
|
|
68
|
+
DEPENDENCIES
|
|
69
|
+
html-to-markdown!
|
|
70
|
+
rake-compiler
|
|
71
|
+
rb_sys
|
|
72
|
+
rspec
|
|
73
|
+
rubocop
|
|
74
|
+
rubocop-rspec
|
|
75
|
+
|
|
76
|
+
RUBY VERSION
|
|
77
|
+
ruby 3.2.9p248
|
|
78
|
+
|
|
79
|
+
BUNDLED WITH
|
|
80
|
+
2.5.12
|
data/README.md
CHANGED
|
@@ -156,7 +156,7 @@ You can also call the CLI binary directly for scripting:
|
|
|
156
156
|
|
|
157
157
|
```ruby
|
|
158
158
|
HtmlToMarkdown::CLIProxy.call(['--version'])
|
|
159
|
-
# => "html-to-markdown 2.5.4"
|
|
159
|
+
# => "html-to-markdown 2.5.5"
|
|
160
160
|
```
|
|
161
161
|
|
|
162
162
|
Rebuild the CLI locally if you see `CLI binary not built` during tests:
|
data/Rakefile
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'bundler/gem_tasks'
|
|
4
|
+
require 'rb_sys/extensiontask'
|
|
5
|
+
require 'rspec/core/rake_task'
|
|
6
|
+
|
|
7
|
+
GEMSPEC = Gem::Specification.load(File.expand_path('html-to-markdown-rb.gemspec', __dir__))
|
|
8
|
+
|
|
9
|
+
RbSys::ExtensionTask.new('html-to-markdown-rb', GEMSPEC) do |ext|
|
|
10
|
+
ext.lib_dir = 'lib'
|
|
11
|
+
ext.cross_compile = true
|
|
12
|
+
ext.cross_platform = %w[
|
|
13
|
+
x86_64-linux
|
|
14
|
+
x86_64-darwin
|
|
15
|
+
arm64-darwin
|
|
16
|
+
x64-mingw32
|
|
17
|
+
]
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
RSpec::Core::RakeTask.new(:spec)
|
|
21
|
+
|
|
22
|
+
task spec: :compile
|
|
23
|
+
task default: :spec
|
data/html-to-markdown-rb.gemspec
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'lib/html_to_markdown/version'
|
|
4
|
+
|
|
5
|
+
readme_path = File.expand_path('README.md', __dir__)
|
|
6
|
+
readme_body = File.read(readme_path, encoding: 'UTF-8')
|
|
7
|
+
|
|
8
|
+
Gem::Specification.new do |spec|
|
|
9
|
+
spec.name = 'html-to-markdown'
|
|
10
|
+
spec.version = HtmlToMarkdown::VERSION
|
|
11
|
+
spec.authors = ["Na'aman Hirschfeld"]
|
|
12
|
+
spec.email = ['nhirschfeld@gmail.com']
|
|
13
|
+
|
|
14
|
+
spec.summary = 'Blazing-fast HTML to Markdown conversion for Ruby, powered by Rust.'
|
|
15
|
+
spec.description = readme_body
|
|
16
|
+
spec.homepage = 'https://github.com/Goldziher/html-to-markdown'
|
|
17
|
+
spec.license = 'MIT'
|
|
18
|
+
|
|
19
|
+
spec.required_ruby_version = Gem::Requirement.new('>= 3.2')
|
|
20
|
+
|
|
21
|
+
spec.bindir = 'exe'
|
|
22
|
+
spec.executables = ['html-to-markdown']
|
|
23
|
+
spec.require_paths = ['lib']
|
|
24
|
+
|
|
25
|
+
spec.files = `git ls-files -z`.split("\x0")
|
|
26
|
+
spec.extra_rdoc_files = ['README.md']
|
|
27
|
+
|
|
28
|
+
spec.extensions = ['extconf.rb']
|
|
29
|
+
|
|
30
|
+
spec.add_dependency 'rb_sys', '>= 0.9', '< 1.0'
|
|
31
|
+
spec.metadata['rubygems_mfa_required'] = 'true'
|
|
32
|
+
spec.metadata['homepage_uri'] = 'https://github.com/Goldziher/html-to-markdown'
|
|
33
|
+
spec.metadata['source_code_uri'] = 'https://github.com/Goldziher/html-to-markdown'
|
|
34
|
+
spec.metadata['bug_tracker_uri'] = 'https://github.com/Goldziher/html-to-markdown/issues'
|
|
35
|
+
spec.metadata['changelog_uri'] = 'https://github.com/Goldziher/html-to-markdown/releases'
|
|
36
|
+
spec.metadata['documentation_uri'] = 'https://github.com/Goldziher/html-to-markdown/blob/main/README.md'
|
|
37
|
+
end
|
data/lib/html_to_markdown_rb.bundle
CHANGED
|
Binary file
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: html-to-markdown
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.5.4
|
|
4
|
+
version: 2.5.5
|
|
5
5
|
platform: x86_64-darwin-22
|
|
6
6
|
authors:
|
|
7
7
|
- Na'aman Hirschfeld
|
|
@@ -10,19 +10,94 @@ bindir: exe
|
|
|
10
10
|
cert_chain: []
|
|
11
11
|
date: 2025-10-29 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
|
-
description:
|
|
14
|
-
|
|
15
|
-
|
|
13
|
+
description: "# html-to-markdown-rb\n\nBlazing-fast HTML → Markdown conversion for
|
|
14
|
+
Ruby, powered by the same Rust engine used by our Python, Node.js, and WebAssembly
|
|
15
|
+
packages. Ship identical Markdown across every runtime while enjoying native extension
|
|
16
|
+
performance.\n\n[](https://crates.io/crates/html-to-markdown-rs)\n[](https://www.npmjs.com/package/html-to-markdown-node)\n[](https://pypi.org/project/html-to-markdown/)\n[](https://rubygems.org/gems/html-to-markdown)\n[](https://github.com/Goldziher/html-to-markdown/blob/main/LICENSE)\n\n##
|
|
21
|
+
Features\n\n- ⚡ **Rust-fast**: Ruby bindings around a highly optimised Rust core
|
|
22
|
+
(60‑80× faster than BeautifulSoup-based converters).\n- \U0001F501 **Identical output**:
|
|
23
|
+
Shares logic with the Python wheels, npm bindings, WASM package, and CLI — consistent
|
|
24
|
+
Markdown everywhere.\n- ⚙️ **Rich configuration**: Control heading styles, list
|
|
25
|
+
indentation, whitespace handling, HTML preprocessing, and more.\n- \U0001F5BC️ **Inline
|
|
26
|
+
image extraction**: Pull out embedded images (PNG/JPEG/SVG/data URIs) alongside
|
|
27
|
+
Markdown.\n- \U0001F9F0 **Bundled CLI proxy**: Call the Rust CLI straight from Ruby
|
|
28
|
+
or shell scripts.\n- \U0001F6E0️ **First-class Rails support**: Works with `Gem.win_platform?`
|
|
29
|
+
builds, supports Trusted Publishing, and compiles on install if no native gem matches.\n\n##
|
|
30
|
+
Installation\n\n```bash\nbundle add html-to-markdown\n# or\ngem install html-to-markdown\n```\n\nAdd
|
|
31
|
+
the gem to your project and Bundler will compile the native Rust extension on first
|
|
32
|
+
install.\n\n### Requirements\n\n- Ruby **3.2+** (Magnus relies on the fiber scheduler
|
|
33
|
+
APIs added in 3.2)\n- Rust toolchain **1.85+** with Cargo available on your `$PATH`\n-
|
|
34
|
+
Ruby development headers (`ruby-dev`, `ruby-devel`, or the platform equivalent)\n\n**Windows**:
|
|
35
|
+
install [RubyInstaller with MSYS2](https://rubyinstaller.org/) (UCRT64). Run once:\n\n```powershell\nridk
|
|
36
|
+
exec pacman -S --needed --noconfirm base-devel mingw-w64-ucrt-x86_64-toolchain\n```\n\nThis
|
|
37
|
+
provides the standard headers (including `strings.h`) required for the bindgen step.\n\n##
|
|
38
|
+
Performance Snapshot\n\nApple M4 • Real Wikipedia documents • `HtmlToMarkdown.convert`
|
|
39
|
+
(Ruby)\n\n| Document | Size | Latency | Throughput | Docs/sec |\n| -------------------
|
|
40
|
+
| ----- | ------- | ---------- | -------- |\n| Lists (Timeline) | 129KB | 0.69ms
|
|
41
|
+
\ | 187 MB/s | 1,450 |\n| Tables (Countries) | 360KB | 2.19ms | 164 MB/s
|
|
42
|
+
\ | 456 |\n| Mixed (Python wiki) | 656KB | 4.88ms | 134 MB/s | 205 |\n\n>
|
|
43
|
+
Same core, same benchmarks: the Ruby extension stays within single-digit % of the
|
|
44
|
+
Rust CLI and mirrors the Python/Node numbers.\n\n## Quick Start\n\n```ruby\nrequire
|
|
45
|
+
'html_to_markdown'\n\nhtml = <<~HTML\n <h1>Welcome</h1>\n <p>This is <strong>Rust-fast</strong>
|
|
46
|
+
conversion!</p>\n <ul>\n <li>Native extension</li>\n <li>Identical output
|
|
47
|
+
across languages</li>\n </ul>\nHTML\n\nmarkdown = HtmlToMarkdown.convert(html)\nputs
|
|
48
|
+
markdown\n# # Welcome\n#\n# This is **Rust-fast** conversion!\n#\n# - Native extension\n#
|
|
49
|
+
- Identical output across languages\n```\n\n## API\n\n### Conversion Options\n\nPass
|
|
50
|
+
a Ruby hash (string or symbol keys) to tweak rendering. Every option maps one-for-one
|
|
51
|
+
with the Rust/Python/Node APIs.\n\n```ruby\nrequire 'html_to_markdown'\n\nmarkdown
|
|
52
|
+
= HtmlToMarkdown.convert(\n '<pre><code class=\"language-ruby\">puts \"hi\"</code></pre>',\n
|
|
53
|
+
\ heading_style: :atx,\n code_block_style: :fenced,\n bullets: '*+-',\n list_indent_type:
|
|
54
|
+
:spaces,\n list_indent_width: 2,\n whitespace_mode: :normalized,\n highlight_style:
|
|
55
|
+
:double_equal\n)\n\nputs markdown\n```\n\n### HTML Preprocessing\n\nClean up scraped
|
|
56
|
+
HTML (navigation, forms, malformed markup) before conversion:\n\n```ruby\nrequire
|
|
57
|
+
'html_to_markdown'\n\nmarkdown = HtmlToMarkdown.convert(\n html,\n preprocessing:
|
|
58
|
+
{\n enabled: true,\n preset: :aggressive, # :minimal, :standard, :aggressive\n
|
|
59
|
+
\ remove_navigation: true,\n remove_forms: true\n }\n)\n```\n\n### Inline
|
|
60
|
+
Images\n\nExtract inline binary data (data URIs, SVG) together with the converted
|
|
61
|
+
Markdown.\n\n```ruby\nrequire 'html_to_markdown'\n\nresult = HtmlToMarkdown.convert_with_inline_images(\n
|
|
62
|
+
\ '<img src=\"data:image/png;base64,iVBORw0...\" alt=\"Pixel\">',\n image_config:
|
|
63
|
+
{\n max_decoded_size_bytes: 1 * 1024 * 1024,\n infer_dimensions: true,\n filename_prefix:
|
|
64
|
+
'img_',\n capture_svg: true\n }\n)\n\nputs result.markdown\nresult.inline_images.each
|
|
65
|
+
do |img|\n puts \"#{img.filename} -> #{img.format} (#{img.data.bytesize} bytes)\"\nend\n```\n\n##
|
|
66
|
+
CLI\n\nThe gem bundles a small proxy for the Rust CLI binary. Use it when you need
|
|
67
|
+
parity with the standalone `html-to-markdown` executable.\n\n```ruby\nrequire 'html_to_markdown/cli'\n\nHtmlToMarkdown::CLI.run(%w[--heading-style
|
|
68
|
+
atx input.html], stdout: $stdout)\n# => writes converted Markdown to STDOUT\n```\n\nYou
|
|
69
|
+
can also call the CLI binary directly for scripting:\n\n```ruby\nHtmlToMarkdown::CLIProxy.call(['--version'])\n#
|
|
70
|
+
=> \"html-to-markdown 2.5.5\"\n```\n\nRebuild the CLI locally if you see `CLI binary
|
|
71
|
+
not built` during tests:\n\n```bash\nbundle exec rake compile # builds
|
|
72
|
+
the extension\nbundle exec ruby scripts/prepare_ruby_gem.rb # copies the CLI into
|
|
73
|
+
lib/bin/\n```\n\n## Error Handling\n\nConversion errors raise `HtmlToMarkdown::Error`
|
|
74
|
+
(wrapping the Rust error context). CLI invocations use specialised subclasses:\n\n-
|
|
75
|
+
`HtmlToMarkdown::CLIProxy::MissingBinaryError`\n- `HtmlToMarkdown::CLIProxy::CLIExecutionError`\n\nRescue
|
|
76
|
+
them to provide clearer feedback in your application.\n\n## Consistent Across Languages\n\nThe
|
|
77
|
+
Ruby gem shares the exact Rust core with:\n\n- [Python wheels](https://pypi.org/project/html-to-markdown/)\n-
|
|
78
|
+
[Node.js / Bun bindings](https://www.npmjs.com/package/html-to-markdown-node)\n-
|
|
79
|
+
[WebAssembly package](https://www.npmjs.com/package/html-to-markdown-wasm)\n- The
|
|
80
|
+
Rust crate and CLI\n\nUse whichever runtime fits your stack while keeping formatting
|
|
81
|
+
behaviour identical.\n\n## Development\n\n```bash\nbundle exec rake compile #
|
|
82
|
+
build the native extension\nbundle exec rspec # run test suite\n```\n\nThe
|
|
83
|
+
extension uses [Magnus](https://github.com/matsadler/magnus) plus `rb-sys` for bindgen.
|
|
84
|
+
When editing the Rust code under `src/`, rerun `rake compile`.\n\n## License\n\nMIT
|
|
85
|
+
© Na'aman Hirschfeld\n"
|
|
16
86
|
email:
|
|
17
87
|
- nhirschfeld@gmail.com
|
|
18
88
|
executables:
|
|
19
89
|
- html-to-markdown
|
|
20
90
|
extensions: []
|
|
21
|
-
extra_rdoc_files:
|
|
91
|
+
extra_rdoc_files:
|
|
92
|
+
- README.md
|
|
22
93
|
files:
|
|
94
|
+
- ".rubocop.yml"
|
|
95
|
+
- Gemfile
|
|
96
|
+
- Gemfile.lock
|
|
23
97
|
- README.md
|
|
98
|
+
- Rakefile
|
|
24
99
|
- exe/html-to-markdown
|
|
25
|
-
-
|
|
100
|
+
- html-to-markdown-rb.gemspec
|
|
26
101
|
- lib/html_to_markdown.rb
|
|
27
102
|
- lib/html_to_markdown/cli.rb
|
|
28
103
|
- lib/html_to_markdown/cli_proxy.rb
|
data/lib/bin/html-to-markdown
DELETED
|
Binary file
|