html-to-markdown 2.5.4-arm64-darwin-23 → 2.5.5-arm64-darwin-23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 15154c04dea1201c8b7c49cfa351ddf785299f276d30bcfeb69c0de3756a7af5
4
- data.tar.gz: 6784eba790bb138d5450bec6fa07f2846175a76658cd574086c5626de6c93038
3
+ metadata.gz: 4359a85303475a82fdfa3d40579dc612453192cb362d16719b1b67d154daa26b
4
+ data.tar.gz: 73f681ff7b5ef2eabf007c242f76be95d3cc8245dccd7304a483f94e349f5e5a
5
5
  SHA512:
6
- metadata.gz: 9a11d205c170ab6160c8a847e74830d4277b9321f92cadc635b9b2807bea0f3fc20d7d8e2843a85b3b53101bae9b4f4bebcb45acc163fc1411d797db74fdc9c0
7
- data.tar.gz: e2139fec34bf3d72c534489f234fa007290b2bd614b34f02016c75c1ede16b647fe85eba63daaaf1d474474fe71c2c96095c1d316d019f2f71970acb4a113143
6
+ metadata.gz: 509f84b79f38e51e8c1100feb53661144f95c094539c39ad47b23941af7587e68995a667c9a19655bb18359fbeffedaa6d72fabc0e9d2f6eab3dbfe52026fc08
7
+ data.tar.gz: e35b2e83f7d8c88a5c68afb456edb877ff48c5f6d8a6b2cef594eb9601d84af2d370554a0f1c5d592da0c5edc6104490ce6cbe67282615a179f04a1c2b3f6d31
data/.rubocop.yml ADDED
@@ -0,0 +1,29 @@
1
+ plugins:
2
+ - rubocop-rspec
3
+
4
+ AllCops:
5
+ NewCops: enable
6
+ TargetRubyVersion: 3.2
7
+ Exclude:
8
+ - "tmp/**/*"
9
+ - "vendor/**/*"
10
+
11
+ Style/Documentation:
12
+ Enabled: false
13
+
14
+ Metrics/BlockLength:
15
+ Exclude:
16
+ - "spec/**/*"
17
+ - "*.gemspec"
18
+
19
+ Metrics/MethodLength:
20
+ Max: 15
21
+
22
+ RSpec/MultipleExpectations:
23
+ Enabled: false
24
+
25
+ RSpec/ExampleLength:
26
+ Enabled: false
27
+
28
+ RSpec/SpecFilePathFormat:
29
+ Enabled: false
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ ruby '>= 3.2'
6
+
7
+ gemspec
8
+
9
+ group :development, :test do
10
+ gem 'rake-compiler'
11
+ gem 'rb_sys' # provides build tooling when developing locally
12
+ gem 'rspec'
13
+ gem 'rubocop', require: false
14
+ gem 'rubocop-rspec', require: false
15
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,80 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ html-to-markdown (2.5.5)
5
+ rb_sys (>= 0.9, < 1.0)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ ast (2.4.3)
11
+ diff-lcs (1.6.2)
12
+ json (2.15.2)
13
+ language_server-protocol (3.17.0.5)
14
+ lint_roller (1.1.0)
15
+ parallel (1.27.0)
16
+ parser (3.3.10.0)
17
+ ast (~> 2.4.1)
18
+ racc
19
+ prism (1.6.0)
20
+ racc (1.8.1)
21
+ rainbow (3.1.1)
22
+ rake (13.3.0)
23
+ rake-compiler (1.3.0)
24
+ rake
25
+ rake-compiler-dock (1.9.1)
26
+ rb_sys (0.9.117)
27
+ rake-compiler-dock (= 1.9.1)
28
+ regexp_parser (2.11.3)
29
+ rspec (3.13.2)
30
+ rspec-core (~> 3.13.0)
31
+ rspec-expectations (~> 3.13.0)
32
+ rspec-mocks (~> 3.13.0)
33
+ rspec-core (3.13.6)
34
+ rspec-support (~> 3.13.0)
35
+ rspec-expectations (3.13.5)
36
+ diff-lcs (>= 1.2.0, < 2.0)
37
+ rspec-support (~> 3.13.0)
38
+ rspec-mocks (3.13.6)
39
+ diff-lcs (>= 1.2.0, < 2.0)
40
+ rspec-support (~> 3.13.0)
41
+ rspec-support (3.13.6)
42
+ rubocop (1.81.6)
43
+ json (~> 2.3)
44
+ language_server-protocol (~> 3.17.0.2)
45
+ lint_roller (~> 1.1.0)
46
+ parallel (~> 1.10)
47
+ parser (>= 3.3.0.2)
48
+ rainbow (>= 2.2.2, < 4.0)
49
+ regexp_parser (>= 2.9.3, < 3.0)
50
+ rubocop-ast (>= 1.47.1, < 2.0)
51
+ ruby-progressbar (~> 1.7)
52
+ unicode-display_width (>= 2.4.0, < 4.0)
53
+ rubocop-ast (1.47.1)
54
+ parser (>= 3.3.7.2)
55
+ prism (~> 1.4)
56
+ rubocop-rspec (3.7.0)
57
+ lint_roller (~> 1.1)
58
+ rubocop (~> 1.72, >= 1.72.1)
59
+ ruby-progressbar (1.13.0)
60
+ unicode-display_width (3.2.0)
61
+ unicode-emoji (~> 4.1)
62
+ unicode-emoji (4.1.0)
63
+
64
+ PLATFORMS
65
+ arm64-darwin-24
66
+ ruby
67
+
68
+ DEPENDENCIES
69
+ html-to-markdown!
70
+ rake-compiler
71
+ rb_sys
72
+ rspec
73
+ rubocop
74
+ rubocop-rspec
75
+
76
+ RUBY VERSION
77
+ ruby 3.2.9p248
78
+
79
+ BUNDLED WITH
80
+ 2.5.12
data/README.md CHANGED
@@ -156,7 +156,7 @@ You can also call the CLI binary directly for scripting:
156
156
 
157
157
  ```ruby
158
158
  HtmlToMarkdown::CLIProxy.call(['--version'])
159
- # => "html-to-markdown 2.5.4"
159
+ # => "html-to-markdown 2.5.5"
160
160
  ```
161
161
 
162
162
  Rebuild the CLI locally if you see `CLI binary not built` during tests:
data/Rakefile ADDED
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rb_sys/extensiontask'
5
+ require 'rspec/core/rake_task'
6
+
7
+ GEMSPEC = Gem::Specification.load(File.expand_path('html-to-markdown-rb.gemspec', __dir__))
8
+
9
+ RbSys::ExtensionTask.new('html-to-markdown-rb', GEMSPEC) do |ext|
10
+ ext.lib_dir = 'lib'
11
+ ext.cross_compile = true
12
+ ext.cross_platform = %w[
13
+ x86_64-linux
14
+ x86_64-darwin
15
+ arm64-darwin
16
+ x64-mingw32
17
+ ]
18
+ end
19
+
20
+ RSpec::Core::RakeTask.new(:spec)
21
+
22
+ task spec: :compile
23
+ task default: :spec
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'lib/html_to_markdown/version'
4
+
5
+ readme_path = File.expand_path('README.md', __dir__)
6
+ readme_body = File.read(readme_path, encoding: 'UTF-8')
7
+
8
+ Gem::Specification.new do |spec|
9
+ spec.name = 'html-to-markdown'
10
+ spec.version = HtmlToMarkdown::VERSION
11
+ spec.authors = ["Na'aman Hirschfeld"]
12
+ spec.email = ['nhirschfeld@gmail.com']
13
+
14
+ spec.summary = 'Blazing-fast HTML to Markdown conversion for Ruby, powered by Rust.'
15
+ spec.description = readme_body
16
+ spec.homepage = 'https://github.com/Goldziher/html-to-markdown'
17
+ spec.license = 'MIT'
18
+
19
+ spec.required_ruby_version = Gem::Requirement.new('>= 3.2')
20
+
21
+ spec.bindir = 'exe'
22
+ spec.executables = ['html-to-markdown']
23
+ spec.require_paths = ['lib']
24
+
25
+ spec.files = `git ls-files -z`.split("\x0")
26
+ spec.extra_rdoc_files = ['README.md']
27
+
28
+ spec.extensions = ['extconf.rb']
29
+
30
+ spec.add_dependency 'rb_sys', '>= 0.9', '< 1.0'
31
+ spec.metadata['rubygems_mfa_required'] = 'true'
32
+ spec.metadata['homepage_uri'] = 'https://github.com/Goldziher/html-to-markdown'
33
+ spec.metadata['source_code_uri'] = 'https://github.com/Goldziher/html-to-markdown'
34
+ spec.metadata['bug_tracker_uri'] = 'https://github.com/Goldziher/html-to-markdown/issues'
35
+ spec.metadata['changelog_uri'] = 'https://github.com/Goldziher/html-to-markdown/releases'
36
+ spec.metadata['documentation_uri'] = 'https://github.com/Goldziher/html-to-markdown/blob/main/README.md'
37
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HtmlToMarkdown
4
- VERSION = '2.5.4'
4
+ VERSION = '2.5.5'
5
5
  end
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-to-markdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.5.4
4
+ version: 2.5.5
5
5
  platform: arm64-darwin-23
6
6
  authors:
7
7
  - Na'aman Hirschfeld
@@ -10,19 +10,94 @@ bindir: exe
10
10
  cert_chain: []
11
11
  date: 2025-10-29 00:00:00.000000000 Z
12
12
  dependencies: []
13
- description: |-
14
- html-to-markdown wraps our ultra-fast Rust converter with a Ruby-native API via Magnus and rb-sys.
15
- Enjoy identical output to the Python, Node, and WASM bindings, a bundled CLI proxy, and seamless cross-platform installs.
13
+ description: "# html-to-markdown-rb\n\nBlazing-fast HTML → Markdown conversion for
14
+ Ruby, powered by the same Rust engine used by our Python, Node.js, and WebAssembly
15
+ packages. Ship identical Markdown across every runtime while enjoying native extension
16
+ performance.\n\n[![Crates.io](https://img.shields.io/crates/v/html-to-markdown-rs.svg)](https://crates.io/crates/html-to-markdown-rs)\n[![npm
17
+ version](https://badge.fury.io/js/html-to-markdown-node.svg)](https://www.npmjs.com/package/html-to-markdown-node)\n[![PyPI
18
+ version](https://badge.fury.io/py/html-to-markdown.svg)](https://pypi.org/project/html-to-markdown/)\n[![Gem
19
+ Version](https://badge.fury.io/rb/html-to-markdown.svg)](https://rubygems.org/gems/html-to-markdown)\n[![License:
20
+ MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/Goldziher/html-to-markdown/blob/main/LICENSE)\n\n##
21
+ Features\n\n- ⚡ **Rust-fast**: Ruby bindings around a highly optimised Rust core
22
+ (60‑80× faster than BeautifulSoup-based converters).\n- \U0001F501 **Identical output**:
23
+ Shares logic with the Python wheels, npm bindings, WASM package, and CLI — consistent
24
+ Markdown everywhere.\n- ⚙️ **Rich configuration**: Control heading styles, list
25
+ indentation, whitespace handling, HTML preprocessing, and more.\n- \U0001F5BC️ **Inline
26
+ image extraction**: Pull out embedded images (PNG/JPEG/SVG/data URIs) alongside
27
+ Markdown.\n- \U0001F9F0 **Bundled CLI proxy**: Call the Rust CLI straight from Ruby
28
+ or shell scripts.\n- \U0001F6E0️ **First-class Rails support**: Works with `Gem.win_platform?`
29
+ builds, supports Trusted Publishing, and compiles on install if no native gem matches.\n\n##
30
+ Installation\n\n```bash\nbundle add html-to-markdown\n# or\ngem install html-to-markdown\n```\n\nAdd
31
+ the gem to your project and Bundler will compile the native Rust extension on first
32
+ install.\n\n### Requirements\n\n- Ruby **3.2+** (Magnus relies on the fiber scheduler
33
+ APIs added in 3.2)\n- Rust toolchain **1.85+** with Cargo available on your `$PATH`\n-
34
+ Ruby development headers (`ruby-dev`, `ruby-devel`, or the platform equivalent)\n\n**Windows**:
35
+ install [RubyInstaller with MSYS2](https://rubyinstaller.org/) (UCRT64). Run once:\n\n```powershell\nridk
36
+ exec pacman -S --needed --noconfirm base-devel mingw-w64-ucrt-x86_64-toolchain\n```\n\nThis
37
+ provides the standard headers (including `strings.h`) required for the bindgen step.\n\n##
38
+ Performance Snapshot\n\nApple M4 • Real Wikipedia documents • `HtmlToMarkdown.convert`
39
+ (Ruby)\n\n| Document | Size | Latency | Throughput | Docs/sec |\n| -------------------
40
+ | ----- | ------- | ---------- | -------- |\n| Lists (Timeline) | 129KB | 0.69ms
41
+ \ | 187 MB/s | 1,450 |\n| Tables (Countries) | 360KB | 2.19ms | 164 MB/s
42
+ \ | 456 |\n| Mixed (Python wiki) | 656KB | 4.88ms | 134 MB/s | 205 |\n\n>
43
+ Same core, same benchmarks: the Ruby extension stays within single-digit % of the
44
+ Rust CLI and mirrors the Python/Node numbers.\n\n## Quick Start\n\n```ruby\nrequire
45
+ 'html_to_markdown'\n\nhtml = <<~HTML\n <h1>Welcome</h1>\n <p>This is <strong>Rust-fast</strong>
46
+ conversion!</p>\n <ul>\n <li>Native extension</li>\n <li>Identical output
47
+ across languages</li>\n </ul>\nHTML\n\nmarkdown = HtmlToMarkdown.convert(html)\nputs
48
+ markdown\n# # Welcome\n#\n# This is **Rust-fast** conversion!\n#\n# - Native extension\n#
49
+ - Identical output across languages\n```\n\n## API\n\n### Conversion Options\n\nPass
50
+ a Ruby hash (string or symbol keys) to tweak rendering. Every option maps one-for-one
51
+ with the Rust/Python/Node APIs.\n\n```ruby\nrequire 'html_to_markdown'\n\nmarkdown
52
+ = HtmlToMarkdown.convert(\n '<pre><code class=\"language-ruby\">puts \"hi\"</code></pre>',\n
53
+ \ heading_style: :atx,\n code_block_style: :fenced,\n bullets: '*+-',\n list_indent_type:
54
+ :spaces,\n list_indent_width: 2,\n whitespace_mode: :normalized,\n highlight_style:
55
+ :double_equal\n)\n\nputs markdown\n```\n\n### HTML Preprocessing\n\nClean up scraped
56
+ HTML (navigation, forms, malformed markup) before conversion:\n\n```ruby\nrequire
57
+ 'html_to_markdown'\n\nmarkdown = HtmlToMarkdown.convert(\n html,\n preprocessing:
58
+ {\n enabled: true,\n preset: :aggressive, # :minimal, :standard, :aggressive\n
59
+ \ remove_navigation: true,\n remove_forms: true\n }\n)\n```\n\n### Inline
60
+ Images\n\nExtract inline binary data (data URIs, SVG) together with the converted
61
+ Markdown.\n\n```ruby\nrequire 'html_to_markdown'\n\nresult = HtmlToMarkdown.convert_with_inline_images(\n
62
+ \ '<img src=\"...\" alt=\"Pixel\">',\n image_config:
63
+ {\n max_decoded_size_bytes: 1 * 1024 * 1024,\n infer_dimensions: true,\n filename_prefix:
64
+ 'img_',\n capture_svg: true\n }\n)\n\nputs result.markdown\nresult.inline_images.each
65
+ do |img|\n puts \"#{img.filename} -> #{img.format} (#{img.data.bytesize} bytes)\"\nend\n```\n\n##
66
+ CLI\n\nThe gem bundles a small proxy for the Rust CLI binary. Use it when you need
67
+ parity with the standalone `html-to-markdown` executable.\n\n```ruby\nrequire 'html_to_markdown/cli'\n\nHtmlToMarkdown::CLI.run(%w[--heading-style
68
+ atx input.html], stdout: $stdout)\n# => writes converted Markdown to STDOUT\n```\n\nYou
69
+ can also call the CLI binary directly for scripting:\n\n```ruby\nHtmlToMarkdown::CLIProxy.call(['--version'])\n#
70
+ => \"html-to-markdown 2.5.5\"\n```\n\nRebuild the CLI locally if you see `CLI binary
71
+ not built` during tests:\n\n```bash\nbundle exec rake compile # builds
72
+ the extension\nbundle exec ruby scripts/prepare_ruby_gem.rb # copies the CLI into
73
+ lib/bin/\n```\n\n## Error Handling\n\nConversion errors raise `HtmlToMarkdown::Error`
74
+ (wrapping the Rust error context). CLI invocations use specialised subclasses:\n\n-
75
+ `HtmlToMarkdown::CLIProxy::MissingBinaryError`\n- `HtmlToMarkdown::CLIProxy::CLIExecutionError`\n\nRescue
76
+ them to provide clearer feedback in your application.\n\n## Consistent Across Languages\n\nThe
77
+ Ruby gem shares the exact Rust core with:\n\n- [Python wheels](https://pypi.org/project/html-to-markdown/)\n-
78
+ [Node.js / Bun bindings](https://www.npmjs.com/package/html-to-markdown-node)\n-
79
+ [WebAssembly package](https://www.npmjs.com/package/html-to-markdown-wasm)\n- The
80
+ Rust crate and CLI\n\nUse whichever runtime fits your stack while keeping formatting
81
+ behaviour identical.\n\n## Development\n\n```bash\nbundle exec rake compile #
82
+ build the native extension\nbundle exec rspec # run test suite\n```\n\nThe
83
+ extension uses [Magnus](https://github.com/matsadler/magnus) plus `rb-sys` for bindgen.
84
+ When editing the Rust code under `src/`, rerun `rake compile`.\n\n## License\n\nMIT
85
+ © Na'aman Hirschfeld\n"
16
86
  email:
17
87
  - nhirschfeld@gmail.com
18
88
  executables:
19
89
  - html-to-markdown
20
90
  extensions: []
21
- extra_rdoc_files: []
91
+ extra_rdoc_files:
92
+ - README.md
22
93
  files:
94
+ - ".rubocop.yml"
95
+ - Gemfile
96
+ - Gemfile.lock
23
97
  - README.md
98
+ - Rakefile
24
99
  - exe/html-to-markdown
25
- - lib/bin/html-to-markdown
100
+ - html-to-markdown-rb.gemspec
26
101
  - lib/html_to_markdown.rb
27
102
  - lib/html_to_markdown/cli.rb
28
103
  - lib/html_to_markdown/cli_proxy.rb
Binary file