html-to-markdown 2.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Cargo.toml +28 -0
- data/README.md +146 -0
- data/exe/html-to-markdown +6 -0
- data/extconf.rb +27 -0
- data/lib/bin/html-to-markdown +0 -0
- data/lib/html_to_markdown/cli.rb +21 -0
- data/lib/html_to_markdown/cli_proxy.rb +71 -0
- data/lib/html_to_markdown/version.rb +5 -0
- data/lib/html_to_markdown.rb +24 -0
- data/spec/cli_proxy_spec.rb +42 -0
- data/spec/convert_spec.rb +29 -0
- data/spec/spec_helper.rb +10 -0
- data/src/lib.rs +432 -0
- metadata +82 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 9e0e6194a6f1f081dd93f13abb5e299118f8e92bb5a7f706d6dc3359439dd5ae
|
|
4
|
+
data.tar.gz: 7146575639b67ad477a74640d5cd8d19219bb58a60c0d1311595fd46ee9be2ad
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 0d55454f4a640146ddbe4ed0173f140901a9bfc6c1333827a51c5f404aed8b38d7c90bd6910e15aef0dd2da25f91fbfe6c08b1b5e914e6fc2d466caca148d1f2
|
|
7
|
+
data.tar.gz: 88fbedd30ddacdbac50be232944ff331edf344129d91c9d0799d999443833f0726a963881e6e608892923a115d7e3b01cf041443f46363059ab2e68a9e6cde2f
|
data/Cargo.toml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[package]
|
|
2
|
+
name = "html-to-markdown-rb"
|
|
3
|
+
version.workspace = true
|
|
4
|
+
edition.workspace = true
|
|
5
|
+
authors.workspace = true
|
|
6
|
+
license.workspace = true
|
|
7
|
+
repository.workspace = true
|
|
8
|
+
homepage.workspace = true
|
|
9
|
+
documentation.workspace = true
|
|
10
|
+
readme = "README.md"
|
|
11
|
+
rust-version.workspace = true
|
|
12
|
+
description = "Ruby bindings (Magnus) for html-to-markdown - high-performance HTML to Markdown converter"
|
|
13
|
+
keywords = ["html", "markdown", "ruby", "magnus", "bindings"]
|
|
14
|
+
categories = ["api-bindings"]
|
|
15
|
+
|
|
16
|
+
[lib]
|
|
17
|
+
name = "html_to_markdown_rb"
|
|
18
|
+
crate-type = ["cdylib", "rlib"]
|
|
19
|
+
|
|
20
|
+
[features]
|
|
21
|
+
default = []
|
|
22
|
+
|
|
23
|
+
[dependencies]
|
|
24
|
+
html-to-markdown-rs = { workspace = true, features = ["inline-images"] }
|
|
25
|
+
magnus = { git = "https://github.com/matsadler/magnus", rev = "f6db11769efb517427bf7f121f9c32e18b059b38", features = ["rb-sys"] }
|
|
26
|
+
|
|
27
|
+
[dev-dependencies]
|
|
28
|
+
pretty_assertions = "1.4"
|
data/README.md
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# html-to-markdown-rb
|
|
2
|
+
|
|
3
|
+
Ruby bindings for the `html-to-markdown` Rust engine – the same core that powers the Python wheels, Node.js NAPI bindings, WebAssembly package, and CLI. The gem exposes fast HTML → Markdown conversion with identical rendering behaviour across every supported language.
|
|
4
|
+
|
|
5
|
+
[RubyGems](https://rubygems.org/gems/html-to-markdown)
|
|
6
|
+
[License: MIT](https://github.com/Goldziher/html-to-markdown/blob/main/LICENSE)
|
|
7
|
+
|
|
8
|
+
## Installation
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
bundle add html-to-markdown
|
|
12
|
+
# or
|
|
13
|
+
gem install html-to-markdown
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
Add the gem to your project and Bundler will compile the native Rust extension on first install.
|
|
17
|
+
|
|
18
|
+
### Requirements
|
|
19
|
+
|
|
20
|
+
- Ruby **3.2+** (Magnus relies on the fiber scheduler APIs added in 3.2)
|
|
21
|
+
- Rust toolchain **1.85+** with Cargo available on your `$PATH`
|
|
22
|
+
- Ruby development headers (`ruby-dev`, `ruby-devel`, or the platform equivalent)
|
|
23
|
+
|
|
24
|
+
**Windows**: install [RubyInstaller with MSYS2](https://rubyinstaller.org/) (UCRT64). Run once:
|
|
25
|
+
|
|
26
|
+
```powershell
|
|
27
|
+
ridk exec pacman -S --needed --noconfirm base-devel mingw-w64-ucrt-x86_64-toolchain
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
This provides the standard headers (including `strings.h`) required for the bindgen step.
|
|
31
|
+
|
|
32
|
+
## Quick Start
|
|
33
|
+
|
|
34
|
+
```ruby
|
|
35
|
+
require 'html_to_markdown'
|
|
36
|
+
|
|
37
|
+
html = <<~HTML
|
|
38
|
+
<h1>Welcome</h1>
|
|
39
|
+
<p>This is <strong>Rust-fast</strong> conversion!</p>
|
|
40
|
+
<ul>
|
|
41
|
+
<li>Native extension</li>
|
|
42
|
+
<li>Identical output across languages</li>
|
|
43
|
+
</ul>
|
|
44
|
+
HTML
|
|
45
|
+
|
|
46
|
+
markdown = HtmlToMarkdown.convert(html)
|
|
47
|
+
puts markdown
|
|
48
|
+
# # Welcome
|
|
49
|
+
#
|
|
50
|
+
# This is **Rust-fast** conversion!
|
|
51
|
+
#
|
|
52
|
+
# - Native extension
|
|
53
|
+
# - Identical output across languages
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Conversion with Options
|
|
57
|
+
|
|
58
|
+
All configuration mirrors the Rust API. Options accept symbols or strings and match the same defaults as the other bindings.
|
|
59
|
+
|
|
60
|
+
```ruby
|
|
61
|
+
require 'html_to_markdown'
|
|
62
|
+
|
|
63
|
+
markdown = HtmlToMarkdown.convert(
|
|
64
|
+
'<pre><code class="language-ruby">puts "hi"</code></pre>',
|
|
65
|
+
heading_style: :atx,
|
|
66
|
+
code_block_style: :backticks,
|
|
67
|
+
bullets: '*-+',
|
|
68
|
+
wrap: true,
|
|
69
|
+
wrap_width: 80,
|
|
70
|
+
preserve_tags: %w[table figure]
|
|
71
|
+
)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Inline Images
|
|
75
|
+
|
|
76
|
+
Extract inline binary data (data URIs, SVG) together with the converted Markdown.
|
|
77
|
+
|
|
78
|
+
```ruby
|
|
79
|
+
require 'html_to_markdown'
|
|
80
|
+
|
|
81
|
+
result = HtmlToMarkdown.convert_with_inline_images(
|
|
82
|
+
'<img src="..." alt="Pixel">',
|
|
83
|
+
nil, { # positional arguments: conversion options (nil = defaults), then the image config Hash
|
|
84
|
+
max_decoded_size_bytes: 1 * 1024 * 1024,
|
|
85
|
+
infer_dimensions: true,
|
|
86
|
+
filename_prefix: 'img_',
|
|
87
|
+
capture_svg: true
|
|
88
|
+
}
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
puts result[:markdown]
|
|
92
|
+
result[:inline_images].each do |img|
|
|
93
|
+
puts "#{img[:filename]} -> #{img[:format]} (#{img[:data].bytesize} bytes)"
|
|
94
|
+
end
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### CLI Proxy
|
|
98
|
+
|
|
99
|
+
The gem bundles a small proxy for the Rust CLI binary. Use it when you need parity with the standalone `html-to-markdown` executable.
|
|
100
|
+
|
|
101
|
+
```ruby
|
|
102
|
+
require 'html_to_markdown/cli'
|
|
103
|
+
|
|
104
|
+
HtmlToMarkdown::CLI.run(%w[--heading-style atx input.html], stdout: $stdout)
|
|
105
|
+
# => writes converted Markdown to STDOUT
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
You can also call the CLI binary directly for scripting:
|
|
109
|
+
|
|
110
|
+
```ruby
|
|
111
|
+
HtmlToMarkdown::CLIProxy.call(['--version'])
|
|
112
|
+
# => "html-to-markdown 2.5.1"
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Error Handling
|
|
116
|
+
|
|
117
|
+
Conversion errors raise `ArgumentError` for invalid options or configuration and `RuntimeError` (carrying the Rust error message) for everything else. CLI invocations raise subclasses of `HtmlToMarkdown::CLIProxy::Error`:
|
|
118
|
+
|
|
119
|
+
- `HtmlToMarkdown::CLIProxy::MissingBinaryError`
|
|
120
|
+
- `HtmlToMarkdown::CLIProxy::CLIExecutionError`
|
|
121
|
+
|
|
122
|
+
Rescue them to provide clearer feedback in your application.
|
|
123
|
+
|
|
124
|
+
## Consistent Across Languages
|
|
125
|
+
|
|
126
|
+
The Ruby gem shares the exact Rust core with:
|
|
127
|
+
|
|
128
|
+
- [Python wheels](https://pypi.org/project/html-to-markdown/)
|
|
129
|
+
- [Node.js / Bun bindings](https://www.npmjs.com/package/html-to-markdown-node)
|
|
130
|
+
- [WebAssembly package](https://www.npmjs.com/package/html-to-markdown-wasm)
|
|
131
|
+
- The Rust crate and CLI
|
|
132
|
+
|
|
133
|
+
Use whichever runtime fits your stack while keeping formatting behaviour identical.
|
|
134
|
+
|
|
135
|
+
## Development
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
bundle exec rake compile # build the native extension
|
|
139
|
+
bundle exec rspec # run test suite
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
The extension uses [Magnus](https://github.com/matsadler/magnus) plus `rb-sys` for bindgen. When editing the Rust code under `src/`, rerun `rake compile`.
|
|
143
|
+
|
|
144
|
+
## License
|
|
145
|
+
|
|
146
|
+
MIT © Na'aman Hirschfeld
|
data/extconf.rb
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'mkmf'
|
|
4
|
+
require 'rb_sys/mkmf'
|
|
5
|
+
require 'rbconfig'
|
|
6
|
+
|
|
7
|
+
# On Windows hosts with RI_DEVKIT set, extend BINDGEN_EXTRA_CLANG_ARGS with a
# clang target and a sysroot derived from the devkit before the makefile is
# generated. Other hosts are left untouched.
on_windows = RbConfig::CONFIG['host_os'].match?(/mswin|mingw/)
devkit_root = on_windows ? ENV['RI_DEVKIT'] : nil

if devkit_root
  # Sysroot is "<devkit><prefix>"; the prefix defaults to '/ucrt64' when
  # MSYSTEM_PREFIX is unset. Backslashes are normalised to forward slashes.
  msys_prefix = ENV['MSYSTEM_PREFIX'] || '/ucrt64'
  sysroot_dir = "#{devkit_root}#{msys_prefix}".tr('\\\\', '/')
  clang_flags = ['--target=x86_64-pc-windows-gnu', "--sysroot=#{sysroot_dir}"]

  # Keep whatever the caller already exported and drop duplicate flags.
  inherited_flags = ENV['BINDGEN_EXTRA_CLANG_ARGS'].to_s.split(/\s+/)
  ENV['BINDGEN_EXTRA_CLANG_ARGS'] = (inherited_flags + clang_flags).uniq.join(' ')
end

# The Cargo profile comes from CARGO_PROFILE and defaults to 'release'.
cargo_profile = ENV.fetch('CARGO_PROFILE', 'release').to_sym

create_rust_makefile('html_to_markdown_rb') { |config| config.profile = cargo_profile }
|
|
Binary file
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'html_to_markdown/cli_proxy'
|
|
4
|
+
|
|
5
|
+
module HtmlToMarkdown
  # Thin command-line front end: forwards arguments to the CLI binary through
  # CLIProxy and maps the outcome to a process-style exit code.
  module CLI
    module_function

    # Runs the CLI with +argv+ and writes the result to the given streams.
    #
    # @param argv [Array<#to_s>] arguments forwarded to CLIProxy.call
    # @param stdout [#print] receives the captured standard output on success
    # @param stderr [#print, #puts] receives error output on failure
    # @return [Integer] 0 on success; the error's status (or 1 when it is nil)
    #   for a CLIExecutionError; 1 for any other CLIProxy::Error
    def run(argv = ARGV, stdout: $stdout, stderr: $stderr)
      stdout.print(CLIProxy.call(argv))
      0
    rescue CLIProxy::CLIExecutionError => e
      # Relay the binary's own stderr verbatim and propagate its exit status.
      stderr.print(e.stderr)
      e.status || 1
    rescue CLIProxy::Error => e
      # MissingBinaryError is a CLIProxy::Error subclass, so this single clause
      # covers it; listing both classes was redundant.
      stderr.puts(e.message)
      1
    end
  end
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'open3'
|
|
4
|
+
require 'pathname'
|
|
5
|
+
|
|
6
|
+
module HtmlToMarkdown
  # Locates the compiled `html-to-markdown` CLI binary and runs it as a
  # subprocess, returning its captured standard output.
  module CLIProxy
    # Base class for every error raised by this module.
    Error = Class.new(StandardError)
    # Raised when no binary exists at any of the searched locations.
    MissingBinaryError = Class.new(Error)

    # Raised when the CLI process finishes unsuccessfully.
    class CLIExecutionError < Error
      # stderr: captured standard error of the process.
      # status: exit status reported for the process (callers treat nil as 1).
      attr_reader :stderr, :status

      # @param message [String] human-readable summary
      # @param stderr [String] captured standard error
      # @param status [Integer, nil] exit status of the process
      def initialize(message, stderr:, status:)
        super(message)
        @stderr = stderr
        @status = status
      end
    end

    module_function

    # Runs the CLI binary with the given arguments.
    #
    # @param argv [Array<#to_s>, #to_s, nil] arguments; wrapped with Array()
    #   and stringified
    # @return [String] captured standard output
    # @raise [MissingBinaryError] when the binary cannot be found
    # @raise [CLIExecutionError] when the process does not succeed
    def call(argv)
      binary = find_cli_binary
      args = Array(argv).map(&:to_s)
      stdout, stderr, status = Open3.capture3(binary.to_s, *args)
      return stdout if status.success?

      raise CLIExecutionError.new(
        "html-to-markdown CLI exited with status #{status.exitstatus}",
        stderr: stderr,
        status: status.exitstatus
      )
    end

    # @return [Pathname] first search path that is an existing file
    # @raise [MissingBinaryError] when none of the search paths is a file
    def find_cli_binary
      binary_name = Gem.win_platform? ? 'html-to-markdown.exe' : 'html-to-markdown'
      found = search_paths(binary_name).find(&:file?)
      return found if found

      raise MissingBinaryError, missing_binary_message
    end

    # @return [Pathname] directory two levels above this file
    def root_path
      @root_path ||= Pathname(__dir__).join('../..').expand_path
    end

    # @return [Pathname] directory one level above this file
    def lib_path
      @lib_path ||= Pathname(__dir__).join('..').expand_path
    end

    # Candidate locations for the binary, in lookup order.
    #
    # @param binary_name [String] platform-specific file name of the binary
    # @return [Array<Pathname>]
    def search_paths(binary_name)
      [
        root_path.join('target', 'release', binary_name),
        lib_path.join('bin', binary_name),
        lib_path.join(binary_name),
        # Pathname#parent never returns nil, so the former `&.` / `if` guard
        # around this entry was dead code.
        root_path.parent.parent.join('target', 'release', binary_name)
      ]
    end

    # @return [String] message used for MissingBinaryError
    def missing_binary_message
      <<~MSG.strip
        html-to-markdown CLI binary not found. Build it with
        `cargo build --release --package html-to-markdown-cli`.
      MSG
    end
  end
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'html_to_markdown/version'
|
|
4
|
+
require 'html_to_markdown_rb'
|
|
5
|
+
|
|
6
|
+
module HtmlToMarkdown
  autoload :CLI, 'html_to_markdown/cli'
  autoload :CLIProxy, 'html_to_markdown/cli_proxy'

  # Keep handles on the singleton methods that already exist when this file is
  # evaluated, because the wrappers below are defined under the same names.
  singleton_class.alias_method :native_convert, :convert
  singleton_class.alias_method :native_convert_with_inline_images, :convert_with_inline_images

  # Converts +html+ (coerced with #to_s) through the aliased native method.
  #
  # @param html [#to_s] HTML input
  # @param options [Hash, nil] conversion options passed through unchanged
  # @return whatever the native `convert` returns
  def convert(html, options = nil)
    native_convert(html.to_s, options)
  end

  # Same as #convert, additionally forwarding an inline image configuration.
  #
  # @param html [#to_s] HTML input
  # @param options [Hash, nil] conversion options passed through unchanged
  # @param image_config [Hash, nil] inline image settings passed through unchanged
  # @return whatever the native `convert_with_inline_images` returns
  def convert_with_inline_images(html, options = nil, image_config = nil)
    native_convert_with_inline_images(html.to_s, options, image_config)
  end

  module_function :convert, :convert_with_inline_images
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
require 'html_to_markdown/cli_proxy'
|
|
5
|
+
require 'html_to_markdown/cli'
|
|
6
|
+
require 'stringio'
|
|
7
|
+
|
|
8
|
+
RSpec.describe HtmlToMarkdown::CLIProxy do
  describe '.call' do
    it 'executes the CLI binary' do
      # Skip instead of failing when the binary has not been built.
      binary =
        begin
          described_class.find_cli_binary
        rescue HtmlToMarkdown::CLIProxy::MissingBinaryError
          skip 'CLI binary not built'
        end

      expect(binary).to be_file
      expect(described_class.call(['--version'])).to include(HtmlToMarkdown::VERSION)
    end
  end

  describe HtmlToMarkdown::CLI do
    it 'writes CLI output to stdout' do
      begin
        HtmlToMarkdown::CLIProxy.find_cli_binary
      rescue HtmlToMarkdown::CLIProxy::MissingBinaryError
        skip 'CLI binary not built'
      end

      captured_out = StringIO.new
      captured_err = StringIO.new

      exit_code = described_class.run(['--version'], stdout: captured_out, stderr: captured_err)

      expect(exit_code).to eq(0)
      expect(captured_out.string).to include(HtmlToMarkdown::VERSION)
      expect(captured_err.string).to be_empty
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
RSpec.describe HtmlToMarkdown do
  describe '.convert' do
    it 'converts simple headings' do
      expect(described_class.convert('<h1>Hello</h1>')).to eq("# Hello\n")
    end

    it 'accepts options hash' do
      result = described_class.convert(
        '<h1>Hello</h1>',
        heading_style: :atx_closed,
        default_title: true
      )
      expect(result).to include('Hello')
    end
  end

  describe '.convert_with_inline_images' do
    it 'returns inline images metadata' do
      # The image needs an inline payload (data URI). With an empty src there
      # is presumably nothing to extract, and `.first` below would be nil.
      # NOTE(review): payload is the base64 of "fake"; confirm against upstream.
      html = '<p><img src="data:image/png;base64,ZmFrZQ==" alt="fake"></p>'
      extraction = described_class.convert_with_inline_images(html)
      expect(extraction).to include(:markdown, :inline_images, :warnings)
      expect(extraction[:inline_images].first[:description]).to eq('fake')
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
data/src/lib.rs
ADDED
|
@@ -0,0 +1,432 @@
|
|
|
1
|
+
use html_to_markdown_rs::{
|
|
2
|
+
convert as convert_inner, convert_with_inline_images as convert_with_inline_images_inner, error::ConversionError,
|
|
3
|
+
CodeBlockStyle, ConversionOptions, HeadingStyle, HighlightStyle, HtmlExtraction, InlineImage, InlineImageConfig,
|
|
4
|
+
InlineImageFormat, InlineImageSource, InlineImageWarning, ListIndentType, NewlineStyle, PreprocessingOptions,
|
|
5
|
+
PreprocessingPreset, WhitespaceMode,
|
|
6
|
+
};
|
|
7
|
+
use magnus::prelude::*;
|
|
8
|
+
use magnus::{function, scan_args::scan_args, Error, RArray, RHash, Ruby, Symbol, TryConvert, Value};
|
|
9
|
+
|
|
10
|
+
const DEFAULT_INLINE_IMAGE_LIMIT: u64 = 5 * 1024 * 1024;
|
|
11
|
+
|
|
12
|
+
fn conversion_error(err: ConversionError) -> Error {
|
|
13
|
+
match err {
|
|
14
|
+
ConversionError::ConfigError(msg) => arg_error(msg),
|
|
15
|
+
other => runtime_error(other.to_string()),
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
fn arg_error(message: impl Into<String>) -> Error {
|
|
20
|
+
let ruby = Ruby::get().expect("Ruby not initialised");
|
|
21
|
+
Error::new(ruby.exception_arg_error(), message.into())
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
fn runtime_error(message: impl Into<String>) -> Error {
|
|
25
|
+
let ruby = Ruby::get().expect("Ruby not initialised");
|
|
26
|
+
Error::new(ruby.exception_runtime_error(), message.into())
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
fn symbol_to_string(value: Value) -> Result<String, Error> {
|
|
30
|
+
if let Some(symbol) = Symbol::from_value(value) {
|
|
31
|
+
Ok(symbol.name()?.to_string())
|
|
32
|
+
} else {
|
|
33
|
+
String::try_convert(value)
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
/// Looks `name` up in `hash`, trying the interned (symbol) key first and the
/// plain string key second.
fn get_kw(ruby: &Ruby, hash: RHash, name: &str) -> Option<Value> {
    if let Some(found) = hash.get(ruby.intern(name)) {
        return Some(found);
    }
    hash.get(name)
}
|
|
41
|
+
|
|
42
|
+
fn parse_heading_style(value: Value) -> Result<HeadingStyle, Error> {
|
|
43
|
+
match symbol_to_string(value)?.as_str() {
|
|
44
|
+
"underlined" => Ok(HeadingStyle::Underlined),
|
|
45
|
+
"atx" => Ok(HeadingStyle::Atx),
|
|
46
|
+
"atx_closed" => Ok(HeadingStyle::AtxClosed),
|
|
47
|
+
other => Err(arg_error(format!("invalid heading_style: {other}"))),
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
fn parse_list_indent_type(value: Value) -> Result<ListIndentType, Error> {
|
|
52
|
+
match symbol_to_string(value)?.as_str() {
|
|
53
|
+
"spaces" => Ok(ListIndentType::Spaces),
|
|
54
|
+
"tabs" => Ok(ListIndentType::Tabs),
|
|
55
|
+
other => Err(arg_error(format!("invalid list_indent_type: {other}"))),
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
fn parse_highlight_style(value: Value) -> Result<HighlightStyle, Error> {
|
|
60
|
+
match symbol_to_string(value)?.as_str() {
|
|
61
|
+
"double_equal" => Ok(HighlightStyle::DoubleEqual),
|
|
62
|
+
"html" => Ok(HighlightStyle::Html),
|
|
63
|
+
"bold" => Ok(HighlightStyle::Bold),
|
|
64
|
+
"none" => Ok(HighlightStyle::None),
|
|
65
|
+
other => Err(arg_error(format!("invalid highlight_style: {other}"))),
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
fn parse_whitespace_mode(value: Value) -> Result<WhitespaceMode, Error> {
|
|
70
|
+
match symbol_to_string(value)?.as_str() {
|
|
71
|
+
"normalized" => Ok(WhitespaceMode::Normalized),
|
|
72
|
+
"strict" => Ok(WhitespaceMode::Strict),
|
|
73
|
+
other => Err(arg_error(format!("invalid whitespace_mode: {other}"))),
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
fn parse_newline_style(value: Value) -> Result<NewlineStyle, Error> {
|
|
78
|
+
match symbol_to_string(value)?.as_str() {
|
|
79
|
+
"spaces" => Ok(NewlineStyle::Spaces),
|
|
80
|
+
"backslash" => Ok(NewlineStyle::Backslash),
|
|
81
|
+
other => Err(arg_error(format!("invalid newline_style: {other}"))),
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
fn parse_code_block_style(value: Value) -> Result<CodeBlockStyle, Error> {
|
|
86
|
+
match symbol_to_string(value)?.as_str() {
|
|
87
|
+
"indented" => Ok(CodeBlockStyle::Indented),
|
|
88
|
+
"backticks" => Ok(CodeBlockStyle::Backticks),
|
|
89
|
+
"tildes" => Ok(CodeBlockStyle::Tildes),
|
|
90
|
+
other => Err(arg_error(format!("invalid code_block_style: {other}"))),
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
fn parse_preset(value: Value) -> Result<PreprocessingPreset, Error> {
|
|
95
|
+
match symbol_to_string(value)?.as_str() {
|
|
96
|
+
"minimal" => Ok(PreprocessingPreset::Minimal),
|
|
97
|
+
"standard" => Ok(PreprocessingPreset::Standard),
|
|
98
|
+
"aggressive" => Ok(PreprocessingPreset::Aggressive),
|
|
99
|
+
other => Err(arg_error(format!("invalid preprocessing preset: {other}"))),
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
fn parse_vec_of_strings(value: Value) -> Result<Vec<String>, Error> {
|
|
104
|
+
let array = RArray::from_value(value).ok_or_else(|| arg_error("expected an Array of strings"))?;
|
|
105
|
+
|
|
106
|
+
array.to_vec::<String>()
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
fn parse_preprocessing_options(ruby: &Ruby, value: Value) -> Result<PreprocessingOptions, Error> {
|
|
110
|
+
let hash = RHash::from_value(value).ok_or_else(|| arg_error("expected preprocessing to be a Hash"))?;
|
|
111
|
+
|
|
112
|
+
let mut opts = PreprocessingOptions::default();
|
|
113
|
+
|
|
114
|
+
if let Some(enabled) = get_kw(ruby, hash, "enabled") {
|
|
115
|
+
opts.enabled = bool::try_convert(enabled)?;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
if let Some(preset) = get_kw(ruby, hash, "preset") {
|
|
119
|
+
opts.preset = parse_preset(preset)?;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
if let Some(remove_navigation) = get_kw(ruby, hash, "remove_navigation") {
|
|
123
|
+
opts.remove_navigation = bool::try_convert(remove_navigation)?;
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
if let Some(remove_forms) = get_kw(ruby, hash, "remove_forms") {
|
|
127
|
+
opts.remove_forms = bool::try_convert(remove_forms)?;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
Ok(opts)
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
fn build_conversion_options(ruby: &Ruby, options: Option<Value>) -> Result<ConversionOptions, Error> {
|
|
134
|
+
let mut opts = ConversionOptions::default();
|
|
135
|
+
|
|
136
|
+
let Some(options) = options else {
|
|
137
|
+
return Ok(opts);
|
|
138
|
+
};
|
|
139
|
+
|
|
140
|
+
if options.is_nil() {
|
|
141
|
+
return Ok(opts);
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
let hash = RHash::from_value(options).ok_or_else(|| arg_error("options must be provided as a Hash"))?;
|
|
145
|
+
|
|
146
|
+
if let Some(heading_style) = get_kw(ruby, hash, "heading_style") {
|
|
147
|
+
opts.heading_style = parse_heading_style(heading_style)?;
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
if let Some(list_indent_type) = get_kw(ruby, hash, "list_indent_type") {
|
|
151
|
+
opts.list_indent_type = parse_list_indent_type(list_indent_type)?;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
if let Some(list_indent_width) = get_kw(ruby, hash, "list_indent_width") {
|
|
155
|
+
opts.list_indent_width = usize::try_convert(list_indent_width)?;
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
if let Some(bullets) = get_kw(ruby, hash, "bullets") {
|
|
159
|
+
opts.bullets = String::try_convert(bullets)?;
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
if let Some(strong_em_symbol) = get_kw(ruby, hash, "strong_em_symbol") {
|
|
163
|
+
let value = String::try_convert(strong_em_symbol)?;
|
|
164
|
+
let mut chars = value.chars();
|
|
165
|
+
let ch = chars
|
|
166
|
+
.next()
|
|
167
|
+
.ok_or_else(|| arg_error("strong_em_symbol must not be empty"))?;
|
|
168
|
+
if chars.next().is_some() {
|
|
169
|
+
return Err(arg_error("strong_em_symbol must be a single character"));
|
|
170
|
+
}
|
|
171
|
+
opts.strong_em_symbol = ch;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
if let Some(escape_asterisks) = get_kw(ruby, hash, "escape_asterisks") {
|
|
175
|
+
opts.escape_asterisks = bool::try_convert(escape_asterisks)?;
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
if let Some(escape_underscores) = get_kw(ruby, hash, "escape_underscores") {
|
|
179
|
+
opts.escape_underscores = bool::try_convert(escape_underscores)?;
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
if let Some(escape_misc) = get_kw(ruby, hash, "escape_misc") {
|
|
183
|
+
opts.escape_misc = bool::try_convert(escape_misc)?;
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
if let Some(escape_ascii) = get_kw(ruby, hash, "escape_ascii") {
|
|
187
|
+
opts.escape_ascii = bool::try_convert(escape_ascii)?;
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
if let Some(code_language) = get_kw(ruby, hash, "code_language") {
|
|
191
|
+
opts.code_language = String::try_convert(code_language)?;
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
if let Some(autolinks) = get_kw(ruby, hash, "autolinks") {
|
|
195
|
+
opts.autolinks = bool::try_convert(autolinks)?;
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
if let Some(default_title) = get_kw(ruby, hash, "default_title") {
|
|
199
|
+
opts.default_title = bool::try_convert(default_title)?;
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
if let Some(br_in_tables) = get_kw(ruby, hash, "br_in_tables") {
|
|
203
|
+
opts.br_in_tables = bool::try_convert(br_in_tables)?;
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
if let Some(hocr_spatial_tables) = get_kw(ruby, hash, "hocr_spatial_tables") {
|
|
207
|
+
opts.hocr_spatial_tables = bool::try_convert(hocr_spatial_tables)?;
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
if let Some(highlight_style) = get_kw(ruby, hash, "highlight_style") {
|
|
211
|
+
opts.highlight_style = parse_highlight_style(highlight_style)?;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
if let Some(extract_metadata) = get_kw(ruby, hash, "extract_metadata") {
|
|
215
|
+
opts.extract_metadata = bool::try_convert(extract_metadata)?;
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
if let Some(whitespace_mode) = get_kw(ruby, hash, "whitespace_mode") {
|
|
219
|
+
opts.whitespace_mode = parse_whitespace_mode(whitespace_mode)?;
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
if let Some(strip_newlines) = get_kw(ruby, hash, "strip_newlines") {
|
|
223
|
+
opts.strip_newlines = bool::try_convert(strip_newlines)?;
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
if let Some(wrap) = get_kw(ruby, hash, "wrap") {
|
|
227
|
+
opts.wrap = bool::try_convert(wrap)?;
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
if let Some(wrap_width) = get_kw(ruby, hash, "wrap_width") {
|
|
231
|
+
opts.wrap_width = usize::try_convert(wrap_width)?;
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
if let Some(convert_as_inline) = get_kw(ruby, hash, "convert_as_inline") {
|
|
235
|
+
opts.convert_as_inline = bool::try_convert(convert_as_inline)?;
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
if let Some(sub_symbol) = get_kw(ruby, hash, "sub_symbol") {
|
|
239
|
+
opts.sub_symbol = String::try_convert(sub_symbol)?;
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
if let Some(sup_symbol) = get_kw(ruby, hash, "sup_symbol") {
|
|
243
|
+
opts.sup_symbol = String::try_convert(sup_symbol)?;
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
if let Some(newline_style) = get_kw(ruby, hash, "newline_style") {
|
|
247
|
+
opts.newline_style = parse_newline_style(newline_style)?;
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
if let Some(code_block_style) = get_kw(ruby, hash, "code_block_style") {
|
|
251
|
+
opts.code_block_style = parse_code_block_style(code_block_style)?;
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
if let Some(keep_inline_images_in) = get_kw(ruby, hash, "keep_inline_images_in") {
|
|
255
|
+
opts.keep_inline_images_in = parse_vec_of_strings(keep_inline_images_in)?;
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
if let Some(preprocessing) = get_kw(ruby, hash, "preprocessing") {
|
|
259
|
+
opts.preprocessing = parse_preprocessing_options(ruby, preprocessing)?;
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
if let Some(encoding) = get_kw(ruby, hash, "encoding") {
|
|
263
|
+
opts.encoding = String::try_convert(encoding)?;
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
if let Some(debug) = get_kw(ruby, hash, "debug") {
|
|
267
|
+
opts.debug = bool::try_convert(debug)?;
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
if let Some(strip_tags) = get_kw(ruby, hash, "strip_tags") {
|
|
271
|
+
opts.strip_tags = parse_vec_of_strings(strip_tags)?;
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
if let Some(preserve_tags) = get_kw(ruby, hash, "preserve_tags") {
|
|
275
|
+
opts.preserve_tags = parse_vec_of_strings(preserve_tags)?;
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
Ok(opts)
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
fn build_inline_image_config(ruby: &Ruby, config: Option<Value>) -> Result<InlineImageConfig, Error> {
|
|
282
|
+
let mut cfg = InlineImageConfig::new(DEFAULT_INLINE_IMAGE_LIMIT);
|
|
283
|
+
|
|
284
|
+
let Some(config) = config else {
|
|
285
|
+
return Ok(cfg);
|
|
286
|
+
};
|
|
287
|
+
|
|
288
|
+
if config.is_nil() {
|
|
289
|
+
return Ok(cfg);
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
let hash = RHash::from_value(config).ok_or_else(|| arg_error("inline image config must be provided as a Hash"))?;
|
|
293
|
+
|
|
294
|
+
if let Some(limit) = get_kw(ruby, hash, "max_decoded_size_bytes") {
|
|
295
|
+
cfg.max_decoded_size_bytes = u64::try_convert(limit)?;
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
if let Some(prefix) = get_kw(ruby, hash, "filename_prefix") {
|
|
299
|
+
cfg.filename_prefix = if prefix.is_nil() {
|
|
300
|
+
None
|
|
301
|
+
} else {
|
|
302
|
+
Some(String::try_convert(prefix)?)
|
|
303
|
+
};
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
if let Some(capture_svg) = get_kw(ruby, hash, "capture_svg") {
|
|
307
|
+
cfg.capture_svg = bool::try_convert(capture_svg)?;
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
if let Some(infer_dimensions) = get_kw(ruby, hash, "infer_dimensions") {
|
|
311
|
+
cfg.infer_dimensions = bool::try_convert(infer_dimensions)?;
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
Ok(cfg)
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
fn inline_image_to_value(ruby: &Ruby, image: InlineImage) -> Result<Value, Error> {
|
|
318
|
+
let InlineImage {
|
|
319
|
+
data,
|
|
320
|
+
format,
|
|
321
|
+
filename,
|
|
322
|
+
description,
|
|
323
|
+
dimensions,
|
|
324
|
+
source,
|
|
325
|
+
attributes,
|
|
326
|
+
} = image;
|
|
327
|
+
|
|
328
|
+
let hash = ruby.hash_new();
|
|
329
|
+
let data_value = ruby.str_from_slice(&data);
|
|
330
|
+
hash.aset(ruby.intern("data"), data_value)?;
|
|
331
|
+
|
|
332
|
+
let format_value = match format {
|
|
333
|
+
InlineImageFormat::Png => "png".to_string(),
|
|
334
|
+
InlineImageFormat::Jpeg => "jpeg".to_string(),
|
|
335
|
+
InlineImageFormat::Gif => "gif".to_string(),
|
|
336
|
+
InlineImageFormat::Bmp => "bmp".to_string(),
|
|
337
|
+
InlineImageFormat::Webp => "webp".to_string(),
|
|
338
|
+
InlineImageFormat::Svg => "svg".to_string(),
|
|
339
|
+
InlineImageFormat::Other(other) => other,
|
|
340
|
+
};
|
|
341
|
+
hash.aset(ruby.intern("format"), format_value)?;
|
|
342
|
+
|
|
343
|
+
match filename {
|
|
344
|
+
Some(name) => hash.aset(ruby.intern("filename"), name)?,
|
|
345
|
+
None => hash.aset(ruby.intern("filename"), ruby.qnil())?,
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
match description {
|
|
349
|
+
Some(desc) => hash.aset(ruby.intern("description"), desc)?,
|
|
350
|
+
None => hash.aset(ruby.intern("description"), ruby.qnil())?,
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
if let Some((width, height)) = dimensions {
|
|
354
|
+
let dims = ruby.ary_new();
|
|
355
|
+
dims.push(width as i64)?;
|
|
356
|
+
dims.push(height as i64)?;
|
|
357
|
+
hash.aset(ruby.intern("dimensions"), dims)?;
|
|
358
|
+
} else {
|
|
359
|
+
hash.aset(ruby.intern("dimensions"), ruby.qnil())?;
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
let source_value = match source {
|
|
363
|
+
InlineImageSource::ImgDataUri => "img_data_uri",
|
|
364
|
+
InlineImageSource::SvgElement => "svg_element",
|
|
365
|
+
};
|
|
366
|
+
hash.aset(ruby.intern("source"), source_value)?;
|
|
367
|
+
|
|
368
|
+
let attrs = ruby.hash_new();
|
|
369
|
+
for (key, value) in attributes {
|
|
370
|
+
attrs.aset(key, value)?;
|
|
371
|
+
}
|
|
372
|
+
hash.aset(ruby.intern("attributes"), attrs)?;
|
|
373
|
+
|
|
374
|
+
Ok(hash.as_value())
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
/// Converts an `InlineImageWarning` into a Ruby Hash with the symbol keys
/// `index` (Integer) and `message`.
fn warning_to_value(ruby: &Ruby, warning: InlineImageWarning) -> Result<Value, Error> {
    let entry = ruby.hash_new();
    let position = warning.index as i64;
    entry.aset(ruby.intern("index"), position)?;
    entry.aset(ruby.intern("message"), warning.message)?;
    Ok(entry.as_value())
}
|
|
383
|
+
|
|
384
|
+
fn extraction_to_value(ruby: &Ruby, extraction: HtmlExtraction) -> Result<Value, Error> {
|
|
385
|
+
let hash = ruby.hash_new();
|
|
386
|
+
hash.aset(ruby.intern("markdown"), extraction.markdown)?;
|
|
387
|
+
|
|
388
|
+
let inline_images = ruby.ary_new();
|
|
389
|
+
for image in extraction.inline_images {
|
|
390
|
+
inline_images.push(inline_image_to_value(ruby, image)?)?;
|
|
391
|
+
}
|
|
392
|
+
hash.aset(ruby.intern("inline_images"), inline_images)?;
|
|
393
|
+
|
|
394
|
+
let warnings = ruby.ary_new();
|
|
395
|
+
for warning in extraction.warnings {
|
|
396
|
+
warnings.push(warning_to_value(ruby, warning)?)?;
|
|
397
|
+
}
|
|
398
|
+
hash.aset(ruby.intern("warnings"), warnings)?;
|
|
399
|
+
|
|
400
|
+
Ok(hash.as_value())
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
fn convert_fn(ruby: &Ruby, args: &[Value]) -> Result<String, Error> {
|
|
404
|
+
let parsed = scan_args::<(String,), (Option<Value>,), (), (), (), ()>(args)?;
|
|
405
|
+
let html = parsed.required.0;
|
|
406
|
+
let options = build_conversion_options(ruby, parsed.optional.0)?;
|
|
407
|
+
|
|
408
|
+
convert_inner(&html, Some(options)).map_err(conversion_error)
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
fn convert_with_inline_images_fn(ruby: &Ruby, args: &[Value]) -> Result<Value, Error> {
|
|
412
|
+
let parsed = scan_args::<(String,), (Option<Value>, Option<Value>), (), (), (), ()>(args)?;
|
|
413
|
+
let html = parsed.required.0;
|
|
414
|
+
let options = build_conversion_options(ruby, parsed.optional.0)?;
|
|
415
|
+
let config = build_inline_image_config(ruby, parsed.optional.1)?;
|
|
416
|
+
|
|
417
|
+
let extraction = convert_with_inline_images_inner(&html, Some(options), config).map_err(conversion_error)?;
|
|
418
|
+
|
|
419
|
+
extraction_to_value(ruby, extraction)
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
#[magnus::init]
|
|
423
|
+
fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
424
|
+
let module = ruby.define_module("HtmlToMarkdown")?;
|
|
425
|
+
module.define_singleton_method("convert", function!(convert_fn, -1))?;
|
|
426
|
+
module.define_singleton_method(
|
|
427
|
+
"convert_with_inline_images",
|
|
428
|
+
function!(convert_with_inline_images_fn, -1),
|
|
429
|
+
)?;
|
|
430
|
+
|
|
431
|
+
Ok(())
|
|
432
|
+
}
|
metadata
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: html-to-markdown
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 2.5.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Na'aman Hirschfeld
|
|
8
|
+
bindir: exe
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: rb_sys
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - ">="
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '0.9'
|
|
19
|
+
- - "<"
|
|
20
|
+
- !ruby/object:Gem::Version
|
|
21
|
+
version: '1.0'
|
|
22
|
+
type: :runtime
|
|
23
|
+
prerelease: false
|
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
25
|
+
requirements:
|
|
26
|
+
- - ">="
|
|
27
|
+
- !ruby/object:Gem::Version
|
|
28
|
+
version: '0.9'
|
|
29
|
+
- - "<"
|
|
30
|
+
- !ruby/object:Gem::Version
|
|
31
|
+
version: '1.0'
|
|
32
|
+
description: High-performance HTML to Markdown conversion from Ruby using Magnus and
|
|
33
|
+
rb-sys.
|
|
34
|
+
email:
|
|
35
|
+
- nhirschfeld@gmail.com
|
|
36
|
+
executables:
|
|
37
|
+
- html-to-markdown
|
|
38
|
+
extensions:
|
|
39
|
+
- extconf.rb
|
|
40
|
+
extra_rdoc_files: []
|
|
41
|
+
files:
|
|
42
|
+
- Cargo.toml
|
|
43
|
+
- README.md
|
|
44
|
+
- exe/html-to-markdown
|
|
45
|
+
- extconf.rb
|
|
46
|
+
- lib/bin/html-to-markdown
|
|
47
|
+
- lib/html_to_markdown.rb
|
|
48
|
+
- lib/html_to_markdown/cli.rb
|
|
49
|
+
- lib/html_to_markdown/cli_proxy.rb
|
|
50
|
+
- lib/html_to_markdown/version.rb
|
|
51
|
+
- spec/cli_proxy_spec.rb
|
|
52
|
+
- spec/convert_spec.rb
|
|
53
|
+
- spec/spec_helper.rb
|
|
54
|
+
- src/lib.rs
|
|
55
|
+
homepage: https://github.com/Goldziher/html-to-markdown
|
|
56
|
+
licenses:
|
|
57
|
+
- MIT
|
|
58
|
+
metadata:
|
|
59
|
+
rubygems_mfa_required: 'true'
|
|
60
|
+
homepage_uri: https://github.com/Goldziher/html-to-markdown
|
|
61
|
+
source_code_uri: https://github.com/Goldziher/html-to-markdown
|
|
62
|
+
bug_tracker_uri: https://github.com/Goldziher/html-to-markdown/issues
|
|
63
|
+
changelog_uri: https://github.com/Goldziher/html-to-markdown/releases
|
|
64
|
+
documentation_uri: https://github.com/Goldziher/html-to-markdown/blob/main/README.md
|
|
65
|
+
rdoc_options: []
|
|
66
|
+
require_paths:
|
|
67
|
+
- lib
|
|
68
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
69
|
+
requirements:
|
|
70
|
+
- - ">="
|
|
71
|
+
- !ruby/object:Gem::Version
|
|
72
|
+
version: '3.2'
|
|
73
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
74
|
+
requirements:
|
|
75
|
+
- - ">="
|
|
76
|
+
- !ruby/object:Gem::Version
|
|
77
|
+
version: '0'
|
|
78
|
+
requirements: []
|
|
79
|
+
rubygems_version: 3.7.2
|
|
80
|
+
specification_version: 4
|
|
81
|
+
summary: Ruby bindings for the html-to-markdown Rust library
|
|
82
|
+
test_files: []
|