html-to-markdown 2.6.4 → 2.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +29 -29
- data/Gemfile +15 -15
- data/Gemfile.lock +2 -3
- data/README.md +209 -210
- data/Rakefile +24 -24
- data/exe/html-to-markdown +6 -6
- data/ext/html-to-markdown-rb/extconf.rb +28 -28
- data/ext/html-to-markdown-rb/native/Cargo.toml +28 -0
- data/ext/html-to-markdown-rb/native/README.md +209 -0
- data/ext/html-to-markdown-rb/native/extconf.rb +3 -0
- data/ext/html-to-markdown-rb/native/src/lib.rs +432 -0
- data/html-to-markdown-rb.gemspec +59 -59
- data/lib/html_to_markdown/cli.rb +21 -21
- data/lib/html_to_markdown/cli_proxy.rb +71 -71
- data/lib/html_to_markdown/version.rb +5 -5
- data/lib/html_to_markdown.rb +24 -24
- data/spec/cli_proxy_spec.rb +42 -42
- data/spec/convert_spec.rb +29 -29
- data/spec/spec_helper.rb +10 -10
- metadata +6 -2
|
@@ -1,71 +1,71 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'open3'
|
|
4
|
-
require 'pathname'
|
|
5
|
-
|
|
6
|
-
module HtmlToMarkdown
|
|
7
|
-
module CLIProxy
|
|
8
|
-
Error = Class.new(StandardError)
|
|
9
|
-
MissingBinaryError = Class.new(Error)
|
|
10
|
-
|
|
11
|
-
class CLIExecutionError < Error
|
|
12
|
-
attr_reader :stderr, :status
|
|
13
|
-
|
|
14
|
-
def initialize(message, stderr:, status:)
|
|
15
|
-
super(message)
|
|
16
|
-
@stderr = stderr
|
|
17
|
-
@status = status
|
|
18
|
-
end
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
module_function
|
|
22
|
-
|
|
23
|
-
def call(argv)
|
|
24
|
-
binary = find_cli_binary
|
|
25
|
-
args = Array(argv).map(&:to_s)
|
|
26
|
-
stdout, stderr, status = Open3.capture3(binary.to_s, *args)
|
|
27
|
-
return stdout if status.success?
|
|
28
|
-
|
|
29
|
-
raise CLIExecutionError.new(
|
|
30
|
-
"html-to-markdown CLI exited with status #{status.exitstatus}",
|
|
31
|
-
stderr: stderr,
|
|
32
|
-
status: status.exitstatus
|
|
33
|
-
)
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
def find_cli_binary
|
|
37
|
-
binary_name = Gem.win_platform? ? 'html-to-markdown.exe' : 'html-to-markdown'
|
|
38
|
-
found = search_paths(binary_name).find(&:file?)
|
|
39
|
-
return found if found
|
|
40
|
-
|
|
41
|
-
raise MissingBinaryError, missing_binary_message
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
def root_path
|
|
45
|
-
@root_path ||= Pathname(__dir__).join('../..').expand_path
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
def lib_path
|
|
49
|
-
@lib_path ||= Pathname(__dir__).join('..').expand_path
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
def search_paths(binary_name)
|
|
53
|
-
paths = [
|
|
54
|
-
root_path.join('target', 'release', binary_name),
|
|
55
|
-
lib_path.join('bin', binary_name),
|
|
56
|
-
lib_path.join(binary_name)
|
|
57
|
-
]
|
|
58
|
-
|
|
59
|
-
workspace_root = root_path.parent&.parent
|
|
60
|
-
paths << workspace_root.join('target', 'release', binary_name) if workspace_root
|
|
61
|
-
paths
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
def missing_binary_message
|
|
65
|
-
<<~MSG.strip
|
|
66
|
-
html-to-markdown CLI binary not found. Build it with
|
|
67
|
-
`cargo build --release --package html-to-markdown-cli`.
|
|
68
|
-
MSG
|
|
69
|
-
end
|
|
70
|
-
end
|
|
71
|
-
end
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'open3'
|
|
4
|
+
require 'pathname'
|
|
5
|
+
|
|
6
|
+
module HtmlToMarkdown
|
|
7
|
+
module CLIProxy
|
|
8
|
+
Error = Class.new(StandardError)
|
|
9
|
+
MissingBinaryError = Class.new(Error)
|
|
10
|
+
|
|
11
|
+
class CLIExecutionError < Error
|
|
12
|
+
attr_reader :stderr, :status
|
|
13
|
+
|
|
14
|
+
def initialize(message, stderr:, status:)
|
|
15
|
+
super(message)
|
|
16
|
+
@stderr = stderr
|
|
17
|
+
@status = status
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
module_function
|
|
22
|
+
|
|
23
|
+
def call(argv)
|
|
24
|
+
binary = find_cli_binary
|
|
25
|
+
args = Array(argv).map(&:to_s)
|
|
26
|
+
stdout, stderr, status = Open3.capture3(binary.to_s, *args)
|
|
27
|
+
return stdout if status.success?
|
|
28
|
+
|
|
29
|
+
raise CLIExecutionError.new(
|
|
30
|
+
"html-to-markdown CLI exited with status #{status.exitstatus}",
|
|
31
|
+
stderr: stderr,
|
|
32
|
+
status: status.exitstatus
|
|
33
|
+
)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def find_cli_binary
|
|
37
|
+
binary_name = Gem.win_platform? ? 'html-to-markdown.exe' : 'html-to-markdown'
|
|
38
|
+
found = search_paths(binary_name).find(&:file?)
|
|
39
|
+
return found if found
|
|
40
|
+
|
|
41
|
+
raise MissingBinaryError, missing_binary_message
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def root_path
|
|
45
|
+
@root_path ||= Pathname(__dir__).join('../..').expand_path
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def lib_path
|
|
49
|
+
@lib_path ||= Pathname(__dir__).join('..').expand_path
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def search_paths(binary_name)
|
|
53
|
+
paths = [
|
|
54
|
+
root_path.join('target', 'release', binary_name),
|
|
55
|
+
lib_path.join('bin', binary_name),
|
|
56
|
+
lib_path.join(binary_name)
|
|
57
|
+
]
|
|
58
|
+
|
|
59
|
+
workspace_root = root_path.parent&.parent
|
|
60
|
+
paths << workspace_root.join('target', 'release', binary_name) if workspace_root
|
|
61
|
+
paths
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def missing_binary_message
|
|
65
|
+
<<~MSG.strip
|
|
66
|
+
html-to-markdown CLI binary not found. Build it with
|
|
67
|
+
`cargo build --release --package html-to-markdown-cli`.
|
|
68
|
+
MSG
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module HtmlToMarkdown
|
|
4
|
-
VERSION = '2.6.4'
|
|
5
|
-
end
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module HtmlToMarkdown
|
|
4
|
+
VERSION = '2.6.6'
|
|
5
|
+
end
|
data/lib/html_to_markdown.rb
CHANGED
|
@@ -1,24 +1,24 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require_relative 'html_to_markdown/version'
|
|
4
|
-
require 'html_to_markdown_rb'
|
|
5
|
-
|
|
6
|
-
module HtmlToMarkdown
|
|
7
|
-
autoload :CLI, 'html_to_markdown/cli'
|
|
8
|
-
autoload :CLIProxy, 'html_to_markdown/cli_proxy'
|
|
9
|
-
|
|
10
|
-
class << self
|
|
11
|
-
alias native_convert convert
|
|
12
|
-
alias native_convert_with_inline_images convert_with_inline_images
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
module_function
|
|
16
|
-
|
|
17
|
-
def convert(html, options = nil)
|
|
18
|
-
native_convert(html.to_s, options)
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
def convert_with_inline_images(html, options = nil, image_config = nil)
|
|
22
|
-
native_convert_with_inline_images(html.to_s, options, image_config)
|
|
23
|
-
end
|
|
24
|
-
end
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'html_to_markdown/version'
|
|
4
|
+
require 'html_to_markdown_rb'
|
|
5
|
+
|
|
6
|
+
module HtmlToMarkdown
|
|
7
|
+
autoload :CLI, 'html_to_markdown/cli'
|
|
8
|
+
autoload :CLIProxy, 'html_to_markdown/cli_proxy'
|
|
9
|
+
|
|
10
|
+
class << self
|
|
11
|
+
alias native_convert convert
|
|
12
|
+
alias native_convert_with_inline_images convert_with_inline_images
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
module_function
|
|
16
|
+
|
|
17
|
+
def convert(html, options = nil)
|
|
18
|
+
native_convert(html.to_s, options)
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def convert_with_inline_images(html, options = nil, image_config = nil)
|
|
22
|
+
native_convert_with_inline_images(html.to_s, options, image_config)
|
|
23
|
+
end
|
|
24
|
+
end
|
data/spec/cli_proxy_spec.rb
CHANGED
|
@@ -1,42 +1,42 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'spec_helper'
|
|
4
|
-
require 'html_to_markdown/cli_proxy'
|
|
5
|
-
require 'html_to_markdown/cli'
|
|
6
|
-
require 'stringio'
|
|
7
|
-
|
|
8
|
-
RSpec.describe HtmlToMarkdown::CLIProxy do
|
|
9
|
-
describe '.call' do
|
|
10
|
-
it 'executes the CLI binary' do
|
|
11
|
-
begin
|
|
12
|
-
binary = described_class.find_cli_binary
|
|
13
|
-
rescue HtmlToMarkdown::CLIProxy::MissingBinaryError
|
|
14
|
-
skip 'CLI binary not built'
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
expect(binary).to be_file
|
|
18
|
-
|
|
19
|
-
output = described_class.call(['--version'])
|
|
20
|
-
expect(output).to include(HtmlToMarkdown::VERSION)
|
|
21
|
-
end
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
describe HtmlToMarkdown::CLI do
|
|
25
|
-
it 'writes CLI output to stdout' do
|
|
26
|
-
begin
|
|
27
|
-
HtmlToMarkdown::CLIProxy.find_cli_binary
|
|
28
|
-
rescue HtmlToMarkdown::CLIProxy::MissingBinaryError
|
|
29
|
-
skip 'CLI binary not built'
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
stdout = StringIO.new
|
|
33
|
-
stderr = StringIO.new
|
|
34
|
-
|
|
35
|
-
exit_code = described_class.run(['--version'], stdout: stdout, stderr: stderr)
|
|
36
|
-
|
|
37
|
-
expect(exit_code).to eq(0)
|
|
38
|
-
expect(stdout.string).to include(HtmlToMarkdown::VERSION)
|
|
39
|
-
expect(stderr.string).to be_empty
|
|
40
|
-
end
|
|
41
|
-
end
|
|
42
|
-
end
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
require 'html_to_markdown/cli_proxy'
|
|
5
|
+
require 'html_to_markdown/cli'
|
|
6
|
+
require 'stringio'
|
|
7
|
+
|
|
8
|
+
RSpec.describe HtmlToMarkdown::CLIProxy do
|
|
9
|
+
describe '.call' do
|
|
10
|
+
it 'executes the CLI binary' do
|
|
11
|
+
begin
|
|
12
|
+
binary = described_class.find_cli_binary
|
|
13
|
+
rescue HtmlToMarkdown::CLIProxy::MissingBinaryError
|
|
14
|
+
skip 'CLI binary not built'
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
expect(binary).to be_file
|
|
18
|
+
|
|
19
|
+
output = described_class.call(['--version'])
|
|
20
|
+
expect(output).to include(HtmlToMarkdown::VERSION)
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
describe HtmlToMarkdown::CLI do
|
|
25
|
+
it 'writes CLI output to stdout' do
|
|
26
|
+
begin
|
|
27
|
+
HtmlToMarkdown::CLIProxy.find_cli_binary
|
|
28
|
+
rescue HtmlToMarkdown::CLIProxy::MissingBinaryError
|
|
29
|
+
skip 'CLI binary not built'
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
stdout = StringIO.new
|
|
33
|
+
stderr = StringIO.new
|
|
34
|
+
|
|
35
|
+
exit_code = described_class.run(['--version'], stdout: stdout, stderr: stderr)
|
|
36
|
+
|
|
37
|
+
expect(exit_code).to eq(0)
|
|
38
|
+
expect(stdout.string).to include(HtmlToMarkdown::VERSION)
|
|
39
|
+
expect(stderr.string).to be_empty
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
end
|
data/spec/convert_spec.rb
CHANGED
|
@@ -1,29 +1,29 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'spec_helper'
|
|
4
|
-
|
|
5
|
-
RSpec.describe HtmlToMarkdown do
|
|
6
|
-
describe '.convert' do
|
|
7
|
-
it 'converts simple headings' do
|
|
8
|
-
expect(described_class.convert('<h1>Hello</h1>')).to eq("# Hello\n")
|
|
9
|
-
end
|
|
10
|
-
|
|
11
|
-
it 'accepts options hash' do
|
|
12
|
-
result = described_class.convert(
|
|
13
|
-
'<h1>Hello</h1>',
|
|
14
|
-
heading_style: :atx_closed,
|
|
15
|
-
default_title: true
|
|
16
|
-
)
|
|
17
|
-
expect(result).to include('Hello')
|
|
18
|
-
end
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
describe '.convert_with_inline_images' do
|
|
22
|
-
it 'returns inline images metadata' do
|
|
23
|
-
html = '<p><img src="data:image/png;base64,ZmFrZQ==" alt="fake"></p>'
|
|
24
|
-
extraction = described_class.convert_with_inline_images(html)
|
|
25
|
-
expect(extraction).to include(:markdown, :inline_images, :warnings)
|
|
26
|
-
expect(extraction[:inline_images].first[:description]).to eq('fake')
|
|
27
|
-
end
|
|
28
|
-
end
|
|
29
|
-
end
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
RSpec.describe HtmlToMarkdown do
|
|
6
|
+
describe '.convert' do
|
|
7
|
+
it 'converts simple headings' do
|
|
8
|
+
expect(described_class.convert('<h1>Hello</h1>')).to eq("# Hello\n")
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
it 'accepts options hash' do
|
|
12
|
+
result = described_class.convert(
|
|
13
|
+
'<h1>Hello</h1>',
|
|
14
|
+
heading_style: :atx_closed,
|
|
15
|
+
default_title: true
|
|
16
|
+
)
|
|
17
|
+
expect(result).to include('Hello')
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
describe '.convert_with_inline_images' do
|
|
22
|
+
it 'returns inline images metadata' do
|
|
23
|
+
html = '<p><img src="data:image/png;base64,ZmFrZQ==" alt="fake"></p>'
|
|
24
|
+
extraction = described_class.convert_with_inline_images(html)
|
|
25
|
+
expect(extraction).to include(:markdown, :inline_images, :warnings)
|
|
26
|
+
expect(extraction[:inline_images].first[:description]).to eq('fake')
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
data/spec/spec_helper.rb
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'bundler/setup'
|
|
4
|
-
require 'html_to_markdown'
|
|
5
|
-
|
|
6
|
-
RSpec.configure do |config|
|
|
7
|
-
config.expect_with :rspec do |c|
|
|
8
|
-
c.syntax = :expect
|
|
9
|
-
end
|
|
10
|
-
end
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'bundler/setup'
|
|
4
|
+
require 'html_to_markdown'
|
|
5
|
+
|
|
6
|
+
RSpec.configure do |config|
|
|
7
|
+
config.expect_with :rspec do |c|
|
|
8
|
+
c.syntax = :expect
|
|
9
|
+
end
|
|
10
|
+
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: html-to-markdown
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.6.4
|
|
4
|
+
version: 2.6.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Na'aman Hirschfeld
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2025-11-
|
|
11
|
+
date: 2025-11-10 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rb_sys
|
|
@@ -49,6 +49,10 @@ files:
|
|
|
49
49
|
- Rakefile
|
|
50
50
|
- exe/html-to-markdown
|
|
51
51
|
- ext/html-to-markdown-rb/extconf.rb
|
|
52
|
+
- ext/html-to-markdown-rb/native/Cargo.toml
|
|
53
|
+
- ext/html-to-markdown-rb/native/README.md
|
|
54
|
+
- ext/html-to-markdown-rb/native/extconf.rb
|
|
55
|
+
- ext/html-to-markdown-rb/native/src/lib.rs
|
|
52
56
|
- html-to-markdown-rb.gemspec
|
|
53
57
|
- lib/html_to_markdown.rb
|
|
54
58
|
- lib/html_to_markdown/cli.rb
|