html-to-markdown 2.10.1 → 2.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,59 +1,59 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative 'lib/html_to_markdown/version'
4
-
5
- repo_root = File.expand_path('../..', __dir__)
6
- crate_prefix = 'packages/ruby/'
7
- git_cmd = %(git -C "#{repo_root}" ls-files -z #{crate_prefix})
8
- git_files =
9
- `#{git_cmd}`.split("\x0")
10
- .select { |path| path.start_with?(crate_prefix) }
11
- .map { |path| path.delete_prefix(crate_prefix) }
12
- fallback_files = Dir.chdir(__dir__) do
13
- Dir.glob(
14
- %w[
15
- README.md
16
- ext/html-to-markdown-rb/extconf.rb
17
- exe/*
18
- lib/**/*.rb
19
- lib/bin/*
20
- src/**/*.rs
21
- spec/**/*.rb
22
- ]
23
- )
24
- end
25
- files = git_files.empty? ? fallback_files : git_files
26
-
27
- Gem::Specification.new do |spec|
28
- spec.name = 'html-to-markdown'
29
- spec.version = HtmlToMarkdown::VERSION
30
- spec.authors = ["Na'aman Hirschfeld"]
31
- spec.email = ['nhirschfeld@gmail.com']
32
-
33
- spec.summary = 'Blazing-fast HTML to Markdown conversion for Ruby, powered by Rust.'
34
- spec.description = <<~DESC.strip
35
- html-to-markdown is a native Ruby extension built on the shared Rust engine that powers the html-to-markdown project.
36
- It delivers identical HTML-to-Markdown output across languages, exposes inline image extraction, and ships with a CLI for automation workflows.
37
- DESC
38
- spec.homepage = 'https://github.com/Goldziher/html-to-markdown'
39
- spec.license = 'MIT'
40
-
41
- spec.required_ruby_version = Gem::Requirement.new('>= 3.2')
42
-
43
- spec.bindir = 'exe'
44
- spec.executables = ['html-to-markdown']
45
- spec.require_paths = ['lib']
46
-
47
- spec.files = files
48
- spec.extra_rdoc_files = ['README.md']
49
-
50
- spec.extensions = ['ext/html-to-markdown-rb/extconf.rb']
51
-
52
- spec.add_dependency 'rb_sys', '>= 0.9', '< 1.0'
53
- spec.metadata['rubygems_mfa_required'] = 'true'
54
- spec.metadata['homepage_uri'] = 'https://github.com/Goldziher/html-to-markdown'
55
- spec.metadata['source_code_uri'] = 'https://github.com/Goldziher/html-to-markdown'
56
- spec.metadata['bug_tracker_uri'] = 'https://github.com/Goldziher/html-to-markdown/issues'
57
- spec.metadata['changelog_uri'] = 'https://github.com/Goldziher/html-to-markdown/releases'
58
- spec.metadata['documentation_uri'] = 'https://github.com/Goldziher/html-to-markdown/blob/main/packages/ruby/README.md'
59
- end
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'lib/html_to_markdown/version'
4
+
5
+ repo_root = File.expand_path('../..', __dir__)
6
+ crate_prefix = 'packages/ruby/'
7
+ git_cmd = %(git -C "#{repo_root}" ls-files -z #{crate_prefix})
8
+ git_files =
9
+ `#{git_cmd}`.split("\x0")
10
+ .select { |path| path.start_with?(crate_prefix) }
11
+ .map { |path| path.delete_prefix(crate_prefix) }
12
+ fallback_files = Dir.chdir(__dir__) do
13
+ Dir.glob(
14
+ %w[
15
+ README.md
16
+ ext/html-to-markdown-rb/extconf.rb
17
+ exe/*
18
+ lib/**/*.rb
19
+ lib/bin/*
20
+ src/**/*.rs
21
+ spec/**/*.rb
22
+ ]
23
+ )
24
+ end
25
+ files = git_files.empty? ? fallback_files : git_files
26
+
27
+ Gem::Specification.new do |spec|
28
+ spec.name = 'html-to-markdown'
29
+ spec.version = HtmlToMarkdown::VERSION
30
+ spec.authors = ["Na'aman Hirschfeld"]
31
+ spec.email = ['nhirschfeld@gmail.com']
32
+
33
+ spec.summary = 'Blazing-fast HTML to Markdown conversion for Ruby, powered by Rust.'
34
+ spec.description = <<~DESC.strip
35
+ html-to-markdown is a native Ruby extension built on the shared Rust engine that powers the html-to-markdown project.
36
+ It delivers identical HTML-to-Markdown output across languages, exposes inline image extraction, and ships with a CLI for automation workflows.
37
+ DESC
38
+ spec.homepage = 'https://github.com/Goldziher/html-to-markdown'
39
+ spec.license = 'MIT'
40
+
41
+ spec.required_ruby_version = Gem::Requirement.new('>= 3.2')
42
+
43
+ spec.bindir = 'exe'
44
+ spec.executables = ['html-to-markdown']
45
+ spec.require_paths = ['lib']
46
+
47
+ spec.files = files
48
+ spec.extra_rdoc_files = ['README.md']
49
+
50
+ spec.extensions = ['ext/html-to-markdown-rb/extconf.rb']
51
+
52
+ spec.add_dependency 'rb_sys', '>= 0.9', '< 1.0'
53
+ spec.metadata['rubygems_mfa_required'] = 'true'
54
+ spec.metadata['homepage_uri'] = 'https://github.com/Goldziher/html-to-markdown'
55
+ spec.metadata['source_code_uri'] = 'https://github.com/Goldziher/html-to-markdown'
56
+ spec.metadata['bug_tracker_uri'] = 'https://github.com/Goldziher/html-to-markdown/issues'
57
+ spec.metadata['changelog_uri'] = 'https://github.com/Goldziher/html-to-markdown/releases'
58
+ spec.metadata['documentation_uri'] = 'https://github.com/Goldziher/html-to-markdown/blob/main/packages/ruby/README.md'
59
+ end
@@ -1,21 +1,21 @@
1
- # frozen_string_literal: true
2
-
3
- require 'html_to_markdown/cli_proxy'
4
-
5
- module HtmlToMarkdown
6
- module CLI
7
- module_function
8
-
9
- def run(argv = ARGV, stdout: $stdout, stderr: $stderr)
10
- output = CLIProxy.call(argv)
11
- stdout.print(output)
12
- 0
13
- rescue CLIProxy::CLIExecutionError => e
14
- stderr.print(e.stderr)
15
- e.status || 1
16
- rescue CLIProxy::MissingBinaryError, CLIProxy::Error => e
17
- stderr.puts(e.message)
18
- 1
19
- end
20
- end
21
- end
1
+ # frozen_string_literal: true
2
+
3
+ require 'html_to_markdown/cli_proxy'
4
+
5
+ module HtmlToMarkdown
6
+ module CLI
7
+ module_function
8
+
9
+ def run(argv = ARGV, stdout: $stdout, stderr: $stderr)
10
+ output = CLIProxy.call(argv)
11
+ stdout.print(output)
12
+ 0
13
+ rescue CLIProxy::CLIExecutionError => e
14
+ stderr.print(e.stderr)
15
+ e.status || 1
16
+ rescue CLIProxy::MissingBinaryError, CLIProxy::Error => e
17
+ stderr.puts(e.message)
18
+ 1
19
+ end
20
+ end
21
+ end
@@ -1,71 +1,71 @@
1
- # frozen_string_literal: true
2
-
3
- require 'open3'
4
- require 'pathname'
5
-
6
- module HtmlToMarkdown
7
- module CLIProxy
8
- Error = Class.new(StandardError)
9
- MissingBinaryError = Class.new(Error)
10
-
11
- class CLIExecutionError < Error
12
- attr_reader :stderr, :status
13
-
14
- def initialize(message, stderr:, status:)
15
- super(message)
16
- @stderr = stderr
17
- @status = status
18
- end
19
- end
20
-
21
- module_function
22
-
23
- def call(argv)
24
- binary = find_cli_binary
25
- args = Array(argv).map(&:to_s)
26
- stdout, stderr, status = Open3.capture3(binary.to_s, *args)
27
- return stdout if status.success?
28
-
29
- raise CLIExecutionError.new(
30
- "html-to-markdown CLI exited with status #{status.exitstatus}",
31
- stderr: stderr,
32
- status: status.exitstatus
33
- )
34
- end
35
-
36
- def find_cli_binary
37
- binary_name = Gem.win_platform? ? 'html-to-markdown.exe' : 'html-to-markdown'
38
- found = search_paths(binary_name).find(&:file?)
39
- return found if found
40
-
41
- raise MissingBinaryError, missing_binary_message
42
- end
43
-
44
- def root_path
45
- @root_path ||= Pathname(__dir__.to_s).join('../..').expand_path
46
- end
47
-
48
- def lib_path
49
- @lib_path ||= Pathname(__dir__.to_s).join('..').expand_path
50
- end
51
-
52
- def search_paths(binary_name)
53
- paths = [
54
- root_path.join('target', 'release', binary_name),
55
- lib_path.join('bin', binary_name),
56
- lib_path.join(binary_name)
57
- ]
58
-
59
- workspace_root = root_path.parent&.parent
60
- paths << workspace_root.join('target', 'release', binary_name) if workspace_root
61
- paths
62
- end
63
-
64
- def missing_binary_message
65
- <<~MSG.strip
66
- html-to-markdown CLI binary not found. Build it with
67
- `cargo build --release --package html-to-markdown-cli`.
68
- MSG
69
- end
70
- end
71
- end
1
+ # frozen_string_literal: true
2
+
3
+ require 'open3'
4
+ require 'pathname'
5
+
6
+ module HtmlToMarkdown
7
+ module CLIProxy
8
+ Error = Class.new(StandardError)
9
+ MissingBinaryError = Class.new(Error)
10
+
11
+ class CLIExecutionError < Error
12
+ attr_reader :stderr, :status
13
+
14
+ def initialize(message, stderr:, status:)
15
+ super(message)
16
+ @stderr = stderr
17
+ @status = status
18
+ end
19
+ end
20
+
21
+ module_function
22
+
23
+ def call(argv)
24
+ binary = find_cli_binary
25
+ args = Array(argv).map(&:to_s)
26
+ stdout, stderr, status = Open3.capture3(binary.to_s, *args)
27
+ return stdout if status.success?
28
+
29
+ raise CLIExecutionError.new(
30
+ "html-to-markdown CLI exited with status #{status.exitstatus}",
31
+ stderr: stderr,
32
+ status: status.exitstatus
33
+ )
34
+ end
35
+
36
+ def find_cli_binary
37
+ binary_name = Gem.win_platform? ? 'html-to-markdown.exe' : 'html-to-markdown'
38
+ found = search_paths(binary_name).find(&:file?)
39
+ return found if found
40
+
41
+ raise MissingBinaryError, missing_binary_message
42
+ end
43
+
44
+ def root_path
45
+ @root_path ||= Pathname(__dir__.to_s).join('../..').expand_path
46
+ end
47
+
48
+ def lib_path
49
+ @lib_path ||= Pathname(__dir__.to_s).join('..').expand_path
50
+ end
51
+
52
+ def search_paths(binary_name)
53
+ paths = [
54
+ root_path.join('target', 'release', binary_name),
55
+ lib_path.join('bin', binary_name),
56
+ lib_path.join(binary_name)
57
+ ]
58
+
59
+ workspace_root = root_path.parent&.parent
60
+ paths << workspace_root.join('target', 'release', binary_name) if workspace_root
61
+ paths
62
+ end
63
+
64
+ def missing_binary_message
65
+ <<~MSG.strip
66
+ html-to-markdown CLI binary not found. Build it with
67
+ `cargo build --release --package html-to-markdown-cli`.
68
+ MSG
69
+ end
70
+ end
71
+ end
@@ -1,5 +1,5 @@
1
- # frozen_string_literal: true
2
-
3
- module HtmlToMarkdown
4
- VERSION = '2.10.1'
5
- end
1
+ # frozen_string_literal: true
2
+
3
+ module HtmlToMarkdown
4
+ VERSION = '2.11.1'
5
+ end
@@ -1,36 +1,36 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative 'html_to_markdown/version'
4
- require 'html_to_markdown_rb'
5
-
6
- module HtmlToMarkdown
7
- autoload :CLI, 'html_to_markdown/cli'
8
- autoload :CLIProxy, 'html_to_markdown/cli_proxy'
9
-
10
- class Options; end # rubocop:disable Lint/EmptyClass
11
-
12
- class << self
13
- alias native_convert convert
14
- alias native_convert_with_inline_images convert_with_inline_images
15
- alias native_options options
16
- alias native_convert_with_options convert_with_options
17
- end
18
-
19
- module_function
20
-
21
- def convert(html, options = nil)
22
- native_convert(html.to_s, options)
23
- end
24
-
25
- def convert_with_options(html, options_handle)
26
- native_convert_with_options(html.to_s, options_handle)
27
- end
28
-
29
- def convert_with_inline_images(html, options = nil, image_config = nil)
30
- native_convert_with_inline_images(html.to_s, options, image_config)
31
- end
32
-
33
- def options(options_hash = nil)
34
- native_options(options_hash)
35
- end
36
- end
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'html_to_markdown/version'
4
+ require 'html_to_markdown_rb'
5
+
6
+ module HtmlToMarkdown
7
+ autoload :CLI, 'html_to_markdown/cli'
8
+ autoload :CLIProxy, 'html_to_markdown/cli_proxy'
9
+
10
+ class Options; end # rubocop:disable Lint/EmptyClass
11
+
12
+ class << self
13
+ alias native_convert convert
14
+ alias native_convert_with_inline_images convert_with_inline_images
15
+ alias native_options options
16
+ alias native_convert_with_options convert_with_options
17
+ end
18
+
19
+ module_function
20
+
21
+ def convert(html, options = nil)
22
+ native_convert(html.to_s, options)
23
+ end
24
+
25
+ def convert_with_options(html, options_handle)
26
+ native_convert_with_options(html.to_s, options_handle)
27
+ end
28
+
29
+ def convert_with_inline_images(html, options = nil, image_config = nil)
30
+ native_convert_with_inline_images(html.to_s, options, image_config)
31
+ end
32
+
33
+ def options(options_hash = nil)
34
+ native_options(options_hash)
35
+ end
36
+ end
@@ -1,24 +1,24 @@
1
- module HtmlToMarkdown
2
- module CLI
3
- # Module method (module_function creates both module and instance methods)
4
- #
5
- # Run the CLI with the given arguments
6
- #
7
- # @param argv Command-line arguments (defaults to ARGV)
8
- # @param stdout Output stream for standard output
9
- # @param stderr Output stream for standard error
10
- # @return Exit code (0 for success, non-zero for failure)
11
- def self.run: (
12
- ?Array[String] argv,
13
- ?stdout: IO,
14
- ?stderr: IO
15
- ) -> Integer
16
-
17
- # Instance method version (created by module_function)
18
- def run: (
19
- ?Array[String] argv,
20
- ?stdout: IO,
21
- ?stderr: IO
22
- ) -> Integer
23
- end
24
- end
1
+ module HtmlToMarkdown
2
+ module CLI
3
+ # Module method (module_function creates both module and instance methods)
4
+ #
5
+ # Run the CLI with the given arguments
6
+ #
7
+ # @param argv Command-line arguments (defaults to ARGV)
8
+ # @param stdout Output stream for standard output
9
+ # @param stderr Output stream for standard error
10
+ # @return Exit code (0 for success, non-zero for failure)
11
+ def self.run: (
12
+ ?Array[String] argv,
13
+ ?stdout: IO,
14
+ ?stderr: IO
15
+ ) -> Integer
16
+
17
+ # Instance method version (created by module_function)
18
+ def run: (
19
+ ?Array[String] argv,
20
+ ?stdout: IO,
21
+ ?stderr: IO
22
+ ) -> Integer
23
+ end
24
+ end
@@ -1,48 +1,48 @@
1
- module HtmlToMarkdown
2
- module CLIProxy
3
- # Base error class
4
- class Error < StandardError
5
- end
6
-
7
- # Error when CLI binary is not found
8
- class MissingBinaryError < Error
9
- end
10
-
11
- # Error when CLI execution fails
12
- class CLIExecutionError < Error
13
- attr_reader stderr: String
14
- attr_reader status: Integer?
15
-
16
- def initialize: (String message, stderr: String, status: Integer?) -> void
17
- end
18
-
19
- # Module methods (module_function creates both module and instance methods)
20
-
21
- # Execute CLI with given arguments
22
- def self.call: (Array[String] argv) -> String
23
-
24
- # Find the CLI binary in search paths
25
- def self.find_cli_binary: () -> Pathname
26
-
27
- # Get root path of the gem
28
- def self.root_path: () -> Pathname
29
-
30
- # Get lib path of the gem
31
- def self.lib_path: () -> Pathname
32
-
33
- # Get search paths for CLI binary
34
- def self.search_paths: (String binary_name) -> Array[Pathname]
35
-
36
- # Get error message for missing binary
37
- def self.missing_binary_message: () -> String
38
-
39
- # Instance method versions (created by module_function)
40
-
41
- def call: (Array[String] argv) -> String
42
- def find_cli_binary: () -> Pathname
43
- def root_path: () -> Pathname
44
- def lib_path: () -> Pathname
45
- def search_paths: (String binary_name) -> Array[Pathname]
46
- def missing_binary_message: () -> String
47
- end
48
- end
1
+ module HtmlToMarkdown
2
+ module CLIProxy
3
+ # Base error class
4
+ class Error < StandardError
5
+ end
6
+
7
+ # Error when CLI binary is not found
8
+ class MissingBinaryError < Error
9
+ end
10
+
11
+ # Error when CLI execution fails
12
+ class CLIExecutionError < Error
13
+ attr_reader stderr: String
14
+ attr_reader status: Integer?
15
+
16
+ def initialize: (String message, stderr: String, status: Integer?) -> void
17
+ end
18
+
19
+ # Module methods (module_function creates both module and instance methods)
20
+
21
+ # Execute CLI with given arguments
22
+ def self.call: (Array[String] argv) -> String
23
+
24
+ # Find the CLI binary in search paths
25
+ def self.find_cli_binary: () -> Pathname
26
+
27
+ # Get root path of the gem
28
+ def self.root_path: () -> Pathname
29
+
30
+ # Get lib path of the gem
31
+ def self.lib_path: () -> Pathname
32
+
33
+ # Get search paths for CLI binary
34
+ def self.search_paths: (String binary_name) -> Array[Pathname]
35
+
36
+ # Get error message for missing binary
37
+ def self.missing_binary_message: () -> String
38
+
39
+ # Instance method versions (created by module_function)
40
+
41
+ def call: (Array[String] argv) -> String
42
+ def find_cli_binary: () -> Pathname
43
+ def root_path: () -> Pathname
44
+ def lib_path: () -> Pathname
45
+ def search_paths: (String binary_name) -> Array[Pathname]
46
+ def missing_binary_message: () -> String
47
+ end
48
+ end