html-to-markdown 3.4.0 → 3.6.0.pre.rc.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +21 -0
- data/README.md +347 -0
- data/Steepfile +10 -2
- data/ext/html_to_markdown_rb/Cargo.toml +3 -2
- data/ext/html_to_markdown_rb/extconf.rb +5 -5
- data/ext/html_to_markdown_rb/native/Cargo.lock +962 -0
- data/ext/html_to_markdown_rb/native/Cargo.toml +6 -11
- data/ext/html_to_markdown_rb/native/extconf.rb +14 -0
- data/ext/html_to_markdown_rb/src/lib.rs +1715 -646
- data/lib/html_to_markdown/native.rb +913 -37
- data/lib/html_to_markdown/version.rb +3 -3
- data/lib/html_to_markdown.rb +9 -4
- data/lib/html_to_markdown_rb.so +0 -0
- data/sig/types.rbs +59 -292
- metadata +32 -179
- data/ext/html_to_markdown_rb/Makefile +0 -592
- data/lib/bin/html-to-markdown +0 -0
- data/vendor/Cargo.toml +0 -33
- data/vendor/html-to-markdown-rs/Cargo.toml +0 -54
- data/vendor/html-to-markdown-rs/README.md +0 -278
- data/vendor/html-to-markdown-rs/examples/basic.rs +0 -24
- data/vendor/html-to-markdown-rs/examples/table.rs +0 -25
- data/vendor/html-to-markdown-rs/examples/test_deser.rs +0 -12
- data/vendor/html-to-markdown-rs/examples/test_escape.rs +0 -58
- data/vendor/html-to-markdown-rs/examples/test_inline_formatting.rs +0 -113
- data/vendor/html-to-markdown-rs/examples/test_lists.rs +0 -39
- data/vendor/html-to-markdown-rs/examples/test_semantic_tags.rs +0 -89
- data/vendor/html-to-markdown-rs/examples/test_tables.rs +0 -100
- data/vendor/html-to-markdown-rs/examples/test_task_lists.rs +0 -61
- data/vendor/html-to-markdown-rs/examples/test_whitespace.rs +0 -34
- data/vendor/html-to-markdown-rs/src/convert_api.rs +0 -349
- data/vendor/html-to-markdown-rs/src/converter/block/blockquote.rs +0 -178
- data/vendor/html-to-markdown-rs/src/converter/block/container.rs +0 -114
- data/vendor/html-to-markdown-rs/src/converter/block/div.rs +0 -149
- data/vendor/html-to-markdown-rs/src/converter/block/heading.rs +0 -428
- data/vendor/html-to-markdown-rs/src/converter/block/horizontal_rule.rs +0 -103
- data/vendor/html-to-markdown-rs/src/converter/block/line_break.rs +0 -89
- data/vendor/html-to-markdown-rs/src/converter/block/mod.rs +0 -10
- data/vendor/html-to-markdown-rs/src/converter/block/paragraph.rs +0 -140
- data/vendor/html-to-markdown-rs/src/converter/block/preformatted.rs +0 -298
- data/vendor/html-to-markdown-rs/src/converter/block/table/builder.rs +0 -453
- data/vendor/html-to-markdown-rs/src/converter/block/table/caption.rs +0 -44
- data/vendor/html-to-markdown-rs/src/converter/block/table/cell.rs +0 -276
- data/vendor/html-to-markdown-rs/src/converter/block/table/cells.rs +0 -336
- data/vendor/html-to-markdown-rs/src/converter/block/table/layout.rs +0 -58
- data/vendor/html-to-markdown-rs/src/converter/block/table/mod.rs +0 -266
- data/vendor/html-to-markdown-rs/src/converter/block/table/scanner.rs +0 -146
- data/vendor/html-to-markdown-rs/src/converter/block/table/utils.rs +0 -34
- data/vendor/html-to-markdown-rs/src/converter/block/unknown.rs +0 -138
- data/vendor/html-to-markdown-rs/src/converter/context.rs +0 -208
- data/vendor/html-to-markdown-rs/src/converter/dom_context.rs +0 -337
- data/vendor/html-to-markdown-rs/src/converter/form/elements.rs +0 -770
- data/vendor/html-to-markdown-rs/src/converter/form/mod.rs +0 -82
- data/vendor/html-to-markdown-rs/src/converter/format/djot.rs +0 -64
- data/vendor/html-to-markdown-rs/src/converter/format/markdown.rs +0 -59
- data/vendor/html-to-markdown-rs/src/converter/format/mod.rs +0 -43
- data/vendor/html-to-markdown-rs/src/converter/handlers/blockquote.rs +0 -173
- data/vendor/html-to-markdown-rs/src/converter/handlers/code_block.rs +0 -434
- data/vendor/html-to-markdown-rs/src/converter/handlers/graphic.rs +0 -234
- data/vendor/html-to-markdown-rs/src/converter/handlers/image.rs +0 -282
- data/vendor/html-to-markdown-rs/src/converter/handlers/link.rs +0 -316
- data/vendor/html-to-markdown-rs/src/converter/handlers/mod.rs +0 -26
- data/vendor/html-to-markdown-rs/src/converter/inline/code.rs +0 -306
- data/vendor/html-to-markdown-rs/src/converter/inline/emphasis.rs +0 -345
- data/vendor/html-to-markdown-rs/src/converter/inline/link.rs +0 -428
- data/vendor/html-to-markdown-rs/src/converter/inline/mod.rs +0 -237
- data/vendor/html-to-markdown-rs/src/converter/inline/ruby.rs +0 -337
- data/vendor/html-to-markdown-rs/src/converter/inline/semantic/marks.rs +0 -566
- data/vendor/html-to-markdown-rs/src/converter/inline/semantic/mod.rs +0 -86
- data/vendor/html-to-markdown-rs/src/converter/inline/semantic/typography.rs +0 -558
- data/vendor/html-to-markdown-rs/src/converter/list/definition.rs +0 -232
- data/vendor/html-to-markdown-rs/src/converter/list/item.rs +0 -332
- data/vendor/html-to-markdown-rs/src/converter/list/mod.rs +0 -70
- data/vendor/html-to-markdown-rs/src/converter/list/ordered.rs +0 -201
- data/vendor/html-to-markdown-rs/src/converter/list/unordered.rs +0 -195
- data/vendor/html-to-markdown-rs/src/converter/list/utils.rs +0 -314
- data/vendor/html-to-markdown-rs/src/converter/main.rs +0 -710
- data/vendor/html-to-markdown-rs/src/converter/main_helpers.rs +0 -452
- data/vendor/html-to-markdown-rs/src/converter/media/embedded.rs +0 -393
- data/vendor/html-to-markdown-rs/src/converter/media/graphic.rs +0 -4
- data/vendor/html-to-markdown-rs/src/converter/media/image.rs +0 -183
- data/vendor/html-to-markdown-rs/src/converter/media/mod.rs +0 -87
- data/vendor/html-to-markdown-rs/src/converter/media/svg.rs +0 -280
- data/vendor/html-to-markdown-rs/src/converter/metadata.rs +0 -220
- data/vendor/html-to-markdown-rs/src/converter/mod.rs +0 -156
- data/vendor/html-to-markdown-rs/src/converter/plain_text.rs +0 -516
- data/vendor/html-to-markdown-rs/src/converter/preprocessing_helpers.rs +0 -201
- data/vendor/html-to-markdown-rs/src/converter/reference_collector.rs +0 -69
- data/vendor/html-to-markdown-rs/src/converter/semantic/attributes.rs +0 -269
- data/vendor/html-to-markdown-rs/src/converter/semantic/definition_list.rs +0 -266
- data/vendor/html-to-markdown-rs/src/converter/semantic/figure.rs +0 -391
- data/vendor/html-to-markdown-rs/src/converter/semantic/mod.rs +0 -112
- data/vendor/html-to-markdown-rs/src/converter/semantic/sectioning.rs +0 -85
- data/vendor/html-to-markdown-rs/src/converter/semantic/summary.rs +0 -324
- data/vendor/html-to-markdown-rs/src/converter/text/mod.rs +0 -8
- data/vendor/html-to-markdown-rs/src/converter/text/processing.rs +0 -56
- data/vendor/html-to-markdown-rs/src/converter/text_node.rs +0 -269
- data/vendor/html-to-markdown-rs/src/converter/utility/attributes.rs +0 -151
- data/vendor/html-to-markdown-rs/src/converter/utility/caching.rs +0 -74
- data/vendor/html-to-markdown-rs/src/converter/utility/content.rs +0 -271
- data/vendor/html-to-markdown-rs/src/converter/utility/mod.rs +0 -17
- data/vendor/html-to-markdown-rs/src/converter/utility/preprocessing.rs +0 -1002
- data/vendor/html-to-markdown-rs/src/converter/utility/serialization.rs +0 -126
- data/vendor/html-to-markdown-rs/src/converter/utility/siblings.rs +0 -97
- data/vendor/html-to-markdown-rs/src/converter/visitor_hooks.rs +0 -189
- data/vendor/html-to-markdown-rs/src/error.rs +0 -43
- data/vendor/html-to-markdown-rs/src/exports.rs +0 -24
- data/vendor/html-to-markdown-rs/src/inline_images.rs +0 -336
- data/vendor/html-to-markdown-rs/src/lib.rs +0 -139
- data/vendor/html-to-markdown-rs/src/metadata/collector.rs +0 -457
- data/vendor/html-to-markdown-rs/src/metadata/config.rs +0 -394
- data/vendor/html-to-markdown-rs/src/metadata/extraction.rs +0 -398
- data/vendor/html-to-markdown-rs/src/metadata/mod.rs +0 -288
- data/vendor/html-to-markdown-rs/src/metadata/types.rs +0 -477
- data/vendor/html-to-markdown-rs/src/options/conversion.rs +0 -559
- data/vendor/html-to-markdown-rs/src/options/inline_image.rs +0 -111
- data/vendor/html-to-markdown-rs/src/options/mod.rs +0 -20
- data/vendor/html-to-markdown-rs/src/options/preprocessing.rs +0 -201
- data/vendor/html-to-markdown-rs/src/options/validation.rs +0 -416
- data/vendor/html-to-markdown-rs/src/prelude.rs +0 -1
- data/vendor/html-to-markdown-rs/src/rcdom.rs +0 -487
- data/vendor/html-to-markdown-rs/src/text.rs +0 -358
- data/vendor/html-to-markdown-rs/src/types/document.rs +0 -191
- data/vendor/html-to-markdown-rs/src/types/mod.rs +0 -17
- data/vendor/html-to-markdown-rs/src/types/result.rs +0 -54
- data/vendor/html-to-markdown-rs/src/types/structure_builder.rs +0 -791
- data/vendor/html-to-markdown-rs/src/types/structure_collector.rs +0 -483
- data/vendor/html-to-markdown-rs/src/types/tables.rs +0 -52
- data/vendor/html-to-markdown-rs/src/types/warnings.rs +0 -33
- data/vendor/html-to-markdown-rs/src/validation.rs +0 -158
- data/vendor/html-to-markdown-rs/src/visitor/default_impl.rs +0 -63
- data/vendor/html-to-markdown-rs/src/visitor/mod.rs +0 -41
- data/vendor/html-to-markdown-rs/src/visitor/traits.rs +0 -370
- data/vendor/html-to-markdown-rs/src/visitor/types.rs +0 -319
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/callbacks/mod.rs +0 -1
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/content.rs +0 -126
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/mod.rs +0 -27
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/state.rs +0 -110
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/traversal.rs +0 -250
- data/vendor/html-to-markdown-rs/src/visitor_helpers.rs +0 -597
- data/vendor/html-to-markdown-rs/src/wrapper/sync.rs +0 -413
- data/vendor/html-to-markdown-rs/src/wrapper/utils.rs +0 -290
- data/vendor/html-to-markdown-rs/src/wrapper.rs +0 -9
- data/vendor/html-to-markdown-rs/tests/br_in_inline_test.rs +0 -87
- data/vendor/html-to-markdown-rs/tests/commonmark_compliance_test.rs +0 -297
- data/vendor/html-to-markdown-rs/tests/djot_output_test.rs +0 -153
- data/vendor/html-to-markdown-rs/tests/exclude_selectors_test.rs +0 -132
- data/vendor/html-to-markdown-rs/tests/integration_test.rs +0 -631
- data/vendor/html-to-markdown-rs/tests/issue_121_regressions.rs +0 -49
- data/vendor/html-to-markdown-rs/tests/issue_127_regressions.rs +0 -58
- data/vendor/html-to-markdown-rs/tests/issue_128_regressions.rs +0 -17
- data/vendor/html-to-markdown-rs/tests/issue_131_regressions.rs +0 -41
- data/vendor/html-to-markdown-rs/tests/issue_134_regressions.rs +0 -40
- data/vendor/html-to-markdown-rs/tests/issue_139_regressions.rs +0 -26
- data/vendor/html-to-markdown-rs/tests/issue_140_regressions.rs +0 -185
- data/vendor/html-to-markdown-rs/tests/issue_143_regressions.rs +0 -100
- data/vendor/html-to-markdown-rs/tests/issue_145_regressions.rs +0 -133
- data/vendor/html-to-markdown-rs/tests/issue_146_regressions.rs +0 -144
- data/vendor/html-to-markdown-rs/tests/issue_176_regressions.rs +0 -62
- data/vendor/html-to-markdown-rs/tests/issue_190_regressions.rs +0 -128
- data/vendor/html-to-markdown-rs/tests/issue_199_regressions.rs +0 -20
- data/vendor/html-to-markdown-rs/tests/issue_200_regressions.rs +0 -62
- data/vendor/html-to-markdown-rs/tests/issue_212_regressions.rs +0 -68
- data/vendor/html-to-markdown-rs/tests/issue_216_217_regressions.rs +0 -87
- data/vendor/html-to-markdown-rs/tests/issue_336_regressions.rs +0 -74
- data/vendor/html-to-markdown-rs/tests/issue_339_regressions.rs +0 -92
- data/vendor/html-to-markdown-rs/tests/issue_347_regressions.rs +0 -154
- data/vendor/html-to-markdown-rs/tests/issue_348_visitor_plain.rs +0 -93
- data/vendor/html-to-markdown-rs/tests/json_ld_script_extraction.rs +0 -44
- data/vendor/html-to-markdown-rs/tests/lists_test.rs +0 -199
- data/vendor/html-to-markdown-rs/tests/plain_output_test.rs +0 -273
- data/vendor/html-to-markdown-rs/tests/preprocessing_tests.rs +0 -61
- data/vendor/html-to-markdown-rs/tests/reference_links_test.rs +0 -169
- data/vendor/html-to-markdown-rs/tests/sectioning_elements_test.rs +0 -137
- data/vendor/html-to-markdown-rs/tests/skip_images_test.rs +0 -522
- data/vendor/html-to-markdown-rs/tests/tables_test.rs +0 -743
- data/vendor/html-to-markdown-rs/tests/test_custom_elements.rs +0 -41
- data/vendor/html-to-markdown-rs/tests/test_issue_187.rs +0 -204
- data/vendor/html-to-markdown-rs/tests/test_issue_218.rs +0 -68
- data/vendor/html-to-markdown-rs/tests/test_issue_277.rs +0 -77
- data/vendor/html-to-markdown-rs/tests/test_max_depth.rs +0 -82
- data/vendor/html-to-markdown-rs/tests/test_nested_simple.rs +0 -45
- data/vendor/html-to-markdown-rs/tests/test_script_style_stripping.rs +0 -396
- data/vendor/html-to-markdown-rs/tests/test_spa_bisect.rs +0 -34
- data/vendor/html-to-markdown-rs/tests/visitor_code_integration_test.rs +0 -121
- data/vendor/html-to-markdown-rs/tests/visitor_integration_test.rs +0 -1190
- data/vendor/html-to-markdown-rs/tests/xml_tables_test.rs +0 -372
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "html-to-markdown-rb"
|
|
3
|
-
version = "3.
|
|
3
|
+
version = "3.6.0-rc.23"
|
|
4
4
|
edition = "2024"
|
|
5
5
|
license = "MIT"
|
|
6
6
|
description = "High-performance HTML to Markdown converter"
|
|
7
7
|
readme = false
|
|
8
|
-
keywords = ["
|
|
9
|
-
categories = [
|
|
8
|
+
keywords = ["converter", "html", "markdown"]
|
|
9
|
+
categories = []
|
|
10
10
|
|
|
11
11
|
[package.metadata.cargo-machete]
|
|
12
|
-
ignored = ["
|
|
12
|
+
ignored = ["rb-sys"]
|
|
13
13
|
|
|
14
14
|
[lib]
|
|
15
15
|
name = "html_to_markdown_rb"
|
|
@@ -18,14 +18,9 @@ crate-type = ["cdylib"]
|
|
|
18
18
|
|
|
19
19
|
[dependencies]
|
|
20
20
|
async-trait = "0.1"
|
|
21
|
-
html-to-markdown-rs = {
|
|
22
|
-
"full",
|
|
23
|
-
"metadata",
|
|
24
|
-
"visitor",
|
|
25
|
-
"serde",
|
|
26
|
-
"inline-images",
|
|
27
|
-
] }
|
|
21
|
+
html-to-markdown-rs = { version = "3.6.0-rc.23", features = ["serde", "metadata", "visitor", "inline-images", "testkit"] }
|
|
28
22
|
magnus = "0.8"
|
|
23
|
+
rb-sys = ">=0.9, <0.9.128"
|
|
29
24
|
serde = { version = "1", features = ["derive"] }
|
|
30
25
|
serde_json = "1"
|
|
31
26
|
tokio = { version = "1", features = ["rt-multi-thread"] }
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "mkmf"
|
|
4
|
+
require "rb_sys/mkmf"
|
|
5
|
+
|
|
6
|
+
default_profile = ENV.fetch("CARGO_PROFILE", "release")
|
|
7
|
+
|
|
8
|
+
create_rust_makefile("html_to_markdown_rb") do |config|
|
|
9
|
+
config.profile = default_profile.to_sym
|
|
10
|
+
# extconf.rb and Cargo.toml are siblings under ext/html_to_markdown_rb/native/; rb_sys interprets
|
|
11
|
+
# ext_dir relative to extconf.rb, so "." finds the sibling Cargo.toml. "native" would
|
|
12
|
+
# resolve to native/native/Cargo.toml and break `gem install` on end-user machines.
|
|
13
|
+
config.ext_dir = "."
|
|
14
|
+
end
|