html-to-markdown 3.2.3 → 3.4.0.pre.rc.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Steepfile +6 -0
- data/ext/html_to_markdown_rb/Cargo.toml +2 -2
- data/ext/html_to_markdown_rb/native/Cargo.toml +28 -0
- data/ext/html_to_markdown_rb/src/html-to-markdown/version.rb +10 -0
- data/ext/html_to_markdown_rb/src/html-to-markdown.rb +13 -0
- data/ext/html_to_markdown_rb/src/lib.rs +2088 -268
- data/lib/bin/html-to-markdown +0 -0
- data/lib/html_to_markdown/version.rb +1 -1
- data/lib/html_to_markdown.rb +5 -3
- data/sig/types.rbs +769 -0
- data/vendor/Cargo.toml +2 -2
- data/vendor/html-to-markdown-rs/Cargo.toml +1 -1
- data/vendor/html-to-markdown-rs/examples/basic.rs +1 -1
- data/vendor/html-to-markdown-rs/examples/table.rs +1 -1
- data/vendor/html-to-markdown-rs/examples/test_deser.rs +1 -1
- data/vendor/html-to-markdown-rs/examples/test_escape.rs +1 -1
- data/vendor/html-to-markdown-rs/examples/test_inline_formatting.rs +1 -1
- data/vendor/html-to-markdown-rs/examples/test_lists.rs +1 -1
- data/vendor/html-to-markdown-rs/examples/test_semantic_tags.rs +1 -1
- data/vendor/html-to-markdown-rs/examples/test_tables.rs +1 -1
- data/vendor/html-to-markdown-rs/examples/test_task_lists.rs +1 -1
- data/vendor/html-to-markdown-rs/examples/test_whitespace.rs +1 -1
- data/vendor/html-to-markdown-rs/src/convert_api.rs +15 -25
- data/vendor/html-to-markdown-rs/src/converter/block/blockquote.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/block/container.rs +3 -3
- data/vendor/html-to-markdown-rs/src/converter/block/div.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/block/heading.rs +6 -7
- data/vendor/html-to-markdown-rs/src/converter/block/horizontal_rule.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/block/line_break.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/block/mod.rs +0 -108
- data/vendor/html-to-markdown-rs/src/converter/block/paragraph.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/block/preformatted.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/block/table/builder.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/block/table/cell.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/block/table/layout.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/block/table/mod.rs +2 -4
- data/vendor/html-to-markdown-rs/src/converter/block/unknown.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/context.rs +10 -0
- data/vendor/html-to-markdown-rs/src/converter/dom_context.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/form/elements.rs +14 -14
- data/vendor/html-to-markdown-rs/src/converter/form/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/format/mod.rs +0 -3
- data/vendor/html-to-markdown-rs/src/converter/inline/code.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/inline/emphasis.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/inline/link.rs +2 -2
- data/vendor/html-to-markdown-rs/src/converter/inline/mod.rs +0 -1
- data/vendor/html-to-markdown-rs/src/converter/inline/ruby.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/inline/semantic/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/list/definition.rs +3 -3
- data/vendor/html-to-markdown-rs/src/converter/list/item.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/list/mod.rs +0 -1
- data/vendor/html-to-markdown-rs/src/converter/list/ordered.rs +2 -2
- data/vendor/html-to-markdown-rs/src/converter/list/unordered.rs +2 -2
- data/vendor/html-to-markdown-rs/src/converter/main.rs +57 -31
- data/vendor/html-to-markdown-rs/src/converter/media/embedded.rs +8 -8
- data/vendor/html-to-markdown-rs/src/converter/media/image.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/media/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/media/svg.rs +5 -5
- data/vendor/html-to-markdown-rs/src/converter/mod.rs +6 -17
- data/vendor/html-to-markdown-rs/src/converter/plain_text.rs +64 -11
- data/vendor/html-to-markdown-rs/src/converter/preprocessing_helpers.rs +80 -22
- data/vendor/html-to-markdown-rs/src/converter/semantic/figure.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/semantic/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/text/mod.rs +0 -4
- data/vendor/html-to-markdown-rs/src/converter/utility/attributes.rs +5 -9
- data/vendor/html-to-markdown-rs/src/converter/utility/caching.rs +3 -3
- data/vendor/html-to-markdown-rs/src/converter/utility/content.rs +10 -10
- data/vendor/html-to-markdown-rs/src/converter/utility/preprocessing.rs +13 -13
- data/vendor/html-to-markdown-rs/src/converter/utility/serialization.rs +4 -4
- data/vendor/html-to-markdown-rs/src/converter/utility/siblings.rs +6 -14
- data/vendor/html-to-markdown-rs/src/inline_images.rs +6 -0
- data/vendor/html-to-markdown-rs/src/lib.rs +17 -18
- data/vendor/html-to-markdown-rs/src/options/conversion.rs +31 -0
- data/vendor/html-to-markdown-rs/src/prelude.rs +1 -12
- data/vendor/html-to-markdown-rs/src/text.rs +0 -44
- data/vendor/html-to-markdown-rs/src/types/warnings.rs +2 -0
- data/vendor/html-to-markdown-rs/src/visitor/types.rs +5 -1
- data/vendor/html-to-markdown-rs/src/visitor_helpers.rs +4 -1
- data/vendor/html-to-markdown-rs/tests/br_in_inline_test.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/commonmark_compliance_test.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/djot_output_test.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/exclude_selectors_test.rs +136 -0
- data/vendor/html-to-markdown-rs/tests/integration_test.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_121_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_127_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_128_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_131_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_134_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_139_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_140_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_143_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_145_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_146_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_176_regressions.rs +2 -2
- data/vendor/html-to-markdown-rs/tests/issue_190_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_199_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_200_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_212_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/issue_216_217_regressions.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/json_ld_script_extraction.rs +2 -2
- data/vendor/html-to-markdown-rs/tests/lists_test.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/plain_output_test.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/preprocessing_tests.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/reference_links_test.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/sectioning_elements_test.rs +137 -0
- data/vendor/html-to-markdown-rs/tests/skip_images_test.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/tables_test.rs +2 -2
- data/vendor/html-to-markdown-rs/tests/test_custom_elements.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/test_issue_187.rs +5 -2
- data/vendor/html-to-markdown-rs/tests/test_issue_218.rs +4 -4
- data/vendor/html-to-markdown-rs/tests/test_issue_277.rs +77 -0
- data/vendor/html-to-markdown-rs/tests/test_max_depth.rs +82 -0
- data/vendor/html-to-markdown-rs/tests/test_nested_simple.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/test_script_style_stripping.rs +4 -4
- data/vendor/html-to-markdown-rs/tests/test_spa_bisect.rs +1 -1
- data/vendor/html-to-markdown-rs/tests/visitor_code_integration_test.rs +6 -6
- data/vendor/html-to-markdown-rs/tests/visitor_integration_test.rs +103 -35
- data/vendor/html-to-markdown-rs/tests/xml_tables_test.rs +1 -1
- metadata +21 -43
- data/.bundle/config +0 -2
- data/.gitignore +0 -3
- data/.rubocop.yml +0 -59
- data/Gemfile +0 -18
- data/Gemfile.lock +0 -173
- data/README.md +0 -331
- data/Rakefile +0 -26
- data/exe/html-to-markdown +0 -6
- data/ext/html_to_markdown_rb/src/html_to_markdown_rs/version.rb +0 -6
- data/ext/html_to_markdown_rb/src/html_to_markdown_rs.rb +0 -9
- data/html-to-markdown-rb.gemspec +0 -99
- data/lib/html_to_markdown_rs.rb +0 -3
- data/sig/html_to_markdown.rbs +0 -149
- data/vendor/html-to-markdown-rs/src/converter/text/escaping.rs +0 -94
- data/vendor/html-to-markdown-rs/src/converter/text/normalization.rs +0 -86
- data/vendor/html-to-markdown-rs/src/safety.rs +0 -70
data/Gemfile.lock
DELETED
|
@@ -1,173 +0,0 @@
|
|
|
1
|
-
PATH
|
|
2
|
-
remote: .
|
|
3
|
-
specs:
|
|
4
|
-
html-to-markdown (3.2.3)
|
|
5
|
-
rb_sys (>= 0.9, < 1.0)
|
|
6
|
-
|
|
7
|
-
GEM
|
|
8
|
-
remote: https://rubygems.org/
|
|
9
|
-
specs:
|
|
10
|
-
ast (2.4.3)
|
|
11
|
-
concurrent-ruby (1.3.6)
|
|
12
|
-
csv (3.3.5)
|
|
13
|
-
diff-lcs (1.6.2)
|
|
14
|
-
ffi (1.17.4-arm64-darwin)
|
|
15
|
-
ffi (1.17.4-x86_64-linux-gnu)
|
|
16
|
-
fileutils (1.8.0)
|
|
17
|
-
json (2.19.3)
|
|
18
|
-
language_server-protocol (3.17.0.5)
|
|
19
|
-
lint_roller (1.1.0)
|
|
20
|
-
listen (3.10.0)
|
|
21
|
-
logger
|
|
22
|
-
rb-fsevent (~> 0.10, >= 0.10.3)
|
|
23
|
-
rb-inotify (~> 0.9, >= 0.9.10)
|
|
24
|
-
logger (1.7.0)
|
|
25
|
-
parallel (2.0.1)
|
|
26
|
-
parser (3.3.11.1)
|
|
27
|
-
ast (~> 2.4.1)
|
|
28
|
-
racc
|
|
29
|
-
prism (1.9.0)
|
|
30
|
-
racc (1.8.1)
|
|
31
|
-
rainbow (3.1.1)
|
|
32
|
-
rake (13.4.2)
|
|
33
|
-
rake-compiler (1.3.1)
|
|
34
|
-
rake
|
|
35
|
-
rake-compiler-dock (1.11.0)
|
|
36
|
-
rb-fsevent (0.11.2)
|
|
37
|
-
rb-inotify (0.11.1)
|
|
38
|
-
ffi (~> 1.0)
|
|
39
|
-
rb_sys (0.9.126)
|
|
40
|
-
json (>= 2)
|
|
41
|
-
rake-compiler-dock (= 1.11.0)
|
|
42
|
-
rbs (4.0.2)
|
|
43
|
-
logger
|
|
44
|
-
prism (>= 1.6.0)
|
|
45
|
-
tsort
|
|
46
|
-
regexp_parser (2.12.0)
|
|
47
|
-
rspec (3.13.2)
|
|
48
|
-
rspec-core (~> 3.13.0)
|
|
49
|
-
rspec-expectations (~> 3.13.0)
|
|
50
|
-
rspec-mocks (~> 3.13.0)
|
|
51
|
-
rspec-core (3.13.6)
|
|
52
|
-
rspec-support (~> 3.13.0)
|
|
53
|
-
rspec-expectations (3.13.5)
|
|
54
|
-
diff-lcs (>= 1.2.0, < 2.0)
|
|
55
|
-
rspec-support (~> 3.13.0)
|
|
56
|
-
rspec-mocks (3.13.8)
|
|
57
|
-
diff-lcs (>= 1.2.0, < 2.0)
|
|
58
|
-
rspec-support (~> 3.13.0)
|
|
59
|
-
rspec-support (3.13.7)
|
|
60
|
-
rubocop (1.86.1)
|
|
61
|
-
json (~> 2.3)
|
|
62
|
-
language_server-protocol (~> 3.17.0.2)
|
|
63
|
-
lint_roller (~> 1.1.0)
|
|
64
|
-
parallel (>= 1.10)
|
|
65
|
-
parser (>= 3.3.0.2)
|
|
66
|
-
rainbow (>= 2.2.2, < 4.0)
|
|
67
|
-
regexp_parser (>= 2.9.3, < 3.0)
|
|
68
|
-
rubocop-ast (>= 1.49.0, < 2.0)
|
|
69
|
-
ruby-progressbar (~> 1.7)
|
|
70
|
-
unicode-display_width (>= 2.4.0, < 4.0)
|
|
71
|
-
rubocop-ast (1.49.1)
|
|
72
|
-
parser (>= 3.3.7.2)
|
|
73
|
-
prism (~> 1.7)
|
|
74
|
-
rubocop-performance (1.26.1)
|
|
75
|
-
lint_roller (~> 1.1)
|
|
76
|
-
rubocop (>= 1.75.0, < 2.0)
|
|
77
|
-
rubocop-ast (>= 1.47.1, < 2.0)
|
|
78
|
-
rubocop-rspec (3.9.0)
|
|
79
|
-
lint_roller (~> 1.1)
|
|
80
|
-
rubocop (~> 1.81)
|
|
81
|
-
ruby-progressbar (1.13.0)
|
|
82
|
-
securerandom (0.4.1)
|
|
83
|
-
steep (2.0.0)
|
|
84
|
-
concurrent-ruby (>= 1.1.10)
|
|
85
|
-
csv (>= 3.0.9)
|
|
86
|
-
fileutils (>= 1.1.0)
|
|
87
|
-
json (>= 2.1.0)
|
|
88
|
-
language_server-protocol (>= 3.17.0.4, < 4.0)
|
|
89
|
-
listen (~> 3.0)
|
|
90
|
-
logger (>= 1.3.0)
|
|
91
|
-
parser (>= 3.2)
|
|
92
|
-
prism (>= 0.25.0)
|
|
93
|
-
rainbow (>= 2.2.2, < 4.0)
|
|
94
|
-
rbs (~> 4.0)
|
|
95
|
-
securerandom (>= 0.1)
|
|
96
|
-
strscan (>= 1.0.0)
|
|
97
|
-
terminal-table (>= 2, < 5)
|
|
98
|
-
uri (>= 0.12.0)
|
|
99
|
-
strscan (3.1.8)
|
|
100
|
-
terminal-table (4.0.0)
|
|
101
|
-
unicode-display_width (>= 1.1.1, < 4)
|
|
102
|
-
tsort (0.2.0)
|
|
103
|
-
unicode-display_width (3.2.0)
|
|
104
|
-
unicode-emoji (~> 4.1)
|
|
105
|
-
unicode-emoji (4.2.0)
|
|
106
|
-
uri (1.1.1)
|
|
107
|
-
|
|
108
|
-
PLATFORMS
|
|
109
|
-
arm64-darwin
|
|
110
|
-
x86_64-linux
|
|
111
|
-
|
|
112
|
-
DEPENDENCIES
|
|
113
|
-
html-to-markdown!
|
|
114
|
-
rake-compiler
|
|
115
|
-
rb_sys
|
|
116
|
-
rbs
|
|
117
|
-
rspec
|
|
118
|
-
rubocop
|
|
119
|
-
rubocop-performance
|
|
120
|
-
rubocop-rspec
|
|
121
|
-
steep
|
|
122
|
-
|
|
123
|
-
CHECKSUMS
|
|
124
|
-
ast (2.4.3) sha256=954615157c1d6a382bc27d690d973195e79db7f55e9765ac7c481c60bdb4d383
|
|
125
|
-
concurrent-ruby (1.3.6) sha256=6b56837e1e7e5292f9864f34b69c5a2cbc75c0cf5338f1ce9903d10fa762d5ab
|
|
126
|
-
csv (3.3.5) sha256=6e5134ac3383ef728b7f02725d9872934f523cb40b961479f69cf3afa6c8e73f
|
|
127
|
-
diff-lcs (1.6.2) sha256=9ae0d2cba7d4df3075fe8cd8602a8604993efc0dfa934cff568969efb1909962
|
|
128
|
-
ffi (1.17.4-arm64-darwin) sha256=19071aaf1419251b0a46852abf960e77330a3b334d13a4ab51d58b31a937001b
|
|
129
|
-
ffi (1.17.4-x86_64-linux-gnu) sha256=9d3db14c2eae074b382fa9c083fe95aec6e0a1451da249eab096c34002bc752d
|
|
130
|
-
fileutils (1.8.0) sha256=8c6b1df54e2540bdb2f39258f08af78853aa70bad52b4d394bbc6424593c6e02
|
|
131
|
-
html-to-markdown (3.2.3)
|
|
132
|
-
json (2.19.3) sha256=289b0bb53052a1fa8c34ab33cc750b659ba14a5c45f3fcf4b18762dc67c78646
|
|
133
|
-
language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
|
|
134
|
-
lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
|
|
135
|
-
listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
|
|
136
|
-
logger (1.7.0) sha256=196edec7cc44b66cfb40f9755ce11b392f21f7967696af15d274dde7edff0203
|
|
137
|
-
parallel (2.0.1) sha256=337782d3e39f4121e67563bf91dd8ece67f48923d90698614773a0ec9a5b2c7d
|
|
138
|
-
parser (3.3.11.1) sha256=d17ace7aabe3e72c3cc94043714be27cc6f852f104d81aa284c2281aecc65d54
|
|
139
|
-
prism (1.9.0) sha256=7b530c6a9f92c24300014919c9dcbc055bf4cdf51ec30aed099b06cd6674ef85
|
|
140
|
-
racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
|
|
141
|
-
rainbow (3.1.1) sha256=039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a
|
|
142
|
-
rake (13.4.2) sha256=cb825b2bd5f1f8e91ca37bddb4b9aaf345551b4731da62949be002fa89283701
|
|
143
|
-
rake-compiler (1.3.1) sha256=6b351612b6e2d73ddd5563ee799bb58685176e05363db6758504bd11573d670a
|
|
144
|
-
rake-compiler-dock (1.11.0) sha256=eab51f2cd533eb35cea6b624a75281f047123e70a64c58b607471bb49428f8c2
|
|
145
|
-
rb-fsevent (0.11.2) sha256=43900b972e7301d6570f64b850a5aa67833ee7d87b458ee92805d56b7318aefe
|
|
146
|
-
rb-inotify (0.11.1) sha256=a0a700441239b0ff18eb65e3866236cd78613d6b9f78fea1f9ac47a85e47be6e
|
|
147
|
-
rb_sys (0.9.126) sha256=ba958e0b8b4b89eeae0b3d24b64c809eb2c37e0ab0773a49e9b1c2e22c95aef8
|
|
148
|
-
rbs (4.0.2) sha256=af75671e66cd03434cc546622741ebf83f6197ec4328375805306330bf78ef25
|
|
149
|
-
regexp_parser (2.12.0) sha256=35a916a1d63190ab5c9009457136ae5f3c0c7512d60291d0d1378ba18ce08ebb
|
|
150
|
-
rspec (3.13.2) sha256=206284a08ad798e61f86d7ca3e376718d52c0bc944626b2349266f239f820587
|
|
151
|
-
rspec-core (3.13.6) sha256=a8823c6411667b60a8bca135364351dda34cd55e44ff94c4be4633b37d828b2d
|
|
152
|
-
rspec-expectations (3.13.5) sha256=33a4d3a1d95060aea4c94e9f237030a8f9eae5615e9bd85718fe3a09e4b58836
|
|
153
|
-
rspec-mocks (3.13.8) sha256=086ad3d3d17533f4237643de0b5c42f04b66348c28bf6b9c2d3f4a3b01af1d47
|
|
154
|
-
rspec-support (3.13.7) sha256=0640e5570872aafefd79867901deeeeb40b0c9875a36b983d85f54fb7381c47c
|
|
155
|
-
rubocop (1.86.1) sha256=44415f3f01d01a21e01132248d2fd0867572475b566ca188a0a42133a08d4531
|
|
156
|
-
rubocop-ast (1.49.1) sha256=4412f3ee70f6fe4546cc489548e0f6fcf76cafcfa80fa03af67098ffed755035
|
|
157
|
-
rubocop-performance (1.26.1) sha256=cd19b936ff196df85829d264b522fd4f98b6c89ad271fa52744a8c11b8f71834
|
|
158
|
-
rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
|
|
159
|
-
ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
|
|
160
|
-
securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1
|
|
161
|
-
steep (2.0.0) sha256=6eb0ecc09637bbb54f0a5f2cf63daea6d3208ccace64b4f1107d976333605c30
|
|
162
|
-
strscan (3.1.8) sha256=aae2db611a225559f21ffbb71765c9a4e60fd262534a9ea84f4f11c7f32f679e
|
|
163
|
-
terminal-table (4.0.0) sha256=f504793203f8251b2ea7c7068333053f0beeea26093ec9962e62ea79f94301d2
|
|
164
|
-
tsort (0.2.0) sha256=9650a793f6859a43b6641671278f79cfead60ac714148aabe4e3f0060480089f
|
|
165
|
-
unicode-display_width (3.2.0) sha256=0cdd96b5681a5949cdbc2c55e7b420facae74c4aaf9a9815eee1087cb1853c42
|
|
166
|
-
unicode-emoji (4.2.0) sha256=519e69150f75652e40bf736106cfbc8f0f73aa3fb6a65afe62fefa7f80b0f80f
|
|
167
|
-
uri (1.1.1) sha256=379fa58d27ffb1387eaada68c749d1426738bd0f654d812fcc07e7568f5c57c6
|
|
168
|
-
|
|
169
|
-
RUBY VERSION
|
|
170
|
-
ruby 3.4.8
|
|
171
|
-
|
|
172
|
-
BUNDLED WITH
|
|
173
|
-
4.0.3
|
data/README.md
DELETED
|
@@ -1,331 +0,0 @@
|
|
|
1
|
-
# html-to-markdown
|
|
2
|
-
|
|
3
|
-
<div align="center" style="display: flex; flex-wrap: wrap; gap: 8px; justify-content: center; margin: 20px 0;">
|
|
4
|
-
<!-- Language Bindings -->
|
|
5
|
-
<a href="https://crates.io/crates/html-to-markdown-rs">
|
|
6
|
-
<img src="https://img.shields.io/crates/v/html-to-markdown-rs?label=Rust&color=007ec6" alt="Rust">
|
|
7
|
-
</a>
|
|
8
|
-
<a href="https://pypi.org/project/html-to-markdown/">
|
|
9
|
-
<img src="https://img.shields.io/pypi/v/html-to-markdown?label=Python&color=007ec6" alt="Python">
|
|
10
|
-
</a>
|
|
11
|
-
<a href="https://www.npmjs.com/package/@kreuzberg/html-to-markdown-node">
|
|
12
|
-
<img src="https://img.shields.io/npm/v/@kreuzberg/html-to-markdown-node?label=Node.js&color=007ec6" alt="Node.js">
|
|
13
|
-
</a>
|
|
14
|
-
<a href="https://www.npmjs.com/package/@kreuzberg/html-to-markdown-wasm">
|
|
15
|
-
<img src="https://img.shields.io/npm/v/@kreuzberg/html-to-markdown-wasm?label=WASM&color=007ec6" alt="WASM">
|
|
16
|
-
</a>
|
|
17
|
-
<a href="https://central.sonatype.com/artifact/dev.kreuzberg/html-to-markdown">
|
|
18
|
-
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/html-to-markdown?label=Java&color=007ec6" alt="Java">
|
|
19
|
-
</a>
|
|
20
|
-
<a href="https://pkg.go.dev/github.com/kreuzberg-dev/html-to-markdown/packages/go/v3/htmltomarkdown">
|
|
21
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/html-to-markdown?label=Go&color=007ec6&filter=v3.2.0" alt="Go">
|
|
22
|
-
</a>
|
|
23
|
-
<a href="https://www.nuget.org/packages/KreuzbergDev.HtmlToMarkdown/">
|
|
24
|
-
<img src="https://img.shields.io/nuget/v/KreuzbergDev.HtmlToMarkdown?label=C%23&color=007ec6" alt="C#">
|
|
25
|
-
</a>
|
|
26
|
-
<a href="https://packagist.org/packages/kreuzberg-dev/html-to-markdown">
|
|
27
|
-
<img src="https://img.shields.io/packagist/v/kreuzberg-dev/html-to-markdown?label=PHP&color=007ec6" alt="PHP">
|
|
28
|
-
</a>
|
|
29
|
-
<a href="https://rubygems.org/gems/html-to-markdown">
|
|
30
|
-
<img src="https://img.shields.io/gem/v/html-to-markdown?label=Ruby&color=007ec6" alt="Ruby">
|
|
31
|
-
</a>
|
|
32
|
-
<a href="https://hex.pm/packages/html_to_markdown">
|
|
33
|
-
<img src="https://img.shields.io/hexpm/v/html_to_markdown?label=Elixir&color=007ec6" alt="Elixir">
|
|
34
|
-
</a>
|
|
35
|
-
<a href="https://kreuzberg-dev.r-universe.dev/htmltomarkdown">
|
|
36
|
-
<img src="https://img.shields.io/cran/v/htmltomarkdown?label=R&color=007ec6" alt="R">
|
|
37
|
-
</a>
|
|
38
|
-
<a href="https://github.com/kreuzberg-dev/html-to-markdown/releases">
|
|
39
|
-
<img src="https://img.shields.io/badge/C-FFI-007ec6" alt="C">
|
|
40
|
-
</a>
|
|
41
|
-
|
|
42
|
-
<!-- Project Info -->
|
|
43
|
-
<a href="https://docs.html-to-markdown.kreuzberg.dev">
|
|
44
|
-
<img src="https://img.shields.io/badge/Docs-kreuzberg.dev-007ec6" alt="Documentation">
|
|
45
|
-
</a>
|
|
46
|
-
<a href="https://github.com/kreuzberg-dev/html-to-markdown/blob/main/LICENSE">
|
|
47
|
-
<img src="https://img.shields.io/badge/License-MIT-blue.svg" alt="License">
|
|
48
|
-
</a>
|
|
49
|
-
</div>
|
|
50
|
-
|
|
51
|
-
<img width="1128" height="191" alt="html-to-markdown" src="https://github.com/user-attachments/assets/419fc06c-8313-4324-b159-4b4d3cfce5c0" />
|
|
52
|
-
|
|
53
|
-
<div align="center" style="margin-top: 20px;">
|
|
54
|
-
<a href="https://discord.gg/pXxagNK2zN">
|
|
55
|
-
<img height="22" src="https://img.shields.io/badge/Discord-Join%20our%20community-7289da?logo=discord&logoColor=white" alt="Discord">
|
|
56
|
-
</a>
|
|
57
|
-
</div>
|
|
58
|
-
|
|
59
|
-
Blazing-fast HTML to Markdown conversion for Ruby, powered by the same Rust engine used by our Python, Node.js, WebAssembly, and PHP packages.
|
|
60
|
-
Ship identical Markdown across every runtime while enjoying native extension performance with Magnus bindings.
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
## Installation
|
|
64
|
-
|
|
65
|
-
```bash
|
|
66
|
-
gem install html-to-markdown
|
|
67
|
-
```
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
Requires Ruby 3.2+ with Magnus native extension bindings. Published for Linux and macOS.
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
## Performance Snapshot
|
|
79
|
-
|
|
80
|
-
**Apple M4** · `convert()` · Real Wikipedia documents
|
|
81
|
-
|
|
82
|
-
| Document | Size | Latency | Throughput |
|
|
83
|
-
|----------|------|---------|------------|
|
|
84
|
-
| Lists (Timeline) | 129KB | 0.71ms | 182 MB/s |
|
|
85
|
-
| Tables (Countries) | 360KB | 2.15ms | 167 MB/s |
|
|
86
|
-
| Mixed (Python wiki) | 656KB | 4.89ms | 134 MB/s |
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
## Quick Start
|
|
92
|
-
|
|
93
|
-
Basic conversion:
|
|
94
|
-
|
|
95
|
-
```ruby
|
|
96
|
-
require 'html_to_markdown'
|
|
97
|
-
|
|
98
|
-
html = "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>"
|
|
99
|
-
result = HtmlToMarkdown.convert(html)
|
|
100
|
-
markdown = result[:content]
|
|
101
|
-
```
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
With conversion options:
|
|
105
|
-
|
|
106
|
-
```ruby
|
|
107
|
-
require 'html_to_markdown'
|
|
108
|
-
|
|
109
|
-
html = "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>"
|
|
110
|
-
result = HtmlToMarkdown.convert(html, heading_style: :atx, code_block_style: :fenced)
|
|
111
|
-
markdown = result[:content]
|
|
112
|
-
```
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
## API Reference
|
|
116
|
-
|
|
117
|
-
### Core Function
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
**`convert(html, options: nil, visitor: nil) -> ConversionResult`**
|
|
121
|
-
|
|
122
|
-
Converts HTML to Markdown. Returns a `ConversionResult` hash with all results in a single call.
|
|
123
|
-
|
|
124
|
-
```ruby
|
|
125
|
-
require 'html_to_markdown'
|
|
126
|
-
|
|
127
|
-
result = HtmlToMarkdown.convert(html)
|
|
128
|
-
markdown = result[:content] # Converted Markdown string
|
|
129
|
-
metadata = result[:metadata] # Metadata (when extract_metadata: true)
|
|
130
|
-
tables = result[:tables] # Structured table data (when extract_tables: true)
|
|
131
|
-
document = result[:document] # Document-level info
|
|
132
|
-
images = result[:images] # Extracted images
|
|
133
|
-
warnings = result[:warnings] # Any conversion warnings
|
|
134
|
-
```
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
### Options
|
|
139
|
-
|
|
140
|
-
**`ConversionOptions`** – Key configuration fields:
|
|
141
|
-
|
|
142
|
-
- `heading_style`: Heading format (`"underlined"` | `"atx"` | `"atx_closed"`) — default: `"underlined"`
|
|
143
|
-
- `list_indent_width`: Spaces per indent level — default: `2`
|
|
144
|
-
- `bullets`: Bullet characters cycle — default: `"*+-"`
|
|
145
|
-
- `wrap`: Enable text wrapping — default: `false`
|
|
146
|
-
- `wrap_width`: Wrap at column — default: `80`
|
|
147
|
-
- `code_language`: Default fenced code block language — default: none
|
|
148
|
-
- `extract_metadata`: Enable metadata extraction into `result[:metadata]` — default: `false`
|
|
149
|
-
- `extract_tables`: Enable structured table extraction into `result[:tables]` — default: `false`
|
|
150
|
-
- `output_format`: Output markup format (`"markdown"` | `"djot"` | `"plain"`) — default: `"markdown"`
|
|
151
|
-
|
|
152
|
-
## Djot Output Format
|
|
153
|
-
|
|
154
|
-
The library supports converting HTML to [Djot](https://djot.net/), a lightweight markup language similar to Markdown but with a different syntax for some elements. Set `output_format` to `"djot"` to use this format.
|
|
155
|
-
|
|
156
|
-
### Syntax Differences
|
|
157
|
-
|
|
158
|
-
| Element | Markdown | Djot |
|
|
159
|
-
|---------|----------|------|
|
|
160
|
-
| Strong | `**text**` | `*text*` |
|
|
161
|
-
| Emphasis | `*text*` | `_text_` |
|
|
162
|
-
| Strikethrough | `~~text~~` | `{-text-}` |
|
|
163
|
-
| Inserted/Added | N/A | `{+text+}` |
|
|
164
|
-
| Highlighted | N/A | `{=text=}` |
|
|
165
|
-
| Subscript | N/A | `~text~` |
|
|
166
|
-
| Superscript | N/A | `^text^` |
|
|
167
|
-
|
|
168
|
-
### Example Usage
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
```ruby
|
|
173
|
-
require 'html_to_markdown'
|
|
174
|
-
|
|
175
|
-
html = "<p>This is <strong>bold</strong> and <em>italic</em> text.</p>"
|
|
176
|
-
|
|
177
|
-
# Default Markdown output
|
|
178
|
-
markdown = HtmlToMarkdown.convert(html)
|
|
179
|
-
# Result: "This is **bold** and *italic* text."
|
|
180
|
-
|
|
181
|
-
# Djot output
|
|
182
|
-
djot = HtmlToMarkdown.convert(html, output_format: 'djot')
|
|
183
|
-
# Result: "This is *bold* and _italic_ text."
|
|
184
|
-
```
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
Djot's extended syntax allows you to express more semantic meaning in lightweight text, making it useful for documents that require strikethrough, insertion tracking, or mathematical notation.
|
|
189
|
-
|
|
190
|
-
## Plain Text Output
|
|
191
|
-
|
|
192
|
-
Set `output_format` to `"plain"` to strip all markup and return only visible text. This bypasses the Markdown conversion pipeline entirely for maximum speed.
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
```ruby
|
|
197
|
-
require 'html_to_markdown'
|
|
198
|
-
|
|
199
|
-
html = "<h1>Title</h1><p>This is <strong>bold</strong> and <em>italic</em> text.</p>"
|
|
200
|
-
|
|
201
|
-
plain = HtmlToMarkdown.convert(html, output_format: 'plain')
|
|
202
|
-
# Result: "Title\n\nThis is bold and italic text."
|
|
203
|
-
```
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
Plain text mode is useful for search indexing, text extraction, and feeding content to LLMs.
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
## Metadata Extraction
|
|
212
|
-
|
|
213
|
-
The metadata extraction feature enables comprehensive document analysis during conversion. Extract document properties, headers, links, images, and structured data in a single pass — all via the standard `convert()` function.
|
|
214
|
-
|
|
215
|
-
**Use Cases:**
|
|
216
|
-
|
|
217
|
-
- **SEO analysis** – Extract title, description, Open Graph tags, Twitter cards
|
|
218
|
-
- **Table of contents generation** – Build structured outlines from heading hierarchy
|
|
219
|
-
- **Content migration** – Document all external links and resources
|
|
220
|
-
- **Accessibility audits** – Check for images without alt text, empty links, invalid heading hierarchy
|
|
221
|
-
- **Link validation** – Classify and validate anchor, internal, external, email, and phone links
|
|
222
|
-
|
|
223
|
-
**Zero Overhead When Disabled:** Metadata extraction is opt-in, so it costs nothing unless you enable it. When enabled, it adds negligible overhead because it happens during the HTML parsing pass. Pass `extract_metadata: true` in `ConversionOptions` to enable it; the result is available at `result[:metadata]`.
|
|
224
|
-
|
|
225
|
-
### Example: Quick Start
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
```ruby
|
|
230
|
-
require 'html_to_markdown'
|
|
231
|
-
|
|
232
|
-
html = '<h1>Article</h1><img src="test.jpg" alt="test">'
|
|
233
|
-
result = HtmlToMarkdown.convert(html, extract_metadata: true)
|
|
234
|
-
|
|
235
|
-
puts result[:content] # Converted Markdown
|
|
236
|
-
puts result[:metadata][:document][:title] # Document title
|
|
237
|
-
puts result[:metadata][:headers] # All h1-h6 elements
|
|
238
|
-
puts result[:metadata][:links] # All hyperlinks
|
|
239
|
-
puts result[:metadata][:images] # All images with alt text
|
|
240
|
-
puts result[:metadata][:structured_data] # JSON-LD, Microdata, RDFa
|
|
241
|
-
```
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
## Visitor Pattern
|
|
249
|
-
|
|
250
|
-
The visitor pattern enables custom HTML→Markdown conversion logic by providing callbacks for specific HTML elements during traversal. Pass a visitor to `convert()` via the `visitor:` keyword argument.
|
|
251
|
-
|
|
252
|
-
**Use Cases:**
|
|
253
|
-
|
|
254
|
-
- **Custom Markdown dialects** – Convert to Obsidian, Notion, or other flavors
|
|
255
|
-
- **Content filtering** – Remove tracking pixels, ads, or unwanted elements
|
|
256
|
-
- **URL rewriting** – Rewrite CDN URLs, add query parameters, validate links
|
|
257
|
-
- **Accessibility validation** – Check alt text, heading hierarchy, link text
|
|
258
|
-
- **Analytics** – Track element usage, link destinations, image sources
|
|
259
|
-
|
|
260
|
-
**Supported Visitor Methods:** 40+ callbacks for text, inline elements, links, images, headings, lists, blocks, and tables.
|
|
261
|
-
|
|
262
|
-
### Example: Quick Start
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
```ruby
|
|
267
|
-
require 'html_to_markdown'
|
|
268
|
-
|
|
269
|
-
class MyVisitor
|
|
270
|
-
def visit_link(ctx, href, text, title = nil)
|
|
271
|
-
# Rewrite CDN URLs
|
|
272
|
-
if href.start_with?('https://old-cdn.com')
|
|
273
|
-
href = href.sub('https://old-cdn.com', 'https://new-cdn.com')
|
|
274
|
-
end
|
|
275
|
-
{ type: :custom, output: "[#{text}](#{href})" }
|
|
276
|
-
end
|
|
277
|
-
|
|
278
|
-
def visit_image(ctx, src, alt = nil, title = nil)
|
|
279
|
-
# Skip tracking pixels
|
|
280
|
-
src.include?('tracking') ? { type: :skip } : { type: :continue }
|
|
281
|
-
end
|
|
282
|
-
end
|
|
283
|
-
|
|
284
|
-
html = '<a href="https://old-cdn.com/file.pdf">Download</a>'
|
|
285
|
-
result = HtmlToMarkdown.convert(html, visitor: MyVisitor.new)
|
|
286
|
-
markdown = result[:content]
|
|
287
|
-
```
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
## Examples
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
## Links
|
|
296
|
-
|
|
297
|
-
- **GitHub:** [github.com/kreuzberg-dev/html-to-markdown](https://github.com/kreuzberg-dev/html-to-markdown)
|
|
298
|
-
|
|
299
|
-
- **RubyGems:** [rubygems.org/gems/html-to-markdown](https://rubygems.org/gems/html-to-markdown)
|
|
300
|
-
|
|
301
|
-
- **Kreuzberg Ecosystem:** [kreuzberg.dev](https://kreuzberg.dev)
|
|
302
|
-
- **Discord:** [discord.gg/pXxagNK2zN](https://discord.gg/pXxagNK2zN)
|
|
303
|
-
|
|
304
|
-
## Contributing
|
|
305
|
-
|
|
306
|
-
We welcome contributions! Please see our [Contributing Guide](https://github.com/kreuzberg-dev/html-to-markdown/blob/main/CONTRIBUTING.md) for details on:
|
|
307
|
-
|
|
308
|
-
- Setting up the development environment
|
|
309
|
-
- Running tests locally
|
|
310
|
-
- Submitting pull requests
|
|
311
|
-
- Reporting issues
|
|
312
|
-
|
|
313
|
-
All contributions must follow our code quality standards (enforced via pre-commit hooks):
|
|
314
|
-
|
|
315
|
-
- Proper test coverage (Rust 95%+, language bindings 80%+)
|
|
316
|
-
- Formatting and linting checks
|
|
317
|
-
- Documentation for public APIs
|
|
318
|
-
|
|
319
|
-
## License
|
|
320
|
-
|
|
321
|
-
MIT License – see [LICENSE](https://github.com/kreuzberg-dev/html-to-markdown/blob/main/LICENSE).
|
|
322
|
-
|
|
323
|
-
## Support
|
|
324
|
-
|
|
325
|
-
If you find this library useful, consider [sponsoring the project](https://github.com/sponsors/kreuzberg-dev).
|
|
326
|
-
|
|
327
|
-
Have questions or run into issues? We're here to help:
|
|
328
|
-
|
|
329
|
-
- **GitHub Issues:** [github.com/kreuzberg-dev/html-to-markdown/issues](https://github.com/kreuzberg-dev/html-to-markdown/issues)
|
|
330
|
-
- **Discussions:** [github.com/kreuzberg-dev/html-to-markdown/discussions](https://github.com/kreuzberg-dev/html-to-markdown/discussions)
|
|
331
|
-
- **Discord Community:** [discord.gg/pXxagNK2zN](https://discord.gg/pXxagNK2zN)
|
data/Rakefile
DELETED
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'bundler/gem_tasks'
|
|
4
|
-
require 'rake/extensiontask'
|
|
5
|
-
require 'rspec/core/rake_task'
|
|
6
|
-
|
|
7
|
-
GEMSPEC = Gem::Specification.load(File.expand_path('html_to_markdown_rs.gemspec', __dir__))
|
|
8
|
-
|
|
9
|
-
Rake::ExtensionTask.new('html_to_markdown_rb', GEMSPEC) do |ext|
|
|
10
|
-
ext.lib_dir = 'lib'
|
|
11
|
-
ext.ext_dir = 'ext/html_to_markdown_rb'
|
|
12
|
-
ext.cross_compile = true
|
|
13
|
-
ext.cross_platform = %w[
|
|
14
|
-
x86_64-linux
|
|
15
|
-
aarch64-linux
|
|
16
|
-
x86_64-darwin
|
|
17
|
-
arm64-darwin
|
|
18
|
-
x64-mingw32
|
|
19
|
-
x64-mingw-ucrt
|
|
20
|
-
]
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
RSpec::Core::RakeTask.new(:spec)
|
|
24
|
-
|
|
25
|
-
task spec: :compile
|
|
26
|
-
task default: :spec
|
data/exe/html-to-markdown
DELETED
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
# This file is auto-generated by alef. DO NOT EDIT.
|
|
2
|
-
# frozen_string_literal: true
|
|
3
|
-
|
|
4
|
-
require_relative 'html_to_markdown_rs/version'
|
|
5
|
-
require_relative 'html_to_markdown_rs/native'
|
|
6
|
-
|
|
7
|
-
module Html_to_markdown_rs
|
|
8
|
-
# Re-export all types and functions from native extension
|
|
9
|
-
end
|
data/html-to-markdown-rb.gemspec
DELETED
|
@@ -1,99 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require_relative 'lib/html_to_markdown/version'
|
|
4
|
-
|
|
5
|
-
repo_root = File.expand_path('../..', __dir__)
|
|
6
|
-
crate_prefix = 'packages/ruby/'
|
|
7
|
-
git_cmd = %(git -C "#{repo_root}" ls-files -z #{crate_prefix})
|
|
8
|
-
ruby_files =
|
|
9
|
-
`#{git_cmd}`.split("\x0")
|
|
10
|
-
.select { |path| path.start_with?(crate_prefix) }
|
|
11
|
-
.map { |path| path.delete_prefix(crate_prefix) }
|
|
12
|
-
|
|
13
|
-
fallback_files = Dir.chdir(__dir__) do
|
|
14
|
-
Dir.glob(
|
|
15
|
-
%w[
|
|
16
|
-
README.md
|
|
17
|
-
ext/**/*
|
|
18
|
-
exe/*
|
|
19
|
-
lib/**/*.rb
|
|
20
|
-
lib/bin/*
|
|
21
|
-
src/**/*.rs
|
|
22
|
-
spec/**/*.rb
|
|
23
|
-
sig/**/*.rbs
|
|
24
|
-
]
|
|
25
|
-
)
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
# Vendor files: include vendored crates and workspace Cargo.toml
|
|
29
|
-
vendor_files = Dir.chdir(__dir__) do
|
|
30
|
-
Dir.glob('vendor/html-to-markdown-rs/**/*', File::FNM_DOTMATCH)
|
|
31
|
-
.select { |f| File.file?(f) }
|
|
32
|
-
.grep_v(%r{/target/})
|
|
33
|
-
.grep_v(/\.(swp|bak|tmp)$/)
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
# Include vendor/Cargo.toml (workspace definition) if it exists
|
|
37
|
-
workspace_toml = if File.exist?(File.join(__dir__, 'vendor/Cargo.toml'))
|
|
38
|
-
['vendor/Cargo.toml']
|
|
39
|
-
else
|
|
40
|
-
[]
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
# When vendor exists, use ext/ files from filesystem (modified by vendor script)
|
|
44
|
-
# instead of git (which has the unmodified Cargo.toml with workspace paths)
|
|
45
|
-
ext_files_from_fs = Dir.chdir(__dir__) do
|
|
46
|
-
Dir.glob('ext/**/*', File::FNM_DOTMATCH)
|
|
47
|
-
.reject { |f| File.directory?(f) }
|
|
48
|
-
.reject { |f| f.include?('/target/') }
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
# Include native artifacts (.so, .bundle, .dylib) if present (for platform gems)
|
|
52
|
-
native_files = Dir.chdir(__dir__) do
|
|
53
|
-
Dir.glob('lib/**/*.{so,bundle,dylib}')
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
files = if vendor_files.any?
|
|
57
|
-
# Vendor exists: use ext/ from filesystem (has modified Cargo.toml)
|
|
58
|
-
non_ext_ruby_files = (ruby_files.empty? ? fallback_files : ruby_files)
|
|
59
|
-
.reject { |f| f.start_with?('ext/') }
|
|
60
|
-
non_ext_ruby_files + ext_files_from_fs + vendor_files + workspace_toml + native_files
|
|
61
|
-
else
|
|
62
|
-
ruby_files.empty? ? fallback_files : ruby_files
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
files = files.uniq
|
|
66
|
-
|
|
67
|
-
Gem::Specification.new do |spec|
|
|
68
|
-
spec.name = 'html-to-markdown'
|
|
69
|
-
spec.version = HtmlToMarkdown::VERSION
|
|
70
|
-
spec.authors = ["Na'aman Hirschfeld"]
|
|
71
|
-
spec.email = ['nhirschfeld@gmail.com']
|
|
72
|
-
|
|
73
|
-
spec.summary = 'Blazing-fast HTML to Markdown conversion for Ruby, powered by Rust.'
|
|
74
|
-
spec.description = <<~DESC.strip
|
|
75
|
-
html-to-markdown is a native Ruby extension built on the shared Rust engine that powers the html-to-markdown project.
|
|
76
|
-
It delivers identical HTML-to-Markdown output across languages, exposes inline image extraction, and ships with a CLI for automation workflows.
|
|
77
|
-
DESC
|
|
78
|
-
spec.homepage = 'https://github.com/kreuzberg-dev/html-to-markdown'
|
|
79
|
-
spec.license = 'MIT'
|
|
80
|
-
|
|
81
|
-
spec.required_ruby_version = Gem::Requirement.new('>= 3.2')
|
|
82
|
-
|
|
83
|
-
spec.bindir = 'exe'
|
|
84
|
-
spec.executables = ['html-to-markdown']
|
|
85
|
-
spec.require_paths = ['lib']
|
|
86
|
-
|
|
87
|
-
spec.files = files
|
|
88
|
-
spec.extra_rdoc_files = ['README.md']
|
|
89
|
-
|
|
90
|
-
spec.extensions = ['ext/html_to_markdown_rb/extconf.rb']
|
|
91
|
-
|
|
92
|
-
spec.add_dependency 'rb_sys', '>= 0.9', '< 1.0'
|
|
93
|
-
spec.metadata['rubygems_mfa_required'] = 'true'
|
|
94
|
-
spec.metadata['homepage_uri'] = 'https://github.com/kreuzberg-dev/html-to-markdown'
|
|
95
|
-
spec.metadata['source_code_uri'] = 'https://github.com/kreuzberg-dev/html-to-markdown'
|
|
96
|
-
spec.metadata['bug_tracker_uri'] = 'https://github.com/kreuzberg-dev/html-to-markdown/issues'
|
|
97
|
-
spec.metadata['changelog_uri'] = 'https://github.com/kreuzberg-dev/html-to-markdown/releases'
|
|
98
|
-
spec.metadata['documentation_uri'] = 'https://github.com/kreuzberg-dev/html-to-markdown/blob/main/packages/ruby/README.md'
|
|
99
|
-
end
|
data/lib/html_to_markdown_rs.rb
DELETED