html-to-markdown 3.1.0 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +42 -12
- data/Gemfile +1 -0
- data/Gemfile.lock +27 -55
- data/README.md +9 -10
- data/Rakefile +4 -10
- data/ext/html-to-markdown_rb/Cargo.toml +14 -0
- data/ext/html_to_markdown_rb/Cargo.toml +16 -0
- data/ext/html_to_markdown_rb/extconf.rb +10 -0
- data/ext/html_to_markdown_rb/src/html_to_markdown_rs/version.rb +6 -0
- data/ext/html_to_markdown_rb/src/html_to_markdown_rs.rb +9 -0
- data/ext/html_to_markdown_rb/src/lib.rs +3941 -0
- data/html-to-markdown-rb.gemspec +1 -1
- data/lib/html_to_markdown/version.rb +1 -1
- data/lib/html_to_markdown.rb +31 -21
- data/{ext/html-to-markdown-rb/native/extconf.rb → lib/html_to_markdown_rs.rb} +1 -1
- data/sig/html_to_markdown.rbs +17 -5
- data/vendor/Cargo.toml +4 -4
- data/vendor/html-to-markdown-rs/Cargo.toml +2 -2
- data/vendor/html-to-markdown-rs/examples/test_deser.rs +12 -0
- data/vendor/html-to-markdown-rs/src/converter/block/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/block/table/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/form/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/inline/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/list/item.rs +10 -2
- data/vendor/html-to-markdown-rs/src/converter/mod.rs +2 -2
- data/vendor/html-to-markdown-rs/src/converter/semantic/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/converter/utility/content.rs +1 -1
- data/vendor/html-to-markdown-rs/src/exports.rs +3 -3
- data/vendor/html-to-markdown-rs/src/inline_images.rs +1 -1
- data/vendor/html-to-markdown-rs/src/lib.rs +1 -2
- data/vendor/html-to-markdown-rs/src/metadata/config.rs +1 -1
- data/vendor/html-to-markdown-rs/src/metadata/mod.rs +5 -5
- data/vendor/html-to-markdown-rs/src/options/conversion.rs +6 -12
- data/vendor/html-to-markdown-rs/src/options/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/options/preprocessing.rs +3 -9
- data/vendor/html-to-markdown-rs/src/options/validation.rs +3 -3
- data/vendor/html-to-markdown-rs/src/types/document.rs +11 -0
- data/vendor/html-to-markdown-rs/src/types/result.rs +5 -2
- data/vendor/html-to-markdown-rs/src/types/tables.rs +1 -1
- data/vendor/html-to-markdown-rs/src/visitor/mod.rs +1 -1
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/state.rs +1 -1
- data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/traversal.rs +1 -1
- data/vendor/html-to-markdown-rs/src/visitor_helpers.rs +8 -8
- data/vendor/html-to-markdown-rs/tests/commonmark_compliance_test.rs +6 -0
- data/vendor/html-to-markdown-rs/tests/integration_test.rs +3 -3
- data/vendor/html-to-markdown-rs/tests/issue_140_regressions.rs +8 -2
- data/vendor/html-to-markdown-rs/tests/lists_test.rs +4 -4
- metadata +11 -18
- data/ext/html-to-markdown-rb/extconf.rb +0 -41
- data/ext/html-to-markdown-rb/native/Cargo.lock +0 -934
- data/ext/html-to-markdown-rb/native/Cargo.toml +0 -48
- data/ext/html-to-markdown-rb/native/README.md +0 -215
- data/ext/html-to-markdown-rb/native/src/conversion/inline_images.rs +0 -54
- data/ext/html-to-markdown-rb/native/src/conversion/metadata.rs +0 -158
- data/ext/html-to-markdown-rb/native/src/conversion/mod.rs +0 -11
- data/ext/html-to-markdown-rb/native/src/lib.rs +0 -128
- data/ext/html-to-markdown-rb/native/src/options.rs +0 -238
- data/ext/html-to-markdown-rb/native/src/types.rs +0 -24
- data/lib/html_to_markdown/cli.rb +0 -21
- data/lib/html_to_markdown/cli_proxy.rb +0 -74
- data/spec/cli_proxy_spec.rb +0 -42
- data/spec/spec_helper.rb +0 -10
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 141498bb290e80b0432a4e56fb0f624dbf1ca675204d02ee2a500908d683c336
|
|
4
|
+
data.tar.gz: 8b9f10451650a18ea5a5131a12646208218f5c81e604eecd10664654b772dff8
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d2fe03445e09fce3d4687dcffe357d311c1965420a106805ff66b775db91aff6b75e89233d39dd4af79143a95c35e835c0054dbf0b070b2ef3135261d6bf1a38
|
|
7
|
+
data.tar.gz: 36d1fba490663808f1ceb7eccce1f0952e518bf1285d4a8dfef36ff4cab40e683f30f7f2798727215689a817fdc5663466f9fccee8ff43b03dde66b4a98f62d7
|
data/.rubocop.yml
CHANGED
|
@@ -1,29 +1,59 @@
|
|
|
1
1
|
plugins:
|
|
2
|
+
- rubocop-performance
|
|
2
3
|
- rubocop-rspec
|
|
3
4
|
|
|
4
5
|
AllCops:
|
|
5
|
-
NewCops: enable
|
|
6
6
|
TargetRubyVersion: 3.2
|
|
7
|
+
NewCops: enable
|
|
8
|
+
SuggestExtensions: false
|
|
7
9
|
Exclude:
|
|
8
|
-
-
|
|
9
|
-
-
|
|
10
|
+
- 'vendor/**/*'
|
|
11
|
+
- 'tmp/**/*'
|
|
12
|
+
- 'lib/**/*.bundle'
|
|
13
|
+
- 'ext/**/*'
|
|
14
|
+
|
|
15
|
+
Style/FrozenStringLiteralComment:
|
|
16
|
+
Enabled: true
|
|
17
|
+
EnforcedStyle: always
|
|
18
|
+
|
|
19
|
+
Style/StringLiterals:
|
|
20
|
+
Enabled: true
|
|
21
|
+
EnforcedStyle: single_quotes
|
|
22
|
+
|
|
23
|
+
Style/StringLiteralsInInterpolation:
|
|
24
|
+
Enabled: true
|
|
25
|
+
EnforcedStyle: single_quotes
|
|
10
26
|
|
|
11
27
|
Style/Documentation:
|
|
12
28
|
Enabled: false
|
|
13
29
|
|
|
14
|
-
|
|
30
|
+
Layout/LineLength:
|
|
31
|
+
Max: 120
|
|
32
|
+
AllowedPatterns:
|
|
33
|
+
- '\A\s*#'
|
|
15
34
|
Exclude:
|
|
16
|
-
-
|
|
17
|
-
- "*.gemspec"
|
|
35
|
+
- 'spec/**/*'
|
|
18
36
|
|
|
19
37
|
Metrics/MethodLength:
|
|
20
|
-
Max:
|
|
38
|
+
Max: 20
|
|
39
|
+
Exclude:
|
|
40
|
+
- 'spec/**/*'
|
|
21
41
|
|
|
22
|
-
|
|
23
|
-
Enabled:
|
|
42
|
+
Metrics/BlockLength:
|
|
43
|
+
Enabled: true
|
|
44
|
+
Max: 350
|
|
45
|
+
CountComments: false
|
|
46
|
+
|
|
47
|
+
Metrics/AbcSize:
|
|
48
|
+
Max: 20
|
|
49
|
+
Exclude:
|
|
50
|
+
- 'spec/**/*'
|
|
24
51
|
|
|
25
52
|
RSpec/ExampleLength:
|
|
26
|
-
|
|
53
|
+
Max: 50
|
|
27
54
|
|
|
28
|
-
RSpec/
|
|
29
|
-
|
|
55
|
+
RSpec/MultipleExpectations:
|
|
56
|
+
Max: 25
|
|
57
|
+
|
|
58
|
+
RSpec/NestedGroups:
|
|
59
|
+
Max: 6
|
data/Gemfile
CHANGED
|
@@ -12,6 +12,7 @@ group :development, :test do
|
|
|
12
12
|
gem 'rb_sys' # provides build tooling when developing locally
|
|
13
13
|
gem 'rspec'
|
|
14
14
|
gem 'rubocop', require: false
|
|
15
|
+
gem 'rubocop-performance', require: false
|
|
15
16
|
gem 'rubocop-rspec', require: false
|
|
16
17
|
gem 'steep', require: false
|
|
17
18
|
end
|
data/Gemfile.lock
CHANGED
|
@@ -1,38 +1,19 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
html-to-markdown (3.
|
|
4
|
+
html-to-markdown (3.2.0)
|
|
5
5
|
rb_sys (>= 0.9, < 1.0)
|
|
6
6
|
|
|
7
7
|
GEM
|
|
8
8
|
remote: https://rubygems.org/
|
|
9
9
|
specs:
|
|
10
|
-
activesupport (8.1.3)
|
|
11
|
-
base64
|
|
12
|
-
bigdecimal
|
|
13
|
-
concurrent-ruby (~> 1.0, >= 1.3.1)
|
|
14
|
-
connection_pool (>= 2.2.5)
|
|
15
|
-
drb
|
|
16
|
-
i18n (>= 1.6, < 2)
|
|
17
|
-
json
|
|
18
|
-
logger (>= 1.4.2)
|
|
19
|
-
minitest (>= 5.1)
|
|
20
|
-
securerandom (>= 0.3)
|
|
21
|
-
tzinfo (~> 2.0, >= 2.0.5)
|
|
22
|
-
uri (>= 0.13.1)
|
|
23
10
|
ast (2.4.3)
|
|
24
|
-
base64 (0.3.0)
|
|
25
|
-
bigdecimal (4.1.0)
|
|
26
11
|
concurrent-ruby (1.3.6)
|
|
27
|
-
connection_pool (3.0.2)
|
|
28
12
|
csv (3.3.5)
|
|
29
13
|
diff-lcs (1.6.2)
|
|
30
|
-
drb (2.2.3)
|
|
31
14
|
ffi (1.17.4-arm64-darwin)
|
|
32
15
|
ffi (1.17.4-x86_64-linux-gnu)
|
|
33
16
|
fileutils (1.8.0)
|
|
34
|
-
i18n (1.14.8)
|
|
35
|
-
concurrent-ruby (~> 1.0)
|
|
36
17
|
json (2.19.3)
|
|
37
18
|
language_server-protocol (3.17.0.5)
|
|
38
19
|
lint_roller (1.1.0)
|
|
@@ -41,31 +22,28 @@ GEM
|
|
|
41
22
|
rb-fsevent (~> 0.10, >= 0.10.3)
|
|
42
23
|
rb-inotify (~> 0.9, >= 0.9.10)
|
|
43
24
|
logger (1.7.0)
|
|
44
|
-
|
|
45
|
-
drb (~> 2.0)
|
|
46
|
-
prism (~> 1.5)
|
|
47
|
-
mutex_m (0.3.0)
|
|
48
|
-
parallel (1.27.0)
|
|
25
|
+
parallel (2.0.1)
|
|
49
26
|
parser (3.3.11.1)
|
|
50
27
|
ast (~> 2.4.1)
|
|
51
28
|
racc
|
|
52
29
|
prism (1.9.0)
|
|
53
30
|
racc (1.8.1)
|
|
54
31
|
rainbow (3.1.1)
|
|
55
|
-
rake (13.
|
|
32
|
+
rake (13.4.1)
|
|
56
33
|
rake-compiler (1.3.1)
|
|
57
34
|
rake
|
|
58
35
|
rake-compiler-dock (1.11.0)
|
|
59
36
|
rb-fsevent (0.11.2)
|
|
60
37
|
rb-inotify (0.11.1)
|
|
61
38
|
ffi (~> 1.0)
|
|
62
|
-
rb_sys (0.9.
|
|
39
|
+
rb_sys (0.9.126)
|
|
63
40
|
json (>= 2)
|
|
64
41
|
rake-compiler-dock (= 1.11.0)
|
|
65
|
-
rbs (
|
|
42
|
+
rbs (4.0.2)
|
|
66
43
|
logger
|
|
44
|
+
prism (>= 1.6.0)
|
|
67
45
|
tsort
|
|
68
|
-
regexp_parser (2.
|
|
46
|
+
regexp_parser (2.12.0)
|
|
69
47
|
rspec (3.13.2)
|
|
70
48
|
rspec-core (~> 3.13.0)
|
|
71
49
|
rspec-expectations (~> 3.13.0)
|
|
@@ -79,11 +57,11 @@ GEM
|
|
|
79
57
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
80
58
|
rspec-support (~> 3.13.0)
|
|
81
59
|
rspec-support (3.13.7)
|
|
82
|
-
rubocop (1.86.
|
|
60
|
+
rubocop (1.86.1)
|
|
83
61
|
json (~> 2.3)
|
|
84
62
|
language_server-protocol (~> 3.17.0.2)
|
|
85
63
|
lint_roller (~> 1.1.0)
|
|
86
|
-
parallel (
|
|
64
|
+
parallel (>= 1.10)
|
|
87
65
|
parser (>= 3.3.0.2)
|
|
88
66
|
rainbow (>= 2.2.2, < 4.0)
|
|
89
67
|
regexp_parser (>= 2.9.3, < 3.0)
|
|
@@ -93,13 +71,16 @@ GEM
|
|
|
93
71
|
rubocop-ast (1.49.1)
|
|
94
72
|
parser (>= 3.3.7.2)
|
|
95
73
|
prism (~> 1.7)
|
|
74
|
+
rubocop-performance (1.26.1)
|
|
75
|
+
lint_roller (~> 1.1)
|
|
76
|
+
rubocop (>= 1.75.0, < 2.0)
|
|
77
|
+
rubocop-ast (>= 1.47.1, < 2.0)
|
|
96
78
|
rubocop-rspec (3.9.0)
|
|
97
79
|
lint_roller (~> 1.1)
|
|
98
80
|
rubocop (~> 1.81)
|
|
99
81
|
ruby-progressbar (1.13.0)
|
|
100
82
|
securerandom (0.4.1)
|
|
101
|
-
steep (
|
|
102
|
-
activesupport (>= 5.1)
|
|
83
|
+
steep (2.0.0)
|
|
103
84
|
concurrent-ruby (>= 1.1.10)
|
|
104
85
|
csv (>= 3.0.9)
|
|
105
86
|
fileutils (>= 1.1.0)
|
|
@@ -107,10 +88,10 @@ GEM
|
|
|
107
88
|
language_server-protocol (>= 3.17.0.4, < 4.0)
|
|
108
89
|
listen (~> 3.0)
|
|
109
90
|
logger (>= 1.3.0)
|
|
110
|
-
|
|
111
|
-
|
|
91
|
+
parser (>= 3.2)
|
|
92
|
+
prism (>= 0.25.0)
|
|
112
93
|
rainbow (>= 2.2.2, < 4.0)
|
|
113
|
-
rbs (~>
|
|
94
|
+
rbs (~> 4.0)
|
|
114
95
|
securerandom (>= 0.1)
|
|
115
96
|
strscan (>= 1.0.0)
|
|
116
97
|
terminal-table (>= 2, < 5)
|
|
@@ -119,8 +100,6 @@ GEM
|
|
|
119
100
|
terminal-table (4.0.0)
|
|
120
101
|
unicode-display_width (>= 1.1.1, < 4)
|
|
121
102
|
tsort (0.2.0)
|
|
122
|
-
tzinfo (2.0.6)
|
|
123
|
-
concurrent-ruby (~> 1.0)
|
|
124
103
|
unicode-display_width (3.2.0)
|
|
125
104
|
unicode-emoji (~> 4.1)
|
|
126
105
|
unicode-emoji (4.2.0)
|
|
@@ -137,59 +116,52 @@ DEPENDENCIES
|
|
|
137
116
|
rbs
|
|
138
117
|
rspec
|
|
139
118
|
rubocop
|
|
119
|
+
rubocop-performance
|
|
140
120
|
rubocop-rspec
|
|
141
121
|
steep
|
|
142
122
|
|
|
143
123
|
CHECKSUMS
|
|
144
|
-
activesupport (8.1.3) sha256=21a5e0dfbd4c3ddd9e1317ec6a4d782fa226e7867dc70b0743acda81a1dca20e
|
|
145
124
|
ast (2.4.3) sha256=954615157c1d6a382bc27d690d973195e79db7f55e9765ac7c481c60bdb4d383
|
|
146
|
-
base64 (0.3.0) sha256=27337aeabad6ffae05c265c450490628ef3ebd4b67be58257393227588f5a97b
|
|
147
|
-
bigdecimal (4.1.0) sha256=6dc07767aa3dc456ccd48e7ae70a07b474e9afd7c5bc576f80bd6da5c8dd6cae
|
|
148
125
|
concurrent-ruby (1.3.6) sha256=6b56837e1e7e5292f9864f34b69c5a2cbc75c0cf5338f1ce9903d10fa762d5ab
|
|
149
|
-
connection_pool (3.0.2) sha256=33fff5ba71a12d2aa26cb72b1db8bba2a1a01823559fb01d29eb74c286e62e0a
|
|
150
126
|
csv (3.3.5) sha256=6e5134ac3383ef728b7f02725d9872934f523cb40b961479f69cf3afa6c8e73f
|
|
151
127
|
diff-lcs (1.6.2) sha256=9ae0d2cba7d4df3075fe8cd8602a8604993efc0dfa934cff568969efb1909962
|
|
152
|
-
drb (2.2.3) sha256=0b00d6fdb50995fe4a45dea13663493c841112e4068656854646f418fda13373
|
|
153
128
|
ffi (1.17.4-arm64-darwin) sha256=19071aaf1419251b0a46852abf960e77330a3b334d13a4ab51d58b31a937001b
|
|
154
129
|
ffi (1.17.4-x86_64-linux-gnu) sha256=9d3db14c2eae074b382fa9c083fe95aec6e0a1451da249eab096c34002bc752d
|
|
155
130
|
fileutils (1.8.0) sha256=8c6b1df54e2540bdb2f39258f08af78853aa70bad52b4d394bbc6424593c6e02
|
|
156
|
-
html-to-markdown (3.
|
|
157
|
-
i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
|
|
131
|
+
html-to-markdown (3.2.0)
|
|
158
132
|
json (2.19.3) sha256=289b0bb53052a1fa8c34ab33cc750b659ba14a5c45f3fcf4b18762dc67c78646
|
|
159
133
|
language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
|
|
160
134
|
lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
|
|
161
135
|
listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
|
|
162
136
|
logger (1.7.0) sha256=196edec7cc44b66cfb40f9755ce11b392f21f7967696af15d274dde7edff0203
|
|
163
|
-
|
|
164
|
-
mutex_m (0.3.0) sha256=cfcb04ac16b69c4813777022fdceda24e9f798e48092a2b817eb4c0a782b0751
|
|
165
|
-
parallel (1.27.0) sha256=4ac151e1806b755fb4e2dc2332cbf0e54f2e24ba821ff2d3dcf86bf6dc4ae130
|
|
137
|
+
parallel (2.0.1) sha256=337782d3e39f4121e67563bf91dd8ece67f48923d90698614773a0ec9a5b2c7d
|
|
166
138
|
parser (3.3.11.1) sha256=d17ace7aabe3e72c3cc94043714be27cc6f852f104d81aa284c2281aecc65d54
|
|
167
139
|
prism (1.9.0) sha256=7b530c6a9f92c24300014919c9dcbc055bf4cdf51ec30aed099b06cd6674ef85
|
|
168
140
|
racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
|
|
169
141
|
rainbow (3.1.1) sha256=039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a
|
|
170
|
-
rake (13.
|
|
142
|
+
rake (13.4.1) sha256=b4e81bd6a748308a6799619d824ec6a23cd1acd07d9ec41e5f2ebfb2294447c8
|
|
171
143
|
rake-compiler (1.3.1) sha256=6b351612b6e2d73ddd5563ee799bb58685176e05363db6758504bd11573d670a
|
|
172
144
|
rake-compiler-dock (1.11.0) sha256=eab51f2cd533eb35cea6b624a75281f047123e70a64c58b607471bb49428f8c2
|
|
173
145
|
rb-fsevent (0.11.2) sha256=43900b972e7301d6570f64b850a5aa67833ee7d87b458ee92805d56b7318aefe
|
|
174
146
|
rb-inotify (0.11.1) sha256=a0a700441239b0ff18eb65e3866236cd78613d6b9f78fea1f9ac47a85e47be6e
|
|
175
|
-
rb_sys (0.9.
|
|
176
|
-
rbs (
|
|
177
|
-
regexp_parser (2.
|
|
147
|
+
rb_sys (0.9.126) sha256=ba958e0b8b4b89eeae0b3d24b64c809eb2c37e0ab0773a49e9b1c2e22c95aef8
|
|
148
|
+
rbs (4.0.2) sha256=af75671e66cd03434cc546622741ebf83f6197ec4328375805306330bf78ef25
|
|
149
|
+
regexp_parser (2.12.0) sha256=35a916a1d63190ab5c9009457136ae5f3c0c7512d60291d0d1378ba18ce08ebb
|
|
178
150
|
rspec (3.13.2) sha256=206284a08ad798e61f86d7ca3e376718d52c0bc944626b2349266f239f820587
|
|
179
151
|
rspec-core (3.13.6) sha256=a8823c6411667b60a8bca135364351dda34cd55e44ff94c4be4633b37d828b2d
|
|
180
152
|
rspec-expectations (3.13.5) sha256=33a4d3a1d95060aea4c94e9f237030a8f9eae5615e9bd85718fe3a09e4b58836
|
|
181
153
|
rspec-mocks (3.13.8) sha256=086ad3d3d17533f4237643de0b5c42f04b66348c28bf6b9c2d3f4a3b01af1d47
|
|
182
154
|
rspec-support (3.13.7) sha256=0640e5570872aafefd79867901deeeeb40b0c9875a36b983d85f54fb7381c47c
|
|
183
|
-
rubocop (1.86.
|
|
155
|
+
rubocop (1.86.1) sha256=44415f3f01d01a21e01132248d2fd0867572475b566ca188a0a42133a08d4531
|
|
184
156
|
rubocop-ast (1.49.1) sha256=4412f3ee70f6fe4546cc489548e0f6fcf76cafcfa80fa03af67098ffed755035
|
|
157
|
+
rubocop-performance (1.26.1) sha256=cd19b936ff196df85829d264b522fd4f98b6c89ad271fa52744a8c11b8f71834
|
|
185
158
|
rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
|
|
186
159
|
ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
|
|
187
160
|
securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1
|
|
188
|
-
steep (
|
|
161
|
+
steep (2.0.0) sha256=6eb0ecc09637bbb54f0a5f2cf63daea6d3208ccace64b4f1107d976333605c30
|
|
189
162
|
strscan (3.1.8) sha256=aae2db611a225559f21ffbb71765c9a4e60fd262534a9ea84f4f11c7f32f679e
|
|
190
163
|
terminal-table (4.0.0) sha256=f504793203f8251b2ea7c7068333053f0beeea26093ec9962e62ea79f94301d2
|
|
191
164
|
tsort (0.2.0) sha256=9650a793f6859a43b6641671278f79cfead60ac714148aabe4e3f0060480089f
|
|
192
|
-
tzinfo (2.0.6) sha256=8daf828cc77bcf7d63b0e3bdb6caa47e2272dcfaf4fbfe46f8c3a9df087a829b
|
|
193
165
|
unicode-display_width (3.2.0) sha256=0cdd96b5681a5949cdbc2c55e7b420facae74c4aaf9a9815eee1087cb1853c42
|
|
194
166
|
unicode-emoji (4.2.0) sha256=519e69150f75652e40bf736106cfbc8f0f73aa3fb6a65afe62fefa7f80b0f80f
|
|
195
167
|
uri (1.1.1) sha256=379fa58d27ffb1387eaada68c749d1426738bd0f654d812fcc07e7568f5c57c6
|
data/README.md
CHANGED
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/html-to-markdown?label=Java&color=007ec6" alt="Java">
|
|
19
19
|
</a>
|
|
20
20
|
<a href="https://pkg.go.dev/github.com/kreuzberg-dev/html-to-markdown/packages/go/v3/htmltomarkdown">
|
|
21
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/html-to-markdown?label=Go&color=007ec6&filter=v3.
|
|
21
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/html-to-markdown?label=Go&color=007ec6&filter=v3.2.0" alt="Go">
|
|
22
22
|
</a>
|
|
23
23
|
<a href="https://www.nuget.org/packages/KreuzbergDev.HtmlToMarkdown/">
|
|
24
24
|
<img src="https://img.shields.io/nuget/v/KreuzbergDev.HtmlToMarkdown?label=C%23&color=007ec6" alt="C#">
|
|
@@ -56,7 +56,6 @@
|
|
|
56
56
|
</a>
|
|
57
57
|
</div>
|
|
58
58
|
|
|
59
|
-
|
|
60
59
|
Blazing-fast HTML to Markdown conversion for Ruby, powered by the same Rust engine used by our Python, Node.js, WebAssembly, and PHP packages.
|
|
61
60
|
Ship identical Markdown across every runtime while enjoying native extension performance with Magnus bindings.
|
|
62
61
|
|
|
@@ -78,10 +77,10 @@ Requires Ruby 3.2+ with Magnus native extension bindings. Published for Linux, m
|
|
|
78
77
|
|
|
79
78
|
## Performance Snapshot
|
|
80
79
|
|
|
81
|
-
Apple M4
|
|
80
|
+
**Apple M4** · `convert()` · Real Wikipedia documents
|
|
82
81
|
|
|
83
82
|
| Document | Size | Latency | Throughput |
|
|
84
|
-
|
|
83
|
+
|----------|------|---------|------------|
|
|
85
84
|
| Lists (Timeline) | 129KB | 0.71ms | 182 MB/s |
|
|
86
85
|
| Tables (Countries) | 360KB | 2.15ms | 167 MB/s |
|
|
87
86
|
| Mixed (Python wiki) | 656KB | 4.89ms | 134 MB/s |
|
|
@@ -102,7 +101,6 @@ markdown = result[:content]
|
|
|
102
101
|
```
|
|
103
102
|
|
|
104
103
|
|
|
105
|
-
|
|
106
104
|
With conversion options:
|
|
107
105
|
|
|
108
106
|
```ruby
|
|
@@ -114,8 +112,6 @@ markdown = result[:content]
|
|
|
114
112
|
```
|
|
115
113
|
|
|
116
114
|
|
|
117
|
-
|
|
118
|
-
|
|
119
115
|
## API Reference
|
|
120
116
|
|
|
121
117
|
### Core Function
|
|
@@ -153,7 +149,6 @@ warnings = result[:warnings] # Any conversion warnings
|
|
|
153
149
|
- `extract_tables`: Enable structured table extraction into `result.tables` — default: `false`
|
|
154
150
|
- `output_format`: Output markup format (`"markdown"` | `"djot"` | `"plain"`) — default: `"markdown"`
|
|
155
151
|
|
|
156
|
-
|
|
157
152
|
## Djot Output Format
|
|
158
153
|
|
|
159
154
|
The library supports converting HTML to [Djot](https://djot.net/), a lightweight markup language similar to Markdown but with a different syntax for some elements. Set `output_format` to `"djot"` to use this format.
|
|
@@ -173,6 +168,7 @@ The library supports converting HTML to [Djot](https://djot.net/), a lightweight
|
|
|
173
168
|
### Example Usage
|
|
174
169
|
|
|
175
170
|
|
|
171
|
+
|
|
176
172
|
```ruby
|
|
177
173
|
require 'html_to_markdown'
|
|
178
174
|
|
|
@@ -188,14 +184,15 @@ djot = HtmlToMarkdown.convert(html, output_format: 'djot')
|
|
|
188
184
|
```
|
|
189
185
|
|
|
190
186
|
|
|
191
|
-
Djot's extended syntax allows you to express more semantic meaning in lightweight text, making it useful for documents that require strikethrough, insertion tracking, or mathematical notation.
|
|
192
187
|
|
|
188
|
+
Djot's extended syntax allows you to express more semantic meaning in lightweight text, making it useful for documents that require strikethrough, insertion tracking, or mathematical notation.
|
|
193
189
|
|
|
194
190
|
## Plain Text Output
|
|
195
191
|
|
|
196
192
|
Set `output_format` to `"plain"` to strip all markup and return only visible text. This bypasses the Markdown conversion pipeline entirely for maximum speed.
|
|
197
193
|
|
|
198
194
|
|
|
195
|
+
|
|
199
196
|
```ruby
|
|
200
197
|
require 'html_to_markdown'
|
|
201
198
|
|
|
@@ -206,6 +203,7 @@ plain = HtmlToMarkdown.convert(html, output_format: 'plain')
|
|
|
206
203
|
```
|
|
207
204
|
|
|
208
205
|
|
|
206
|
+
|
|
209
207
|
Plain text mode is useful for search indexing, text extraction, and feeding content to LLMs.
|
|
210
208
|
|
|
211
209
|
|
|
@@ -227,6 +225,7 @@ The metadata extraction feature enables comprehensive document analysis during c
|
|
|
227
225
|
### Example: Quick Start
|
|
228
226
|
|
|
229
227
|
|
|
228
|
+
|
|
230
229
|
```ruby
|
|
231
230
|
require 'html_to_markdown'
|
|
232
231
|
|
|
@@ -263,6 +262,7 @@ The visitor pattern enables custom HTML→Markdown conversion logic by providing
|
|
|
263
262
|
### Example: Quick Start
|
|
264
263
|
|
|
265
264
|
|
|
265
|
+
|
|
266
266
|
```ruby
|
|
267
267
|
require 'html_to_markdown'
|
|
268
268
|
|
|
@@ -289,7 +289,6 @@ markdown = result[:content]
|
|
|
289
289
|
|
|
290
290
|
|
|
291
291
|
|
|
292
|
-
|
|
293
292
|
## Examples
|
|
294
293
|
|
|
295
294
|
|
data/Rakefile
CHANGED
|
@@ -4,29 +4,23 @@ require 'bundler/gem_tasks'
|
|
|
4
4
|
require 'rake/extensiontask'
|
|
5
5
|
require 'rspec/core/rake_task'
|
|
6
6
|
|
|
7
|
-
GEMSPEC = Gem::Specification.load(File.expand_path('
|
|
8
|
-
|
|
9
|
-
# Vendor html-to-markdown-rs core crate before compilation
|
|
10
|
-
task :vendor do
|
|
11
|
-
vendor_script = File.expand_path('../../scripts/ci/ruby/vendor-core-crate.py', __dir__)
|
|
12
|
-
puts 'Vendoring html-to-markdown-rs core crate...'
|
|
13
|
-
sh "python3 #{vendor_script}"
|
|
14
|
-
end
|
|
7
|
+
GEMSPEC = Gem::Specification.load(File.expand_path('html_to_markdown_rs.gemspec', __dir__))
|
|
15
8
|
|
|
16
9
|
Rake::ExtensionTask.new('html_to_markdown_rb', GEMSPEC) do |ext|
|
|
17
10
|
ext.lib_dir = 'lib'
|
|
18
|
-
ext.ext_dir = 'ext/
|
|
11
|
+
ext.ext_dir = 'ext/html_to_markdown_rb'
|
|
19
12
|
ext.cross_compile = true
|
|
20
13
|
ext.cross_platform = %w[
|
|
21
14
|
x86_64-linux
|
|
15
|
+
aarch64-linux
|
|
22
16
|
x86_64-darwin
|
|
23
17
|
arm64-darwin
|
|
24
18
|
x64-mingw32
|
|
19
|
+
x64-mingw-ucrt
|
|
25
20
|
]
|
|
26
21
|
end
|
|
27
22
|
|
|
28
23
|
RSpec::Core::RakeTask.new(:spec)
|
|
29
24
|
|
|
30
|
-
task compile: :vendor
|
|
31
25
|
task spec: :compile
|
|
32
26
|
task default: :spec
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
[package]
|
|
2
|
+
name = "html-to-markdown-rb"
|
|
3
|
+
version = "3.2.0"
|
|
4
|
+
edition = "2024"
|
|
5
|
+
license = "MIT"
|
|
6
|
+
|
|
7
|
+
[lib]
|
|
8
|
+
crate-type = ["cdylib"]
|
|
9
|
+
|
|
10
|
+
[dependencies]
|
|
11
|
+
html-to-markdown-rs = { path = "../../../../crates/html-to-markdown", features = ["full", "metadata", "visitor", "serde", "inline-images"] }
|
|
12
|
+
magnus = "0.8"
|
|
13
|
+
serde = { version = "1", features = ["derive"] }
|
|
14
|
+
serde_json = "1"
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
[workspace]
|
|
2
|
+
|
|
3
|
+
[package]
|
|
4
|
+
name = "html-to-markdown-rb"
|
|
5
|
+
version = "3.2.0"
|
|
6
|
+
edition = "2024"
|
|
7
|
+
license = "MIT"
|
|
8
|
+
|
|
9
|
+
[lib]
|
|
10
|
+
crate-type = ["cdylib"]
|
|
11
|
+
|
|
12
|
+
[dependencies]
|
|
13
|
+
html-to-markdown-rs = { path = "../../../../crates/html-to-markdown", features = ["full", "metadata", "visitor", "serde", "inline-images"] }
|
|
14
|
+
magnus = "0.8"
|
|
15
|
+
serde = { version = "1", features = ["derive"] }
|
|
16
|
+
serde_json = "1"
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# This file is auto-generated by alef. DO NOT EDIT.
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require_relative 'html_to_markdown_rs/version'
|
|
5
|
+
require_relative 'html_to_markdown_rs/native'
|
|
6
|
+
|
|
7
|
+
module Html_to_markdown_rs
|
|
8
|
+
# Re-export all types and functions from native extension
|
|
9
|
+
end
|