html-to-markdown 3.1.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +42 -12
  3. data/Gemfile +1 -0
  4. data/Gemfile.lock +27 -55
  5. data/README.md +9 -10
  6. data/Rakefile +4 -10
  7. data/ext/html-to-markdown_rb/Cargo.toml +14 -0
  8. data/ext/html_to_markdown_rb/Cargo.toml +16 -0
  9. data/ext/html_to_markdown_rb/extconf.rb +10 -0
  10. data/ext/html_to_markdown_rb/src/html_to_markdown_rs/version.rb +6 -0
  11. data/ext/html_to_markdown_rb/src/html_to_markdown_rs.rb +9 -0
  12. data/ext/html_to_markdown_rb/src/lib.rs +3941 -0
  13. data/html-to-markdown-rb.gemspec +1 -1
  14. data/lib/html_to_markdown/version.rb +1 -1
  15. data/lib/html_to_markdown.rb +31 -21
  16. data/{ext/html-to-markdown-rb/native/extconf.rb → lib/html_to_markdown_rs.rb} +1 -1
  17. data/sig/html_to_markdown.rbs +17 -5
  18. data/vendor/Cargo.toml +4 -4
  19. data/vendor/html-to-markdown-rs/Cargo.toml +2 -2
  20. data/vendor/html-to-markdown-rs/examples/test_deser.rs +12 -0
  21. data/vendor/html-to-markdown-rs/src/converter/block/mod.rs +1 -1
  22. data/vendor/html-to-markdown-rs/src/converter/block/table/mod.rs +1 -1
  23. data/vendor/html-to-markdown-rs/src/converter/form/mod.rs +1 -1
  24. data/vendor/html-to-markdown-rs/src/converter/inline/mod.rs +1 -1
  25. data/vendor/html-to-markdown-rs/src/converter/list/item.rs +10 -2
  26. data/vendor/html-to-markdown-rs/src/converter/mod.rs +2 -2
  27. data/vendor/html-to-markdown-rs/src/converter/semantic/mod.rs +1 -1
  28. data/vendor/html-to-markdown-rs/src/converter/utility/content.rs +1 -1
  29. data/vendor/html-to-markdown-rs/src/exports.rs +3 -3
  30. data/vendor/html-to-markdown-rs/src/inline_images.rs +1 -1
  31. data/vendor/html-to-markdown-rs/src/lib.rs +1 -2
  32. data/vendor/html-to-markdown-rs/src/metadata/config.rs +1 -1
  33. data/vendor/html-to-markdown-rs/src/metadata/mod.rs +5 -5
  34. data/vendor/html-to-markdown-rs/src/options/conversion.rs +6 -12
  35. data/vendor/html-to-markdown-rs/src/options/mod.rs +1 -1
  36. data/vendor/html-to-markdown-rs/src/options/preprocessing.rs +3 -9
  37. data/vendor/html-to-markdown-rs/src/options/validation.rs +3 -3
  38. data/vendor/html-to-markdown-rs/src/types/document.rs +11 -0
  39. data/vendor/html-to-markdown-rs/src/types/result.rs +5 -2
  40. data/vendor/html-to-markdown-rs/src/types/tables.rs +1 -1
  41. data/vendor/html-to-markdown-rs/src/visitor/mod.rs +1 -1
  42. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/state.rs +1 -1
  43. data/vendor/html-to-markdown-rs/src/visitor_helpers/helpers/traversal.rs +1 -1
  44. data/vendor/html-to-markdown-rs/src/visitor_helpers.rs +8 -8
  45. data/vendor/html-to-markdown-rs/tests/commonmark_compliance_test.rs +6 -0
  46. data/vendor/html-to-markdown-rs/tests/integration_test.rs +3 -3
  47. data/vendor/html-to-markdown-rs/tests/issue_140_regressions.rs +8 -2
  48. data/vendor/html-to-markdown-rs/tests/lists_test.rs +4 -4
  49. metadata +11 -18
  50. data/ext/html-to-markdown-rb/extconf.rb +0 -41
  51. data/ext/html-to-markdown-rb/native/Cargo.lock +0 -934
  52. data/ext/html-to-markdown-rb/native/Cargo.toml +0 -48
  53. data/ext/html-to-markdown-rb/native/README.md +0 -215
  54. data/ext/html-to-markdown-rb/native/src/conversion/inline_images.rs +0 -54
  55. data/ext/html-to-markdown-rb/native/src/conversion/metadata.rs +0 -158
  56. data/ext/html-to-markdown-rb/native/src/conversion/mod.rs +0 -11
  57. data/ext/html-to-markdown-rb/native/src/lib.rs +0 -128
  58. data/ext/html-to-markdown-rb/native/src/options.rs +0 -238
  59. data/ext/html-to-markdown-rb/native/src/types.rs +0 -24
  60. data/lib/html_to_markdown/cli.rb +0 -21
  61. data/lib/html_to_markdown/cli_proxy.rb +0 -74
  62. data/spec/cli_proxy_spec.rb +0 -42
  63. data/spec/spec_helper.rb +0 -10
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c23b51454716c4f5224bc9a0b6cfcfcf3f9935709379395662d9d89cab96f223
4
- data.tar.gz: '0878f8bad06ca970013d87f6064150bed2db8b5e12d087474acaa4dd17a00559'
3
+ metadata.gz: 141498bb290e80b0432a4e56fb0f624dbf1ca675204d02ee2a500908d683c336
4
+ data.tar.gz: 8b9f10451650a18ea5a5131a12646208218f5c81e604eecd10664654b772dff8
5
5
  SHA512:
6
- metadata.gz: e21bd6d2ec9cbd40df454f2b441cb2da333b1c73a062686f830c4bd3368dad2dacec3bb0953f5c8902ad2ab411453c690597d64f0c86103d65b71438c647a7f1
7
- data.tar.gz: 38cf61f5035e6becae227f4117f10208eb9b0ca2d99b805b6e8feefdc8bf2611e44605c967218ea69892df3af8a417026fb70e20cb3ab9a9e28771c3ecc723c9
6
+ metadata.gz: d2fe03445e09fce3d4687dcffe357d311c1965420a106805ff66b775db91aff6b75e89233d39dd4af79143a95c35e835c0054dbf0b070b2ef3135261d6bf1a38
7
+ data.tar.gz: 36d1fba490663808f1ceb7eccce1f0952e518bf1285d4a8dfef36ff4cab40e683f30f7f2798727215689a817fdc5663466f9fccee8ff43b03dde66b4a98f62d7
data/.rubocop.yml CHANGED
@@ -1,29 +1,59 @@
1
1
  plugins:
2
+ - rubocop-performance
2
3
  - rubocop-rspec
3
4
 
4
5
  AllCops:
5
- NewCops: enable
6
6
  TargetRubyVersion: 3.2
7
+ NewCops: enable
8
+ SuggestExtensions: false
7
9
  Exclude:
8
- - "tmp/**/*"
9
- - "vendor/**/*"
10
+ - 'vendor/**/*'
11
+ - 'tmp/**/*'
12
+ - 'lib/**/*.bundle'
13
+ - 'ext/**/*'
14
+
15
+ Style/FrozenStringLiteralComment:
16
+ Enabled: true
17
+ EnforcedStyle: always
18
+
19
+ Style/StringLiterals:
20
+ Enabled: true
21
+ EnforcedStyle: single_quotes
22
+
23
+ Style/StringLiteralsInInterpolation:
24
+ Enabled: true
25
+ EnforcedStyle: single_quotes
10
26
 
11
27
  Style/Documentation:
12
28
  Enabled: false
13
29
 
14
- Metrics/BlockLength:
30
+ Layout/LineLength:
31
+ Max: 120
32
+ AllowedPatterns:
33
+ - '\A\s*#'
15
34
  Exclude:
16
- - "spec/**/*"
17
- - "*.gemspec"
35
+ - 'spec/**/*'
18
36
 
19
37
  Metrics/MethodLength:
20
- Max: 15
38
+ Max: 20
39
+ Exclude:
40
+ - 'spec/**/*'
21
41
 
22
- RSpec/MultipleExpectations:
23
- Enabled: false
42
+ Metrics/BlockLength:
43
+ Enabled: true
44
+ Max: 350
45
+ CountComments: false
46
+
47
+ Metrics/AbcSize:
48
+ Max: 20
49
+ Exclude:
50
+ - 'spec/**/*'
24
51
 
25
52
  RSpec/ExampleLength:
26
- Enabled: false
53
+ Max: 50
27
54
 
28
- RSpec/SpecFilePathFormat:
29
- Enabled: false
55
+ RSpec/MultipleExpectations:
56
+ Max: 25
57
+
58
+ RSpec/NestedGroups:
59
+ Max: 6
data/Gemfile CHANGED
@@ -12,6 +12,7 @@ group :development, :test do
12
12
  gem 'rb_sys' # provides build tooling when developing locally
13
13
  gem 'rspec'
14
14
  gem 'rubocop', require: false
15
+ gem 'rubocop-performance', require: false
15
16
  gem 'rubocop-rspec', require: false
16
17
  gem 'steep', require: false
17
18
  end
data/Gemfile.lock CHANGED
@@ -1,38 +1,19 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- html-to-markdown (3.1.0)
4
+ html-to-markdown (3.2.0)
5
5
  rb_sys (>= 0.9, < 1.0)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
- activesupport (8.1.3)
11
- base64
12
- bigdecimal
13
- concurrent-ruby (~> 1.0, >= 1.3.1)
14
- connection_pool (>= 2.2.5)
15
- drb
16
- i18n (>= 1.6, < 2)
17
- json
18
- logger (>= 1.4.2)
19
- minitest (>= 5.1)
20
- securerandom (>= 0.3)
21
- tzinfo (~> 2.0, >= 2.0.5)
22
- uri (>= 0.13.1)
23
10
  ast (2.4.3)
24
- base64 (0.3.0)
25
- bigdecimal (4.1.0)
26
11
  concurrent-ruby (1.3.6)
27
- connection_pool (3.0.2)
28
12
  csv (3.3.5)
29
13
  diff-lcs (1.6.2)
30
- drb (2.2.3)
31
14
  ffi (1.17.4-arm64-darwin)
32
15
  ffi (1.17.4-x86_64-linux-gnu)
33
16
  fileutils (1.8.0)
34
- i18n (1.14.8)
35
- concurrent-ruby (~> 1.0)
36
17
  json (2.19.3)
37
18
  language_server-protocol (3.17.0.5)
38
19
  lint_roller (1.1.0)
@@ -41,31 +22,28 @@ GEM
41
22
  rb-fsevent (~> 0.10, >= 0.10.3)
42
23
  rb-inotify (~> 0.9, >= 0.9.10)
43
24
  logger (1.7.0)
44
- minitest (6.0.3)
45
- drb (~> 2.0)
46
- prism (~> 1.5)
47
- mutex_m (0.3.0)
48
- parallel (1.27.0)
25
+ parallel (2.0.1)
49
26
  parser (3.3.11.1)
50
27
  ast (~> 2.4.1)
51
28
  racc
52
29
  prism (1.9.0)
53
30
  racc (1.8.1)
54
31
  rainbow (3.1.1)
55
- rake (13.3.1)
32
+ rake (13.4.1)
56
33
  rake-compiler (1.3.1)
57
34
  rake
58
35
  rake-compiler-dock (1.11.0)
59
36
  rb-fsevent (0.11.2)
60
37
  rb-inotify (0.11.1)
61
38
  ffi (~> 1.0)
62
- rb_sys (0.9.125)
39
+ rb_sys (0.9.126)
63
40
  json (>= 2)
64
41
  rake-compiler-dock (= 1.11.0)
65
- rbs (3.10.4)
42
+ rbs (4.0.2)
66
43
  logger
44
+ prism (>= 1.6.0)
67
45
  tsort
68
- regexp_parser (2.11.3)
46
+ regexp_parser (2.12.0)
69
47
  rspec (3.13.2)
70
48
  rspec-core (~> 3.13.0)
71
49
  rspec-expectations (~> 3.13.0)
@@ -79,11 +57,11 @@ GEM
79
57
  diff-lcs (>= 1.2.0, < 2.0)
80
58
  rspec-support (~> 3.13.0)
81
59
  rspec-support (3.13.7)
82
- rubocop (1.86.0)
60
+ rubocop (1.86.1)
83
61
  json (~> 2.3)
84
62
  language_server-protocol (~> 3.17.0.2)
85
63
  lint_roller (~> 1.1.0)
86
- parallel (~> 1.10)
64
+ parallel (>= 1.10)
87
65
  parser (>= 3.3.0.2)
88
66
  rainbow (>= 2.2.2, < 4.0)
89
67
  regexp_parser (>= 2.9.3, < 3.0)
@@ -93,13 +71,16 @@ GEM
93
71
  rubocop-ast (1.49.1)
94
72
  parser (>= 3.3.7.2)
95
73
  prism (~> 1.7)
74
+ rubocop-performance (1.26.1)
75
+ lint_roller (~> 1.1)
76
+ rubocop (>= 1.75.0, < 2.0)
77
+ rubocop-ast (>= 1.47.1, < 2.0)
96
78
  rubocop-rspec (3.9.0)
97
79
  lint_roller (~> 1.1)
98
80
  rubocop (~> 1.81)
99
81
  ruby-progressbar (1.13.0)
100
82
  securerandom (0.4.1)
101
- steep (1.10.0)
102
- activesupport (>= 5.1)
83
+ steep (2.0.0)
103
84
  concurrent-ruby (>= 1.1.10)
104
85
  csv (>= 3.0.9)
105
86
  fileutils (>= 1.1.0)
@@ -107,10 +88,10 @@ GEM
107
88
  language_server-protocol (>= 3.17.0.4, < 4.0)
108
89
  listen (~> 3.0)
109
90
  logger (>= 1.3.0)
110
- mutex_m (>= 0.3.0)
111
- parser (>= 3.1)
91
+ parser (>= 3.2)
92
+ prism (>= 0.25.0)
112
93
  rainbow (>= 2.2.2, < 4.0)
113
- rbs (~> 3.9)
94
+ rbs (~> 4.0)
114
95
  securerandom (>= 0.1)
115
96
  strscan (>= 1.0.0)
116
97
  terminal-table (>= 2, < 5)
@@ -119,8 +100,6 @@ GEM
119
100
  terminal-table (4.0.0)
120
101
  unicode-display_width (>= 1.1.1, < 4)
121
102
  tsort (0.2.0)
122
- tzinfo (2.0.6)
123
- concurrent-ruby (~> 1.0)
124
103
  unicode-display_width (3.2.0)
125
104
  unicode-emoji (~> 4.1)
126
105
  unicode-emoji (4.2.0)
@@ -137,59 +116,52 @@ DEPENDENCIES
137
116
  rbs
138
117
  rspec
139
118
  rubocop
119
+ rubocop-performance
140
120
  rubocop-rspec
141
121
  steep
142
122
 
143
123
  CHECKSUMS
144
- activesupport (8.1.3) sha256=21a5e0dfbd4c3ddd9e1317ec6a4d782fa226e7867dc70b0743acda81a1dca20e
145
124
  ast (2.4.3) sha256=954615157c1d6a382bc27d690d973195e79db7f55e9765ac7c481c60bdb4d383
146
- base64 (0.3.0) sha256=27337aeabad6ffae05c265c450490628ef3ebd4b67be58257393227588f5a97b
147
- bigdecimal (4.1.0) sha256=6dc07767aa3dc456ccd48e7ae70a07b474e9afd7c5bc576f80bd6da5c8dd6cae
148
125
  concurrent-ruby (1.3.6) sha256=6b56837e1e7e5292f9864f34b69c5a2cbc75c0cf5338f1ce9903d10fa762d5ab
149
- connection_pool (3.0.2) sha256=33fff5ba71a12d2aa26cb72b1db8bba2a1a01823559fb01d29eb74c286e62e0a
150
126
  csv (3.3.5) sha256=6e5134ac3383ef728b7f02725d9872934f523cb40b961479f69cf3afa6c8e73f
151
127
  diff-lcs (1.6.2) sha256=9ae0d2cba7d4df3075fe8cd8602a8604993efc0dfa934cff568969efb1909962
152
- drb (2.2.3) sha256=0b00d6fdb50995fe4a45dea13663493c841112e4068656854646f418fda13373
153
128
  ffi (1.17.4-arm64-darwin) sha256=19071aaf1419251b0a46852abf960e77330a3b334d13a4ab51d58b31a937001b
154
129
  ffi (1.17.4-x86_64-linux-gnu) sha256=9d3db14c2eae074b382fa9c083fe95aec6e0a1451da249eab096c34002bc752d
155
130
  fileutils (1.8.0) sha256=8c6b1df54e2540bdb2f39258f08af78853aa70bad52b4d394bbc6424593c6e02
156
- html-to-markdown (3.1.0)
157
- i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
131
+ html-to-markdown (3.2.0)
158
132
  json (2.19.3) sha256=289b0bb53052a1fa8c34ab33cc750b659ba14a5c45f3fcf4b18762dc67c78646
159
133
  language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
160
134
  lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
161
135
  listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
162
136
  logger (1.7.0) sha256=196edec7cc44b66cfb40f9755ce11b392f21f7967696af15d274dde7edff0203
163
- minitest (6.0.3) sha256=88ac8a1de36c00692420e7cb3cc11a0773bbcb126aee1c249f320160a7d11411
164
- mutex_m (0.3.0) sha256=cfcb04ac16b69c4813777022fdceda24e9f798e48092a2b817eb4c0a782b0751
165
- parallel (1.27.0) sha256=4ac151e1806b755fb4e2dc2332cbf0e54f2e24ba821ff2d3dcf86bf6dc4ae130
137
+ parallel (2.0.1) sha256=337782d3e39f4121e67563bf91dd8ece67f48923d90698614773a0ec9a5b2c7d
166
138
  parser (3.3.11.1) sha256=d17ace7aabe3e72c3cc94043714be27cc6f852f104d81aa284c2281aecc65d54
167
139
  prism (1.9.0) sha256=7b530c6a9f92c24300014919c9dcbc055bf4cdf51ec30aed099b06cd6674ef85
168
140
  racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
169
141
  rainbow (3.1.1) sha256=039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a
170
- rake (13.3.1) sha256=8c9e89d09f66a26a01264e7e3480ec0607f0c497a861ef16063604b1b08eb19c
142
+ rake (13.4.1) sha256=b4e81bd6a748308a6799619d824ec6a23cd1acd07d9ec41e5f2ebfb2294447c8
171
143
  rake-compiler (1.3.1) sha256=6b351612b6e2d73ddd5563ee799bb58685176e05363db6758504bd11573d670a
172
144
  rake-compiler-dock (1.11.0) sha256=eab51f2cd533eb35cea6b624a75281f047123e70a64c58b607471bb49428f8c2
173
145
  rb-fsevent (0.11.2) sha256=43900b972e7301d6570f64b850a5aa67833ee7d87b458ee92805d56b7318aefe
174
146
  rb-inotify (0.11.1) sha256=a0a700441239b0ff18eb65e3866236cd78613d6b9f78fea1f9ac47a85e47be6e
175
- rb_sys (0.9.125) sha256=14efd4e07eaf7c07edb1bab548d2a4767869a47a8821fc5ea52d9bf982ef00a8
176
- rbs (3.10.4) sha256=b17d7c4be4bb31a11a3b529830f0aa206a807ca42f2e7921a3027dfc6b7e5ce8
177
- regexp_parser (2.11.3) sha256=ca13f381a173b7a93450e53459075c9b76a10433caadcb2f1180f2c741fc55a4
147
+ rb_sys (0.9.126) sha256=ba958e0b8b4b89eeae0b3d24b64c809eb2c37e0ab0773a49e9b1c2e22c95aef8
148
+ rbs (4.0.2) sha256=af75671e66cd03434cc546622741ebf83f6197ec4328375805306330bf78ef25
149
+ regexp_parser (2.12.0) sha256=35a916a1d63190ab5c9009457136ae5f3c0c7512d60291d0d1378ba18ce08ebb
178
150
  rspec (3.13.2) sha256=206284a08ad798e61f86d7ca3e376718d52c0bc944626b2349266f239f820587
179
151
  rspec-core (3.13.6) sha256=a8823c6411667b60a8bca135364351dda34cd55e44ff94c4be4633b37d828b2d
180
152
  rspec-expectations (3.13.5) sha256=33a4d3a1d95060aea4c94e9f237030a8f9eae5615e9bd85718fe3a09e4b58836
181
153
  rspec-mocks (3.13.8) sha256=086ad3d3d17533f4237643de0b5c42f04b66348c28bf6b9c2d3f4a3b01af1d47
182
154
  rspec-support (3.13.7) sha256=0640e5570872aafefd79867901deeeeb40b0c9875a36b983d85f54fb7381c47c
183
- rubocop (1.86.0) sha256=4ff1186fe16ebe9baff5e7aad66bb0ad4cabf5cdcd419f773146dbba2565d186
155
+ rubocop (1.86.1) sha256=44415f3f01d01a21e01132248d2fd0867572475b566ca188a0a42133a08d4531
184
156
  rubocop-ast (1.49.1) sha256=4412f3ee70f6fe4546cc489548e0f6fcf76cafcfa80fa03af67098ffed755035
157
+ rubocop-performance (1.26.1) sha256=cd19b936ff196df85829d264b522fd4f98b6c89ad271fa52744a8c11b8f71834
185
158
  rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
186
159
  ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
187
160
  securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1
188
- steep (1.10.0) sha256=1b295b55f9aaff1b8d3ee42453ee55bc2a1078fda0268f288edb2dc014f4d7d1
161
+ steep (2.0.0) sha256=6eb0ecc09637bbb54f0a5f2cf63daea6d3208ccace64b4f1107d976333605c30
189
162
  strscan (3.1.8) sha256=aae2db611a225559f21ffbb71765c9a4e60fd262534a9ea84f4f11c7f32f679e
190
163
  terminal-table (4.0.0) sha256=f504793203f8251b2ea7c7068333053f0beeea26093ec9962e62ea79f94301d2
191
164
  tsort (0.2.0) sha256=9650a793f6859a43b6641671278f79cfead60ac714148aabe4e3f0060480089f
192
- tzinfo (2.0.6) sha256=8daf828cc77bcf7d63b0e3bdb6caa47e2272dcfaf4fbfe46f8c3a9df087a829b
193
165
  unicode-display_width (3.2.0) sha256=0cdd96b5681a5949cdbc2c55e7b420facae74c4aaf9a9815eee1087cb1853c42
194
166
  unicode-emoji (4.2.0) sha256=519e69150f75652e40bf736106cfbc8f0f73aa3fb6a65afe62fefa7f80b0f80f
195
167
  uri (1.1.1) sha256=379fa58d27ffb1387eaada68c749d1426738bd0f654d812fcc07e7568f5c57c6
data/README.md CHANGED
@@ -18,7 +18,7 @@
18
18
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/html-to-markdown?label=Java&color=007ec6" alt="Java">
19
19
  </a>
20
20
  <a href="https://pkg.go.dev/github.com/kreuzberg-dev/html-to-markdown/packages/go/v3/htmltomarkdown">
21
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/html-to-markdown?label=Go&color=007ec6&filter=v3.1.0" alt="Go">
21
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/html-to-markdown?label=Go&color=007ec6&filter=v3.2.0" alt="Go">
22
22
  </a>
23
23
  <a href="https://www.nuget.org/packages/KreuzbergDev.HtmlToMarkdown/">
24
24
  <img src="https://img.shields.io/nuget/v/KreuzbergDev.HtmlToMarkdown?label=C%23&color=007ec6" alt="C#">
@@ -56,7 +56,6 @@
56
56
  </a>
57
57
  </div>
58
58
 
59
-
60
59
  Blazing-fast HTML to Markdown conversion for Ruby, powered by the same Rust engine used by our Python, Node.js, WebAssembly, and PHP packages.
61
60
  Ship identical Markdown across every runtime while enjoying native extension performance with Magnus bindings.
62
61
 
@@ -78,10 +77,10 @@ Requires Ruby 3.2+ with Magnus native extension bindings. Published for Linux, m
78
77
 
79
78
  ## Performance Snapshot
80
79
 
81
- Apple M4 Real Wikipedia documents • `convert()` (Ruby)
80
+ **Apple M4** · `convert()` · Real Wikipedia documents
82
81
 
83
82
  | Document | Size | Latency | Throughput |
84
- | -------- | ---- | ------- | ---------- |
83
+ |----------|------|---------|------------|
85
84
  | Lists (Timeline) | 129KB | 0.71ms | 182 MB/s |
86
85
  | Tables (Countries) | 360KB | 2.15ms | 167 MB/s |
87
86
  | Mixed (Python wiki) | 656KB | 4.89ms | 134 MB/s |
@@ -102,7 +101,6 @@ markdown = result[:content]
102
101
  ```
103
102
 
104
103
 
105
-
106
104
  With conversion options:
107
105
 
108
106
  ```ruby
@@ -114,8 +112,6 @@ markdown = result[:content]
114
112
  ```
115
113
 
116
114
 
117
-
118
-
119
115
  ## API Reference
120
116
 
121
117
  ### Core Function
@@ -153,7 +149,6 @@ warnings = result[:warnings] # Any conversion warnings
153
149
  - `extract_tables`: Enable structured table extraction into `result.tables` — default: `false`
154
150
  - `output_format`: Output markup format (`"markdown"` | `"djot"` | `"plain"`) — default: `"markdown"`
155
151
 
156
-
157
152
  ## Djot Output Format
158
153
 
159
154
  The library supports converting HTML to [Djot](https://djot.net/), a lightweight markup language similar to Markdown but with a different syntax for some elements. Set `output_format` to `"djot"` to use this format.
@@ -173,6 +168,7 @@ The library supports converting HTML to [Djot](https://djot.net/), a lightweight
173
168
  ### Example Usage
174
169
 
175
170
 
171
+
176
172
  ```ruby
177
173
  require 'html_to_markdown'
178
174
 
@@ -188,14 +184,15 @@ djot = HtmlToMarkdown.convert(html, output_format: 'djot')
188
184
  ```
189
185
 
190
186
 
191
- Djot's extended syntax allows you to express more semantic meaning in lightweight text, making it useful for documents that require strikethrough, insertion tracking, or mathematical notation.
192
187
 
188
+ Djot's extended syntax allows you to express more semantic meaning in lightweight text, making it useful for documents that require strikethrough, insertion tracking, or mathematical notation.
193
189
 
194
190
  ## Plain Text Output
195
191
 
196
192
  Set `output_format` to `"plain"` to strip all markup and return only visible text. This bypasses the Markdown conversion pipeline entirely for maximum speed.
197
193
 
198
194
 
195
+
199
196
  ```ruby
200
197
  require 'html_to_markdown'
201
198
 
@@ -206,6 +203,7 @@ plain = HtmlToMarkdown.convert(html, output_format: 'plain')
206
203
  ```
207
204
 
208
205
 
206
+
209
207
  Plain text mode is useful for search indexing, text extraction, and feeding content to LLMs.
210
208
 
211
209
 
@@ -227,6 +225,7 @@ The metadata extraction feature enables comprehensive document analysis during c
227
225
  ### Example: Quick Start
228
226
 
229
227
 
228
+
230
229
  ```ruby
231
230
  require 'html_to_markdown'
232
231
 
@@ -263,6 +262,7 @@ The visitor pattern enables custom HTML→Markdown conversion logic by providing
263
262
  ### Example: Quick Start
264
263
 
265
264
 
265
+
266
266
  ```ruby
267
267
  require 'html_to_markdown'
268
268
 
@@ -289,7 +289,6 @@ markdown = result[:content]
289
289
 
290
290
 
291
291
 
292
-
293
292
  ## Examples
294
293
 
295
294
 
data/Rakefile CHANGED
@@ -4,29 +4,23 @@ require 'bundler/gem_tasks'
4
4
  require 'rake/extensiontask'
5
5
  require 'rspec/core/rake_task'
6
6
 
7
- GEMSPEC = Gem::Specification.load(File.expand_path('html-to-markdown-rb.gemspec', __dir__))
8
-
9
- # Vendor html-to-markdown-rs core crate before compilation
10
- task :vendor do
11
- vendor_script = File.expand_path('../../scripts/ci/ruby/vendor-core-crate.py', __dir__)
12
- puts 'Vendoring html-to-markdown-rs core crate...'
13
- sh "python3 #{vendor_script}"
14
- end
7
+ GEMSPEC = Gem::Specification.load(File.expand_path('html_to_markdown_rs.gemspec', __dir__))
15
8
 
16
9
  Rake::ExtensionTask.new('html_to_markdown_rb', GEMSPEC) do |ext|
17
10
  ext.lib_dir = 'lib'
18
- ext.ext_dir = 'ext/html-to-markdown-rb'
11
+ ext.ext_dir = 'ext/html_to_markdown_rb'
19
12
  ext.cross_compile = true
20
13
  ext.cross_platform = %w[
21
14
  x86_64-linux
15
+ aarch64-linux
22
16
  x86_64-darwin
23
17
  arm64-darwin
24
18
  x64-mingw32
19
+ x64-mingw-ucrt
25
20
  ]
26
21
  end
27
22
 
28
23
  RSpec::Core::RakeTask.new(:spec)
29
24
 
30
- task compile: :vendor
31
25
  task spec: :compile
32
26
  task default: :spec
@@ -0,0 +1,14 @@
1
+ [package]
2
+ name = "html-to-markdown-rb"
3
+ version = "3.2.0"
4
+ edition = "2024"
5
+ license = "MIT"
6
+
7
+ [lib]
8
+ crate-type = ["cdylib"]
9
+
10
+ [dependencies]
11
+ html-to-markdown-rs = { path = "../../../../crates/html-to-markdown", features = ["full", "metadata", "visitor", "serde", "inline-images"] }
12
+ magnus = "0.8"
13
+ serde = { version = "1", features = ["derive"] }
14
+ serde_json = "1"
@@ -0,0 +1,16 @@
1
+ [workspace]
2
+
3
+ [package]
4
+ name = "html-to-markdown-rb"
5
+ version = "3.2.0"
6
+ edition = "2024"
7
+ license = "MIT"
8
+
9
+ [lib]
10
+ crate-type = ["cdylib"]
11
+
12
+ [dependencies]
13
+ html-to-markdown-rs = { path = "../../../../crates/html-to-markdown", features = ["full", "metadata", "visitor", "serde", "inline-images"] }
14
+ magnus = "0.8"
15
+ serde = { version = "1", features = ["derive"] }
16
+ serde_json = "1"
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mkmf'
4
+ require 'rb_sys/mkmf'
5
+
6
+ default_profile = ENV.fetch('CARGO_PROFILE', 'release')
7
+
8
+ create_rust_makefile('html_to_markdown_rb') do |config|
9
+ config.profile = default_profile.to_sym
10
+ end
@@ -0,0 +1,6 @@
1
+ # This file is auto-generated by alef. DO NOT EDIT.
2
+ # frozen_string_literal: true
3
+
4
+ module Html_to_markdown_rs
5
+ VERSION = "3.2.0"
6
+ end
@@ -0,0 +1,9 @@
1
+ # This file is auto-generated by alef. DO NOT EDIT.
2
+ # frozen_string_literal: true
3
+
4
+ require_relative 'html_to_markdown_rs/version'
5
+ require_relative 'html_to_markdown_rs/native'
6
+
7
+ module Html_to_markdown_rs
8
+ # Re-export all types and functions from native extension
9
+ end