html-to-markdown 2.27.2 → 2.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +8 -8
  3. data/ext/html-to-markdown-rb/native/Cargo.toml +1 -1
  4. data/ext/html-to-markdown-rb/native/src/conversion/mod.rs +6 -0
  5. data/ext/html-to-markdown-rb/native/src/conversion/tables.rs +71 -0
  6. data/ext/html-to-markdown-rb/native/src/lib.rs +27 -1
  7. data/lib/html_to_markdown/version.rb +1 -1
  8. data/lib/html_to_markdown.rb +29 -0
  9. data/rust-vendor/getrandom/.cargo-checksum.json +1 -1
  10. data/rust-vendor/getrandom/.cargo_vcs_info.json +1 -1
  11. data/rust-vendor/getrandom/CHANGELOG.md +62 -43
  12. data/rust-vendor/getrandom/Cargo.lock +49 -56
  13. data/rust-vendor/getrandom/Cargo.toml +2 -2
  14. data/rust-vendor/getrandom/Cargo.toml.orig +2 -2
  15. data/rust-vendor/getrandom/src/backends/efi_rng.rs +8 -10
  16. data/rust-vendor/getrandom/src/backends/getentropy.rs +13 -4
  17. data/rust-vendor/getrandom/src/backends/linux_android_with_fallback.rs +10 -25
  18. data/rust-vendor/getrandom/src/backends/netbsd.rs +17 -25
  19. data/rust-vendor/getrandom/src/backends/rdrand.rs +15 -9
  20. data/rust-vendor/getrandom/src/backends/rndr.rs +2 -1
  21. data/rust-vendor/getrandom/src/backends/vxworks.rs +7 -3
  22. data/rust-vendor/getrandom/src/backends/windows.rs +21 -5
  23. data/rust-vendor/getrandom/src/utils/lazy_bool.rs +39 -0
  24. data/rust-vendor/getrandom/src/utils/lazy_ptr.rs +57 -0
  25. data/rust-vendor/html-to-markdown-rs/Cargo.toml +2 -2
  26. data/rust-vendor/html-to-markdown-rs/README.md +29 -0
  27. data/rust-vendor/html-to-markdown-rs/src/convert_api.rs +368 -0
  28. data/rust-vendor/html-to-markdown-rs/src/converter/main.rs +10 -5
  29. data/rust-vendor/html-to-markdown-rs/src/converter/text_node.rs +2 -1
  30. data/rust-vendor/html-to-markdown-rs/src/lib.rs +3 -0
  31. data/rust-vendor/html-to-markdown-rs/src/prelude.rs +3 -0
  32. data/rust-vendor/html-to-markdown-rs/tests/issue_216_217_regressions.rs +82 -0
  33. data/rust-vendor/quote/.cargo-checksum.json +1 -1
  34. data/rust-vendor/quote/.cargo_vcs_info.json +1 -1
  35. data/rust-vendor/quote/.github/workflows/ci.yml +2 -2
  36. data/rust-vendor/quote/Cargo.lock +21 -21
  37. data/rust-vendor/quote/Cargo.toml +2 -2
  38. data/rust-vendor/quote/Cargo.toml.orig +2 -2
  39. data/rust-vendor/quote/README.md +0 -1
  40. data/rust-vendor/quote/src/lib.rs +1 -1
  41. data/rust-vendor/quote/src/to_tokens.rs +7 -0
  42. data/rust-vendor/quote/tests/ui/not-quotable.stderr +1 -1
  43. data/rust-vendor/quote/tests/ui/not-repeatable.stderr +3 -11
  44. data/rust-vendor/r-efi/.cargo-checksum.json +1 -1
  45. data/rust-vendor/r-efi/.cargo_vcs_info.json +1 -1
  46. data/rust-vendor/r-efi/AUTHORS +1 -0
  47. data/rust-vendor/r-efi/Cargo.lock +1 -1
  48. data/rust-vendor/r-efi/Cargo.toml +1 -3
  49. data/rust-vendor/r-efi/Cargo.toml.orig +1 -5
  50. data/rust-vendor/r-efi/NEWS.md +16 -0
  51. data/rust-vendor/r-efi/src/base.rs +1 -1
  52. data/rust-vendor/r-efi/src/lib.rs +27 -12
  53. data/rust-vendor/r-efi/src/protocols/absolute_pointer.rs +4 -4
  54. data/rust-vendor/r-efi/src/protocols/block_io.rs +8 -8
  55. data/rust-vendor/r-efi/src/protocols/bus_specific_driver_override.rs +2 -2
  56. data/rust-vendor/r-efi/src/protocols/debug_support.rs +10 -10
  57. data/rust-vendor/r-efi/src/protocols/debugport.rs +8 -8
  58. data/rust-vendor/r-efi/src/protocols/decompress.rs +4 -4
  59. data/rust-vendor/r-efi/src/protocols/device_path_from_text.rs +4 -4
  60. data/rust-vendor/r-efi/src/protocols/device_path_to_text.rs +4 -4
  61. data/rust-vendor/r-efi/src/protocols/device_path_utilities.rs +16 -16
  62. data/rust-vendor/r-efi/src/protocols/disk_io.rs +4 -4
  63. data/rust-vendor/r-efi/src/protocols/disk_io2.rs +8 -8
  64. data/rust-vendor/r-efi/src/protocols/driver_binding.rs +6 -6
  65. data/rust-vendor/r-efi/src/protocols/driver_diagnostics2.rs +2 -2
  66. data/rust-vendor/r-efi/src/protocols/driver_family_override.rs +2 -2
  67. data/rust-vendor/r-efi/src/protocols/file.rs +28 -28
  68. data/rust-vendor/r-efi/src/protocols/graphics_output.rs +6 -6
  69. data/rust-vendor/r-efi/src/protocols/hii_database.rs +24 -24
  70. data/rust-vendor/r-efi/src/protocols/hii_font.rs +8 -8
  71. data/rust-vendor/r-efi/src/protocols/hii_font_ex.rs +10 -10
  72. data/rust-vendor/r-efi/src/protocols/hii_string.rs +10 -10
  73. data/rust-vendor/r-efi/src/protocols/ip4.rs +16 -16
  74. data/rust-vendor/r-efi/src/protocols/ip6.rs +18 -18
  75. data/rust-vendor/r-efi/src/protocols/load_file.rs +2 -2
  76. data/rust-vendor/r-efi/src/protocols/loaded_image.rs +2 -2
  77. data/rust-vendor/r-efi/src/protocols/managed_network.rs +16 -16
  78. data/rust-vendor/r-efi/src/protocols/memory_attribute.rs +6 -6
  79. data/rust-vendor/r-efi/src/protocols/mp_services.rs +15 -15
  80. data/rust-vendor/r-efi/src/protocols/pci_io.rs +26 -26
  81. data/rust-vendor/r-efi/src/protocols/platform_driver_override.rs +6 -6
  82. data/rust-vendor/r-efi/src/protocols/rng.rs +4 -4
  83. data/rust-vendor/r-efi/src/protocols/service_binding.rs +4 -4
  84. data/rust-vendor/r-efi/src/protocols/shell.rs +81 -81
  85. data/rust-vendor/r-efi/src/protocols/shell_dynamic_command.rs +4 -4
  86. data/rust-vendor/r-efi/src/protocols/simple_file_system.rs +2 -2
  87. data/rust-vendor/r-efi/src/protocols/simple_network.rs +26 -26
  88. data/rust-vendor/r-efi/src/protocols/simple_text_input.rs +4 -4
  89. data/rust-vendor/r-efi/src/protocols/simple_text_input_ex.rs +11 -11
  90. data/rust-vendor/r-efi/src/protocols/simple_text_output.rs +18 -18
  91. data/rust-vendor/r-efi/src/protocols/tcp4.rs +20 -20
  92. data/rust-vendor/r-efi/src/protocols/tcp6.rs +18 -18
  93. data/rust-vendor/r-efi/src/protocols/timestamp.rs +3 -3
  94. data/rust-vendor/r-efi/src/protocols/udp4.rs +16 -16
  95. data/rust-vendor/r-efi/src/protocols/udp6.rs +14 -14
  96. data/rust-vendor/r-efi/src/system.rs +115 -115
  97. data/rust-vendor/r-efi/src/vendor/intel/console_control.rs +6 -6
  98. data/rust-vendor/r-efi-5.3.0/.cargo-checksum.json +1 -0
  99. data/rust-vendor/r-efi-5.3.0/.cargo_vcs_info.json +6 -0
  100. data/rust-vendor/r-efi-5.3.0/.github/workflows/publish.yml +39 -0
  101. data/rust-vendor/r-efi-5.3.0/.github/workflows/rust-tests.yml +125 -0
  102. data/rust-vendor/r-efi-5.3.0/AUTHORS +74 -0
  103. data/rust-vendor/r-efi-5.3.0/Cargo.lock +16 -0
  104. data/rust-vendor/r-efi-5.3.0/Cargo.toml +70 -0
  105. data/rust-vendor/r-efi-5.3.0/Cargo.toml.orig +51 -0
  106. data/rust-vendor/r-efi-5.3.0/Makefile +85 -0
  107. data/rust-vendor/r-efi-5.3.0/NEWS.md +301 -0
  108. data/rust-vendor/r-efi-5.3.0/README.md +99 -0
  109. data/rust-vendor/r-efi-5.3.0/examples/freestanding.rs +34 -0
  110. data/rust-vendor/r-efi-5.3.0/examples/gop-query.rs +188 -0
  111. data/rust-vendor/r-efi-5.3.0/examples/hello-world.rs +55 -0
  112. data/rust-vendor/r-efi-5.3.0/src/base.rs +993 -0
  113. data/rust-vendor/r-efi-5.3.0/src/hii.rs +1300 -0
  114. data/rust-vendor/r-efi-5.3.0/src/lib.rs +182 -0
  115. data/rust-vendor/r-efi-5.3.0/src/protocols/absolute_pointer.rs +69 -0
  116. data/rust-vendor/r-efi-5.3.0/src/protocols/block_io.rs +70 -0
  117. data/rust-vendor/r-efi-5.3.0/src/protocols/bus_specific_driver_override.rs +32 -0
  118. data/rust-vendor/r-efi-5.3.0/src/protocols/debug_support.rs +835 -0
  119. data/rust-vendor/r-efi-5.3.0/src/protocols/debugport.rs +42 -0
  120. data/rust-vendor/r-efi-5.3.0/src/protocols/decompress.rs +37 -0
  121. data/rust-vendor/r-efi-5.3.0/src/protocols/device_path.rs +82 -0
  122. data/rust-vendor/r-efi-5.3.0/src/protocols/device_path_from_text.rs +26 -0
  123. data/rust-vendor/r-efi-5.3.0/src/protocols/device_path_to_text.rs +30 -0
  124. data/rust-vendor/r-efi-5.3.0/src/protocols/device_path_utilities.rs +63 -0
  125. data/rust-vendor/r-efi-5.3.0/src/protocols/disk_io.rs +40 -0
  126. data/rust-vendor/r-efi-5.3.0/src/protocols/disk_io2.rs +58 -0
  127. data/rust-vendor/r-efi-5.3.0/src/protocols/driver_binding.rs +42 -0
  128. data/rust-vendor/r-efi-5.3.0/src/protocols/driver_diagnostics2.rs +38 -0
  129. data/rust-vendor/r-efi-5.3.0/src/protocols/driver_family_override.rs +23 -0
  130. data/rust-vendor/r-efi-5.3.0/src/protocols/file.rs +183 -0
  131. data/rust-vendor/r-efi-5.3.0/src/protocols/graphics_output.rs +103 -0
  132. data/rust-vendor/r-efi-5.3.0/src/protocols/hii_database.rs +299 -0
  133. data/rust-vendor/r-efi-5.3.0/src/protocols/hii_font.rs +87 -0
  134. data/rust-vendor/r-efi-5.3.0/src/protocols/hii_font_ex.rs +107 -0
  135. data/rust-vendor/r-efi-5.3.0/src/protocols/hii_package_list.rs +14 -0
  136. data/rust-vendor/r-efi-5.3.0/src/protocols/hii_string.rs +71 -0
  137. data/rust-vendor/r-efi-5.3.0/src/protocols/ip4.rs +202 -0
  138. data/rust-vendor/r-efi-5.3.0/src/protocols/ip6.rs +264 -0
  139. data/rust-vendor/r-efi-5.3.0/src/protocols/load_file.rs +26 -0
  140. data/rust-vendor/r-efi-5.3.0/src/protocols/load_file2.rs +15 -0
  141. data/rust-vendor/r-efi-5.3.0/src/protocols/loaded_image.rs +39 -0
  142. data/rust-vendor/r-efi-5.3.0/src/protocols/loaded_image_device_path.rs +13 -0
  143. data/rust-vendor/r-efi-5.3.0/src/protocols/managed_network.rs +147 -0
  144. data/rust-vendor/r-efi-5.3.0/src/protocols/memory_attribute.rs +40 -0
  145. data/rust-vendor/r-efi-5.3.0/src/protocols/mp_services.rs +121 -0
  146. data/rust-vendor/r-efi-5.3.0/src/protocols/pci_io.rs +203 -0
  147. data/rust-vendor/r-efi-5.3.0/src/protocols/platform_driver_override.rs +46 -0
  148. data/rust-vendor/r-efi-5.3.0/src/protocols/rng.rs +83 -0
  149. data/rust-vendor/r-efi-5.3.0/src/protocols/service_binding.rs +20 -0
  150. data/rust-vendor/r-efi-5.3.0/src/protocols/shell.rs +295 -0
  151. data/rust-vendor/r-efi-5.3.0/src/protocols/shell_dynamic_command.rs +33 -0
  152. data/rust-vendor/r-efi-5.3.0/src/protocols/shell_parameters.rs +23 -0
  153. data/rust-vendor/r-efi-5.3.0/src/protocols/simple_file_system.rs +26 -0
  154. data/rust-vendor/r-efi-5.3.0/src/protocols/simple_network.rs +196 -0
  155. data/rust-vendor/r-efi-5.3.0/src/protocols/simple_text_input.rs +38 -0
  156. data/rust-vendor/r-efi-5.3.0/src/protocols/simple_text_input_ex.rs +85 -0
  157. data/rust-vendor/r-efi-5.3.0/src/protocols/simple_text_output.rs +86 -0
  158. data/rust-vendor/r-efi-5.3.0/src/protocols/tcp4.rs +224 -0
  159. data/rust-vendor/r-efi-5.3.0/src/protocols/tcp6.rs +202 -0
  160. data/rust-vendor/r-efi-5.3.0/src/protocols/timestamp.rs +32 -0
  161. data/rust-vendor/r-efi-5.3.0/src/protocols/udp4.rs +151 -0
  162. data/rust-vendor/r-efi-5.3.0/src/protocols/udp6.rs +137 -0
  163. data/rust-vendor/r-efi-5.3.0/src/protocols.rs +54 -0
  164. data/rust-vendor/r-efi-5.3.0/src/system.rs +1130 -0
  165. data/rust-vendor/r-efi-5.3.0/src/vendor/intel/console_control.rs +37 -0
  166. data/rust-vendor/r-efi-5.3.0/src/vendor.rs +10 -0
  167. data/rust-vendor/tokio/.cargo-checksum.json +1 -1
  168. data/rust-vendor/tokio/.cargo_vcs_info.json +1 -1
  169. data/rust-vendor/tokio/CHANGELOG.md +94 -0
  170. data/rust-vendor/tokio/Cargo.lock +1549 -0
  171. data/rust-vendor/tokio/Cargo.toml +96 -83
  172. data/rust-vendor/tokio/Cargo.toml.orig +7 -7
  173. data/rust-vendor/tokio/README.md +1 -1
  174. data/rust-vendor/tokio/src/fs/open_options.rs +4 -1
  175. data/rust-vendor/tokio/src/fs/read.rs +4 -1
  176. data/rust-vendor/tokio/src/fs/write.rs +4 -1
  177. data/rust-vendor/tokio/src/io/async_write.rs +3 -4
  178. data/rust-vendor/tokio/src/io/poll_evented.rs +23 -1
  179. data/rust-vendor/tokio/src/io/stderr.rs +15 -1
  180. data/rust-vendor/tokio/src/io/stdout.rs +14 -0
  181. data/rust-vendor/tokio/src/io/util/async_write_ext.rs +2 -2
  182. data/rust-vendor/tokio/src/io/util/write_buf.rs +11 -2
  183. data/rust-vendor/tokio/src/lib.rs +12 -28
  184. data/rust-vendor/tokio/src/macros/select.rs +6 -8
  185. data/rust-vendor/tokio/src/net/tcp/socket.rs +25 -1
  186. data/rust-vendor/tokio/src/net/tcp/stream.rs +40 -1
  187. data/rust-vendor/tokio/src/process/unix/pidfd_reaper.rs +1 -41
  188. data/rust-vendor/tokio/src/runtime/blocking/pool.rs +18 -14
  189. data/rust-vendor/tokio/src/runtime/builder.rs +10 -4
  190. data/rust-vendor/tokio/src/runtime/handle.rs +3 -2
  191. data/rust-vendor/tokio/src/runtime/io/driver/uring.rs +49 -61
  192. data/rust-vendor/tokio/src/runtime/io/driver.rs +6 -5
  193. data/rust-vendor/tokio/src/runtime/mod.rs +20 -1
  194. data/rust-vendor/tokio/src/runtime/runtime.rs +71 -1
  195. data/rust-vendor/tokio/src/runtime/scheduler/current_thread/mod.rs +24 -8
  196. data/rust-vendor/tokio/src/runtime/scheduler/multi_thread/worker.rs +5 -0
  197. data/rust-vendor/tokio/src/runtime/task/core.rs +1 -0
  198. data/rust-vendor/tokio/src/runtime/task/join.rs +7 -3
  199. data/rust-vendor/tokio/src/runtime/task/list.rs +5 -3
  200. data/rust-vendor/tokio/src/runtime/task/mod.rs +0 -5
  201. data/rust-vendor/tokio/src/runtime/tests/loom_blocking.rs +39 -1
  202. data/rust-vendor/tokio/src/signal/mod.rs +6 -17
  203. data/rust-vendor/tokio/src/signal/registry.rs +1 -1
  204. data/rust-vendor/tokio/src/signal/unix.rs +24 -44
  205. data/rust-vendor/tokio/src/signal/windows/sys.rs +52 -64
  206. data/rust-vendor/tokio/src/signal/windows.rs +35 -23
  207. data/rust-vendor/tokio/src/sync/mpsc/mod.rs +3 -1
  208. data/rust-vendor/tokio/src/sync/oneshot.rs +13 -0
  209. data/rust-vendor/tokio/src/sync/rwlock.rs +4 -5
  210. data/rust-vendor/tokio/src/sync/tests/loom_oneshot.rs +27 -1
  211. data/rust-vendor/tokio/src/task/blocking.rs +16 -1
  212. data/rust-vendor/tokio/src/task/builder.rs +2 -2
  213. data/rust-vendor/tokio/src/task/mod.rs +1 -1
  214. data/rust-vendor/tokio/src/task/spawn.rs +8 -3
  215. data/rust-vendor/tokio/src/task/yield_now.rs +13 -23
  216. data/rust-vendor/tokio/src/time/clock.rs +62 -0
  217. data/rust-vendor/tokio/src/util/memchr.rs +32 -4
  218. data/rust-vendor/tokio/src/util/sharded_list.rs +6 -4
  219. data/rust-vendor/tokio/tests/fs_link.rs +54 -0
  220. data/rust-vendor/tokio/tests/io_async_fd_memory_leak.rs +209 -0
  221. data/rust-vendor/tokio/tests/io_write_buf.rs +56 -0
  222. data/rust-vendor/tokio/tests/process_issue_7144.rs +8 -0
  223. data/rust-vendor/tokio/tests/rt_basic.rs +41 -0
  224. data/rust-vendor/tokio/tests/rt_common_before_park.rs +92 -0
  225. data/rust-vendor/tokio/tests/rt_metrics.rs +1 -1
  226. data/rust-vendor/tokio/tests/rt_panic.rs +12 -0
  227. data/rust-vendor/tokio/tests/rt_shutdown_err.rs +82 -0
  228. data/rust-vendor/tokio/tests/rt_threaded.rs +49 -1
  229. data/rust-vendor/tokio/tests/rt_unstable_metrics.rs +32 -0
  230. data/rust-vendor/tokio/tests/tcp_connect.rs +2 -3
  231. data/rust-vendor/tokio/tests/tcp_shutdown.rs +1 -3
  232. data/rust-vendor/tokio/tests/tcp_socket.rs +3 -4
  233. data/rust-vendor/tokio/tests/tcp_stream.rs +3 -0
  234. data/sig/html_to_markdown.rbs +46 -0
  235. data/spec/convert_with_tables_spec.rb +194 -0
  236. metadata +80 -3
  237. data/rust-vendor/getrandom/src/utils/lazy.rs +0 -64
@@ -62,7 +62,6 @@ async fn bind_before_connect() {
62
62
  }
63
63
 
64
64
  #[tokio::test]
65
- #[expect(deprecated)] // set_linger is deprecated
66
65
  async fn basic_linger() {
67
66
  // Create server
68
67
  let addr = assert_ok!("127.0.0.1:0".parse());
@@ -71,7 +70,7 @@ async fn basic_linger() {
71
70
 
72
71
  assert!(srv.linger().unwrap().is_none());
73
72
 
74
- srv.set_linger(Some(Duration::new(0, 0))).unwrap();
73
+ srv.set_zero_linger().unwrap();
75
74
  assert_eq!(srv.linger().unwrap(), Some(Duration::new(0, 0)));
76
75
  }
77
76
 
@@ -124,10 +123,10 @@ const SET_BUF_SIZE: u32 = 4096;
124
123
  // Linux doubles the buffer size for kernel usage, and exposes that when
125
124
  // retrieving the buffer size.
126
125
 
127
- #[cfg(not(target_os = "linux"))]
126
+ #[cfg(not(any(target_os = "android", target_os = "linux")))]
128
127
  const GET_BUF_SIZE: u32 = SET_BUF_SIZE;
129
128
 
130
- #[cfg(target_os = "linux")]
129
+ #[cfg(any(target_os = "android", target_os = "linux"))]
131
130
  const GET_BUF_SIZE: u32 = 2 * SET_BUF_SIZE;
132
131
 
133
132
  test!(keepalive, set_keepalive(true));
@@ -25,6 +25,9 @@ async fn set_linger() {
25
25
  assert_ok!(stream.set_linger(Some(Duration::from_secs(1))));
26
26
  assert_eq!(stream.linger().unwrap().unwrap().as_secs(), 1);
27
27
 
28
+ assert_ok!(stream.set_zero_linger());
29
+ assert_eq!(stream.linger().unwrap().unwrap().as_secs(), 0);
30
+
28
31
  assert_ok!(stream.set_linger(None));
29
32
  assert!(stream.linger().unwrap().is_none());
30
33
  }
@@ -185,6 +185,18 @@ module HtmlToMarkdown
185
185
  structured_data: Array[structured_data]
186
186
  }
187
187
 
188
+ type table_data = {
189
+ cells: Array[Array[String]],
190
+ markdown: String,
191
+ is_header_row: Array[bool]
192
+ }
193
+
194
+ type table_extraction_result = {
195
+ content: String,
196
+ metadata: extended_metadata?,
197
+ tables: Array[table_data]
198
+ }
199
+
188
200
  # Native methods (implemented in Rust via Magnus/rb-sys)
189
201
  # These are aliased from the Rust extension and available as both module and instance methods
190
202
  private
@@ -217,6 +229,11 @@ module HtmlToMarkdown
217
229
  conversion_options? options,
218
230
  visitor? visitor
219
231
  ) -> String
232
+ def self.native_convert_with_tables: (
233
+ String html,
234
+ conversion_options? options,
235
+ metadata_config? metadata_config
236
+ ) -> table_extraction_result
220
237
 
221
238
  def native_convert: (String html, conversion_options? options) -> String
222
239
  def native_options: (conversion_options? options_hash) -> Options
@@ -246,6 +263,11 @@ module HtmlToMarkdown
246
263
  conversion_options? options,
247
264
  visitor? visitor
248
265
  ) -> String
266
+ def native_convert_with_tables: (
267
+ String html,
268
+ conversion_options? options,
269
+ metadata_config? metadata_config
270
+ ) -> table_extraction_result
249
271
 
250
272
  # Visitor interface for customizing conversion behavior
251
273
  type visitor = Object
@@ -422,6 +444,25 @@ module HtmlToMarkdown
422
444
  ?metadata_config metadata_config
423
445
  ) -> [String, extended_metadata]
424
446
 
447
+ # Convert HTML and extract tables as structured data
448
+ #
449
+ # Args:
450
+ # html: HTML string to convert
451
+ # options: Optional conversion configuration
452
+ # metadata_config: Optional metadata extraction configuration
453
+ #
454
+ # Returns:
455
+ # table_extraction_result: Hash containing content, metadata, and tables array
456
+ #
457
+ # Example:
458
+ # result = HtmlToMarkdown.convert_with_tables(html)
459
+ # puts result[:tables].length
460
+ def self.convert_with_tables: (
461
+ String html,
462
+ ?conversion_options options,
463
+ ?metadata_config metadata_config
464
+ ) -> table_extraction_result
465
+
425
466
  # Instance method versions (created by module_function)
426
467
  def convert: (String html, ?conversion_options options, ?visitor visitor) -> String
427
468
  def options: (?conversion_options options_hash) -> Options
@@ -449,4 +490,9 @@ module HtmlToMarkdown
449
490
  Options options_handle,
450
491
  ?metadata_config metadata_config
451
492
  ) -> [String, extended_metadata]
493
+ def convert_with_tables: (
494
+ String html,
495
+ ?conversion_options options,
496
+ ?metadata_config metadata_config
497
+ ) -> table_extraction_result
452
498
  end
@@ -0,0 +1,194 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ RSpec.describe HtmlToMarkdown do
6
+ describe '.convert_with_tables' do
7
+ it 'returns a hash with content, metadata, and tables keys' do
8
+ html = '<table><tr><td>Cell</td></tr></table>'
9
+ result = described_class.convert_with_tables(html)
10
+
11
+ expect(result).to be_a(Hash)
12
+ expect(result).to include(:content, :metadata, :tables)
13
+ end
14
+
15
+ context 'with a basic table with header' do
16
+ let(:html) do
17
+ <<~HTML
18
+ <table>
19
+ <thead>
20
+ <tr><th>Name</th><th>Age</th></tr>
21
+ </thead>
22
+ <tbody>
23
+ <tr><td>Alice</td><td>30</td></tr>
24
+ </tbody>
25
+ </table>
26
+ HTML
27
+ end
28
+
29
+ it 'extracts exactly one table' do
30
+ result = described_class.convert_with_tables(html)
31
+
32
+ expect(result[:tables].length).to eq(1)
33
+ end
34
+
35
+ it 'extracts cells as rows of columns' do
36
+ result = described_class.convert_with_tables(html)
37
+ table = result[:tables][0]
38
+
39
+ expect(table[:cells]).to be_an(Array)
40
+ expect(table[:cells].length).to eq(2)
41
+ expect(table[:cells][0]).to eq(%w[Name Age])
42
+ expect(table[:cells][1]).to eq(%w[Alice 30])
43
+ end
44
+
45
+ it 'provides markdown representation' do
46
+ result = described_class.convert_with_tables(html)
47
+ table = result[:tables][0]
48
+
49
+ expect(table[:markdown]).to be_a(String)
50
+ expect(table[:markdown]).to include('Name')
51
+ expect(table[:markdown]).to include('Alice')
52
+ end
53
+
54
+ it 'marks header rows correctly' do
55
+ result = described_class.convert_with_tables(html)
56
+ table = result[:tables][0]
57
+
58
+ expect(table[:is_header_row]).to be_an(Array)
59
+ expect(table[:is_header_row].length).to eq(2)
60
+ expect(table[:is_header_row][0]).to be true
61
+ expect(table[:is_header_row][1]).to be false
62
+ end
63
+
64
+ it 'includes converted markdown content' do
65
+ result = described_class.convert_with_tables(html)
66
+
67
+ expect(result[:content]).to be_a(String)
68
+ expect(result[:content]).not_to be_empty
69
+ end
70
+ end
71
+
72
+ context 'with empty HTML' do
73
+ it 'returns empty tables array' do
74
+ result = described_class.convert_with_tables('')
75
+
76
+ expect(result[:tables]).to eq([])
77
+ expect(result[:content]).to be_a(String)
78
+ end
79
+ end
80
+
81
+ context 'with HTML containing no tables' do
82
+ it 'returns empty tables array' do
83
+ html = '<p>No tables here</p>'
84
+ result = described_class.convert_with_tables(html)
85
+
86
+ expect(result[:tables]).to eq([])
87
+ expect(result[:content]).to include('No tables here')
88
+ end
89
+ end
90
+
91
+ context 'with multiple tables' do
92
+ let(:html) do
93
+ <<~HTML
94
+ <table>
95
+ <tr><th>A</th></tr>
96
+ <tr><td>1</td></tr>
97
+ </table>
98
+ <p>Some text between tables</p>
99
+ <table>
100
+ <tr><th>B</th><th>C</th></tr>
101
+ <tr><td>2</td><td>3</td></tr>
102
+ <tr><td>4</td><td>5</td></tr>
103
+ </table>
104
+ HTML
105
+ end
106
+
107
+ it 'extracts all tables' do
108
+ result = described_class.convert_with_tables(html)
109
+
110
+ expect(result[:tables].length).to eq(2)
111
+ end
112
+
113
+ it 'preserves table order' do
114
+ result = described_class.convert_with_tables(html)
115
+
116
+ first_table = result[:tables][0]
117
+ second_table = result[:tables][1]
118
+
119
+ expect(first_table[:cells][0]).to eq(['A'])
120
+ expect(second_table[:cells][0]).to eq(%w[B C])
121
+ end
122
+
123
+ it 'extracts correct row counts per table' do
124
+ result = described_class.convert_with_tables(html)
125
+
126
+ expect(result[:tables][0][:cells].length).to eq(2)
127
+ expect(result[:tables][1][:cells].length).to eq(3)
128
+ end
129
+ end
130
+
131
+ context 'with special characters in cells' do
132
+ let(:html) do
133
+ <<~HTML
134
+ <table>
135
+ <tr><th>Key</th><th>Value</th></tr>
136
+ <tr><td>Brackets &lt;&gt;</td><td>Ampersand &amp;</td></tr>
137
+ <tr><td>Quotes "double"</td><td>Quotes 'single'</td></tr>
138
+ <tr><td>Unicode: cafe\u0301</td><td>Emoji: test</td></tr>
139
+ </table>
140
+ HTML
141
+ end
142
+
143
+ it 'handles HTML entities in cells' do
144
+ result = described_class.convert_with_tables(html)
145
+ table = result[:tables][0]
146
+
147
+ expect(table[:cells][1][0]).to include('<>')
148
+ expect(table[:cells][1][1]).to include('&')
149
+ end
150
+
151
+ it 'handles quotes in cells' do
152
+ result = described_class.convert_with_tables(html)
153
+ table = result[:tables][0]
154
+
155
+ expect(table[:cells][2][0]).to include('"double"')
156
+ expect(table[:cells][2][1]).to include("'single'")
157
+ end
158
+
159
+ it 'handles unicode in cells' do
160
+ result = described_class.convert_with_tables(html)
161
+ table = result[:tables][0]
162
+
163
+ expect(table[:cells][3][0]).to be_a(String)
164
+ end
165
+ end
166
+
167
+ context 'with conversion options' do
168
+ it 'accepts options hash' do
169
+ html = '<table><tr><th>Header</th></tr><tr><td>Data</td></tr></table>'
170
+ result = described_class.convert_with_tables(html, { heading_style: :atx })
171
+
172
+ expect(result).to be_a(Hash)
173
+ expect(result[:tables].length).to eq(1)
174
+ end
175
+
176
+ it 'accepts nil options' do
177
+ html = '<table><tr><td>Data</td></tr></table>'
178
+ result = described_class.convert_with_tables(html, nil, nil)
179
+
180
+ expect(result).to be_a(Hash)
181
+ expect(result[:tables].length).to eq(1)
182
+ end
183
+ end
184
+
185
+ context 'with metadata config' do
186
+ it 'includes metadata when configured' do
187
+ html = '<html><head><title>Test</title></head><body><table><tr><td>Data</td></tr></table></body></html>'
188
+ result = described_class.convert_with_tables(html, nil, { extract_headers: true })
189
+
190
+ expect(result[:metadata]).to be_a(Hash).or(be_nil)
191
+ end
192
+ end
193
+ end
194
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html-to-markdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.27.2
4
+ version: 2.28.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Na'aman Hirschfeld
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-03-02 00:00:00.000000000 Z
11
+ date: 2026-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -61,6 +61,7 @@ files:
61
61
  - ext/html-to-markdown-rb/native/src/conversion/inline_images.rs
62
62
  - ext/html-to-markdown-rb/native/src/conversion/metadata.rs
63
63
  - ext/html-to-markdown-rb/native/src/conversion/mod.rs
64
+ - ext/html-to-markdown-rb/native/src/conversion/tables.rs
64
65
  - ext/html-to-markdown-rb/native/src/lib.rs
65
66
  - ext/html-to-markdown-rb/native/src/options.rs
66
67
  - ext/html-to-markdown-rb/native/src/profiling.rs
@@ -1550,7 +1551,8 @@ files:
1550
1551
  - rust-vendor/getrandom/src/sys_rng.rs
1551
1552
  - rust-vendor/getrandom/src/util.rs
1552
1553
  - rust-vendor/getrandom/src/utils/get_errno.rs
1553
- - rust-vendor/getrandom/src/utils/lazy.rs
1554
+ - rust-vendor/getrandom/src/utils/lazy_bool.rs
1555
+ - rust-vendor/getrandom/src/utils/lazy_ptr.rs
1554
1556
  - rust-vendor/getrandom/src/utils/sanitizer.rs
1555
1557
  - rust-vendor/getrandom/src/utils/sys_fill_exact.rs
1556
1558
  - rust-vendor/getrandom/tests/mod.rs
@@ -1948,6 +1950,7 @@ files:
1948
1950
  - rust-vendor/html-to-markdown-rs/tests/issue_199_regressions.rs
1949
1951
  - rust-vendor/html-to-markdown-rs/tests/issue_200_regressions.rs
1950
1952
  - rust-vendor/html-to-markdown-rs/tests/issue_212_regressions.rs
1953
+ - rust-vendor/html-to-markdown-rs/tests/issue_216_217_regressions.rs
1951
1954
  - rust-vendor/html-to-markdown-rs/tests/json_ld_script_extraction.rs
1952
1955
  - rust-vendor/html-to-markdown-rs/tests/lists_test.rs
1953
1956
  - rust-vendor/html-to-markdown-rs/tests/plain_output_test.rs
@@ -4761,6 +4764,75 @@ files:
4761
4764
  - rust-vendor/quote/tests/ui/not-repeatable.stderr
4762
4765
  - rust-vendor/quote/tests/ui/wrong-type-span.rs
4763
4766
  - rust-vendor/quote/tests/ui/wrong-type-span.stderr
4767
+ - rust-vendor/r-efi-5.3.0/.cargo-checksum.json
4768
+ - rust-vendor/r-efi-5.3.0/.cargo_vcs_info.json
4769
+ - rust-vendor/r-efi-5.3.0/.github/workflows/publish.yml
4770
+ - rust-vendor/r-efi-5.3.0/.github/workflows/rust-tests.yml
4771
+ - rust-vendor/r-efi-5.3.0/AUTHORS
4772
+ - rust-vendor/r-efi-5.3.0/Cargo.lock
4773
+ - rust-vendor/r-efi-5.3.0/Cargo.toml
4774
+ - rust-vendor/r-efi-5.3.0/Cargo.toml.orig
4775
+ - rust-vendor/r-efi-5.3.0/Makefile
4776
+ - rust-vendor/r-efi-5.3.0/NEWS.md
4777
+ - rust-vendor/r-efi-5.3.0/README.md
4778
+ - rust-vendor/r-efi-5.3.0/examples/freestanding.rs
4779
+ - rust-vendor/r-efi-5.3.0/examples/gop-query.rs
4780
+ - rust-vendor/r-efi-5.3.0/examples/hello-world.rs
4781
+ - rust-vendor/r-efi-5.3.0/src/base.rs
4782
+ - rust-vendor/r-efi-5.3.0/src/hii.rs
4783
+ - rust-vendor/r-efi-5.3.0/src/lib.rs
4784
+ - rust-vendor/r-efi-5.3.0/src/protocols.rs
4785
+ - rust-vendor/r-efi-5.3.0/src/protocols/absolute_pointer.rs
4786
+ - rust-vendor/r-efi-5.3.0/src/protocols/block_io.rs
4787
+ - rust-vendor/r-efi-5.3.0/src/protocols/bus_specific_driver_override.rs
4788
+ - rust-vendor/r-efi-5.3.0/src/protocols/debug_support.rs
4789
+ - rust-vendor/r-efi-5.3.0/src/protocols/debugport.rs
4790
+ - rust-vendor/r-efi-5.3.0/src/protocols/decompress.rs
4791
+ - rust-vendor/r-efi-5.3.0/src/protocols/device_path.rs
4792
+ - rust-vendor/r-efi-5.3.0/src/protocols/device_path_from_text.rs
4793
+ - rust-vendor/r-efi-5.3.0/src/protocols/device_path_to_text.rs
4794
+ - rust-vendor/r-efi-5.3.0/src/protocols/device_path_utilities.rs
4795
+ - rust-vendor/r-efi-5.3.0/src/protocols/disk_io.rs
4796
+ - rust-vendor/r-efi-5.3.0/src/protocols/disk_io2.rs
4797
+ - rust-vendor/r-efi-5.3.0/src/protocols/driver_binding.rs
4798
+ - rust-vendor/r-efi-5.3.0/src/protocols/driver_diagnostics2.rs
4799
+ - rust-vendor/r-efi-5.3.0/src/protocols/driver_family_override.rs
4800
+ - rust-vendor/r-efi-5.3.0/src/protocols/file.rs
4801
+ - rust-vendor/r-efi-5.3.0/src/protocols/graphics_output.rs
4802
+ - rust-vendor/r-efi-5.3.0/src/protocols/hii_database.rs
4803
+ - rust-vendor/r-efi-5.3.0/src/protocols/hii_font.rs
4804
+ - rust-vendor/r-efi-5.3.0/src/protocols/hii_font_ex.rs
4805
+ - rust-vendor/r-efi-5.3.0/src/protocols/hii_package_list.rs
4806
+ - rust-vendor/r-efi-5.3.0/src/protocols/hii_string.rs
4807
+ - rust-vendor/r-efi-5.3.0/src/protocols/ip4.rs
4808
+ - rust-vendor/r-efi-5.3.0/src/protocols/ip6.rs
4809
+ - rust-vendor/r-efi-5.3.0/src/protocols/load_file.rs
4810
+ - rust-vendor/r-efi-5.3.0/src/protocols/load_file2.rs
4811
+ - rust-vendor/r-efi-5.3.0/src/protocols/loaded_image.rs
4812
+ - rust-vendor/r-efi-5.3.0/src/protocols/loaded_image_device_path.rs
4813
+ - rust-vendor/r-efi-5.3.0/src/protocols/managed_network.rs
4814
+ - rust-vendor/r-efi-5.3.0/src/protocols/memory_attribute.rs
4815
+ - rust-vendor/r-efi-5.3.0/src/protocols/mp_services.rs
4816
+ - rust-vendor/r-efi-5.3.0/src/protocols/pci_io.rs
4817
+ - rust-vendor/r-efi-5.3.0/src/protocols/platform_driver_override.rs
4818
+ - rust-vendor/r-efi-5.3.0/src/protocols/rng.rs
4819
+ - rust-vendor/r-efi-5.3.0/src/protocols/service_binding.rs
4820
+ - rust-vendor/r-efi-5.3.0/src/protocols/shell.rs
4821
+ - rust-vendor/r-efi-5.3.0/src/protocols/shell_dynamic_command.rs
4822
+ - rust-vendor/r-efi-5.3.0/src/protocols/shell_parameters.rs
4823
+ - rust-vendor/r-efi-5.3.0/src/protocols/simple_file_system.rs
4824
+ - rust-vendor/r-efi-5.3.0/src/protocols/simple_network.rs
4825
+ - rust-vendor/r-efi-5.3.0/src/protocols/simple_text_input.rs
4826
+ - rust-vendor/r-efi-5.3.0/src/protocols/simple_text_input_ex.rs
4827
+ - rust-vendor/r-efi-5.3.0/src/protocols/simple_text_output.rs
4828
+ - rust-vendor/r-efi-5.3.0/src/protocols/tcp4.rs
4829
+ - rust-vendor/r-efi-5.3.0/src/protocols/tcp6.rs
4830
+ - rust-vendor/r-efi-5.3.0/src/protocols/timestamp.rs
4831
+ - rust-vendor/r-efi-5.3.0/src/protocols/udp4.rs
4832
+ - rust-vendor/r-efi-5.3.0/src/protocols/udp6.rs
4833
+ - rust-vendor/r-efi-5.3.0/src/system.rs
4834
+ - rust-vendor/r-efi-5.3.0/src/vendor.rs
4835
+ - rust-vendor/r-efi-5.3.0/src/vendor/intel/console_control.rs
4764
4836
  - rust-vendor/r-efi/.cargo-checksum.json
4765
4837
  - rust-vendor/r-efi/.cargo_vcs_info.json
4766
4838
  - rust-vendor/r-efi/.github/workflows/publish.yml
@@ -6317,6 +6389,7 @@ files:
6317
6389
  - rust-vendor/tokio/.cargo-checksum.json
6318
6390
  - rust-vendor/tokio/.cargo_vcs_info.json
6319
6391
  - rust-vendor/tokio/CHANGELOG.md
6392
+ - rust-vendor/tokio/Cargo.lock
6320
6393
  - rust-vendor/tokio/Cargo.toml
6321
6394
  - rust-vendor/tokio/Cargo.toml.orig
6322
6395
  - rust-vendor/tokio/LICENSE
@@ -6718,6 +6791,7 @@ files:
6718
6791
  - rust-vendor/tokio/tests/fs_uring_read.rs
6719
6792
  - rust-vendor/tokio/tests/fs_write.rs
6720
6793
  - rust-vendor/tokio/tests/io_async_fd.rs
6794
+ - rust-vendor/tokio/tests/io_async_fd_memory_leak.rs
6721
6795
  - rust-vendor/tokio/tests/io_async_read.rs
6722
6796
  - rust-vendor/tokio/tests/io_buf_reader.rs
6723
6797
  - rust-vendor/tokio/tests/io_buf_writer.rs
@@ -6774,12 +6848,14 @@ files:
6774
6848
  - rust-vendor/tokio/tests/process_smoke.rs
6775
6849
  - rust-vendor/tokio/tests/rt_basic.rs
6776
6850
  - rust-vendor/tokio/tests/rt_common.rs
6851
+ - rust-vendor/tokio/tests/rt_common_before_park.rs
6777
6852
  - rust-vendor/tokio/tests/rt_handle.rs
6778
6853
  - rust-vendor/tokio/tests/rt_handle_block_on.rs
6779
6854
  - rust-vendor/tokio/tests/rt_local.rs
6780
6855
  - rust-vendor/tokio/tests/rt_metrics.rs
6781
6856
  - rust-vendor/tokio/tests/rt_panic.rs
6782
6857
  - rust-vendor/tokio/tests/rt_poll_callbacks.rs
6858
+ - rust-vendor/tokio/tests/rt_shutdown_err.rs
6783
6859
  - rust-vendor/tokio/tests/rt_threaded.rs
6784
6860
  - rust-vendor/tokio/tests/rt_time_start_paused.rs
6785
6861
  - rust-vendor/tokio/tests/rt_unstable_metrics.rs
@@ -9662,6 +9738,7 @@ files:
9662
9738
  - sig/open3.rbs
9663
9739
  - spec/cli_proxy_spec.rb
9664
9740
  - spec/convert_spec.rb
9741
+ - spec/convert_with_tables_spec.rb
9665
9742
  - spec/metadata_extraction_spec.rb
9666
9743
  - spec/spec_helper.rb
9667
9744
  - spec/visitor_issue_187_spec.rb
@@ -1,64 +0,0 @@
1
- //! Helpers built around pointer-sized atomics.
2
- use core::sync::atomic::{AtomicUsize, Ordering};
3
-
4
- // This structure represents a lazily initialized static usize value. Useful
5
- // when it is preferable to just rerun initialization instead of locking.
6
- // unsync_init will invoke an init() function until it succeeds, then return the
7
- // cached value for future calls.
8
- //
9
- // unsync_init supports init() "failing". If the init() method returns UNINIT,
10
- // that value will be returned as normal, but will not be cached.
11
- //
12
- // Users should only depend on the _value_ returned by init() functions.
13
- // Specifically, for the following init() function:
14
- // fn init() -> usize {
15
- // a();
16
- // let v = b();
17
- // c();
18
- // v
19
- // }
20
- // the effects of c() or writes to shared memory will not necessarily be
21
- // observed and additional synchronization methods may be needed.
22
- struct LazyUsize(AtomicUsize);
23
-
24
- impl LazyUsize {
25
- // The initialization is not completed.
26
- const UNINIT: usize = usize::MAX;
27
-
28
- const fn new() -> Self {
29
- Self(AtomicUsize::new(Self::UNINIT))
30
- }
31
-
32
- // Runs the init() function at most once, returning the value of some run of
33
- // init(). Multiple callers can run their init() functions in parallel.
34
- // init() should always return the same value, if it succeeds.
35
- fn unsync_init(&self, init: impl FnOnce() -> usize) -> usize {
36
- #[cold]
37
- fn do_init(this: &LazyUsize, init: impl FnOnce() -> usize) -> usize {
38
- let val = init();
39
- this.0.store(val, Ordering::Relaxed);
40
- val
41
- }
42
-
43
- // Relaxed ordering is fine, as we only have a single atomic variable.
44
- let val = self.0.load(Ordering::Relaxed);
45
- if val != Self::UNINIT {
46
- val
47
- } else {
48
- do_init(self, init)
49
- }
50
- }
51
- }
52
-
53
- // Identical to LazyUsize except with bool instead of usize.
54
- pub(crate) struct LazyBool(LazyUsize);
55
-
56
- impl LazyBool {
57
- pub const fn new() -> Self {
58
- Self(LazyUsize::new())
59
- }
60
-
61
- pub fn unsync_init(&self, init: impl FnOnce() -> bool) -> bool {
62
- self.0.unsync_init(|| usize::from(init())) != 0
63
- }
64
- }