kreuzberg 4.4.1 → 4.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +8 -8
  3. data/README.md +1 -1
  4. data/ext/kreuzberg_rb/native/Cargo.lock +79 -1219
  5. data/ext/kreuzberg_rb/native/Cargo.toml +6 -6
  6. data/lib/kreuzberg/result.rb +8 -4
  7. data/lib/kreuzberg/version.rb +1 -1
  8. data/lib/kreuzberg.rb +2 -0
  9. data/spec/binding/batch_operations_spec.rb +4 -3
  10. data/vendor/Cargo.toml +4 -4
  11. data/vendor/kreuzberg/Cargo.toml +11 -25
  12. data/vendor/kreuzberg/README.md +1 -1
  13. data/vendor/kreuzberg/src/extraction/docx/drawing.rs +5 -2
  14. data/vendor/kreuzberg/src/extraction/docx/math.rs +1706 -0
  15. data/vendor/kreuzberg/src/extraction/docx/mod.rs +1 -0
  16. data/vendor/kreuzberg/src/extraction/docx/parser.rs +992 -78
  17. data/vendor/kreuzberg/src/extraction/docx/table.rs +12 -1
  18. data/vendor/kreuzberg/src/extraction/email.rs +198 -6
  19. data/vendor/kreuzberg/src/extraction/html/converter.rs +50 -29
  20. data/vendor/kreuzberg/src/extraction/image.rs +20 -2
  21. data/vendor/kreuzberg/src/extraction/markdown.rs +87 -0
  22. data/vendor/kreuzberg/src/extraction/mod.rs +2 -0
  23. data/vendor/kreuzberg/src/extraction/pptx/content_builder.rs +59 -36
  24. data/vendor/kreuzberg/src/extraction/pptx/elements.rs +2 -0
  25. data/vendor/kreuzberg/src/extraction/pptx/mod.rs +43 -15
  26. data/vendor/kreuzberg/src/extractors/bibtex.rs +3 -3
  27. data/vendor/kreuzberg/src/extractors/csv.rs +98 -1
  28. data/vendor/kreuzberg/src/extractors/docbook.rs +39 -15
  29. data/vendor/kreuzberg/src/extractors/docx.rs +63 -25
  30. data/vendor/kreuzberg/src/extractors/epub/content.rs +15 -1
  31. data/vendor/kreuzberg/src/extractors/fictionbook.rs +637 -65
  32. data/vendor/kreuzberg/src/extractors/html.rs +12 -1
  33. data/vendor/kreuzberg/src/extractors/jupyter.rs +24 -8
  34. data/vendor/kreuzberg/src/extractors/latex/commands.rs +3 -1
  35. data/vendor/kreuzberg/src/extractors/latex/environments.rs +9 -4
  36. data/vendor/kreuzberg/src/extractors/latex/parser.rs +55 -82
  37. data/vendor/kreuzberg/src/extractors/latex/utilities.rs +78 -9
  38. data/vendor/kreuzberg/src/extractors/odt.rs +257 -36
  39. data/vendor/kreuzberg/src/extractors/pptx.rs +17 -3
  40. data/vendor/kreuzberg/src/extractors/rst.rs +31 -15
  41. data/vendor/kreuzberg/src/extractors/rtf/formatting.rs +24 -1
  42. data/vendor/kreuzberg/src/extractors/rtf/mod.rs +31 -2
  43. data/vendor/kreuzberg/src/extractors/rtf/parser.rs +26 -12
  44. data/vendor/kreuzberg/src/extractors/rtf/tables.rs +9 -4
  45. data/vendor/kreuzberg/src/extractors/typst.rs +81 -19
  46. data/vendor/kreuzberg/src/mcp/server.rs +16 -19
  47. data/vendor/kreuzberg/src/ocr/processor/execution.rs +62 -25
  48. data/vendor/kreuzberg/src/paddle_ocr/backend.rs +2 -2
  49. data/vendor/kreuzberg/tests/batch_orchestration.rs +4 -2
  50. data/vendor/kreuzberg/tests/docbook_extractor_tests.rs +0 -4
  51. data/vendor/kreuzberg/tests/epub_native_extractor_tests.rs +2 -2
  52. data/vendor/kreuzberg/tests/fictionbook_extractor_tests.rs +11 -3
  53. data/vendor/kreuzberg/tests/pdf_markdown_regression.rs +194 -84
  54. data/vendor/kreuzberg-ffi/Cargo.toml +3 -3
  55. data/vendor/kreuzberg-ffi/kreuzberg.h +2 -2
  56. data/vendor/kreuzberg-paddle-ocr/Cargo.toml +1 -1
  57. data/vendor/kreuzberg-pdfium-render/Cargo.toml +1 -1
  58. data/vendor/kreuzberg-tesseract/Cargo.toml +2 -2
  59. data/vendor/kreuzberg-tesseract/build.rs +38 -8
  60. metadata +3 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 56e750209fdd4c61b193bbc25ce1f7e7f3646cee22fcd7ee79af381aa1c95561
4
- data.tar.gz: 2b741797f40b209ad5b8451aba4adae514914ee6f8dd86a55b8bbf7d5e910e98
3
+ metadata.gz: b65c17f6cf4332ce366d8da0197077c2e222a262a2da29b0681c1139918527bf
4
+ data.tar.gz: 1082caefd179e5eeaa44b69ad93f02458dae2e6cc2edf5aaf1dfc1eee838276c
5
5
  SHA512:
6
- metadata.gz: 7110c61739f8a373080d03a017ab674713831df05acbb64e6f1c8d8fa6d7ca8e365f3f9cac3a0b47f046b7bd1778e4e01488142d8c1c1de355570363ce710210
7
- data.tar.gz: d7a02f18c7e656475bb54081885eb6d82031bd76cc3f5515a68561191ccc5051216157924cfc36555da804afca90e21634780592163bdffad299cd4bc1a5fb0f
6
+ metadata.gz: '01496d04b7671527a8e861ac3d155a4c1ed96c75b6898d419ad021edba646e896fbd3fa46920c0a8ebf8d7eba79ab3ad83edce44261b813b16a1e6d28161fc61'
7
+ data.tar.gz: 64aa2e0654a3300c06948c65ed1d944f2c05f08f9989fcd27f5ab38e20772292d5913edea6beed522da4056ba0288f4416bed1ee6f90438155e3ec112d6ef4cb
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- kreuzberg (4.4.1)
4
+ kreuzberg (4.4.2)
5
5
  rb_sys (~> 0.9.119)
6
6
  sorbet-runtime (~> 0.5)
7
7
 
@@ -60,7 +60,7 @@ GEM
60
60
  rb-fsevent (~> 0.10, >= 0.10.3)
61
61
  rb-inotify (~> 0.9, >= 0.9.10)
62
62
  logger (1.7.0)
63
- mcp (0.7.1)
63
+ mcp (0.8.0)
64
64
  json-schema (>= 4.1)
65
65
  method_source (1.1.0)
66
66
  minitest (6.0.2)
@@ -79,7 +79,7 @@ GEM
79
79
  pry-byebug (3.12.0)
80
80
  byebug (~> 13.0)
81
81
  pry (>= 0.13, < 0.17)
82
- public_suffix (7.0.2)
82
+ public_suffix (7.0.5)
83
83
  racc (1.8.1)
84
84
  rainbow (3.1.1)
85
85
  rake (13.3.1)
@@ -134,7 +134,7 @@ GEM
134
134
  rubocop (~> 1.81)
135
135
  ruby-progressbar (1.13.0)
136
136
  securerandom (0.4.1)
137
- sorbet-runtime (0.6.12971)
137
+ sorbet-runtime (0.6.12984)
138
138
  steep (1.10.0)
139
139
  activesupport (>= 5.1)
140
140
  concurrent-ruby (>= 1.1.10)
@@ -222,12 +222,12 @@ CHECKSUMS
222
222
  io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
223
223
  json (2.18.1) sha256=fe112755501b8d0466b5ada6cf50c8c3f41e897fa128ac5d263ec09eedc9f986
224
224
  json-schema (6.1.0) sha256=6bf70a2cfb6dfd5a06da28093fa8190f324c88eabd36a7f47097f227321dc702
225
- kreuzberg (4.4.1)
225
+ kreuzberg (4.4.2)
226
226
  language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
227
227
  lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
228
228
  listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
229
229
  logger (1.7.0) sha256=196edec7cc44b66cfb40f9755ce11b392f21f7967696af15d274dde7edff0203
230
- mcp (0.7.1) sha256=fa967895d6952bad0d981ea907731d8528d2c246d2079d56a9c8bae83d14f1c7
230
+ mcp (0.8.0) sha256=ae8bd146bb8e168852866fd26f805f52744f6326afb3211e073f78a95e0c34fb
231
231
  method_source (1.1.0) sha256=181301c9c45b731b4769bc81e8860e72f9161ad7d66dd99103c9ab84f560f5c5
232
232
  minitest (6.0.2) sha256=db6e57956f6ecc6134683b4c87467d6dd792323c7f0eea7b93f66bd284adbc3d
233
233
  mutex_m (0.3.0) sha256=cfcb04ac16b69c4813777022fdceda24e9f798e48092a2b817eb4c0a782b0751
@@ -236,7 +236,7 @@ CHECKSUMS
236
236
  prism (1.9.0) sha256=7b530c6a9f92c24300014919c9dcbc055bf4cdf51ec30aed099b06cd6674ef85
237
237
  pry (0.16.0) sha256=d76c69065698ed1f85e717bd33d7942c38a50868f6b0673c636192b3d1b6054e
238
238
  pry-byebug (3.12.0) sha256=594e094ae8a8390a7ad4c7b36ae36e13304ed02664c67417d108dc5f7213d1b7
239
- public_suffix (7.0.2) sha256=9114090c8e4e7135c1fd0e7acfea33afaab38101884320c65aaa0ffb8e26a857
239
+ public_suffix (7.0.5) sha256=1a8bb08f1bbea19228d3bed6e5ed908d1cb4f7c2726d18bd9cadf60bc676f623
240
240
  racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
241
241
  rainbow (3.1.1) sha256=039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a
242
242
  rake (13.3.1) sha256=8c9e89d09f66a26a01264e7e3480ec0607f0c497a861ef16063604b1b08eb19c
@@ -259,7 +259,7 @@ CHECKSUMS
259
259
  rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
260
260
  ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
261
261
  securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1
262
- sorbet-runtime (0.6.12971) sha256=1c2c75a262f88c4fbdb36b5617b0b11bfc7c69b11a500b3334bd67d075288a45
262
+ sorbet-runtime (0.6.12984) sha256=3fff20a5b147a2e191210563d61886ac121fc1cd8b5e0faf6bc18873139e0fe4
263
263
  steep (1.10.0) sha256=1b295b55f9aaff1b8d3ee42453ee55bc2a1078fda0268f288edb2dc014f4d7d1
264
264
  strscan (3.1.7) sha256=5f76462b94a3ea50b44973225b7d75b2cb96d4e1bee9ef1319b99ca117b72c8c
265
265
  terminal-table (4.0.0) sha256=f504793203f8251b2ea7c7068333053f0beeea26093ec9962e62ea79f94301d2
data/README.md CHANGED
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.4.1" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.4.2" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">