kreuzberg 4.3.5 → 4.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +3 -3
  3. data/README.md +1 -1
  4. data/ext/kreuzberg_rb/native/Cargo.toml +1 -1
  5. data/kreuzberg.gemspec +1 -1
  6. data/lib/kreuzberg/version.rb +1 -1
  7. data/spec/binding/embeddings_spec.rb +0 -682
  8. data/spec/binding/images_spec.rb +0 -577
  9. data/spec/binding/keywords_extraction_spec.rb +0 -548
  10. data/spec/binding/pages_extraction_spec.rb +0 -449
  11. data/spec/binding/tables_spec.rb +0 -467
  12. data/spec/smoke/package_spec.rb +22 -0
  13. data/vendor/Cargo.toml +1 -1
  14. data/vendor/kreuzberg/Cargo.toml +1 -1
  15. data/vendor/kreuzberg/README.md +1 -1
  16. data/vendor/kreuzberg/src/core/config/pdf.rs +3 -3
  17. data/vendor/kreuzberg/src/core/config/processing.rs +1 -0
  18. data/vendor/kreuzberg/src/core/config_validation/sections.rs +4 -4
  19. data/vendor/kreuzberg/src/core/pipeline/format.rs +35 -1
  20. data/vendor/kreuzberg/src/core/pipeline/tests.rs +22 -26
  21. data/vendor/kreuzberg/src/extraction/image_ocr.rs +1 -1
  22. data/vendor/kreuzberg/src/extraction/transform/content.rs +22 -5
  23. data/vendor/kreuzberg/src/extraction/transform/elements.rs +23 -13
  24. data/vendor/kreuzberg/src/extraction/transform/mod.rs +85 -0
  25. data/vendor/kreuzberg/src/extractors/docx.rs +9 -2
  26. data/vendor/kreuzberg/src/extractors/pdf/extraction.rs +67 -8
  27. data/vendor/kreuzberg/src/extractors/pdf/mod.rs +23 -6
  28. data/vendor/kreuzberg/src/ocr/cache.rs +8 -4
  29. data/vendor/kreuzberg/src/ocr/conversion.rs +4 -3
  30. data/vendor/kreuzberg/src/ocr/processor/execution.rs +29 -25
  31. data/vendor/kreuzberg/src/ocr/table/mod.rs +283 -0
  32. data/vendor/kreuzberg/src/pdf/hierarchy/clustering.rs +15 -4
  33. data/vendor/kreuzberg/src/pdf/hierarchy/extraction.rs +166 -0
  34. data/vendor/kreuzberg/src/pdf/hierarchy/mod.rs +3 -2
  35. data/vendor/kreuzberg/src/pdf/markdown/assembly.rs +285 -0
  36. data/vendor/kreuzberg/src/pdf/markdown/bridge.rs +415 -0
  37. data/vendor/kreuzberg/src/pdf/markdown/classify.rs +235 -0
  38. data/vendor/kreuzberg/src/pdf/markdown/constants.rs +29 -0
  39. data/vendor/kreuzberg/src/pdf/markdown/lines.rs +230 -0
  40. data/vendor/kreuzberg/src/pdf/markdown/mod.rs +18 -0
  41. data/vendor/kreuzberg/src/pdf/markdown/paragraphs.rs +323 -0
  42. data/vendor/kreuzberg/src/pdf/markdown/pipeline.rs +198 -0
  43. data/vendor/kreuzberg/src/pdf/markdown/render.rs +421 -0
  44. data/vendor/kreuzberg/src/pdf/markdown/types.rs +31 -0
  45. data/vendor/kreuzberg/src/types/ocr_elements.rs +12 -10
  46. data/vendor/kreuzberg/tests/debug_table_cells.rs +56 -0
  47. data/vendor/kreuzberg/tests/pdf_markdown_all_docs.rs +0 -1
  48. data/vendor/kreuzberg/tests/pdf_markdown_extraction.rs +12 -9
  49. data/vendor/kreuzberg/tests/pdf_table_detection.rs +0 -2
  50. data/vendor/kreuzberg/tests/pdf_table_ground_truth.rs +404 -0
  51. data/vendor/kreuzberg-ffi/Cargo.toml +1 -1
  52. data/vendor/kreuzberg-paddle-ocr/Cargo.toml +1 -1
  53. data/vendor/kreuzberg-paddle-ocr/src/crnn_net.rs +14 -22
  54. data/vendor/kreuzberg-paddle-ocr/tests/diagnostic.rs +5 -5
  55. data/vendor/kreuzberg-pdfium-render/Cargo.toml +1 -1
  56. data/vendor/kreuzberg-pdfium-render/src/lib.rs +19 -23
  57. data/vendor/kreuzberg-pdfium-render/src/pdf/document/bookmark.rs +3 -3
  58. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/annotation/private.rs +41 -41
  59. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/extraction.rs +823 -0
  60. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/field/private.rs +8 -8
  61. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/object/content_mark.rs +170 -0
  62. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/object/content_marks.rs +78 -0
  63. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/object/group.rs +3 -3
  64. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/object/image.rs +9 -9
  65. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/object/private.rs +2 -2
  66. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/object.rs +29 -0
  67. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/paragraph.rs +238 -113
  68. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/struct_element.rs +673 -0
  69. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/struct_tree.rs +125 -0
  70. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/text/char.rs +16 -0
  71. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/text.rs +5 -4
  72. data/vendor/kreuzberg-pdfium-render/src/pdf/document/page.rs +24 -4
  73. data/vendor/kreuzberg-pdfium-render/src/pdf/document/pages.rs +3 -3
  74. data/vendor/kreuzberg-pdfium-render/src/pdf/link.rs +2 -2
  75. data/vendor/kreuzberg-pdfium-render/src/utils.rs +15 -1
  76. data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
  77. metadata +20 -4
  78. data/vendor/kreuzberg/src/pdf/markdown.rs +0 -2014
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3936788c6812a84428d0467330f573c20c9b569c399eab105cc2815d777b2141
4
- data.tar.gz: 1b115f87bc4a40960584de9459725d0afabfefe6244ddea434461a2c6f36a647
3
+ metadata.gz: 1b3eb519a94cf2a82e9d9b649ce98e122fec14d01568ced3edc925f1cb49f4ad
4
+ data.tar.gz: cdfa987af6f7bc8b0a6defb76b3426a616ed9d9451bd3aa27680eae7eba5325c
5
5
  SHA512:
6
- metadata.gz: 73fb7522dcd091b449d5146e65f008bded545a6c49a35d77468bd9bfa61d30a7413dc7364eef6ce79cca4606c5c710ea3f574509743569a7fb4f8a7bc579f402
7
- data.tar.gz: e21fb401768da5005a1edb720b0c00c82aaa9a8ef60b6f2bab3587b3c8c94cd8fdcc220daeb8545bae6523bf8be22dbc1dab7c966ae8f30e888342d96c7e5df2
6
+ metadata.gz: 1c6170355aa3f4443b68aed401e2d7b8f20a4b792c18e130b5f59be9ea3102527bb9098f77f30d1383287d404846ca99a397807827284e7b0c774df58d85cd51
7
+ data.tar.gz: 5362d11257dd57715e8f9e0743a3f5e8fba64c9177f4858b587fd7230dbae2c54fac194d0aa64da7db1f970ad8eaf605683d6547cdd39c81b55d0bebe43502d5
data/Gemfile.lock CHANGED
@@ -1,8 +1,9 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- kreuzberg (4.3.5)
4
+ kreuzberg (4.3.6)
5
5
  rb_sys (~> 0.9.119)
6
+ sorbet-runtime (~> 0.5)
6
7
 
7
8
  GEM
8
9
  remote: https://rubygems.org/
@@ -179,7 +180,6 @@ DEPENDENCIES
179
180
  rubocop (~> 1.66)
180
181
  rubocop-performance (~> 1.21)
181
182
  rubocop-rspec (~> 3.0)
182
- sorbet-runtime (~> 0.5)
183
183
  steep (~> 1.8)
184
184
  yard (~> 0.9)
185
185
 
@@ -210,7 +210,7 @@ CHECKSUMS
210
210
  i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
211
211
  io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
212
212
  json (2.18.1) sha256=fe112755501b8d0466b5ada6cf50c8c3f41e897fa128ac5d263ec09eedc9f986
213
- kreuzberg (4.3.5)
213
+ kreuzberg (4.3.6)
214
214
  language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
215
215
  lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
216
216
  listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
data/README.md CHANGED
@@ -22,7 +22,7 @@
22
22
  <img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
23
23
  </a>
24
24
  <a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
25
- <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.3.5" alt="Go">
25
+ <img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.3.6" alt="Go">
26
26
  </a>
27
27
  <a href="https://www.nuget.org/packages/Kreuzberg/">
28
28
  <img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
data/ext/kreuzberg_rb/native/Cargo.toml CHANGED
@@ -37,7 +37,7 @@ collapsible_if = "allow"
37
37
 
38
38
  [package]
39
39
  name = "kreuzberg-rb"
40
- version = "4.3.5"
40
+ version = "4.3.6"
41
41
  edition = "2024"
42
42
  rust-version = "1.91"
43
43
  authors = ["Na'aman Hirschfeld <nhirschfeld@gmail.com>"]
data/kreuzberg.gemspec CHANGED
@@ -241,7 +241,7 @@ Gem::Specification.new do |spec|
241
241
  spec.add_development_dependency 'rake', '~> 13.0'
242
242
  spec.add_development_dependency 'rake-compiler', '~> 1.2'
243
243
  spec.add_development_dependency 'rspec', '~> 3.12'
244
- spec.add_development_dependency 'sorbet-runtime', '~> 0.5'
244
+ spec.add_dependency 'sorbet-runtime', '~> 0.5'
245
245
  unless Gem.win_platform?
246
246
  spec.add_development_dependency 'rbs', '~> 3.0'
247
247
  spec.add_development_dependency 'rubocop', '~> 1.66'
data/lib/kreuzberg/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Kreuzberg
4
- VERSION = '4.3.5'
4
+ VERSION = '4.3.6'
5
5
  end