kreuzberg 4.3.5 → 4.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -3
- data/README.md +1 -1
- data/ext/kreuzberg_rb/native/Cargo.toml +1 -1
- data/kreuzberg.gemspec +1 -1
- data/lib/kreuzberg/version.rb +1 -1
- data/spec/binding/embeddings_spec.rb +0 -682
- data/spec/binding/images_spec.rb +0 -577
- data/spec/binding/keywords_extraction_spec.rb +0 -548
- data/spec/binding/pages_extraction_spec.rb +0 -449
- data/spec/binding/tables_spec.rb +0 -467
- data/spec/smoke/package_spec.rb +22 -0
- data/vendor/Cargo.toml +1 -1
- data/vendor/kreuzberg/Cargo.toml +1 -1
- data/vendor/kreuzberg/README.md +1 -1
- data/vendor/kreuzberg/src/core/config/pdf.rs +3 -3
- data/vendor/kreuzberg/src/core/config/processing.rs +1 -0
- data/vendor/kreuzberg/src/core/config_validation/sections.rs +4 -4
- data/vendor/kreuzberg/src/core/pipeline/format.rs +35 -1
- data/vendor/kreuzberg/src/core/pipeline/tests.rs +22 -26
- data/vendor/kreuzberg/src/extraction/image_ocr.rs +1 -1
- data/vendor/kreuzberg/src/extraction/transform/content.rs +22 -5
- data/vendor/kreuzberg/src/extraction/transform/elements.rs +23 -13
- data/vendor/kreuzberg/src/extraction/transform/mod.rs +85 -0
- data/vendor/kreuzberg/src/extractors/docx.rs +9 -2
- data/vendor/kreuzberg/src/extractors/pdf/extraction.rs +67 -8
- data/vendor/kreuzberg/src/extractors/pdf/mod.rs +23 -6
- data/vendor/kreuzberg/src/ocr/cache.rs +8 -4
- data/vendor/kreuzberg/src/ocr/conversion.rs +4 -3
- data/vendor/kreuzberg/src/ocr/processor/execution.rs +29 -25
- data/vendor/kreuzberg/src/ocr/table/mod.rs +283 -0
- data/vendor/kreuzberg/src/pdf/hierarchy/clustering.rs +15 -4
- data/vendor/kreuzberg/src/pdf/hierarchy/extraction.rs +166 -0
- data/vendor/kreuzberg/src/pdf/hierarchy/mod.rs +3 -2
- data/vendor/kreuzberg/src/pdf/markdown/assembly.rs +285 -0
- data/vendor/kreuzberg/src/pdf/markdown/bridge.rs +415 -0
- data/vendor/kreuzberg/src/pdf/markdown/classify.rs +235 -0
- data/vendor/kreuzberg/src/pdf/markdown/constants.rs +29 -0
- data/vendor/kreuzberg/src/pdf/markdown/lines.rs +230 -0
- data/vendor/kreuzberg/src/pdf/markdown/mod.rs +18 -0
- data/vendor/kreuzberg/src/pdf/markdown/paragraphs.rs +323 -0
- data/vendor/kreuzberg/src/pdf/markdown/pipeline.rs +198 -0
- data/vendor/kreuzberg/src/pdf/markdown/render.rs +421 -0
- data/vendor/kreuzberg/src/pdf/markdown/types.rs +31 -0
- data/vendor/kreuzberg/src/types/ocr_elements.rs +12 -10
- data/vendor/kreuzberg/tests/debug_table_cells.rs +56 -0
- data/vendor/kreuzberg/tests/pdf_markdown_all_docs.rs +0 -1
- data/vendor/kreuzberg/tests/pdf_markdown_extraction.rs +12 -9
- data/vendor/kreuzberg/tests/pdf_table_detection.rs +0 -2
- data/vendor/kreuzberg/tests/pdf_table_ground_truth.rs +404 -0
- data/vendor/kreuzberg-ffi/Cargo.toml +1 -1
- data/vendor/kreuzberg-paddle-ocr/Cargo.toml +1 -1
- data/vendor/kreuzberg-paddle-ocr/src/crnn_net.rs +14 -22
- data/vendor/kreuzberg-paddle-ocr/tests/diagnostic.rs +5 -5
- data/vendor/kreuzberg-pdfium-render/Cargo.toml +1 -1
- data/vendor/kreuzberg-pdfium-render/src/lib.rs +19 -23
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/bookmark.rs +3 -3
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/annotation/private.rs +41 -41
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/extraction.rs +823 -0
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/field/private.rs +8 -8
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/object/content_mark.rs +170 -0
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/object/content_marks.rs +78 -0
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/object/group.rs +3 -3
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/object/image.rs +9 -9
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/object/private.rs +2 -2
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/object.rs +29 -0
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/paragraph.rs +238 -113
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/struct_element.rs +673 -0
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/struct_tree.rs +125 -0
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/text/char.rs +16 -0
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/page/text.rs +5 -4
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/page.rs +24 -4
- data/vendor/kreuzberg-pdfium-render/src/pdf/document/pages.rs +3 -3
- data/vendor/kreuzberg-pdfium-render/src/pdf/link.rs +2 -2
- data/vendor/kreuzberg-pdfium-render/src/utils.rs +15 -1
- data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
- metadata +20 -4
- data/vendor/kreuzberg/src/pdf/markdown.rs +0 -2014
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1b3eb519a94cf2a82e9d9b649ce98e122fec14d01568ced3edc925f1cb49f4ad
|
|
4
|
+
data.tar.gz: cdfa987af6f7bc8b0a6defb76b3426a616ed9d9451bd3aa27680eae7eba5325c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1c6170355aa3f4443b68aed401e2d7b8f20a4b792c18e130b5f59be9ea3102527bb9098f77f30d1383287d404846ca99a397807827284e7b0c774df58d85cd51
|
|
7
|
+
data.tar.gz: 5362d11257dd57715e8f9e0743a3f5e8fba64c9177f4858b587fd7230dbae2c54fac194d0aa64da7db1f970ad8eaf605683d6547cdd39c81b55d0bebe43502d5
|
data/Gemfile.lock
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
kreuzberg (4.3.5)
|
|
4
|
+
kreuzberg (4.3.6)
|
|
5
5
|
rb_sys (~> 0.9.119)
|
|
6
|
+
sorbet-runtime (~> 0.5)
|
|
6
7
|
|
|
7
8
|
GEM
|
|
8
9
|
remote: https://rubygems.org/
|
|
@@ -179,7 +180,6 @@ DEPENDENCIES
|
|
|
179
180
|
rubocop (~> 1.66)
|
|
180
181
|
rubocop-performance (~> 1.21)
|
|
181
182
|
rubocop-rspec (~> 3.0)
|
|
182
|
-
sorbet-runtime (~> 0.5)
|
|
183
183
|
steep (~> 1.8)
|
|
184
184
|
yard (~> 0.9)
|
|
185
185
|
|
|
@@ -210,7 +210,7 @@ CHECKSUMS
|
|
|
210
210
|
i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
|
|
211
211
|
io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
|
|
212
212
|
json (2.18.1) sha256=fe112755501b8d0466b5ada6cf50c8c3f41e897fa128ac5d263ec09eedc9f986
|
|
213
|
-
kreuzberg (4.3.5)
|
|
213
|
+
kreuzberg (4.3.6)
|
|
214
214
|
language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
|
|
215
215
|
lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
|
|
216
216
|
listen (3.10.0) sha256=c6e182db62143aeccc2e1960033bebe7445309c7272061979bb098d03760c9d2
|
data/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.3.5" alt="Go">
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.3.6" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
data/kreuzberg.gemspec
CHANGED
|
@@ -241,7 +241,7 @@ Gem::Specification.new do |spec|
|
|
|
241
241
|
spec.add_development_dependency 'rake', '~> 13.0'
|
|
242
242
|
spec.add_development_dependency 'rake-compiler', '~> 1.2'
|
|
243
243
|
spec.add_development_dependency 'rspec', '~> 3.12'
|
|
244
|
-
spec.add_development_dependency 'sorbet-runtime', '~> 0.5'
|
|
244
|
+
spec.add_dependency 'sorbet-runtime', '~> 0.5'
|
|
245
245
|
unless Gem.win_platform?
|
|
246
246
|
spec.add_development_dependency 'rbs', '~> 3.0'
|
|
247
247
|
spec.add_development_dependency 'rubocop', '~> 1.66'
|
data/lib/kreuzberg/version.rb
CHANGED