kreuzberg 4.8.5 → 4.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/kreuzberg_rb/native/Cargo.lock +107 -86
- data/ext/kreuzberg_rb/native/Cargo.toml +3 -3
- data/ext/kreuzberg_rb/native/src/config/types.rs +22 -0
- data/ext/kreuzberg_rb/native/src/error_handling.rs +7 -0
- data/ext/kreuzberg_rb/native/src/result.rs +7 -0
- data/lib/kreuzberg/errors.rb +3 -0
- data/lib/kreuzberg/result.rb +52 -5
- data/lib/kreuzberg/version.rb +1 -1
- data/sig/kreuzberg.rbs +26 -4
- data/vendor/Cargo.toml +7 -7
- data/vendor/kreuzberg/Cargo.toml +8 -8
- data/vendor/kreuzberg/README.md +1 -1
- data/vendor/kreuzberg/src/api/error.rs +1 -0
- data/vendor/kreuzberg/src/api/handlers.rs +74 -1
- data/vendor/kreuzberg/src/api/types.rs +11 -2
- data/vendor/kreuzberg/src/cancellation.rs +105 -0
- data/vendor/kreuzberg/src/chunking/boundary_detection.rs +496 -0
- data/vendor/kreuzberg/src/chunking/core.rs +8 -1
- data/vendor/kreuzberg/src/chunking/mod.rs +9 -10
- data/vendor/kreuzberg/src/chunking/semantic/merge.rs +477 -0
- data/vendor/kreuzberg/src/chunking/semantic/mod.rs +393 -0
- data/vendor/kreuzberg/src/chunking/semantic/topic.rs +224 -0
- data/vendor/kreuzberg/src/chunking/tokenizer_cache.rs +3 -3
- data/vendor/kreuzberg/src/core/config/extraction/core.rs +89 -1
- data/vendor/kreuzberg/src/core/config/layout.rs +8 -0
- data/vendor/kreuzberg/src/core/config/llm.rs +47 -1
- data/vendor/kreuzberg/src/core/config/ocr.rs +16 -1
- data/vendor/kreuzberg/src/core/config/processing.rs +63 -0
- data/vendor/kreuzberg/src/core/extractor/bytes.rs +1 -1
- data/vendor/kreuzberg/src/core/extractor/file.rs +1 -1
- data/vendor/kreuzberg/src/core/extractor/sync.rs +24 -21
- data/vendor/kreuzberg/src/core/formats.rs +2 -2
- data/vendor/kreuzberg/src/core/mime.rs +3 -3
- data/vendor/kreuzberg/src/core/pipeline/cache.rs +2 -2
- data/vendor/kreuzberg/src/doc_orientation.rs +22 -4
- data/vendor/kreuzberg/src/embeddings/mod.rs +250 -18
- data/vendor/kreuzberg/src/error.rs +6 -0
- data/vendor/kreuzberg/src/extraction/derive.rs +6 -1
- data/vendor/kreuzberg/src/extraction/docx/drawing.rs +2 -4
- data/vendor/kreuzberg/src/extraction/docx/mod.rs +185 -0
- data/vendor/kreuzberg/src/extraction/html/structure.rs +5 -7
- data/vendor/kreuzberg/src/extraction/image.rs +1 -0
- data/vendor/kreuzberg/src/extraction/pptx/content_builder.rs +1 -0
- data/vendor/kreuzberg/src/extraction/pst.rs +6 -7
- data/vendor/kreuzberg/src/extraction/transform/document_tree.rs +3 -0
- data/vendor/kreuzberg/src/extraction/transform/mod.rs +5 -0
- data/vendor/kreuzberg/src/extractors/djot_format/extractor.rs +38 -50
- data/vendor/kreuzberg/src/extractors/doc.rs +4 -1
- data/vendor/kreuzberg/src/extractors/docbook.rs +8 -12
- data/vendor/kreuzberg/src/extractors/docx.rs +16 -5
- data/vendor/kreuzberg/src/extractors/excel.rs +5 -2
- data/vendor/kreuzberg/src/extractors/frontmatter_utils.rs +2 -4
- data/vendor/kreuzberg/src/extractors/html.rs +173 -1
- data/vendor/kreuzberg/src/extractors/image.rs +268 -37
- data/vendor/kreuzberg/src/extractors/iwork/keynote.rs +4 -1
- data/vendor/kreuzberg/src/extractors/iwork/mod.rs +4 -8
- data/vendor/kreuzberg/src/extractors/iwork/numbers.rs +4 -1
- data/vendor/kreuzberg/src/extractors/iwork/pages.rs +4 -1
- data/vendor/kreuzberg/src/extractors/markdown.rs +22 -32
- data/vendor/kreuzberg/src/extractors/mdx.rs +22 -32
- data/vendor/kreuzberg/src/extractors/mod.rs +7 -12
- data/vendor/kreuzberg/src/extractors/pdf/extraction.rs +4 -0
- data/vendor/kreuzberg/src/extractors/pdf/mod.rs +161 -49
- data/vendor/kreuzberg/src/extractors/pdf/ocr.rs +148 -13
- data/vendor/kreuzberg/src/extractors/pdf/pages.rs +47 -1
- data/vendor/kreuzberg/src/extractors/ppt.rs +3 -0
- data/vendor/kreuzberg/src/extractors/pptx.rs +3 -0
- data/vendor/kreuzberg/src/extractors/rtf/parser.rs +4 -5
- data/vendor/kreuzberg/src/keywords/mod.rs +6 -10
- data/vendor/kreuzberg/src/language_detection/mod.rs +6 -10
- data/vendor/kreuzberg/src/layout/engine.rs +9 -2
- data/vendor/kreuzberg/src/layout/mod.rs +17 -6
- data/vendor/kreuzberg/src/layout/models/rtdetr.rs +5 -2
- data/vendor/kreuzberg/src/layout/models/slanet.rs +5 -2
- data/vendor/kreuzberg/src/layout/models/table_classifier.rs +5 -2
- data/vendor/kreuzberg/src/layout/models/tatr.rs +5 -2
- data/vendor/kreuzberg/src/layout/models/yolo.rs +2 -1
- data/vendor/kreuzberg/src/layout/session.rs +4 -51
- data/vendor/kreuzberg/src/lib.rs +2 -0
- data/vendor/kreuzberg/src/mcp/errors.rs +18 -0
- data/vendor/kreuzberg/src/mcp/params.rs +19 -1
- data/vendor/kreuzberg/src/mcp/server.rs +14 -3
- data/vendor/kreuzberg/src/ocr/processor/execution.rs +8 -16
- data/vendor/kreuzberg/src/ort_discovery.rs +75 -1
- data/vendor/kreuzberg/src/paddle_ocr/backend.rs +43 -5
- data/vendor/kreuzberg/src/pdf/bindings.rs +40 -15
- data/vendor/kreuzberg/src/pdf/error.rs +3 -0
- data/vendor/kreuzberg/src/pdf/fonts.rs +2 -2
- data/vendor/kreuzberg/src/pdf/images.rs +1 -1
- data/vendor/kreuzberg/src/pdf/layout_runner.rs +1 -0
- data/vendor/kreuzberg/src/pdf/metadata.rs +1 -1
- data/vendor/kreuzberg/src/pdf/oxide/table.rs +6 -0
- data/vendor/kreuzberg/src/pdf/oxide/text.rs +1 -0
- data/vendor/kreuzberg/src/pdf/rendering.rs +1 -1
- data/vendor/kreuzberg/src/pdf/structure/bridge.rs +1 -1
- data/vendor/kreuzberg/src/pdf/structure/layout_classify.rs +2 -3
- data/vendor/kreuzberg/src/pdf/structure/pipeline.rs +116 -15
- data/vendor/kreuzberg/src/pdf/text.rs +2 -1
- data/vendor/kreuzberg/src/plugins/registry/mod.rs +11 -11
- data/vendor/kreuzberg/src/plugins/registry/ocr.rs +30 -31
- data/vendor/kreuzberg/src/stopwords/mod.rs +2 -2
- data/vendor/kreuzberg/src/text/token_reduction/filters/markdown.rs +2 -27
- data/vendor/kreuzberg/src/text/token_reduction/filters.rs +2 -1
- data/vendor/kreuzberg/src/types/extraction.rs +1 -1
- data/vendor/kreuzberg/src/types/internal.rs +18 -0
- data/vendor/kreuzberg/src/types/mod.rs +5 -0
- data/vendor/kreuzberg/src/types/page.rs +26 -1
- data/vendor/kreuzberg/src/utils/markdown_utils.rs +40 -0
- data/vendor/kreuzberg/src/utils/mod.rs +1 -0
- data/vendor/kreuzberg/src/utils/string_pool/buffer_pool.rs +3 -3
- data/vendor/kreuzberg/src/utils/string_pool/language_pool.rs +2 -2
- data/vendor/kreuzberg/src/utils/string_pool/mime_pool.rs +2 -2
- data/vendor/kreuzberg/src/utils/string_utils.rs +7 -7
- data/vendor/kreuzberg/tests/cross_format_parity.rs +9 -4
- data/vendor/kreuzberg/tests/llm_integration.rs +2 -0
- data/vendor/kreuzberg/tests/ocr_content_integrity.rs +154 -0
- data/vendor/kreuzberg/tests/pdf_image_extraction_tests.rs +42 -0
- data/vendor/kreuzberg-ffi/Cargo.toml +4 -4
- data/vendor/kreuzberg-ffi/kreuzberg.h +107 -4
- data/vendor/kreuzberg-ffi/src/cancellation.rs +167 -0
- data/vendor/kreuzberg-ffi/src/error.rs +32 -7
- data/vendor/kreuzberg-ffi/src/lib.rs +10 -4
- data/vendor/kreuzberg-paddle-ocr/Cargo.toml +1 -1
- data/vendor/kreuzberg-paddle-ocr/src/ocr_lite.rs +21 -0
- data/vendor/kreuzberg-pdfium-render/Cargo.toml +1 -1
- data/vendor/kreuzberg-tesseract/Cargo.toml +1 -1
- data/vendor/kreuzberg-tesseract/build.rs +11 -5
- metadata +14 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bb7b77bae36a5da34ce209fbf1ea7c0a68aef4b22f8b373b908f9c113f404ef5
|
|
4
|
+
data.tar.gz: a6c8667aee6ae2c9e11d45fc98fcb355561fec6e4a7d51d852664bd6367af8cc
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7569a4914ab4a4d440a0c74e622a9f26f7189b62bc9c2d05fc5e857a32c8fabde8eb854edef34e94bd95d5357e44137c1573e7ce68db45ed85c26dbe31e6972b
|
|
7
|
+
data.tar.gz: 9741106549d7bf79cc1ae34a07f686cca1bf6a4c19fcb01b40cd8f1372166c8e9c3a0321e1e26e416ebb98d413ee1c9093d247949292a7c07a85594ea1df508e
|
data/README.md
CHANGED
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
<img src="https://img.shields.io/maven-central/v/dev.kreuzberg/kreuzberg?label=Java&color=007ec6" alt="Java">
|
|
23
23
|
</a>
|
|
24
24
|
<a href="https://github.com/kreuzberg-dev/kreuzberg/releases">
|
|
25
|
-
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.
|
|
25
|
+
<img src="https://img.shields.io/github/v/tag/kreuzberg-dev/kreuzberg?label=Go&color=007ec6&filter=v4.9.0" alt="Go">
|
|
26
26
|
</a>
|
|
27
27
|
<a href="https://www.nuget.org/packages/Kreuzberg/">
|
|
28
28
|
<img src="https://img.shields.io/nuget/v/Kreuzberg?label=C%23&color=007ec6" alt="C#">
|
|
@@ -304,9 +304,9 @@ dependencies = [
|
|
|
304
304
|
|
|
305
305
|
[[package]]
|
|
306
306
|
name = "aws-lc-rs"
|
|
307
|
-
version = "1.16.
|
|
307
|
+
version = "1.16.3"
|
|
308
308
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
309
|
-
checksum = "
|
|
309
|
+
checksum = "0ec6fb3fe69024a75fa7e1bfb48aa6cf59706a101658ea01bfd33b2b248a038f"
|
|
310
310
|
dependencies = [
|
|
311
311
|
"aws-lc-sys",
|
|
312
312
|
"zeroize",
|
|
@@ -314,9 +314,9 @@ dependencies = [
|
|
|
314
314
|
|
|
315
315
|
[[package]]
|
|
316
316
|
name = "aws-lc-sys"
|
|
317
|
-
version = "0.
|
|
317
|
+
version = "0.40.0"
|
|
318
318
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
319
|
-
checksum = "
|
|
319
|
+
checksum = "f50037ee5e1e41e7b8f9d161680a725bd1626cb6f8c7e901f91f942850852fe7"
|
|
320
320
|
dependencies = [
|
|
321
321
|
"cc",
|
|
322
322
|
"cmake",
|
|
@@ -415,7 +415,7 @@ checksum = "53d0c374feba1b9a59042a7c1cf00ce7c34b977b9134fe7c42b08e5183729f66"
|
|
|
415
415
|
dependencies = [
|
|
416
416
|
"paste",
|
|
417
417
|
"roman-numerals-rs",
|
|
418
|
-
"strum",
|
|
418
|
+
"strum 0.27.2",
|
|
419
419
|
"unicode-normalization",
|
|
420
420
|
"unscanny",
|
|
421
421
|
]
|
|
@@ -476,17 +476,17 @@ checksum = "1e4b40c7323adcfc0a41c4b88143ed58346ff65a288fc144329c5c45e05d70c6"
|
|
|
476
476
|
|
|
477
477
|
[[package]]
|
|
478
478
|
name = "bitflags"
|
|
479
|
-
version = "2.11.
|
|
479
|
+
version = "2.11.1"
|
|
480
480
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
481
|
-
checksum = "
|
|
481
|
+
checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
|
|
482
482
|
|
|
483
483
|
[[package]]
|
|
484
484
|
name = "bitstream-io"
|
|
485
|
-
version = "4.
|
|
485
|
+
version = "4.10.0"
|
|
486
486
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
487
|
-
checksum = "
|
|
487
|
+
checksum = "7eff00be299a18769011411c9def0d827e8f2d7bf0c3dbf53633147a8867fd1f"
|
|
488
488
|
dependencies = [
|
|
489
|
-
"
|
|
489
|
+
"no_std_io2",
|
|
490
490
|
]
|
|
491
491
|
|
|
492
492
|
[[package]]
|
|
@@ -817,9 +817,9 @@ dependencies = [
|
|
|
817
817
|
|
|
818
818
|
[[package]]
|
|
819
819
|
name = "clap"
|
|
820
|
-
version = "4.6.
|
|
820
|
+
version = "4.6.1"
|
|
821
821
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
822
|
-
checksum = "
|
|
822
|
+
checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51"
|
|
823
823
|
dependencies = [
|
|
824
824
|
"clap_builder",
|
|
825
825
|
]
|
|
@@ -1065,15 +1065,6 @@ version = "0.8.7"
|
|
|
1065
1065
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1066
1066
|
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
|
|
1067
1067
|
|
|
1068
|
-
[[package]]
|
|
1069
|
-
name = "core2"
|
|
1070
|
-
version = "0.4.0"
|
|
1071
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1072
|
-
checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505"
|
|
1073
|
-
dependencies = [
|
|
1074
|
-
"memchr",
|
|
1075
|
-
]
|
|
1076
|
-
|
|
1077
1068
|
[[package]]
|
|
1078
1069
|
name = "core_maths"
|
|
1079
1070
|
version = "0.1.1"
|
|
@@ -1162,9 +1153,9 @@ dependencies = [
|
|
|
1162
1153
|
|
|
1163
1154
|
[[package]]
|
|
1164
1155
|
name = "ctor"
|
|
1165
|
-
version = "0.
|
|
1156
|
+
version = "0.10.0"
|
|
1166
1157
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1167
|
-
checksum = "
|
|
1158
|
+
checksum = "95d0d11eb38e7642efca359c3cf6eb7b2e528182d09110165de70192b0352775"
|
|
1168
1159
|
dependencies = [
|
|
1169
1160
|
"ctor-proc-macro",
|
|
1170
1161
|
"dtor",
|
|
@@ -1248,9 +1239,9 @@ dependencies = [
|
|
|
1248
1239
|
|
|
1249
1240
|
[[package]]
|
|
1250
1241
|
name = "dary_heap"
|
|
1251
|
-
version = "0.3.
|
|
1242
|
+
version = "0.3.9"
|
|
1252
1243
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1253
|
-
checksum = "
|
|
1244
|
+
checksum = "8b1e3a325bc115f096c8b77bbf027a7c2592230e70be2d985be950d3d5e60ebe"
|
|
1254
1245
|
dependencies = [
|
|
1255
1246
|
"serde",
|
|
1256
1247
|
]
|
|
@@ -1417,9 +1408,9 @@ dependencies = [
|
|
|
1417
1408
|
|
|
1418
1409
|
[[package]]
|
|
1419
1410
|
name = "dtor"
|
|
1420
|
-
version = "0.
|
|
1411
|
+
version = "0.7.0"
|
|
1421
1412
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1422
|
-
checksum = "
|
|
1413
|
+
checksum = "17f72721db8027a4e96dd6fb50d2a1d32259c9d3da1b63dee612ccd981e14293"
|
|
1423
1414
|
dependencies = [
|
|
1424
1415
|
"dtor-proc-macro",
|
|
1425
1416
|
]
|
|
@@ -1803,9 +1794,9 @@ dependencies = [
|
|
|
1803
1794
|
|
|
1804
1795
|
[[package]]
|
|
1805
1796
|
name = "geo-types"
|
|
1806
|
-
version = "0.7.
|
|
1797
|
+
version = "0.7.19"
|
|
1807
1798
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1808
|
-
checksum = "
|
|
1799
|
+
checksum = "94776032c45f950d30a13af6113c2ad5625316c9abfbccee4dd5a6695f8fe0f5"
|
|
1809
1800
|
dependencies = [
|
|
1810
1801
|
"approx",
|
|
1811
1802
|
"num-traits",
|
|
@@ -2018,21 +2009,15 @@ dependencies = [
|
|
|
2018
2009
|
|
|
2019
2010
|
[[package]]
|
|
2020
2011
|
name = "hashbrown"
|
|
2021
|
-
version = "0.
|
|
2012
|
+
version = "0.17.0"
|
|
2022
2013
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2023
|
-
checksum = "
|
|
2014
|
+
checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51"
|
|
2024
2015
|
dependencies = [
|
|
2025
2016
|
"allocator-api2",
|
|
2026
2017
|
"equivalent",
|
|
2027
2018
|
"foldhash 0.2.0",
|
|
2028
2019
|
]
|
|
2029
2020
|
|
|
2030
|
-
[[package]]
|
|
2031
|
-
name = "hashbrown"
|
|
2032
|
-
version = "0.17.0"
|
|
2033
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2034
|
-
checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51"
|
|
2035
|
-
|
|
2036
2021
|
[[package]]
|
|
2037
2022
|
name = "hashify"
|
|
2038
2023
|
version = "0.2.9"
|
|
@@ -2053,9 +2038,9 @@ checksum = "9f4d0e94ddd48749f06bbe4e5389fb9799a0c45bcaf00495042076ef05e3241a"
|
|
|
2053
2038
|
|
|
2054
2039
|
[[package]]
|
|
2055
2040
|
name = "hayro-jbig2"
|
|
2056
|
-
version = "0.
|
|
2041
|
+
version = "0.3.0"
|
|
2057
2042
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2058
|
-
checksum = "
|
|
2043
|
+
checksum = "69374b3668dd45aeb3d3145cda68f2c7b4f223aaa2511e67d076f1c7d741388d"
|
|
2059
2044
|
dependencies = [
|
|
2060
2045
|
"hayro-ccitt",
|
|
2061
2046
|
]
|
|
@@ -2142,9 +2127,9 @@ dependencies = [
|
|
|
2142
2127
|
|
|
2143
2128
|
[[package]]
|
|
2144
2129
|
name = "html-to-markdown-rs"
|
|
2145
|
-
version = "3.
|
|
2130
|
+
version = "3.2.5"
|
|
2146
2131
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2147
|
-
checksum = "
|
|
2132
|
+
checksum = "bcb619abe81160bba2e2185823e10f6c0793220a266f16791aa715287de322cd"
|
|
2148
2133
|
dependencies = [
|
|
2149
2134
|
"ahash",
|
|
2150
2135
|
"astral-tl",
|
|
@@ -2248,9 +2233,9 @@ dependencies = [
|
|
|
2248
2233
|
|
|
2249
2234
|
[[package]]
|
|
2250
2235
|
name = "hyper-rustls"
|
|
2251
|
-
version = "0.27.
|
|
2236
|
+
version = "0.27.9"
|
|
2252
2237
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2253
|
-
checksum = "
|
|
2238
|
+
checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f"
|
|
2254
2239
|
dependencies = [
|
|
2255
2240
|
"http",
|
|
2256
2241
|
"hyper",
|
|
@@ -2783,7 +2768,7 @@ dependencies = [
|
|
|
2783
2768
|
|
|
2784
2769
|
[[package]]
|
|
2785
2770
|
name = "kreuzberg"
|
|
2786
|
-
version = "4.8.
|
|
2771
|
+
version = "4.8.6"
|
|
2787
2772
|
dependencies = [
|
|
2788
2773
|
"ahash",
|
|
2789
2774
|
"async-trait",
|
|
@@ -2878,7 +2863,7 @@ dependencies = [
|
|
|
2878
2863
|
|
|
2879
2864
|
[[package]]
|
|
2880
2865
|
name = "kreuzberg-ffi"
|
|
2881
|
-
version = "4.8.
|
|
2866
|
+
version = "4.8.6"
|
|
2882
2867
|
dependencies = [
|
|
2883
2868
|
"ahash",
|
|
2884
2869
|
"async-trait",
|
|
@@ -2894,7 +2879,7 @@ dependencies = [
|
|
|
2894
2879
|
|
|
2895
2880
|
[[package]]
|
|
2896
2881
|
name = "kreuzberg-paddle-ocr"
|
|
2897
|
-
version = "4.8.
|
|
2882
|
+
version = "4.8.6"
|
|
2898
2883
|
dependencies = [
|
|
2899
2884
|
"geo-clipper",
|
|
2900
2885
|
"geo-types",
|
|
@@ -2908,7 +2893,7 @@ dependencies = [
|
|
|
2908
2893
|
|
|
2909
2894
|
[[package]]
|
|
2910
2895
|
name = "kreuzberg-pdfium-render"
|
|
2911
|
-
version = "4.8.
|
|
2896
|
+
version = "4.8.6"
|
|
2912
2897
|
dependencies = [
|
|
2913
2898
|
"bitflags",
|
|
2914
2899
|
"bytemuck",
|
|
@@ -2931,7 +2916,7 @@ dependencies = [
|
|
|
2931
2916
|
|
|
2932
2917
|
[[package]]
|
|
2933
2918
|
name = "kreuzberg-rb"
|
|
2934
|
-
version = "4.8.
|
|
2919
|
+
version = "4.8.6"
|
|
2935
2920
|
dependencies = [
|
|
2936
2921
|
"async-trait",
|
|
2937
2922
|
"html-to-markdown-rs",
|
|
@@ -2948,7 +2933,7 @@ dependencies = [
|
|
|
2948
2933
|
|
|
2949
2934
|
[[package]]
|
|
2950
2935
|
name = "kreuzberg-tesseract"
|
|
2951
|
-
version = "4.8.
|
|
2936
|
+
version = "4.8.6"
|
|
2952
2937
|
dependencies = [
|
|
2953
2938
|
"cc",
|
|
2954
2939
|
"cmake",
|
|
@@ -2977,9 +2962,9 @@ checksum = "7a79a3332a6609480d7d0c9eab957bca6b455b91bb84e66d19f5ff66294b85b8"
|
|
|
2977
2962
|
|
|
2978
2963
|
[[package]]
|
|
2979
2964
|
name = "libbz2-rs-sys"
|
|
2980
|
-
version = "0.2.
|
|
2965
|
+
version = "0.2.3"
|
|
2981
2966
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2982
|
-
checksum = "
|
|
2967
|
+
checksum = "b3a6a8c165077efc8f3a971534c50ea6a1a18b329ef4a66e897a7e3a1494565f"
|
|
2983
2968
|
|
|
2984
2969
|
[[package]]
|
|
2985
2970
|
name = "libc"
|
|
@@ -3037,9 +3022,9 @@ dependencies = [
|
|
|
3037
3022
|
|
|
3038
3023
|
[[package]]
|
|
3039
3024
|
name = "link-section"
|
|
3040
|
-
version = "0.0
|
|
3025
|
+
version = "0.2.0"
|
|
3041
3026
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3042
|
-
checksum = "
|
|
3027
|
+
checksum = "468808413fa8bdf0edbe61c2bbc182dfc59885b94f496cf3fb42c9c96b1e0149"
|
|
3043
3028
|
|
|
3044
3029
|
[[package]]
|
|
3045
3030
|
name = "linux-raw-sys"
|
|
@@ -3055,9 +3040,9 @@ checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0"
|
|
|
3055
3040
|
|
|
3056
3041
|
[[package]]
|
|
3057
3042
|
name = "liter-llm"
|
|
3058
|
-
version = "1.2.
|
|
3043
|
+
version = "1.2.1"
|
|
3059
3044
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3060
|
-
checksum = "
|
|
3045
|
+
checksum = "1884be380e45da823105c85ef0fa188af81d57be7de9b65016576e1774fdd5f8"
|
|
3061
3046
|
dependencies = [
|
|
3062
3047
|
"base64 0.22.1",
|
|
3063
3048
|
"bytes",
|
|
@@ -3138,11 +3123,11 @@ dependencies = [
|
|
|
3138
3123
|
|
|
3139
3124
|
[[package]]
|
|
3140
3125
|
name = "lru"
|
|
3141
|
-
version = "0.
|
|
3126
|
+
version = "0.17.0"
|
|
3142
3127
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3143
|
-
checksum = "
|
|
3128
|
+
checksum = "0e0b564323a0fb6d54b864f625ae139de9612e27edb944dda37c109f05aac531"
|
|
3144
3129
|
dependencies = [
|
|
3145
|
-
"hashbrown 0.
|
|
3130
|
+
"hashbrown 0.17.0",
|
|
3146
3131
|
]
|
|
3147
3132
|
|
|
3148
3133
|
[[package]]
|
|
@@ -3462,6 +3447,15 @@ version = "1.0.6"
|
|
|
3462
3447
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3463
3448
|
checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
|
|
3464
3449
|
|
|
3450
|
+
[[package]]
|
|
3451
|
+
name = "no_std_io2"
|
|
3452
|
+
version = "0.9.3"
|
|
3453
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3454
|
+
checksum = "b51ed7824b6e07d354605f4abb3d9d300350701299da96642ee084f5ce631550"
|
|
3455
|
+
dependencies = [
|
|
3456
|
+
"memchr",
|
|
3457
|
+
]
|
|
3458
|
+
|
|
3465
3459
|
[[package]]
|
|
3466
3460
|
name = "nom"
|
|
3467
3461
|
version = "7.1.3"
|
|
@@ -3900,9 +3894,9 @@ checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
|
|
|
3900
3894
|
|
|
3901
3895
|
[[package]]
|
|
3902
3896
|
name = "portable-atomic-util"
|
|
3903
|
-
version = "0.2.
|
|
3897
|
+
version = "0.2.7"
|
|
3904
3898
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3905
|
-
checksum = "
|
|
3899
|
+
checksum = "c2a106d1259c23fac8e543272398ae0e3c0b8d33c88ed73d0cc71b0f1d902618"
|
|
3906
3900
|
dependencies = [
|
|
3907
3901
|
"portable-atomic",
|
|
3908
3902
|
]
|
|
@@ -4014,9 +4008,9 @@ checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae"
|
|
|
4014
4008
|
|
|
4015
4009
|
[[package]]
|
|
4016
4010
|
name = "pxfm"
|
|
4017
|
-
version = "0.1.
|
|
4011
|
+
version = "0.1.29"
|
|
4018
4012
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4019
|
-
checksum = "
|
|
4013
|
+
checksum = "e0c5ccf5294c6ccd63a74f1565028353830a9c2f5eb0c682c355c471726a6e3f"
|
|
4020
4014
|
|
|
4021
4015
|
[[package]]
|
|
4022
4016
|
name = "quick-error"
|
|
@@ -4463,9 +4457,9 @@ dependencies = [
|
|
|
4463
4457
|
|
|
4464
4458
|
[[package]]
|
|
4465
4459
|
name = "rmcp"
|
|
4466
|
-
version = "1.
|
|
4460
|
+
version = "1.5.0"
|
|
4467
4461
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4468
|
-
checksum = "
|
|
4462
|
+
checksum = "67d69668de0b0ccd9cc435f700f3b39a7861863cf37a15e1f304ea78688a4826"
|
|
4469
4463
|
dependencies = [
|
|
4470
4464
|
"async-trait",
|
|
4471
4465
|
"base64 0.22.1",
|
|
@@ -4494,9 +4488,9 @@ dependencies = [
|
|
|
4494
4488
|
|
|
4495
4489
|
[[package]]
|
|
4496
4490
|
name = "rmcp-macros"
|
|
4497
|
-
version = "1.
|
|
4491
|
+
version = "1.5.0"
|
|
4498
4492
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
4499
|
-
checksum = "
|
|
4493
|
+
checksum = "48fdc01c81097b0aed18633e676e269fefa3a78ec1df56b4fe597c1241b92025"
|
|
4500
4494
|
dependencies = [
|
|
4501
4495
|
"darling 0.23.0",
|
|
4502
4496
|
"proc-macro2",
|
|
@@ -5117,7 +5111,16 @@ version = "0.27.2"
|
|
|
5117
5111
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5118
5112
|
checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf"
|
|
5119
5113
|
dependencies = [
|
|
5120
|
-
"strum_macros",
|
|
5114
|
+
"strum_macros 0.27.2",
|
|
5115
|
+
]
|
|
5116
|
+
|
|
5117
|
+
[[package]]
|
|
5118
|
+
name = "strum"
|
|
5119
|
+
version = "0.28.0"
|
|
5120
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5121
|
+
checksum = "9628de9b8791db39ceda2b119bbe13134770b56c138ec1d3af810d045c04f9bd"
|
|
5122
|
+
dependencies = [
|
|
5123
|
+
"strum_macros 0.28.0",
|
|
5121
5124
|
]
|
|
5122
5125
|
|
|
5123
5126
|
[[package]]
|
|
@@ -5132,6 +5135,18 @@ dependencies = [
|
|
|
5132
5135
|
"syn",
|
|
5133
5136
|
]
|
|
5134
5137
|
|
|
5138
|
+
[[package]]
|
|
5139
|
+
name = "strum_macros"
|
|
5140
|
+
version = "0.28.0"
|
|
5141
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5142
|
+
checksum = "ab85eea0270ee17587ed4156089e10b9e6880ee688791d45a905f5b1ca36f664"
|
|
5143
|
+
dependencies = [
|
|
5144
|
+
"heck",
|
|
5145
|
+
"proc-macro2",
|
|
5146
|
+
"quote",
|
|
5147
|
+
"syn",
|
|
5148
|
+
]
|
|
5149
|
+
|
|
5135
5150
|
[[package]]
|
|
5136
5151
|
name = "subtle"
|
|
5137
5152
|
version = "2.6.1"
|
|
@@ -5211,9 +5226,9 @@ dependencies = [
|
|
|
5211
5226
|
|
|
5212
5227
|
[[package]]
|
|
5213
5228
|
name = "text-splitter"
|
|
5214
|
-
version = "0.
|
|
5229
|
+
version = "0.30.1"
|
|
5215
5230
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5216
|
-
checksum = "
|
|
5231
|
+
checksum = "f1c090dcb5a7e4da833fcd8bdaf7fd5a9596c8fe9fe5c5355960243eaa4b5716"
|
|
5217
5232
|
dependencies = [
|
|
5218
5233
|
"ahash",
|
|
5219
5234
|
"auto_enums",
|
|
@@ -5223,7 +5238,7 @@ dependencies = [
|
|
|
5223
5238
|
"itertools 0.14.0",
|
|
5224
5239
|
"memchr",
|
|
5225
5240
|
"pulldown-cmark",
|
|
5226
|
-
"strum",
|
|
5241
|
+
"strum 0.28.0",
|
|
5227
5242
|
"thiserror 2.0.18",
|
|
5228
5243
|
"tokenizers",
|
|
5229
5244
|
]
|
|
@@ -5394,9 +5409,9 @@ dependencies = [
|
|
|
5394
5409
|
|
|
5395
5410
|
[[package]]
|
|
5396
5411
|
name = "tokio"
|
|
5397
|
-
version = "1.
|
|
5412
|
+
version = "1.52.1"
|
|
5398
5413
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5399
|
-
checksum = "
|
|
5414
|
+
checksum = "b67dee974fe86fd92cc45b7a95fdd2f99a36a6d7b0d431a231178d3d670bbcc6"
|
|
5400
5415
|
dependencies = [
|
|
5401
5416
|
"bytes",
|
|
5402
5417
|
"libc",
|
|
@@ -5674,9 +5689,9 @@ checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782"
|
|
|
5674
5689
|
|
|
5675
5690
|
[[package]]
|
|
5676
5691
|
name = "tree-sitter-language-pack"
|
|
5677
|
-
version = "1.
|
|
5692
|
+
version = "1.6.2"
|
|
5678
5693
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5679
|
-
checksum = "
|
|
5694
|
+
checksum = "104c982ba77c77942e81029458eb44524b62cc6f4ddb62ce1397420a08ff3c3b"
|
|
5680
5695
|
dependencies = [
|
|
5681
5696
|
"ahash",
|
|
5682
5697
|
"cc",
|
|
@@ -5853,7 +5868,7 @@ dependencies = [
|
|
|
5853
5868
|
"ureq-proto",
|
|
5854
5869
|
"utf8-zero",
|
|
5855
5870
|
"webpki-root-certs",
|
|
5856
|
-
"webpki-roots 1.0.
|
|
5871
|
+
"webpki-roots 1.0.7",
|
|
5857
5872
|
]
|
|
5858
5873
|
|
|
5859
5874
|
[[package]]
|
|
@@ -5951,9 +5966,9 @@ dependencies = [
|
|
|
5951
5966
|
|
|
5952
5967
|
[[package]]
|
|
5953
5968
|
name = "uuid"
|
|
5954
|
-
version = "1.23.
|
|
5969
|
+
version = "1.23.1"
|
|
5955
5970
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
5956
|
-
checksum = "
|
|
5971
|
+
checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76"
|
|
5957
5972
|
dependencies = [
|
|
5958
5973
|
"getrandom 0.4.2",
|
|
5959
5974
|
"js-sys",
|
|
@@ -6031,11 +6046,11 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
|
|
|
6031
6046
|
|
|
6032
6047
|
[[package]]
|
|
6033
6048
|
name = "wasip2"
|
|
6034
|
-
version = "1.0.
|
|
6049
|
+
version = "1.0.3+wasi-0.2.9"
|
|
6035
6050
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
6036
|
-
checksum = "
|
|
6051
|
+
checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6"
|
|
6037
6052
|
dependencies = [
|
|
6038
|
-
"wit-bindgen",
|
|
6053
|
+
"wit-bindgen 0.57.1",
|
|
6039
6054
|
]
|
|
6040
6055
|
|
|
6041
6056
|
[[package]]
|
|
@@ -6044,7 +6059,7 @@ version = "0.4.0+wasi-0.3.0-rc-2026-01-06"
|
|
|
6044
6059
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
6045
6060
|
checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5"
|
|
6046
6061
|
dependencies = [
|
|
6047
|
-
"wit-bindgen",
|
|
6062
|
+
"wit-bindgen 0.51.0",
|
|
6048
6063
|
]
|
|
6049
6064
|
|
|
6050
6065
|
[[package]]
|
|
@@ -6183,9 +6198,9 @@ dependencies = [
|
|
|
6183
6198
|
|
|
6184
6199
|
[[package]]
|
|
6185
6200
|
name = "webpki-root-certs"
|
|
6186
|
-
version = "1.0.
|
|
6201
|
+
version = "1.0.7"
|
|
6187
6202
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
6188
|
-
checksum = "
|
|
6203
|
+
checksum = "f31141ce3fc3e300ae89b78c0dd67f9708061d1d2eda54b8209346fd6be9a92c"
|
|
6189
6204
|
dependencies = [
|
|
6190
6205
|
"rustls-pki-types",
|
|
6191
6206
|
]
|
|
@@ -6196,14 +6211,14 @@ version = "0.26.11"
|
|
|
6196
6211
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
6197
6212
|
checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9"
|
|
6198
6213
|
dependencies = [
|
|
6199
|
-
"webpki-roots 1.0.
|
|
6214
|
+
"webpki-roots 1.0.7",
|
|
6200
6215
|
]
|
|
6201
6216
|
|
|
6202
6217
|
[[package]]
|
|
6203
6218
|
name = "webpki-roots"
|
|
6204
|
-
version = "1.0.
|
|
6219
|
+
version = "1.0.7"
|
|
6205
6220
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
6206
|
-
checksum = "
|
|
6221
|
+
checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d"
|
|
6207
6222
|
dependencies = [
|
|
6208
6223
|
"rustls-pki-types",
|
|
6209
6224
|
]
|
|
@@ -6575,6 +6590,12 @@ dependencies = [
|
|
|
6575
6590
|
"wit-bindgen-rust-macro",
|
|
6576
6591
|
]
|
|
6577
6592
|
|
|
6593
|
+
[[package]]
|
|
6594
|
+
name = "wit-bindgen"
|
|
6595
|
+
version = "0.57.1"
|
|
6596
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
6597
|
+
checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e"
|
|
6598
|
+
|
|
6578
6599
|
[[package]]
|
|
6579
6600
|
name = "wit-bindgen-core"
|
|
6580
6601
|
version = "0.51.0"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "kreuzberg-rb"
|
|
3
|
-
version = "4.
|
|
3
|
+
version = "4.9.0"
|
|
4
4
|
edition = "2024"
|
|
5
5
|
rust-version = "1.91"
|
|
6
6
|
authors = ["Na'aman Hirschfeld <naaman@kreuzberg.dev>"]
|
|
@@ -55,7 +55,7 @@ rb-sys = { version = "0.9.126", default-features = false, features = [
|
|
|
55
55
|
serde_json = "1.0.149"
|
|
56
56
|
toml = "1.1.2"
|
|
57
57
|
serde_yaml_ng = "0.10"
|
|
58
|
-
tokio = { version = "1.
|
|
58
|
+
tokio = { version = "1.52.1", features = [
|
|
59
59
|
"rt",
|
|
60
60
|
"rt-multi-thread",
|
|
61
61
|
"macros",
|
|
@@ -65,7 +65,7 @@ tokio = { version = "1.51.1", features = [
|
|
|
65
65
|
"time",
|
|
66
66
|
"io-util",
|
|
67
67
|
] }
|
|
68
|
-
html-to-markdown-rs = { version = "3.
|
|
68
|
+
html-to-markdown-rs = { version = "3.2.5", default-features = false }
|
|
69
69
|
|
|
70
70
|
[dev-dependencies]
|
|
71
71
|
pretty_assertions = "1.4"
|
|
@@ -42,6 +42,7 @@ pub fn parse_ocr_config(ruby: &Ruby, hash: RHash) -> Result<OcrConfig, Error> {
|
|
|
42
42
|
};
|
|
43
43
|
|
|
44
44
|
let mut config = OcrConfig {
|
|
45
|
+
enabled: true,
|
|
45
46
|
backend,
|
|
46
47
|
language,
|
|
47
48
|
paddle_ocr_config: None,
|
|
@@ -136,6 +137,8 @@ pub fn parse_chunking_config(ruby: &Ruby, hash: RHash) -> Result<ChunkingConfig,
|
|
|
136
137
|
{
|
|
137
138
|
match symbol_to_string(val)?.as_str() {
|
|
138
139
|
"markdown" => kreuzberg::ChunkerType::Markdown,
|
|
140
|
+
"yaml" => kreuzberg::ChunkerType::Yaml,
|
|
141
|
+
"semantic" => kreuzberg::ChunkerType::Semantic,
|
|
139
142
|
_ => kreuzberg::ChunkerType::Text,
|
|
140
143
|
}
|
|
141
144
|
} else {
|
|
@@ -150,6 +153,14 @@ pub fn parse_chunking_config(ruby: &Ruby, hash: RHash) -> Result<ChunkingConfig,
|
|
|
150
153
|
false
|
|
151
154
|
};
|
|
152
155
|
|
|
156
|
+
let topic_threshold = if let Some(val) = get_kw(ruby, hash, "topic_threshold")
|
|
157
|
+
&& val.equal(ruby.qnil()).ok() != Some(true)
|
|
158
|
+
{
|
|
159
|
+
Some(f64::try_convert(val)? as f32)
|
|
160
|
+
} else {
|
|
161
|
+
None
|
|
162
|
+
};
|
|
163
|
+
|
|
153
164
|
let config = ChunkingConfig {
|
|
154
165
|
max_characters: max_chars,
|
|
155
166
|
overlap: max_overlap,
|
|
@@ -159,6 +170,7 @@ pub fn parse_chunking_config(ruby: &Ruby, hash: RHash) -> Result<ChunkingConfig,
|
|
|
159
170
|
preset,
|
|
160
171
|
sizing,
|
|
161
172
|
prepend_heading_context,
|
|
173
|
+
topic_threshold,
|
|
162
174
|
};
|
|
163
175
|
|
|
164
176
|
Ok(config)
|
|
@@ -840,10 +852,20 @@ pub fn parse_layout_detection_config(ruby: &Ruby, hash: RHash) -> Result<LayoutD
|
|
|
840
852
|
kreuzberg::core::config::layout::TableModel::default()
|
|
841
853
|
};
|
|
842
854
|
|
|
855
|
+
let acceleration = if let Some(val) = get_kw(ruby, hash, "acceleration")
|
|
856
|
+
&& val.equal(ruby.qnil()).ok() != Some(true)
|
|
857
|
+
{
|
|
858
|
+
let accel_hash = RHash::try_convert(val)?;
|
|
859
|
+
Some(parse_acceleration_config(ruby, accel_hash)?)
|
|
860
|
+
} else {
|
|
861
|
+
None
|
|
862
|
+
};
|
|
863
|
+
|
|
843
864
|
let config = LayoutDetectionConfig {
|
|
844
865
|
confidence_threshold,
|
|
845
866
|
apply_heuristics,
|
|
846
867
|
table_model,
|
|
868
|
+
acceleration,
|
|
847
869
|
};
|
|
848
870
|
|
|
849
871
|
Ok(config)
|
|
@@ -108,6 +108,13 @@ pub fn kreuzberg_error(err: KreuzbergError) -> Error {
|
|
|
108
108
|
Error::new(ruby.exception_runtime_error(), format!("EmbeddingError: {}", message))
|
|
109
109
|
}
|
|
110
110
|
}
|
|
111
|
+
KreuzbergError::Cancelled => {
|
|
112
|
+
if let Some(class) = fetch_error_class("CancelledError") {
|
|
113
|
+
Error::new(class, "Extraction cancelled")
|
|
114
|
+
} else {
|
|
115
|
+
Error::new(ruby.exception_runtime_error(), "Extraction cancelled")
|
|
116
|
+
}
|
|
117
|
+
}
|
|
111
118
|
other => Error::new(ruby.exception_runtime_error(), other.to_string()),
|
|
112
119
|
}
|
|
113
120
|
}
|
|
@@ -751,6 +751,13 @@ pub fn extraction_result_to_ruby(ruby: &Ruby, result: RustExtractionResult) -> R
|
|
|
751
751
|
set_hash_entry(ruby, &hash, "llm_usage", ruby.qnil().as_value())?;
|
|
752
752
|
}
|
|
753
753
|
|
|
754
|
+
// Convert structured output (Value::Null maps to qnil via json_value_to_ruby)
|
|
755
|
+
let structured_ruby = match &result.structured_output {
|
|
756
|
+
Some(val) => json_value_to_ruby(ruby, val)?,
|
|
757
|
+
None => ruby.qnil().as_value(),
|
|
758
|
+
};
|
|
759
|
+
set_hash_entry(ruby, &hash, "structured_output", structured_ruby)?;
|
|
760
|
+
|
|
754
761
|
// Convert annotations
|
|
755
762
|
if let Some(annotations) = result.annotations {
|
|
756
763
|
let annotations_array = ruby.ary_new();
|
data/lib/kreuzberg/errors.rb
CHANGED